summaryrefslogtreecommitdiff
path: root/examples/go/url_authority.rl
blob: 3e651ad00dc46c65264c05a75ac37d49aca7ea19 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
// -*-go-*-
//
// URL Parser
// Copyright (c) 2010 J.A. Roberts Tunney
// MIT License
//

package main

import (
    "errors"
    "fmt"
    "strconv"
)

// Ragel directives: name the state machine defined in this file and ask
// Ragel to emit, at this point in the generated Go source, the static
// data that the generated parser code in parseAuthority relies on.
%% machine url_authority;
%% write data;

// parseAuthority parses the authority part of a URL -- strings like
// `alice@pokémon.com` -- and stores the pieces in url.User, url.Pass,
// url.Host, url.Port and url.Params.
//
// sounds simple right?  but it also parses stuff like:
//
//   bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00
//
// which in actual reality is:
//
// - User: "bob barker"
// - Pass: "priceisright"
// - Host: "dead:beef::666"
// - Port: 5060
// - Params: "isup-oli=00"
//
// which was probably extracted from an absolute url that looked like:
//
//   sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg
//
// which was probably extracted from its address form:
//
//   "Bob Barker" <sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg>;tag=666
//
// who would have thought this could be so hard ._.
//
// Percent-escapes are decoded in the user, pass, host and port terms.
// Params is copied verbatim from data, so escapes in it stay encoded.
//
// It returns nil on success.  If the input does not match the grammar,
// or the port does not convert to an integer in the range 0-65535, it
// returns a "bad url authority" error; fields of url that were assigned
// before the failure was detected are not reset.
func (url *URL) parseAuthority(data []byte) (err error) {
	cs, p, pe, eof := 0, 0, len(data), len(data)
	mark := 0

	// temporary holding place for user:pass and/or host:port cuz an
	// optional term (user[:pass]) coming before a mandatory term
	// (host[:port]) would require backtracking and all that evil
	// nondeterministic stuff which ragel seems to hate.  (for this
	// same reason you're also allowed to use square brackets around
	// the username.)
	var b1, b2 string

	// this buffer is so we can unescape while we roll.  an unescaped
	// term is never longer than its escaped form, so len(data) bytes
	// is always enough room.
	var hex byte
	buf := make([]byte, len(data))
	amt := 0

	%%{
		action mark        { mark = p                         }
		action str_start   { amt = 0                          }
		action str_char    { buf[amt] = fc; amt++             }
		action hex_hi      { hex = unhex(fc) * 16             }
		action hex_lo      { hex += unhex(fc)
		                     buf[amt] = hex; amt++            }
		action copy_b1     { b1 = string(buf[0:amt]); amt = 0 }
		action copy_b2     { b2 = string(buf[0:amt]); amt = 0 }
		action copy_host   { url.Host = b1; amt = 0           }

		action copy_port {
			if b2 != "" {
				url.Port, err = strconv.Atoi(b2)
				if err != nil { goto fail }
				// a percent-escaped minus sign can reach Atoi, so
				// negative values must be rejected as well
				if url.Port < 0 || url.Port > 65535 { goto fail }
			}
		}

		action params {
			url.Params = string(data[mark:p])
		}

		action params_eof {
			url.Params = string(data[mark:p])
			return nil
		}

		action atsymbol {
			url.User = b1
			url.Pass = b2
			b2 = ""
		}

		action alldone {
			url.Host = b1
			if url.Host == "" {
				// nothing was copied into b1, so the pending buffer
				// contents are taken as the host
				url.Host = string(buf[0:amt])
			} else {
				// pending buffer bytes, if any, become the port text
				if amt > 0 {
					b2 = string(buf[0:amt])
				}
				if b2 != "" {
					url.Port, err = strconv.Atoi(b2)
					if err != nil { goto fail }
					// same range check as in copy_port
					if url.Port < 0 || url.Port > 65535 { goto fail }
				}
			}
			return nil
		}

		# define what a single character is allowed to be
		toxic         = ( cntrl | 127 ) ;
		scary         = ( toxic | space | "\"" | "#" | "%" | "<" | ">" ) ;
		authdelims    = ( "/" | "?" | "#" | ":" | "@" | ";" | "[" | "]" ) ;
		userchars     = any -- ( authdelims | scary ) ;
		userchars_esc = userchars | ":" ;
		passchars     = userchars ;
		hostchars     = passchars | "@" ;
		hostchars_esc = hostchars | ":" ;
		portchars     = digit ;
		paramchars    = hostchars | ":" | ";" ;

		# define how characters trigger actions
		escape        = "%" xdigit xdigit ;
		unescape      = "%" ( xdigit @hex_hi ) ( xdigit @hex_lo ) ;
		userchar      = unescape | ( userchars @str_char ) ;
		userchar_esc  = unescape | ( userchars_esc @str_char ) ;
		passchar      = unescape | ( passchars @str_char ) ;
		hostchar      = unescape | ( hostchars @str_char ) ;
		hostchar_esc  = unescape | ( hostchars_esc @str_char ) ;
		portchar      = unescape | ( portchars @str_char ) ;
		paramchar     = escape | paramchars ;

		# define multi-character patterns
		user_plain    = userchar+ >str_start %copy_b1 ;
		user_quoted   = "[" ( userchar_esc+ >str_start %copy_b1 ) "]" ;
		user          = ( user_quoted | user_plain ) %/alldone ;
		pass          = passchar+ >str_start %copy_b2 %/alldone ;
		host_plain    = hostchar+ >str_start %copy_b1 %copy_host ;
		host_quoted   = "[" ( hostchar_esc+ >str_start %copy_b1 %copy_host ) "]" ;
		host          = ( host_quoted | host_plain ) %/alldone ;
		port          = portchar* >str_start %copy_b2 %copy_port %/alldone ;
		params        = ";" ( paramchar* >mark %params %/params_eof ) ;
		userpass      = user ( ":" pass )? ;
		hostport      = host ( ":" port )? ;
		authority     = ( userpass ( "@" @atsymbol ) )? hostport params? ;

		main := authority;
		write init;
		write exec;
	}%%

	// success is reported by the alldone and params_eof actions, which
	// return nil directly; anything that gets here falls through to
	// the failure path below.

	// if cs >= url_authority_first_final {
	// 	return nil
	// }

fail:
	// fmt.Println("error state", cs)
	// fmt.Println(string(data))
	// for i := 0; i < p; i++ {
	// 	fmt.Print(" ")
	// }
	// fmt.Println("^")
	// fmt.Println(url)
	return errors.New(fmt.Sprintf("bad url authority: %#v", string(data)))
}