diff options
Diffstat (limited to 'examples/go/url.rl')
-rw-r--r-- | examples/go/url.rl | 414 |
1 files changed, 0 insertions, 414 deletions
// -*-go-*-
//
// URL Parser
// Copyright (c) 2010 J.A. Roberts Tunney
// MIT License
//
// To compile:
//
//   ragel -Z -T0 -o url.go url.rl
//   ragel -Z -T0 -o url_authority.go url_authority.rl
//   go build -o url url.go url_authority.go
//   ./url
//
// To show a diagram of your state machine:
//
//   ragel -V -Z -p -o url.dot url.rl
//   xdot url.dot
//
//   ragel -V -Z -p -o url_authority.dot url_authority.rl
//   xdot url_authority.dot
//
// Reference:
//
// - http://tools.ietf.org/html/rfc3986
//

package main

import (
	"fmt"
	"os"
	"time"
)

// URL is the decomposed form of an absolute URL produced by URLParse.
//
// Scheme, Path, Query and Fragment are filled in by the state machine in
// this file. The remaining fields are filled in by (*URL).parseAuthority,
// which is generated from url_authority.rl.
type URL struct {
	Scheme   string // http, sip, file, etc. (never blank, always lowercase)
	User     string // user name from the authority section
	Pass     string // password from the authority section
	Host     string // IPv4/IPv6 address or hostname
	Port     int    // e.g. 80 or 5060 (0 when absent)
	Params   string // text after ';' (left percent-escaped; used by sip)
	Path     string // text starting with '/' (percent-escapes decoded)
	Query    string // text after '?' (left percent-escaped)
	Fragment string // text after '#' (percent-escapes decoded)
}

%% machine url;
%% write data;

// URLParse parses an absolute URL held in data and returns its components.
//
// The scheme is folded to lower case, and percent-escapes in the path and
// the fragment are decoded while scanning. The query is copied verbatim,
// and the raw authority section is handed to (*URL).parseAuthority.
//
// data is only read, never written. On failure the returned *URL is nil
// and err describes either an unexpected end of input or the byte offset
// at which the state machine rejected the input. An error returned by
// parseAuthority is passed through unchanged.
//
// The original author's notes state that the routine is reentrant, runs in
// a few microseconds and is not expected to panic.
func URLParse(data []byte) (url *URL, err error) {
	cs, p, pe, eof := 0, 0, len(data), len(data)
	mark := 0
	url = new(URL)

	// Scratch buffer so escapes can be decoded while scanning. Every input
	// byte appends at most one byte, so len(data) is always large enough.
	var hex byte
	buf := make([]byte, len(data))
	amt := 0

	%%{
		action mark      { mark = p                         }
		action str_start { amt = 0                          }
		action str_char  { buf[amt] = fc; amt++             }
		action str_lower { buf[amt] = fc + 0x20; amt++      }
		action hex_hi    { hex = unhex(fc) * 16             }
		action hex_lo    { hex += unhex(fc)
		                   buf[amt] = hex; amt++            }
		action scheme    { url.Scheme = string(buf[0:amt])  }
		action authority { err = url.parseAuthority(data[mark:p])
		                   if err != nil { return nil, err } }
		action path      { url.Path = string(buf[0:amt])    }
		action query     { url.Query = string(data[mark:p]) }
		action fragment  { url.Fragment = string(buf[0:amt]) }

		# Use this instead if you actually need URNs:
		# action authority { url.Authority = string(data[mark:p]) }

		# define what a single character is allowed to be
		toxic     = ( cntrl | 127 ) ;
		scary     = ( toxic | " " | "\"" | "#" | "%" | "<" | ">" ) ;
		schmchars = ( lower | digit | "+" | "-" | "." ) ;
		authchars = any -- ( scary | "/" | "?" | "#" ) ;
		pathchars = any -- ( scary | "?" | "#" ) ;
		querchars = any -- ( scary | "#" ) ;
		fragchars = any -- ( scary ) ;

		# define how characters trigger actions
		escape    = "%" xdigit xdigit ;
		unescape  = "%" ( xdigit @hex_hi ) ( xdigit @hex_lo ) ;
		schmfirst = ( upper @str_lower ) | ( lower @str_char ) ;
		schmchar  = ( upper @str_lower ) | ( schmchars @str_char ) ;
		authchar  = escape | authchars ;
		pathchar  = unescape | ( pathchars @str_char ) ;
		querchar  = escape | querchars ;
		fragchar  = unescape | ( fragchars @str_char ) ;

		# define multi-character patterns
		scheme    = ( schmfirst schmchar* ) >str_start %scheme ;
		authority = authchar+ >mark %authority ;
		path      = ( ( "/" @str_char ) pathchar* ) >str_start %path ;
		query     = "?" ( querchar* >mark %query ) ;
		fragment  = "#" ( fragchar* >str_start %fragment ) ;
		url       = scheme ":" "//"? authority path? query? fragment?
		          | scheme ":" "//" authority? path? query? fragment?
		          ;

		main := url;
		write init;
		write exec;
	}%%

	// A state below url_first_final means the machine stopped before
	// reaching an accepting state.
	if cs < url_first_final {
		if p == pe {
			return nil, fmt.Errorf("unexpected eof: %s", data)
		}
		return nil, fmt.Errorf("error in url at pos %d: %s", p, data)
	}

	return url, nil
}

// unhex returns the numeric value of the hexadecimal digit b.
// Any byte that is not a hex digit yields 0.
func unhex(b byte) byte {
	switch {
	case '0' <= b && b <= '9':
		return b - '0'
	case 'a' <= b && b <= 'f':
		return b - 'a' + 10
	case 'A' <= b && b <= 'F':
		return b - 'A' + 10
	}
	return 0
}

//////////////////////////////////////////////////////////////////////

// urlTest pairs a raw URL with the URL value URLParse is expected to
// produce for it.
type urlTest struct {
	s   []byte
	url URL
}

// urlTests is the table of inputs and expected results checked by test.
var urlTests = []urlTest{

	{
		[]byte("http://user:pass@example.com:80;hello/lol.php?fun#omg"),
		URL{
			Scheme:   "http",
			User:     "user",
			Pass:     "pass",
			Host:     "example.com",
			Port:     80,
			Params:   "hello",
			Path:     "/lol.php",
			Query:    "fun",
			Fragment: "omg",
		},
	},

	{
		[]byte("a:b"),
		URL{
			Scheme: "a",
			Host:   "b",
		},
	},

	{
		[]byte("GoPHeR://@example.com@:;/?#"),
		URL{
			Scheme: "gopher",
			Host:   "@example.com@",
			Path:   "/",
		},
	},

	{
		[]byte("ldap://[2001:db8::7]/c=GB?objectClass/?one"),
		URL{
			Scheme: "ldap",
			Host:   "2001:db8::7",
			Path:   "/c=GB",
			Query:  "objectClass/?one",
		},
	},

	{
		[]byte("http://user@example.com"),
		URL{
			Scheme: "http",
			User:   "user",
			Host:   "example.com",
		},
	},

	{
		[]byte("http://品研发和研发管@☃.com:65000;%20"),
		URL{
			Scheme: "http",
			User:   "品研发和研发管",
			Host:   "☃.com",
			Port:   65000,
			Params: "%20",
		},
	},

	{
		[]byte("https://example.com:80"),
		URL{
			Scheme: "https",
			Host:   "example.com",
			Port:   80,
		},
	},

	{
		[]byte("file:///etc/passwd"),
		URL{
			Scheme: "file",
			Path:   "/etc/passwd",
		},
	},

	{
		[]byte("file:///c:/WINDOWS/clock.avi"),
		URL{
			Scheme: "file",
			Path:   "/c:/WINDOWS/clock.avi", // <-- is this kosher?
		},
	},

	{
		[]byte("file://hostname/path/to/the%20file.txt"),
		URL{
			Scheme: "file",
			Host:   "hostname",
			Path:   "/path/to/the file.txt",
		},
	},

	{
		[]byte("sip:example.com"),
		URL{
			Scheme: "sip",
			Host:   "example.com",
		},
	},

	{
		[]byte("sip:example.com:5060"),
		URL{
			Scheme: "sip",
			Host:   "example.com",
			Port:   5060,
		},
	},

	{
		[]byte("mailto:ditto@pokémon.com"),
		URL{
			Scheme: "mailto",
			User:   "ditto",
			Host:   "pokémon.com",
		},
	},

	{
		[]byte("sip:[dead:beef::666]:5060"),
		URL{
			Scheme: "sip",
			Host:   "dead:beef::666",
			Port:   5060,
		},
	},

	{
		[]byte("tel:+12126660420"),
		URL{
			Scheme: "tel",
			Host:   "+12126660420",
		},
	},

	{
		[]byte("sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg"),
		URL{
			Scheme:   "sip",
			User:     "bob barker",
			Pass:     "priceisright",
			Host:     "dead:beef::666",
			Port:     5060,
			Params:   "isup-oli=00",
			Path:     "/palfun.html",
			Query:    "haha",
			Fragment: "omg",
		},
	},

	{
		[]byte("http://www.google.com/search?%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai="),
		URL{
			Scheme: "http",
			Host:   "www.google.com",
			Path:   "/search",
			Query:  "%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai=",
		},
	},
}

// compare reports whether url matches the expected value stored in t.
// Each mismatching field is reported exactly once on stderr as a
// "FAIL url(...) <field>: got != want" line.
func (t *urlTest) compare(url *URL) (passed bool) {
	passed = true
	// check reports one mismatching field and marks the comparison failed.
	// got and want always carry the same dynamic type (string or int), so
	// comparing them as interface values is well defined.
	check := func(field string, got, want interface{}) {
		if got == want {
			return
		}
		fmt.Fprintf(os.Stderr, "FAIL url(%#v) %s: %#v != %#v\n",
			string(t.s), field, got, want)
		passed = false
	}

	check("scheme", url.Scheme, t.url.Scheme)
	check("user", url.User, t.url.User)
	check("pass", url.Pass, t.url.Pass)
	check("host", url.Host, t.url.Host)
	check("port", url.Port, t.url.Port)
	check("params", url.Params, t.url.Params)
	check("path", url.Path, t.url.Path)
	check("query", url.Query, t.url.Query)
	check("fragment", url.Fragment, t.url.Fragment)
	return passed
}

// bench times URLParse on a handful of sample URLs and prints the mean
// wall-clock cost per call in nanoseconds.
func bench() {
	const rounds = 10000
	for _, s := range [][]byte{
		[]byte("a:a"),
		[]byte("http://google.com/"),
		[]byte("sip:jtunney@lobstertech.com"),
		[]byte("http://user:pass@example.com:80;hello/lol.php?fun#omg"),
		[]byte("file:///etc/passwd"),
	} {
		start := time.Now()
		for i := 0; i < rounds; i++ {
			URLParse(s) // result deliberately discarded; only the timing matters
		}
		elapsed := time.Since(start)
		fmt.Printf("BENCH URLParse(%s) -> %d ns\n", s, elapsed.Nanoseconds()/rounds)
	}
}

// test runs every entry of urlTests through URLParse and returns 0 when
// all of them parse and match their expected value, 1 otherwise.
func test() (rc int) {
	for _, tc := range urlTests {
		url, err := URLParse(tc.s)
		if err != nil {
			fmt.Fprintf(os.Stderr, "FAIL url(%#v) %s\n", string(tc.s), err)
			rc = 1
			continue
		}
		if !tc.compare(url) {
			rc = 1
		}
	}
	return rc
}

// main runs the self-test, runs the benchmark only if the self-test
// passed, and exits with the self-test's status.
func main() {
	rc := test()
	if rc == 0 {
		bench()
	}
	os.Exit(rc)
}