summaryrefslogtreecommitdiff
path: root/test/ragel.d/url1.rl
diff options
context:
space:
mode:
Diffstat (limited to 'test/ragel.d/url1.rl')
-rw-r--r--test/ragel.d/url1.rl540
1 files changed, 0 insertions, 540 deletions
diff --git a/test/ragel.d/url1.rl b/test/ragel.d/url1.rl
deleted file mode 100644
index fffc6b4f..00000000
--- a/test/ragel.d/url1.rl
+++ /dev/null
@@ -1,540 +0,0 @@
-(*
- * @LANG: ocaml
- *)
-
-(*
-//
-// URL Parser
-// Copyright (c) 2010 J.A. Roberts Tunney
-// MIT License
-//
-// Converted to OCaml by ygrek
-//
-// To compile:
-//
-// ragel -O url.rl -o url.ml
-// ragel -O url_authority.rl -o url_authority.ml
-// ocamlopt -g unix.cmxa url_authority.ml url.ml -o url
-// ./url
-//
-// To show a diagram of your state machine:
-//
-// ragel -V -G2 -p -o url.dot url.rl
-// dot -Tpng -o url.png url.dot
-// chrome url.png
-//
-// ragel -V -G2 -p -o url_authority.dot url_authority.rl
-// dot -Tpng -o url_authority.png url_authority.dot
-// chrome url_authority.png
-//
-// Reference:
-//
-// - http://tools.ietf.org/html/rfc3986
-//
-*)
-
-(*
-// -*-go-*-
-//
-// URL Parser
-// Copyright (c) 2010 J.A. Roberts Tunney
-// MIT License
-//
-*)
-
-%% machine url_authority;
-%% write data;
-
-(*
-// i parse strings like `alice@pokémon.com`.
-//
-// sounds simple right? but i also parse stuff like:
-//
-// bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00
-//
-// which in actual reality is:
-//
-// - User: "bob barker"
-// - Pass: "priceisright"
-// - Host: "dead:beef::666"
-// - Port: 5060
-// - Params: "isup-oli=00"
-//
-// which was probably extracted from an absolute url that looked like:
-//
-// sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg
-//
-// which was probably extracted from its address form:
-//
-// "Bob Barker" <sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg>;tag=666
-//
-// who would have thought this could be so hard ._.
-*)
-
-type url = {
- scheme : string; (* http, sip, file, etc. (never blank, always lowercase) *)
- user : string; (* who is you *)
- pass : string; (* for like, logging in *)
- host : string; (* IP 4/6 address or hostname (mandatory) *)
- port : int; (* like 80 or 5060 (default 0) *)
- params : string; (* stuff after ';' (NOT UNESCAPED, used in sip) *)
- path : string; (* stuff starting with '/' *)
- query : string; (* stuff after '?' (NOT UNESCAPED) *)
- fragment : string; (* stuff after '#' *)
-}
-
-let fail fmt = Printf.ksprintf failwith fmt
-
-let unhex c =
- match c with
- | '0'..'9' -> Char.code c - Char.code '0'
- | 'a'..'f' -> Char.code c - Char.code 'a' + 10
- | 'A'..'F' -> Char.code c - Char.code 'A' + 10
- | _ -> fail "unhex %C" c
-
-let parse_authority u data =
- let (cs, p, pe, eof) = (ref 0, ref 0, ref (String.length data), ref (String.length data)) in
- let mark = ref 0 in
-
-(*
- // temporary holding place for user:pass and/or host:port cuz an
- // optional term (user[:pass]) coming before a mandatory term
- // (host[:pass]) would require require backtracking and all that
- // evil nondeterministic stuff which ragel seems to hate. (for
- // this same reason you're also allowed to use square quotes
- // around the username.)
-*)
- let (b1, b2) = (ref "", ref "") in
-
-(*
- // this buffer is so we can unescape while we roll
- var hex byte
- buf := make([]byte, len(data))
- amt := 0
-*)
- let buf = Buffer.create 10 in
- let hex = ref 0 in
-
- %%{
- action mark { mark := !p }
- action str_start { Buffer.reset buf }
- action str_char { Buffer.add_char buf data.[p.contents] }
- action str_lower { Buffer.add_char buf (Char.lowercase data.[p.contents])}
- action hex_hi { hex := unhex data.[p.contents] * 16 }
- action hex_lo { Buffer.add_char buf (Char.chr (!hex + unhex data.[p.contents])) }
- action copy_b1 { b1 := Buffer.contents buf; Buffer.clear buf }
- action copy_b2 { b2 := Buffer.contents buf; Buffer.clear buf }
- action copy_host { u := { !u with host = !b1 }; Buffer.clear buf }
-
- action copy_port {
- if !b2 <> "" then
- begin
- u := { !u with port = int_of_string !b2 };
- if !u.port > 65535 then fail "bad url authority: %S" data
- end
- }
-
- action params {
- u := { !u with params = String.sub data !mark (!p - !mark) }
- }
-
- action params_eof {
- u := { !u with params = String.sub data !mark (!p - !mark) }
-(* return nil *)
- }
-
- action atsymbol {
- u := { !u with user = !b1; pass = !b2 };
- b2 := ""
- }
-
- action alldone {
- u := { !u with host = !b1 };
- if !u.host = "" then
- u := { !u with host = Buffer.contents buf }
- else
- begin
- if Buffer.length buf > 0 then b2 := Buffer.contents buf;
- if !b2 <> "" then
- begin
- u := { !u with port = int_of_string !b2 };
- if !u.port > 65535 then fail "bad url authority: %S" data
- end
- end
-(* return nil *)
- }
-
- # define what a single character is allowed to be
- toxic = ( cntrl | 127 ) ;
- scary = ( toxic | space | "\"" | "#" | "%" | "<" | ">" ) ;
- authdelims = ( "/" | "?" | "#" | ":" | "@" | ";" | "[" | "]" ) ;
- userchars = any -- ( authdelims | scary ) ;
- userchars_esc = userchars | ":" ;
- passchars = userchars ;
- hostchars = passchars | "@" ;
- hostchars_esc = hostchars | ":" ;
- portchars = digit ;
- paramchars = hostchars | ":" | ";" ;
-
- # define how characters trigger actions
- escape = "%" xdigit xdigit ;
- unescape = "%" ( xdigit @hex_hi ) ( xdigit @hex_lo ) ;
- userchar = unescape | ( userchars @str_char ) ;
- userchar_esc = unescape | ( userchars_esc @str_char ) ;
- passchar = unescape | ( passchars @str_char ) ;
- hostchar = unescape | ( hostchars @str_char ) ;
- hostchar_esc = unescape | ( hostchars_esc @str_char ) ;
- portchar = unescape | ( portchars @str_char ) ;
- paramchar = escape | paramchars ;
-
- # define multi-character patterns
- user_plain = userchar+ >str_start %copy_b1 ;
- user_quoted = "[" ( userchar_esc+ >str_start %copy_b1 ) "]" ;
- user = ( user_quoted | user_plain ) %/alldone ;
- pass = passchar+ >str_start %copy_b2 %/alldone ;
- host_plain = hostchar+ >str_start %copy_b1 %copy_host ;
- host_quoted = "[" ( hostchar_esc+ >str_start %copy_b1 %copy_host ) "]" ;
- host = ( host_quoted | host_plain ) %/alldone ;
- port = portchar* >str_start %copy_b2 %copy_port %/alldone ;
- params = ";" ( paramchar* >mark %params %/params_eof ) ;
- userpass = user ( ":" pass )? ;
- hostport = host ( ":" port )? ;
- authority = ( userpass ( "@" @atsymbol ) )? hostport params? ;
-
- main := authority;
- write init;
- write exec;
- }%%
-
- (*
- // if cs >= url_authority_first_final {
- // return nil
- // }
- *)
-
- (*
- // fmt.Println("error state", cs)
- // fmt.Println(string(data))
- // for i := 0; i < p; i++ {
- // fmt.Print(" ")
- // }
- // fmt.Println("^")
- // fmt.Println(url)
- *)
- ;;
-
-
-let dummy = {
- scheme = ""; user = ""; pass = ""; host = ""; port = 0;
- params = ""; path = ""; query = ""; fragment = ""; }
-
-let show u =
- Printf.sprintf "%s :// %s : %s @ %s : %d ;%s %s ?%s #%s" u.scheme u.user u.pass u.host u.port
- u.params u.path u.query u.fragment
-
-%% machine url;
-%% write data;
-
-(*
-// i parse absolute urls and don't suck at it. i'll parse just about
-// any type of url you can think of and give you a human-friendly data
-// structure.
-//
-// this routine takes no more than a few microseconds, is reentrant,
-// performs in a predictable manner (for security/soft-realtime,)
-// doesn't modify your `data` buffer, and under no circumstances will
-// it panic (i hope!)
-*)
-let url_parse data =
- let (cs, p, pe, eof) = (ref 0, ref 0, ref (String.length data), ref (String.length data)) in
- let mark = ref 0 in
- let u = ref dummy in
-
- (*
- // this buffer is so we can unescape while we roll
- *)
- let buf = Buffer.create 16 in
- let hex = ref 0 in
-
- %%{
- action mark { mark := !p }
- action str_start { Buffer.reset buf }
- action str_char { Buffer.add_char buf data.[p.contents] }
- action str_lower { Buffer.add_char buf (Char.lowercase data.[p.contents])}
- action hex_hi { hex := unhex data.[p.contents] * 16 }
- action hex_lo { Buffer.add_char buf (Char.chr (!hex + unhex data.[p.contents])) }
- action scheme { u := { !u with scheme = Buffer.contents buf } }
- action authority { parse_authority u (String.sub data !mark (!p - !mark)) }
- action path { u := { !u with path = Buffer.contents buf } }
- action query { u := { !u with query = String.sub data !mark (!p - !mark) } }
- action fragment { u := { !u with fragment = Buffer.contents buf } }
-
- # # do this instead if you *actually* use URNs (lol)
- # action authority { url.Authority = string(data[mark:p]) }
-
- # define what a single character is allowed to be
- toxic = ( cntrl | 127 ) ;
- scary = ( toxic | " " | "\"" | "#" | "%" | "<" | ">" ) ;
- schmchars = ( lower | digit | "+" | "-" | "." ) ;
- authchars = any -- ( scary | "/" | "?" | "#" ) ;
- pathchars = any -- ( scary | "?" | "#" ) ;
- querchars = any -- ( scary | "#" ) ;
- fragchars = any -- ( scary ) ;
-
- # define how characters trigger actions
- escape = "%" xdigit xdigit ;
- unescape = "%" ( xdigit @hex_hi ) ( xdigit @hex_lo ) ;
- schmfirst = ( upper @str_lower ) | ( lower @str_char ) ;
- schmchar = ( upper @str_lower ) | ( schmchars @str_char ) ;
- authchar = escape | authchars ;
- pathchar = unescape | ( pathchars @str_char ) ;
- querchar = escape | querchars ;
- fragchar = unescape | ( fragchars @str_char ) ;
-
- # define multi-character patterns
- scheme = ( schmfirst schmchar* ) >str_start %scheme ;
- authority = authchar+ >mark %authority ;
- path = ( ( "/" @str_char ) pathchar* ) >str_start %path ;
- query = "?" ( querchar* >mark %query ) ;
- fragment = "#" ( fragchar* >str_start %fragment ) ;
- url = scheme ":" "//"? authority path? query? fragment?
- | scheme ":" "//" authority? path? query? fragment?
- ;
-
- main := url;
- write init;
- write exec;
- }%%
-
- if !cs < url_first_final then
- if !p = !pe then
- fail "unexpected eof: %s" data
- else
- fail "error in url at pos %d (%c): %s" !p data.[!p] data
- else
- !u
-
-(* ////////////////////////////////////////////////////////////////////// *)
-
-let tests = [
- "http://user:pass@example.com:80;hello/lol.php?fun#omg",
- {
- scheme = "http";
- user = "user";
- pass = "pass";
- host = "example.com";
- port = 80;
- params = "hello";
- path = "/lol.php";
- query = "fun";
- fragment = "omg";
- };
-
- "a:b",
- { dummy with
- scheme = "a";
- host = "b";
- };
-
- "GoPHeR://@example.com@:;/?#",
- { dummy with
- scheme = "gopher";
- host = "@example.com@";
- path = "/";
- };
-
- "ldap://[2001:db8::7]/c=GB?objectClass/?one",
- { dummy with
- scheme = "ldap";
- host = "2001:db8::7";
- path = "/c=GB";
- query = "objectClass/?one";
- };
-
- "http://user@example.com",
- { dummy with
- scheme = "http";
- user = "user";
- host = "example.com";
- };
-
- "http://品研发和研发管@☃.com:65000;%20",
- { dummy with
- scheme = "http";
- user = "品研发和研发管";
- host = "☃.com";
- port = 65000;
- params = "%20";
- };
-
- "https://example.com:80",
- { dummy with
- scheme = "https";
- host = "example.com";
- port = 80;
- };
-
- "file:///etc/passwd",
- { dummy with
- scheme = "file";
- path = "/etc/passwd";
- };
-
- "file:///c:/WINDOWS/clock.avi",
- { dummy with
- scheme = "file";
- path = "/c:/WINDOWS/clock.avi"; (* <-- is this kosher? *)
- };
-
- "file://hostname/path/to/the%20file.txt",
- { dummy with
- scheme = "file";
- host = "hostname";
- path = "/path/to/the file.txt";
- };
-
- "sip:example.com",
- { dummy with
- scheme = "sip";
- host = "example.com";
- };
-
- "sip:example.com:5060",
- { dummy with
- scheme = "sip";
- host = "example.com";
- port = 5060;
- };
-
- "mailto:ditto@pokémon.com",
- { dummy with
- scheme = "mailto";
- user = "ditto";
- host = "pokémon.com";
- };
-
- "sip:[dead:beef::666]:5060",
- { dummy with
- scheme = "sip";
- host = "dead:beef::666";
- port = 5060;
- };
-
- "tel:+12126660420",
- { dummy with
- scheme = "tel";
- host = "+12126660420";
- };
-
- "sip:bob%20barker:priceisright@[dead:beef::666]:5060;isup-oli=00/palfun.html?haha#omg",
- {
- scheme = "sip";
- user = "bob barker";
- pass = "priceisright";
- host = "dead:beef::666";
- port = 5060;
- params = "isup-oli=00";
- path = "/palfun.html";
- query = "haha";
- fragment = "omg";
- };
-
- "http://www.google.com/search?%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai=",
- { dummy with
- scheme = "http";
- host = "www.google.com";
- path = "/search";
- query = "%68l=en&safe=off&q=omfg&aq=f&aqi=g2g-s1g1g-s1g5&aql=&oq=&gs_rfai=";
- };
-]
-
-(*
-func (test *urlTest) compare(url *URL) (passed bool) {
- if url.Scheme != test.url.Scheme {
- fmt.Fprintf(os.Stderr, "FAIL url(%#v) scheme: %#v != %#v\n",
- string(test.s), url.Scheme, test.url.Scheme)
- passed = true
- }
- if url.User != test.url.User {
- fmt.Fprintf(os.Stderr, "FAIL url(%#v) user: %#v != %#v\n",
- string(test.s), url.User, test.url.User)
- passed = true
- }
- if url.Pass != test.url.Pass {
- fmt.Fprintf(os.Stderr, "FAIL url(%#v) pass: %#v != %#v\n",
- string(test.s), url.Pass, test.url.Pass)
- passed = true
- }
- if url.Host != test.url.Host {
- fmt.Fprintf(os.Stderr, "FAIL url(%#v) host: %#v != %#v\n",
- string(test.s), url.Host, test.url.Host)
- passed = true
- }
- if url.Port != test.url.Port {
- fmt.Fprintf(os.Stderr, "FAIL url(%#v) port: %#v != %#v\n",
- string(test.s), url.Port, test.url.Port)
- passed = true
- }
- if url.Port != test.url.Port {
- fmt.Fprintf(os.Stderr, "FAIL url(%#v) port: %#v != %#v\n",
- string(test.s), url.Port, test.url.Port)
- passed = true
- }
- if url.Params != test.url.Params {
- fmt.Fprintf(os.Stderr, "FAIL url(%#v) params: %#v != %#v\n",
- string(test.s), url.Params, test.url.Params)
- passed = true
- }
- if url.Path != test.url.Path {
- fmt.Fprintf(os.Stderr, "FAIL url(%#v) path: %#v != %#v\n",
- string(test.s), url.Path, test.url.Path)
- passed = true
- }
- if url.Query != test.url.Query {
- fmt.Fprintf(os.Stderr, "FAIL url(%#v) query: %#v != %#v\n",
- string(test.s), url.Query, test.url.Query)
- passed = true
- }
- if url.Fragment != test.url.Fragment {
- fmt.Fprintf(os.Stderr, "FAIL url(%#v) fragment: %#v != %#v\n",
- string(test.s), url.Fragment, test.url.Fragment)
- passed = true
- }
- return !passed
-}
-*)
-
-let bench () =
- let rounds = 0 in
- let urls = [
- "a:a";
- "http://google.com/";
- "sip:jtunney@lobstertech.com";
- "http://user:pass@example.com:80;hello/lol.php?fun#omg";
- "file:///etc/passwd";
- ] in
- List.iter (fun url ->
- for i = 1 to rounds do
- ignore (url_parse url)
- done;
- Printf.printf "BENCH parse %S \n%!" url
- ) urls
-
-let test () =
- List.iter (fun (s,res) ->
- let url = url_parse s in
- if url <> res then
- fail "got %S for %S" (show url) (*show res*) s
- ) tests
-
-let () =
- test ();
- bench ();
- exit 0
-
-##### OUTPUT #####
-BENCH parse "a:a"
-BENCH parse "http://google.com/"
-BENCH parse "sip:jtunney@lobstertech.com"
-BENCH parse "http://user:pass@example.com:80;hello/lol.php?fun#omg"
-BENCH parse "file:///etc/passwd"