diff options
-rw-r--r-- | tests/examplefiles/string.jl | 1031 |
1 files changed, 1031 insertions, 0 deletions
diff --git a/tests/examplefiles/string.jl b/tests/examplefiles/string.jl new file mode 100644 index 00000000..67bf6c70 --- /dev/null +++ b/tests/examplefiles/string.jl @@ -0,0 +1,1031 @@ +## core string functions ## + +length(s::String) = error("you must implement length(",typeof(s),")") +next(s::String, i::Int) = error("you must implement next(",typeof(s),",Int)") +next(s::DirectIndexString, i::Int) = (s[i],i+1) +next(s::String, i::Integer) = next(s,int(i)) + +## generic supplied functions ## + +start(s::String) = 1 +done(s::String,i) = (i > length(s)) +isempty(s::String) = done(s,start(s)) +ref(s::String, i::Int) = next(s,i)[1] +ref(s::String, i::Integer) = s[int(i)] +ref(s::String, x::Real) = s[iround(x)] +ref{T<:Integer}(s::String, r::Range1{T}) = s[int(first(r)):int(last(r))] + +symbol(s::String) = symbol(cstring(s)) +string(s::String) = s + +print(s::String) = for c=s; print(c); end +print(x...) = for i=x; print(i); end +println(args...) = print(args..., '\n') + +show(s::String) = print_quoted(s) + +(*)(s::String...) = strcat(s...) +(^)(s::String, r::Integer) = repeat(s,r) + +size(s::String) = (length(s),) +size(s::String, d::Integer) = d==1 ? length(s) : + error("in size: dimension ",d," out of range") + +strlen(s::DirectIndexString) = length(s) +function strlen(s::String) + i = start(s) + if done(s,i) + return 0 + end + n = 1 + while true + c, j = next(s,i) + if done(s,j) + return n + end + n += 1 + i = j + end +end + +isvalid(s::DirectIndexString, i::Integer) = (start(s) <= i <= length(s)) +function isvalid(s::String, i::Integer) + try + next(s,i) + true + catch + false + end +end + +prevind(s::DirectIndexString, i::Integer) = i-1 +thisind(s::DirectIndexString, i::Integer) = i +nextind(s::DirectIndexString, i::Integer) = i+1 + +prevind(s::String, i::Integer) = thisind(s,thisind(s,i)-1) + +function thisind(s::String, i::Integer) + for j = i:-1:1 + if isvalid(s,j) + return j + end + end + return 0 # out of range +end + +function nextind(s::String, i::Integer) + for j = i+1:length(s) + if isvalid(s,j) + return j + end + end + length(s)+1 # out of range +end + +ind2chr(s::DirectIndexString, i::Integer) = i +chr2ind(s::DirectIndexString, i::Integer) = i + +function ind2chr(s::String, i::Integer) + s[i] # throws error if invalid + j = 1 + k = start(s) + while true + c, l = next(s,k) + if i <= k + return j + end + j += 1 + k = l + end +end + +function chr2ind(s::String, i::Integer) + if i < 1 + return i + end + j = 1 + k = start(s) + while true + c, l = next(s,k) + if i == j + return k + end + j += 1 + k = l + end +end + +function strchr(s::String, c::Char, i::Integer) + i = nextind(s,i) + while !done(s,i) + d, j = next(s,i) + if c == d + return i + end + i = j + end + return 0 +end +strchr(s::String, c::Char) = strchr(s, c, start(s)) +contains(s::String, c::Char) = (strchr(s,c)!=0) + +function chars(s::String) + cx = Array(Char,strlen(s)) + i = 0 + for c in s + cx[i += 1] = c + end + return cx +end + +function cmp(a::String, b::String) + i = start(a) + j = start(b) + while !done(a,i) && !done(b,i) + c, i = next(a,i) + d, j = next(b,j) + if c != d + return c < d ? -1 : +1 + end + end + done(a,i) && !done(b,j) ? -1 : + !done(a,i) && done(b,j) ? +1 : 0 +end + +isequal(a::String, b::String) = cmp(a,b) == 0 +isless(a::String, b::String) = cmp(a,b) < 0 + +# faster comparisons for byte strings + +cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data) +isequal(a::ByteString, b::ByteString) = length(a)==length(b) && cmp(a,b)==0 + +## character column width function ## + +charwidth(c::Char) = max(0,int(ccall(:wcwidth, Int32, (Char,), c))) +strwidth(s::String) = (w=0; for c in s; w += charwidth(c); end; w) +strwidth(s::ByteString) = ccall(:u8_strwidth, Int, (Ptr{Uint8},), s.data) +# TODO: implement and use u8_strnwidth that takes a length argument + +## generic string uses only length and next ## + +type GenericString <: String + string::String +end + +length(s::GenericString) = length(s.string) +next(s::GenericString, i::Int) = next(s.string, i) + +## plain old character arrays ## + +type CharString <: String + chars::Array{Char,1} + + CharString(a::Array{Char,1}) = new(a) + CharString(c::Char...) = new([ c[i] | i=1:length(c) ]) +end +CharString(x...) = CharString(map(char,x)...) + +next(s::CharString, i::Int) = (s.chars[i], i+1) +length(s::CharString) = length(s.chars) +strlen(s::CharString) = length(s) + +string(c::Char) = CharString(c) +string(c::Char, x::Char...) = CharString(c, x...) + +## substrings reference original strings ## + +type SubString <: String + string::String + offset::Int + length::Int + + SubString(s::String, i::Int, j::Int) = new(s, i-1, j-i+1) + SubString(s::SubString, i::Int, j::Int) = + new(s.string, i-1+s.offset, j-i+1) +end +SubString(s::String, i::Integer, j::Integer) = SubString(s, int(i), int(j)) + +function next(s::SubString, i::Int) + if i < 1 || i > s.length + error("string index out of bounds") + end + c, i = next(s.string, i+s.offset) + c, i-s.offset +end + +length(s::SubString) = s.length +# TODO: strlen(s::SubString) = ?? +# default implementation will work but it's slow +# can this be delegated efficiently somehow? +# that may require additional string interfaces + +function ref(s::String, r::Range1{Int}) + if first(r) < 1 || length(s) < last(r) + error("in substring slice: index out of range") + end + SubString(s, first(r), last(r)) +end + +## efficient representation of repeated strings ## + +type RepString <: String + string::String + repeat::Integer +end + +length(s::RepString) = length(s.string)*s.repeat +strlen(s::RepString) = strlen(s.string)*s.repeat + +function next(s::RepString, i::Int) + if i < 1 || i > length(s) + error("string index out of bounds") + end + j = mod1(i,length(s.string)) + c, k = next(s.string, j) + c, k-j+i +end + +function repeat(s::String, r::Integer) + r < 0 ? error("can't repeat a string ",r," times") : + r == 0 ? "" : + r == 1 ? s : + RepString(s,r) +end + +## reversed strings without data movement ## + +type RevString <: String + string::String +end + +length(s::RevString) = length(s.string) +strlen(s::RevString) = strlen(s.string) + +start(s::RevString) = (n=length(s); n-thisind(s.string,n)+1) +function next(s::RevString, i::Int) + n = length(s); j = n-i+1 + (s.string[j], n-thisind(s.string,j-1)+1) +end + +reverse(s::String) = RevString(s) +reverse(s::RevString) = s.string + +## ropes for efficient concatenation, etc. ## + +# Idea: instead of this standard binary tree structure, +# how about we keep an array of substrings, with an +# offset array. We can do binary search on the offset +# array so we get O(log(n)) indexing time still, but we +# can compute the offsets lazily and avoid all the +# futzing around while the string is being constructed. + +type RopeString <: String + head::String + tail::String + depth::Int32 + length::Int + + RopeString(h::RopeString, t::RopeString) = + depth(h.tail) + depth(t) < depth(h.head) ? + RopeString(h.head, RopeString(h.tail, t)) : + new(h, t, max(h.depth,t.depth)+1, length(h)+length(t)) + + RopeString(h::RopeString, t::String) = + depth(h.tail) < depth(h.head) ? + RopeString(h.head, RopeString(h.tail, t)) : + new(h, t, h.depth+1, length(h)+length(t)) + + RopeString(h::String, t::RopeString) = + depth(t.head) < depth(t.tail) ? + RopeString(RopeString(h, t.head), t.tail) : + new(h, t, t.depth+1, length(h)+length(t)) + + RopeString(h::String, t::String) = + new(h, t, 1, length(h)+length(t)) +end + +depth(s::String) = 0 +depth(s::RopeString) = s.depth + +function next(s::RopeString, i::Int) + if i <= length(s.head) + return next(s.head, i) + else + c, j = next(s.tail, i-length(s.head)) + return c, j+length(s.head) + end +end + +length(s::RopeString) = s.length +strlen(s::RopeString) = strlen(s.head) + strlen(s.tail) + +strcat() = "" +strcat(s::String) = s +strcat(x...) = strcat(map(string,x)...) +strcat(s::String, t::String...) = + (t = strcat(t...); isempty(s) ? t : isempty(t) ? s : RopeString(s, t)) + +print(s::RopeString) = print(s.head, s.tail) + +## transformed strings ## + +type TransformedString <: String + transform::Function + string::String +end + +length(s::TransformedString) = length(s.string) +strlen(s::TransformedString) = strlen(s.string) + +function next(s::TransformedString, i::Int) + c, j = next(s.string,i) + c = s.transform(c, i) + return c, j +end + +## uppercase and lowercase transformations ## + +uppercase(c::Char) = ccall(:towupper, Char, (Char,), c) +lowercase(c::Char) = ccall(:towlower, Char, (Char,), c) + +uppercase(s::String) = TransformedString((c,i)->uppercase(c), s) +lowercase(s::String) = TransformedString((c,i)->lowercase(c), s) + +ucfirst(s::String) = TransformedString((c,i)->i==1 ? uppercase(c) : c, s) +lcfirst(s::String) = TransformedString((c,i)->i==1 ? lowercase(c) : c, s) + +const uc = uppercase +const lc = lowercase + +## string map ## + +function map(f::Function, s::String) + out = memio(length(s)) + for c in s + write(out, f(c)::Char) + end + takebuf_string(out) +end + +## conversion of general objects to strings ## + +string(x) = print_to_string(show, x) +cstring(x...) = print_to_string(print, x...) + +function cstring(p::Ptr{Uint8}) + p == C_NULL ? error("cannot convert NULL to string") : + ccall(:jl_cstr_to_string, Any, (Ptr{Uint8},), p)::ByteString +end + +## string promotion rules ## + +promote_rule(::Type{UTF8String} , ::Type{ASCIIString}) = UTF8String +promote_rule(::Type{UTF8String} , ::Type{CharString} ) = UTF8String +promote_rule(::Type{ASCIIString}, ::Type{CharString} ) = UTF8String + +## printing literal quoted string data ## + +# TODO: this is really the inverse of print_unbackslashed + +function print_quoted_literal(s::String) + print('"') + for c = s; c == '"' ? print("\\\"") : print(c); end + print('"') +end + +## string escaping & unescaping ## + +escape_nul(s::String, i::Int) = + !done(s,i) && '0' <= next(s,i)[1] <= '7' ? L"\x00" : L"\0" + +is_hex_digit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F' +need_full_hex(s::String, i::Int) = !done(s,i) && is_hex_digit(next(s,i)[1]) + +function print_escaped(s::String, esc::String) + i = start(s) + while !done(s,i) + c, j = next(s,i) + c == '\0' ? print(escape_nul(s,j)) : + c == '\e' ? print(L"\e") : + c == '\\' ? print("\\\\") : + contains(esc,c) ? print('\\', c) : + iswprint(c) ? print(c) : + 7 <= c <= 13 ? print('\\', "abtnvfr"[c-6]) : + c <= '\x7f' ? print(L"\x", hex(c, 2)) : + c <= '\uffff' ? print(L"\u", hex(c, need_full_hex(s,j) ? 4 : 2)) : + print(L"\U", hex(c, need_full_hex(s,j) ? 8 : 4)) + i = j + end +end + +escape_string(s::String) = print_to_string(length(s), print_escaped, s, "\"") +print_quoted(s::String) = (print('"'); print_escaped(s, "\"\$"); print('"')) +#" # work around syntax highlighting problem +quote_string(s::String) = print_to_string(length(s)+2, print_quoted, s) + +# bare minimum unescaping function unescapes only given characters + +function print_unescaped_chars(s::String, esc::String) + if !contains(esc,'\\') + esc = strcat("\\", esc) + end + i = start(s) + while !done(s,i) + c, i = next(s,i) + if c == '\\' && !done(s,i) && contains(esc,s[i]) + c, i = next(s,i) + end + print(c) + end +end + +unescape_chars(s::String, esc::String) = + print_to_string(length(s), print_unescaped_chars, s, esc) + +# general unescaping of traditional C and Unicode escape sequences + +function print_unescaped(s::String) + i = start(s) + while !done(s,i) + c, i = next(s,i) + if !done(s,i) && c == '\\' + c, i = next(s,i) + if c == 'x' || c == 'u' || c == 'U' + n = k = 0 + m = c == 'x' ? 2 : + c == 'u' ? 4 : 8 + while (k+=1) <= m && !done(s,i) + c, j = next(s,i) + n = '0' <= c <= '9' ? n<<4 + c-'0' : + 'a' <= c <= 'f' ? n<<4 + c-'a'+10 : + 'A' <= c <= 'F' ? n<<4 + c-'A'+10 : break + i = j + end + if k == 1 + error("\\x used with no following hex digits") + end + if m == 2 # \x escape sequence + write(uint8(n)) + else + print(char(n)) + end + elseif '0' <= c <= '7' + k = 1 + n = c-'0' + while (k+=1) <= 3 && !done(s,i) + c, j = next(s,i) + n = '0' <= c <= '7' ? n<<3 + c-'0' : break + i = j + end + if n > 255 + error("octal escape sequence out of range") + end + write(uint8(n)) + else + print(c == 'a' ? '\a' : + c == 'b' ? '\b' : + c == 't' ? '\t' : + c == 'n' ? '\n' : + c == 'v' ? '\v' : + c == 'f' ? '\f' : + c == 'r' ? '\r' : + c == 'e' ? '\e' : c) + end + else + print(c) + end + end +end + +unescape_string(s::String) = print_to_string(length(s), print_unescaped, s) + +## checking UTF-8 & ACSII validity ## + +byte_string_classify(s::ByteString) = + ccall(:u8_isvalid, Int32, (Ptr{Uint8}, Int), s.data, length(s)) + # 0: neither valid ASCII nor UTF-8 + # 1: valid ASCII + # 2: valid UTF-8 + +is_valid_ascii(s::ByteString) = byte_string_classify(s) == 1 +is_valid_utf8 (s::ByteString) = byte_string_classify(s) != 0 + +check_ascii(s::ByteString) = is_valid_ascii(s) ? s : error("invalid ASCII sequence") +check_utf8 (s::ByteString) = is_valid_utf8(s) ? s : error("invalid UTF-8 sequence") + +## string interpolation parsing ## + +function _jl_interp_parse(s::String, unescape::Function, printer::Function) + sx = {} + i = j = start(s) + while !done(s,j) + c, k = next(s,j) + if c == '$' + if !isempty(s[i:j-1]) + push(sx, unescape(s[i:j-1])) + end + ex, j = parseatom(s,k) + push(sx, ex) + i = j + elseif c == '\\' && !done(s,k) + if s[k] == '$' + if !isempty(s[i:j-1]) + push(sx, unescape(s[i:j-1])) + end + i = k + end + c, j = next(s,k) + else + j = k + end + end + if !isempty(s[i:]) + push(sx, unescape(s[i:j-1])) + end + length(sx) == 1 && isa(sx[1],ByteString) ? sx[1] : + expr(:call, :print_to_string, printer, sx...) +end + +_jl_interp_parse(s::String, u::Function) = _jl_interp_parse(s, u, print) +_jl_interp_parse(s::String) = _jl_interp_parse(s, x->check_utf8(unescape_string(x))) + +function _jl_interp_parse_bytes(s::String) + writer(x...) = for w=x; write(w); end + _jl_interp_parse(s, unescape_string, writer) +end + +## core string macros ## + +macro str(s); _jl_interp_parse(s); end +macro S_str(s); _jl_interp_parse(s); end +macro I_str(s); _jl_interp_parse(s, x->unescape_chars(x,"\"")); end +macro E_str(s); check_utf8(unescape_string(s)); end +macro B_str(s); _jl_interp_parse_bytes(s); end +macro b_str(s); ex = _jl_interp_parse_bytes(s); :(($ex).data); end + +## shell-like command parsing ## + +function _jl_shell_parse(s::String, interp::Bool) + + in_single_quotes = false + in_double_quotes = false + + args = {} + arg = {} + i = start(s) + j = i + + function update_arg(x) + if !isa(x,String) || !isempty(x) + push(arg, x) + end + end + function append_arg() + if isempty(arg); arg = {"",}; end + push(args, arg) + arg = {} + end + + while !done(s,j) + c, k = next(s,j) + if !in_single_quotes && !in_double_quotes && iswspace(c) + update_arg(s[i:j-1]) + append_arg() + j = k + while !done(s,j) + c, k = next(s,j) + if !iswspace(c) + i = j + break + end + j = k + end + elseif interp && !in_single_quotes && c == '$' + update_arg(s[i:j-1]); i = k; j = k + if done(s,k) + error("\$ right before end of command") + end + if iswspace(s[k]) + error("space not allowed right after \$") + end + ex, j = parseatom(s,j) + update_arg(ex); i = j + else + if !in_double_quotes && c == '\'' + in_single_quotes = !in_single_quotes + update_arg(s[i:j-1]); i = k + elseif !in_single_quotes && c == '"' + in_double_quotes = !in_double_quotes + update_arg(s[i:j-1]); i = k + elseif c == '\\' + if in_double_quotes + if done(s,k) + error("unterminated double quote") + end + if s[k] == '"' || s[k] == '$' + update_arg(s[i:j-1]); i = k + c, k = next(s,k) + end + elseif !in_single_quotes + if done(s,k) + error("dangling backslash") + end + update_arg(s[i:j-1]); i = k + c, k = next(s,k) + end + end + j = k + end + end + + if in_single_quotes; error("unterminated single quote"); end + if in_double_quotes; error("unterminated double quote"); end + + update_arg(s[i:]) + append_arg() + + if !interp + return args + end + + # construct an expression + exprs = {} + for arg in args + push(exprs, expr(:tuple, arg)) + end + expr(:tuple,exprs) +end +_jl_shell_parse(s::String) = _jl_shell_parse(s,true) + +function shell_split(s::String) + parsed = _jl_shell_parse(s,false) + args = String[] + for arg in parsed + push(args, strcat(arg...)) + end + args +end + +function print_shell_word(word::String) + if isempty(word) + print("''") + end + has_single = false + has_special = false + for c in word + if iswspace(c) || c=='\\' || c=='\'' || c=='"' || c=='$' + has_special = true + if c == '\'' + has_single = true + end + end + end + if !has_special + print(word) + elseif !has_single + print('\'', word, '\'') + else + print('"') + for c in word + if c == '"' || c == '$' + print('\\') + end + print(c) + end + print('"') + end +end + +function print_shell_escaped(cmd::String, args::String...) + print_shell_word(cmd) + for arg in args + print(' ') + print_shell_word(arg) + end +end + +shell_escape(cmd::String, args::String...) = + print_to_string(print_shell_escaped, cmd, args...) + +## interface to parser ## + +function parse(s::String, pos, greedy) + # returns (expr, end_pos). expr is () in case of parse error. + ex, pos = ccall(:jl_parse_string, Any, + (Ptr{Uint8}, Int32, Int32), + cstring(s), pos-1, greedy ? 1:0) + if isa(ex,Expr) && is(ex.head,:error) + throw(ParseError(ex.args[1])) + end + if ex == (); throw(ParseError("end of input")); end + ex, pos+1 # C is zero-based, Julia is 1-based +end + +parse(s::String) = parse(s, 1, true) +parse(s::String, pos) = parse(s, pos, true) +parseatom(s::String) = parse(s, 1, false) +parseatom(s::String, pos) = parse(s, pos, false) + +## miscellaneous string functions ## + +function lpad(s::String, n::Integer, p::String) + m = n - strlen(s) + if m <= 0; return s; end + l = strlen(p) + if l==1 + return p^m * s + end + q = div(m,l) + r = m - q*l + cstring(p^q*p[1:chr2ind(p,r)]*s) +end + +function rpad(s::String, n::Integer, p::String) + m = n - strlen(s) + if m <= 0; return s; end + l = strlen(p) + if l==1 + return s * p^m + end + q = div(m,l) + r = m - q*l + cstring(s*p^q*p[1:chr2ind(p,r)]) +end + +lpad(s, n::Integer, p) = lpad(string(s), n, string(p)) +rpad(s, n::Integer, p) = rpad(string(s), n, string(p)) + +lpad(s, n::Integer) = lpad(string(s), n, " ") +rpad(s, n::Integer) = rpad(string(s), n, " ") + +function split(s::String, delims, include_empty::Bool) + i = 1 + strs = String[] + len = length(s) + while true + tokstart = tokend = i + while !done(s,i) + (c,i) = next(s,i) + if contains(delims, c) + break + end + tokend = i + end + tok = s[tokstart:(tokend-1)] + if include_empty || !isempty(tok) + push(strs, tok) + end + if !((i <= len) || (i==len+1 && tokend!=i)) + break + end + end + strs +end + +split(s::String) = split(s, (' ','\t','\n','\v','\f','\r'), false) +split(s::String, x) = split(s, x, true) +split(s::String, x::Char, incl::Bool) = split(s, (x,), incl) + +function print_joined(strings, delim, last) + i = start(strings) + if done(strings,i) + return + end + str, i = next(strings,i) + print(str) + while !done(strings,i) + str, i = next(strings,i) + print(done(strings,i) ? last : delim) + print(str) + end +end + +function print_joined(strings, delim) + i = start(strings) + while !done(strings,i) + str, i = next(strings,i) + print(str) + if !done(strings,i) + print(delim) + end + end +end +print_joined(strings) = print_joined(strings, "") + +join(args...) = print_to_string(print_joined, args...) + +chop(s::String) = s[1:thisind(s,length(s))-1] +chomp(s::String) = (i=thisind(s,length(s)); s[i]=='\n' ? s[1:i-1] : s) +chomp(s::ByteString) = s.data[end]==0x0a ? s[1:end-1] : s + +function lstrip(s::String) + i = start(s) + while !done(s,i) + c, j = next(s,i) + if !iswspace(c) + return s[i:end] + end + i = j + end + "" +end + +function rstrip(s::String) + r = reverse(s) + i = start(r) + while !done(r,i) + c, j = next(r,i) + if !iswspace(c) + return s[1:end-i+1] + end + i = j + end + "" +end + +strip(s::String) = lstrip(rstrip(s)) + +## string to integer functions ## + +function parse_int{T<:Integer}(::Type{T}, s::String, base::Integer) + if !(2 <= base <= 36); error("invalid base: ",base); end + i = start(s) + if done(s,i) + error("premature end of integer (in ",show_to_string(s),")") + end + c,i = next(s,i) + sgn = one(T) + if T <: Signed && c == '-' + sgn = -sgn + if done(s,i) + error("premature end of integer (in ",show_to_string(s),")") + end + c,i = next(s,i) + end + base = convert(T,base) + n::T = 0 + while true + d = '0' <= c <= '9' ? c-'0' : + 'A' <= c <= 'Z' ? c-'A'+10 : + 'a' <= c <= 'z' ? c-'a'+10 : typemax(Int) + if d >= base + error(show_to_string(c)," is not a valid digit (in ",show_to_string(s),")") + end + # TODO: overflow detection? + n = n*base + d + if done(s,i) + break + end + c,i = next(s,i) + end + return flipsign(n,sgn) +end + +parse_int(s::String, base::Integer) = parse_int(Int,s,base) +parse_int(T::Type, s::String) = parse_int(T,s,10) +parse_int(s::String) = parse_int(Int,s,10) + +parse_bin(T::Type, s::String) = parse_int(T,s,2) +parse_oct(T::Type, s::String) = parse_int(T,s,8) +parse_hex(T::Type, s::String) = parse_int(T,s,16) + +parse_bin(s::String) = parse_int(Int,s,2) +parse_oct(s::String) = parse_int(Int,s,8) +parse_hex(s::String) = parse_int(Int,s,16) + +integer (s::String) = int(s) +unsigned(s::String) = uint(s) +int (s::String) = parse_int(Int,s) +uint (s::String) = parse_int(Uint,s) +int8 (s::String) = parse_int(Int8,s) +uint8 (s::String) = parse_int(Uint8,s) +int16 (s::String) = parse_int(Int16,s) +uint16 (s::String) = parse_int(Uint16,s) +int32 (s::String) = parse_int(Int32,s) +uint32 (s::String) = parse_int(Uint32,s) +int64 (s::String) = parse_int(Int64,s) +uint64 (s::String) = parse_int(Uint64,s) + +## integer to string functions ## + +const _jl_dig_syms = "0123456789abcdefghijklmnopqrstuvwxyz".data + +function int2str(n::Union(Int64,Uint64), b::Integer, l::Int) + if b < 2 || b > 36; error("int2str: invalid base ", b); end + neg = n < 0 + n = unsigned(abs(n)) + b = convert(typeof(n), b) + ndig = ndigits(n, b) + sz = max(convert(Int, ndig), l) + neg + data = Array(Uint8, sz) + i = sz + if ispow2(b) + digmask = b-1 + shift = trailing_zeros(b) + while i > neg + ch = n & digmask + data[i] = _jl_dig_syms[int(ch)+1] + n >>= shift + i -= 1 + end + else + while i > neg + ch = n % b + data[i] = _jl_dig_syms[int(ch)+1] + n = div(n,b) + i -= 1 + end + end + if neg + data[1] = '-' + end + ASCIIString(data) +end +int2str(n::Integer, b::Integer) = int2str(n, b, 0) +int2str(n::Integer, b::Integer, l::Int) = int2str(int64(n), b, l) + +string(x::Signed) = dec(int64(x)) +cstring(x::Signed) = dec(int64(x)) + +## string to float functions ## + +function float64_isvalid(s::String, out::Array{Float64,1}) + s = cstring(s) + return (ccall(:jl_strtod, Int32, (Ptr{Uint8},Ptr{Float64}), s, out)==0) +end + +function float32_isvalid(s::String, out::Array{Float32,1}) + s = cstring(s) + return (ccall(:jl_strtof, Int32, (Ptr{Uint8},Ptr{Float32}), s, out)==0) +end + +begin + local tmp::Array{Float64,1} = Array(Float64,1) + local tmpf::Array{Float32,1} = Array(Float32,1) + global float64, float32 + function float64(s::String) + if !float64_isvalid(s, tmp) + throw(ArgumentError("float64(String): invalid number format")) + end + return tmp[1] + end + + function float32(s::String) + if !float32_isvalid(s, tmpf) + throw(ArgumentError("float32(String): invalid number format")) + end + return tmpf[1] + end +end + +float(x::String) = float64(x) +parse_float(x::String) = float64(x) +parse_float(::Type{Float64}, x::String) = float64(x) +parse_float(::Type{Float32}, x::String) = float32(x) + +# copying a byte string (generally not needed due to "immutability") + +strcpy{T<:ByteString}(s::T) = T(copy(s.data)) + +# lexicographically compare byte arrays (used by Latin-1 and UTF-8) + +function lexcmp(a::Array{Uint8,1}, b::Array{Uint8,1}) + c = ccall(:memcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}, Uint), + a, b, min(length(a),length(b))) + c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b)) +end + +# find the index of the first occurrence of a byte value in a byte array + +function memchr(a::Array{Uint8,1}, b::Integer) + p = pointer(a) + q = ccall(:memchr, Ptr{Uint8}, (Ptr{Uint8}, Int32, Uint), p, b, length(a)) + q == C_NULL ? 0 : q - p + 1 +end + +# concatenate byte arrays into a single array + +memcat() = Array(Uint8,0) +memcat(a::Array{Uint8,1}) = copy(a) + +function memcat(arrays::Array{Uint8,1}...) + n = 0 + for a in arrays + n += length(a) + end + arr = Array(Uint8, n) + ptr = pointer(arr) + offset = 0 + for a in arrays + ccall(:memcpy, Ptr{Uint8}, (Ptr{Uint8}, Ptr{Uint8}, Uint), + ptr+offset, a, length(a)) + offset += length(a) + end + return arr +end + +# concatenate the data fields of byte strings + +memcat(s::ByteString) = memcat(s.data) +memcat(sx::ByteString...) = memcat(map(s->s.data, sx)...) |