summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--  pygments/lexers/_mapping.py      3
-rw-r--r--  pygments/lexers/math.py        161
-rw-r--r--  tests/examplefiles/string.jl  1031
3 files changed, 1192 insertions, 3 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 7c745a89..8bcc1744 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -132,6 +132,8 @@ LEXERS = {
'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('js+smarty', 'javascript+smarty'), (), ('application/x-javascript+smarty', 'text/x-javascript+smarty', 'text/javascript+smarty')),
'JspLexer': ('pygments.lexers.templates', 'Java Server Page', ('jsp',), ('*.jsp',), ('application/x-jsp',)),
'KotlinLexer': ('pygments.lexers.jvm', 'Kotlin', ('kotlin',), ('*.kt',), ('text/x-kotlin',)),
+ 'JuliaLexer': ('pygments.lexers.math', 'Julia', ('julia','jl'), ('*.jl',), ('text/x-julia','application/x-julia')),
+ 'JuliaConsoleLexer': ('pygments.lexers.math', 'Julia console', ('jlcon',), (), ()),
'LighttpdConfLexer': ('pygments.lexers.text', 'Lighttpd configuration file', ('lighty', 'lighttpd'), (), ('text/x-lighttpd-conf',)),
'LiterateHaskellLexer': ('pygments.lexers.functional', 'Literate Haskell', ('lhs', 'literate-haskell'), ('*.lhs',), ('text/x-literate-haskell',)),
'LlvmLexer': ('pygments.lexers.asm', 'LLVM', ('llvm',), ('*.ll',), ('text/x-llvm',)),
@@ -290,3 +292,4 @@ if __name__ == '__main__':
f.write('LEXERS = {\n %s,\n}\n\n' % ',\n '.join(found_lexers))
f.write(footer)
f.close()
+
diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py
index f500231f..7ae7d6b2 100644
--- a/pygments/lexers/math.py
+++ b/pygments/lexers/math.py
@@ -11,15 +11,170 @@
import re
-from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
+from pygments.lexer import Lexer, RegexLexer, bygroups, include, \
+ combined, do_insertions
from pygments.token import Comment, String, Punctuation, Keyword, Name, \
Operator, Number, Text, Generic
from pygments.lexers.agile import PythonLexer
from pygments.lexers import _scilab_builtins
-__all__ = ['MuPADLexer', 'MatlabLexer', 'MatlabSessionLexer', 'OctaveLexer',
- 'ScilabLexer', 'NumPyLexer', 'RConsoleLexer', 'SLexer']
+__all__ = ['JuliaLexer', 'JuliaConsoleLexer', 'MuPADLexer', 'MatlabLexer',
+ 'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer', 'NumPyLexer',
+ 'RConsoleLexer', 'SLexer']
+
+
+class JuliaLexer(RegexLexer):
+    """
+    For Julia source code.
+
+    The ``root`` state recognises comments, macros, punctuation, keywords,
+    operators, builtins, literals and names.  ``function`` and
+    ``type``/``typealias``/``abstract`` push the ``funcname``/``typename``
+    state so that the identifier that follows is tagged as the defined name.
+    Strings prefixed with ``I`` or ``L`` are lexed by the ``string`` state
+    alone; all other strings additionally recognise the backslash escapes
+    listed in ``stringescape``.
+    """
+    name = 'Julia'
+    aliases = ['julia','jl']
+    filenames = ['*.jl']
+    mimetypes = ['text/x-julia','application/x-julia']
+
+    # Identifiers highlighted as Name.Builtin by the 'builtins' rule below.
+    builtins = [
+        'exit','whos','edit','load','is','isa','isequal','typeof','tuple',
+        'ntuple','uid','hash','finalizer','convert','promote','subtype',
+        'typemin','typemax','realmin','realmax','sizeof','eps','promote_type',
+        'method_exists','applicable','invoke','dlopen','dlsym','system',
+        'error','throw','assert','new','Inf','NaN','pi','im',
+    ]
+
+    tokens = {
+        'root': [
+            (r'\n', Text),
+            (r'[^\S\n]+', Text),
+            (r'#.*$', Comment),
+
+            # macros: must be tried before the punctuation rule, which also
+            # accepts '@' and would otherwise make this rule unreachable
+            (r'@[a-zA-Z0-9_.]+', Name.Decorator),
+
+            (r'[]{}:(),;[@]', Punctuation),
+            (r'\\\n', Text),
+            (r'\\', Text),
+
+            # keywords
+            (r'(begin|while|for|in|return|break|continue|'
+             r'macro|quote|let|if|elseif|else|try|catch|end|'
+             r'bitstype|ccall)\b', Keyword),
+            (r'(local|global|const)\b', Keyword.Declaration),
+            (r'(module|import|export)\b', Keyword.Reserved),
+            (r'(Bool|Int|Int8|Int16|Int32|Int64|Uint|Uint8|Uint16|Uint32|Uint64'
+             r'|Float32|Float64|Complex64|Complex128|Any|Nothing|None)\b',
+             Keyword.Type),
+
+            # functions: the separating whitespace is plain Text, the name
+            # itself is picked up by the 'funcname' state
+            (r'(function)((?:\s|\\\s)+)',
+             bygroups(Keyword, Text), 'funcname'),
+
+            # types: same scheme, the name is picked up by 'typename'
+            (r'(type|typealias|abstract)((?:\s|\\\s)+)',
+             bygroups(Keyword, Text), 'typename'),
+
+            # operators: the dotted (element-wise) forms come first so that
+            # '.' is not consumed on its own by the single-character class.
+            # The trailing bare-backslash alternative stays shadowed by the
+            # earlier (r'\\', Text) rule.
+            (r'\.\*|\.\^|\.\\|\.\/|\\', Operator),
+            (r'==|!=|<=|>=|->|&&|\|\||::|<:|[-~+/*%=<>&^|.?!$]', Operator),
+
+            # builtins
+            ('(' + '|'.join(builtins) + r')\b', Name.Builtin),
+
+            # backticks; the global (?s) flag has to lead the pattern,
+            # Python 3.11+ refuses to compile it anywhere else
+            (r'(?s)`.*?`', String.Backtick),
+
+            # chars
+            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|"
+             r"\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char),
+
+            # try to match trailing transpose
+            (r'(?<=[.\w\)\]])\'', Operator),
+
+            # strings: I"..." and L"..." skip the escape rules
+            (r'(?:[IL])"', String, 'string'),
+            (r'[E]?"', String, combined('stringescape', 'string')),
+
+            # names
+            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
+
+            # numbers
+            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
+            (r'\d+[eE][+-]?[0-9]+', Number.Float),
+            (r'0[0-7]+', Number.Oct),
+            (r'0[xX][a-fA-F0-9]+', Number.Hex),
+            (r'\d+', Number.Integer)
+        ],
+
+        # entered after 'function': a plain name, a parenthesised operator
+        # such as (*), or a bare one/two character operator
+        'funcname': [
+            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop'),
+            (r'\([^\s\w{]{1,2}\)', Operator, '#pop'),
+            (r'[^\s\w{]{1,2}', Operator, '#pop'),
+        ],
+
+        # entered after 'type', 'typealias' or 'abstract'
+        'typename': [
+            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
+        ],
+
+        'stringescape': [
+            (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
+             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
+        ],
+
+        'string': [
+            (r'"', String, '#pop'),
+            (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
+            (r'\$(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?',
+             String.Interpol),
+            (r'[^\\"$]+', String),
+            # quotes, dollar signs, and backslashes must be parsed one at a time
+            (r'["\\]', String),
+            # unhandled string formatting sign
+            (r'\$', String)
+        ],
+    }
+
+    def analyse_text(text):
+        """Report whether *text* starts with a shebang line naming julia."""
+        # Imported here because the module-level imports visible in this
+        # hunk do not bring shebang_matches into scope.
+        from pygments.util import shebang_matches
+        return shebang_matches(text, r'julia')
+
+
+# Splits the input into lines, each match including its trailing newline.
+line_re = re.compile('.*?\n')
+
+class JuliaConsoleLexer(Lexer):
+    """
+    For Julia console sessions. Modeled after MatlabSessionLexer.
+
+    Lines that start with the ``julia>`` prompt are collected and lexed as
+    Julia code, lines that start with a space are inserted into that code
+    stream as ``Generic.Traceback``, and every other line is emitted as
+    ``Generic.Output``.
+    """
+    name = 'Julia console'
+    aliases = ['jlcon']
+
+    def get_tokens_unprocessed(self, text):
+        """Yield ``(index, tokentype, value)`` tuples for the session *text*."""
+        jllexer = JuliaLexer(**self.options)
+        prompt = 'julia>'
+
+        curcode = ''        # Julia code collected since the last flush
+        insertions = []     # prompt/traceback tokens to weave into curcode
+
+        for match in line_re.finditer(text):
+            line = match.group()
+
+            if line.startswith(prompt):
+                # The whole prompt becomes Generic.Prompt; only the text
+                # after it is handed to the Julia lexer.
+                insertions.append((len(curcode),
+                                   [(0, Generic.Prompt, line[:len(prompt)])]))
+                curcode += line[len(prompt):]
+
+            elif line.startswith(' '):
+
+                idx = len(curcode)
+
+                # A newline is prepended; per the original author's note the
+                # error otherwise showed up on the same line as the
+                # preceding text.
+                line = "\n" + line
+                token = (0, Generic.Traceback, line)
+                insertions.append((idx, [token]))
+
+            else:
+                # Plain output ends the current code run: flush it first.
+                if curcode:
+                    for item in do_insertions(
+                        insertions, jllexer.get_tokens_unprocessed(curcode)):
+                        yield item
+                    curcode = ''
+                    insertions = []
+
+                yield match.start(), Generic.Output, line
+
+        # Flush code that was not followed by an output line.
+        if curcode:
+            for item in do_insertions(
+                insertions, jllexer.get_tokens_unprocessed(curcode)):
+                yield item
class MuPADLexer(RegexLexer):
diff --git a/tests/examplefiles/string.jl b/tests/examplefiles/string.jl
new file mode 100644
index 00000000..67bf6c70
--- /dev/null
+++ b/tests/examplefiles/string.jl
@@ -0,0 +1,1031 @@
+## core string functions ##
+
+length(s::String) = error("you must implement length(",typeof(s),")")
+next(s::String, i::Int) = error("you must implement next(",typeof(s),",Int)")
+next(s::DirectIndexString, i::Int) = (s[i],i+1)
+next(s::String, i::Integer) = next(s,int(i))
+
+## generic supplied functions ##
+
+start(s::String) = 1
+done(s::String,i) = (i > length(s))
+isempty(s::String) = done(s,start(s))
+ref(s::String, i::Int) = next(s,i)[1]
+ref(s::String, i::Integer) = s[int(i)]
+ref(s::String, x::Real) = s[iround(x)]
+ref{T<:Integer}(s::String, r::Range1{T}) = s[int(first(r)):int(last(r))]
+
+symbol(s::String) = symbol(cstring(s))
+string(s::String) = s
+
+print(s::String) = for c=s; print(c); end
+print(x...) = for i=x; print(i); end
+println(args...) = print(args..., '\n')
+
+show(s::String) = print_quoted(s)
+
+(*)(s::String...) = strcat(s...)
+(^)(s::String, r::Integer) = repeat(s,r)
+
+size(s::String) = (length(s),)
+size(s::String, d::Integer) = d==1 ? length(s) :
+ error("in size: dimension ",d," out of range")
+
+strlen(s::DirectIndexString) = length(s)
+function strlen(s::String)
+ i = start(s)
+ if done(s,i)
+ return 0
+ end
+ n = 1
+ while true
+ c, j = next(s,i)
+ if done(s,j)
+ return n
+ end
+ n += 1
+ i = j
+ end
+end
+
+isvalid(s::DirectIndexString, i::Integer) = (start(s) <= i <= length(s))
+function isvalid(s::String, i::Integer)
+ try
+ next(s,i)
+ true
+ catch
+ false
+ end
+end
+
+prevind(s::DirectIndexString, i::Integer) = i-1
+thisind(s::DirectIndexString, i::Integer) = i
+nextind(s::DirectIndexString, i::Integer) = i+1
+
+prevind(s::String, i::Integer) = thisind(s,thisind(s,i)-1)
+
+function thisind(s::String, i::Integer)
+ for j = i:-1:1
+ if isvalid(s,j)
+ return j
+ end
+ end
+ return 0 # out of range
+end
+
+function nextind(s::String, i::Integer)
+ for j = i+1:length(s)
+ if isvalid(s,j)
+ return j
+ end
+ end
+ length(s)+1 # out of range
+end
+
+ind2chr(s::DirectIndexString, i::Integer) = i
+chr2ind(s::DirectIndexString, i::Integer) = i
+
+function ind2chr(s::String, i::Integer)
+ s[i] # throws error if invalid
+ j = 1
+ k = start(s)
+ while true
+ c, l = next(s,k)
+ if i <= k
+ return j
+ end
+ j += 1
+ k = l
+ end
+end
+
+function chr2ind(s::String, i::Integer)
+ if i < 1
+ return i
+ end
+ j = 1
+ k = start(s)
+ while true
+ c, l = next(s,k)
+ if i == j
+ return k
+ end
+ j += 1
+ k = l
+ end
+end
+
+# Index of the first occurrence of c strictly after index i, or 0 if none.
+function strchr(s::String, c::Char, i::Integer)
+    i = nextind(s,i)
+    while !done(s,i)
+        d, j = next(s,i)
+        if c == d
+            return i
+        end
+        i = j
+    end
+    return 0
+end
+# Search the whole string. The three-argument method begins *after* the index
+# it is given, so start one position before start(s); passing start(s) itself
+# would never examine the first character.
+strchr(s::String, c::Char) = strchr(s, c, start(s)-1)
+contains(s::String, c::Char) = (strchr(s,c)!=0)
+
+function chars(s::String)
+ cx = Array(Char,strlen(s))
+ i = 0
+ for c in s
+ cx[i += 1] = c
+ end
+ return cx
+end
+
+# Lexicographic comparison of two strings: returns -1, 0 or +1.
+function cmp(a::String, b::String)
+    i = start(a)
+    j = start(b)
+    # Each string is walked with its own index (i for a, j for b), so the
+    # termination test for b has to use j as well.
+    while !done(a,i) && !done(b,j)
+        c, i = next(a,i)
+        d, j = next(b,j)
+        if c != d
+            return c < d ? -1 : +1
+        end
+    end
+    # No differing character was found: the string that ended first sorts
+    # first, and both ending together means equal.
+    done(a,i) && !done(b,j) ? -1 :
+    !done(a,i) && done(b,j) ? +1 : 0
+end
+
+isequal(a::String, b::String) = cmp(a,b) == 0
+isless(a::String, b::String) = cmp(a,b) < 0
+
+# faster comparisons for byte strings
+
+cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data)
+isequal(a::ByteString, b::ByteString) = length(a)==length(b) && cmp(a,b)==0
+
+## character column width function ##
+
+charwidth(c::Char) = max(0,int(ccall(:wcwidth, Int32, (Char,), c)))
+strwidth(s::String) = (w=0; for c in s; w += charwidth(c); end; w)
+strwidth(s::ByteString) = ccall(:u8_strwidth, Int, (Ptr{Uint8},), s.data)
+# TODO: implement and use u8_strnwidth that takes a length argument
+
+## generic string uses only length and next ##
+
+type GenericString <: String
+ string::String
+end
+
+length(s::GenericString) = length(s.string)
+next(s::GenericString, i::Int) = next(s.string, i)
+
+## plain old character arrays ##
+
+type CharString <: String
+ chars::Array{Char,1}
+
+ CharString(a::Array{Char,1}) = new(a)
+ CharString(c::Char...) = new([ c[i] | i=1:length(c) ])
+end
+CharString(x...) = CharString(map(char,x)...)
+
+next(s::CharString, i::Int) = (s.chars[i], i+1)
+length(s::CharString) = length(s.chars)
+strlen(s::CharString) = length(s)
+
+string(c::Char) = CharString(c)
+string(c::Char, x::Char...) = CharString(c, x...)
+
+## substrings reference original strings ##
+
+type SubString <: String
+ string::String
+ offset::Int
+ length::Int
+
+ SubString(s::String, i::Int, j::Int) = new(s, i-1, j-i+1)
+ SubString(s::SubString, i::Int, j::Int) =
+ new(s.string, i-1+s.offset, j-i+1)
+end
+SubString(s::String, i::Integer, j::Integer) = SubString(s, int(i), int(j))
+
+function next(s::SubString, i::Int)
+ if i < 1 || i > s.length
+ error("string index out of bounds")
+ end
+ c, i = next(s.string, i+s.offset)
+ c, i-s.offset
+end
+
+length(s::SubString) = s.length
+# TODO: strlen(s::SubString) = ??
+# default implementation will work but it's slow
+# can this be delegated efficiently somehow?
+# that may require additional string interfaces
+
+function ref(s::String, r::Range1{Int})
+ if first(r) < 1 || length(s) < last(r)
+ error("in substring slice: index out of range")
+ end
+ SubString(s, first(r), last(r))
+end
+
+## efficient representation of repeated strings ##
+
+type RepString <: String
+ string::String
+ repeat::Integer
+end
+
+length(s::RepString) = length(s.string)*s.repeat
+strlen(s::RepString) = strlen(s.string)*s.repeat
+
+function next(s::RepString, i::Int)
+ if i < 1 || i > length(s)
+ error("string index out of bounds")
+ end
+ j = mod1(i,length(s.string))
+ c, k = next(s.string, j)
+ c, k-j+i
+end
+
+function repeat(s::String, r::Integer)
+ r < 0 ? error("can't repeat a string ",r," times") :
+ r == 0 ? "" :
+ r == 1 ? s :
+ RepString(s,r)
+end
+
+## reversed strings without data movement ##
+
+type RevString <: String
+ string::String
+end
+
+length(s::RevString) = length(s.string)
+strlen(s::RevString) = strlen(s.string)
+
+start(s::RevString) = (n=length(s); n-thisind(s.string,n)+1)
+function next(s::RevString, i::Int)
+ n = length(s); j = n-i+1
+ (s.string[j], n-thisind(s.string,j-1)+1)
+end
+
+reverse(s::String) = RevString(s)
+reverse(s::RevString) = s.string
+
+## ropes for efficient concatenation, etc. ##
+
+# Idea: instead of this standard binary tree structure,
+# how about we keep an array of substrings, with an
+# offset array. We can do binary search on the offset
+# array so we get O(log(n)) indexing time still, but we
+# can compute the offsets lazily and avoid all the
+# futzing around while the string is being constructed.
+
+type RopeString <: String
+ head::String
+ tail::String
+ depth::Int32
+ length::Int
+
+ RopeString(h::RopeString, t::RopeString) =
+ depth(h.tail) + depth(t) < depth(h.head) ?
+ RopeString(h.head, RopeString(h.tail, t)) :
+ new(h, t, max(h.depth,t.depth)+1, length(h)+length(t))
+
+ RopeString(h::RopeString, t::String) =
+ depth(h.tail) < depth(h.head) ?
+ RopeString(h.head, RopeString(h.tail, t)) :
+ new(h, t, h.depth+1, length(h)+length(t))
+
+ RopeString(h::String, t::RopeString) =
+ depth(t.head) < depth(t.tail) ?
+ RopeString(RopeString(h, t.head), t.tail) :
+ new(h, t, t.depth+1, length(h)+length(t))
+
+ RopeString(h::String, t::String) =
+ new(h, t, 1, length(h)+length(t))
+end
+
+depth(s::String) = 0
+depth(s::RopeString) = s.depth
+
+function next(s::RopeString, i::Int)
+ if i <= length(s.head)
+ return next(s.head, i)
+ else
+ c, j = next(s.tail, i-length(s.head))
+ return c, j+length(s.head)
+ end
+end
+
+length(s::RopeString) = s.length
+strlen(s::RopeString) = strlen(s.head) + strlen(s.tail)
+
+strcat() = ""
+strcat(s::String) = s
+strcat(x...) = strcat(map(string,x)...)
+strcat(s::String, t::String...) =
+ (t = strcat(t...); isempty(s) ? t : isempty(t) ? s : RopeString(s, t))
+
+print(s::RopeString) = print(s.head, s.tail)
+
+## transformed strings ##
+
+type TransformedString <: String
+ transform::Function
+ string::String
+end
+
+length(s::TransformedString) = length(s.string)
+strlen(s::TransformedString) = strlen(s.string)
+
+function next(s::TransformedString, i::Int)
+ c, j = next(s.string,i)
+ c = s.transform(c, i)
+ return c, j
+end
+
+## uppercase and lowercase transformations ##
+
+uppercase(c::Char) = ccall(:towupper, Char, (Char,), c)
+lowercase(c::Char) = ccall(:towlower, Char, (Char,), c)
+
+uppercase(s::String) = TransformedString((c,i)->uppercase(c), s)
+lowercase(s::String) = TransformedString((c,i)->lowercase(c), s)
+
+ucfirst(s::String) = TransformedString((c,i)->i==1 ? uppercase(c) : c, s)
+lcfirst(s::String) = TransformedString((c,i)->i==1 ? lowercase(c) : c, s)
+
+const uc = uppercase
+const lc = lowercase
+
+## string map ##
+
+function map(f::Function, s::String)
+ out = memio(length(s))
+ for c in s
+ write(out, f(c)::Char)
+ end
+ takebuf_string(out)
+end
+
+## conversion of general objects to strings ##
+
+string(x) = print_to_string(show, x)
+cstring(x...) = print_to_string(print, x...)
+
+function cstring(p::Ptr{Uint8})
+ p == C_NULL ? error("cannot convert NULL to string") :
+ ccall(:jl_cstr_to_string, Any, (Ptr{Uint8},), p)::ByteString
+end
+
+## string promotion rules ##
+
+promote_rule(::Type{UTF8String} , ::Type{ASCIIString}) = UTF8String
+promote_rule(::Type{UTF8String} , ::Type{CharString} ) = UTF8String
+promote_rule(::Type{ASCIIString}, ::Type{CharString} ) = UTF8String
+
+## printing literal quoted string data ##
+
+# TODO: this is really the inverse of print_unbackslashed
+
+function print_quoted_literal(s::String)
+ print('"')
+ for c = s; c == '"' ? print("\\\"") : print(c); end
+ print('"')
+end
+
+## string escaping & unescaping ##
+
+escape_nul(s::String, i::Int) =
+ !done(s,i) && '0' <= next(s,i)[1] <= '7' ? L"\x00" : L"\0"
+
+is_hex_digit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'
+need_full_hex(s::String, i::Int) = !done(s,i) && is_hex_digit(next(s,i)[1])
+
+function print_escaped(s::String, esc::String)
+ i = start(s)
+ while !done(s,i)
+ c, j = next(s,i)
+ c == '\0' ? print(escape_nul(s,j)) :
+ c == '\e' ? print(L"\e") :
+ c == '\\' ? print("\\\\") :
+ contains(esc,c) ? print('\\', c) :
+ iswprint(c) ? print(c) :
+ 7 <= c <= 13 ? print('\\', "abtnvfr"[c-6]) :
+ c <= '\x7f' ? print(L"\x", hex(c, 2)) :
+ c <= '\uffff' ? print(L"\u", hex(c, need_full_hex(s,j) ? 4 : 2)) :
+ print(L"\U", hex(c, need_full_hex(s,j) ? 8 : 4))
+ i = j
+ end
+end
+
+escape_string(s::String) = print_to_string(length(s), print_escaped, s, "\"")
+print_quoted(s::String) = (print('"'); print_escaped(s, "\"\$"); print('"'))
+#" # work around syntax highlighting problem
+quote_string(s::String) = print_to_string(length(s)+2, print_quoted, s)
+
+# bare minimum unescaping function unescapes only given characters
+
+function print_unescaped_chars(s::String, esc::String)
+ if !contains(esc,'\\')
+ esc = strcat("\\", esc)
+ end
+ i = start(s)
+ while !done(s,i)
+ c, i = next(s,i)
+ if c == '\\' && !done(s,i) && contains(esc,s[i])
+ c, i = next(s,i)
+ end
+ print(c)
+ end
+end
+
+unescape_chars(s::String, esc::String) =
+ print_to_string(length(s), print_unescaped_chars, s, esc)
+
+# general unescaping of traditional C and Unicode escape sequences
+
+function print_unescaped(s::String)
+ i = start(s)
+ while !done(s,i)
+ c, i = next(s,i)
+ if !done(s,i) && c == '\\'
+ c, i = next(s,i)
+ if c == 'x' || c == 'u' || c == 'U'
+ n = k = 0
+ m = c == 'x' ? 2 :
+ c == 'u' ? 4 : 8
+ while (k+=1) <= m && !done(s,i)
+ c, j = next(s,i)
+ n = '0' <= c <= '9' ? n<<4 + c-'0' :
+ 'a' <= c <= 'f' ? n<<4 + c-'a'+10 :
+ 'A' <= c <= 'F' ? n<<4 + c-'A'+10 : break
+ i = j
+ end
+ if k == 1
+ error("\\x used with no following hex digits")
+ end
+ if m == 2 # \x escape sequence
+ write(uint8(n))
+ else
+ print(char(n))
+ end
+ elseif '0' <= c <= '7'
+ k = 1
+ n = c-'0'
+ while (k+=1) <= 3 && !done(s,i)
+ c, j = next(s,i)
+ n = '0' <= c <= '7' ? n<<3 + c-'0' : break
+ i = j
+ end
+ if n > 255
+ error("octal escape sequence out of range")
+ end
+ write(uint8(n))
+ else
+ print(c == 'a' ? '\a' :
+ c == 'b' ? '\b' :
+ c == 't' ? '\t' :
+ c == 'n' ? '\n' :
+ c == 'v' ? '\v' :
+ c == 'f' ? '\f' :
+ c == 'r' ? '\r' :
+ c == 'e' ? '\e' : c)
+ end
+ else
+ print(c)
+ end
+ end
+end
+
+unescape_string(s::String) = print_to_string(length(s), print_unescaped, s)
+
+## checking UTF-8 & ASCII validity ##
+
+byte_string_classify(s::ByteString) =
+ ccall(:u8_isvalid, Int32, (Ptr{Uint8}, Int), s.data, length(s))
+ # 0: neither valid ASCII nor UTF-8
+ # 1: valid ASCII
+ # 2: valid UTF-8
+
+is_valid_ascii(s::ByteString) = byte_string_classify(s) == 1
+is_valid_utf8 (s::ByteString) = byte_string_classify(s) != 0
+
+check_ascii(s::ByteString) = is_valid_ascii(s) ? s : error("invalid ASCII sequence")
+check_utf8 (s::ByteString) = is_valid_utf8(s) ? s : error("invalid UTF-8 sequence")
+
+## string interpolation parsing ##
+
+function _jl_interp_parse(s::String, unescape::Function, printer::Function)
+ sx = {}
+ i = j = start(s)
+ while !done(s,j)
+ c, k = next(s,j)
+ if c == '$'
+ if !isempty(s[i:j-1])
+ push(sx, unescape(s[i:j-1]))
+ end
+ ex, j = parseatom(s,k)
+ push(sx, ex)
+ i = j
+ elseif c == '\\' && !done(s,k)
+ if s[k] == '$'
+ if !isempty(s[i:j-1])
+ push(sx, unescape(s[i:j-1]))
+ end
+ i = k
+ end
+ c, j = next(s,k)
+ else
+ j = k
+ end
+ end
+ if !isempty(s[i:])
+ push(sx, unescape(s[i:j-1]))
+ end
+ length(sx) == 1 && isa(sx[1],ByteString) ? sx[1] :
+ expr(:call, :print_to_string, printer, sx...)
+end
+
+_jl_interp_parse(s::String, u::Function) = _jl_interp_parse(s, u, print)
+_jl_interp_parse(s::String) = _jl_interp_parse(s, x->check_utf8(unescape_string(x)))
+
+function _jl_interp_parse_bytes(s::String)
+ writer(x...) = for w=x; write(w); end
+ _jl_interp_parse(s, unescape_string, writer)
+end
+
+## core string macros ##
+
+macro str(s); _jl_interp_parse(s); end
+macro S_str(s); _jl_interp_parse(s); end
+macro I_str(s); _jl_interp_parse(s, x->unescape_chars(x,"\"")); end
+macro E_str(s); check_utf8(unescape_string(s)); end
+macro B_str(s); _jl_interp_parse_bytes(s); end
+macro b_str(s); ex = _jl_interp_parse_bytes(s); :(($ex).data); end
+
+## shell-like command parsing ##
+
+function _jl_shell_parse(s::String, interp::Bool)
+
+ in_single_quotes = false
+ in_double_quotes = false
+
+ args = {}
+ arg = {}
+ i = start(s)
+ j = i
+
+ function update_arg(x)
+ if !isa(x,String) || !isempty(x)
+ push(arg, x)
+ end
+ end
+ function append_arg()
+ if isempty(arg); arg = {"",}; end
+ push(args, arg)
+ arg = {}
+ end
+
+ while !done(s,j)
+ c, k = next(s,j)
+ if !in_single_quotes && !in_double_quotes && iswspace(c)
+ update_arg(s[i:j-1])
+ append_arg()
+ j = k
+ while !done(s,j)
+ c, k = next(s,j)
+ if !iswspace(c)
+ i = j
+ break
+ end
+ j = k
+ end
+ elseif interp && !in_single_quotes && c == '$'
+ update_arg(s[i:j-1]); i = k; j = k
+ if done(s,k)
+ error("\$ right before end of command")
+ end
+ if iswspace(s[k])
+ error("space not allowed right after \$")
+ end
+ ex, j = parseatom(s,j)
+ update_arg(ex); i = j
+ else
+ if !in_double_quotes && c == '\''
+ in_single_quotes = !in_single_quotes
+ update_arg(s[i:j-1]); i = k
+ elseif !in_single_quotes && c == '"'
+ in_double_quotes = !in_double_quotes
+ update_arg(s[i:j-1]); i = k
+ elseif c == '\\'
+ if in_double_quotes
+ if done(s,k)
+ error("unterminated double quote")
+ end
+ if s[k] == '"' || s[k] == '$'
+ update_arg(s[i:j-1]); i = k
+ c, k = next(s,k)
+ end
+ elseif !in_single_quotes
+ if done(s,k)
+ error("dangling backslash")
+ end
+ update_arg(s[i:j-1]); i = k
+ c, k = next(s,k)
+ end
+ end
+ j = k
+ end
+ end
+
+ if in_single_quotes; error("unterminated single quote"); end
+ if in_double_quotes; error("unterminated double quote"); end
+
+ update_arg(s[i:])
+ append_arg()
+
+ if !interp
+ return args
+ end
+
+ # construct an expression
+ exprs = {}
+ for arg in args
+ push(exprs, expr(:tuple, arg))
+ end
+ expr(:tuple,exprs)
+end
+_jl_shell_parse(s::String) = _jl_shell_parse(s,true)
+
+function shell_split(s::String)
+ parsed = _jl_shell_parse(s,false)
+ args = String[]
+ for arg in parsed
+ push(args, strcat(arg...))
+ end
+ args
+end
+
+function print_shell_word(word::String)
+ if isempty(word)
+ print("''")
+ end
+ has_single = false
+ has_special = false
+ for c in word
+ if iswspace(c) || c=='\\' || c=='\'' || c=='"' || c=='$'
+ has_special = true
+ if c == '\''
+ has_single = true
+ end
+ end
+ end
+ if !has_special
+ print(word)
+ elseif !has_single
+ print('\'', word, '\'')
+ else
+ print('"')
+ for c in word
+ if c == '"' || c == '$'
+ print('\\')
+ end
+ print(c)
+ end
+ print('"')
+ end
+end
+
+function print_shell_escaped(cmd::String, args::String...)
+ print_shell_word(cmd)
+ for arg in args
+ print(' ')
+ print_shell_word(arg)
+ end
+end
+
+shell_escape(cmd::String, args::String...) =
+ print_to_string(print_shell_escaped, cmd, args...)
+
+## interface to parser ##
+
+function parse(s::String, pos, greedy)
+ # returns (expr, end_pos). expr is () in case of parse error.
+ ex, pos = ccall(:jl_parse_string, Any,
+ (Ptr{Uint8}, Int32, Int32),
+ cstring(s), pos-1, greedy ? 1:0)
+ if isa(ex,Expr) && is(ex.head,:error)
+ throw(ParseError(ex.args[1]))
+ end
+ if ex == (); throw(ParseError("end of input")); end
+ ex, pos+1 # C is zero-based, Julia is 1-based
+end
+
+parse(s::String) = parse(s, 1, true)
+parse(s::String, pos) = parse(s, pos, true)
+parseatom(s::String) = parse(s, 1, false)
+parseatom(s::String, pos) = parse(s, pos, false)
+
+## miscellaneous string functions ##
+
+function lpad(s::String, n::Integer, p::String)
+ m = n - strlen(s)
+ if m <= 0; return s; end
+ l = strlen(p)
+ if l==1
+ return p^m * s
+ end
+ q = div(m,l)
+ r = m - q*l
+ cstring(p^q*p[1:chr2ind(p,r)]*s)
+end
+
+function rpad(s::String, n::Integer, p::String)
+ m = n - strlen(s)
+ if m <= 0; return s; end
+ l = strlen(p)
+ if l==1
+ return s * p^m
+ end
+ q = div(m,l)
+ r = m - q*l
+ cstring(s*p^q*p[1:chr2ind(p,r)])
+end
+
+lpad(s, n::Integer, p) = lpad(string(s), n, string(p))
+rpad(s, n::Integer, p) = rpad(string(s), n, string(p))
+
+lpad(s, n::Integer) = lpad(string(s), n, " ")
+rpad(s, n::Integer) = rpad(string(s), n, " ")
+
+function split(s::String, delims, include_empty::Bool)
+ i = 1
+ strs = String[]
+ len = length(s)
+ while true
+ tokstart = tokend = i
+ while !done(s,i)
+ (c,i) = next(s,i)
+ if contains(delims, c)
+ break
+ end
+ tokend = i
+ end
+ tok = s[tokstart:(tokend-1)]
+ if include_empty || !isempty(tok)
+ push(strs, tok)
+ end
+ if !((i <= len) || (i==len+1 && tokend!=i))
+ break
+ end
+ end
+ strs
+end
+
+split(s::String) = split(s, (' ','\t','\n','\v','\f','\r'), false)
+split(s::String, x) = split(s, x, true)
+split(s::String, x::Char, incl::Bool) = split(s, (x,), incl)
+
+function print_joined(strings, delim, last)
+ i = start(strings)
+ if done(strings,i)
+ return
+ end
+ str, i = next(strings,i)
+ print(str)
+ while !done(strings,i)
+ str, i = next(strings,i)
+ print(done(strings,i) ? last : delim)
+ print(str)
+ end
+end
+
+function print_joined(strings, delim)
+ i = start(strings)
+ while !done(strings,i)
+ str, i = next(strings,i)
+ print(str)
+ if !done(strings,i)
+ print(delim)
+ end
+ end
+end
+print_joined(strings) = print_joined(strings, "")
+
+join(args...) = print_to_string(print_joined, args...)
+
+chop(s::String) = s[1:thisind(s,length(s))-1]
+chomp(s::String) = (i=thisind(s,length(s)); s[i]=='\n' ? s[1:i-1] : s)
+chomp(s::ByteString) = s.data[end]==0x0a ? s[1:end-1] : s
+
+function lstrip(s::String)
+ i = start(s)
+ while !done(s,i)
+ c, j = next(s,i)
+ if !iswspace(c)
+ return s[i:end]
+ end
+ i = j
+ end
+ ""
+end
+
+function rstrip(s::String)
+ r = reverse(s)
+ i = start(r)
+ while !done(r,i)
+ c, j = next(r,i)
+ if !iswspace(c)
+ return s[1:end-i+1]
+ end
+ i = j
+ end
+ ""
+end
+
+strip(s::String) = lstrip(rstrip(s))
+
+## string to integer functions ##
+
+function parse_int{T<:Integer}(::Type{T}, s::String, base::Integer)
+ if !(2 <= base <= 36); error("invalid base: ",base); end
+ i = start(s)
+ if done(s,i)
+ error("premature end of integer (in ",show_to_string(s),")")
+ end
+ c,i = next(s,i)
+ sgn = one(T)
+ if T <: Signed && c == '-'
+ sgn = -sgn
+ if done(s,i)
+ error("premature end of integer (in ",show_to_string(s),")")
+ end
+ c,i = next(s,i)
+ end
+ base = convert(T,base)
+ n::T = 0
+ while true
+ d = '0' <= c <= '9' ? c-'0' :
+ 'A' <= c <= 'Z' ? c-'A'+10 :
+ 'a' <= c <= 'z' ? c-'a'+10 : typemax(Int)
+ if d >= base
+ error(show_to_string(c)," is not a valid digit (in ",show_to_string(s),")")
+ end
+ # TODO: overflow detection?
+ n = n*base + d
+ if done(s,i)
+ break
+ end
+ c,i = next(s,i)
+ end
+ return flipsign(n,sgn)
+end
+
+parse_int(s::String, base::Integer) = parse_int(Int,s,base)
+parse_int(T::Type, s::String) = parse_int(T,s,10)
+parse_int(s::String) = parse_int(Int,s,10)
+
+parse_bin(T::Type, s::String) = parse_int(T,s,2)
+parse_oct(T::Type, s::String) = parse_int(T,s,8)
+parse_hex(T::Type, s::String) = parse_int(T,s,16)
+
+parse_bin(s::String) = parse_int(Int,s,2)
+parse_oct(s::String) = parse_int(Int,s,8)
+parse_hex(s::String) = parse_int(Int,s,16)
+
+integer (s::String) = int(s)
+unsigned(s::String) = uint(s)
+int (s::String) = parse_int(Int,s)
+uint (s::String) = parse_int(Uint,s)
+int8 (s::String) = parse_int(Int8,s)
+uint8 (s::String) = parse_int(Uint8,s)
+int16 (s::String) = parse_int(Int16,s)
+uint16 (s::String) = parse_int(Uint16,s)
+int32 (s::String) = parse_int(Int32,s)
+uint32 (s::String) = parse_int(Uint32,s)
+int64 (s::String) = parse_int(Int64,s)
+uint64 (s::String) = parse_int(Uint64,s)
+
+## integer to string functions ##
+
+const _jl_dig_syms = "0123456789abcdefghijklmnopqrstuvwxyz".data
+
+function int2str(n::Union(Int64,Uint64), b::Integer, l::Int)
+ if b < 2 || b > 36; error("int2str: invalid base ", b); end
+ neg = n < 0
+ n = unsigned(abs(n))
+ b = convert(typeof(n), b)
+ ndig = ndigits(n, b)
+ sz = max(convert(Int, ndig), l) + neg
+ data = Array(Uint8, sz)
+ i = sz
+ if ispow2(b)
+ digmask = b-1
+ shift = trailing_zeros(b)
+ while i > neg
+ ch = n & digmask
+ data[i] = _jl_dig_syms[int(ch)+1]
+ n >>= shift
+ i -= 1
+ end
+ else
+ while i > neg
+ ch = n % b
+ data[i] = _jl_dig_syms[int(ch)+1]
+ n = div(n,b)
+ i -= 1
+ end
+ end
+ if neg
+ data[1] = '-'
+ end
+ ASCIIString(data)
+end
+int2str(n::Integer, b::Integer) = int2str(n, b, 0)
+int2str(n::Integer, b::Integer, l::Int) = int2str(int64(n), b, l)
+
+string(x::Signed) = dec(int64(x))
+cstring(x::Signed) = dec(int64(x))
+
+## string to float functions ##
+
+function float64_isvalid(s::String, out::Array{Float64,1})
+ s = cstring(s)
+ return (ccall(:jl_strtod, Int32, (Ptr{Uint8},Ptr{Float64}), s, out)==0)
+end
+
+function float32_isvalid(s::String, out::Array{Float32,1})
+ s = cstring(s)
+ return (ccall(:jl_strtof, Int32, (Ptr{Uint8},Ptr{Float32}), s, out)==0)
+end
+
+begin
+ local tmp::Array{Float64,1} = Array(Float64,1)
+ local tmpf::Array{Float32,1} = Array(Float32,1)
+ global float64, float32
+ function float64(s::String)
+ if !float64_isvalid(s, tmp)
+ throw(ArgumentError("float64(String): invalid number format"))
+ end
+ return tmp[1]
+ end
+
+ function float32(s::String)
+ if !float32_isvalid(s, tmpf)
+ throw(ArgumentError("float32(String): invalid number format"))
+ end
+ return tmpf[1]
+ end
+end
+
+float(x::String) = float64(x)
+parse_float(x::String) = float64(x)
+parse_float(::Type{Float64}, x::String) = float64(x)
+parse_float(::Type{Float32}, x::String) = float32(x)
+
+# copying a byte string (generally not needed due to "immutability")
+
+strcpy{T<:ByteString}(s::T) = T(copy(s.data))
+
+# lexicographically compare byte arrays (used by Latin-1 and UTF-8)
+
+function lexcmp(a::Array{Uint8,1}, b::Array{Uint8,1})
+ c = ccall(:memcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}, Uint),
+ a, b, min(length(a),length(b)))
+ c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b))
+end
+
+# find the index of the first occurrence of a byte value in a byte array
+
+function memchr(a::Array{Uint8,1}, b::Integer)
+ p = pointer(a)
+ q = ccall(:memchr, Ptr{Uint8}, (Ptr{Uint8}, Int32, Uint), p, b, length(a))
+ q == C_NULL ? 0 : q - p + 1
+end
+
+# concatenate byte arrays into a single array
+
+memcat() = Array(Uint8,0)
+memcat(a::Array{Uint8,1}) = copy(a)
+
+function memcat(arrays::Array{Uint8,1}...)
+ n = 0
+ for a in arrays
+ n += length(a)
+ end
+ arr = Array(Uint8, n)
+ ptr = pointer(arr)
+ offset = 0
+ for a in arrays
+ ccall(:memcpy, Ptr{Uint8}, (Ptr{Uint8}, Ptr{Uint8}, Uint),
+ ptr+offset, a, length(a))
+ offset += length(a)
+ end
+ return arr
+end
+
+# concatenate the data fields of byte strings
+
+memcat(s::ByteString) = memcat(s.data)
+memcat(sx::ByteString...) = memcat(map(s->s.data, sx)...)