Merged in nolta/pygments-main (pull request #61)

author: Georg Brandl <georg@python.org> 2012-04-04 08:42:40 +0200
committer: Georg Brandl <georg@python.org> 2012-04-04 08:42:40 +0200
commit: 7ebffbb996f28a765fe28637c427f6330e94dc70 (patch)
tree: e7577bedb63196ed68381522bad0c8357f98bb01
parent: e59174b0e587a784db432dfbfe2dade4ae08e54d (diff)
parent: 50bac235a507de7c347a065b48a826713883e67d (diff)
download: pygments-7ebffbb996f28a765fe28637c427f6330e94dc70.tar.gz
3 files changed, 1192 insertions, 3 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 7c745a89..8bcc1744 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -132,6 +132,8 @@ LEXERS = {
     'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('js+smarty', 'javascript+smarty'), (), ('application/x-javascript+smarty', 'text/x-javascript+smarty', 'text/javascript+smarty')),
     'JspLexer': ('pygments.lexers.templates', 'Java Server Page', ('jsp',), ('*.jsp',), ('application/x-jsp',)),
     'KotlinLexer': ('pygments.lexers.jvm', 'Kotlin', ('kotlin',), ('*.kt',), ('text/x-kotlin',)),
+    'JuliaLexer': ('pygments.lexers.math', 'Julia', ('julia','jl'), ('*.jl',), ('text/x-julia','application/x-julia')),
+    'JuliaConsoleLexer': ('pygments.lexers.math', 'Julia console', ('jlcon',), (), ()),
     'LighttpdConfLexer': ('pygments.lexers.text', 'Lighttpd configuration file', ('lighty', 'lighttpd'), (), ('text/x-lighttpd-conf',)),
     'LiterateHaskellLexer': ('pygments.lexers.functional', 'Literate Haskell', ('lhs', 'literate-haskell'), ('*.lhs',), ('text/x-literate-haskell',)),
     'LlvmLexer': ('pygments.lexers.asm', 'LLVM', ('llvm',), ('*.ll',), ('text/x-llvm',)),
@@ -290,3 +292,4 @@ if __name__ == '__main__':
     f.write('LEXERS = {\n    %s,\n}\n\n' % ',\n    '.join(found_lexers))
     f.write(footer)
     f.close()
+
diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py
index f500231f..7ae7d6b2 100644
--- a/pygments/lexers/math.py
+++ b/pygments/lexers/math.py
@@ -11,15 +11,170 @@
 
 import re
 
-from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
+from pygments.lexer import Lexer, RegexLexer, bygroups, include, \
+    combined, do_insertions
 from pygments.token import Comment, String, Punctuation, Keyword, Name, \
     Operator, Number, Text, Generic
 
 from pygments.lexers.agile import PythonLexer
 from pygments.lexers import _scilab_builtins
 
-__all__ = ['MuPADLexer', 'MatlabLexer', 'MatlabSessionLexer', 'OctaveLexer',
-           'ScilabLexer', 'NumPyLexer', 'RConsoleLexer', 'SLexer']
+__all__ = ['JuliaLexer', 'JuliaConsoleLexer', 'MuPADLexer', 'MatlabLexer',
+           'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer', 'NumPyLexer',
+           'RConsoleLexer', 'SLexer']
+
+
+class JuliaLexer(RegexLexer):
+    """
+    For Julia source code.
+    """
+    name = 'Julia'
+    aliases = ['julia', 'jl']
+    filenames = ['*.jl']
+    mimetypes = ['text/x-julia', 'application/x-julia']
+
+    builtins = [  # identifiers the 'root' state marks as Name.Builtin
+        'exit','whos','edit','load','is','isa','isequal','typeof','tuple',
+        'ntuple','uid','hash','finalizer','convert','promote','subtype',
+        'typemin','typemax','realmin','realmax','sizeof','eps','promote_type',
+        'method_exists','applicable','invoke','dlopen','dlsym','system',
+        'error','throw','assert','new','Inf','NaN','pi','im',
+    ]
+
+    tokens = {
+        'root': [
+            (r'\n', Text),
+            (r'[^\S\n]+', Text),
+            (r'#.*$', Comment),
+            (r'[]{}:(),;[@]', Punctuation),
+            (r'\\\n', Text),
+            (r'\\', Text),
+
+            # keywords
+            (r'(begin|while|for|in|return|break|continue|'
+             r'macro|quote|let|if|elseif|else|try|catch|end|'
+             r'bitstype|ccall)\b', Keyword),
+            (r'(local|global|const)\b', Keyword.Declaration),
+            (r'(module|import|export)\b', Keyword.Reserved),
+            (r'(Bool|Int|Int8|Int16|Int32|Int64|Uint|Uint8|Uint16|Uint32|Uint64'
+             r'|Float32|Float64|Complex64|Complex128|Any|Nothing|None)\b',
+                Keyword.Type),
+
+            # definitions: keyword, whitespace, then a state for the name
+            (r'(function)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
+            (r'(type|typealias|abstract)((?:\s|\\\s)+)',
+                bygroups(Keyword, Text), 'typename'),
+
+            # operators; dotted forms first so a bare '.' cannot split them
+            (r'\.\*|\.\^|\.\\|\.\/|\\', Operator),
+            (r'==|!=|<=|>=|->|&&|\|\||::|<:|[-~+/*%=<>&^|.?!$]', Operator),
+
+            # builtins
+            ('(' + '|'.join(builtins) + r')\b', Name.Builtin),
+
+            # backtick commands (may span lines) and character literals
+            (r'(?s)`.*?`', String.Backtick),
+            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char),
+
+            # try to match trailing transpose
+            (r'(?<=[.\w\)\]])\'', Operator),
+
+            # strings
+            (r'(?:[IL])"', String, 'string'),
+            (r'[E]?"', String, combined('stringescape', 'string')),
+
+            # names
+            (r'@[a-zA-Z0-9_.]+', Name.Decorator),
+            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
+
+            # numbers
+            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
+            (r'\d+[eE][+-]?[0-9]+', Number.Float),
+            (r'0[0-7]+', Number.Oct),
+            (r'0[xX][a-fA-F0-9]+', Number.Hex),
+            (r'\d+', Number.Integer)
+        ],
+
+        'funcname': [  # after ``function``: a name or an operator, then pop
+            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop'),
+            (r'\([^\s\w{]{1,2}\)', Operator, '#pop'),
+            (r'[^\s\w{]{1,2}', Operator, '#pop'),
+        ],
+
+        'typename': [  # after ``type``/``typealias``/``abstract``, then pop
+            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
+        ],
+
+        'stringescape': [  # combined with 'string' for non-raw literals
+            (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
+             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
+        ],
+
+        'string': [
+            (r'"', String, '#pop'),
+            (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
+            (r'\$(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?',
+                String.Interpol),
+            (r'[^\\"$]+', String),
+            # quotes, dollar signs, and backslashes must be parsed one at a time
+            (r'["\\]', String),
+            # unhandled string formatting sign
+            (r'\$', String)
+        ],
+    }
+
+    def analyse_text(text):
+        """Report whether the shebang line of *text* names ``julia``."""
+        from pygments.util import shebang_matches  # not a module-level import
+        return shebang_matches(text, r'julia')
+
+
+line_re = re.compile(r'.*?\n')
+
+class JuliaConsoleLexer(Lexer):
+    """
+    For Julia console sessions. Modeled after MatlabSessionLexer.
+    """
+    name = 'Julia console'
+    aliases = ['jlcon']
+
+    def get_tokens_unprocessed(self, text):
+        """Yield (index, tokentype, value) tuples for a console session."""
+        jllexer = JuliaLexer(**self.options)
+
+        curcode = ''
+        insertions = []
+
+        for match in line_re.finditer(text):
+            line = match.group()
+
+            if line.startswith('julia>'):
+                # 'julia>' is 6 characters: prompt token first, code after it
+                insertions.append((len(curcode),
+                                   [(0, Generic.Prompt, line[:6])]))
+                curcode += line[6:]
+
+            elif line.startswith('      '):
+                idx = len(curcode)
+                # NOTE(review): newline added so the error starts on its own line
+                line = "\n" + line
+                token = (0, Generic.Traceback, line)
+                insertions.append((idx, [token]))
+
+            else:
+                if curcode:
+                    for item in do_insertions(
+                        insertions, jllexer.get_tokens_unprocessed(curcode)):
+                        yield item
+                    curcode = ''
+                    insertions = []
+
+                yield match.start(), Generic.Output, line
+
+        if curcode:
+            for item in do_insertions(
+                insertions, jllexer.get_tokens_unprocessed(curcode)):
+                yield item
 
 
 class MuPADLexer(RegexLexer):
diff --git a/tests/examplefiles/string.jl b/tests/examplefiles/string.jl
new file mode 100644
index 00000000..67bf6c70
--- /dev/null
+++ b/tests/examplefiles/string.jl
@@ -0,0 +1,1031 @@
+## core string functions ##
+
+length(s::String) = error("you must implement length(",typeof(s),")")
+next(s::String, i::Int) = error("you must implement next(",typeof(s),",Int)")
+next(s::DirectIndexString, i::Int) = (s[i],i+1)
+next(s::String, i::Integer) = next(s,int(i))
+
+## generic supplied functions ##
+
+start(s::String) = 1
+done(s::String,i) = (i > length(s))
+isempty(s::String) = done(s,start(s))
+ref(s::String, i::Int) = next(s,i)[1]
+ref(s::String, i::Integer) = s[int(i)]
+ref(s::String, x::Real) = s[iround(x)]
+ref{T<:Integer}(s::String, r::Range1{T}) = s[int(first(r)):int(last(r))]
+
+symbol(s::String) = symbol(cstring(s))
+string(s::String) = s
+
+print(s::String) = for c=s; print(c); end
+print(x...) = for i=x; print(i); end
+println(args...) = print(args..., '\n')
+
+show(s::String) = print_quoted(s)
+
+(*)(s::String...) = strcat(s...)
+(^)(s::String, r::Integer) = repeat(s,r)
+
+size(s::String) = (length(s),)
+size(s::String, d::Integer) = d==1 ? length(s) :
+    error("in size: dimension ",d," out of range")
+
+strlen(s::DirectIndexString) = length(s)
+function strlen(s::String)
+    i = start(s)
+    if done(s,i)
+        return 0
+    end
+    n = 1
+    while true
+        c, j = next(s,i)
+        if done(s,j)
+            return n
+        end
+        n += 1
+        i = j
+    end
+end
+
+isvalid(s::DirectIndexString, i::Integer) = (start(s) <= i <= length(s))
+function isvalid(s::String, i::Integer)
+    try
+        next(s,i)
+        true
+    catch
+        false
+    end
+end
+
+prevind(s::DirectIndexString, i::Integer) = i-1
+thisind(s::DirectIndexString, i::Integer) = i
+nextind(s::DirectIndexString, i::Integer) = i+1
+
+prevind(s::String, i::Integer) = thisind(s,thisind(s,i)-1)
+
+function thisind(s::String, i::Integer)
+    for j = i:-1:1
+        if isvalid(s,j)
+            return j
+        end
+    end
+    return 0 # out of range
+end
+
+function nextind(s::String, i::Integer)
+    for j = i+1:length(s)
+        if isvalid(s,j)
+            return j
+        end
+    end
+    length(s)+1 # out of range
+end
+
+ind2chr(s::DirectIndexString, i::Integer) = i
+chr2ind(s::DirectIndexString, i::Integer) = i
+
+function ind2chr(s::String, i::Integer)
+    s[i] # throws error if invalid
+    j = 1
+    k = start(s)
+    while true
+        c, l = next(s,k)
+        if i <= k
+            return j
+        end
+        j += 1
+        k = l
+    end
+end
+
+function chr2ind(s::String, i::Integer)
+    if i < 1
+        return i
+    end
+    j = 1
+    k = start(s)
+    while true
+        c, l = next(s,k)
+        if i == j
+            return k
+        end
+        j += 1
+        k = l
+    end
+end
+
+function strchr(s::String, c::Char, i::Integer)
+    i = nextind(s,i)
+    while !done(s,i)
+        d, j = next(s,i)
+        if c == d
+            return i
+        end
+        i = j
+    end
+    return 0
+end
+strchr(s::String, c::Char) = strchr(s, c, start(s))
+contains(s::String, c::Char) = (strchr(s,c)!=0)
+
+function chars(s::String)
+    cx = Array(Char,strlen(s))
+    i = 0
+    for c in s
+        cx[i += 1] = c
+    end
+    return cx
+end
+
+function cmp(a::String, b::String)
+    i = start(a)
+    j = start(b)
+    while !done(a,i) && !done(b,i)
+        c, i = next(a,i)
+        d, j = next(b,j)
+        if c != d
+            return c < d ? -1 : +1
+        end
+    end
+    done(a,i) && !done(b,j) ? -1 :
+    !done(a,i) && done(b,j) ? +1 : 0
+end
+
+isequal(a::String, b::String) = cmp(a,b) == 0
+isless(a::String, b::String)  = cmp(a,b) <  0
+
+# faster comparisons for byte strings
+
+cmp(a::ByteString, b::ByteString)     = lexcmp(a.data, b.data)
+isequal(a::ByteString, b::ByteString) = length(a)==length(b) && cmp(a,b)==0
+
+## character column width function ##
+
+charwidth(c::Char) = max(0,int(ccall(:wcwidth, Int32, (Char,), c)))
+strwidth(s::String) = (w=0; for c in s; w += charwidth(c); end; w)
+strwidth(s::ByteString) = ccall(:u8_strwidth, Int, (Ptr{Uint8},), s.data)
+# TODO: implement and use u8_strnwidth that takes a length argument
+
+## generic string uses only length and next ##
+
+type GenericString <: String
+    string::String
+end
+
+length(s::GenericString) = length(s.string)
+next(s::GenericString, i::Int) = next(s.string, i)
+
+## plain old character arrays ##
+
+type CharString <: String
+    chars::Array{Char,1}
+
+    CharString(a::Array{Char,1}) = new(a)
+    CharString(c::Char...) = new([ c[i] | i=1:length(c) ])
+end
+CharString(x...) = CharString(map(char,x)...)
+
+next(s::CharString, i::Int) = (s.chars[i], i+1)
+length(s::CharString) = length(s.chars)
+strlen(s::CharString) = length(s)
+
+string(c::Char) = CharString(c)
+string(c::Char, x::Char...) = CharString(c, x...)
+
+## substrings reference original strings ##
+
+type SubString <: String
+    string::String
+    offset::Int
+    length::Int
+
+    SubString(s::String, i::Int, j::Int) = new(s, i-1, j-i+1)
+    SubString(s::SubString, i::Int, j::Int) =
+        new(s.string, i-1+s.offset, j-i+1)
+end
+SubString(s::String, i::Integer, j::Integer) = SubString(s, int(i), int(j))
+
+function next(s::SubString, i::Int)
+    if i < 1 || i > s.length
+        error("string index out of bounds")
+    end
+    c, i = next(s.string, i+s.offset)
+    c, i-s.offset
+end
+
+length(s::SubString) = s.length
+# TODO: strlen(s::SubString) = ??
+# default implementation will work but it's slow
+# can this be delegated efficiently somehow?
+# that may require additional string interfaces
+
+function ref(s::String, r::Range1{Int})
+    if first(r) < 1 || length(s) < last(r)
+        error("in substring slice: index out of range")
+    end
+    SubString(s, first(r), last(r))
+end
+
+## efficient representation of repeated strings ##
+
+type RepString <: String
+    string::String
+    repeat::Integer
+end
+
+length(s::RepString) = length(s.string)*s.repeat
+strlen(s::RepString) = strlen(s.string)*s.repeat
+
+function next(s::RepString, i::Int)
+    if i < 1 || i > length(s)
+        error("string index out of bounds")
+    end
+    j = mod1(i,length(s.string))
+    c, k = next(s.string, j)
+    c, k-j+i
+end
+
+function repeat(s::String, r::Integer)
+    r <  0 ? error("can't repeat a string ",r," times") :
+    r == 0 ? "" :
+    r == 1 ? s  :
+    RepString(s,r)
+end
+
+## reversed strings without data movement ##
+
+type RevString <: String
+    string::String
+end
+
+length(s::RevString) = length(s.string)
+strlen(s::RevString) = strlen(s.string)
+
+start(s::RevString) = (n=length(s); n-thisind(s.string,n)+1)
+function next(s::RevString, i::Int)
+    n = length(s); j = n-i+1
+    (s.string[j], n-thisind(s.string,j-1)+1)
+end
+
+reverse(s::String) = RevString(s)
+reverse(s::RevString) = s.string
+
+## ropes for efficient concatenation, etc. ##
+
+# Idea: instead of this standard binary tree structure,
+# how about we keep an array of substrings, with an
+# offset array. We can do binary search on the offset
+# array so we get O(log(n)) indexing time still, but we
+# can compute the offsets lazily and avoid all the
+# futzing around while the string is being constructed.
+
+type RopeString <: String
+    head::String
+    tail::String
+    depth::Int32
+    length::Int
+
+    RopeString(h::RopeString, t::RopeString) =
+        depth(h.tail) + depth(t) < depth(h.head) ?
+            RopeString(h.head, RopeString(h.tail, t)) :
+            new(h, t, max(h.depth,t.depth)+1, length(h)+length(t))
+
+    RopeString(h::RopeString, t::String) =
+        depth(h.tail) < depth(h.head) ?
+            RopeString(h.head, RopeString(h.tail, t)) :
+            new(h, t, h.depth+1, length(h)+length(t))
+
+    RopeString(h::String, t::RopeString) =
+        depth(t.head) < depth(t.tail) ?
+            RopeString(RopeString(h, t.head), t.tail) :
+            new(h, t, t.depth+1, length(h)+length(t))
+
+    RopeString(h::String, t::String) =
+        new(h, t, 1, length(h)+length(t))
+end
+
+depth(s::String) = 0
+depth(s::RopeString) = s.depth
+
+function next(s::RopeString, i::Int)
+    if i <= length(s.head)
+        return next(s.head, i)
+    else
+        c, j = next(s.tail, i-length(s.head))
+        return c, j+length(s.head)
+    end
+end
+
+length(s::RopeString) = s.length
+strlen(s::RopeString) = strlen(s.head) + strlen(s.tail)
+
+strcat() = ""
+strcat(s::String) = s
+strcat(x...) = strcat(map(string,x)...)
+strcat(s::String, t::String...) =
+    (t = strcat(t...); isempty(s) ? t : isempty(t) ? s : RopeString(s, t))
+
+print(s::RopeString) = print(s.head, s.tail)
+
+## transformed strings ##
+
+type TransformedString <: String
+    transform::Function
+    string::String
+end
+
+length(s::TransformedString) = length(s.string)
+strlen(s::TransformedString) = strlen(s.string)
+
+function next(s::TransformedString, i::Int)
+    c, j = next(s.string,i)
+    c = s.transform(c, i)
+    return c, j
+end
+
+## uppercase and lowercase transformations ##
+
+uppercase(c::Char) = ccall(:towupper, Char, (Char,), c)
+lowercase(c::Char) = ccall(:towlower, Char, (Char,), c)
+
+uppercase(s::String) = TransformedString((c,i)->uppercase(c), s)
+lowercase(s::String) = TransformedString((c,i)->lowercase(c), s)
+
+ucfirst(s::String) = TransformedString((c,i)->i==1 ? uppercase(c) : c, s)
+lcfirst(s::String) = TransformedString((c,i)->i==1 ? lowercase(c) : c, s)
+
+const uc = uppercase
+const lc = lowercase
+
+## string map ##
+
+function map(f::Function, s::String)
+    out = memio(length(s))
+    for c in s
+        write(out, f(c)::Char)
+    end
+    takebuf_string(out)
+end
+
+## conversion of general objects to strings ##
+
+string(x) = print_to_string(show, x)
+cstring(x...) = print_to_string(print, x...)
+
+function cstring(p::Ptr{Uint8})
+    p == C_NULL ? error("cannot convert NULL to string") :
+    ccall(:jl_cstr_to_string, Any, (Ptr{Uint8},), p)::ByteString
+end
+
+## string promotion rules ##
+
+promote_rule(::Type{UTF8String} , ::Type{ASCIIString}) = UTF8String
+promote_rule(::Type{UTF8String} , ::Type{CharString} ) = UTF8String
+promote_rule(::Type{ASCIIString}, ::Type{CharString} ) = UTF8String
+
+## printing literal quoted string data ##
+
+# TODO: this is really the inverse of print_unbackslashed
+
+function print_quoted_literal(s::String)
+    print('"')
+    for c = s; c == '"' ? print("\\\"") : print(c); end
+    print('"')
+end
+
+## string escaping & unescaping ##
+
+escape_nul(s::String, i::Int) =
+    !done(s,i) && '0' <= next(s,i)[1] <= '7' ? L"\x00" : L"\0"
+
+is_hex_digit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'
+need_full_hex(s::String, i::Int) = !done(s,i) && is_hex_digit(next(s,i)[1])
+
+function print_escaped(s::String, esc::String)
+    i = start(s)
+    while !done(s,i)
+        c, j = next(s,i)
+        c == '\0'       ? print(escape_nul(s,j)) :
+        c == '\e'       ? print(L"\e") :
+        c == '\\'       ? print("\\\\") :
+        contains(esc,c) ? print('\\', c) :
+        iswprint(c)     ? print(c) :
+        7 <= c <= 13    ? print('\\', "abtnvfr"[c-6]) :
+        c <= '\x7f'     ? print(L"\x", hex(c, 2)) :
+        c <= '\uffff'   ? print(L"\u", hex(c, need_full_hex(s,j) ? 4 : 2)) :
+                          print(L"\U", hex(c, need_full_hex(s,j) ? 8 : 4))
+        i = j
+    end
+end
+
+escape_string(s::String) = print_to_string(length(s), print_escaped, s, "\"")
+print_quoted(s::String) = (print('"'); print_escaped(s, "\"\$"); print('"'))
+#"  # work around syntax highlighting problem
+quote_string(s::String) = print_to_string(length(s)+2, print_quoted, s)
+
+# bare minimum unescaping function unescapes only given characters
+
+function print_unescaped_chars(s::String, esc::String)
+    if !contains(esc,'\\')
+        esc = strcat("\\", esc)
+    end
+    i = start(s)
+    while !done(s,i)
+        c, i = next(s,i)
+        if c == '\\' && !done(s,i) && contains(esc,s[i])
+            c, i = next(s,i)
+        end
+        print(c)
+    end
+end
+
+unescape_chars(s::String, esc::String) =
+    print_to_string(length(s), print_unescaped_chars, s, esc)
+
+# general unescaping of traditional C and Unicode escape sequences
+
+function print_unescaped(s::String)
+    i = start(s)
+    while !done(s,i)
+        c, i = next(s,i)
+        if !done(s,i) && c == '\\'
+            c, i = next(s,i)
+            if c == 'x' || c == 'u' || c == 'U'
+                n = k = 0
+                m = c == 'x' ? 2 :
+                    c == 'u' ? 4 : 8
+                while (k+=1) <= m && !done(s,i)
+                    c, j = next(s,i)
+                    n = '0' <= c <= '9' ? n<<4 + c-'0' :
+                        'a' <= c <= 'f' ? n<<4 + c-'a'+10 :
+                        'A' <= c <= 'F' ? n<<4 + c-'A'+10 : break
+                    i = j
+                end
+                if k == 1
+                    error("\\x used with no following hex digits")
+                end
+                if m == 2 # \x escape sequence
+                    write(uint8(n))
+                else
+                    print(char(n))
+                end
+            elseif '0' <= c <= '7'
+                k = 1
+                n = c-'0'
+                while (k+=1) <= 3 && !done(s,i)
+                    c, j = next(s,i)
+                    n = '0' <= c <= '7' ? n<<3 + c-'0' : break
+                    i = j
+                end
+                if n > 255
+                    error("octal escape sequence out of range")
+                end
+                write(uint8(n))
+            else
+                print(c == 'a' ? '\a' :
+                      c == 'b' ? '\b' :
+                      c == 't' ? '\t' :
+                      c == 'n' ? '\n' :
+                      c == 'v' ? '\v' :
+                      c == 'f' ? '\f' :
+                      c == 'r' ? '\r' :
+                      c == 'e' ? '\e' : c)
+            end
+        else
+            print(c)
+        end
+    end
+end
+
+unescape_string(s::String) = print_to_string(length(s), print_unescaped, s)
+
+## checking UTF-8 & ASCII validity ##
+
+byte_string_classify(s::ByteString) =
+    ccall(:u8_isvalid, Int32, (Ptr{Uint8}, Int), s.data, length(s))
+    # 0: neither valid ASCII nor UTF-8
+    # 1: valid ASCII
+    # 2: valid UTF-8
+
+is_valid_ascii(s::ByteString) = byte_string_classify(s) == 1
+is_valid_utf8 (s::ByteString) = byte_string_classify(s) != 0
+
+check_ascii(s::ByteString) = is_valid_ascii(s) ? s : error("invalid ASCII sequence")
+check_utf8 (s::ByteString) = is_valid_utf8(s)  ? s : error("invalid UTF-8 sequence")
+
+## string interpolation parsing ##
+
+function _jl_interp_parse(s::String, unescape::Function, printer::Function)
+    sx = {}
+    i = j = start(s)
+    while !done(s,j)
+        c, k = next(s,j)
+        if c == '$'
+            if !isempty(s[i:j-1])
+                push(sx, unescape(s[i:j-1]))
+            end
+            ex, j = parseatom(s,k)
+            push(sx, ex)
+            i = j
+        elseif c == '\\' && !done(s,k)
+            if s[k] == '$'
+                if !isempty(s[i:j-1])
+                    push(sx, unescape(s[i:j-1]))
+                end
+                i = k
+            end
+            c, j = next(s,k)
+        else
+            j = k
+        end
+    end
+    if !isempty(s[i:])
+        push(sx, unescape(s[i:j-1]))
+    end
+    length(sx) == 1 && isa(sx[1],ByteString) ? sx[1] :
+        expr(:call, :print_to_string, printer, sx...)
+end
+
+_jl_interp_parse(s::String, u::Function) = _jl_interp_parse(s, u, print)
+_jl_interp_parse(s::String) = _jl_interp_parse(s, x->check_utf8(unescape_string(x)))
+
+function _jl_interp_parse_bytes(s::String)
+    writer(x...) = for w=x; write(w); end
+    _jl_interp_parse(s, unescape_string, writer)
+end
+
+## core string macros ##
+
+macro   str(s); _jl_interp_parse(s); end
+macro S_str(s); _jl_interp_parse(s); end
+macro I_str(s); _jl_interp_parse(s, x->unescape_chars(x,"\"")); end
+macro E_str(s); check_utf8(unescape_string(s)); end
+macro B_str(s); _jl_interp_parse_bytes(s); end
+macro b_str(s); ex = _jl_interp_parse_bytes(s); :(($ex).data); end
+
+## shell-like command parsing ##
+
+function _jl_shell_parse(s::String, interp::Bool)
+
+    in_single_quotes = false
+    in_double_quotes = false
+
+    args = {}
+    arg = {}
+    i = start(s)
+    j = i
+
+    function update_arg(x)
+        if !isa(x,String) || !isempty(x)
+            push(arg, x)
+        end
+    end
+    function append_arg()
+        if isempty(arg); arg = {"",}; end
+        push(args, arg)
+        arg = {}
+    end
+
+    while !done(s,j)
+        c, k = next(s,j)
+        if !in_single_quotes && !in_double_quotes && iswspace(c)
+            update_arg(s[i:j-1])
+            append_arg()
+            j = k
+            while !done(s,j)
+                c, k = next(s,j)
+                if !iswspace(c)
+                    i = j
+                    break
+                end
+                j = k
+            end
+        elseif interp && !in_single_quotes && c == '$'
+            update_arg(s[i:j-1]); i = k; j = k
+            if done(s,k)
+                error("\$ right before end of command")
+            end
+            if iswspace(s[k])
+                error("space not allowed right after \$")
+            end
+            ex, j = parseatom(s,j)
+            update_arg(ex); i = j
+        else
+            if !in_double_quotes && c == '\''
+                in_single_quotes = !in_single_quotes
+                update_arg(s[i:j-1]); i = k
+            elseif !in_single_quotes && c == '"'
+                in_double_quotes = !in_double_quotes
+                update_arg(s[i:j-1]); i = k
+            elseif c == '\\'
+                if in_double_quotes
+                    if done(s,k)
+                        error("unterminated double quote")
+                    end
+                    if s[k] == '"' || s[k] == '$'
+                        update_arg(s[i:j-1]); i = k
+                        c, k = next(s,k)
+                    end
+                elseif !in_single_quotes
+                    if done(s,k)
+                        error("dangling backslash")
+                    end
+                    update_arg(s[i:j-1]); i = k
+                    c, k = next(s,k)
+                end
+            end
+            j = k
+        end
+    end
+
+    if in_single_quotes; error("unterminated single quote"); end
+    if in_double_quotes; error("unterminated double quote"); end
+
+    update_arg(s[i:])
+    append_arg()
+
+    if !interp
+        return args
+    end
+
+    # construct an expression
+    exprs = {}
+    for arg in args
+        push(exprs, expr(:tuple, arg))
+    end
+    expr(:tuple,exprs)
+end
+_jl_shell_parse(s::String) = _jl_shell_parse(s,true)
+
+function shell_split(s::String)
+    parsed = _jl_shell_parse(s,false)
+    args = String[]
+    for arg in parsed
+       push(args, strcat(arg...))
+    end
+    args
+end
+
+function print_shell_word(word::String)
+    if isempty(word)
+        print("''")
+    end
+    has_single = false
+    has_special = false
+    for c in word
+        if iswspace(c) || c=='\\' || c=='\'' || c=='"' || c=='$'
+            has_special = true
+            if c == '\''
+                has_single = true
+            end
+        end
+    end
+    if !has_special
+        print(word)
+    elseif !has_single
+        print('\'', word, '\'')
+    else
+        print('"')
+        for c in word
+            if c == '"' || c == '$'
+                print('\\')
+            end
+            print(c)
+        end
+        print('"')
+    end
+end
+
+function print_shell_escaped(cmd::String, args::String...)
+    print_shell_word(cmd)
+    for arg in args
+        print(' ')
+        print_shell_word(arg)
+    end
+end
+
+shell_escape(cmd::String, args::String...) =
+    print_to_string(print_shell_escaped, cmd, args...)
+
+## interface to parser ##
+
+function parse(s::String, pos, greedy)
+    # returns (expr, end_pos). expr is () in case of parse error.
+    ex, pos = ccall(:jl_parse_string, Any,
+                    (Ptr{Uint8}, Int32, Int32),
+                    cstring(s), pos-1, greedy ? 1:0)
+    if isa(ex,Expr) && is(ex.head,:error)
+        throw(ParseError(ex.args[1]))
+    end
+    if ex == (); throw(ParseError("end of input")); end
+    ex, pos+1 # C is zero-based, Julia is 1-based
+end
+
+parse(s::String)          = parse(s, 1, true)
+parse(s::String, pos)     = parse(s, pos, true)
+parseatom(s::String)      = parse(s, 1, false)
+parseatom(s::String, pos) = parse(s, pos, false)
+
+## miscellaneous string functions ##
+
+function lpad(s::String, n::Integer, p::String)
+    m = n - strlen(s)
+    if m <= 0; return s; end
+    l = strlen(p)
+    if l==1
+        return p^m * s
+    end
+    q = div(m,l)
+    r = m - q*l
+    cstring(p^q*p[1:chr2ind(p,r)]*s)
+end
+
+function rpad(s::String, n::Integer, p::String)
+    m = n - strlen(s)
+    if m <= 0; return s; end
+    l = strlen(p)
+    if l==1
+        return s * p^m
+    end
+    q = div(m,l)
+    r = m - q*l
+    cstring(s*p^q*p[1:chr2ind(p,r)])
+end
+
+lpad(s, n::Integer, p) = lpad(string(s), n, string(p))
+rpad(s, n::Integer, p) = rpad(string(s), n, string(p))
+
+lpad(s, n::Integer) = lpad(string(s), n, " ")
+rpad(s, n::Integer) = rpad(string(s), n, " ")
+
+function split(s::String, delims, include_empty::Bool)
+    i = 1
+    strs = String[]
+    len = length(s)
+    while true
+        tokstart = tokend = i
+        while !done(s,i)
+            (c,i) = next(s,i)
+            if contains(delims, c)
+                break
+            end
+            tokend = i
+        end
+        tok = s[tokstart:(tokend-1)]
+        if include_empty || !isempty(tok)
+            push(strs, tok)
+        end
+        if !((i <= len) || (i==len+1 && tokend!=i))
+            break
+        end
+    end
+    strs
+end
+
+split(s::String) = split(s, (' ','\t','\n','\v','\f','\r'), false)
+split(s::String, x) = split(s, x, true)
+split(s::String, x::Char, incl::Bool) = split(s, (x,), incl)
+
+function print_joined(strings, delim, last)
+    i = start(strings)
+    if done(strings,i)
+        return
+    end
+    str, i = next(strings,i)
+    print(str)
+    while !done(strings,i)
+        str, i = next(strings,i)
+        print(done(strings,i) ? last : delim)
+        print(str)
+    end
+end
+
+function print_joined(strings, delim)
+    i = start(strings)
+    while !done(strings,i)
+        str, i = next(strings,i)
+        print(str)
+        if !done(strings,i)
+            print(delim)
+        end
+    end
+end
+print_joined(strings) = print_joined(strings, "")
+
+join(args...) = print_to_string(print_joined, args...)
+
+chop(s::String) = s[1:thisind(s,length(s))-1]
+chomp(s::String) = (i=thisind(s,length(s)); s[i]=='\n' ? s[1:i-1] : s)
+chomp(s::ByteString) = s.data[end]==0x0a ? s[1:end-1] : s
+
+function lstrip(s::String)
+    i = start(s)
+    while !done(s,i)
+        c, j = next(s,i)
+        if !iswspace(c)
+            return s[i:end]
+        end
+        i = j
+    end
+    ""
+end
+
+function rstrip(s::String)
+    r = reverse(s)
+    i = start(r)
+    while !done(r,i)
+        c, j = next(r,i)
+        if !iswspace(c)
+            return s[1:end-i+1]
+        end
+        i = j
+    end
+    ""
+end
+
+strip(s::String) = lstrip(rstrip(s))
+
+## string to integer functions ##
+
+function parse_int{T<:Integer}(::Type{T}, s::String, base::Integer)  # parse s as an integer of type T written in `base` (2 through 36); calls error() on bad input
+    if !(2 <= base <= 36); error("invalid base: ",base); end
+    i = start(s)
+    if done(s,i)  # empty string: nothing to parse
+        error("premature end of integer (in ",show_to_string(s),")")
+    end
+    c,i = next(s,i)
+    sgn = one(T)
+    if T <: Signed && c == '-'  # a leading '-' is accepted only when the target type is signed
+        sgn = -sgn
+        if done(s,i)  # a lone "-" has no digits after it
+            error("premature end of integer (in ",show_to_string(s),")")
+        end
+        c,i = next(s,i)
+    end
+    base = convert(T,base)
+    n::T = 0  # running value, declared as type T
+    while true
+        d = '0' <= c <= '9' ? c-'0' :  # digit value of c; letters of either case count upward from 10
+            'A' <= c <= 'Z' ? c-'A'+10 :
+            'a' <= c <= 'z' ? c-'a'+10 : typemax(Int)  # any other character gets typemax(Int), intended to trip the d >= base check below
+        if d >= base
+            error(show_to_string(c)," is not a valid digit (in ",show_to_string(s),")")
+        end
+        # TODO: overflow detection?
+        n = n*base + d
+        if done(s,i)
+            break
+        end
+        c,i = next(s,i)
+    end
+    return flipsign(n,sgn)  # apply the sign recorded while reading the optional '-'
+end
+
+parse_int(s::String, base::Integer) = parse_int(Int,s,base)  # result type defaults to Int
+parse_int(T::Type, s::String)       = parse_int(T,s,10)  # base defaults to 10
+parse_int(s::String)                = parse_int(Int,s,10)  # both defaults: Int, base 10
+
+parse_bin(T::Type, s::String) = parse_int(T,s,2)  # fixed-base shorthands: base 2
+parse_oct(T::Type, s::String) = parse_int(T,s,8)  # base 8
+parse_hex(T::Type, s::String) = parse_int(T,s,16)  # base 16
+
+parse_bin(s::String) = parse_int(Int,s,2)  # same shorthands with the result type defaulting to Int
+parse_oct(s::String) = parse_int(Int,s,8)
+parse_hex(s::String) = parse_int(Int,s,16)
+
+integer (s::String) = int(s)  # forwards to int
+unsigned(s::String) = uint(s)  # forwards to uint
+int     (s::String) = parse_int(Int,s)  # this and each converter below parses s with the 2-argument parse_int for the named type
+uint    (s::String) = parse_int(Uint,s)
+int8    (s::String) = parse_int(Int8,s)
+uint8   (s::String) = parse_int(Uint8,s)
+int16   (s::String) = parse_int(Int16,s)
+uint16  (s::String) = parse_int(Uint16,s)
+int32   (s::String) = parse_int(Int32,s)
+uint32  (s::String) = parse_int(Uint32,s)
+int64   (s::String) = parse_int(Int64,s)
+uint64  (s::String) = parse_int(Uint64,s)
+
+## integer to string functions ##
+
+const _jl_dig_syms = "0123456789abcdefghijklmnopqrstuvwxyz".data  # symbols for digit values 0 through 35, taken from the string's .data field
+
+function int2str(n::Union(Int64,Uint64), b::Integer, l::Int)  # format n in base b (2 through 36) using at least l digits
+    if b < 2 || b > 36; error("int2str: invalid base ", b); end
+    neg = n < 0
+    n = unsigned(abs(n))  # work on the magnitude; the sign is written separately at the end
+    b = convert(typeof(n), b)
+    ndig = ndigits(n, b)
+    sz = max(convert(Int, ndig), l) + neg  # output length: at least l digits, plus one slot for '-' when negative
+    data = Array(Uint8, sz)
+    i = sz  # digits are written from the last position backwards
+    if ispow2(b)  # power-of-two base: take digits with a mask and a shift
+        digmask = b-1
+        shift = trailing_zeros(b)
+        while i > neg  # stops before index 1 when that slot is reserved for the sign
+            ch = n & digmask
+            data[i] = _jl_dig_syms[int(ch)+1]  # +1 maps digit value 0 to the first table entry
+            n >>= shift
+            i -= 1
+        end
+    else  # any other base: take digits with remainder and division
+        while i > neg
+            ch = n % b
+            data[i] = _jl_dig_syms[int(ch)+1]  # once n reaches zero the remaining slots receive '0'
+            n = div(n,b)
+            i -= 1
+        end
+    end
+    if neg
+        data[1] = '-'
+    end
+    ASCIIString(data)
+end
+int2str(n::Integer, b::Integer)         = int2str(n, b, 0)  # minimum digit count defaults to 0
+int2str(n::Integer, b::Integer, l::Int) = int2str(int64(n), b, l)  # other integer types are converted with int64 first
+
+string(x::Signed) = dec(int64(x))  # convert to Int64, then format with dec (defined outside this view)
+cstring(x::Signed) = dec(int64(x))  # same conversion for cstring
+
+## string to float functions ##
+
+function float64_isvalid(s::String, out::Array{Float64,1})  # true when the C function jl_strtod returns 0 for s; presumably the parsed value is stored in out - confirm
+    s = cstring(s)  # convert before handing the string to C
+    return (ccall(:jl_strtod, Int32, (Ptr{Uint8},Ptr{Float64}), s, out)==0)
+end
+
+function float32_isvalid(s::String, out::Array{Float32,1})  # true when the C function jl_strtof returns 0 for s; presumably the parsed value is stored in out - confirm
+    s = cstring(s)  # convert before handing the string to C
+    return (ccall(:jl_strtof, Int32, (Ptr{Uint8},Ptr{Float32}), s, out)==0)
+end
+
+begin  # block-local buffers used by the two global functions defined inside
+    local tmp::Array{Float64,1} = Array(Float64,1)  # 1-element buffer handed to float64_isvalid on every float64 call
+    local tmpf::Array{Float32,1} = Array(Float32,1)  # 1-element buffer handed to float32_isvalid on every float32 call
+    global float64, float32
+    function float64(s::String)  # parse s as a Float64; throws ArgumentError when float64_isvalid returns false
+        if !float64_isvalid(s, tmp)
+            throw(ArgumentError("float64(String): invalid number format"))
+        end
+        return tmp[1]  # presumably the value written by jl_strtod during the check above
+    end
+
+    function float32(s::String)  # parse s as a Float32; throws ArgumentError when float32_isvalid returns false
+        if !float32_isvalid(s, tmpf)
+            throw(ArgumentError("float32(String): invalid number format"))
+        end
+        return tmpf[1]  # presumably the value written by jl_strtof during the check above
+    end
+end
+
+float(x::String) = float64(x)  # float of a string means Float64
+parse_float(x::String) = float64(x)  # result type defaults to Float64
+parse_float(::Type{Float64}, x::String) = float64(x)  # explicit type selects the matching parser
+parse_float(::Type{Float32}, x::String) = float32(x)
+
+# copying a byte string (generally not needed due to "immutability")
+
+strcpy{T<:ByteString}(s::T) = T(copy(s.data))  # build a new T from a copy of s.data
+
+# lexicographically compare byte arrays (used by Latin-1 and UTF-8)
+
+function lexcmp(a::Array{Uint8,1}, b::Array{Uint8,1})  # returns -1 or +1 from the byte comparison, otherwise cmp of the two lengths
+    c = ccall(:memcmp, Int32, (Ptr{Uint8}, Ptr{Uint8}, Uint),  # C memcmp over the common prefix only
+              a, b, min(length(a),length(b)))
+    c < 0 ? -1 : c > 0 ? +1 : cmp(length(a),length(b))  # identical prefixes: decide by comparing lengths
+end
+
+# find the index of the first occurrence of a byte value in a byte array
+
+function memchr(a::Array{Uint8,1}, b::Integer)  # 1-based index of the first byte in a equal to b, or 0 when there is none
+    p = pointer(a)
+    q = ccall(:memchr, Ptr{Uint8}, (Ptr{Uint8}, Int32, Uint), p, b, length(a))  # C memchr returns a pointer to the match, or NULL
+    q == C_NULL ? 0 : q - p + 1  # turn the pointer offset into a 1-based index
+end
+
+# concatenate byte arrays into a single array
+
+memcat() = Array(Uint8,0)  # no arguments: a new zero-length byte array
+memcat(a::Array{Uint8,1}) = copy(a)  # one argument: a copy of it
+
+function memcat(arrays::Array{Uint8,1}...)  # general case: one new array holding every input back to back
+    n = 0
+    for a in arrays  # first pass: total length of all inputs
+        n += length(a)
+    end
+    arr = Array(Uint8, n)
+    ptr = pointer(arr)
+    offset = 0  # number of bytes of arr filled so far
+    for a in arrays  # second pass: copy each input to its position in arr
+        ccall(:memcpy, Ptr{Uint8}, (Ptr{Uint8}, Ptr{Uint8}, Uint),
+              ptr+offset, a, length(a))
+        offset += length(a)
+    end
+    return arr
+end
+
+# concatenate the data fields of byte strings
+
+memcat(s::ByteString) = memcat(s.data)  # single byte string: forward its data field
+memcat(sx::ByteString...) = memcat(map(s->s.data, sx)...)  # several byte strings: collect each data field and forward them all
author	Georg Brandl <georg@python.org>	2012-04-04 08:42:40 +0200
committer	Georg Brandl <georg@python.org>	2012-04-04 08:42:40 +0200
commit	7ebffbb996f28a765fe28637c427f6330e94dc70 (patch)
tree	e7577bedb63196ed68381522bad0c8357f98bb01
parent	e59174b0e587a784db432dfbfe2dade4ae08e54d (diff)
parent	50bac235a507de7c347a065b48a826713883e67d (diff)
download	pygments-7ebffbb996f28a765fe28637c427f6330e94dc70.tar.gz