[svn] added improved pascal lexer

author: blackbird <devnull@localhost> 2006-11-18 21:10:27 +0100
committer: blackbird <devnull@localhost> 2006-11-18 21:10:27 +0100
commit: b9e3b75424f7d5dc2e66d28da6c7f6a176089532 (patch)
tree: cef29d576b707c034479d6e3e878e44997136edb
parent: 7a505b67251a6b46f02f90abc68af64c099eb3e7 (diff)
download: pygments-b9e3b75424f7d5dc2e66d28da6c7f6a176089532.tar.gz
6 files changed, 1228 insertions, 114 deletions
diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py
index 33aada8d..4df6c732 100644
--- a/pygments/formatters/html.py
+++ b/pygments/formatters/html.py
@@ -41,12 +41,13 @@ def get_random_id():
 
 def _get_ttype_class(ttype):
     fname = STANDARD_TYPES.get(ttype)
-    if fname: return fname
+    if fname:
+        return fname
     aname = ''
     while fname is None:
         aname = '-' + ttype[-1] + aname
         ttype = ttype.parent
-        fname = STANDARD_TYPES.get(ttype)
+        fname = STANDARD_TYPES.get(ttype, '')
     return fname + aname
 
 
@@ -131,8 +132,7 @@ class HtmlFormatter(Formatter):
         the classprefix option."""
         if ttype in self._class_cache:
             return self._class_cache[ttype]
-
-        return self.classprefix + STANDARD_TYPES.get(ttype) or _get_ttype_class(ttype)
+        return self.classprefix + _get_ttype_class(ttype)
 
     def _create_stylesheet(self):
         t2c = self.ttype2class = {Token: ''}
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index b2ccd31e..b8b9d5ab 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -10,10 +10,18 @@
 """
 
 import re
+try:
+    set
+except NameError:
+    from sets import Set as set
 
-from pygments.lexer import RegexLexer, include, bygroups, using, this
+from pygments.scanner import Scanner
+from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
+                           this
+from pygments.util import get_bool_opt, get_list_opt
 from pygments.token import \
-     Text, Comment, Operator, Keyword, Name, String, Number
+     Text, Comment, Operator, Keyword, Name, String, Number, Punctuation, \
+     Error
 
 
 __all__ = ['CLexer', 'CppLexer', 'DelphiLexer', 'JavaLexer']
@@ -184,112 +192,473 @@ class CppLexer(RegexLexer):
     }
 
 
-class DelphiLexer(RegexLexer):
+class DelphiLexer(Lexer):
     name = 'Delphi'
     aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
     filenames = ['*.pas']
     mimetypes = ['text/x-pascal']
 
-    flags = re.IGNORECASE | re.MULTILINE | re.DOTALL
-    tokens = {
-        'root': [
-            (r'\s+', Text),
-            (r'asm\b', Keyword, 'asm'),
-            (r'(uses)(\s+)', bygroups(Keyword, Text), 'uses'),
-            (r'(procedure|function)(\s+)', bygroups(Keyword, Text), 'funcname'),
-            (r'(abstract|and|array|as|assembler|at|begin|case|cdecl|'
-             r'class|const|constructor|contains|destructor|dispinterface|'
-             r'div|do|downto|else|end|except|far|file|finalization|'
-             r'finally|for|goto|if|implementation|in|inherited|out|'
-             r'initialization|inline|interface|is|label|mod|near|nil|not|'
-             r'object|of|on|or|overload|override|package|packed|pascal|'
-             r'private|program|protected|public|'
-             r'published|raise|record|register|repeat|requires|resourcestring|'
-             r'safecall|self|set|shl|shr|stdcall|then|threadvar|to|try|'
-             r'type|unit|until|uses|var|varargs|virtual|while|with|xor|'
-             r'break|assert)\b', Keyword),
-            (r'(AnsiString|Boolean|Byte|ByteBool|Cardinal|Char|Comp|'
-             r'Currency|Double|Extended|Int64|Integer|LongBool|LongInt|Real|'
-             r'Real48|ShortInt|ShortString|Single|SmallInt|String|WideChar|'
-             r'WideString|Word|WordBool|Boolean)\b', Keyword.Type),
-            (r'property\b', Keyword, 'property'),
-            (r'(true|false|inc|dec)\b', Name.Builtin),
-            (r'(result)\b', Keyword.Pseudo),
-            include('comments'),
-            (r"'(''|[^']*)'", String),
-            (r'\$[0-9a-fA-F]+', Number),
-            (r'\#\$?[0-9]{1,3}', Number),
-            (r'[0-9]', Number),
-            (r'[@~!%^&*()+=|\[\]:;,.<>/?-]', Text),
-            (r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*)(:)',
-             bygroups(Text, Name.Label, Text)),
-            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
-        ],
-        'comments': [
-            (r'\{.*?\}', Comment),
-            (r'\(\*.*?\*\)', Comment),
-            (r'//.*?\n', Comment)
-        ],
-        'uses': [
-            (r'(in)(\s+)(\'.*?\')', bygroups(Keyword, Text, String)),
-            (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace),
-            (r'[\s,]', Text),
-            include('comments'),
-            (r';', Text, '#pop')
+    TURBO_PASCAL_KEYWORDS = [
+        'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
+        'const', 'constructor', 'continue', 'destructor', 'div', 'do',
+        'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
+        'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
+        'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
+        'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
+        'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
+        'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
+    ]
+
+    DELPHI_KEYWORDS = [
+        'as', 'class', 'except', 'exports', 'finalization', 'finally',
+        'initialization', 'is', 'library', 'on', 'property', 'raise',
+        'threadvar', 'try'
+    ]
+
+    FREE_PASCAL_KEYWORDS = [
+        'dispose', 'exit', 'false', 'new', 'true'
+    ]
+
+    BLOCK_KEYWORDS = set([
+        'begin', 'class', 'const', 'constructor', 'destructor', 'end',
+        'finalization', 'function', 'implementation', 'initialization',
+        'label', 'library', 'operator', 'procedure', 'program', 'property',
+        'record', 'threadvar', 'type', 'unit', 'uses', 'var'
+    ])
+
+    FUNCTION_MODIFIERS = set([
+        'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
+        'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
+        'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
+        'override', 'assembler'
+    ])
+
+    # XXX: those arn't global. but currently we know no way for defining
+    #      them just for the type context.
+    DIRECTIVES = set([
+        'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
+        'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
+        'published', 'public'
+    ])
+
+    BUILTIN_TYPES = set([
+        'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
+        'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
+        'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
+        'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
+        'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
+        'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
+        'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
+        'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
+        'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
+        'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
+        'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
+        'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
+        'widechar', 'widestring', 'word', 'wordbool'
+    ])
+
+    BUILTIN_UNITS = {
+        'System': [
+            'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
+            'append', 'arctan', 'assert', 'assigned', 'assignfile',
+            'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
+            'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
+            'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
+            'dispose', 'doubletocomp', 'endthread', 'enummodules',
+            'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
+            'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
+            'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
+            'findresourcehinstance', 'flush', 'frac', 'freemem',
+            'get8087cw', 'getdir', 'getlasterror', 'getmem',
+            'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
+            'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
+            'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
+            'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
+            'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
+            'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
+            'randomize', 'read', 'readln', 'reallocmem',
+            'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
+            'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
+            'set8087cw', 'setlength', 'setlinebreakstyle',
+            'setmemorymanager', 'setstring', 'settextbuf',
+            'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
+            'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
+            'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
+            'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
+            'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
+            'utf8tounicode', 'val', 'vararrayredim', 'varclear',
+            'widecharlentostring', 'widecharlentostrvar',
+            'widechartostring', 'widechartostrvar',
+            'widestringtoucs4string', 'write', 'writeln'
         ],
-        'property': [
-            (r';', Text, '#pop'),
-            (r'(read|write)\b', Keyword),
-            include('root')
+        'SysUtils': [
+            'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
+            'allocmem', 'ansicomparefilename', 'ansicomparestr',
+            'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
+            'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
+            'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
+            'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
+            'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
+            'ansistrscan', 'ansistrupper', 'ansiuppercase',
+            'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
+            'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
+            'callterminateprocs', 'changefileext', 'charlength',
+            'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
+            'comparetext', 'createdir', 'createguid', 'currentyear',
+            'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
+            'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
+            'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
+            'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
+            'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
+            'exceptionerrormessage', 'excludetrailingbackslash',
+            'excludetrailingpathdelimiter', 'expandfilename',
+            'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
+            'extractfiledrive', 'extractfileext', 'extractfilename',
+            'extractfilepath', 'extractrelativepath', 'extractshortpathname',
+            'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
+            'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
+            'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
+            'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
+            'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
+            'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
+            'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
+            'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
+            'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
+            'getenvironmentvariable', 'getfileversion', 'getformatsettings',
+            'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
+            'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
+            'includetrailingbackslash', 'includetrailingpathdelimiter',
+            'incmonth', 'initializepackage', 'interlockeddecrement',
+            'interlockedexchange', 'interlockedexchangeadd',
+            'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
+            'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
+            'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
+            'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
+            'outofmemoryerror', 'quotedstr', 'raiselastoserror',
+            'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
+            'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
+            'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
+            'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
+            'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
+            'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
+            'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
+            'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
+            'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
+            'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
+            'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
+            'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
+            'strtotimedef', 'strupper', 'supports', 'syserrormessage',
+            'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
+            'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
+            'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
+            'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
+            'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
+            'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
+            'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
+            'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
+            'wraptext'
         ],
-        'funcname': [
-            (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Function, '#pop')
+        'Classes': [
+            'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
+            'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
+            'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
+            'groupdescendantswith', 'hextobin', 'identtoint',
+            'initinheritedcomponent', 'inttoident', 'invalidpoint',
+            'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
+            'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
+            'pointsequal', 'readcomponentres', 'readcomponentresex',
+            'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
+            'registerclasses', 'registercomponents', 'registerintegerconsts',
+            'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
+            'teststreamformat', 'unregisterclass', 'unregisterclasses',
+            'unregisterintegerconsts', 'unregistermoduleclasses',
+            'writecomponentresfile'
         ],
-        'asm': [
-            (r'end', Keyword, '#pop'),
-            (r'\s+', Text),
-            include('comments'),
-            (r'(AAA|AAD|AAM|AAS|ADC|ADD|AND|ARPL|BOUND|BSF|BSR|BSWAP|BT|'
-             r'BTC|BTR|BTS|CALL|CBW|CDQ|CLC|CLD|CLI|CLTS|CMC|CMP|CMPSB|'
-             r'CMPSD|CMPSW|CMPXCHG|CMPXCHG486|CMPXCHG8B|CPUID|CWD|CWDE|'
-             r'DAA|DAS|DEC|DIV|EMMS|ENTER|HLT|IBTS|ICEBP|IDIV|IMUL|IN|INC|'
-             r'INSB|INSD|INSW|INT|INT01|INT03|INT1|INT3|INTO|INVD|INVLPG|'
-             r'IRET|IRETD|IRETW|JCXZ|JECXZ|JMP|LAHF|LAR|LCALL|LDS|LEA|LEAVE|'
-             r'LES|LFS|LGDT|LGS|LIDT|LJMP|LLDT|LMSW|LOADALL|LOADALL286|LOCK|'
-             r'LODSB|LODSD|LODSW|LOOP|LOOPE|LOOPNE|LOOPNZ|LOOPZ|LSL|LSS|LTR|'
-             r'MOV|MOVD|MOVQ|MOVSB|MOVSD|MOVSW|MOVSX|MOVZX|MUL|NEG|NOP|NOT|'
-             r'OR|OUT|OUTSB|OUTSD|OUTSW|POP|POPA|POPAD|POPAW|POPF|POPFD|'
-             r'POPFW|PUSH|PUSHA|PUSHAD|PUSHAW|PUSHF|PUSHFD|PUSHFW|RCL|RCR|'
-             r'RDMSR|RDPMC|RDSHR|RDTSC|REP|REPE|REPNE|REPNZ|REPZ|RET|RETF|'
-             r'RETN|ROL|ROR|RSDC|RSLDT|RSM|SAHF|SAL|SALC|SAR|SBB|SCASB|SCASD|'
-             r'SCASW|SGDT|SHL|SHLD|SHR|SHRD|SIDT|SLDT|SMI|SMINT|SMINTOLD|'
-             r'SMSW|STC|STD|STI|STOSB|STOSD|STOSW|STR|SUB|SVDC|SVLDT|SVTS|'
-             r'SYSCALL|SYSENTER|SYSEXIT|SYSRET|TEST|UD1|UD2|UMOV|VERR|VERW|'
-             r'WAIT|WBINVD|WRMSR|WRSHR|XADD|XBTS|XCHG|XLAT|XLATB|XOR|cmova|'
-             r'cmovae|cmovb|cmovbe|cmovc|cmovcxz|cmove|cmovg|cmovge|cmovl|'
-             r'cmovle|cmovna|cmovnae|cmovnb|cmovnbe|cmovnc|cmovne|cmovng|'
-             r'cmovnge|cmovnl|cmovnle|cmovno|cmovnp|cmovns|cmovnz|cmovo|'
-             r'cmovp|cmovpe|cmovpo|cmovs|cmovz|ja|jae|jb|jbe|jc|jcxz|je|jg|'
-             r'jge|jl|jle|jna|jnae|jnb|jnbe|jnc|jne|jng|jnge|jnl|jnle|jno|'
-             r'jnp|jns|jnz|jo|jp|jpe|jpo|js|jz|seta|setae|setb|setbe|setc|'
-             r'setcxz|sete|setg|setge|setl|setle|setna|setnae|setnb|setnbe|'
-             r'setnc|setne|setng|setnge|setnl|setnle|setno|setnp|setns|setnz|'
-             r'seto|setp|setpe|setpo|sets|setz)\b', Keyword),
-            (r'(byte|dmtindex|dword|large|offset|ptr|qword|small|tbyte|'
-             r'type|vmtoffset|word)\b', Keyword.Pseudo),
-            (r'(ah|al|ax|bh|bl|bp|bx|ch|cl|cr0|cr1|cr2|cr3|cr4|cs|cx|dh|di|'
-             r'dl|dr0|dr1|dr2|dr3|dr4|dr5|dr6|dr7|ds|dx|eax|ebp|ebx|ecx|edi|'
-             r'edx|es|esi|esp|fs|gs|mm0|mm1|mm2|mm3|mm4|mm5|mm6|mm7|si|sp|'
-             r'ss|st0|st1|st2|st3|st4|st5|st6|st7|xmm0|xmm1|xmm2|xmm3|xmm4|'
-             r'xmm5|xmm6|xmm7)\b', Name.Builtin),
-            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
-            (r'(@@[a-zA-Z0-9_]+)(:)?', bygroups(Name.Label, Text)),
-            (r'\$[0-9a-zA-Z]+', Number),
-            (r"'(''|[^']+)'", String),
-            (r'[\[\]&()*+,./;-]', Text)
+        'Math': [
+            'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
+            'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
+            'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
+            'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
+            'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
+            'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
+            'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
+            'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
+            'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
+            'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
+            'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
+            'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
+            'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
+            'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
+            'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
+            'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
+            'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
+            'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
+            'tan', 'tanh', 'totalvariance', 'variance'
         ]
     }
 
+    ASM_REGISTERS = set([
+        'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
+        'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
+        'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
+        'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
+        'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
+        'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
+        'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
+        'xmm6', 'xmm7'
+    ])
+
+    ASM_INSTRUCTIONS = set([
+        'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
+        'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
+        'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
+        'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
+        'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
+        'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
+        'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
+        'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
+        'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
+        'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
+        'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
+        'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
+        'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
+        'jc', 'jcxz', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
+        'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
+        'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
+        'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
+        'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
+        'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
+        'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
+        'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
+        'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
+        'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
+        'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
+        'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
+        'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
+        'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
+        'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
+        'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
+        'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
+        'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
+        'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
+        'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
+        'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
+        'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
+        'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
+        'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
+        'xlatb', 'xor'
+    ])
+
+    def __init__(self, **options):
+        Lexer.__init__(self, **options)
+        self.keywords = set()
+        if get_bool_opt(options, 'turbopascal', True):
+            self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
+        if get_bool_opt(options, 'delphi', True):
+            self.keywords.update(self.DELPHI_KEYWORDS)
+        if get_bool_opt(options, 'freepascal', True):
+            self.keywords.update(self.FREE_PASCAL_KEYWORDS)
+        self.builtins = set()
+        for unit in get_list_opt(options, 'units', self.BUILTIN_UNITS.keys()):
+            self.builtins.update(self.BUILTIN_UNITS[unit])
+    
+    def get_tokens_unprocessed(self, text):
+        scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
+        stack = ['initial']
+        in_function_block = False
+        in_property_block = False
+        was_dot = False
+        next_token_is_function = False
+        next_token_is_property = False
+        collect_labels = False
+        block_labels = set()
+        brace_balance = [0, 0]
+
+        while not scanner.eos:
+            token = Error
+
+            if stack[-1] == 'initial':
+                if scanner.scan(r'\s+'):
+                    token = Text
+                elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
+                    if scanner.match.startswith('$'):
+                        token = Comment.Preproc
+                    else:
+                        token = Comment.Multiline
+                elif scanner.scan(r'//.*?$'):
+                    token = Comment.Single
+                elif scanner.scan(r'[-+*\/=<>:;,.@\^]'):
+                    token = Operator
+                    # stop label highlighting on next ";"
+                    if collect_labels and scanner.match == ';':
+                        collect_labels = False
+                elif scanner.scan(r'[\(\)\[\]]+'):
+                    token = Punctuation
+                    # abort function naming ``foo = Function(...)``
+                    next_token_is_function = False
+                    # if we are in a function block we count the open
+                    # braces because ootherwise it's impossible to
+                    # determine the end of the modifier context
+                    if in_function_block or in_property_block:
+                        if scanner.match == '(':
+                            brace_balance[0] += 1
+                        elif scanner.match == ')':
+                            brace_balance[0] -= 1
+                        elif scanner.match == '[':
+                            brace_balance[1] += 1
+                        elif scanner.match == ']':
+                            brace_balance[1] -= 1
+                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
+                    lowercase_name = scanner.match.lower()
+                    if lowercase_name == 'result':
+                        token = Name.Builtin.Pseudo
+                    elif lowercase_name in self.keywords:
+                        token = Keyword
+                        # if we are in a special block and a
+                        # block ending keyword occours (and the parenthesis
+                        # is balanced) we end the current block context
+                        if (in_function_block or in_property_block) and \
+                           lowercase_name in self.BLOCK_KEYWORDS and \
+                           brace_balance[0] <= 0 and \
+                           brace_balance[1] <= 0:
+                            in_function_block = False
+                            in_property_block = False
+                            brace_balance = [0, 0]
+                            block_labels = set()
+                        if lowercase_name in ('label', 'goto'):
+                            collect_labels = True
+                        elif lowercase_name == 'asm':
+                            stack.append('asm')
+                        elif lowercase_name == 'property':
+                            in_property_block = True
+                            next_token_is_property = True
+                        elif lowercase_name in ('procedure', 'operator',
+                                                'function', 'constructor',
+                                                'destructor'):
+                            in_function_block = True
+                            next_token_is_function = True
+                    # we are in a function block and the current name
+                    # is in the set of registered modifiers. highlight
+                    # it as pseudo keyword
+                    elif in_function_block and \
+                         lowercase_name in self.FUNCTION_MODIFIERS:
+                        token = Keyword.Pseudo
+                        next_token_is_function
+                    # if we are in a property highlight some more
+                    # modifiers
+                    elif in_property_block and \
+                         lowercase_name in ('read, write'):
+                        token = Keyword.Pseudo
+                        next_token_is_function = True
+                    # if the last iteration set next_token_is_function
+                    # to true we now want this name highlighted as
+                    # function. so do that and reset the state
+                    elif next_token_is_function:
+                        # Look if the next token is a dot. If yes it's
+                        # not a function, but a class name and the
+                        # part after the dot a function name
+                        if scanner.test(r'\s*\.\s*'):
+                            token = Name.Class
+                        # it's not a dot, our job is done
+                        else:
+                            token = Name.Function
+                            next_token_is_function = False
+                    # same for properties
+                    elif next_token_is_property:
+                        token = Name.Property
+                        next_token_is_property = False
+                    # Highlight this token as label and add it
+                    # to the list of known labels
+                    elif collect_labels:
+                        token = Name.Label
+                        block_labels.add(scanner.match.lower())
+                    # name is in list of known labels
+                    elif lowercase_name in block_labels:
+                        token = Name.Label
+                    elif lowercase_name in self.BUILTIN_TYPES:
+                        token = Keyword.Type
+                    elif lowercase_name in self.DIRECTIVES:
+                        token = Keyword.Pseudo
+                    # builtins are just builtins if the token
+                    # before isn't a dot
+                    elif not was_dot and lowercase_name in self.builtins:
+                        token = Name.Builtin
+                    else:
+                        token = Name
+                elif scanner.scan(r"'"):
+                    token = String
+                    stack.append('string')
+                elif scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
+                    token = String.Char
+                elif scanner.scan(r'\$[0-9A-Fa-f]+'):
+                    token = Number.Hex
+                elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
+                    token = Number.Integer
+                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
+                    token = Number.Float
+                else:
+                    # if the stack depth is deeper than once, pop
+                    if len(stack) > 1:
+                        stack.pop()
+                    scanner.get_char()
+
+            elif stack[-1] == 'string':
+                if scanner.scan(r"''"):
+                    token = String.Escape
+                elif scanner.scan(r"'"):
+                    token = String
+                    stack.pop()
+                elif scanner.scan(r"[^']*"):
+                    token = String
+                else:
+                    scanner.get_char()
+                    stack.pop()
+
+            elif stack[-1] == 'asm':
+                if scanner.scan(r'\s+'):
+                    token = Text
+                elif scanner.scan(r'end'):
+                    token = Keyword
+                    stack.pop()
+                elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
+                    if scanner.match.startswith('$'):
+                        token = Comment.Preproc
+                    else:
+                        token = Comment.Multiline
+                elif scanner.scan(r'//.*?$'):
+                    token = Comment.Single
+                elif scanner.scan(r"'"):
+                    token = String
+                    stack.append('string')
+                elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'):
+                    token = Name.Label
+                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
+                    lowercase_name = scanner.match.lower()
+                    if lowercase_name in self.ASM_INSTRUCTIONS:
+                        token = Keyword
+                    elif lowercase_name in self.ASM_REGISTERS:
+                        token = Name.Builtin
+                    else:
+                        token = Name
+                elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'):
+                    token = Operator
+                elif scanner.scan(r'[\(\)\[\]]+'):
+                    token = Punctuation
+                elif scanner.scan(r'\$[0-9A-Fa-f]+'):
+                    token = Number.Hex
+                elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
+                    token = Number.Integer
+                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
+                    token = Number.Float
+                else:
+                    scanner.get_char()
+                    stack.pop()
+
+            # save the dot!!!11
+            if scanner.match.strip():
+                was_dot = scanner.match == '.'
+            yield scanner.start_pos, token, scanner.match or ''
+
 
 class JavaLexer(RegexLexer):
     name = 'Java'
diff --git a/pygments/scanner.py b/pygments/scanner.py
new file mode 100644
index 00000000..ba7d8a2e
--- /dev/null
+++ b/pygments/scanner.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+"""
+    pygments.scanner
+    ~~~~~~~~~~~~~~~~
+
+    This library implements a regex based scanner. Some languages
+    like Pascal are easy to parse but have some keywords that
+    depend on the context. Because of this it's impossible to lex
+    that just by using a regular expression lexer like the
+    `RegexLexer`.
+
+    Have a look at the `DelphiLexer` to get an idea of how to use
+    this scanner.
+
+    :copyright: 2006 by Armin Ronacher.
+    :license: GNU LGPL, see LICENSE for more details.
+"""
+import re
+
+
+class EndOfText(RuntimeError):
+    """
+    Raise if end of text is reached and the user
+    tried to call a match function.
+    """
+
+
+class Scanner(object):
+    """
+    Simple scanner
+
+    All method patterns are regular expression strings (not
+    compiled expressions!)
+    """
+
+    def __init__(self, text, flags=0):
+        """
+        :param text:    The text which should be scanned
+        :param flags:   default regular expression flags
+        """
+        self.data = text
+        self.data_length = len(text)
+        self.start_pos = 0
+        self.pos = 0
+        self.flags = flags
+        self.last = None
+        self.match = None
+        self._re_cache = {}
+
+    def eos(self):
+        """`True` if the scanner reached the end of text."""
+        return self.pos >= self.data_length
+    eos = property(eos, eos.__doc__)
+
+    def check(self, pattern):
+        """
+        Apply `pattern` on the current position and return
+        the match object. (Doesn't touch pos). Use this for
+        lookahead.
+        """
+        if self.eos:
+            raise EndOfText()
+        if pattern not in self._re_cache:
+            self._re_cache[pattern] = re.compile(pattern, self.flags)
+        return self._re_cache[pattern].match(self.data, self.pos)
+
+    def test(self, pattern):
+        """Apply a pattern on the current position and check
+        if it patches. Doesn't touch pos."""
+        return self.check(pattern) is not None
+
+    def scan(self, pattern):
+        """
+        Scan the text for the given pattern and update pos/match
+        and related fields. The return value is a boolen that
+        indicates if the pattern matched. The matched value is
+        stored on the instance as ``match``, the last value is
+        stored as ``last``. ``start_pos`` is the position of the
+        pointer before the pattern was matched, ``pos`` is the
+        end position.
+        """
+        if self.eos:
+            raise EndOfText()
+        if pattern not in self._re_cache:
+            self._re_cache[pattern] = re.compile(pattern, self.flags)
+        self.last = self.match
+        m = self._re_cache[pattern].match(self.data, self.pos)
+        if m is None:
+            return False
+        self.start_pos = m.start()
+        self.pos = m.end()
+        self.match = m.group()
+        return True
+
+    def get_char(self):
+        """Scan exactly one char"""
+        self.scan('.')
+
+    def __repr__(self):
+        return '<%s %d/%d>' % (
+            self.__class__.__name__,
+            self.pos,
+            self.data_length
+        )
diff --git a/pygments/styles/pastie.py b/pygments/styles/pastie.py
index f7f46934..8763a2c1 100644
--- a/pygments/styles/pastie.py
+++ b/pygments/styles/pastie.py
@@ -40,6 +40,7 @@ class PastieStyle(Style):
         Name.Class:             'bold #bb0066',
         Name.Exception:         'bold #bb0066',
         Name.Function:          'bold #0066bb',
+        Name.Property:          'bold #336699',
         Name.Module:            'bold #bb0066',
         Name.Builtin:           '#003388',
         Name.Variable:          '#336699',
diff --git a/pygments/token.py b/pygments/token.py
index 6c30442c..b49fd61a 100644
--- a/pygments/token.py
+++ b/pygments/token.py
@@ -36,25 +36,26 @@ class _TokenType(tuple):
         return 'Token' + (self and '.' or '') + '.'.join(self)
 
 
-Token     = _TokenType()
+Token       = _TokenType()
 
 # Special token types
-Text      = Token.Text
-Error     = Token.Error
+Text        = Token.Text
+Error       = Token.Error
 # Text that doesn't belong to this lexer (e.g. HTML in PHP)
-Other     = Token.Other
+Other       = Token.Other
 
 # Common token types for source code
-Keyword   = Token.Keyword
-Name      = Token.Name
-Literal   = Token.Literal
-String    = Literal.String
-Number    = Literal.Number
-Operator  = Token.Operator
-Comment   = Token.Comment
+Keyword     = Token.Keyword
+Name        = Token.Name
+Literal     = Token.Literal
+String      = Literal.String
+Number      = Literal.Number
+Punctuation = Literal.Punctuation
+Operator    = Token.Operator
+Comment     = Token.Comment
 
 # Generic types for non-source code
-Generic   = Token.Generic
+Generic     = Token.Generic
 
 
 def is_token_subtype(ttype, other):
@@ -93,6 +94,7 @@ STANDARD_TYPES = {
     Name.Entity:                   'ni',
     Name.Exception:                'ne',
     Name.Function:                 'nf',
+    Name.Property:                 'py',
     Name.Label:                    'nl',
     Name.Namespace:                'nn',
     Name.Other:                    'nx',
@@ -128,6 +130,8 @@ STANDARD_TYPES = {
     Operator:                      'o',
     Operator.Word:                 'ow',
 
+    Punctuation:                   'p',
+
     Comment:                       'c',
     Comment.Multiline:             'cm',
     Comment.Preproc:               'cp',
diff --git a/tests/examplefiles/test.pas b/tests/examplefiles/test.pas
index 7b9c2d04..2724bbfd 100644
--- a/tests/examplefiles/test.pas
+++ b/tests/examplefiles/test.pas
@@ -17,10 +17,14 @@ var
 //  FindAllFilesInit
 //
 //
-procedure FindAllFilesInit;
+procedure FindAllFilesInit; external;
+label foo;
 begin
   CntFolders := 0;
   NumFolder := 0;
+foo:
+  Blub;
+  goto foo;
 end;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -105,3 +109,635 @@ begin
     Windows.FindClose(hFindFile);
   end;
 end;
+
+
+property test: boolean read ftest write ftest;
+procedure test: boolean read ftest write ftest;
+
+//
+// This sourcecode is part of omorphia
+//
+
+Function IsValidHandle(Const Handle: THandle): Boolean; {$IFDEF OMORPHIA_FEATURES_USEASM} Assembler;
+Asm
+    TEST    EAX, EAX
+    JZ      @@Finish
+    NOT     EAX
+    TEST    EAX, EAX
+    SETNZ   AL
+
+    {$IFDEF WINDOWS}
+    JZ      @@Finish
+
+    //Save the handle against modifications or loss
+    PUSH    EAX
+
+    //reserve some space for a later duplicate
+    PUSH    EAX
+
+    //Check if we are working on NT-Platform
+    CALL    IsWindowsNTSystem
+    TEST    EAX, EAX
+    JZ      @@NoNTSystem
+
+    PUSH    DWORD PTR [ESP]
+    LEA     EAX, DWORD PTR [ESP+$04]
+    PUSH    EAX
+    CALL    GetHandleInformation
+    TEST    EAX, EAX
+    JNZ     @@Finish2
+
+@@NoNTSystem:
+    //Result := DuplicateHandle(GetCurrentProcess, Handle, GetCurrentProcess,
+    //  @Duplicate, 0, False, DUPLICATE_SAME_ACCESS);
+    PUSH    DUPLICATE_SAME_ACCESS
+    PUSH    $00000000
+    PUSH    $00000000
+    LEA     EAX, DWORD PTR [ESP+$0C]
+    PUSH    EAX
+    CALL    GetCurrentProcess
+    PUSH    EAX
+    PUSH    DWORD PTR [ESP+$18]
+    PUSH    EAX
+    CALL    DuplicateHandle
+
+    TEST    EAX, EAX
+    JZ      @@Finish2
+
+    //  Result := CloseHandle(Duplicate);
+    PUSH    DWORD PTR [ESP]
+    CALL    CloseHandle
+
+@@Finish2:
+    POP     EDX
+    POP     EDX
+
+    PUSH    EAX
+    PUSH    $00000000
+    CALL    SetLastError
+    POP     EAX
+    {$ENDIF}
+
+@@Finish:
+End;
+{$ELSE}
+Var
+    Duplicate: THandle;
+    Flags: DWORD;
+Begin
+    If IsWinNT Then
+        Result := GetHandleInformation(Handle, Flags)
+    Else
+        Result := False;
+    If Not Result Then
+    Begin
+        // DuplicateHandle is used as an additional check for those object types not
+        // supported by GetHandleInformation (e.g. according to the documentation,
+        // GetHandleInformation doesn't support window stations and desktop although
+        // tests show that it does). GetHandleInformation is tried first because its
+        // much faster. Additionally GetHandleInformation is only supported on NT...
+        Result := DuplicateHandle(GetCurrentProcess, Handle, GetCurrentProcess,
+            @Duplicate, 0, False, DUPLICATE_SAME_ACCESS);
+        If Result Then
+            Result := CloseHandle(Duplicate);
+    End;
+End;
+{$ENDIF}
+
+
+    	
+
+{*******************************************************}
+{                                                       }
+{       Delphi Supplemental Components                  }
+{       ZLIB Data Compression Interface Unit            }
+{                                                       }
+{       Copyright (c) 1997 Borland International        }
+{                                                       }
+{*******************************************************}
+
+{ Modified for zlib 1.1.3 by Davide Moretti <dave@rimini.com }
+
+unit zlib;
+
+interface
+
+uses Sysutils, Classes;
+
+type
+  TAlloc = function (AppData: Pointer; Items, Size: Integer): Pointer;
+  TFree = procedure (AppData, Block: Pointer);
+
+  // Internal structure.  Ignore.
+  TZStreamRec = packed record
+    next_in: PChar;       // next input byte
+    avail_in: Integer;    // number of bytes available at next_in
+    total_in: Integer;    // total nb of input bytes read so far
+
+    next_out: PChar;      // next output byte should be put here
+    avail_out: Integer;   // remaining free space at next_out
+    total_out: Integer;   // total nb of bytes output so far
+
+    msg: PChar;           // last error message, NULL if no error
+    internal: Pointer;    // not visible by applications
+
+    zalloc: TAlloc;       // used to allocate the internal state
+    zfree: TFree;         // used to free the internal state
+    AppData: Pointer;     // private data object passed to zalloc and zfree
+
+    data_type: Integer;   //  best guess about the data type: ascii or binary
+    adler: Integer;       // adler32 value of the uncompressed data
+    reserved: Integer;    // reserved for future use
+  end;
+
+  // Abstract ancestor class
+  TCustomZlibStream = class(TStream)
+  private
+    FStrm: TStream;
+    FStrmPos: Integer;
+    FOnProgress: TNotifyEvent;
+    FZRec: TZStreamRec;
+    FBuffer: array [Word] of Char;
+  protected
+    procedure Progress(Sender: TObject); dynamic;
+    property OnProgress: TNotifyEvent read FOnProgress write FOnProgress;
+    constructor Create(Strm: TStream);
+  end;
+
+{ TCompressionStream compresses data on the fly as data is written to it, and
+  stores the compressed data to another stream.
+
+  TCompressionStream is write-only and strictly sequential. Reading from the
+  stream will raise an exception. Using Seek to move the stream pointer
+  will raise an exception.
+
+  Output data is cached internally, written to the output stream only when
+  the internal output buffer is full.  All pending output data is flushed
+  when the stream is destroyed.
+
+  The Position property returns the number of uncompressed bytes of
+  data that have been written to the stream so far.
+
+  CompressionRate returns the on-the-fly percentage by which the original
+  data has been compressed:  (1 - (CompressedBytes / UncompressedBytes)) * 100
+  If raw data size = 100 and compressed data size = 25, the CompressionRate
+  is 75%
+
+  The OnProgress event is called each time the output buffer is filled and
+  written to the output stream.  This is useful for updating a progress
+  indicator when you are writing a large chunk of data to the compression
+  stream in a single call.}
+
+
+  TCompressionLevel = (clNone, clFastest, clDefault, clMax);
+
+  TCompressionStream = class(TCustomZlibStream)
+  private
+    function GetCompressionRate: Single;
+  public
+    constructor Create(CompressionLevel: TCompressionLevel; Dest: TStream);
+    destructor Destroy; override;
+    function Read(var Buffer; Count: Longint): Longint; override;
+    function Write(const Buffer; Count: Longint): Longint; override;
+    function Seek(Offset: Longint; Origin: Word): Longint; override;
+    property CompressionRate: Single read GetCompressionRate;
+    property OnProgress;
+  end;
+
+{ TDecompressionStream decompresses data on the fly as data is read from it.
+
+  Compressed data comes from a separate source stream.  TDecompressionStream
+  is read-only and unidirectional; you can seek forward in the stream, but not
+  backwards.  The special case of setting the stream position to zero is
+  allowed.  Seeking forward decompresses data until the requested position in
+  the uncompressed data has been reached.  Seeking backwards, seeking relative
+  to the end of the stream, requesting the size of the stream, and writing to
+  the stream will raise an exception.
+
+  The Position property returns the number of bytes of uncompressed data that
+  have been read from the stream so far.
+
+  The OnProgress event is called each time the internal input buffer of
+  compressed data is exhausted and the next block is read from the input stream.
+  This is useful for updating a progress indicator when you are reading a
+  large chunk of data from the decompression stream in a single call.}
+
+  TDecompressionStream = class(TCustomZlibStream)
+  public
+    constructor Create(Source: TStream);
+    destructor Destroy; override;
+    function Read(var Buffer; Count: Longint): Longint; override;
+    function Write(const Buffer; Count: Longint): Longint; override;
+    function Seek(Offset: Longint; Origin: Word): Longint; override;
+    property OnProgress;
+  end;
+
+
+
+{ CompressBuf compresses data, buffer to buffer, in one call.
+   In: InBuf = ptr to compressed data
+       InBytes = number of bytes in InBuf
+  Out: OutBuf = ptr to newly allocated buffer containing decompressed data
+       OutBytes = number of bytes in OutBuf   }
+procedure CompressBuf(const InBuf: Pointer; InBytes: Integer;
+                      out OutBuf: Pointer; out OutBytes: Integer);
+
+
+{ DecompressBuf decompresses data, buffer to buffer, in one call.
+   In: InBuf = ptr to compressed data
+       InBytes = number of bytes in InBuf
+       OutEstimate = zero, or est. size of the decompressed data
+  Out: OutBuf = ptr to newly allocated buffer containing decompressed data
+       OutBytes = number of bytes in OutBuf   }
+procedure DecompressBuf(const InBuf: Pointer; InBytes: Integer;
+ OutEstimate: Integer; out OutBuf: Pointer; out OutBytes: Integer);
+
+const
+  zlib_version = '1.1.3';
+
+type
+  EZlibError = class(Exception);
+  ECompressionError = class(EZlibError);
+  EDecompressionError = class(EZlibError);
+
+function adler32(adler: Integer; buf: PChar; len: Integer): Integer;
+
+implementation
+
+const
+  Z_NO_FLUSH      = 0;
+  Z_PARTIAL_FLUSH = 1;
+  Z_SYNC_FLUSH    = 2;
+  Z_FULL_FLUSH    = 3;
+  Z_FINISH        = 4;
+
+  Z_OK            = 0;
+  Z_STREAM_END    = 1;
+  Z_NEED_DICT     = 2;
+  Z_ERRNO         = (-1);
+  Z_STREAM_ERROR  = (-2);
+  Z_DATA_ERROR    = (-3);
+  Z_MEM_ERROR     = (-4);
+  Z_BUF_ERROR     = (-5);
+  Z_VERSION_ERROR = (-6);
+
+  Z_NO_COMPRESSION       =   0;
+  Z_BEST_SPEED           =   1;
+  Z_BEST_COMPRESSION     =   9;
+  Z_DEFAULT_COMPRESSION  = (-1);
+
+  Z_FILTERED            = 1;
+  Z_HUFFMAN_ONLY        = 2;
+  Z_DEFAULT_STRATEGY    = 0;
+
+  Z_BINARY   = 0;
+  Z_ASCII    = 1;
+  Z_UNKNOWN  = 2;
+
+  Z_DEFLATED = 8;
+
+  _z_errmsg: array[0..9] of PChar = (
+    'need dictionary',      // Z_NEED_DICT      (2)
+    'stream end',           // Z_STREAM_END     (1)
+    '',                     // Z_OK             (0)
+    'file error',           // Z_ERRNO          (-1)
+    'stream error',         // Z_STREAM_ERROR   (-2)
+    'data error',           // Z_DATA_ERROR     (-3)
+    'insufficient memory',  // Z_MEM_ERROR      (-4)
+    'buffer error',         // Z_BUF_ERROR      (-5)
+    'incompatible version', // Z_VERSION_ERROR  (-6)
+    ''
+  );
+
+{$L deflate.obj}
+{$L inflate.obj}
+{$L inftrees.obj}
+{$L trees.obj}
+{$L adler32.obj}
+{$L infblock.obj}
+{$L infcodes.obj}
+{$L infutil.obj}
+{$L inffast.obj}
+
+procedure _tr_init; external;
+procedure _tr_tally; external;
+procedure _tr_flush_block; external;
+procedure _tr_align; external;
+procedure _tr_stored_block; external;
+function adler32; external;
+procedure inflate_blocks_new; external;
+procedure inflate_blocks; external;
+procedure inflate_blocks_reset; external;
+procedure inflate_blocks_free; external;
+procedure inflate_set_dictionary; external;
+procedure inflate_trees_bits; external;
+procedure inflate_trees_dynamic; external;
+procedure inflate_trees_fixed; external;
+procedure inflate_codes_new; external;
+procedure inflate_codes; external;
+procedure inflate_codes_free; external;
+procedure _inflate_mask; external;
+procedure inflate_flush; external;
+procedure inflate_fast; external;
+
+procedure _memset(P: Pointer; B: Byte; count: Integer);cdecl;
+begin
+  FillChar(P^, count, B);
+end;
+
+procedure _memcpy(dest, source: Pointer; count: Integer);cdecl;
+begin
+  Move(source^, dest^, count);
+end;
+
+
+
+// deflate compresses data
+function deflateInit_(var strm: TZStreamRec; level: Integer; version: PChar;
+  recsize: Integer): Integer; external;
+function deflate(var strm: TZStreamRec; flush: Integer): Integer; external;
+function deflateEnd(var strm: TZStreamRec): Integer; external;
+
+// inflate decompresses data
+function inflateInit_(var strm: TZStreamRec; version: PChar;
+  recsize: Integer): Integer; external;
+function inflate(var strm: TZStreamRec; flush: Integer): Integer; external;
+function inflateEnd(var strm: TZStreamRec): Integer; external;
+function inflateReset(var strm: TZStreamRec): Integer; external;
+
+
+function zcalloc(AppData: Pointer; Items, Size: Integer): Pointer;
+begin
+  GetMem(Result, Items*Size);
+end;
+
+procedure zcfree(AppData, Block: Pointer);
+begin
+  FreeMem(Block);
+end;
+
+function zlibCheck(code: Integer): Integer;
+begin
+  Result := code;
+  if code < 0 then
+    raise EZlibError.Create('error');    //!!
+end;
+
+function CCheck(code: Integer): Integer;
+begin
+  Result := code;
+  if code < 0 then
+    raise ECompressionError.Create('error'); //!!
+end;
+
+function DCheck(code: Integer): Integer;
+begin
+  Result := code;
+  if code < 0 then
+    raise EDecompressionError.Create('error');  //!!
+end;
+
+procedure CompressBuf(const InBuf: Pointer; InBytes: Integer;
+                      out OutBuf: Pointer; out OutBytes: Integer);
+var
+  strm: TZStreamRec;
+  P: Pointer;
+begin
+  FillChar(strm, sizeof(strm), 0);
+  OutBytes := ((InBytes + (InBytes div 10) + 12) + 255) and not 255;
+  GetMem(OutBuf, OutBytes);
+  try
+    strm.next_in := InBuf;
+    strm.avail_in := InBytes;
+    strm.next_out := OutBuf;
+    strm.avail_out := OutBytes;
+    CCheck(deflateInit_(strm, Z_BEST_COMPRESSION, zlib_version, sizeof(strm)));
+    try
+      while CCheck(deflate(strm, Z_FINISH)) <> Z_STREAM_END do
+      begin
+        P := OutBuf;
+        Inc(OutBytes, 256);
+        ReallocMem(OutBuf, OutBytes);
+        strm.next_out := PChar(Integer(OutBuf) + (Integer(strm.next_out) - Integer(P)));
+        strm.avail_out := 256;
+      end;
+    finally
+      CCheck(deflateEnd(strm));
+    end;
+    ReallocMem(OutBuf, strm.total_out);
+    OutBytes := strm.total_out;
+  except
+    FreeMem(OutBuf);
+    raise
+  end;
+end;
+
+
+procedure DecompressBuf(const InBuf: Pointer; InBytes: Integer;
+  OutEstimate: Integer; out OutBuf: Pointer; out OutBytes: Integer);
+var
+  strm: TZStreamRec;
+  P: Pointer;
+  BufInc: Integer;
+begin
+  FillChar(strm, sizeof(strm), 0);
+  BufInc := (InBytes + 255) and not 255;
+  if OutEstimate = 0 then
+    OutBytes := BufInc
+  else
+    OutBytes := OutEstimate;
+  GetMem(OutBuf, OutBytes);
+  try
+    strm.next_in := InBuf;
+    strm.avail_in := InBytes;
+    strm.next_out := OutBuf;
+    strm.avail_out := OutBytes;
+    DCheck(inflateInit_(strm, zlib_version, sizeof(strm)));
+    try
+      while DCheck(inflate(strm, Z_FINISH)) <> Z_STREAM_END do
+      begin
+        P := OutBuf;
+        Inc(OutBytes, BufInc);
+        ReallocMem(OutBuf, OutBytes);
+        strm.next_out := PChar(Integer(OutBuf) + (Integer(strm.next_out) - Integer(P)));
+        strm.avail_out := BufInc;
+      end;
+    finally
+      DCheck(inflateEnd(strm));
+    end;
+    ReallocMem(OutBuf, strm.total_out);
+    OutBytes := strm.total_out;
+  except
+    FreeMem(OutBuf);
+    raise
+  end;
+end;
+
+
+// TCustomZlibStream
+
+constructor TCustomZLibStream.Create(Strm: TStream);
+begin
+  inherited Create;
+  FStrm := Strm;
+  FStrmPos := Strm.Position;
+end;
+
+procedure TCustomZLibStream.Progress(Sender: TObject);
+begin
+  if Assigned(FOnProgress) then FOnProgress(Sender);
+end;
+
+
+// TCompressionStream
+
+constructor TCompressionStream.Create(CompressionLevel: TCompressionLevel;
+  Dest: TStream);
+const
+  Levels: array [TCompressionLevel] of ShortInt =
+    (Z_NO_COMPRESSION, Z_BEST_SPEED, Z_DEFAULT_COMPRESSION, Z_BEST_COMPRESSION);
+begin
+  inherited Create(Dest);
+  FZRec.next_out := FBuffer;
+  FZRec.avail_out := sizeof(FBuffer);
+  CCheck(deflateInit_(FZRec, Levels[CompressionLevel], zlib_version, sizeof(FZRec)));
+end;
+
+destructor TCompressionStream.Destroy;
+begin
+  FZRec.next_in := nil;
+  FZRec.avail_in := 0;
+  try
+    if FStrm.Position <> FStrmPos then FStrm.Position := FStrmPos;
+    while (CCheck(deflate(FZRec, Z_FINISH)) <> Z_STREAM_END)
+      and (FZRec.avail_out = 0) do
+    begin
+      FStrm.WriteBuffer(FBuffer, sizeof(FBuffer));
+      FZRec.next_out := FBuffer;
+      FZRec.avail_out := sizeof(FBuffer);
+    end;
+    if FZRec.avail_out < sizeof(FBuffer) then
+      FStrm.WriteBuffer(FBuffer, sizeof(FBuffer) - FZRec.avail_out);
+  finally
+    deflateEnd(FZRec);
+  end;
+  inherited Destroy;
+end;
+
+function TCompressionStream.Read(var Buffer; Count: Longint): Longint;
+begin
+  raise ECompressionError.Create('Invalid stream operation');
+end;
+
+function TCompressionStream.Write(const Buffer; Count: Longint): Longint;
+begin
+  FZRec.next_in := @Buffer;
+  FZRec.avail_in := Count;
+  if FStrm.Position <> FStrmPos then FStrm.Position := FStrmPos;
+  while (FZRec.avail_in > 0) do
+  begin
+    CCheck(deflate(FZRec, 0));
+    if FZRec.avail_out = 0 then
+    begin
+      FStrm.WriteBuffer(FBuffer, sizeof(FBuffer));
+      FZRec.next_out := FBuffer;
+      FZRec.avail_out := sizeof(FBuffer);
+      FStrmPos := FStrm.Position;
+      Progress(Self);
+    end;
+  end;
+  Result := Count;
+end;
+
+function TCompressionStream.Seek(Offset: Longint; Origin: Word): Longint;
+begin
+  if (Offset = 0) and (Origin = soFromCurrent) then
+    Result := FZRec.total_in
+  else
+    raise ECompressionError.Create('Invalid stream operation');
+end;
+
+function TCompressionStream.GetCompressionRate: Single;
+begin
+  if FZRec.total_in = 0 then
+    Result := 0
+  else
+    Result := (1.0 - (FZRec.total_out / FZRec.total_in)) * 100.0;
+end;
+
+
+// TDecompressionStream
+
+constructor TDecompressionStream.Create(Source: TStream);
+begin
+  inherited Create(Source);
+  FZRec.next_in := FBuffer;
+  FZRec.avail_in := 0;
+  DCheck(inflateInit_(FZRec, zlib_version, sizeof(FZRec)));
+end;
+
+destructor TDecompressionStream.Destroy;
+begin
+  inflateEnd(FZRec);
+  inherited Destroy;
+end;
+
+function TDecompressionStream.Read(var Buffer; Count: Longint): Longint;
+begin
+  FZRec.next_out := @Buffer;
+  FZRec.avail_out := Count;
+  if FStrm.Position <> FStrmPos then FStrm.Position := FStrmPos;
+  while (FZRec.avail_out > 0) do
+  begin
+    if FZRec.avail_in = 0 then
+    begin
+      FZRec.avail_in := FStrm.Read(FBuffer, sizeof(FBuffer));
+      if FZRec.avail_in = 0 then
+        begin
+          Result := Count - FZRec.avail_out;
+          Exit;
+        end;
+      FZRec.next_in := FBuffer;
+      FStrmPos := FStrm.Position;
+      Progress(Self);
+    end;
+    DCheck(inflate(FZRec, 0));
+  end;
+  Result := Count;
+end;
+
+function TDecompressionStream.Write(const Buffer; Count: Longint): Longint;
+begin
+  raise EDecompressionError.Create('Invalid stream operation');
+end;
+
+function TDecompressionStream.Seek(Offset: Longint; Origin: Word): Longint;
+var
+  I: Integer;
+  Buf: array [0..4095] of Char;
+begin
+  if (Offset = 0) and (Origin = soFromBeginning) then
+  begin
+    DCheck(inflateReset(FZRec));
+    FZRec.next_in := FBuffer;
+    FZRec.avail_in := 0;
+    FStrm.Position := 0;
+    FStrmPos := 0;
+  end
+  else if ( (Offset >= 0) and (Origin = soFromCurrent)) or
+          ( ((Offset - FZRec.total_out) > 0) and (Origin = soFromBeginning)) then
+  begin
+    if Origin = soFromBeginning then Dec(Offset, FZRec.total_out);
+    if Offset > 0 then
+    begin
+      for I := 1 to Offset div sizeof(Buf) do
+        ReadBuffer(Buf, sizeof(Buf));
+      ReadBuffer(Buf, Offset mod sizeof(Buf));
+    end;
+  end
+  else
+    raise EDecompressionError.Create('Invalid stream operation');
+  Result := FZRec.total_out;
+end;
+
+end.
author	blackbird <devnull@localhost>	2006-11-18 21:10:27 +0100
committer	blackbird <devnull@localhost>	2006-11-18 21:10:27 +0100
commit	b9e3b75424f7d5dc2e66d28da6c7f6a176089532 (patch)
tree	cef29d576b707c034479d6e3e878e44997136edb
parent	7a505b67251a6b46f02f90abc68af64c099eb3e7 (diff)
download	pygments-b9e3b75424f7d5dc2e66d28da6c7f6a176089532.tar.gz