summary refs log tree commit diff
diff options
context:
space:
mode:
authorEli Bendersky <eliben@gmail.com>2012-08-10 07:41:42 +0300
committerEli Bendersky <eliben@gmail.com>2012-08-10 07:41:42 +0300
commit09fc2006e19eb8d74a87c60d82234e6a0eca6eec (patch)
tree2e30e71a8429ba64589f3dbc4179f2e06b546933
parent949ad82b71ecba36cb3f0ea65e4627fa87932dc5 (diff)
downloadpycparser-09fc2006e19eb8d74a87c60d82234e6a0eca6eec.tar.gz
Issue 73: initial pragma implementation
-rw-r--r--CHANGES2
-rw-r--r--pycparser/c_lexer.py43
-rw-r--r--tests/test_c_lexer.py27
3 files changed, 61 insertions(+), 11 deletions(-)
diff --git a/CHANGES b/CHANGES
index 8efc772..fb8b632 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,7 @@
+ Version 2.08 (??)
+ - Issue 73: initial support for #pragma directives. Consume them without
+ errors and ignore them (no tokens are returned). Line numbers are preserved.
- Issue 68: more correct handling of source files without any actual content.
- Issue 69: running all tests will now set appropriate return code.
- Better error reporting in case where multiple type specifiers are provided.
diff --git a/pycparser/c_lexer.py b/pycparser/c_lexer.py
index 1054deb..45509aa 100644
--- a/pycparser/c_lexer.py
+++ b/pycparser/c_lexer.py
@@ -44,6 +44,8 @@ class CLexer(object):
#
self.line_pattern = re.compile('([ \t]*line\W)|([ \t]*\d+)')
+ self.pragma_pattern = re.compile('[ \t]*pragma\W')
+
def build(self, **kwargs):
""" Builds the lexer from the specification. Must be
called after the lexer object is created.
@@ -213,23 +215,25 @@ class CLexer(object):
hex_floating_constant = '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+binary_exponent_part+'[FfLl]?)'
##
- ## Lexer states
+ ## Lexer states: used for preprocessor \n-terminated directives
##
states = (
# ppline: preprocessor line directives
#
('ppline', 'exclusive'),
+
+ # pppragma: pragma
+ #
+ ('pppragma', 'exclusive'),
)
def t_PPHASH(self, t):
r'[ \t]*\#'
- m = self.line_pattern.match(
- t.lexer.lexdata, pos=t.lexer.lexpos)
-
- if m:
+ if self.line_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
t.lexer.begin('ppline')
self.pp_line = self.pp_filename = None
- #~ print "ppline starts on line %s" % t.lexer.lineno
+ elif self.pragma_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
+ t.lexer.begin('pppragma')
else:
t.type = 'PPHASH'
return t
@@ -273,8 +277,30 @@ class CLexer(object):
t_ppline_ignore = ' \t'
def t_ppline_error(self, t):
- msg = 'invalid #line directive'
- self._error(msg, t)
+ self._error('invalid #line directive', t)
+
+ ##
+ ## Rules for the pppragma state
+ ##
+ def t_pppragma_NEWLINE(self, t):
+ r'\n'
+ t.lexer.lineno += 1
+ t.lexer.begin('INITIAL')
+
+ def t_pppragma_PPPRAGMA(self, t):
+ r'pragma'
+ pass
+
+ t_pppragma_ignore = ' \t<>.-{}();+-*/$%@&^~!?:,0123456789'
+
+ @TOKEN(string_literal)
+ def t_pppragma_STR(self, t): pass
+
+ @TOKEN(identifier)
+ def t_pppragma_ID(self, t): pass
+
+ def t_pppragma_error(self, t):
+ self._error('invalid #pragma directive', t)
##
## Rules for the normal state
@@ -448,7 +474,6 @@ if __name__ == "__main__":
tok = clex.token()
if not tok: break
- #~ print type(tok)
printme([tok.value, tok.type, tok.lineno, clex.filename, tok.lexpos])
diff --git a/tests/test_c_lexer.py b/tests/test_c_lexer.py
index 80ffc55..5f6b459 100644
--- a/tests/test_c_lexer.py
+++ b/tests/test_c_lexer.py
@@ -263,7 +263,30 @@ class TestCLexerNoErrors(unittest.TestCase):
self.assertEqual(t5.value, 'tok2')
self.assertEqual(t5.lineno, 99999)
self.assertEqual(self.clex.filename, r'include/me.h')
-
+
+ def test_preprocessor_pragma(self):
+ str = r'''
+ 42
+ #pragma helo me
+ #pragma once
+ # pragma omp parallel private(th_id)
+ #pragma {pack: 2, smack: 3}
+ #pragma <includeme.h> "nowit.h"
+ #pragma "string"
+ #pragma id 124124 and numbers 0235495
+ 59
+ '''
+
+ # Check that pragmas are ignored but the line number advances
+ self.clex.input(str)
+ self.clex.reset_lineno()
+
+ t1 = self.clex.token()
+ self.assertEqual(t1.type, 'INT_CONST_DEC')
+ t2 = self.clex.token()
+ self.assertEqual(t2.type, 'INT_CONST_DEC')
+ self.assertEqual(t2.lineno, 10)
+
# Keeps all the errors the lexer spits in one place, to allow
@@ -307,7 +330,7 @@ class TestCLexerErrors(unittest.TestCase):
token_types(self.clex)
# compare the error to the expected
- self.failUnless(re.search(error_like, self.error),
+ self.assertTrue(re.search(error_like, self.error),
"\nExpected error matching: %s\nGot: %s" %
(error_like, self.error))