diff options
author | José Joaquín Atria <jjatria@gmail.com> | 2019-11-25 11:42:56 +0000 |
---|---|---|
committer | José Joaquín Atria <jjatria@gmail.com> | 2019-11-25 11:57:05 +0000 |
commit | 7421c39812b11b13406bccd98715cd234e2ceaaa (patch) | |
tree | c40b9649853da581e2ebc7beca1844bd61b06403 | |
parent | 3f403687036fce8c9f3d49a5bb2a8bbcdc41c8ba (diff) | |
download | pygments-git-7421c39812b11b13406bccd98715cd234e2ceaaa.tar.gz |
Improve detection of Praat interpolated variables
Changes squashed / updated from https://bitbucket.org/birkenfeld/pygments-main/pull-requests/586
-rw-r--r-- | pygments/lexers/praat.py | 66 | ||||
-rw-r--r-- | tests/examplefiles/example.praat | 82 | ||||
-rw-r--r-- | tests/test_praat.py | 76 |
3 files changed, 183 insertions, 41 deletions
diff --git a/pygments/lexers/praat.py b/pygments/lexers/praat.py index fa91880f..4a6a14f0 100644 --- a/pygments/lexers/praat.py +++ b/pygments/lexers/praat.py @@ -55,7 +55,7 @@ class PraatLexer(RegexLexer): 'exitScript', 'exp', 'extractNumber', 'fileReadable', 'fisherP', 'fisherQ', 'floor', 'gaussP', 'gaussQ', 'hertzToBark', 'hertzToErb', 'hertzToMel', 'hertzToSemitones', 'imax', 'imin', 'incompleteBeta', 'incompleteGammaP', 'index', - 'index_regex', 'invBinomialP', 'invBinomialQ', 'invChiSquareQ', 'invFisherQ', + 'index_regex', 'integer', 'invBinomialP', 'invBinomialQ', 'invChiSquareQ', 'invFisherQ', 'invGaussQ', 'invSigmoid', 'invStudentQ', 'length', 'ln', 'lnBeta', 'lnGamma', 'log10', 'log2', 'max', 'melToHertz', 'min', 'minusObject', 'natural', 'number', 'numberOfColumns', 'numberOfRows', 'numberOfSelected', 'objectsAreIdentical', @@ -63,9 +63,9 @@ class PraatLexer(RegexLexer): 'positive', 'randomBinomial', 'randomGauss', 'randomInteger', 'randomPoisson', 'randomUniform', 'real', 'readFile', 'removeObject', 'rindex', 'rindex_regex', 'round', 'runScript', 'runSystem', 'runSystem_nocheck', 'selectObject', - 'selected', 'semitonesToHertz', 'sentencetext', 'sigmoid', 'sin', 'sinc', + 'selected', 'semitonesToHertz', 'sentence', 'sentencetext', 'sigmoid', 'sin', 'sinc', 'sincpi', 'sinh', 'soundPressureToPhon', 'sqrt', 'startsWith', 'studentP', - 'studentQ', 'tan', 'tanh', 'variableExists', 'word', 'writeFile', 'writeFileLine', + 'studentQ', 'tan', 'tanh', 'text', 'variableExists', 'word', 'writeFile', 'writeFileLine', 'writeInfo', 'writeInfoLine', ) @@ -90,9 +90,9 @@ class PraatLexer(RegexLexer): 'KNN', 'KlattGrid', 'KlattTable', 'LFCC', 'LPC', 'Label', 'LegendreSeries', 'LinearRegression', 'LogisticRegression', 'LongSound', 'Ltas', 'MFCC', 'MSpline', 'ManPages', 'Manipulation', 'Matrix', 'MelFilter', 'MelSpectrogram', - 'MixingMatrix', 'Movie', 'Network', 'OTGrammar', 'OTHistory', 'OTMulti', 'PCA', - 'PairDistribution', 'ParamCurve', 'Pattern', 'Permutation', 'Photo', 'Pitch', - 'PitchModeler', 'PitchTier', 'PointProcess', 'Polygon', 'Polynomial', + 'MixingMatrix', 'Movie', 'Network', 'Object', 'OTGrammar', 'OTHistory', 'OTMulti', + 'PCA', 'PairDistribution', 'ParamCurve', 'Pattern', 'Permutation', 'Photo', + 'Pitch', 'PitchModeler', 'PitchTier', 'PointProcess', 'Polygon', 'Polynomial', 'PowerCepstrogram', 'PowerCepstrum', 'Procrustes', 'RealPoint', 'RealTier', 'ResultsMFC', 'Roots', 'SPINET', 'SSCP', 'SVD', 'Salience', 'ScalarProduct', 'Similarity', 'SimpleString', 'SortedSetOfString', 'Sound', 'Speaker', @@ -112,6 +112,10 @@ class PraatLexer(RegexLexer): 'defaultDirectory', ) + object_attributes = ( + 'ncol', 'nrow', 'xmin', 'ymin', 'xmax', 'ymax', 'nx', 'ny', 'dx', 'dy', + ) + tokens = { 'root': [ (r'(\s+)(#.*?$)', bygroups(Text, Comment.Single)), @@ -148,7 +152,9 @@ class PraatLexer(RegexLexer): ], 'command': [ (r'( ?[\w()-]+ ?)', Keyword), - (r"'(?=.*')", String.Interpol, 'string_interpolated'), + + include('string_interpolated'), + (r'\.{3}', Keyword, ('#pop', 'old_arguments')), (r':', Keyword, ('#pop', 'comma_list')), (r'\s', Text, '#pop'), @@ -207,50 +213,49 @@ class PraatLexer(RegexLexer): (r'\n', Text, '#pop'), (r'\b\d+(\.\d*)?([eE][-+]?\d+)?%?', Number), ], - 'object_attributes': [ - (r'\.?(n(col|row)|[xy]min|[xy]max|[nd][xy])\b', Name.Builtin, '#pop'), - (r'(\.?(?:col|row)\$)(\[)', - bygroups(Name.Builtin, Text), 'variable_name'), - (r'(\$?)(\[)', - bygroups(Name.Builtin, Text), ('#pop', 'comma_list')), + 'object_reference': [ + include('string_interpolated'), + (r'([a-z][a-zA-Z0-9_]*|\d+)', Name.Builtin), + + (words(object_attributes, prefix=r'\.'), Name.Builtin, '#pop'), + + (r'\$', Name.Builtin), + (r'\[', Text, '#pop'), ], 'variable_name': [ include('operator'), include('number'), (words(variables_string, suffix=r'\$'), Name.Variable.Global), - (words(variables_numeric, suffix=r'\b'), Name.Variable.Global), - - (r'\bObject_\w+', Name.Builtin, 'object_attributes'), - (words(objects, prefix=r'\b', suffix=r'_\w+'), - Name.Builtin, 'object_attributes'), + (words(variables_numeric, + suffix=r'(?=[^a-zA-Z0-9\._"\'\$#\[:\(]|\s|^|$)'), + Name.Variable.Global), - (r"\b(Object_)(')", - bygroups(Name.Builtin, String.Interpol), - ('object_attributes', 'string_interpolated')), - (words(objects, prefix=r'\b', suffix=r"(_)(')"), - bygroups(Name.Builtin, Name.Builtin, String.Interpol), - ('object_attributes', 'string_interpolated')), + (words(objects, prefix=r'\b', suffix=r"(_)"), + bygroups(Name.Builtin, Name.Builtin), + 'object_reference'), (r'\.?_?[a-z][\w.]*(\$|#)?', Text), (r'[\[\]]', Punctuation, 'comma_list'), - (r"'(?=.*')", String.Interpol, 'string_interpolated'), + + include('string_interpolated'), ], 'operator': [ (r'([+\/*<>=!-]=?|[&*|][&*|]?|\^|<>)', Operator), (r'(?<![\w.])(and|or|not|div|mod)(?![\w.])', Operator.Word), ], 'string_interpolated': [ - (r'\.?[_a-z][\w.]*[$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?', + (r'\'[_a-z][^\[\]\'":]*(\[([\d,]+|"[\w\d,]+")\])?(:[0-9]+)?\'', String.Interpol), - (r"'", String.Interpol, '#pop'), ], 'string_unquoted': [ (r'(\n\s*)(\.{3})', bygroups(Text, Punctuation)), (r'\n', Text, '#pop'), (r'\s', Text), - (r"'(?=.*')", String.Interpol, 'string_interpolated'), + + include('string_interpolated'), + (r"'", String), (r"[^'\n]+", String), ], @@ -258,11 +263,14 @@ class PraatLexer(RegexLexer): (r'(\n\s*)(\.{3})', bygroups(Text, Punctuation)), (r'"', String, '#pop'), - (r"'(?=.*')", String.Interpol, 'string_interpolated'), + + include('string_interpolated'), + (r"'", String), (r'[^\'"\n]+', String), ], 'old_form': [ + (r'(\s+)(#.*?$)', bygroups(Text, Comment.Single)), (r'\s+', Text), (r'(optionmenu|choice)([ \t]+\S+:[ \t]+)', diff --git a/tests/examplefiles/example.praat b/tests/examplefiles/example.praat index 85573919..2b782b8d 100644 --- a/tests/examplefiles/example.praat +++ b/tests/examplefiles/example.praat @@ -1,4 +1,5 @@ form Highlighter test + # This is a regular comment sentence Blank sentence My_sentence This should all be a string text My_text This should also all be a string @@ -7,9 +8,11 @@ form Highlighter test boolean Text no boolean Quoted "yes" comment This should be a string - optionmenu Choice: 1 + optionmenu Drop-down: 1 + option Foo + option 100 + choice Radio: 1 option Foo - option Bar option 100 real left_Range -123.6 positive right_Range_max 3.3 @@ -17,6 +20,25 @@ form Highlighter test natural Nat 4 endform +beginPause: "Highlighter test" + sentence: "Blank", "" + sentence: "My sentence", "This should all be a string" + text: "My text", "This should also all be a string" + word: "My word", "Only the first word is a string, the rest is discarded" + boolean: "Binary", 1 + comment: "This should be a string" + optionMenu: "Drop-down", 1 + option: "Foo" + option: "100" + choice: "Choice", 1 + option: "Foo" + option: "100" + real: "left Range", -123.6 + positive: "right Range max", 3.3 + integer: "Int", 4 + natural: "Nat", 4 +button = endPause("Cancel", "OK", 1, 2) + # Periods do not establish boundaries for keywords form.var = 10 # Or operators @@ -30,8 +52,7 @@ execute /path/to/file # Predefined variables a = praatVersion -a = e -a = pi +a = e + pi * ( all+right) / left mod average + (mono - stereo) a$ = homeDirectory$ + tab$ + newline$ a$ = temporaryDirectory$ a$ = praatVersion$ @@ -40,6 +61,9 @@ a$ = homeDirectory$ a$ = preferencesDirectory$ a$ = defaultDirectory$ nocheck selectObject: undefined +# Not predefined variables +a$ = e$ +a$ = pi$ # Arrays are not comments a# = zero# (5, 6) @@ -59,9 +83,43 @@ else macintosh == 1 exit We are on Mac endif -string$ = "Strings can be 'interpolated'" +# Interpolation with precision digits +echo unquoted 'a:3' +echo unquoted 'a.a:3' +echo unquoted 'a[1]:3' +echo unquoted 'a1:3' + +appendInfoLine: "quoted 'a:3'" +appendInfoLine: "quoted 'a.a:3'" +appendInfoLine: "quoted 'a[1]:3'" +appendInfoLine: "quoted 'a1:3'" + +# Interpolations are not recursive +echo unquoted 'a'1':3' +appendInfoLine: "quoted 'a'1':3'" + +# Interpolation without precision digits +echo unquoted 'var' numeric +echo unquoted 'var$' string +echo unquoted 'var["a"]' numeric hash +echo unquoted 'var$["a"]' string hash +echo unquoted 'var[1]' numeric indexed variable +echo unquoted 'var$[1]' string indexed variable + +appendInfoLine: "quoted 'var' numeric" +appendInfoLine: "quoted 'var$' string" +appendInfoLine: "quoted 'var["a"]' numeric hash" +appendInfoLine: "quoted 'var$["a"]' string hash" +appendInfoLine: "quoted 'var[1]' numeric indexed variable" +appendInfoLine: "quoted 'var$[1]' string indexed variable" + +# Indeces in interpolations must be literal +echo 'var[a]' +echo 'var[a$]' + string$ = "But don't interpolate everything!" -string$(10) +string$ = "interpolatin' " + "across" + " strings ain't cool either" +string$(10) ; This is a function repeat string$ = string$ - right$(string$) @@ -77,6 +135,12 @@ value$ = Table_'table'$[25, "f0"] fixed = Sound_10.xmin fixed = Object_foo.xmin fixed = Procrustes_foo.nx +var["vaa"] = 1 ; Hash + +# Special two-word keyword +select all +# Keyword with a predefined variable +select all # old-style procedure call call oldStyle "quoted" 2 unquoted string @@ -103,7 +167,7 @@ endfor i = 1 while i < n - i++ + i += 1 # Different styles of object selection select sound'i' sound = selected() @@ -153,7 +217,7 @@ while i < n ..."duration response" # Function call with trailing space - removeObject: pitch, table + removeObject: pitch, table # Picture window commands selectObject: sound @@ -251,7 +315,7 @@ procedure newStyle (.str1$, .num, .str2$) .local = Get total duration .local = Get 'some' duration .local = Get 'some[1]' value... hello 10 p[i] - .local = Get 'some[1,3]' value: "hello", 10, 'p[i]' + .local = Get 'some[1,3]' value: "hello", 10, p[i] .local = Get 'some$' duration .local = Get 'some$[1]' duration endproc diff --git a/tests/test_praat.py b/tests/test_praat.py index d17ad02f..61ddfd57 100644 --- a/tests/test_praat.py +++ b/tests/test_praat.py @@ -95,9 +95,7 @@ def test_broken_unquoted_string(lexer): (Token.Text, u'\n'), (Token.Punctuation, u'...'), (Token.Text, u' '), - (Token.Literal.String.Interpol, u"'"), - (Token.Literal.String.Interpol, u'interpolated'), - (Token.Literal.String.Interpol, u"'"), + (Token.Literal.String.Interpol, u"'interpolated'"), (Token.Text, u' '), (Token.Literal.String, u'string'), (Token.Text, u'\n'), @@ -133,3 +131,75 @@ def test_inline_if(lexer): (Token.Text, u'\n'), ] assert list(lexer.get_tokens(fragment)) == tokens + +def test_interpolation_boundary(lexer): + fragment = u'"\'" + "\'"' + tokens = [ + (Token.Literal.String, u'"'), + (Token.Literal.String, u"'"), + (Token.Literal.String, u'"'), + (Token.Text, u' '), + (Token.Operator, u'+'), + (Token.Text, u' '), + (Token.Literal.String, u'"'), + (Token.Literal.String, u"'"), + (Token.Literal.String, u'"'), + (Token.Text, u'\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + +def test_interpolated_numeric_indexed(lexer): + fragment = u"'a[3]'" + tokens = [ + (Token.Literal.String.Interpol, u"'a[3]'"), + (Token.Text, u'\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + +def test_interpolated_numeric_hash(lexer): + fragment = u"'a[\"b\"]'" + tokens = [ + (Token.Literal.String.Interpol, u"'a[\"b\"]'"), + (Token.Text, u'\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + +def test_interpolated_string_indexed(lexer): + fragment = u"'a$[3]'" + tokens = [ + (Token.Literal.String.Interpol, u"'a$[3]'"), + (Token.Text, u'\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + +def test_interpolated_string_hash(lexer): + fragment = u"'a$[\"b\"]'" + tokens = [ + (Token.Literal.String.Interpol, u"'a$[\"b\"]'"), + (Token.Text, u'\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + +def test_interpolated_numeric_with_precision(lexer): + fragment = u"'a:3'" + tokens = [ + (Token.Literal.String.Interpol, u"'a:3'"), + (Token.Text, u'\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + +def test_interpolated_indexed_numeric_with_precision(lexer): + fragment = u"'a[3]:3'" + tokens = [ + (Token.Literal.String.Interpol, u"'a[3]:3'"), + (Token.Text, u'\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + +def test_interpolated_local_numeric_with_precision(lexer): + fragment = u"'a.a:3'" + tokens = [ + (Token.Literal.String.Interpol, u"'a.a:3'"), + (Token.Text, u'\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens |