summary refs log tree commit diff
diff options
context:
space:
mode:
author José Joaquín Atria <jjatria@gmail.com> 2019-11-25 11:42:56 +0000
committer José Joaquín Atria <jjatria@gmail.com> 2019-11-25 11:57:05 +0000
commit 7421c39812b11b13406bccd98715cd234e2ceaaa (patch)
tree c40b9649853da581e2ebc7beca1844bd61b06403
parent 3f403687036fce8c9f3d49a5bb2a8bbcdc41c8ba (diff)
download pygments-git-7421c39812b11b13406bccd98715cd234e2ceaaa.tar.gz
Improve detection of Praat interpolated variables
Changes squashed / updated from https://bitbucket.org/birkenfeld/pygments-main/pull-requests/586
-rw-r--r-- pygments/lexers/praat.py 66
-rw-r--r-- tests/examplefiles/example.praat 82
-rw-r--r-- tests/test_praat.py 76
3 files changed, 183 insertions, 41 deletions
diff --git a/pygments/lexers/praat.py b/pygments/lexers/praat.py
index fa91880f..4a6a14f0 100644
--- a/pygments/lexers/praat.py
+++ b/pygments/lexers/praat.py
@@ -55,7 +55,7 @@ class PraatLexer(RegexLexer):
'exitScript', 'exp', 'extractNumber', 'fileReadable', 'fisherP', 'fisherQ',
'floor', 'gaussP', 'gaussQ', 'hertzToBark', 'hertzToErb', 'hertzToMel',
'hertzToSemitones', 'imax', 'imin', 'incompleteBeta', 'incompleteGammaP', 'index',
- 'index_regex', 'invBinomialP', 'invBinomialQ', 'invChiSquareQ', 'invFisherQ',
+ 'index_regex', 'integer', 'invBinomialP', 'invBinomialQ', 'invChiSquareQ', 'invFisherQ',
'invGaussQ', 'invSigmoid', 'invStudentQ', 'length', 'ln', 'lnBeta', 'lnGamma',
'log10', 'log2', 'max', 'melToHertz', 'min', 'minusObject', 'natural', 'number',
'numberOfColumns', 'numberOfRows', 'numberOfSelected', 'objectsAreIdentical',
@@ -63,9 +63,9 @@ class PraatLexer(RegexLexer):
'positive', 'randomBinomial', 'randomGauss', 'randomInteger', 'randomPoisson',
'randomUniform', 'real', 'readFile', 'removeObject', 'rindex', 'rindex_regex',
'round', 'runScript', 'runSystem', 'runSystem_nocheck', 'selectObject',
- 'selected', 'semitonesToHertz', 'sentencetext', 'sigmoid', 'sin', 'sinc',
+ 'selected', 'semitonesToHertz', 'sentence', 'sentencetext', 'sigmoid', 'sin', 'sinc',
'sincpi', 'sinh', 'soundPressureToPhon', 'sqrt', 'startsWith', 'studentP',
- 'studentQ', 'tan', 'tanh', 'variableExists', 'word', 'writeFile', 'writeFileLine',
+ 'studentQ', 'tan', 'tanh', 'text', 'variableExists', 'word', 'writeFile', 'writeFileLine',
'writeInfo', 'writeInfoLine',
)
@@ -90,9 +90,9 @@ class PraatLexer(RegexLexer):
'KNN', 'KlattGrid', 'KlattTable', 'LFCC', 'LPC', 'Label', 'LegendreSeries',
'LinearRegression', 'LogisticRegression', 'LongSound', 'Ltas', 'MFCC', 'MSpline',
'ManPages', 'Manipulation', 'Matrix', 'MelFilter', 'MelSpectrogram',
- 'MixingMatrix', 'Movie', 'Network', 'OTGrammar', 'OTHistory', 'OTMulti', 'PCA',
- 'PairDistribution', 'ParamCurve', 'Pattern', 'Permutation', 'Photo', 'Pitch',
- 'PitchModeler', 'PitchTier', 'PointProcess', 'Polygon', 'Polynomial',
+ 'MixingMatrix', 'Movie', 'Network', 'Object', 'OTGrammar', 'OTHistory', 'OTMulti',
+ 'PCA', 'PairDistribution', 'ParamCurve', 'Pattern', 'Permutation', 'Photo',
+ 'Pitch', 'PitchModeler', 'PitchTier', 'PointProcess', 'Polygon', 'Polynomial',
'PowerCepstrogram', 'PowerCepstrum', 'Procrustes', 'RealPoint', 'RealTier',
'ResultsMFC', 'Roots', 'SPINET', 'SSCP', 'SVD', 'Salience', 'ScalarProduct',
'Similarity', 'SimpleString', 'SortedSetOfString', 'Sound', 'Speaker',
@@ -112,6 +112,10 @@ class PraatLexer(RegexLexer):
'defaultDirectory',
)
+ object_attributes = (
+ 'ncol', 'nrow', 'xmin', 'ymin', 'xmax', 'ymax', 'nx', 'ny', 'dx', 'dy',
+ )
+
tokens = {
'root': [
(r'(\s+)(#.*?$)', bygroups(Text, Comment.Single)),
@@ -148,7 +152,9 @@ class PraatLexer(RegexLexer):
],
'command': [
(r'( ?[\w()-]+ ?)', Keyword),
- (r"'(?=.*')", String.Interpol, 'string_interpolated'),
+
+ include('string_interpolated'),
+
(r'\.{3}', Keyword, ('#pop', 'old_arguments')),
(r':', Keyword, ('#pop', 'comma_list')),
(r'\s', Text, '#pop'),
@@ -207,50 +213,49 @@ class PraatLexer(RegexLexer):
(r'\n', Text, '#pop'),
(r'\b\d+(\.\d*)?([eE][-+]?\d+)?%?', Number),
],
- 'object_attributes': [
- (r'\.?(n(col|row)|[xy]min|[xy]max|[nd][xy])\b', Name.Builtin, '#pop'),
- (r'(\.?(?:col|row)\$)(\[)',
- bygroups(Name.Builtin, Text), 'variable_name'),
- (r'(\$?)(\[)',
- bygroups(Name.Builtin, Text), ('#pop', 'comma_list')),
+ 'object_reference': [
+ include('string_interpolated'),
+ (r'([a-z][a-zA-Z0-9_]*|\d+)', Name.Builtin),
+
+ (words(object_attributes, prefix=r'\.'), Name.Builtin, '#pop'),
+
+ (r'\$', Name.Builtin),
+ (r'\[', Text, '#pop'),
],
'variable_name': [
include('operator'),
include('number'),
(words(variables_string, suffix=r'\$'), Name.Variable.Global),
- (words(variables_numeric, suffix=r'\b'), Name.Variable.Global),
-
- (r'\bObject_\w+', Name.Builtin, 'object_attributes'),
- (words(objects, prefix=r'\b', suffix=r'_\w+'),
- Name.Builtin, 'object_attributes'),
+ (words(variables_numeric,
+ suffix=r'(?=[^a-zA-Z0-9\._"\'\$#\[:\(]|\s|^|$)'),
+ Name.Variable.Global),
- (r"\b(Object_)(')",
- bygroups(Name.Builtin, String.Interpol),
- ('object_attributes', 'string_interpolated')),
- (words(objects, prefix=r'\b', suffix=r"(_)(')"),
- bygroups(Name.Builtin, Name.Builtin, String.Interpol),
- ('object_attributes', 'string_interpolated')),
+ (words(objects, prefix=r'\b', suffix=r"(_)"),
+ bygroups(Name.Builtin, Name.Builtin),
+ 'object_reference'),
(r'\.?_?[a-z][\w.]*(\$|#)?', Text),
(r'[\[\]]', Punctuation, 'comma_list'),
- (r"'(?=.*')", String.Interpol, 'string_interpolated'),
+
+ include('string_interpolated'),
],
'operator': [
(r'([+\/*<>=!-]=?|[&*|][&*|]?|\^|<>)', Operator),
(r'(?<![\w.])(and|or|not|div|mod)(?![\w.])', Operator.Word),
],
'string_interpolated': [
- (r'\.?[_a-z][\w.]*[$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?',
+ (r'\'[_a-z][^\[\]\'":]*(\[([\d,]+|"[\w\d,]+")\])?(:[0-9]+)?\'',
String.Interpol),
- (r"'", String.Interpol, '#pop'),
],
'string_unquoted': [
(r'(\n\s*)(\.{3})', bygroups(Text, Punctuation)),
(r'\n', Text, '#pop'),
(r'\s', Text),
- (r"'(?=.*')", String.Interpol, 'string_interpolated'),
+
+ include('string_interpolated'),
+
(r"'", String),
(r"[^'\n]+", String),
],
@@ -258,11 +263,14 @@ class PraatLexer(RegexLexer):
(r'(\n\s*)(\.{3})', bygroups(Text, Punctuation)),
(r'"', String, '#pop'),
- (r"'(?=.*')", String.Interpol, 'string_interpolated'),
+
+ include('string_interpolated'),
+
(r"'", String),
(r'[^\'"\n]+', String),
],
'old_form': [
+ (r'(\s+)(#.*?$)', bygroups(Text, Comment.Single)),
(r'\s+', Text),
(r'(optionmenu|choice)([ \t]+\S+:[ \t]+)',
diff --git a/tests/examplefiles/example.praat b/tests/examplefiles/example.praat
index 85573919..2b782b8d 100644
--- a/tests/examplefiles/example.praat
+++ b/tests/examplefiles/example.praat
@@ -1,4 +1,5 @@
form Highlighter test
+ # This is a regular comment
sentence Blank
sentence My_sentence This should all be a string
text My_text This should also all be a string
@@ -7,9 +8,11 @@ form Highlighter test
boolean Text no
boolean Quoted "yes"
comment This should be a string
- optionmenu Choice: 1
+ optionmenu Drop-down: 1
+ option Foo
+ option 100
+ choice Radio: 1
option Foo
- option Bar
option 100
real left_Range -123.6
positive right_Range_max 3.3
@@ -17,6 +20,25 @@ form Highlighter test
natural Nat 4
endform
+beginPause: "Highlighter test"
+ sentence: "Blank", ""
+ sentence: "My sentence", "This should all be a string"
+ text: "My text", "This should also all be a string"
+ word: "My word", "Only the first word is a string, the rest is discarded"
+ boolean: "Binary", 1
+ comment: "This should be a string"
+ optionMenu: "Drop-down", 1
+ option: "Foo"
+ option: "100"
+ choice: "Choice", 1
+ option: "Foo"
+ option: "100"
+ real: "left Range", -123.6
+ positive: "right Range max", 3.3
+ integer: "Int", 4
+ natural: "Nat", 4
+button = endPause("Cancel", "OK", 1, 2)
+
# Periods do not establish boundaries for keywords
form.var = 10
# Or operators
@@ -30,8 +52,7 @@ execute /path/to/file
# Predefined variables
a = praatVersion
-a = e
-a = pi
+a = e + pi * ( all+right) / left mod average + (mono - stereo)
a$ = homeDirectory$ + tab$ + newline$
a$ = temporaryDirectory$
a$ = praatVersion$
@@ -40,6 +61,9 @@ a$ = homeDirectory$
a$ = preferencesDirectory$
a$ = defaultDirectory$
nocheck selectObject: undefined
+# Not predefined variables
+a$ = e$
+a$ = pi$
# Arrays are not comments
a# = zero# (5, 6)
@@ -59,9 +83,43 @@ else macintosh == 1
exit We are on Mac
endif
-string$ = "Strings can be 'interpolated'"
+# Interpolation with precision digits
+echo unquoted 'a:3'
+echo unquoted 'a.a:3'
+echo unquoted 'a[1]:3'
+echo unquoted 'a1:3'
+
+appendInfoLine: "quoted 'a:3'"
+appendInfoLine: "quoted 'a.a:3'"
+appendInfoLine: "quoted 'a[1]:3'"
+appendInfoLine: "quoted 'a1:3'"
+
+# Interpolations are not recursive
+echo unquoted 'a'1':3'
+appendInfoLine: "quoted 'a'1':3'"
+
+# Interpolation without precision digits
+echo unquoted 'var' numeric
+echo unquoted 'var$' string
+echo unquoted 'var["a"]' numeric hash
+echo unquoted 'var$["a"]' string hash
+echo unquoted 'var[1]' numeric indexed variable
+echo unquoted 'var$[1]' string indexed variable
+
+appendInfoLine: "quoted 'var' numeric"
+appendInfoLine: "quoted 'var$' string"
+appendInfoLine: "quoted 'var["a"]' numeric hash"
+appendInfoLine: "quoted 'var$["a"]' string hash"
+appendInfoLine: "quoted 'var[1]' numeric indexed variable"
+appendInfoLine: "quoted 'var$[1]' string indexed variable"
+
+# Indeces in interpolations must be literal
+echo 'var[a]'
+echo 'var[a$]'
+
string$ = "But don't interpolate everything!"
-string$(10)
+string$ = "interpolatin' " + "across" + " strings ain't cool either"
+string$(10) ; This is a function
repeat
string$ = string$ - right$(string$)
@@ -77,6 +135,12 @@ value$ = Table_'table'$[25, "f0"]
fixed = Sound_10.xmin
fixed = Object_foo.xmin
fixed = Procrustes_foo.nx
+var["vaa"] = 1 ; Hash
+
+# Special two-word keyword
+select all
+# Keyword with a predefined variable
+select all
# old-style procedure call
call oldStyle "quoted" 2 unquoted string
@@ -103,7 +167,7 @@ endfor
i = 1
while i < n
- i++
+ i += 1
# Different styles of object selection
select sound'i'
sound = selected()
@@ -153,7 +217,7 @@ while i < n
..."duration response"
# Function call with trailing space
- removeObject: pitch, table
+ removeObject: pitch, table
# Picture window commands
selectObject: sound
@@ -251,7 +315,7 @@ procedure newStyle (.str1$, .num, .str2$)
.local = Get total duration
.local = Get 'some' duration
.local = Get 'some[1]' value... hello 10 p[i]
- .local = Get 'some[1,3]' value: "hello", 10, 'p[i]'
+ .local = Get 'some[1,3]' value: "hello", 10, p[i]
.local = Get 'some$' duration
.local = Get 'some$[1]' duration
endproc
diff --git a/tests/test_praat.py b/tests/test_praat.py
index d17ad02f..61ddfd57 100644
--- a/tests/test_praat.py
+++ b/tests/test_praat.py
@@ -95,9 +95,7 @@ def test_broken_unquoted_string(lexer):
(Token.Text, u'\n'),
(Token.Punctuation, u'...'),
(Token.Text, u' '),
- (Token.Literal.String.Interpol, u"'"),
- (Token.Literal.String.Interpol, u'interpolated'),
- (Token.Literal.String.Interpol, u"'"),
+ (Token.Literal.String.Interpol, u"'interpolated'"),
(Token.Text, u' '),
(Token.Literal.String, u'string'),
(Token.Text, u'\n'),
@@ -133,3 +131,75 @@ def test_inline_if(lexer):
(Token.Text, u'\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens
+
+def test_interpolation_boundary(lexer):
+ fragment = u'"\'" + "\'"'
+ tokens = [
+ (Token.Literal.String, u'"'),
+ (Token.Literal.String, u"'"),
+ (Token.Literal.String, u'"'),
+ (Token.Text, u' '),
+ (Token.Operator, u'+'),
+ (Token.Text, u' '),
+ (Token.Literal.String, u'"'),
+ (Token.Literal.String, u"'"),
+ (Token.Literal.String, u'"'),
+ (Token.Text, u'\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+def test_interpolated_numeric_indexed(lexer):
+ fragment = u"'a[3]'"
+ tokens = [
+ (Token.Literal.String.Interpol, u"'a[3]'"),
+ (Token.Text, u'\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+def test_interpolated_numeric_hash(lexer):
+ fragment = u"'a[\"b\"]'"
+ tokens = [
+ (Token.Literal.String.Interpol, u"'a[\"b\"]'"),
+ (Token.Text, u'\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+def test_interpolated_string_indexed(lexer):
+ fragment = u"'a$[3]'"
+ tokens = [
+ (Token.Literal.String.Interpol, u"'a$[3]'"),
+ (Token.Text, u'\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+def test_interpolated_string_hash(lexer):
+ fragment = u"'a$[\"b\"]'"
+ tokens = [
+ (Token.Literal.String.Interpol, u"'a$[\"b\"]'"),
+ (Token.Text, u'\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+def test_interpolated_numeric_with_precision(lexer):
+ fragment = u"'a:3'"
+ tokens = [
+ (Token.Literal.String.Interpol, u"'a:3'"),
+ (Token.Text, u'\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+def test_interpolated_indexed_numeric_with_precision(lexer):
+ fragment = u"'a[3]:3'"
+ tokens = [
+ (Token.Literal.String.Interpol, u"'a[3]:3'"),
+ (Token.Text, u'\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+def test_interpolated_local_numeric_with_precision(lexer):
+ fragment = u"'a.a:3'"
+ tokens = [
+ (Token.Literal.String.Interpol, u"'a.a:3'"),
+ (Token.Text, u'\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens