diff options
author | Paul McGuire <ptmcg@users.noreply.github.com> | 2018-01-06 23:38:53 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-06 23:38:53 -0600 |
commit | 430c5ad767cc946e9da7cd5f4673a4e3bd135a3c (patch) | |
tree | 5a7df11e0fd52ab320b0ef3e670e260f315ca9ae /examples/position.py | |
parent | f1d12567a8da4d254e6d62bb0d650c87c7d0bb89 (diff) | |
parent | d953150a6db3ac247a64b047edc2df7156f3e56b (diff) | |
download | pyparsing-git-430c5ad767cc946e9da7cd5f4673a4e3bd135a3c.tar.gz |
Merge pull request #1 from cngkaygusuz/master
Add Scrutinizer-CI configuration and other niceties
Diffstat (limited to 'examples/position.py')
-rw-r--r-- | examples/position.py | 55 |
1 file changed, 55 insertions, 0 deletions
diff --git a/examples/position.py b/examples/position.py new file mode 100644 index 0000000..984c018 --- /dev/null +++ b/examples/position.py @@ -0,0 +1,55 @@ +from pyparsing import *
+
+text = """Lorem ipsum dolor sit amet, consectetur adipisicing
+elit, sed do eiusmod tempor incididunt ut labore et dolore magna
+aliqua. Ut enim ad minim veniam, quis nostrud exercitation
+ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis
+aute irure dolor in reprehenderit in voluptate velit esse cillum
+dolore eu fugiat nulla pariatur. Excepteur sint occaecat
+cupidatat non proident, sunt in culpa qui officia deserunt
+mollit anim id est laborum"""
+
+# Goal: find all words in the sample text that begin with a vowel.
+vowels = "aeiouAEIOU"  # both lower- and upper-case vowels
+initialVowelWord = Word(vowels,alphas)  # first character from vowels, remaining characters from alphas
+
+# Unfortunately, searchString advances character by character through
+# the input text, so it will detect that the initial "Lorem" is not an
+# initialVowelWord, but then it will test "orem" and think that it is. So
+# we add a do-nothing term that matches the words that start with
+# consonants, and simply discard those matches. The key is that, having
+# been matched, those words are skipped over entirely when the parser is
+# looking for initialVowelWords.
+consonants = ''.join(c for c in alphas if c not in vowels)  # every character of alphas that is not in vowels
+initialConsWord = Word(consonants, alphas).suppress()  # match consonant-initial words but keep them out of the results
+
+# Use scanString to locate where tokens are matched; each item unpacks to (tokens, start, end).
+for t,start,end in (initialConsWord|initialVowelWord).scanString(text):
+ if t:  # suppressed consonant-initial words yield no tokens, so only vowel-initial words get printed
+ print(start,':', t[0])  # start location of the match, then the matched word
+
+# Parse action that annotates the parsed tokens with their location in the
+# input string: stores l under 'locn' and the first token under 'word'.
+def addLocnToTokens(s,l,t):  # pyparsing parse-action arguments: input string, match location, matched tokens
+ t['locn'] = l
+ t['word'] = t[0]
+initialVowelWord.setParseAction(addLocnToTokens)  # attached to the vowel-initial word expression only
+
+for ivowelInfo in (initialConsWord | initialVowelWord).searchString(text):  # iterate over the matches found in text
+ if not ivowelInfo:  # empty result: a suppressed consonant-initial word
+ continue
+ print(ivowelInfo.locn, ':', ivowelInfo.word)  # 'locn' and 'word' were set by the addLocnToTokens parse action
+
+
+# Alternative: build an Empty element whose parse action saves the current location under `name`.
+def location(name):
+ return Empty().setParseAction(lambda s,l,t: t.__setitem__(name,l))  # same effect as t[name] = l, spelled as a call so it fits in a lambda
+locateInitialVowels = location("locn") + initialVowelWord("word")  # location recorder followed by the word, given the results name "word"
+
+# Search through the input text using the location-recording expression.
+for ivowelInfo in (initialConsWord | locateInitialVowels).searchString(text):
+ if not ivowelInfo:  # empty result: a suppressed consonant-initial word
+ continue
+ print(ivowelInfo.locn, ':', ivowelInfo.word)  # recorded location, then the matched word
+
+
|