diff options
Diffstat (limited to 'examples/removeLineBreaks.py')
-rw-r--r-- | examples/removeLineBreaks.py | 30 |
1 files changed, 21 insertions, 9 deletions
diff --git a/examples/removeLineBreaks.py b/examples/removeLineBreaks.py
index 90b77e4..1a77231 100644
--- a/examples/removeLineBreaks.py
+++ b/examples/removeLineBreaks.py
@@ -13,18 +13,22 @@
 #
 import pyparsing as pp
 
-# define an expression for the body of a line of text - use a parse action to reject any
-# empty lines
-def mustBeNonBlank(s, l, t):
-    return bool(t[0])
+line_end = pp.LineEnd()
 
-lineBody = pp.SkipTo(pp.lineEnd).addCondition(mustBeNonBlank, message="line body can't be empty")
+# define an expression for the body of a line of text - use a predicate condition to
+# accept only lines with some content.
+def mustBeNonBlank(t):
+    return t[0] != ''
+    # could also be written as
+    # return bool(t[0])
+
+lineBody = pp.SkipTo(line_end).addCondition(mustBeNonBlank, message="line body can't be empty")
 
 # now define a line with a trailing lineEnd, to be replaced with a space character
-textLine = lineBody + pp.Suppress(pp.lineEnd).setParseAction(pp.replaceWith(" "))
+textLine = lineBody + line_end().setParseAction(pp.replaceWith(" "))
 
 # define a paragraph, with a separating lineEnd, to be replaced with a double newline
-para = pp.OneOrMore(textLine) + pp.Suppress(pp.lineEnd).setParseAction(pp.replaceWith("\n\n"))
+para = pp.OneOrMore(textLine) + line_end().setParseAction(pp.replaceWith("\n\n"))
 
 # run a test
 test = """
@@ -40,6 +44,14 @@ test = """
 print(para.transformString(test))
 
 # process an entire file
-original = open("Successful Methods of Public Speaking.txt").read()
+# Project Gutenberg EBook of Successful Methods of Public Speaking, by Grenville Kleiser
+# Download from http://www.gutenberg.org/cache/epub/18095/pg18095.txt
+#
+with open("18095-8.txt") as source_file:
+    original = source_file.read()
+
+# use transformString to convert line breaks
 transformed = para.transformString(original)
-open("Successful Methods of Public Speaking(2).txt", "w").write(transformed)
+
+with open("18095-8_reformatted.txt", "w") as transformed_file:
+    transformed_file.write(transformed)