summaryrefslogtreecommitdiff
path: root/examples/removeLineBreaks.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/removeLineBreaks.py')
-rw-r--r--examples/removeLineBreaks.py45
1 files changed, 45 insertions, 0 deletions
diff --git a/examples/removeLineBreaks.py b/examples/removeLineBreaks.py
new file mode 100644
index 0000000..ba4b498
--- /dev/null
+++ b/examples/removeLineBreaks.py
@@ -0,0 +1,45 @@
+# removeLineBreaks.py
+#
+# Demonstration of the pyparsing module, converting text files
+# with hard line-breaks to text files with line breaks only
+# between paragraphs. (Helps when converting downloads from Project
+# Gutenberg - http://www.gutenberg.org - to import to word processing apps
+# that can reformat paragraphs once hard line-breaks are removed.)
+#
+# Uses parse actions and transformString to remove unwanted line breaks,
+# and to double up line breaks between paragraphs.
+#
+# Copyright 2006, by Paul McGuire
+#
+from pyparsing import *
+
+# define an expression for the body of a line of text - use a parse action to reject any
+# empty lines
+def mustBeNonBlank(s,l,t):
+ if not t[0]:
+ raise ParseException(s,l,"line body can't be empty")
+lineBody = SkipTo(lineEnd).setParseAction(mustBeNonBlank)
+
+# now define a line with a trailing lineEnd, to be replaced with a space character
+textLine = lineBody + Suppress(lineEnd).setParseAction(replaceWith(" "))
+
+# define a paragraph, with a separating lineEnd, to be replaced with a double newline
+para = OneOrMore(textLine) + Suppress(lineEnd).setParseAction(replaceWith("\n\n"))
+
+
+# run a test
+test = """
+ Now is the
+ time for
+ all
+ good men
+ to come to
+
+ the aid of their
+ country.
+"""
+print(para.transformString(test))
+
+# process an entire file
+z = para.transformString(file("Successful Methods of Public Speaking.txt").read())
+file("Successful Methods of Public Speaking(2).txt","w").write(z)