summaryrefslogtreecommitdiff
path: root/src/examples/removeLineBreaks.py
blob: 232034f107966a30837c1a1b7942f6a0e84ba7e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# removeLineBreaks.py
#
# Demonstration of the pyparsing module, converting text files
# with hard line-breaks to text files with line breaks only
# between paragraphs.  (Helps when converting downloads from Project
# Gutenberg - http://www.gutenberg.org - for import into word processing apps
# that can reformat paragraphs once hard line-breaks are removed.)
#
# Uses parse actions and transformString to remove unwanted line breaks,
# and to double up line breaks between paragraphs.
#
# Copyright 2006, by Paul McGuire
#
from pyparsing import *

# define an expression for the body of a line of text - use a parse action to reject any
# empty lines
def mustBeNonBlank(s,l,t):
    """Parse action that rejects a match whose first token is empty.

    s -- the string being parsed
    l -- the location in s where the match started
    t -- the matched tokens; only t[0] is examined

    Returns None when t[0] is truthy; otherwise raises ParseException
    so that the enclosing expression fails to match at this location.
    """
    body = t[0]
    if body:
        # non-empty line body: accept the match unchanged
        return
    raise ParseException(s,l,"line body can't be empty")
# the body of a line is whatever SkipTo collects before the next lineEnd;
# the mustBeNonBlank parse action makes an empty body fail to match
lineBody = SkipTo(lineEnd)
lineBody.setParseAction(mustBeNonBlank)

# line end that terminates a text line - its parse action substitutes a
# single space character for it
_lineJoin = Suppress(lineEnd)
_lineJoin.setParseAction(replaceWith(" "))

# a text line is a non-blank body followed by its (replaced) line end
textLine = lineBody + _lineJoin

# line end that separates paragraphs - its parse action substitutes a
# double newline for it
_paraBreak = Suppress(lineEnd)
_paraBreak.setParseAction(replaceWith("\n\n"))

# a paragraph is one or more text lines followed by the separating line end
para = OneOrMore(textLine) + _paraBreak


# run a test
# The sample holds two hard-wrapped paragraphs separated by one blank line.
test = """
    Now is the
    time for
    all
    good men
    to come to

    the aid of their
    country.
"""
# print(...) with a single argument works as a statement on Python 2 and as
# a function call on Python 3.
print(para.transformString(test))

# process an entire file
# open() replaces the Python 2-only file() builtin, and the with-blocks close
# both handles deterministically (guaranteeing the output is flushed) rather
# than leaving that to garbage collection.
with open("Successful Methods of Public Speaking.txt") as infile:
    z = para.transformString(infile.read())
with open("Successful Methods of Public Speaking(2).txt","w") as outfile:
    outfile.write(z)