# src/examples/wordsToNum.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

# wordsToNum.py
# Copyright 2006, Paul McGuire
#
# Sample parser grammar to read a number given in words, and return the numeric value.
#
from pyparsing import *
from operator import mul
from functools import reduce

def makeLit(s, val):
    """Build a case-insensitive literal for the word ``s`` that parses to ``val``.

    The expression is named after the word itself, and its parse action
    replaces the matched token with ``val``.
    """
    word = CaselessLiteral(s)
    word.setName(s)
    word.setParseAction(replaceWith(val))
    return word

# (word, value) pairs for the numbers below twenty.  The first six entries
# are all different ways of saying zero; the rest count up from one.
unitDefinitions = [(word, 0) for word in "zero oh zip zilch nada bupkis".split()]
unitDefinitions += [
    (word, value)
    for value, word in enumerate(
        "one two three four five six seven eight nine ten "
        "eleven twelve thirteen fourteen fifteen sixteen "
        "seventeen eighteen nineteen".split(),
        1,
    )
]
# Alternation over one literal per entry in unitDefinitions.
units = Or([makeLit(word, value) for word, value in unitDefinitions])

# (word, value) pairs for the multiples of ten.  "fourty" is accepted as an
# alternate spelling of "forty" (for the spelling-challenged...).
tensDefinitions = list(zip(
    ("ten", "twenty", "thirty", "forty", "fourty",
     "fifty", "sixty", "seventy", "eighty", "ninety"),
    (10, 20, 30, 40, 40, 50, 60, 70, 80, 90),
))
# Alternation over one literal per entry in tensDefinitions.
tens = Or([makeLit(word, value) for word, value in tensDefinitions])

# The word "hundred", parsed to the value 100.
hundreds = makeLit(s="hundred", val=100)

# (word, value) pairs for the magnitude words: successive powers of one
# thousand, from 10**3 ("thousand") up to 10**18 ("quintillion").
majorDefinitions = [
    (word, 10 ** (3 * step))
    for step, word in enumerate(
        ("thousand", "million", "billion",
         "trillion", "quadrillion", "quintillion"),
        1,
    )
]
# Alternation over one literal per entry in majorDefinitions.
mag = Or([makeLit(word, scale) for word, scale in majorDefinitions])

def wordprod(t):
    """Return the product of the tokens in ``t``, folded left to right."""
    return reduce(mul, t)


def wordsum(t):
    """Return the sum of the tokens in ``t``."""
    return sum(t)
# A unit word, optionally multiplied by "hundred" (e.g. "fifteen hundred").
_hundredsGroup = (units + Optional(hundreds)).setParseAction(wordprod)
# ...optionally followed by a tens word, whose value is added on.
_hundredsAndTens = (_hundredsGroup + Optional(tens)).setParseAction(wordsum)
# Either the group above or a bare tens word, then an optional trailing
# unit word; the pieces are summed.
numPart = ((_hundredsAndTens ^ tens) + Optional(units)).setParseAction(wordsum)

# One or more parts, each optionally multiplied by a magnitude word.  The
# scaled parts are summed, and the match must reach the end of the input.
_scaledPart = (numPart + Optional(mag)).setParseAction(wordprod)
numWords = OneOrMore(_scaledPart).setParseAction(wordsum) + StringEnd()

# Register hyphens and the word "and" as ignorable text.
for _filler in (Literal("-"), CaselessLiteral("and")):
    numWords.ignore(_filler)

def test(s,expected):
    """Run ``s`` through ``numWords`` and check the outcome against ``expected``.

    ``expected`` is the value the phrase should parse to, or ``None`` when
    the phrase is supposed to be rejected.  Any exception raised along the
    way (including a failed assertion) is caught and printed as
    ``ExceptionType: message`` instead of being propagated.
    """
    should_fail = expected is None
    try:
        passed, outcomes = numWords.runTests(s, failureTests=should_fail)
        assert passed, "Failed test!"
        if should_fail:
            # Nothing to compare for a phrase that was meant to be rejected.
            return
        # Only one test string was submitted, so inspect the first outcome.
        teststr, results = outcomes[0]
        observed = results[0]
        assert expected == observed, "incorrect parsed value, {} -> {}, should be {}".format(teststr, observed, expected)
    except Exception as exc:
        print("{}: {}".format(type(exc).__name__, exc))

# Phrases paired with the value each should parse to; None marks a phrase
# that is expected to be rejected.
_cases = [
    ("one hundred twenty hundred", None),
    ("one hundred and twennty", None),
    ("one hundred and twenty", 120),
    ("one hundred and three", 103),
    ("one hundred twenty-three", 123),
    ("one hundred and twenty three", 123),
    ("one hundred twenty three million", 123000000),
    ("one hundred and twenty three million", 123000000),
    ("one hundred twenty three million and three", 123000003),
    ("fifteen hundred and sixty five", 1565),
    ("seventy-seven thousand eight hundred and nineteen", 77819),
    ("seven hundred seventy-seven thousand seven hundred and seventy-seven", 777777),
    ("zero", 0),
    ("forty two", 42),
    ("fourty two", 42),
]
for _phrase, _expected in _cases:
    test(_phrase, _expected)