summaryrefslogtreecommitdiff
path: root/test/test_textutils.py
blob: 2f309055b5e7f3055d463f6ee8d3a4c867ba4ffb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
# -*- coding: utf-8 -*-
# copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of logilab-common.
#
# logilab-common is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option) any
# later version.
#
# logilab-common is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with logilab-common.  If not, see <http://www.gnu.org/licenses/>.
"""
unit tests for module textutils
skeleton generated by /home/syt/cvs_work/logilab/pyreverse/py2tests.py on Sep 08 at 09:1:31

"""

# flake8: noqa: E501

import doctest
import re
from os import linesep

from logilab.common import textutils as tu
from logilab.common.testlib import TestCase, unittest_main


# Expected values in this module are written with "\n" line endings, so the
# text under test only needs converting when the platform separator differs.
if linesep == "\n":

    def ulines(string):
        """Return ``string`` unchanged; the platform separator is already a newline."""
        return string


else:
    LINE_RGX = re.compile(linesep)

    def ulines(string):
        """Return ``string`` with every platform line separator turned into a newline."""
        return LINE_RGX.sub("\n", string)


class NormalizeTextTC(TestCase):
    """Known-value and non-regression checks for the text normalisation helpers."""

    def test_known_values(self):
        # Plain mode: badly wrapped input compared with the re-flowed text.
        messy = """some really malformated
        text.
With some times some veeeeeeeeeeeeeeerrrrryyyyyyyyyyyyyyyyyyy loooooooooooooooooooooong linnnnnnnnnnnes

and empty lines!
        """
        reflowed = """some really malformated text. With some times some
veeeeeeeeeeeeeeerrrrryyyyyyyyyyyyyyyyyyy loooooooooooooooooooooong
linnnnnnnnnnnes

and empty lines!"""
        self.assertEqual(ulines(tu.normalize_text(messy)), reflowed)

        # ReST mode: the title/underline pair and the line breaks between
        # "linnnnnnnnnnnes" and "and normal lines!" are expected to survive.
        rest_source = """\
some ReST formated text
=======================
With some times some veeeeeeeeeeeeeeerrrrryyyyyyyyyyyyyyyyyyy loooooooooooooooooooooong linnnnnnnnnnnes
and normal lines!

another paragraph
        """
        rest_expected = """\
some ReST formated text
=======================
With some times some veeeeeeeeeeeeeeerrrrryyyyyyyyyyyyyyyyyyy
loooooooooooooooooooooong linnnnnnnnnnnes
and normal lines!

another paragraph"""
        self.assertMultiLineEqual(ulines(tu.normalize_text(rest_source, rest=True)), rest_expected)

    def test_nonregr_unsplitable_word(self):
        # A URL longer than the requested width (second positional argument,
        # 80) must come back in one piece.
        source = """petit complement :

http://www.plonefr.net/blog/archive/2005/10/30/tester-la-future-infrastructure-i18n
"""
        expected = """petit complement :

http://www.plonefr.net/blog/archive/2005/10/30/tester-la-future-infrastructure-i18n"""
        normalized = tu.normalize_text(source, 80)
        self.assertEqual(ulines(normalized), expected)

    def test_nonregr_rest_normalize(self):
        # Text starting with "..." is expected to be wrapped like ordinary
        # prose in ReST mode.
        source = """... Il est donc evident que tout le monde doit lire le compte-rendu de RSH et aller discuter avec les autres si c'est utile ou necessaire.
        """
        expected = """... Il est donc evident que tout le monde doit lire le compte-rendu de RSH et
aller discuter avec les autres si c'est utile ou necessaire."""
        normalized = tu.normalize_text(source, rest=True)
        self.assertEqual(ulines(normalized), expected)

    def test_normalize_rest_paragraph(self):
        # A short paragraph with inline markup comes back untouched.
        paragraph = """**nico**: toto"""
        self.assertEqual(ulines(tu.normalize_rest_paragraph(paragraph)), """**nico**: toto""")

    def test_normalize_rest_paragraph2(self):
        # Hyperlink targets with over-long URLs: each target name and its URL
        # end up on their own line, every line prefixed with the indent.
        targets = """.. _tdm: http://www.editions-eni.fr/Livres/Python-Les-fondamentaux-du-langage---La-programmation-pour-les-scientifiques-Table-des-matieres/.20_adaa41fb-c125-4919-aece-049601e81c8e_0_0.pdf
.. _extrait: http://www.editions-eni.fr/Livres/Python-Les-fondamentaux-du-langage---La-programmation-pour-les-scientifiques-Extrait-du-livre/.20_d6eed0be-0d36-4384-be59-2dd09e081012_0_0.pdf"""
        expected = """> .. _tdm:
> http://www.editions-eni.fr/Livres/Python-Les-fondamentaux-du-langage---La-programmation-pour-les-scientifiques-Table-des-matieres/.20_adaa41fb-c125-4919-aece-049601e81c8e_0_0.pdf
> .. _extrait:
> http://www.editions-eni.fr/Livres/Python-Les-fondamentaux-du-langage---La-programmation-pour-les-scientifiques-Extrait-du-livre/.20_d6eed0be-0d36-4384-be59-2dd09e081012_0_0.pdf"""
        normalized = tu.normalize_rest_paragraph(targets, indent="> ")
        self.assertEqual(ulines(normalized), expected)

    def test_normalize_paragraph2(self):
        # Same input and expected output as test_normalize_rest_paragraph2,
        # but going through the non-ReST paragraph normaliser.
        targets = """.. _tdm: http://www.editions-eni.fr/Livres/Python-Les-fondamentaux-du-langage---La-programmation-pour-les-scientifiques-Table-des-matieres/.20_adaa41fb-c125-4919-aece-049601e81c8e_0_0.pdf
.. _extrait: http://www.editions-eni.fr/Livres/Python-Les-fondamentaux-du-langage---La-programmation-pour-les-scientifiques-Extrait-du-livre/.20_d6eed0be-0d36-4384-be59-2dd09e081012_0_0.pdf"""
        expected = """> .. _tdm:
> http://www.editions-eni.fr/Livres/Python-Les-fondamentaux-du-langage---La-programmation-pour-les-scientifiques-Table-des-matieres/.20_adaa41fb-c125-4919-aece-049601e81c8e_0_0.pdf
> .. _extrait:
> http://www.editions-eni.fr/Livres/Python-Les-fondamentaux-du-langage---La-programmation-pour-les-scientifiques-Extrait-du-livre/.20_d6eed0be-0d36-4384-be59-2dd09e081012_0_0.pdf"""
        normalized = tu.normalize_paragraph(targets, indent="> ")
        self.assertEqual(ulines(normalized), expected)


class NormalizeParagraphTC(TestCase):
    """Single-paragraph re-wrapping with an indent and an explicit line length."""

    def test_known_values(self):
        # The paragraph is fed through tu.normalize_text with a one-space
        # indent and a 70-column limit.
        source = """This package contains test files shared by the logilab-common package. It isn't
necessary to install this package unless you want to execute or look at
the tests."""
        wrapped = """\
 This package contains test files shared by the logilab-common
 package. It isn't necessary to install this package unless you want
 to execute or look at the tests."""
        normalized = tu.normalize_text(source, indent=" ", line_len=70)
        self.assertEqual(ulines(normalized), wrapped)


class GetCsvTC(TestCase):
    """Check of ``tu.splitstrip`` on a comma-separated string."""

    def test_known(self):
        # Whitespace around each comma-separated item is expected to be dropped.
        fields = tu.splitstrip("a, b,c ")
        self.assertEqual(fields, ["a", "b", "c"])


class UnitsTC(TestCase):
    """Checks for ``tu.apply_units`` with and without a unit table."""

    def setUp(self):
        # Lower-case unit names mapped to their multipliers.
        self.units = dict(m=60, kb=1024, mb=1024 * 1024)

    def test_empty_base(self):
        value = tu.apply_units("17", {})
        self.assertEqual(value, 17)

    def test_empty_inter(self):
        # A custom ``inter`` callable converts the literal; the result is
        # still expected to be a float.
        def truncate_and_double(value):
            return int(float(value)) * 2

        outcome = tu.apply_units("12.4", {}, inter=truncate_and_double)
        self.assertEqual(outcome, 12 * 2)
        self.assertIsInstance(outcome, float)

    def test_empty_final(self):
        # int('12.4') raises ValueError
        with self.assertRaises(ValueError):
            tu.apply_units("12.4", {}, final=int)

    def test_empty_inter_final(self):
        outcome = tu.apply_units("12.4", {}, inter=float, final=int)
        self.assertEqual(outcome, 12)
        self.assertIsInstance(outcome, int)

    def test_blank_base(self):
        # Surrounding blanks are ignored.
        outcome = tu.apply_units(" 42  ", {}, final=int)
        self.assertEqual(outcome, 42)

    def test_blank_space(self):
        # A space used as a digit-group separator is ignored.
        outcome = tu.apply_units(" 1 337 ", {}, final=int)
        self.assertEqual(outcome, 1337)

    def test_blank_coma(self):
        # A comma used as a digit-group separator is ignored.
        outcome = tu.apply_units(" 4,298.42 ", {})
        self.assertEqual(outcome, 4298.42)

    def test_blank_mixed(self):
        outcome = tu.apply_units("45, 317, 337", {}, final=int)
        self.assertEqual(outcome, 45317337)

    def test_unit_singleunit_singleletter(self):
        outcome = tu.apply_units("15m", self.units)
        self.assertEqual(outcome, 15 * self.units["m"])

    def test_unit_singleunit_multipleletter(self):
        outcome = tu.apply_units("47KB", self.units)
        self.assertEqual(outcome, 47 * self.units["kb"])

    def test_unit_singleunit_caseinsensitive(self):
        outcome = tu.apply_units("47kb", self.units)
        self.assertEqual(outcome, 47 * self.units["kb"])

    def test_unit_multipleunit(self):
        # Several value/unit pairs in one string are summed.
        outcome = tu.apply_units("47KB 1.5MB", self.units)
        expected = 47 * self.units["kb"] + 1.5 * self.units["mb"]
        self.assertEqual(outcome, expected)

    def test_unit_with_blank(self):
        outcome = tu.apply_units("1 000 KB", self.units)
        self.assertEqual(outcome, 1000 * self.units["kb"])

    def test_unit_wrong_input(self):
        # Empty or non-numeric strings are rejected.
        for rejected in ("", "wrong input", "wrong13 input", "wrong input42"):
            self.assertRaises(ValueError, tu.apply_units, rejected, self.units)
        # An unknown unit is reported by name in the error message.
        with self.assertRaises(ValueError) as caught:
            tu.apply_units("42 cakes", self.units)
        self.assertIn("invalid unit cakes.", str(caught.exception))


# Pattern shared with PrettyMatchTC.test_known.
RGX = re.compile(r"abcd")


class PrettyMatchTC(TestCase):
    """Checks that ``tu.pretty_match`` underlines the matched span with carets."""

    def test_known(self):
        # Match in the middle of a single-line string.
        text = "hiuherabcdef"
        found = RGX.search(text)
        rendered = tu.pretty_match(found, text)
        self.assertEqual(ulines(rendered), "hiuherabcdef\n      ^^^^")

    def test_known_values_1(self):
        # Match at the very start of a single-line string.
        pattern = re.compile("(to*)")
        text = "toto"
        found = pattern.search(text)
        expected = """toto
^^"""
        self.assertEqual(ulines(tu.pretty_match(found, text)), expected)

    def test_known_values_2(self):
        # Match on the first line of a two-line string: the caret line is
        # expected right below the line holding the match.
        pattern = re.compile("(to*)")
        text = """ ... ... to to
 ... ... """
        found = pattern.search(text)
        expected = """ ... ... to to
         ^^
 ... ..."""
        self.assertEqual(ulines(tu.pretty_match(found, text)), expected)


class UnquoteTC(TestCase):
    """Checks for ``tu.unquote`` on quoted and unquoted strings."""

    def test(self):
        # (input, expected) pairs: double quotes, single quotes around text
        # containing an apostrophe, and a string with no quotes at all.
        samples = (
            ('"toto"', "toto"),
            ("'l'inenarrable toto'", "l'inenarrable toto"),
            ("no quote", "no quote"),
        )
        for quoted, bare in samples:
            self.assertEqual(tu.unquote(quoted), bare)


class ColorizeAnsiTC(TestCase):
    """Checks for the escape sequences produced by ``tu.colorize_ansi``."""

    def test_known(self):
        # Colour and style given positionally.
        styled = tu.colorize_ansi("hello", "blue", "strike")
        self.assertEqual(styled, "\x1b[9;34mhello\x1b[0m")
        # Several comma-separated styles and no colour.
        styled = tu.colorize_ansi("hello", style="strike, inverse")
        self.assertEqual(styled, "\x1b[9;7mhello\x1b[0m")
        # Neither colour nor style: the text is expected back as-is.
        for nothing in (None, ""):
            self.assertEqual(tu.colorize_ansi("hello", nothing, nothing), "hello")

    def test_raise(self):
        # Colour and style names the tests expect to be rejected.
        with self.assertRaises(KeyError):
            tu.colorize_ansi("hello", "bleu", None)
        with self.assertRaises(KeyError):
            tu.colorize_ansi("hello", None, "italique")


class UnormalizeTC(TestCase):
    """Checks for ``tu.unormalize`` with and without a ``substitute``."""

    def test_unormalize_no_substitute(self):
        # (input, expected) pairs covering ligatures, accented letters and
        # typographic punctuation.
        data = [
            ("\u0153nologie", "oenologie"),
            ("\u0152nologie", "OEnologie"),
            ("l\xf8to", "loto"),
            ("été", "ete"),
            ("àèùéïîôêç", "aeueiioec"),
            ("ÀÈÙÉÏÎÔÊÇ", "AEUEIIOEC"),
            ("\xa0", " "),  # NO-BREAK SPACE managed by NFKD decomposition
            ("\u0154", "R"),
            ("Pointe d\u2019Yves", "Pointe d'Yves"),
            ("Bordeaux\u2013Mérignac", "Bordeaux-Merignac"),
        ]
        # The assertions are run directly instead of being yielded: a test
        # method written as a generator is never iterated by the standard
        # unittest runner, so yielded checks would silently not execute.
        # subTest keeps one report per sample when the runner supports it.
        for raw, expected in data:
            with self.subTest(raw=raw):
                self.assertEqual(tu.unormalize(raw), expected)

    def test_unormalize_substitute(self):
        # A character the test treats as untranslatable is replaced by the
        # given substitute.
        self.assertEqual(tu.unormalize("ab \u8000 cd", substitute="_"), "ab _ cd")

    def test_unormalize_backward_compat(self):
        # Without a substitute the same character is expected to raise
        # ValueError; an empty substitute removes it.
        self.assertRaises(ValueError, tu.unormalize, "\u8000")
        self.assertEqual(tu.unormalize("\u8000", substitute=""), "")


def load_tests(loader, tests, ignore):
    """Add the doctests of the ``tu`` module to ``tests`` and return it.

    Follows the three-argument ``load_tests`` signature; ``loader`` and
    ``ignore`` are not used.
    """
    doctest_suite = doctest.DocTestSuite(tu)
    tests.addTests(doctest_suite)
    return tests


# Run the tests through logilab's unittest_main when executed as a script.
if __name__ == "__main__":
    unittest_main()