diff options
author | wl <wl> | 2010-12-13 15:30:19 +0000 |
---|---|---|
committer | wl <wl> | 2010-12-13 15:30:19 +0000 |
commit | d5d8909e11e0c613f7a1dfba3a20a405ae7b4da4 (patch) | |
tree | 88c9d26db241f5a613239e1372eb22ff239070cc /src/roff/troff | |
parent | 67525a8a24c8a0a7d6413de6814c8901f9401a39 (diff) | |
download | groff-d5d8909e11e0c613f7a1dfba3a20a405ae7b4da4.tar.gz |
Implement support for character classes.
This patch uses standard C++ headers, contrary to the rest of groff.
Ideally, everything in groff should be updated to do the same.
* src/include/font.h (glyph_to_unicode): New function.
* src/libs/libgroff/font.cpp (glyph_to_unicode): Implement it.
(font::contains, font::get_code): Use it.
* src/roff/troff/charinfo.h: Include <vector> and <utility>.
(charinfo): New members `ranges' and `nested_classes'.
New member functions `get_unicode_code' and `get_flags'.
New member functions `add_to_class', `is_class', and `contains'.
(charinfo::overlaps_horizontally, charinfo::overlaps_vertically,
charinfo::can_break_before, charinfo::can_break_after,
charinfo::ends_sentence,
charinfo::transparent, charinfo::ignore_hcodes): Use `get_flags',
which handles character classes also.
* src/roff/troff/input.cpp (char_class_dictionary): New global
variable.
(define_class): New function.
(init_input_requests): Register `class'.
(charinfo::get_unicode_code, charinfo::get_flags,
charinfo::contains): Implement them.
* NEWS, doc/groff.texinfo (Character Classes), man/groff_diff.man,
man/groff.man: Document it.
Diffstat (limited to 'src/roff/troff')
-rw-r--r-- | src/roff/troff/charinfo.h | 55 | ||||
-rw-r--r-- | src/roff/troff/input.cpp | 171 |
2 files changed, 216 insertions, 10 deletions
diff --git a/src/roff/troff/charinfo.h b/src/roff/troff/charinfo.h index 2c2c2685..c71383ed 100644 --- a/src/roff/troff/charinfo.h +++ b/src/roff/troff/charinfo.h @@ -1,5 +1,5 @@ // -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2002, 2006, 2009 +/* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2002, 2006, 2009, 2010 Free Software Foundation, Inc. Written by James Clark (jjc@jclark.com) @@ -18,6 +18,9 @@ for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <vector> +#include <utility> + class macro; class charinfo : glyph { @@ -35,6 +38,9 @@ class charinfo : glyph { char translate_input; // non-zero means that asciify_code is // active for .asciify (set by .trin) char_mode mode; + // Unicode character classes + std::vector<std::pair<int, int> > ranges; + std::vector<charinfo *> nested_classes; public: enum { // Values for the flags bitmask. See groff // manual, description of the `.cflags' request. 
@@ -66,6 +72,7 @@ public: unsigned char get_hyphenation_code(); unsigned char get_ascii_code(); unsigned char get_asciify_code(); + int get_unicode_code(); void set_hyphenation_code(unsigned char); void set_ascii_code(unsigned char); void set_asciify_code(unsigned char); @@ -73,6 +80,7 @@ public: int get_translation_input(); charinfo *get_translation(int = 0); void set_translation(charinfo *, int, int); + unsigned char get_flags(); void set_flags(unsigned char); void set_special_translation(int, int); int get_special_translation(int = 0); @@ -87,6 +95,13 @@ public: int is_fallback(); int is_special(); symbol *get_symbol(); + void add_to_class(int); + void add_to_class(int, int); + void add_to_class(charinfo *); + bool is_class(); + bool contains(int); + bool contains(symbol); + bool contains(charinfo *); }; charinfo *get_charinfo(symbol); @@ -95,37 +110,37 @@ charinfo *get_charinfo_by_number(int); inline int charinfo::overlaps_horizontally() { - return flags & OVERLAPS_HORIZONTALLY; + return get_flags() & OVERLAPS_HORIZONTALLY; } inline int charinfo::overlaps_vertically() { - return flags & OVERLAPS_VERTICALLY; + return get_flags() & OVERLAPS_VERTICALLY; } inline int charinfo::can_break_before() { - return flags & BREAK_BEFORE; + return get_flags() & BREAK_BEFORE; } inline int charinfo::can_break_after() { - return flags & BREAK_AFTER; + return get_flags() & BREAK_AFTER; } inline int charinfo::ends_sentence() { - return flags & ENDS_SENTENCE; + return get_flags() & ENDS_SENTENCE; } inline int charinfo::transparent() { - return flags & TRANSPARENT; + return get_flags() & TRANSPARENT; } inline int charinfo::ignore_hcodes() { - return flags & IGNORE_HCODES; + return get_flags() & IGNORE_HCODES; } inline int charinfo::numbered() @@ -214,5 +229,27 @@ inline int charinfo::first_time_not_found() inline symbol *charinfo::get_symbol() { - return( &nm ); + return &nm; +} + +inline void charinfo::add_to_class(int c) +{ + // TODO ranges cumbersome for single characters? 
+ ranges.push_back(std::pair<int, int>(c, c)); +} + +inline void charinfo::add_to_class(int lo, + int hi) +{ + ranges.push_back(std::pair<int, int>(lo, hi)); +} + +inline void charinfo::add_to_class(charinfo *ci) +{ + nested_classes.push_back(ci); +} + +inline bool charinfo::is_class() +{ + return (!ranges.empty() || !nested_classes.empty()); } diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp index 5335c1ce..cdb82fe2 100644 --- a/src/roff/troff/input.cpp +++ b/src/roff/troff/input.cpp @@ -1,6 +1,6 @@ // -*- C++ -*- /* Copyright (C) 1989, 1990, 1991, 1992, 2000, 2001, 2002, 2003, 2004, 2005, - 2006, 2007, 2008, 2009 + 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. Written by James Clark (jjc@jclark.com) @@ -6740,6 +6740,102 @@ void hyphenation_patterns_file_code() skip_line(); } +dictionary char_class_dictionary(501); + +void define_class() +{ + tok.skip(); + symbol nm = get_name(1); + if (nm.is_null()) { + skip_line(); + return; + } + charinfo *ci = get_charinfo(nm); + charinfo *child1 = 0, *child2 = 0; + while (!tok.newline() && !tok.eof()) { + tok.skip(); + if (child1 != 0 && tok.ch() == '-') { + tok.next(); + child2 = tok.get_char(1); + if (!child2) { + warning(WARN_MISSING, + "missing end of character range in class `%1'", + nm.contents()); + skip_line(); + return; + } + if (child1->is_class() || child2->is_class()) { + warning(WARN_SYNTAX, + "nested character class is not allowed in range definition"); + skip_line(); + return; + } + int u1 = child1->get_unicode_code(); + int u2 = child2->get_unicode_code(); + if (u1 < 0) { + warning(WARN_SYNTAX, + "invalid start value in character range"); + skip_line(); + return; + } + if (u2 < 0) { + warning(WARN_SYNTAX, + "invalid end value in character range"); + skip_line(); + return; + } + ci->add_to_class(u1, u2); + child1 = child2 = 0; + } + else if (child1 != 0) { + if (child1->is_class()) + ci->add_to_class(child1); + else { + int u1 = child1->get_unicode_code(); + if (u1 < 0) { + 
warning(WARN_SYNTAX, + "invalid character value in class `%1'", + nm.contents()); + skip_line(); + return; + } + ci->add_to_class(u1); + } + child1 = 0; + } + child1 = tok.get_char(1); + tok.next(); + if (!child1) { + if (!tok.newline()) + skip_line(); + break; + } + } + if (child1 != 0) { + if (child1->is_class()) + ci->add_to_class(child1); + else { + int u1 = child1->get_unicode_code(); + if (u1 < 0) { + warning(WARN_SYNTAX, + "invalid character value in class `%1'", + nm.contents()); + skip_line(); + return; + } + ci->add_to_class(u1); + } + child1 = 0; + } + if (!ci->is_class()) { + warning(WARN_SYNTAX, + "empty class definition for `%1'", + nm.contents()); + return; + } + (void)char_class_dictionary.lookup(nm, ci); +} + charinfo *token::get_char(int required) { if (type == TOKEN_CHAR) @@ -7817,6 +7913,7 @@ void init_input_requests() init_request("cflags", char_flags); init_request("char", define_character); init_request("chop", chop_macro); + init_request("class", define_class); init_request("close", close_request); init_request("color", activate_color); init_request("composite", composite_request); @@ -8367,6 +8464,13 @@ charinfo::charinfo(symbol s) number = -1; } +int charinfo::get_unicode_code() +{ + if (ascii_code != '\0') + return ascii_code; + return glyph_to_unicode(this); +} + void charinfo::set_hyphenation_code(unsigned char c) { hyphenation_code = c; @@ -8388,6 +8492,27 @@ void charinfo::set_translation(charinfo *ci, int tt, int ti) transparent_translate = tt; } +// Get the union of all flags affecting this charinfo. 
+unsigned char charinfo::get_flags() +{ + unsigned char all_flags = flags; + dictionary_iterator iter(char_class_dictionary); + charinfo *cp; + symbol s; + while (iter.get(&s, (void **)&cp)) { + assert(!s.is_null()); + if (cp->contains(get_unicode_code())) { +#if defined(DEBUGGING) + if (debug_state) + fprintf(stderr, "charinfo::get_flags %p %s %d\n", + (void *)cp, cp->nm.contents(), cp->flags); +#endif + all_flags |= cp->flags; + } + } + return all_flags; +} + void charinfo::set_special_translation(int c, int tt) { special_translation = c; @@ -8432,6 +8557,50 @@ int charinfo::get_number() return number; } +bool charinfo::contains(int c) +{ + std::vector<std::pair<int, int> >::const_iterator ranges_iter; + ranges_iter = ranges.begin(); + while (ranges_iter != ranges.end()) { + if (c >= ranges_iter->first && c <= ranges_iter->second) { +#if defined(DEBUGGING) + if (debug_state) + fprintf(stderr, "charinfo::contains(%d)\n", c); +#endif + return true; + } + ++ranges_iter; + } + + std::vector<charinfo *>::const_iterator nested_iter; + nested_iter = nested_classes.begin(); + while (nested_iter != nested_classes.end()) { + if ((*nested_iter)->contains(c)) + return true; + ++nested_iter; + } + + return false; +} + +bool charinfo::contains(symbol s) +{ + const char *unicode = glyph_name_to_unicode(s.contents()); + if (unicode != NULL && strchr(unicode, '_') == NULL) { + char *ignore; + int c = (int)strtol(unicode, &ignore, 16); + return contains(c); + } + else + return false; +} + +bool charinfo::contains(charinfo *) +{ + // TODO + return false; +} + symbol UNNAMED_SYMBOL("---"); // For numbered characters not between 0 and 255, we make a symbol out |