diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/include/font.h | 5 | ||||
-rw-r--r-- | src/libs/libgroff/font.cpp | 123 | ||||
-rw-r--r-- | src/roff/troff/charinfo.h | 55 | ||||
-rw-r--r-- | src/roff/troff/input.cpp | 171 |
4 files changed, 270 insertions, 84 deletions
diff --git a/src/include/font.h b/src/include/font.h index 944250b9..75d2ef16 100644 --- a/src/include/font.h +++ b/src/include/font.h @@ -1,5 +1,5 @@ // -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992, 2002, 2004, 2006, 2009 +/* Copyright (C) 1989, 1990, 1991, 1992, 2002, 2004, 2006, 2009, 2010 Free Software Foundation, Inc. Written by James Clark (jjc@jclark.com) @@ -73,6 +73,9 @@ inline int glyph_to_number(glyph *); // Convert the given glyph back to // a numbered character. inline int glyph_to_index(glyph *); // Return the unique index that is // associated with the given glyph. It is >= 0. +extern int glyph_to_unicode(glyph *); // Convert the given glyph to its + // Unicode codepoint. Return -1 if it does not + // designate a Unicode character. inline int glyph_to_number(glyph *g) { diff --git a/src/libs/libgroff/font.cpp b/src/libs/libgroff/font.cpp index d0b4a12e..8dff71e7 100644 --- a/src/libs/libgroff/font.cpp +++ b/src/libs/libgroff/font.cpp @@ -1,6 +1,6 @@ // -*- C++ -*- /* Copyright (C) 1989, 1990, 1991, 1992, 2000, 2001, 2002, 2003, 2004, 2005, - 2006, 2008, 2009 + 2006, 2008, 2009, 2010 Free Software Foundation, Inc. Written by James Clark (jjc@jclark.com) @@ -147,6 +147,47 @@ void text_file::error(const char *format, error_with_file_and_line(path, lineno, format, arg1, arg2, arg3); } +int glyph_to_unicode(glyph *g) +{ + const char *nm = glyph_to_name(g); + if (nm != NULL) { + // ASCII character? + if (nm[0] == 'c' && nm[1] == 'h' && nm[2] == 'a' && nm[3] == 'r' + && (nm[4] >= '0' && nm[4] <= '9')) { + int n = (nm[4] - '0'); + if (nm[5] == '\0') + return n; + if (n > 0 && (nm[5] >= '0' && nm[5] <= '9')) { + n = 10*n + (nm[5] - '0'); + if (nm[6] == '\0') + return n; + if (nm[6] >= '0' && nm[6] <= '9') { + n = 10*n + (nm[6] - '0'); + if (nm[7] == '\0' && n < 128) + return n; + } + } + } + // Unicode character? + if (check_unicode_name(nm)) { + char *ignore; + return (int)strtol(nm + 1, &ignore, 16); + } + // If `nm' is a single letter `x', the glyph name is `\x'. + char buf[] = { '\\', '\0', '\0' }; + if (nm[1] == '\0') { + buf[1] = nm[0]; + nm = buf; + } + // groff glyphs that map to Unicode? + const char *unicode = glyph_name_to_unicode(nm); + if (unicode != NULL && strchr(unicode, '_') == NULL) { + char *ignore; + return (int)strtol(unicode, &ignore, 16); + } + } + return -1; +} /* font functions */ @@ -269,42 +310,11 @@ int font::contains(glyph *g) return 1; if (is_unicode) { // Unicode font - const char *nm = glyph_to_name(g); - if (nm != NULL) { - // ASCII character? - if (nm[0] == 'c' && nm[1] == 'h' && nm[2] == 'a' && nm[3] == 'r' - && (nm[4] >= '0' && nm[4] <= '9')) { - int n = (nm[4] - '0'); - if (nm[5] == '\0') - return 1; - if (n > 0 && (nm[5] >= '0' && nm[5] <= '9')) { - n = 10*n + (nm[5] - '0'); - if (nm[6] == '\0') - return 1; - if (nm[6] >= '0' && nm[6] <= '9') { - n = 10*n + (nm[6] - '0'); - if (nm[7] == '\0' && n < 128) - return 1; - } - } - } - // Unicode character? - if (check_unicode_name(nm)) - return 1; - // If `nm' is a single letter `x', the glyph name is `\x'. - char buf[] = { '\\', '\0', '\0' }; - if (nm[1] == '\0') { - buf[1] = nm[0]; - nm = buf; - } - // groff glyph name that maps to Unicode? - const char *unicode = glyph_name_to_unicode(nm); - if (unicode != NULL && strchr(unicode, '_') == NULL) - return 1; - } + // ASCII or Unicode character, or groff glyph name that maps to Unicode? + if (glyph_to_unicode(g) >= 0) + return 1; // Numbered character? - int n = glyph_to_number(g); - if (n >= 0) + if (glyph_to_number(g) >= 0) return 1; } return 0; @@ -554,43 +564,10 @@ int font::get_code(glyph *g) } if (is_unicode) { // Unicode font - const char *nm = glyph_to_name(g); - if (nm != NULL) { - // ASCII character? - if (nm[0] == 'c' && nm[1] == 'h' && nm[2] == 'a' && nm[3] == 'r' - && (nm[4] >= '0' && nm[4] <= '9')) { - int n = (nm[4] - '0'); - if (nm[5] == '\0') - return n; - if (n > 0 && (nm[5] >= '0' && nm[5] <= '9')) { - n = 10*n + (nm[5] - '0'); - if (nm[6] == '\0') - return n; - if (nm[6] >= '0' && nm[6] <= '9') { - n = 10*n + (nm[6] - '0'); - if (nm[7] == '\0' && n < 128) - return n; - } - } - } - // Unicode character? - if (check_unicode_name(nm)) { - char *ignore; - return (int)strtol(nm + 1, &ignore, 16); - } - // If `nm' is a single letter `x', the glyph name is `\x'. - char buf[] = { '\\', '\0', '\0' }; - if (nm[1] == '\0') { - buf[1] = nm[0]; - nm = buf; - } - // groff glyphs that map to Unicode? - const char *unicode = glyph_name_to_unicode(nm); - if (unicode != NULL && strchr(unicode, '_') == NULL) { - char *ignore; - return (int)strtol(unicode, &ignore, 16); - } - } + // ASCII or Unicode character, or groff glyph name that maps to Unicode? + int uni = glyph_to_unicode(g); + if (uni >= 0) + return uni; // Numbered character? int n = glyph_to_number(g); if (n >= 0) diff --git a/src/roff/troff/charinfo.h b/src/roff/troff/charinfo.h index 2c2c2685..c71383ed 100644 --- a/src/roff/troff/charinfo.h +++ b/src/roff/troff/charinfo.h @@ -1,5 +1,5 @@ // -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2002, 2006, 2009 +/* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2002, 2006, 2009, 2010 Free Software Foundation, Inc. Written by James Clark (jjc@jclark.com) @@ -18,6 +18,9 @@ for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <vector> +#include <utility> + class macro; class charinfo : glyph { @@ -35,6 +38,9 @@ class charinfo : glyph { char translate_input; // non-zero means that asciify_code is // active for .asciify (set by .trin) char_mode mode; + // Unicode character classes + std::vector<std::pair<int, int> > ranges; + std::vector<charinfo *> nested_classes; public: enum { // Values for the flags bitmask. See groff // manual, description of the `.cflags' request. @@ -66,6 +72,7 @@ public: unsigned char get_hyphenation_code(); unsigned char get_ascii_code(); unsigned char get_asciify_code(); + int get_unicode_code(); void set_hyphenation_code(unsigned char); void set_ascii_code(unsigned char); void set_asciify_code(unsigned char); @@ -73,6 +80,7 @@ public: int get_translation_input(); charinfo *get_translation(int = 0); void set_translation(charinfo *, int, int); + unsigned char get_flags(); void set_flags(unsigned char); void set_special_translation(int, int); int get_special_translation(int = 0); @@ -87,6 +95,13 @@ public: int is_fallback(); int is_special(); symbol *get_symbol(); + void add_to_class(int); + void add_to_class(int, int); + void add_to_class(charinfo *); + bool is_class(); + bool contains(int); + bool contains(symbol); + bool contains(charinfo *); }; charinfo *get_charinfo(symbol); @@ -95,37 +110,37 @@ charinfo *get_charinfo_by_number(int); inline int charinfo::overlaps_horizontally() { - return flags & OVERLAPS_HORIZONTALLY; + return get_flags() & OVERLAPS_HORIZONTALLY; } inline int charinfo::overlaps_vertically() { - return flags & OVERLAPS_VERTICALLY; + return get_flags() & OVERLAPS_VERTICALLY; } inline int charinfo::can_break_before() { - return flags & BREAK_BEFORE; + return get_flags() & BREAK_BEFORE; } inline int charinfo::can_break_after() { - return flags & BREAK_AFTER; + return get_flags() & BREAK_AFTER; } inline int charinfo::ends_sentence() { - return flags & ENDS_SENTENCE; + return get_flags() & ENDS_SENTENCE; } inline int charinfo::transparent() { - return flags & TRANSPARENT; + return get_flags() & TRANSPARENT; } inline int charinfo::ignore_hcodes() { - return flags & IGNORE_HCODES; + return get_flags() & IGNORE_HCODES; } inline int charinfo::numbered() @@ -214,5 +229,27 @@ inline int charinfo::first_time_not_found() inline symbol *charinfo::get_symbol() { - return( &nm ); + return &nm; +} + +inline void charinfo::add_to_class(int c) +{ + // TODO ranges cumbersome for single characters? + ranges.push_back(std::pair<int, int>(c, c)); +} + +inline void charinfo::add_to_class(int lo, + int hi) +{ + ranges.push_back(std::pair<int, int>(lo, hi)); +} + +inline void charinfo::add_to_class(charinfo *ci) +{ + nested_classes.push_back(ci); +} + +inline bool charinfo::is_class() +{ + return (!ranges.empty() || !nested_classes.empty()); } diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp index 5335c1ce..cdb82fe2 100644 --- a/src/roff/troff/input.cpp +++ b/src/roff/troff/input.cpp @@ -1,6 +1,6 @@ // -*- C++ -*- /* Copyright (C) 1989, 1990, 1991, 1992, 2000, 2001, 2002, 2003, 2004, 2005, - 2006, 2007, 2008, 2009 + 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. Written by James Clark (jjc@jclark.com) @@ -6740,6 +6740,102 @@ void hyphenation_patterns_file_code() skip_line(); } +dictionary char_class_dictionary(501); + +void define_class() +{ + tok.skip(); + symbol nm = get_name(1); + if (nm.is_null()) { + skip_line(); + return; + } + charinfo *ci = get_charinfo(nm); + charinfo *child1 = 0, *child2 = 0; + while (!tok.newline() && !tok.eof()) { + tok.skip(); + if (child1 != 0 && tok.ch() == '-') { + tok.next(); + child2 = tok.get_char(1); + if (!child2) { + warning(WARN_MISSING, + "missing end of character range in class `%1'", + nm.contents()); + skip_line(); + return; + } + if (child1->is_class() || child2->is_class()) { + warning(WARN_SYNTAX, + "nested character class is not allowed in range definition"); + skip_line(); + return; + } + int u1 = child1->get_unicode_code(); + int u2 = child2->get_unicode_code(); + if (u1 < 0) { + warning(WARN_SYNTAX, + "invalid start value in character range"); + skip_line(); + return; + } + if (u2 < 0) { + warning(WARN_SYNTAX, + "invalid end value in character range"); + skip_line(); + return; + } + ci->add_to_class(u1, u2); + child1 = child2 = 0; + } + else if (child1 != 0) { + if (child1->is_class()) + ci->add_to_class(child1); + else { + int u1 = child1->get_unicode_code(); + if (u1 < 0) { + warning(WARN_SYNTAX, + "invalid character value in class `%1'", + nm.contents()); + skip_line(); + return; + } + ci->add_to_class(u1); + } + child1 = 0; + } + child1 = tok.get_char(1); + tok.next(); + if (!child1) { + if (!tok.newline()) + skip_line(); + break; + } + } + if (child1 != 0) { + if (child1->is_class()) + ci->add_to_class(child1); + else { + int u1 = child1->get_unicode_code(); + if (u1 < 0) { + warning(WARN_SYNTAX, + "invalid character value in class `%1'", + nm.contents()); + skip_line(); + return; + } + ci->add_to_class(u1); + } + child1 = 0; + } + if (!ci->is_class()) { + warning(WARN_SYNTAX, + "empty class definition for `%1'", + nm.contents()); + return; + } + (void)char_class_dictionary.lookup(nm, ci); +} + charinfo *token::get_char(int required) { if (type == TOKEN_CHAR) @@ -7817,6 +7913,7 @@ void init_input_requests() init_request("cflags", char_flags); init_request("char", define_character); init_request("chop", chop_macro); + init_request("class", define_class); init_request("close", close_request); init_request("color", activate_color); init_request("composite", composite_request); @@ -8367,6 +8464,13 @@ charinfo::charinfo(symbol s) number = -1; } +int charinfo::get_unicode_code() +{ + if (ascii_code != '\0') + return ascii_code; + return glyph_to_unicode(this); +} + void charinfo::set_hyphenation_code(unsigned char c) { hyphenation_code = c; @@ -8388,6 +8492,27 @@ void charinfo::set_translation(charinfo *ci, int tt, int ti) transparent_translate = tt; } +// Get the union of all flags affecting this charinfo. +unsigned char charinfo::get_flags() +{ + unsigned char all_flags = flags; + dictionary_iterator iter(char_class_dictionary); + charinfo *cp; + symbol s; + while (iter.get(&s, (void **)&cp)) { + assert(!s.is_null()); + if (cp->contains(get_unicode_code())) { +#if defined(DEBUGGING) + if (debug_state) + fprintf(stderr, "charinfo::get_flags %p %s %d\n", + (void *)cp, cp->nm.contents(), cp->flags); +#endif + all_flags |= cp->flags; + } + } + return all_flags; +} + void charinfo::set_special_translation(int c, int tt) { special_translation = c; @@ -8432,6 +8557,50 @@ int charinfo::get_number() return number; } +bool charinfo::contains(int c) +{ + std::vector<std::pair<int, int> >::const_iterator ranges_iter; + ranges_iter = ranges.begin(); + while (ranges_iter != ranges.end()) { + if (c >= ranges_iter->first && c <= ranges_iter->second) { +#if defined(DEBUGGING) + if (debug_state) + fprintf(stderr, "charinfo::contains(%d)\n", c); +#endif + return true; + } + ++ranges_iter; + } + + std::vector<charinfo *>::const_iterator nested_iter; + nested_iter = nested_classes.begin(); + while (nested_iter != nested_classes.end()) { + if ((*nested_iter)->contains(c)) + return true; + ++nested_iter; + } + + return false; +} + +bool charinfo::contains(symbol s) +{ + const char *unicode = glyph_name_to_unicode(s.contents()); + if (unicode != NULL && strchr(unicode, '_') == NULL) { + char *ignore; + int c = (int)strtol(unicode, &ignore, 16); + return contains(c); + } + else + return false; +} + +bool charinfo::contains(charinfo *) +{ + // TODO + return false; +} + symbol UNNAMED_SYMBOL("---"); // For numbered characters not between 0 and 255, we make a symbol out |