summary refs log tree commit diff
diff options
context:
space:
mode:
author wl <wl> 2010-12-18 09:13:16 +0000
committer wl <wl> 2010-12-18 09:13:16 +0000
commit 81d80a6f71cc6d6fbd3d1111498318fb22a6857a (patch)
tree 998eb2024c65eb8da534b6f1448f63d434575f85
parent 995c9418dc28ee74941ca052ed98b246faf5d3dd (diff)
download groff-81d80a6f71cc6d6fbd3d1111498318fb22a6857a.tar.gz
Improve CJK support with new values for `.cflags'.
This patch introduces three new values to `.cflags':

  don't break before character: 128
  don't break after character: 256
  allow inter-character break: 512

They are handled differently if compared to other cflags values:

  (1) hcode values are completely ignored
  (2) similar to kern values, and contrary to the other cflags
      values, troff looks at pairs of characters to decide whether a
      break gets inserted

A yet-to-be-written patch should add inter-character spacing if
those flags are active; currently, only zero-width breakpoints are
inserted.

* src/roff/troff/charinfo.h (charinfo): Change type of `flags' to
`int'. Update callers accordingly.
New enum values `DONT_BREAK_BEFORE', `DONT_BREAK_AFTER', and
`INTER_CHAR_SPACE'.
New member functions `prohibit_break_before',
`prohibit_break_after', and `inter_char_space'.
* src/roff/troff/input.cpp: Updated.

* src/roff/troff/node.cpp (inter_char_space_node): New class similar
to kern_pair_node, collecting charinfo entities with the
abovementioned cflags values.
(break_char_type): Add new enum values.
(glyph_node::merge_glyph_node): Handle abovementioned cflags values
and emit an `inter_char_space_node' if necessary.

* tmac/ja.tmac: Use new cflags values.

* doc/groff.texinfo, NEWS, man/groff_diff.man: Document new values.
-rw-r--r-- ChangeLog 40
-rw-r--r-- NEWS 7
-rw-r--r-- doc/groff.texinfo 21
-rw-r--r-- man/groff_diff.man 26
-rw-r--r-- src/roff/troff/charinfo.h 43
-rw-r--r-- src/roff/troff/input.cpp 4
-rw-r--r-- src/roff/troff/node.cpp 258
-rw-r--r-- tmac/ja.tmac 6
8 files changed, 382 insertions, 23 deletions
diff --git a/ChangeLog b/ChangeLog
index f5963d93..acd42273 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,45 @@
2010-12-18 Werner LEMBERG <wl@gnu.org>
+ Improve CJK support with new values for `.cflags'.
+
+ This patch introduces three new values to `.cflags':
+
+ don't break before character: 128
+ don't break after character: 256
+ allow inter-character break: 512
+
+ They are handled differently if compared to other cflags values:
+
+ (1) hcode values are completely ignored
+ (2) similar to kern values, and contrary to the other cflags
+ values, troff looks at pairs of characters to decide whether a
+ break gets inserted
+
+ A yet-to-be-written patch should add inter-character spacing if
+ those flags are active; currently, only zero-width breakpoints are
+ inserted.
+
+ * src/roff/troff/charinfo.h (charinfo): Change type of `flags' to
+ `int'. Update callers accordingly.
+ New enum values `DONT_BREAK_BEFORE', `DONT_BREAK_AFTER', and
+ `INTER_CHAR_SPACE'.
+ New member functions `prohibit_break_before',
+ `prohibit_break_after', and `inter_char_space'.
+ * src/roff/troff/input.cpp: Updated.
+
+ * src/roff/troff/node.cpp (inter_char_space_node): New class similar
+ to kern_pair_node, collecting charinfo entities with the
+ abovementioned cflags values.
+ (break_char_type): Add new enum values.
+ (glyph_node::merge_glyph_node): Handle abovementioned cflags values
+ and emit an `inter_char_space_node' if necessary.
+
+ * tmac/ja.tmac: Use new cflags values.
+
+ * doc/groff.texinfo, NEWS, man/groff_diff.man: Document new values.
+
+2010-12-18 Werner LEMBERG <wl@gnu.org>
+
Remove compiler warning.
* src/libs/libgroff/relocate.cpp (msw2posixpath): Remove redundant
diff --git a/NEWS b/NEWS
index 00a41b47..ce6410cc 100644
--- a/NEWS
+++ b/NEWS
@@ -28,6 +28,13 @@ o The new `class' request assigns a short name to a set of characters
which can be referred to in the `cflags' request. This is especially
useful to control line-breaking and hyphenation rules in CJK languages.
+o Three new values for the `cflags' request have been added, which are
+ needed for proper CJK support.
+
+ 128 prohibit break before but allow break after character
+ 256 prohibit break after but allow break before character
+ 512 allow break before and after character
+
Tbl
---
diff --git a/doc/groff.texinfo b/doc/groff.texinfo
index 42cf6166..dc81fb5a 100644
--- a/doc/groff.texinfo
+++ b/doc/groff.texinfo
@@ -9537,6 +9537,27 @@ into your document. Note, however, that this can lead to bad layout if
done without thinking; in most situations, a better solution instead of
changing the @code{cflags} value is to insert @code{\:} right after the
hyphen at the places which really need a break point.
+
+@item 128
+Prohibit a line break before the character, but allow a line break after
+the character. This works only in combination with flags 256 and 512
+(see below) and has no effect otherwise.
+
+@item 256
+Prohibit a line break after the character, but allow a line break before
+the character. This works only in combination with flags 128 and 512
+(see below) and has no effect otherwise.
+
+@item 512
+Allow line break before or after the character. This works only in
+combination with flags 128 and 256 and has no effect otherwise.
+
+Contrary to flag values 2 and@tie{}4, the flags 128, 256, and 512 work
+pairwise. If, for example, the left character has value 512, and the
+right character 128, no line break gets inserted. If we use
+value@tie{}6 instead for the left character, a line break after the
+character can't be suppressed since the right neighbour character
+doesn't get examined.
@end table
@endDefreq
diff --git a/man/groff_diff.man b/man/groff_diff.man
index e0d355ea..408d7bf7 100644
--- a/man/groff_diff.man
+++ b/man/groff_diff.man
@@ -993,7 +993,6 @@ Lines can be broken after the character (initially characters
have this property); a line is not broken at a character with this
property unless the characters on each side both have non-zero
hyphenation codes.
-
This can be overridden with value 64.
.
.IP 8
@@ -1021,8 +1020,33 @@ have this property).
Ignore hyphenation code values of the surrounding characters.
Use this in combination with values 2 and\~4 (initially no characters
have this property).
+.
+.IP 128
+Prohibit a line break before the character, but allow a line break after the
+character.
+This works only in combination with flags 256 and 512 and has no effect
+otherwise.
+.
+.IP 256
+Prohibit a line break after the character, but allow a line break before
+the character.
+This works only in combination with flags 128 and 512 and has no effect
+otherwise.
+.
+.IP 512
+Allow line break before or after the character.
+This works only in combination with flags 128 and 256 and has no effect
+otherwise.
.RE
.
+.IP
+Contrary to flag values 2 and\~4, the flags 128, 256, and 512 work pairwise.
+If, for example, the left character has value 512, and the right character
+128, no line break gets inserted.
+If we use value\~6 instead for the left character, a line break after the
+character can't be suppressed since the right neighbour character doesn't
+get examined.
+.
.TP
.BI .char\ c\ string
[This request can both define characters and glyphs.]
diff --git a/src/roff/troff/charinfo.h b/src/roff/troff/charinfo.h
index c71383ed..544f24f2 100644
--- a/src/roff/troff/charinfo.h
+++ b/src/roff/troff/charinfo.h
@@ -29,7 +29,7 @@ class charinfo : glyph {
macro *mac;
unsigned char special_translation;
unsigned char hyphenation_code;
- unsigned char flags;
+ unsigned int flags;
unsigned char ascii_code;
unsigned char asciify_code;
char not_found;
@@ -44,13 +44,16 @@ class charinfo : glyph {
public:
enum { // Values for the flags bitmask. See groff
// manual, description of the `.cflags' request.
- ENDS_SENTENCE = 1,
- BREAK_BEFORE = 2,
- BREAK_AFTER = 4,
- OVERLAPS_HORIZONTALLY = 8,
- OVERLAPS_VERTICALLY = 16,
- TRANSPARENT = 32,
- IGNORE_HCODES = 64
+ ENDS_SENTENCE = 0x01,
+ BREAK_BEFORE = 0x02,
+ BREAK_AFTER = 0x04,
+ OVERLAPS_HORIZONTALLY = 0x08,
+ OVERLAPS_VERTICALLY = 0x10,
+ TRANSPARENT = 0x20,
+ IGNORE_HCODES = 0x40,
+ DONT_BREAK_BEFORE = 0x80,
+ DONT_BREAK_AFTER = 0x100,
+ INTER_CHAR_SPACE = 0x200
};
enum {
TRANSLATE_NONE,
@@ -69,6 +72,9 @@ public:
int can_break_after();
int transparent();
int ignore_hcodes();
+ int prohibit_break_before();
+ int prohibit_break_after();
+ int inter_char_space();
unsigned char get_hyphenation_code();
unsigned char get_ascii_code();
unsigned char get_asciify_code();
@@ -80,8 +86,8 @@ public:
int get_translation_input();
charinfo *get_translation(int = 0);
void set_translation(charinfo *, int, int);
- unsigned char get_flags();
- void set_flags(unsigned char);
+ unsigned int get_flags();
+ void set_flags(unsigned int);
void set_special_translation(int, int);
int get_special_translation(int = 0);
macro *set_macro(macro *);
@@ -143,6 +149,21 @@ inline int charinfo::ignore_hcodes()
return get_flags() & IGNORE_HCODES;
}
+inline int charinfo::prohibit_break_before()
+{
+ return get_flags() & DONT_BREAK_BEFORE;
+}
+
+inline int charinfo::prohibit_break_after()
+{
+ return get_flags() & DONT_BREAK_AFTER;
+}
+
+inline int charinfo::inter_char_space()
+{
+ return get_flags() & INTER_CHAR_SPACE;
+}
+
inline int charinfo::numbered()
{
return number >= 0;
@@ -185,7 +206,7 @@ inline unsigned char charinfo::get_asciify_code()
return (translate_input ? asciify_code : 0);
}
-inline void charinfo::set_flags(unsigned char c)
+inline void charinfo::set_flags(unsigned int c)
{
flags = c;
}
diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp
index 8d5c1b33..4d579f80 100644
--- a/src/roff/troff/input.cpp
+++ b/src/roff/troff/input.cpp
@@ -8496,9 +8496,9 @@ void charinfo::set_translation(charinfo *ci, int tt, int ti)
}
// Get the union of all flags affecting this charinfo.
-unsigned char charinfo::get_flags()
+unsigned int charinfo::get_flags()
{
- unsigned char all_flags = flags;
+ unsigned int all_flags = flags;
dictionary_iterator iter(char_class_dictionary);
charinfo *cp;
symbol s;
diff --git a/src/roff/troff/node.cpp b/src/roff/troff/node.cpp
index 9a964a0d..ec082ff2 100644
--- a/src/roff/troff/node.cpp
+++ b/src/roff/troff/node.cpp
@@ -2111,6 +2111,47 @@ node *node::merge_glyph_node(glyph_node *)
return 0;
}
+class inter_char_space_node : public node {
+ hunits amount;
+ char left_break_code;
+ char right_break_code;
+ color *col;
+ node *n1;
+ node *n2;
+public:
+ inter_char_space_node(hunits, char, char, color *, node *, node *,
+ statem *, int, node * = 0);
+ ~inter_char_space_node();
+ node *copy();
+ node *merge_glyph_node(glyph_node *);
+ node *add_self(node *, hyphen_list **);
+ hyphen_list *get_hyphen_list(hyphen_list *, int *);
+ node *add_discretionary_hyphen();
+ hunits width();
+ node *last_char_node();
+ hunits italic_correction();
+ hunits subscript_correction();
+ void tprint(troff_output_file *);
+ hyphenation_type get_hyphenation_type();
+ int ends_sentence();
+ void ascii_print(ascii_output_file *);
+ void asciify(macro *);
+ int same(node *);
+ const char *type();
+ int force_tprint();
+ int is_tag();
+ void vertical_extent(vunits *, vunits *);
+};
+
+enum break_char_type {
+ CAN_BREAK_BEFORE = 0x01,
+ CAN_BREAK_AFTER = 0x02,
+ IGNORE_HCODES = 0x04,
+ PROHIBIT_BREAK_BEFORE = 0x08,
+ PROHIBIT_BREAK_AFTER = 0x10,
+ INTER_CHAR_SPACE = 0x20
+};
+
node *glyph_node::merge_glyph_node(glyph_node *gn)
{
if (tf == gn->tf && gcol == gn->gcol && fcol == gn->fcol) {
@@ -2129,6 +2170,28 @@ node *glyph_node::merge_glyph_node(glyph_node *gn)
gn->div_nest_level, next1);
}
}
+ int left_bc = 0, right_bc = 0;
+ if (ci->prohibit_break_before())
+ left_bc = PROHIBIT_BREAK_BEFORE;
+ if (gn->ci->prohibit_break_before())
+ right_bc = PROHIBIT_BREAK_BEFORE;
+ if (ci->prohibit_break_after())
+ left_bc |= PROHIBIT_BREAK_AFTER;
+ if (gn->ci->prohibit_break_after())
+ right_bc |= PROHIBIT_BREAK_AFTER;
+ if (ci->inter_char_space())
+ left_bc |= INTER_CHAR_SPACE;
+ if (gn->ci->inter_char_space())
+ right_bc |= INTER_CHAR_SPACE;
+ if (left_bc && right_bc) {
+ node *next1 = next;
+ next = 0;
+ // ic_space not supported yet
+ int ic_space = 0;
+ return new inter_char_space_node(ic_space, left_bc, right_bc,
+ gcol, this, gn, state,
+ gn->div_nest_level, next1);
+ }
return 0;
}
@@ -2757,12 +2820,6 @@ int italic_corrected_node::character_type()
return n->character_type();
}
-enum break_char_type {
- CAN_BREAK_BEFORE = 0x01,
- CAN_BREAK_AFTER = 0x02,
- IGNORE_HCODES = 0x04
-};
-
class break_char_node : public node {
node *ch;
char break_code;
@@ -5718,6 +5775,195 @@ int dbreak_node::is_tag()
return 0;
}
+inter_char_space_node::inter_char_space_node(hunits n,
+ char left, char right,
+ color *c, node *first, node *second,
+ statem* s, int pop, node *x)
+: node(x, s, pop), amount(n), left_break_code(left), right_break_code(right),
+ col(c), n1(first), n2(second)
+{
+}
+
+inter_char_space_node::~inter_char_space_node()
+{
+ if (n1 != 0)
+ delete n1;
+ if (n2 != 0)
+ delete n2;
+}
+
+node *inter_char_space_node::merge_glyph_node(glyph_node *gn)
+{
+ node *nd = n2->merge_glyph_node(gn);
+ if (nd == 0)
+ return 0;
+ n2 = nd;
+ nd = n2->merge_self(n1);
+ if (nd) {
+ nd->next = next;
+ n1 = 0;
+ n2 = 0;
+ delete this;
+ return nd;
+ }
+ return this;
+}
+
+hunits inter_char_space_node::italic_correction()
+{
+ return n2->italic_correction();
+}
+
+hunits inter_char_space_node::subscript_correction()
+{
+ return n2->subscript_correction();
+}
+
+void inter_char_space_node::vertical_extent(vunits *min, vunits *max)
+{
+ n1->vertical_extent(min, max);
+ vunits min2, max2;
+ n2->vertical_extent(&min2, &max2);
+ if (min2 < *min)
+ *min = min2;
+ if (max2 > *max)
+ *max = max2;
+}
+
+node *inter_char_space_node::add_discretionary_hyphen()
+{
+ tfont *tf = n1->get_tfont();
+ if (tf) {
+ if (tf->contains(soft_hyphen_char)) {
+ color *gcol = n2->get_glyph_color();
+ color *fcol = n2->get_fill_color();
+ node *next1 = next;
+ next = 0;
+ node *n = copy();
+ glyph_node *gn = new glyph_node(soft_hyphen_char, tf, gcol, fcol,
+ state, div_nest_level);
+ node *nn = n->merge_glyph_node(gn);
+ if (nn == 0) {
+ gn->next = n;
+ nn = gn;
+ }
+ return new dbreak_node(this, nn, state, div_nest_level, next1);
+ }
+ }
+ return this;
+}
+
+node *inter_char_space_node::copy()
+{
+ return new inter_char_space_node(amount, left_break_code, right_break_code,
+ col, n1->copy(), n2->copy(),
+ state, div_nest_level);
+}
+
+hyphen_list *inter_char_space_node::get_hyphen_list(hyphen_list *tail,
+ int *count)
+{
+ hyphen_list *hl = n2->get_hyphen_list(tail, count);
+ return n1->get_hyphen_list(hl, count);
+}
+
+node *inter_char_space_node::add_self(node *n, hyphen_list **p)
+{
+ n = n1->add_self(n, p);
+ if (left_break_code & INTER_CHAR_SPACE
+ || left_break_code & PROHIBIT_BREAK_AFTER) {
+ if (right_break_code & PROHIBIT_BREAK_BEFORE)
+ // stretchable zero-width space not implemented yet
+ ;
+ else {
+ // breakable, stretchable zero-width space not implemented yet
+ n = new space_node(H0, col, n);
+ n->freeze_space();
+ }
+ }
+ n = n2->add_self(n, p);
+ n1 = n2 = 0;
+ delete this;
+ return n;
+}
+
+hunits inter_char_space_node::width()
+{
+ return n1->width() + n2->width();
+}
+
+node *inter_char_space_node::last_char_node()
+{
+ node *nd = n2->last_char_node();
+ if (nd)
+ return nd;
+ return n1->last_char_node();
+}
+
+int inter_char_space_node::ends_sentence()
+{
+ switch (n2->ends_sentence()) {
+ case 0:
+ return 0;
+ case 1:
+ return 1;
+ case 2:
+ break;
+ default:
+ assert(0);
+ }
+ return n1->ends_sentence();
+}
+
+void inter_char_space_node::ascii_print(ascii_output_file *ascii)
+{
+ n1->ascii_print(ascii);
+ n2->ascii_print(ascii);
+}
+
+void inter_char_space_node::asciify(macro *m)
+{
+ n1->asciify(m);
+ n2->asciify(m);
+ n1 = n2 = 0;
+ delete this;
+}
+
+hyphenation_type inter_char_space_node::get_hyphenation_type()
+{
+ return HYPHEN_MIDDLE;
+}
+
+void inter_char_space_node::tprint(troff_output_file *out)
+{
+ n1->tprint(out);
+ n2->tprint(out);
+}
+
+int inter_char_space_node::same(node *nd)
+{
+ return (amount == ((inter_char_space_node *)nd)->amount
+ && left_break_code == ((inter_char_space_node *)nd)->left_break_code
+ && right_break_code == ((inter_char_space_node *)nd)->right_break_code
+ && same_node(n1, ((inter_char_space_node *)nd)->n1)
+ && same_node(n2, ((inter_char_space_node *)nd)->n2));
+}
+
+const char *inter_char_space_node::type()
+{
+ return "inter_char_space_node";
+}
+
+int inter_char_space_node::force_tprint()
+{
+ return 0;
+}
+
+int inter_char_space_node::is_tag()
+{
+ return 0;
+}
+
int break_char_node::same(node *nd)
{
return break_code == ((break_char_node *)nd)->break_code
diff --git a/tmac/ja.tmac b/tmac/ja.tmac
index d6883f8f..f0ecd75b 100644
--- a/tmac/ja.tmac
+++ b/tmac/ja.tmac
@@ -44,6 +44,6 @@
.class [CJKnormal] \
\[u3041]-\[u3096] \[u30A0]-\[u30FF] \[u4E00]-\[u9FFF]
.
-.cflags 2 \C'[CJKprepunct]'
-.cflags 4 \C'[CJKpostpunct]'
-.cflags 66 \C'[CJKnormal]'
+.cflags 128 \C'[CJKprepunct]'
+.cflags 266 \C'[CJKpostpunct]'
+.cflags 512 \C'[CJKnormal]'