summaryrefslogtreecommitdiff
path: root/lib/fribidi-bidi.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/fribidi-bidi.c')
-rw-r--r--lib/fribidi-bidi.c301
1 files changed, 257 insertions, 44 deletions
diff --git a/lib/fribidi-bidi.c b/lib/fribidi-bidi.c
index 00b1848..7fdcdec 100644
--- a/lib/fribidi-bidi.c
+++ b/lib/fribidi-bidi.c
@@ -55,6 +55,7 @@
#define RL_LEN(list) ((list)->len)
#define RL_POS(list) ((list)->pos)
#define RL_LEVEL(list) ((list)->level)
+#define RL_ISOLATE_LEVEL(list) ((list)->isolate_level)
static FriBidiRun *
merge_with_prev (
@@ -111,6 +112,37 @@ compact_neutrals (
}
}
+/* Search for an adjacent run in the forward or backward direction.
+ This search is O(n) and thus algorithms using it become O(n^2).
+ The concept should be replaced with isolate adjacent runs in the
+ forward and backward directions.
+ */
+static FriBidiRun *get_adjacent_run(FriBidiRun *list, fribidi_boolean forward, fribidi_boolean skip_neutral)
+{
+ FriBidiRun *ppp = forward ? list->next : list->prev;
+ while(ppp)
+ {
+ FriBidiCharType ppp_type = RL_TYPE (ppp);
+
+ if (ppp_type == _FRIBIDI_TYPE_SENTINEL)
+ break;
+
+ /* Note that when sweeping forward we continue one run
+ beyond the PDI to see what lies behind. When looking
+ backwards, this is not necessary as the leading isolate
+ run has already been assigned the resolved level. */
+ if (ppp->isolate_level > list->isolate_level
+ || (forward && ppp_type == FRIBIDI_TYPE_PDI)
+ || (skip_neutral && !FRIBIDI_IS_STRONG(ppp_type)))
+ {
+ ppp = forward ? ppp->next : ppp->prev;
+ continue;
+ }
+ break;
+ }
+ return ppp;
+}
+
#if DEBUG+0
/*======================================================================
* For debugging, define some functions for printing the types and the
@@ -130,6 +162,8 @@ static char char_from_level_array[] = {
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z',
+ /* TBD - insert another 125-64 levels */
+
'@', /* 62 == only must appear after resolving
* implicits. */
@@ -152,8 +186,8 @@ print_types_re (
MSG (" Run types : ");
for_run_list (pp, pp)
{
- MSG5 ("%d:%d(%s)[%d] ",
- pp->pos, pp->len, fribidi_get_bidi_type_name (pp->type), pp->level);
+ MSG6 ("%d:%d(%s)[%d,%d] ",
+ pp->pos, pp->len, fribidi_get_bidi_type_name (pp->type), pp->level, pp->isolate_level);
}
MSG ("\n");
}
@@ -218,20 +252,23 @@ print_bidi_string (
/* There are a few little points in pushing into and poping from the status
stack:
1. when the embedding level is not valid (more than
- FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL=61), you must reject it, and not to push
+ FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL=125), you must reject it, and not to push
into the stack, but when you see a PDF, you must find the matching code,
and if it was pushed in the stack, pop it, it means you must pop if and
only if you have pushed the matching code, the over_pushed var counts the
number of rejected codes so far.
+
2. there's a more confusing point too, when the embedding level is exactly
- FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL-1=60, an LRO or LRE is rejected
- because the new level would be FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL+1=62, that
- is invalid; but an RLO or RLE is accepted because the new level is
- FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL=61, that is valid, so the rejected codes
+ FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL-1=124, an LRO, LRE, or LRI is rejected
+ because the new level would be FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL+1=126, that
+ is invalid; but an RLO, RLE, or RLI is accepted because the new level is
+ FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL=125, that is valid, so the rejected codes
may be not continuous in the logical order, in fact there are at most two
- continuous intervals of codes, with an RLO or RLE between them. To support
- this case, the first_interval var counts the number of rejected codes in
- the first interval, when it is 0, means that there is only one interval.
+ continuous intervals of codes, with an RLO, RLE, or RLI between them. To
+ support this case, the first_interval var counts the number of rejected
+ codes in the first interval, when it is 0, means that there is only one
+ interval.
+
*/
/* a. If this new level would be valid, then this embedding code is valid.
@@ -248,11 +285,13 @@ print_bidi_string (
if UNLIKELY(level == FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL - 1) \
first_interval = over_pushed; \
status_stack[stack_size].level = level; \
+ status_stack[stack_size].isolate_level = isolate_level; \
+ status_stack[stack_size].isolate = isolate; \
status_stack[stack_size].override = override; \
stack_size++; \
level = new_level; \
override = new_override; \
- } else \
+ } else if LIKELY(isolate_overflow == 0) \
over_pushed++; \
FRIBIDI_END_STMT
@@ -272,6 +311,8 @@ print_bidi_string (
stack_size--; \
level = status_stack[stack_size].level; \
override = status_stack[stack_size].override; \
+ isolate = status_stack[stack_size].isolate; \
+ isolate_level = status_stack[stack_size].isolate_level; \
} \
} \
FRIBIDI_END_STMT
@@ -365,12 +406,24 @@ fribidi_get_par_embedding_levels (
/* P2. P3. Search for first strong character and use its direction as
base direction */
{
- for_run_list (pp, main_run_list) if (FRIBIDI_IS_LETTER (RL_TYPE (pp)))
- {
- base_level = FRIBIDI_DIR_TO_LEVEL (RL_TYPE (pp));
- *pbase_dir = FRIBIDI_LEVEL_TO_DIR (base_level);
- break;
- }
+ int valid_isolate_count = 0;
+ for_run_list (pp, main_run_list)
+ {
+ if (RL_TYPE(pp) == FRIBIDI_TYPE_PDI)
+ {
+ /* Ignore if there is no matching isolate */
+ if (valid_isolate_count>0)
+ valid_isolate_count--;
+ }
+ else if (FRIBIDI_IS_ISOLATE(RL_TYPE(pp)))
+ valid_isolate_count++;
+ else if (valid_isolate_count==0 && FRIBIDI_IS_LETTER (RL_TYPE (pp)))
+ {
+ base_level = FRIBIDI_DIR_TO_LEVEL (RL_TYPE (pp));
+ *pbase_dir = FRIBIDI_LEVEL_TO_DIR (base_level);
+ break;
+ }
+ }
}
base_dir = FRIBIDI_LEVEL_TO_DIR (base_level);
DBG2 (" base level : %c", fribidi_char_from_level (base_level));
@@ -388,13 +441,19 @@ fribidi_get_par_embedding_levels (
DBG ("explicit levels and directions");
{
FriBidiLevel level, new_level;
+ int isolate_level = 0;
FriBidiCharType override, new_override;
FriBidiStrIndex i;
int stack_size, over_pushed, first_interval;
+ int valid_isolate_count = 0;
+ int isolate_overflow = 0;
+ int isolate = 0; /* The isolate status flag */
struct
{
FriBidiCharType override; /* only LTR, RTL and ON are valid */
FriBidiLevel level;
+ int isolate;
+ int isolate_level;
} *status_stack;
FriBidiRun temp_link;
@@ -407,9 +466,11 @@ fribidi_get_par_embedding_levels (
(!explicits_list) goto out;
/* X1. Begin by setting the current embedding level to the paragraph
- embedding level. Set the directional override status to neutral.
- Process each character iteratively, applying rules X2 through X9.
- Only embedding levels from 0 to 61 are valid in this phase. */
+ embedding level. Set the directional override status to neutral,
+ and directional isolate status to false.
+
+ Process each character iteratively, applying rules X2 through X8.
+ Only embedding levels from 0 to 123 are valid in this phase. */
level = base_level;
override = FRIBIDI_TYPE_ON;
@@ -417,12 +478,16 @@ fribidi_get_par_embedding_levels (
stack_size = 0;
over_pushed = 0;
first_interval = 0;
+ valid_isolate_count = 0;
+ isolate_overflow = 0;
status_stack = fribidi_malloc (sizeof (status_stack[0]) *
FRIBIDI_BIDI_MAX_RESOLVED_LEVELS);
for_run_list (pp, main_run_list)
{
FriBidiCharType this_type = RL_TYPE (pp);
+ RL_ISOLATE_LEVEL (pp) = isolate_level;
+
if (FRIBIDI_IS_EXPLICIT_OR_BN (this_type))
{
if (FRIBIDI_IS_STRONG (this_type))
@@ -443,6 +508,7 @@ fribidi_get_par_embedding_levels (
new_level =
((level + FRIBIDI_DIR_TO_LEVEL (this_type) + 2) & ~1) -
FRIBIDI_DIR_TO_LEVEL (this_type);
+ isolate = 0;
PUSH_STATUS;
}
}
@@ -451,8 +517,12 @@ fribidi_get_par_embedding_levels (
/* 3. Terminating Embeddings and overrides */
/* X7. With each PDF, determine the matching embedding or
override code. */
- for (i = RL_LEN (pp); i; i--)
- POP_STATUS;
+ for (i = RL_LEN (pp); i; i--)
+ {
+ if (stack_size && status_stack[stack_size-1].isolate != 0)
+ break;
+ POP_STATUS;
+ }
}
/* X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes. */
@@ -462,6 +532,87 @@ fribidi_get_par_embedding_levels (
move_node_before (pp, explicits_list);
pp = &temp_link;
}
+ else if (this_type == FRIBIDI_TYPE_PDI)
+ /* X6a. pop the direction of the stack */
+ {
+ for (i = RL_LEN (pp); i; i--)
+ {
+ if (isolate_overflow > 0)
+ isolate_overflow--;
+ else if (valid_isolate_count > 0)
+ {
+ /* Pop away all LRE,RLE,LRO, RLO levels
+ from the stack, as these are implicitly
+ terminated by the PDI */
+ while (stack_size && !status_stack[stack_size-1].isolate)
+ POP_STATUS;
+ POP_STATUS;
+ isolate_level-- ;
+ valid_isolate_count--;
+ RL_LEVEL (pp) = level;
+ RL_ISOLATE_LEVEL (pp) = isolate_level;
+ }
+ else
+ {
+ /* Ignore isolated PDI's by turning them into ON's */
+ RL_TYPE (pp) = FRIBIDI_TYPE_ON;
+ RL_LEVEL (pp) = level;
+ }
+ }
+ }
+ else if (FRIBIDI_IS_ISOLATE(this_type))
+ {
+ /* TBD support RL_LEN > 1 */
+ new_override = FRIBIDI_TYPE_ON;
+ isolate = 1;
+ if (this_type == FRIBIDI_TYPE_LRI)
+ new_level = level + 2 - (level%2);
+ else if (this_type == FRIBIDI_TYPE_RLI)
+ new_level = level + 1 + (level%2);
+ else if (this_type == FRIBIDI_TYPE_FSI)
+ {
+ /* Search for a local strong character until we
+ meet the corresponding PDI or the end of the
+ paragraph */
+ FriBidiRun *fsi_pp;
+ int isolate_count = 0;
+ int fsi_base_level = 0;
+ for_run_list (fsi_pp, pp)
+ {
+ if (RL_TYPE(fsi_pp) == FRIBIDI_TYPE_PDI)
+ {
+ isolate_count--;
+ if (valid_isolate_count < 0)
+ break;
+ }
+ else if (FRIBIDI_IS_ISOLATE(RL_TYPE(fsi_pp)))
+ isolate_count++;
+ else if (isolate_count==0 && FRIBIDI_IS_LETTER (RL_TYPE (fsi_pp)))
+ {
+ fsi_base_level = FRIBIDI_DIR_TO_LEVEL (RL_TYPE (fsi_pp));
+ break;
+ }
+ }
+
+ /* Same behavior like RLI and LRI above */
+ if (FRIBIDI_LEVEL_IS_RTL (fsi_base_level))
+ new_level = level + 1 + (level%2);
+ else
+ new_level = level + 2 - (level%2);
+ }
+
+ RL_LEVEL (pp) = level;
+ RL_ISOLATE_LEVEL (pp) = isolate_level++;
+
+ if (new_level <= FRIBIDI_BIDI_MAX_EXPLICIT_LEVEL)
+ {
+ valid_isolate_count++;
+ PUSH_STATUS;
+ level = new_level;
+ }
+ else
+ isolate_overflow += 1;
+ }
else if (this_type == FRIBIDI_TYPE_BS)
{
/* X8. All explicit directional embeddings and overrides are
@@ -517,21 +668,38 @@ fribidi_get_par_embedding_levels (
/* 4. Resolving weak types */
DBG ("resolving weak types");
{
- FriBidiCharType last_strong, prev_type_orig;
+ int *last_strong_stack;
+ FriBidiCharType prev_type_orig;
fribidi_boolean w4;
- last_strong = base_dir;
+ last_strong_stack = fribidi_malloc (sizeof (int)
+ * FRIBIDI_BIDI_MAX_RESOLVED_LEVELS);
+ last_strong_stack[0] = base_dir;
for_run_list (pp, main_run_list)
{
register FriBidiCharType prev_type, this_type, next_type;
+ FriBidiRun *ppp_prev, *ppp_next;
+ int iso_level;
+
+ ppp_prev = get_adjacent_run(pp, FALSE, FALSE);
+ ppp_next = get_adjacent_run(pp, TRUE, FALSE);
- prev_type = PREV_TYPE_OR_SOR (pp);
this_type = RL_TYPE (pp);
- next_type = NEXT_TYPE_OR_EOR (pp);
+ iso_level = RL_ISOLATE_LEVEL(pp);
+
+ if (RL_LEVEL(ppp_prev) == RL_LEVEL(pp))
+ prev_type = RL_TYPE(ppp_prev);
+ else
+ prev_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_prev), RL_LEVEL(pp)));
+
+ if (RL_LEVEL(ppp_next) == RL_LEVEL(pp))
+ next_type = RL_TYPE(ppp_next);
+ else
+ next_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_next), RL_LEVEL(pp)));
if (FRIBIDI_IS_STRONG (prev_type))
- last_strong = prev_type;
+ last_strong_stack[iso_level] = prev_type;
/* W1. NSM
Examine each non-spacing mark (NSM) in the level run, and change the
@@ -543,31 +711,41 @@ fribidi_get_par_embedding_levels (
adjacent ETs are in one FriBidiRun. */
if (this_type == FRIBIDI_TYPE_NSM)
{
- if (RL_LEVEL (pp->prev) == RL_LEVEL (pp))
- pp = merge_with_prev (pp);
+ /* New rule in Unicode 6.3 */
+ if (FRIBIDI_IS_ISOLATE (RL_TYPE (pp->prev)))
+ RL_TYPE(pp) = FRIBIDI_TYPE_ON;
+
+ if (RL_LEVEL (ppp_prev) == RL_LEVEL (pp))
+ {
+ if (ppp_prev == pp->prev)
+ pp = merge_with_prev (pp);
+ }
else
- RL_TYPE (pp) = prev_type;
+ RL_TYPE (pp) = prev_type;
+
if (prev_type == next_type && RL_LEVEL (pp) == RL_LEVEL (pp->next))
{
- pp = merge_with_prev (pp->next);
+ if (ppp_next == pp->next)
+ pp = merge_with_prev (pp->next);
}
continue; /* As we know the next condition cannot be true. */
}
/* W2: European numbers. */
- if (this_type == FRIBIDI_TYPE_EN && last_strong == FRIBIDI_TYPE_AL)
+ if (this_type == FRIBIDI_TYPE_EN && last_strong_stack[iso_level] == FRIBIDI_TYPE_AL)
{
RL_TYPE (pp) = FRIBIDI_TYPE_AN;
/* Resolving dependency of loops for rules W1 and W2, so we
can merge them in one loop. */
if (next_type == FRIBIDI_TYPE_NSM)
- RL_TYPE (pp->next) = FRIBIDI_TYPE_AN;
+ RL_TYPE (ppp_next) = FRIBIDI_TYPE_AN;
}
}
- last_strong = base_dir;
+ last_strong_stack[0] = base_dir;
+
/* Resolving dependency of loops for rules W4 and W5, W5 may
want to prevent W4 to take effect in the next turn, do this
through "w4". */
@@ -577,16 +755,31 @@ fribidi_get_par_embedding_levels (
so W4 and W5 in next turn can still do their works. */
prev_type_orig = FRIBIDI_TYPE_ON;
+ /* Each isolate level has its own memory of the last strong character */
for_run_list (pp, main_run_list)
{
register FriBidiCharType prev_type, this_type, next_type;
+ int iso_level;
+ FriBidiRun *ppp_prev, *ppp_next;
- prev_type = PREV_TYPE_OR_SOR (pp);
this_type = RL_TYPE (pp);
- next_type = NEXT_TYPE_OR_EOR (pp);
+ iso_level = RL_ISOLATE_LEVEL(pp);
+
+ ppp_prev = get_adjacent_run(pp, FALSE, FALSE);
+ ppp_next = get_adjacent_run(pp, TRUE, FALSE);
+
+ if (RL_LEVEL(ppp_prev) == RL_LEVEL(pp))
+ prev_type = RL_TYPE(ppp_prev);
+ else
+ prev_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_prev), RL_LEVEL(pp)));
+
+ if (RL_LEVEL(ppp_next) == RL_LEVEL(pp))
+ next_type = RL_TYPE(ppp_next);
+ else
+ next_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_next), RL_LEVEL(pp)));
if (FRIBIDI_IS_STRONG (prev_type))
- last_strong = prev_type;
+ last_strong_stack[iso_level] = prev_type;
/* W3: Change ALs to R. */
if (this_type == FRIBIDI_TYPE_AL)
@@ -628,7 +821,7 @@ fribidi_get_par_embedding_levels (
RL_TYPE (pp) = FRIBIDI_TYPE_ON;
/* W7. Change european numbers to L. */
- if (this_type == FRIBIDI_TYPE_EN && last_strong == FRIBIDI_TYPE_LTR)
+ if (this_type == FRIBIDI_TYPE_EN && last_strong_stack[iso_level] == FRIBIDI_TYPE_LTR)
{
RL_TYPE (pp) = FRIBIDI_TYPE_LTR;
prev_type_orig = (RL_LEVEL (pp) == RL_LEVEL (pp->next) ?
@@ -637,6 +830,8 @@ fribidi_get_par_embedding_levels (
else
prev_type_orig = PREV_TYPE_OR_SOR (pp->next);
}
+
+ fribidi_free (last_strong_stack);
}
compact_neutrals (main_run_list);
@@ -654,16 +849,31 @@ fribidi_get_par_embedding_levels (
DBG ("resolving neutral types");
{
/* N1. and N2.
- For each neutral, resolve it. */
+ For each neutral, resolve it.
+
+ TBDov: This must be done one isolating run at a time!
+ */
for_run_list (pp, main_run_list)
{
FriBidiCharType prev_type, this_type, next_type;
+ FriBidiRun *ppp_prev, *ppp_next;
+
+ ppp_prev = get_adjacent_run(pp, FALSE, FALSE);
+ ppp_next = get_adjacent_run(pp, TRUE, FALSE);
/* "European and Arabic numbers are treated as though they were R"
FRIBIDI_CHANGE_NUMBER_TO_RTL does this. */
this_type = FRIBIDI_CHANGE_NUMBER_TO_RTL (RL_TYPE (pp));
- prev_type = FRIBIDI_CHANGE_NUMBER_TO_RTL (PREV_TYPE_OR_SOR (pp));
- next_type = FRIBIDI_CHANGE_NUMBER_TO_RTL (NEXT_TYPE_OR_EOR (pp));
+
+ if (RL_LEVEL(ppp_prev) == RL_LEVEL(pp))
+ prev_type = FRIBIDI_CHANGE_NUMBER_TO_RTL (RL_TYPE(ppp_prev));
+ else
+ prev_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_prev), RL_LEVEL(pp)));
+
+ if (RL_LEVEL(ppp_next) == RL_LEVEL(pp))
+ next_type = FRIBIDI_CHANGE_NUMBER_TO_RTL (RL_TYPE(ppp_next));
+ else
+ next_type = FRIBIDI_LEVEL_TO_DIR(MAX(RL_LEVEL(ppp_next), RL_LEVEL(pp)));
if (FRIBIDI_IS_NEUTRAL (this_type))
RL_TYPE (pp) = (prev_type == next_type) ?
@@ -764,7 +974,9 @@ fribidi_get_par_embedding_levels (
1. segment separators,
2. paragraph separators,
3. any sequence of whitespace characters preceding a segment
- separator or paragraph separator, and
+ separator or paragraph separator, and
+ 4. any sequence of whitespace characters and/or isolate formatting
+ characters at the end of the line.
... (to be continued in fribidi_reorder_line()). */
list = new_run_list ();
if UNLIKELY
@@ -784,8 +996,9 @@ fribidi_get_par_embedding_levels (
state = 1;
pos = j;
}
- else if (state && !FRIBIDI_IS_EXPLICIT_OR_SEPARATOR_OR_BN_OR_WS
- (char_type))
+ else if (state &&
+ !(FRIBIDI_IS_EXPLICIT_OR_SEPARATOR_OR_BN_OR_WS(char_type)
+ || FRIBIDI_IS_ISOLATE(char_type)))
{
state = 0;
p = new_run ();