Merge from trunk.

author: Paul Eggert <eggert@cs.ucla.edu> 2011-10-22 23:38:24 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2011-10-22 23:38:24 -0700
commit: cfc09582247ffef6a46b6249e2fba9136a62d21e (patch)
tree: 50e102f64a2b88c692d9110990abd416c78c32f0 /src/bidi.c
parent: 92c938895c639463681ae1c58a944cae62b70b87 (diff)
parent: 86c606818495d9411fd5d6b1477f9a097eb18020 (diff)
download: emacs-cfc09582247ffef6a46b6249e2fba9136a62d21e.tar.gz
1 files changed, 76 insertions, 23 deletions
diff --git a/src/bidi.c b/src/bidi.c
index c6d7db96576..e8f2df89a9e 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -846,7 +846,10 @@ bidi_line_init (struct bidi_it *bidi_it)
   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
   bidi_it->invalid_levels = 0;
   bidi_it->invalid_rl_levels = -1;
-  bidi_it->next_en_pos = -1;
+  /* Setting this to zero will force its recomputation the first time
+     we need it for W5.  */
+  bidi_it->next_en_pos = 0;
+  bidi_it->next_en_type = UNKNOWN_BT;
   bidi_it->next_for_ws.type = UNKNOWN_BT;
   bidi_set_sor_type (bidi_it,
 		     (bidi_it->paragraph_dir == R2L ? 1 : 0),
@@ -1435,7 +1438,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
 	      }
 	  }
 	else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
-		 || bidi_it->next_en_pos > bidi_it->charpos)
+		 || (bidi_it->next_en_pos > bidi_it->charpos
+		     && bidi_it->next_en_type == WEAK_EN))
 	  type = WEAK_EN;
 	break;
       case LRE:	/* X3 */
@@ -1471,7 +1475,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
 	      }
 	  }
 	else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
-		 || bidi_it->next_en_pos > bidi_it->charpos)
+		 || (bidi_it->next_en_pos > bidi_it->charpos
+		     && bidi_it->next_en_type == WEAK_EN))
 	  type = WEAK_EN;
 	break;
       case PDF:	/* X7 */
@@ -1497,7 +1502,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
 	      }
 	  }
 	else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
-		 || bidi_it->next_en_pos > bidi_it->charpos)
+		 || (bidi_it->next_en_pos > bidi_it->charpos
+		     && bidi_it->next_en_type == WEAK_EN))
 	  type = WEAK_EN;
 	break;
       default:
@@ -1729,10 +1735,15 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
       else if (type == WEAK_ET	/* W5: ET with EN before or after it */
 	       || type == WEAK_BN)	/* W5/Retaining */
 	{
-	  if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
-	      || bidi_it->next_en_pos > bidi_it->charpos)
+	  if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */
 	    type = WEAK_EN;
-	  else			/* W5: ET/BN with EN after it.  */
+	  else if (bidi_it->next_en_pos > bidi_it->charpos
+		   && bidi_it->next_en_type != WEAK_BN)
+	    {
+	      if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
+		type = WEAK_EN;
+	    }
+	  else if (bidi_it->next_en_pos >=0)
 	    {
 	      ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
 	      const unsigned char *s = (STRINGP (bidi_it->string.lstring)
@@ -1761,20 +1772,27 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
 		  en_pos = bidi_it->charpos;
 		  bidi_copy_it (bidi_it, &saved_it);
 		}
+	      /* Remember this position, to speed up processing of the
+		 next ETs.  */
+	      bidi_it->next_en_pos = en_pos;
 	      if (type_of_next == WEAK_EN)
 		{
 		  /* If the last strong character is AL, the EN we've
 		     found will become AN when we get to it (W2). */
-		  if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
-		    {
-		      type = WEAK_EN;
-		      /* Remember this EN position, to speed up processing
-			 of the next ETs.  */
-		      bidi_it->next_en_pos = en_pos;
-		    }
+		  if (bidi_it->last_strong.type_after_w1 == STRONG_AL)
+		    type_of_next = WEAK_AN;
 		  else if (type == WEAK_BN)
 		    type = NEUTRAL_ON; /* W6/Retaining */
+		  else
+		    type = WEAK_EN;
 		}
+	      else if (type_of_next == NEUTRAL_B)
+		/* Record the fact that there are no more ENs from
+		   here to the end of paragraph, to avoid entering the
+		   loop above ever again in this paragraph.  */
+		bidi_it->next_en_pos = -1;
+	      /* Record the type of the character where we ended our search.  */
+	      bidi_it->next_en_type = type_of_next;
 	    }
 	}
     }
@@ -1843,13 +1861,45 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 	|| type == NEUTRAL_ON))
     abort ();
 
-  if (bidi_get_category (type) == NEUTRAL
+  if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
+			    we are already at paragraph end.  */
+       && bidi_get_category (type) == NEUTRAL)
       || (type == WEAK_BN && prev_level == current_level))
     {
       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
 	type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
 				       bidi_it->next_for_neutral.type,
 				       current_level);
+      /* The next two "else if" clauses are shortcuts for the
+	 important special case when we have a long sequence of
+	 neutral or WEAK_BN characters, such as whitespace or nulls or
+	 other control characters, on the base embedding level of the
+	 paragraph, and that sequence goes all the way to the end of
+	 the paragraph and follows a character whose resolved
+	 directionality is identical to the base embedding level.
+	 (This is what happens in a buffer with plain L2R text that
+	 happens to include long sequences of control characters.)  By
+	 virtue of N1, the result of examining this long sequence will
+	 always be either STRONG_L or STRONG_R, depending on the base
+	 embedding level.  So we use this fact directly instead of
+	 entering the expensive loop in the "else" clause.  */
+      else if (current_level == 0
+	       && bidi_it->prev_for_neutral.type == STRONG_L
+	       && !bidi_explicit_dir_char (bidi_it->ch))
+	type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
+				       STRONG_L, current_level);
+      else if (/* current level is 1 */
+	       current_level == 1
+	       /* base embedding level is also 1 */
+	       && bidi_it->level_stack[0].level == 1
+	       /* previous character is one of those considered R for
+		  the purposes of N1 */
+	       && (bidi_it->prev_for_neutral.type == STRONG_R
+		   || bidi_it->prev_for_neutral.type == WEAK_EN
+		   || bidi_it->prev_for_neutral.type == WEAK_AN)
+	       && !bidi_explicit_dir_char (bidi_it->ch))
+	type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
+				       STRONG_R, current_level);
       else
 	{
 	  /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
@@ -1900,6 +1950,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 	      case STRONG_L:
 	      case STRONG_R:
 	      case STRONG_AL:
+		/* Actually, STRONG_AL cannot happen here, because
+		   bidi_resolve_weak converts it to STRONG_R, per W3.  */
+		xassert (type != STRONG_AL);
 		next_type = type;
 		break;
 	      case WEAK_EN:
@@ -1907,7 +1960,6 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 		/* N1: ``European and Arabic numbers are treated as
 		   though they were R.''  */
 		next_type = STRONG_R;
-		saved_it.next_for_neutral.type = STRONG_R;
 		break;
 	      case WEAK_BN:
 		if (!bidi_explicit_dir_char (bidi_it->ch))
@@ -1920,11 +1972,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 		   member.  */
 		if (saved_it.type != WEAK_BN
 		    || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
-		  {
-		    next_type = bidi_it->prev_for_neutral.type;
-		    saved_it.next_for_neutral.type = next_type;
-		    bidi_check_type (next_type);
-		  }
+		  next_type = bidi_it->prev_for_neutral.type;
 		else
 		  {
 		    /* This is a BN which does not adjoin neutrals.
@@ -1938,7 +1986,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
 	    }
 	  type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
 					 next_type, current_level);
+	  saved_it.next_for_neutral.type = next_type;
 	  saved_it.type = type;
+	  bidi_check_type (next_type);
 	  bidi_check_type (type);
 	  bidi_copy_it (bidi_it, &saved_it);
 	}
@@ -2014,7 +2064,10 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
 	bidi_it->next_for_neutral.type = UNKNOWN_BT;
       if (bidi_it->next_en_pos >= 0
 	  && bidi_it->charpos >= bidi_it->next_en_pos)
-	bidi_it->next_en_pos = -1;
+	{
+	  bidi_it->next_en_pos = 0;
+	  bidi_it->next_en_type = UNKNOWN_BT;
+	}
       if (bidi_it->next_for_ws.type != UNKNOWN_BT
 	  && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
 	bidi_it->next_for_ws.type = UNKNOWN_BT;
@@ -2140,7 +2193,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
     }
 
   /* Resolve implicit levels, with a twist: PDFs get the embedding
-     level of the enbedding they terminate.  See below for the
+     level of the embedding they terminate.  See below for the
      reason.  */
   if (bidi_it->orig_type == PDF
       /* Don't do this if this formatting code didn't change the
author	Paul Eggert <eggert@cs.ucla.edu>	2011-10-22 23:38:24 -0700
committer	Paul Eggert <eggert@cs.ucla.edu>	2011-10-22 23:38:24 -0700
commit	cfc09582247ffef6a46b6249e2fba9136a62d21e (patch)
tree	50e102f64a2b88c692d9110990abd416c78c32f0 /src/bidi.c
parent	92c938895c639463681ae1c58a944cae62b70b87 (diff)
parent	86c606818495d9411fd5d6b1477f9a097eb18020 (diff)
download	emacs-cfc09582247ffef6a46b6249e2fba9136a62d21e.tar.gz