1 files changed, 753 insertions, 753 deletions
diff --git a/pango/break.c b/pango/break.c
index d3c033d3..e159b024 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -345,15 +345,15 @@ static const int line_break_indexes[] = {
 };
 
 #define BREAK_TYPE_SAFE(btype)            \
-         (btype < G_N_ELEMENTS(line_break_indexes) ? btype : G_UNICODE_BREAK_UNKNOWN)
+	 (btype < G_N_ELEMENTS(line_break_indexes) ? btype : G_UNICODE_BREAK_UNKNOWN)
 #define BREAK_INDEX(btype)                \
-         (line_break_indexes[(btype)])
+	 (line_break_indexes[(btype)])
 #define BREAK_ROW(before_type)            \
-         (line_break_rows[BREAK_INDEX (before_type)])
+	 (line_break_rows[BREAK_INDEX (before_type)])
 #define BREAK_OP(before_type, after_type) \
-         (BREAK_ROW (before_type)[BREAK_INDEX (after_type)])
+	 (BREAK_ROW (before_type)[BREAK_INDEX (after_type)])
 #define IN_BREAK_TABLE(btype)             \
-         (btype < G_N_ELEMENTS(line_break_indexes) && BREAK_INDEX(btype) < INDEX_END_OF_TABLE)
+	 (btype < G_N_ELEMENTS(line_break_indexes) && BREAK_INDEX(btype) < INDEX_END_OF_TABLE)
 
 
 
@@ -425,19 +425,19 @@ static const CharJamoProps HangulJamoProps[] = {
  */
 #define VIRAMA_SCRIPT(wc)        ((wc) >= 0x0901 && (wc) <= 0x17FF)
 #define VIRAMA(wc) ((wc) == 0x094D || \
-                    (wc) == 0x09CD || \
-                    (wc) == 0x0A4D || \
-                    (wc) == 0x0ACD || \
-                    (wc) == 0x0B4D || \
-                    (wc) == 0x0BCD || \
-                    (wc) == 0x0C4D || \
-                    (wc) == 0x0CCD || \
-                    (wc) == 0x0D4D || \
-                    (wc) == 0x0DCA || \
-                    (wc) == 0x0E3A || \
-                    (wc) == 0x0F84 || \
-                    (wc) == 0x1039 || \
-                    (wc) == 0x17D2)
+		    (wc) == 0x09CD || \
+		    (wc) == 0x0A4D || \
+		    (wc) == 0x0ACD || \
+		    (wc) == 0x0B4D || \
+		    (wc) == 0x0BCD || \
+		    (wc) == 0x0C4D || \
+		    (wc) == 0x0CCD || \
+		    (wc) == 0x0D4D || \
+		    (wc) == 0x0DCA || \
+		    (wc) == 0x0E3A || \
+		    (wc) == 0x0F84 || \
+		    (wc) == 0x1039 || \
+		    (wc) == 0x17D2)
 /* Types of Japanese characters */
 #define JAPANESE(wc) ((wc) >= 0x2F00 && (wc) <= 0x30FF)
 #define KANJI(wc)    ((wc) >= 0x2F00 && (wc) <= 0x2FDF)
@@ -502,10 +502,10 @@ typedef enum
  **/
 void
 pango_default_break (const gchar   *text,
-                     gint           length,
-                     PangoAnalysis *analysis,
-                     PangoLogAttr  *attrs,
-                     int            attrs_len)
+		     gint           length,
+		     PangoAnalysis *analysis,
+		     PangoLogAttr  *attrs,
+		     int            attrs_len)
 {
   /* The rationale for all this is in section 5.15 of the Unicode 3.0 book,
    * the line breaking stuff is also in TR14 on unicode.org
@@ -578,41 +578,41 @@ pango_default_break (const gchar   *text,
       break_type = next_break_type;
 
       if (almost_done)
-        {
-          /*
-           * If we have already reached the end of @text g_utf8_next_char()
-           * may not increment next
-           */
-          next_wc = 0;
+	{
+	  /*
+	   * If we have already reached the end of @text g_utf8_next_char()
+	   * may not increment next
+	   */
+	  next_wc = 0;
 	  next_break_type = G_UNICODE_BREAK_UNKNOWN;
 	  done = TRUE;
-        }
+	}
       else
-        {
-          next = g_utf8_next_char (next);
+	{
+	  next = g_utf8_next_char (next);
 
 	  if ((length >= 0 && next >= text + length) || *next == '\0')
-            {
-              /* This is how we fill in the last element (end position) of the
-               * attr array - assume there's a paragraph separators off the end
+	    {
+	      /* This is how we fill in the last element (end position) of the
+	       * attr array - assume there's a paragraph separator off the end
 	       * of @text.
-               */
+	       */
 	      next_wc = PARAGRAPH_SEPARATOR;
 	      almost_done = TRUE;
-            }
-          else
+	    }
+	  else
 	    next_wc = g_utf8_get_char (next);
 
 	  next_break_type = g_unichar_break_type (next_wc);
-          next_break_type = BREAK_TYPE_SAFE (next_break_type);
-        }
+	  next_break_type = BREAK_TYPE_SAFE (next_break_type);
+	}
 
       type = g_unichar_type (wc);
       jamo = JAMO_TYPE (break_type);
 
       /* Determine wheter this forms a Hangul syllable with prev. */
       if (jamo == NO_JAMO)
-        makes_hangul_syllable = FALSE;
+	makes_hangul_syllable = FALSE;
       else
 	{
 	  JamoType prev_end   = HangulJamoProps[prev_jamo].end  ;
@@ -631,106 +631,106 @@ pango_default_break (const gchar   *text,
       /* ---- Cursor position breaks (Grapheme breaks) ---- */
 
       if (wc == '\n')
-        {
-          /* Break before line feed unless prev char is a CR */
-
-          if (prev_wc != '\r')
-            attrs[i].is_cursor_position = TRUE;
-          else
-            attrs[i].is_cursor_position = FALSE;
-        }
+	{
+	  /* Break before line feed unless prev char is a CR */
+
+	  if (prev_wc != '\r')
+	    attrs[i].is_cursor_position = TRUE;
+	  else
+	    attrs[i].is_cursor_position = FALSE;
+	}
       else if (i == 0 ||
-               prev_type == G_UNICODE_CONTROL ||
-               prev_type == G_UNICODE_FORMAT)
-        {
-          /* Break at first position (must be special cased, or if the
-           * first char is say a combining mark there won't be a
-           * cursor position at the start, which seems wrong to me
-           * ???? - maybe it makes sense though, who knows)
-           */
-          /* break after all format or control characters */
-          attrs[i].is_cursor_position = TRUE;
-        }
+	       prev_type == G_UNICODE_CONTROL ||
+	       prev_type == G_UNICODE_FORMAT)
+	{
+	  /* Break at first position (must be special cased, or if the
+	   * first char is say a combining mark there won't be a
+	   * cursor position at the start, which seems wrong to me
+	   * ???? - maybe it makes sense though, who knows)
+	   */
+	  /* break after all format or control characters */
+	  attrs[i].is_cursor_position = TRUE;
+	}
       else
-        {
-          switch (type)
-            {
-            case G_UNICODE_CONTROL:
-            case G_UNICODE_FORMAT:
-              /* Break before all format or control characters */
-              attrs[i].is_cursor_position = TRUE;
-              break;
-
-            case G_UNICODE_COMBINING_MARK:
-            case G_UNICODE_ENCLOSING_MARK:
-            case G_UNICODE_NON_SPACING_MARK:
-              /* Unicode spec includes "Combining marks plus Tibetan
-               * subjoined characters" as joining chars, but lists the
-               * Tibetan subjoined characters as combining marks, and
-               * g_unichar_type() returns NON_SPACING_MARK for the Tibetan
-               * subjoined characters. So who knows, beats me.
-               */
-
-              /* It's a joining character, break only if preceded by
-               * control or format; we already handled the case where
-               * it was preceded earlier, so here we know it wasn't,
-               * don't break
-               */
-              attrs[i].is_cursor_position = FALSE;
-              break;
-
-            case G_UNICODE_LOWERCASE_LETTER:
-            case G_UNICODE_MODIFIER_LETTER:
-            case G_UNICODE_OTHER_LETTER:
-            case G_UNICODE_TITLECASE_LETTER:
-            case G_UNICODE_UPPERCASE_LETTER:
+	{
+	  switch (type)
+	    {
+	    case G_UNICODE_CONTROL:
+	    case G_UNICODE_FORMAT:
+	      /* Break before all format or control characters */
+	      attrs[i].is_cursor_position = TRUE;
+	      break;
+
+	    case G_UNICODE_COMBINING_MARK:
+	    case G_UNICODE_ENCLOSING_MARK:
+	    case G_UNICODE_NON_SPACING_MARK:
+	      /* Unicode spec includes "Combining marks plus Tibetan
+	       * subjoined characters" as joining chars, but lists the
+	       * Tibetan subjoined characters as combining marks, and
+	       * g_unichar_type() returns NON_SPACING_MARK for the Tibetan
+	       * subjoined characters. So who knows, beats me.
+	       */
+
+	      /* It's a joining character, break only if preceded by
+	       * control or format; we already handled the case where
+	       * it was preceded earlier, so here we know it wasn't,
+	       * don't break
+	       */
+	      attrs[i].is_cursor_position = FALSE;
+	      break;
+
+	    case G_UNICODE_LOWERCASE_LETTER:
+	    case G_UNICODE_MODIFIER_LETTER:
+	    case G_UNICODE_OTHER_LETTER:
+	    case G_UNICODE_TITLECASE_LETTER:
+	    case G_UNICODE_UPPERCASE_LETTER:
 
 	      if (makes_hangul_syllable)
-                attrs[i].is_cursor_position = FALSE;
-              else
-                {
-                  /* Handle non-Hangul-syllable non-combining chars */
+		attrs[i].is_cursor_position = FALSE;
+	      else
+		{
+		  /* Handle non-Hangul-syllable non-combining chars */
 
-                  /* Break before Jamo if they are in a broken sequence or
-                   * next to non-Jamo; break if preceded by Jamo; don't
+		  /* Break before Jamo if they are in a broken sequence or
+		   * next to non-Jamo; break if preceded by Jamo; don't
 		   * break if a letter is preceded by a virama; break in
 		   * all other cases. No need to check whether we are or are
-                   * preceded by Jamo explicitly, since a Jamo is not
-                   * a virama, we just break in all cases where we
-                   * aren't a or preceded by a virama.  Don't fool with
+		   * preceded by Jamo explicitly, since a Jamo is not
+		   * a virama, we just break in all cases where we
+		   * aren't a virama or preceded by one.  Don't fool with
 		   * viramas if we aren't part of a script that uses them.
-                   */
-
-                  if (VIRAMA_SCRIPT (wc))
-                    {
-                      /* Check whether we're preceded by a virama; this
-                       * could use some optimization.
-                       */
-                      if (VIRAMA (prev_wc))
-                        attrs[i].is_cursor_position = FALSE;
-                      else
-                        attrs[i].is_cursor_position = TRUE;
-                    }
-                  else
-                    {
-                      attrs[i].is_cursor_position = TRUE;
-                    }
-                }
-              break;
-
-            default:
-              /* Some weirdo char, just break here, why not */
-              attrs[i].is_cursor_position = TRUE;
-              break;
-            }
-        }
+		   */
+
+		  if (VIRAMA_SCRIPT (wc))
+		    {
+		      /* Check whether we're preceded by a virama; this
+		       * could use some optimization.
+		       */
+		      if (VIRAMA (prev_wc))
+			attrs[i].is_cursor_position = FALSE;
+		      else
+			attrs[i].is_cursor_position = TRUE;
+		    }
+		  else
+		    {
+		      attrs[i].is_cursor_position = TRUE;
+		    }
+		}
+	      break;
+
+	    default:
+	      /* Some weirdo char, just break here, why not */
+	      attrs[i].is_cursor_position = TRUE;
+	      break;
+	    }
+	}
 
       /* If this is a grapheme boundary, we have to decide if backspace
        * deletes a character or the whole grapheme cluster */
       if (attrs[i].is_cursor_position)
-        attrs[i].backspace_deletes_character = BACKSPACE_DELETES_CHARACTER (base_character);
+	attrs[i].backspace_deletes_character = BACKSPACE_DELETES_CHARACTER (base_character);
       else
-        attrs[i].backspace_deletes_character = FALSE;
+	attrs[i].backspace_deletes_character = FALSE;
 
       /* ---- Line breaking ---- */
 
@@ -742,9 +742,9 @@ pango_default_break (const gchar   *text,
       attrs[i].is_mandatory_break = FALSE;
 
       if (attrs[i].is_cursor_position) /* If it's not a grapheme boundary,
-                                        * it's not a line break either
-                                        */
-        {
+					* it's not a line break either
+					*/
+	{
 	  /* space followed by a combining mark is handled
 	   * specially; (rule 7a from TR 14)
 	   */
@@ -752,15 +752,15 @@ pango_default_break (const gchar   *text,
 	      next_break_type == G_UNICODE_BREAK_COMBINING_MARK)
 	    break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
 
-          /* Unicode doesn't specify char wrap; we wrap around all chars
-           * except where a line break is prohibited, which means we
-           * effectively break everywhere except inside runs of spaces.
-           */
-          attrs[i].is_char_break = TRUE;
+	  /* Unicode doesn't specify char wrap; we wrap around all chars
+	   * except where a line break is prohibited, which means we
+	   * effectively break everywhere except inside runs of spaces.
+	   */
+	  attrs[i].is_char_break = TRUE;
 
 	  /* Make any necessary replacements first */
-          switch (prev_break_type)
-            {
+	  switch (prev_break_type)
+	    {
 	    case G_UNICODE_BREAK_HANGUL_L_JAMO:
 	    case G_UNICODE_BREAK_HANGUL_V_JAMO:
 	    case G_UNICODE_BREAK_HANGUL_T_JAMO:
@@ -771,16 +771,16 @@ pango_default_break (const gchar   *text,
 	      prev_break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
 	      break;
 
-            case G_UNICODE_BREAK_AMBIGUOUS:
+	    case G_UNICODE_BREAK_AMBIGUOUS:
 	      /* FIXME
-               * we need to resolve the East Asian width
-               * to decide what to do here
+	       * we need to resolve the East Asian width
+	       * to decide what to do here
 	       */
-            case G_UNICODE_BREAK_COMPLEX_CONTEXT:
+	    case G_UNICODE_BREAK_COMPLEX_CONTEXT:
 	      /* FIXME
-               * language engines should handle this case...
+	       * language engines should handle this case...
 	       */
-            case G_UNICODE_BREAK_UNKNOWN:
+	    case G_UNICODE_BREAK_UNKNOWN:
 	      /* convert unknown, complex, ambiguous to ALPHABETIC
 	       */
 	      prev_break_type = G_UNICODE_BREAK_ALPHABETIC;
@@ -790,71 +790,71 @@ pango_default_break (const gchar   *text,
 	      ;
 	    }
 
-          switch (prev_break_type)
-            {
-            case G_UNICODE_BREAK_MANDATORY:
-            case G_UNICODE_BREAK_LINE_FEED:
-            case G_UNICODE_BREAK_NEXT_LINE:
-              attrs[i].is_line_break = TRUE;
-              attrs[i].is_mandatory_break = TRUE;
-              break;
-
-            case G_UNICODE_BREAK_CARRIAGE_RETURN:
-              if (wc != '\n')
-                {
-                  attrs[i].is_line_break = TRUE;
-                  attrs[i].is_mandatory_break = TRUE;
-                }
-              break;
-
-            case G_UNICODE_BREAK_CONTINGENT:
-              /* can break after 0xFFFC by default, though we might want
-               * to eventually have a PangoLayout setting or
-               * PangoAttribute that disables this, if for some
-               * application breaking after objects is not desired.
-               */
-              break_op = BREAK_ALLOWED;
-              break;
-
-            case G_UNICODE_BREAK_SURROGATE:
+	  switch (prev_break_type)
+	    {
+	    case G_UNICODE_BREAK_MANDATORY:
+	    case G_UNICODE_BREAK_LINE_FEED:
+	    case G_UNICODE_BREAK_NEXT_LINE:
+	      attrs[i].is_line_break = TRUE;
+	      attrs[i].is_mandatory_break = TRUE;
+	      break;
+
+	    case G_UNICODE_BREAK_CARRIAGE_RETURN:
+	      if (wc != '\n')
+		{
+		  attrs[i].is_line_break = TRUE;
+		  attrs[i].is_mandatory_break = TRUE;
+		}
+	      break;
+
+	    case G_UNICODE_BREAK_CONTINGENT:
+	      /* can break after 0xFFFC by default, though we might want
+	       * to eventually have a PangoLayout setting or
+	       * PangoAttribute that disables this, if for some
+	       * application breaking after objects is not desired.
+	       */
+	      break_op = BREAK_ALLOWED;
+	      break;
+
+	    case G_UNICODE_BREAK_SURROGATE:
 	      g_assert_not_reached ();
-              break;
-
-            default:
-              g_assert (IN_BREAK_TABLE (prev_break_type));
-
-              /* Note that our table assumes that combining marks
-               * are only applied to alphabetic characters;
-               * tech report 14 explains how to remove this assumption
-               * from the code, if anyone ever cares, but it shouldn't
-               * be a problem. Also this issue sort of goes
-               * away since we only look for breaks on grapheme
-               * boundaries.
-               */
-
-              switch (break_type)
-                {
-                case G_UNICODE_BREAK_MANDATORY:
-                case G_UNICODE_BREAK_LINE_FEED:
-                case G_UNICODE_BREAK_CARRIAGE_RETURN:
-                case G_UNICODE_BREAK_NEXT_LINE:
-                case G_UNICODE_BREAK_SPACE:
-                  /* These types all "pile up" at the end of lines and
-                   * get elided.
-                   */
-                  break_op = BREAK_PROHIBITED;
-                  break;
-
-                case G_UNICODE_BREAK_CONTINGENT:
-                  /* break before 0xFFFC by default, eventually
-                   * make this configurable?
-                   */
-                  break_op = BREAK_ALLOWED;
-                  break;
-
-                case G_UNICODE_BREAK_SURROGATE:
+	      break;
+
+	    default:
+	      g_assert (IN_BREAK_TABLE (prev_break_type));
+
+	      /* Note that our table assumes that combining marks
+	       * are only applied to alphabetic characters;
+	       * tech report 14 explains how to remove this assumption
+	       * from the code, if anyone ever cares, but it shouldn't
+	       * be a problem. Also this issue sort of goes
+	       * away since we only look for breaks on grapheme
+	       * boundaries.
+	       */
+
+	      switch (break_type)
+		{
+		case G_UNICODE_BREAK_MANDATORY:
+		case G_UNICODE_BREAK_LINE_FEED:
+		case G_UNICODE_BREAK_CARRIAGE_RETURN:
+		case G_UNICODE_BREAK_NEXT_LINE:
+		case G_UNICODE_BREAK_SPACE:
+		  /* These types all "pile up" at the end of lines and
+		   * get elided.
+		   */
+		  break_op = BREAK_PROHIBITED;
+		  break;
+
+		case G_UNICODE_BREAK_CONTINGENT:
+		  /* break before 0xFFFC by default, eventually
+		   * make this configurable?
+		   */
+		  break_op = BREAK_ALLOWED;
+		  break;
+
+		case G_UNICODE_BREAK_SURROGATE:
 		  g_assert_not_reached ();
-                  break;
+		  break;
 
 		/* Hangul additions are from Unicode 4.1 UAX#14 */
 		case G_UNICODE_BREAK_HANGUL_L_JAMO:
@@ -862,9 +862,9 @@ pango_default_break (const gchar   *text,
 		case G_UNICODE_BREAK_HANGUL_T_JAMO:
 		case G_UNICODE_BREAK_HANGUL_LV_SYLLABLE:
 		case G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE:
-                  /* treat Jamo as IDEOGRAPHIC from now
+		  /* treat Jamo as IDEOGRAPHIC from now
 		   */
-                  break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
+		  break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
 
 		  if (makes_hangul_syllable)
 		    break_op = BREAK_IF_SPACES;
@@ -882,55 +882,55 @@ pango_default_break (const gchar   *text,
 		   * language engines should handle this case...
 		   */
 		case G_UNICODE_BREAK_UNKNOWN:
-	          /* treat unknown, complex, and ambiguous like ALPHABETIC
+		  /* treat unknown, complex, and ambiguous like ALPHABETIC
 		   * for now
-	           */
-                  break_op = BREAK_OP (prev_break_type, G_UNICODE_BREAK_ALPHABETIC);
-                  break;
+		   */
+		  break_op = BREAK_OP (prev_break_type, G_UNICODE_BREAK_ALPHABETIC);
+		  break;
 
 		default:
 
 		  g_assert (IN_BREAK_TABLE (break_type));
-                  break_op = BREAK_OP (prev_break_type, break_type);
-                  break;
-                }
-              break;
-            }
-
-          if (break_op != BREAK_ALREADY_HANDLED)
-            {
-              switch (break_op)
-                {
-                case BREAK_PROHIBITED:
-                  /* can't break here */
-                  attrs[i].is_char_break = FALSE;
-                  break;
-
-                case BREAK_IF_SPACES:
-                  /* break if prev char was space */
-                  if (prev_was_break_space)
-                    attrs[i].is_line_break = TRUE;
-                  break;
-
-                case BREAK_ALLOWED:
-                  attrs[i].is_line_break = TRUE;
-                  break;
-
-                default:
-                  g_assert_not_reached ();
-                  break;
-                }
-            }
-        }
+		  break_op = BREAK_OP (prev_break_type, break_type);
+		  break;
+		}
+	      break;
+	    }
+
+	  if (break_op != BREAK_ALREADY_HANDLED)
+	    {
+	      switch (break_op)
+		{
+		case BREAK_PROHIBITED:
+		  /* can't break here */
+		  attrs[i].is_char_break = FALSE;
+		  break;
+
+		case BREAK_IF_SPACES:
+		  /* break if prev char was space */
+		  if (prev_was_break_space)
+		    attrs[i].is_line_break = TRUE;
+		  break;
+
+		case BREAK_ALLOWED:
+		  attrs[i].is_line_break = TRUE;
+		  break;
+
+		default:
+		  g_assert_not_reached ();
+		  break;
+		}
+	    }
+	}
 
       if (break_type != G_UNICODE_BREAK_SPACE)
-        {
-          prev_break_type = break_type;
-          prev_was_break_space = FALSE;
+	{
+	  prev_break_type = break_type;
+	  prev_was_break_space = FALSE;
 	  prev_jamo = jamo;
-        }
+	}
       else
-        prev_was_break_space = TRUE;
+	prev_was_break_space = TRUE;
 
       /* ---- Word breaks ---- */
 
@@ -939,100 +939,100 @@ pango_default_break (const gchar   *text,
       attrs[i].is_word_end = FALSE;
 
       if (current_word_type != WordNone)
-        {
-          /* Check for a word end */
-          switch (type)
-            {
-            case G_UNICODE_COMBINING_MARK:
-            case G_UNICODE_ENCLOSING_MARK:
-            case G_UNICODE_NON_SPACING_MARK:
+	{
+	  /* Check for a word end */
+	  switch (type)
+	    {
+	    case G_UNICODE_COMBINING_MARK:
+	    case G_UNICODE_ENCLOSING_MARK:
+	    case G_UNICODE_NON_SPACING_MARK:
 	    case G_UNICODE_FORMAT:
-              /* nothing, we just eat these up as part of the word */
-              break;
-
-            case G_UNICODE_LOWERCASE_LETTER:
-            case G_UNICODE_MODIFIER_LETTER:
-            case G_UNICODE_OTHER_LETTER:
-            case G_UNICODE_TITLECASE_LETTER:
-            case G_UNICODE_UPPERCASE_LETTER:
-              if (current_word_type == WordLetters)
-                {
-                  /* Japanese special cases for ending the word */
-                  if (JAPANESE (last_word_letter) ||
-                      JAPANESE (wc))
-                    {
-                      if ((HIRAGANA (last_word_letter) &&
-                           !HIRAGANA (wc)) ||
-                          (KATAKANA (last_word_letter) &&
-                           !(KATAKANA (wc) || HIRAGANA (wc))) ||
-                          (KANJI (last_word_letter) &&
-                           !(HIRAGANA (wc) || KANJI (wc))) ||
-                          (JAPANESE (last_word_letter) &&
-                           !JAPANESE (wc)) ||
-                          (!JAPANESE (last_word_letter) &&
-                           JAPANESE (wc)))
-                        attrs[i].is_word_end = TRUE;
-                    }
-                }
-              else
-                {
-                  /* end the number word, start the letter word */
-                  attrs[i].is_word_end = TRUE;
-                  attrs[i].is_word_start = TRUE;
-                  current_word_type = WordLetters;
-                }
-
-              last_word_letter = wc;
-              break;
-
-            case G_UNICODE_DECIMAL_NUMBER:
-            case G_UNICODE_LETTER_NUMBER:
-            case G_UNICODE_OTHER_NUMBER:
-              if (current_word_type != WordNumbers)
-                {
-                  attrs[i].is_word_end = TRUE;
-                  attrs[i].is_word_start = TRUE;
-                  current_word_type = WordNumbers;
-                }
-
-              last_word_letter = wc;
-              break;
-
-            default:
-              /* Punctuation, control/format chars, etc. all end a word. */
-              attrs[i].is_word_end = TRUE;
+	      /* nothing, we just eat these up as part of the word */
+	      break;
+
+	    case G_UNICODE_LOWERCASE_LETTER:
+	    case G_UNICODE_MODIFIER_LETTER:
+	    case G_UNICODE_OTHER_LETTER:
+	    case G_UNICODE_TITLECASE_LETTER:
+	    case G_UNICODE_UPPERCASE_LETTER:
+	      if (current_word_type == WordLetters)
+		{
+		  /* Japanese special cases for ending the word */
+		  if (JAPANESE (last_word_letter) ||
+		      JAPANESE (wc))
+		    {
+		      if ((HIRAGANA (last_word_letter) &&
+			   !HIRAGANA (wc)) ||
+			  (KATAKANA (last_word_letter) &&
+			   !(KATAKANA (wc) || HIRAGANA (wc))) ||
+			  (KANJI (last_word_letter) &&
+			   !(HIRAGANA (wc) || KANJI (wc))) ||
+			  (JAPANESE (last_word_letter) &&
+			   !JAPANESE (wc)) ||
+			  (!JAPANESE (last_word_letter) &&
+			   JAPANESE (wc)))
+			attrs[i].is_word_end = TRUE;
+		    }
+		}
+	      else
+		{
+		  /* end the number word, start the letter word */
+		  attrs[i].is_word_end = TRUE;
+		  attrs[i].is_word_start = TRUE;
+		  current_word_type = WordLetters;
+		}
+
+	      last_word_letter = wc;
+	      break;
+
+	    case G_UNICODE_DECIMAL_NUMBER:
+	    case G_UNICODE_LETTER_NUMBER:
+	    case G_UNICODE_OTHER_NUMBER:
+	      if (current_word_type != WordNumbers)
+		{
+		  attrs[i].is_word_end = TRUE;
+		  attrs[i].is_word_start = TRUE;
+		  current_word_type = WordNumbers;
+		}
+
+	      last_word_letter = wc;
+	      break;
+
+	    default:
+	      /* Punctuation, control/format chars, etc. all end a word. */
+	      attrs[i].is_word_end = TRUE;
 	      current_word_type = WordNone;
-              break;
-            }
-        }
+	      break;
+	    }
+	}
       else
-        {
-          /* Check for a word start */
-          switch (type)
-            {
-            case G_UNICODE_LOWERCASE_LETTER:
-            case G_UNICODE_MODIFIER_LETTER:
-            case G_UNICODE_OTHER_LETTER:
-            case G_UNICODE_TITLECASE_LETTER:
-            case G_UNICODE_UPPERCASE_LETTER:
-              current_word_type = WordLetters;
-              last_word_letter = wc;
-              attrs[i].is_word_start = TRUE;
-              break;
-
-            case G_UNICODE_DECIMAL_NUMBER:
-            case G_UNICODE_LETTER_NUMBER:
-            case G_UNICODE_OTHER_NUMBER:
-              current_word_type = WordNumbers;
-              last_word_letter = wc;
-              attrs[i].is_word_start = TRUE;
-              break;
-
-            default:
-              /* No word here */
-              break;
-            }
-        }
+	{
+	  /* Check for a word start */
+	  switch (type)
+	    {
+	    case G_UNICODE_LOWERCASE_LETTER:
+	    case G_UNICODE_MODIFIER_LETTER:
+	    case G_UNICODE_OTHER_LETTER:
+	    case G_UNICODE_TITLECASE_LETTER:
+	    case G_UNICODE_UPPERCASE_LETTER:
+	      current_word_type = WordLetters;
+	      last_word_letter = wc;
+	      attrs[i].is_word_start = TRUE;
+	      break;
+
+	    case G_UNICODE_DECIMAL_NUMBER:
+	    case G_UNICODE_LETTER_NUMBER:
+	    case G_UNICODE_OTHER_NUMBER:
+	      current_word_type = WordNumbers;
+	      last_word_letter = wc;
+	      attrs[i].is_word_start = TRUE;
+	      break;
+
+	    default:
+	      /* No word here */
+	      break;
+	    }
+	}
 
       /* ---- Sentence breaks ---- */
 
@@ -1050,21 +1050,21 @@ pango_default_break (const gchar   *text,
        */
 
 #define MAYBE_START_NEW_SENTENCE                                \
-              switch (type)                                     \
-                {                                               \
-                case G_UNICODE_LINE_SEPARATOR:                  \
-                case G_UNICODE_PARAGRAPH_SEPARATOR:             \
-                case G_UNICODE_CONTROL:                         \
-                case G_UNICODE_FORMAT:                          \
-                case G_UNICODE_SPACE_SEPARATOR:                 \
-                  sentence_state = STATE_SENTENCE_OUTSIDE;      \
-                  break;                                        \
-                                                                \
-                default:                                        \
-                  sentence_state = STATE_SENTENCE_BODY;         \
-                  attrs[i].is_sentence_start = TRUE;            \
-                  break;                                        \
-                }
+	      switch (type)                                     \
+		{                                               \
+		case G_UNICODE_LINE_SEPARATOR:                  \
+		case G_UNICODE_PARAGRAPH_SEPARATOR:             \
+		case G_UNICODE_CONTROL:                         \
+		case G_UNICODE_FORMAT:                          \
+		case G_UNICODE_SPACE_SEPARATOR:                 \
+		  sentence_state = STATE_SENTENCE_OUTSIDE;      \
+		  break;                                        \
+								\
+		default:                                        \
+		  sentence_state = STATE_SENTENCE_BODY;         \
+		  attrs[i].is_sentence_start = TRUE;            \
+		  break;                                        \
+		}
 
       /* No sentence break at the start of the text */
 
@@ -1084,374 +1084,374 @@ pango_default_break (const gchar   *text,
        * followed by newline
        */
       switch (prev_type)
-        {
-        case G_UNICODE_LINE_SEPARATOR:
-        case G_UNICODE_PARAGRAPH_SEPARATOR:
-        case G_UNICODE_CONTROL:
-        case G_UNICODE_FORMAT:
-          if (wc == '\r')
-            {
-              if (next_wc != '\n')
-                attrs[i].is_sentence_boundary = TRUE;
-            }
-          else
-            attrs[i].is_sentence_boundary = TRUE;
-          break;
-
-        default:
-          break;
-        }
+	{
+	case G_UNICODE_LINE_SEPARATOR:
+	case G_UNICODE_PARAGRAPH_SEPARATOR:
+	case G_UNICODE_CONTROL:
+	case G_UNICODE_FORMAT:
+	  if (wc == '\r')
+	    {
+	      if (next_wc != '\n')
+		attrs[i].is_sentence_boundary = TRUE;
+	    }
+	  else
+	    attrs[i].is_sentence_boundary = TRUE;
+	  break;
+
+	default:
+	  break;
+	}
 
       /* break before para/line separators except newline following
        * carriage return
        */
       switch (type)
-        {
-        case G_UNICODE_LINE_SEPARATOR:
-        case G_UNICODE_PARAGRAPH_SEPARATOR:
-        case G_UNICODE_CONTROL:
-        case G_UNICODE_FORMAT:
-          if (wc == '\n')
-            {
-              if (prev_wc != '\r')
-                attrs[i].is_sentence_boundary = TRUE;
-            }
-          else
-            attrs[i].is_sentence_boundary = TRUE;
-          break;
-
-        default:
-          break;
-        }
+	{
+	case G_UNICODE_LINE_SEPARATOR:
+	case G_UNICODE_PARAGRAPH_SEPARATOR:
+	case G_UNICODE_CONTROL:
+	case G_UNICODE_FORMAT:
+	  if (wc == '\n')
+	    {
+	      if (prev_wc != '\r')
+		attrs[i].is_sentence_boundary = TRUE;
+	    }
+	  else
+	    attrs[i].is_sentence_boundary = TRUE;
+	  break;
+
+	default:
+	  break;
+	}
 
       switch (sentence_state)
-        {
-        case STATE_SENTENCE_OUTSIDE:
-          /* Start sentence if we have non-whitespace/format/control */
-          switch (type)
-            {
-            case G_UNICODE_LINE_SEPARATOR:
-            case G_UNICODE_PARAGRAPH_SEPARATOR:
-            case G_UNICODE_CONTROL:
-            case G_UNICODE_FORMAT:
-            case G_UNICODE_SPACE_SEPARATOR:
-              break;
-
-            default:
-              attrs[i].is_sentence_start = TRUE;
-              sentence_state = STATE_SENTENCE_BODY;
-              break;
-            }
-          break;
-
-        case STATE_SENTENCE_BODY:
-          /* If we already broke here due to separators, end the sentence. */
-          if (attrs[i].is_sentence_boundary)
-            {
-              attrs[i].is_sentence_end = TRUE;
-
-              MAYBE_START_NEW_SENTENCE;
-            }
-          else
-            {
-              if (wc == '.')
-                sentence_state = STATE_SENTENCE_DOT;
-              else if (wc == '?' || wc == '!')
-                sentence_state = STATE_SENTENCE_TERM;
-            }
-          break;
-
-        case STATE_SENTENCE_TERM:
-          /* End sentence on anything but close punctuation and some
-           * loosely-specified OTHER_PUNCTUATION such as period,
-           * comma, etc.; follow Unicode rules for breaks
-           */
-          switch (type)
-            {
-            case G_UNICODE_OTHER_PUNCTUATION:
-            case G_UNICODE_CLOSE_PUNCTUATION:
-              if (type == G_UNICODE_CLOSE_PUNCTUATION ||
-                  wc == '.' ||
-                  wc == ',' ||
-                  wc == '?' ||
-                  wc == '!')
-                sentence_state = STATE_SENTENCE_POST_TERM_CLOSE;
-              else
-                {
-                  attrs[i].is_sentence_end = TRUE;
-                  attrs[i].is_sentence_boundary = TRUE;
-
-                  MAYBE_START_NEW_SENTENCE;
-                }
-              break;
-
-            case G_UNICODE_SPACE_SEPARATOR:
-              attrs[i].is_sentence_end = TRUE;
-              sentence_state = STATE_SENTENCE_POST_TERM_SPACE;
-              break;
-
-            case G_UNICODE_LINE_SEPARATOR:
-            case G_UNICODE_PARAGRAPH_SEPARATOR:
-              attrs[i].is_sentence_end = TRUE;
-              sentence_state = STATE_SENTENCE_POST_TERM_SEP;
-              break;
-
-            default:
-              attrs[i].is_sentence_end = TRUE;
-              attrs[i].is_sentence_boundary = TRUE;
-
-              MAYBE_START_NEW_SENTENCE;
-
-              break;
-            }
-          break;
-
-        case STATE_SENTENCE_POST_TERM_CLOSE:
-          /* End sentence on anything besides more punctuation; follow
-           * rules for breaks
-           */
-          switch (type)
-            {
-            case G_UNICODE_OTHER_PUNCTUATION:
-            case G_UNICODE_CLOSE_PUNCTUATION:
-              if (type == G_UNICODE_CLOSE_PUNCTUATION ||
-                  wc == '.' ||
-                  wc == ',' ||
-                  wc == '?' ||
-                  wc == '!')
-                /* continue in this state */
-                ;
-              else
-                {
-                  attrs[i].is_sentence_end = TRUE;
-                  attrs[i].is_sentence_boundary = TRUE;
-
-                  MAYBE_START_NEW_SENTENCE;
-                }
-              break;
-
-            case G_UNICODE_SPACE_SEPARATOR:
-              attrs[i].is_sentence_end = TRUE;
-              sentence_state = STATE_SENTENCE_POST_TERM_SPACE;
-              break;
-
-            case G_UNICODE_LINE_SEPARATOR:
-            case G_UNICODE_PARAGRAPH_SEPARATOR:
-              attrs[i].is_sentence_end = TRUE;
-              /* undo the unconditional break-at-all-line/para-separators
-               * from above; I'm not sure this is what the Unicode spec
-               * intends, but it seems right - we get to include
-               * a single line/para separator in the sentence according
-               * to their rules
-               */
-              attrs[i].is_sentence_boundary = FALSE;
-              sentence_state = STATE_SENTENCE_POST_TERM_SEP;
-              break;
-
-            default:
-              attrs[i].is_sentence_end = TRUE;
-              attrs[i].is_sentence_boundary = TRUE;
-
-              MAYBE_START_NEW_SENTENCE;
-
-              break;
-            }
-          break;
-
-        case STATE_SENTENCE_POST_TERM_SPACE:
-
-          /* Sentence is definitely already ended; to enter this state
-           * we had to see a space, which ends the sentence.
-           */
-
-          switch (type)
-            {
-            case G_UNICODE_SPACE_SEPARATOR:
-              /* continue in this state */
-              break;
-
-            case G_UNICODE_LINE_SEPARATOR:
-            case G_UNICODE_PARAGRAPH_SEPARATOR:
-              /* undo the unconditional break-at-all-line/para-separators
-               * from above; I'm not sure this is what the Unicode spec
-               * intends, but it seems right
-               */
-              attrs[i].is_sentence_boundary = FALSE;
-              sentence_state = STATE_SENTENCE_POST_TERM_SEP;
-              break;
-
-            default:
-              attrs[i].is_sentence_boundary = TRUE;
-
-              MAYBE_START_NEW_SENTENCE;
-
-              break;
-            }
-          break;
-
-        case STATE_SENTENCE_POST_TERM_SEP:
-          /* Break is forced at this point, unless we're a newline
-           * after a CR, then we will break after the newline on the
-           * next iteration. Only a single Sep can be in the
-           * sentence.
-           */
-          if (!(prev_wc == '\r' && wc == '\n'))
-            attrs[i].is_sentence_boundary = TRUE;
-
-          MAYBE_START_NEW_SENTENCE;
-
-          break;
-
-        case STATE_SENTENCE_DOT:
-          switch (type)
-            {
-            case G_UNICODE_CLOSE_PUNCTUATION:
-              sentence_state = STATE_SENTENCE_POST_DOT_CLOSE;
-              break;
-
-            case G_UNICODE_SPACE_SEPARATOR:
-              possible_sentence_end = i;
-              sentence_state = STATE_SENTENCE_POST_DOT_SPACE;
-              break;
-
-            default:
-              /* If we broke on a control/format char, end the
-               * sentence; else this was not a sentence end, since
-               * we didn't enter the POST_DOT_SPACE state.
-               */
-              if (attrs[i].is_sentence_boundary)
-                {
-                  attrs[i].is_sentence_end = TRUE;
-
-                  MAYBE_START_NEW_SENTENCE;
-                }
-              else
-                sentence_state = STATE_SENTENCE_BODY;
-              break;
-            }
-          break;
-
-        case STATE_SENTENCE_POST_DOT_CLOSE:
-          switch (type)
-            {
-            case G_UNICODE_SPACE_SEPARATOR:
-              possible_sentence_end = i;
-              sentence_state = STATE_SENTENCE_POST_DOT_SPACE;
-              break;
-
-            default:
-              /* If we broke on a control/format char, end the
-               * sentence; else this was not a sentence end, since
-               * we didn't enter the POST_DOT_SPACE state.
-               */
-              if (attrs[i].is_sentence_boundary)
-                {
-                  attrs[i].is_sentence_end = TRUE;
-
-                  MAYBE_START_NEW_SENTENCE;
-                }
-              else
-                sentence_state = STATE_SENTENCE_BODY;
-              break;
-            }
-          break;
-
-        case STATE_SENTENCE_POST_DOT_SPACE:
-
-          possible_sentence_boundary = i;
-
-          switch (type)
-            {
-            case G_UNICODE_SPACE_SEPARATOR:
-              /* remain in current state */
-              break;
-
-            case G_UNICODE_OPEN_PUNCTUATION:
-              sentence_state = STATE_SENTENCE_POST_DOT_OPEN;
-              break;
-
-            case G_UNICODE_LOWERCASE_LETTER:
-              /* wasn't a sentence-ending period; so re-enter the sentence
-               * body
-               */
-              sentence_state = STATE_SENTENCE_BODY;
-              break;
-
-            default:
-              /* End the sentence, break, maybe start a new one */
-
-              g_assert (possible_sentence_end >= 0);
-              g_assert (possible_sentence_boundary >= 0);
-
-              attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;
-              attrs[possible_sentence_end].is_sentence_end = TRUE;
-
-              possible_sentence_end = -1;
-              possible_sentence_boundary = -1;
-
-              MAYBE_START_NEW_SENTENCE;
-
-              break;
-            }
-          break;
-
-        case STATE_SENTENCE_POST_DOT_OPEN:
-          switch (type)
-            {
-            case G_UNICODE_OPEN_PUNCTUATION:
-              /* continue in current state */
-              break;
-
-            case G_UNICODE_LOWERCASE_LETTER:
-              /* wasn't a sentence-ending period; so re-enter the sentence
-               * body
-               */
-              sentence_state = STATE_SENTENCE_BODY;
-              break;
-
-            default:
-              /* End the sentence, break, maybe start a new one */
-
-              g_assert (possible_sentence_end >= 0);
-              g_assert (possible_sentence_boundary >= 0);
-
-              attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;
-              attrs[possible_sentence_end].is_sentence_end = TRUE;
-
-              possible_sentence_end = -1;
-              possible_sentence_boundary = -1;
-
-              MAYBE_START_NEW_SENTENCE;
-
-              break;
-            }
-          break;
-
-        case STATE_SENTENCE_POST_DOT_SEP:
-          /* Break is forced at this point, unless we're a newline
-           * after a CR, then we will break after the newline on the
-           * next iteration. Only a single Sep can be in the
-           * sentence.
-           */
-          if (!(prev_wc == '\r' && wc == '\n'))
-            attrs[i].is_sentence_boundary = TRUE;
+	{
+	case STATE_SENTENCE_OUTSIDE:
+	  /* Start sentence if we have non-whitespace/format/control */
+	  switch (type)
+	    {
+	    case G_UNICODE_LINE_SEPARATOR:
+	    case G_UNICODE_PARAGRAPH_SEPARATOR:
+	    case G_UNICODE_CONTROL:
+	    case G_UNICODE_FORMAT:
+	    case G_UNICODE_SPACE_SEPARATOR:
+	      break;
 
-          g_assert (possible_sentence_end >= 0);
-          g_assert (possible_sentence_boundary >= 0);
+	    default:
+	      attrs[i].is_sentence_start = TRUE;
+	      sentence_state = STATE_SENTENCE_BODY;
+	      break;
+	    }
+	  break;
 
-          attrs[possible_sentence_end].is_sentence_end = TRUE;
+	case STATE_SENTENCE_BODY:
+	  /* If we already broke here due to separators, end the sentence. */
+	  if (attrs[i].is_sentence_boundary)
+	    {
+	      attrs[i].is_sentence_end = TRUE;
 
-          possible_sentence_end = -1;
-          possible_sentence_boundary = -1;
+	      MAYBE_START_NEW_SENTENCE;
+	    }
+	  else
+	    {
+	      if (wc == '.')
+		sentence_state = STATE_SENTENCE_DOT;
+	      else if (wc == '?' || wc == '!')
+		sentence_state = STATE_SENTENCE_TERM;
+	    }
+	  break;
 
-          MAYBE_START_NEW_SENTENCE;
+	case STATE_SENTENCE_TERM:
+	  /* End sentence on anything but close punctuation and some
+	   * loosely-specified OTHER_PUNCTUATION such as period,
+	   * comma, etc.; follow Unicode rules for breaks
+	   */
+	  switch (type)
+	    {
+	    case G_UNICODE_OTHER_PUNCTUATION:
+	    case G_UNICODE_CLOSE_PUNCTUATION:
+	      if (type == G_UNICODE_CLOSE_PUNCTUATION ||
+		  wc == '.' ||
+		  wc == ',' ||
+		  wc == '?' ||
+		  wc == '!')
+		sentence_state = STATE_SENTENCE_POST_TERM_CLOSE;
+	      else
+		{
+		  attrs[i].is_sentence_end = TRUE;
+		  attrs[i].is_sentence_boundary = TRUE;
+
+		  MAYBE_START_NEW_SENTENCE;
+		}
+	      break;
+
+	    case G_UNICODE_SPACE_SEPARATOR:
+	      attrs[i].is_sentence_end = TRUE;
+	      sentence_state = STATE_SENTENCE_POST_TERM_SPACE;
+	      break;
+
+	    case G_UNICODE_LINE_SEPARATOR:
+	    case G_UNICODE_PARAGRAPH_SEPARATOR:
+	      attrs[i].is_sentence_end = TRUE;
+	      sentence_state = STATE_SENTENCE_POST_TERM_SEP;
+	      break;
+
+	    default:
+	      attrs[i].is_sentence_end = TRUE;
+	      attrs[i].is_sentence_boundary = TRUE;
+
+	      MAYBE_START_NEW_SENTENCE;
+
+	      break;
+	    }
+	  break;
+
+	case STATE_SENTENCE_POST_TERM_CLOSE:
+	  /* End sentence on anything besides more punctuation; follow
+	   * rules for breaks
+	   */
+	  switch (type)
+	    {
+	    case G_UNICODE_OTHER_PUNCTUATION:
+	    case G_UNICODE_CLOSE_PUNCTUATION:
+	      if (type == G_UNICODE_CLOSE_PUNCTUATION ||
+		  wc == '.' ||
+		  wc == ',' ||
+		  wc == '?' ||
+		  wc == '!')
+		/* continue in this state */
+		;
+	      else
+		{
+		  attrs[i].is_sentence_end = TRUE;
+		  attrs[i].is_sentence_boundary = TRUE;
+
+		  MAYBE_START_NEW_SENTENCE;
+		}
+	      break;
+
+	    case G_UNICODE_SPACE_SEPARATOR:
+	      attrs[i].is_sentence_end = TRUE;
+	      sentence_state = STATE_SENTENCE_POST_TERM_SPACE;
+	      break;
+
+	    case G_UNICODE_LINE_SEPARATOR:
+	    case G_UNICODE_PARAGRAPH_SEPARATOR:
+	      attrs[i].is_sentence_end = TRUE;
+	      /* undo the unconditional break-at-all-line/para-separators
+	       * from above; I'm not sure this is what the Unicode spec
+	       * intends, but it seems right - we get to include
+	       * a single line/para separator in the sentence according
+	       * to their rules
+	       */
+	      attrs[i].is_sentence_boundary = FALSE;
+	      sentence_state = STATE_SENTENCE_POST_TERM_SEP;
+	      break;
+
+	    default:
+	      attrs[i].is_sentence_end = TRUE;
+	      attrs[i].is_sentence_boundary = TRUE;
+
+	      MAYBE_START_NEW_SENTENCE;
+
+	      break;
+	    }
+	  break;
+
+	case STATE_SENTENCE_POST_TERM_SPACE:
+
+	  /* Sentence is definitely already ended; to enter this state
+	   * we had to see a space, which ends the sentence.
+	   */
+
+	  switch (type)
+	    {
+	    case G_UNICODE_SPACE_SEPARATOR:
+	      /* continue in this state */
+	      break;
+
+	    case G_UNICODE_LINE_SEPARATOR:
+	    case G_UNICODE_PARAGRAPH_SEPARATOR:
+	      /* undo the unconditional break-at-all-line/para-separators
+	       * from above; I'm not sure this is what the Unicode spec
+	       * intends, but it seems right
+	       */
+	      attrs[i].is_sentence_boundary = FALSE;
+	      sentence_state = STATE_SENTENCE_POST_TERM_SEP;
+	      break;
+
+	    default:
+	      attrs[i].is_sentence_boundary = TRUE;
+
+	      MAYBE_START_NEW_SENTENCE;
+
+	      break;
+	    }
+	  break;
+
+	case STATE_SENTENCE_POST_TERM_SEP:
+	  /* Break is forced at this point, unless we're a newline
+	   * after a CR, then we will break after the newline on the
+	   * next iteration. Only a single Sep can be in the
+	   * sentence.
+	   */
+	  if (!(prev_wc == '\r' && wc == '\n'))
+	    attrs[i].is_sentence_boundary = TRUE;
 
-          break;
+	  MAYBE_START_NEW_SENTENCE;
 
-        default:
-          g_assert_not_reached ();
-          break;
-        }
+	  break;
+
+	case STATE_SENTENCE_DOT:
+	  switch (type)
+	    {
+	    case G_UNICODE_CLOSE_PUNCTUATION:
+	      sentence_state = STATE_SENTENCE_POST_DOT_CLOSE;
+	      break;
+
+	    case G_UNICODE_SPACE_SEPARATOR:
+	      possible_sentence_end = i;
+	      sentence_state = STATE_SENTENCE_POST_DOT_SPACE;
+	      break;
+
+	    default:
+	      /* If we broke on a control/format char, end the
+	       * sentence; else this was not a sentence end, since
+	       * we didn't enter the POST_DOT_SPACE state.
+	       */
+	      if (attrs[i].is_sentence_boundary)
+		{
+		  attrs[i].is_sentence_end = TRUE;
+
+		  MAYBE_START_NEW_SENTENCE;
+		}
+	      else
+		sentence_state = STATE_SENTENCE_BODY;
+	      break;
+	    }
+	  break;
+
+	case STATE_SENTENCE_POST_DOT_CLOSE:
+	  switch (type)
+	    {
+	    case G_UNICODE_SPACE_SEPARATOR:
+	      possible_sentence_end = i;
+	      sentence_state = STATE_SENTENCE_POST_DOT_SPACE;
+	      break;
+
+	    default:
+	      /* If we broke on a control/format char, end the
+	       * sentence; else this was not a sentence end, since
+	       * we didn't enter the POST_DOT_SPACE state.
+	       */
+	      if (attrs[i].is_sentence_boundary)
+		{
+		  attrs[i].is_sentence_end = TRUE;
+
+		  MAYBE_START_NEW_SENTENCE;
+		}
+	      else
+		sentence_state = STATE_SENTENCE_BODY;
+	      break;
+	    }
+	  break;
+
+	case STATE_SENTENCE_POST_DOT_SPACE:
+
+	  possible_sentence_boundary = i;
+
+	  switch (type)
+	    {
+	    case G_UNICODE_SPACE_SEPARATOR:
+	      /* remain in current state */
+	      break;
+
+	    case G_UNICODE_OPEN_PUNCTUATION:
+	      sentence_state = STATE_SENTENCE_POST_DOT_OPEN;
+	      break;
+
+	    case G_UNICODE_LOWERCASE_LETTER:
+	      /* wasn't a sentence-ending period; so re-enter the sentence
+	       * body
+	       */
+	      sentence_state = STATE_SENTENCE_BODY;
+	      break;
+
+	    default:
+	      /* End the sentence, break, maybe start a new one */
+
+	      g_assert (possible_sentence_end >= 0);
+	      g_assert (possible_sentence_boundary >= 0);
+
+	      attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;
+	      attrs[possible_sentence_end].is_sentence_end = TRUE;
+
+	      possible_sentence_end = -1;
+	      possible_sentence_boundary = -1;
+
+	      MAYBE_START_NEW_SENTENCE;
+
+	      break;
+	    }
+	  break;
+
+	case STATE_SENTENCE_POST_DOT_OPEN:
+	  switch (type)
+	    {
+	    case G_UNICODE_OPEN_PUNCTUATION:
+	      /* continue in current state */
+	      break;
+
+	    case G_UNICODE_LOWERCASE_LETTER:
+	      /* wasn't a sentence-ending period; so re-enter the sentence
+	       * body
+	       */
+	      sentence_state = STATE_SENTENCE_BODY;
+	      break;
+
+	    default:
+	      /* End the sentence, break, maybe start a new one */
+
+	      g_assert (possible_sentence_end >= 0);
+	      g_assert (possible_sentence_boundary >= 0);
+
+	      attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;
+	      attrs[possible_sentence_end].is_sentence_end = TRUE;
+
+	      possible_sentence_end = -1;
+	      possible_sentence_boundary = -1;
+
+	      MAYBE_START_NEW_SENTENCE;
+
+	      break;
+	    }
+	  break;
+
+	case STATE_SENTENCE_POST_DOT_SEP:
+	  /* Break is forced at this point, unless we're a newline
+	   * after a CR, then we will break after the newline on the
+	   * next iteration. Only a single Sep can be in the
+	   * sentence.
+	   */
+	  if (!(prev_wc == '\r' && wc == '\n'))
+	    attrs[i].is_sentence_boundary = TRUE;
+
+	  g_assert (possible_sentence_end >= 0);
+	  g_assert (possible_sentence_boundary >= 0);
+
+	  attrs[possible_sentence_end].is_sentence_end = TRUE;
+
+	  possible_sentence_end = -1;
+	  possible_sentence_boundary = -1;
+
+	  MAYBE_START_NEW_SENTENCE;
+
+	  break;
+
+	default:
+	  g_assert_not_reached ();
+	  break;
+	}
 
       prev_type = type;
       prev_wc = wc;
@@ -1459,18 +1459,18 @@ pango_default_break (const gchar   *text,
       /* wc might not be a valid Unicode base character, but really all we
        * need to know is the last non-combining character */
       if (type != G_UNICODE_COMBINING_MARK &&
-          type != G_UNICODE_ENCLOSING_MARK &&
-          type != G_UNICODE_NON_SPACING_MARK)
-        base_character = wc;
+	  type != G_UNICODE_ENCLOSING_MARK &&
+	  type != G_UNICODE_NON_SPACING_MARK)
+	base_character = wc;
     }
 }
 
 static gboolean
 tailor_break (const gchar   *text,
-             gint           length,
-             PangoAnalysis *analysis,
-             PangoLogAttr  *attrs,
-             int            attrs_len)
+	     gint           length,
+	     PangoAnalysis *analysis,
+	     PangoLogAttr  *attrs,
+	     int            attrs_len)
 {
   if (analysis->lang_engine && PANGO_ENGINE_LANG_GET_CLASS (analysis->lang_engine)->script_break)
     {
@@ -1499,10 +1499,10 @@ tailor_break (const gchar   *text,
  */
 void
 pango_break (const gchar   *text,
-             gint           length,
-             PangoAnalysis *analysis,
-             PangoLogAttr  *attrs,
-             int            attrs_len)
+	     gint           length,
+	     PangoAnalysis *analysis,
+	     PangoLogAttr  *attrs,
+	     int            attrs_len)
 {
   g_return_if_fail (analysis != NULL);
   g_return_if_fail (attrs != NULL);
@@ -1531,9 +1531,9 @@ pango_break (const gchar   *text,
  **/
 void
 pango_find_paragraph_boundary (const gchar *text,
-                               gint         length,
-                               gint        *paragraph_delimiter_index,
-                               gint        *next_paragraph_start)
+			       gint         length,
+			       gint        *paragraph_delimiter_index,
+			       gint        *next_paragraph_start)
 {
   const gchar *p = text;
   const gchar *end;
@@ -1570,29 +1570,29 @@ pango_find_paragraph_boundary (const gchar *text,
   while (p != end)
     {
       if (prev_sep == '\n' ||
-          prev_sep == PARAGRAPH_SEPARATOR_STRING[0])
-        {
-          g_assert (delimiter);
-          start = p;
-          break;
-        }
+	  prev_sep == PARAGRAPH_SEPARATOR_STRING[0])
+	{
+	  g_assert (delimiter);
+	  start = p;
+	  break;
+	}
       else if (prev_sep == '\r')
-        {
-          /* don't break between \r and \n */
-          if (*p != '\n')
-            {
-              g_assert (delimiter);
-              start = p;
-              break;
-            }
-        }
+	{
+	  /* don't break between \r and \n */
+	  if (*p != '\n')
+	    {
+	      g_assert (delimiter);
+	      start = p;
+	      break;
+	    }
+	}
 
       if (*p == '\n' ||
-           *p == '\r' ||
-           !strncmp(p, PARAGRAPH_SEPARATOR_STRING,
+	   *p == '\r' ||
+	   !strncmp(p, PARAGRAPH_SEPARATOR_STRING,
 		    strlen(PARAGRAPH_SEPARATOR_STRING)))
-        {
-          if (delimiter == NULL)
+	{
+	  if (delimiter == NULL)
 	    delimiter = p;
 	  prev_sep = *p;
 	}
@@ -1663,11 +1663,11 @@ tailor_segment (const char      *range_start,
  */
 void
 pango_get_log_attrs (const char    *text,
-                     int            length,
-                     int            level,
-                     PangoLanguage *language,
-                     PangoLogAttr  *log_attrs,
-                     int            attrs_len)
+		     int            length,
+		     int            level,
+		     PangoLanguage *language,
+		     PangoLogAttr  *log_attrs,
+		     int            attrs_len)
 {
   PangoMap *lang_map;
   int chars_broken;
@@ -1711,15 +1711,15 @@ pango_get_log_attrs (const char    *text,
       g_assert (range_end == run_start);
 
       if (range_engine != run_engine)
-        {
-          /* Engine has changed; do the tailoring for the current range,
-           * then start a new range.
-           */
+	{
+	  /* Engine has changed; do the tailoring for the current range,
+	   * then start a new range.
+	   */
 	  chars_broken += tailor_segment (range_start, range_end, range_engine, chars_broken, &analysis, log_attrs);
 
-          range_start = run_start;
+	  range_start = run_start;
 	  range_engine = run_engine;
-        }
+	}
       range_end = run_end;
     }
   pango_script_iter_free (iter);