4 files changed, 108 insertions, 64 deletions
diff --git a/docs/pango-sections.txt b/docs/pango-sections.txt
index e20c917a..496db41a 100644
--- a/docs/pango-sections.txt
+++ b/docs/pango-sections.txt
@@ -23,6 +23,7 @@ pango_break
 pango_get_log_attrs
 pango_find_paragraph_boundary
 pango_default_break
+pango_tailor_break
 PangoLogAttr
 
 <SUBSECTION>
@@ -71,6 +72,7 @@ PANGO_CONTEXT_CLASS
 PANGO_IS_CONTEXT
 PANGO_IS_CONTEXT_CLASS
 PANGO_CONTEXT_GET_CLASS
+PANGO_TYPE_ITEM
 
 <SUBSECTION Private>
 pango_context_get_type
diff --git a/pango/break.c b/pango/break.c
index 11a1e34a..8066fca0 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -146,13 +146,11 @@ typedef enum
  * @attrs: logical attributes to fill in
  * @attrs_len: size of the array passed as @attrs
  *
- * This is the default break algorithm, used if no language
- * engine overrides it. Normally you should use pango_break()
- * instead. Unlike pango_break(),
- * @analysis can be %NULL, but only do that if you know what
- * you're doing. If you need an analysis to pass to pango_break(),
- * you need to pango_itemize().  In most cases however you should
- * simply use pango_get_log_attrs().
+ * This is the default break algorithm. It applies Unicode
+ * rules without language-specific tailoring, therefore
+ * the @analysis argument is unused and can be %NULL.
+ *
+ * See pango_tailor_break() for language-specific breaks.
  **/
 void
 pango_default_break (const gchar   *text,
@@ -1604,8 +1602,10 @@ tailor_break (const gchar   *text,
  * @attrs_len: size of the array passed as @attrs
  *
  * Determines possible line, word, and character breaks
- * for a string of Unicode text with a single analysis.  For most
- * purposes you may want to use pango_get_log_attrs().
+ * for a string of Unicode text with a single analysis.
+ * For most purposes you may want to use pango_get_log_attrs().
+ *
+ * Deprecated: 1.44: Use pango_default_break() and pango_tailor_break()
  */
 void
 pango_break (const gchar   *text,
@@ -1721,6 +1721,43 @@ pango_find_paragraph_boundary (const gchar *text,
     *next_paragraph_start = start - text;
 }
 
+/**
+ * pango_tailor_break:
+ * @text: text to process. Must be valid UTF-8
+ * @length: length in bytes of @text
+ * @analysis: #PangoAnalysis structure from pango_itemize() for @text
+ * @attrs: (array length=attrs_len): array with one #PangoLogAttr
+ *   per character in @text, plus one extra, to be filled in
+ * @attrs_len: length of @attrs array
+ *
+ * Apply language-specific tailoring to the breaks in @attrs,
+ * which are assumed to have been produced by pango_default_break().
+ *
+ * Since: 1.44
+ */
+void
+pango_tailor_break (const char    *text,
+                    int            length,
+                    PangoAnalysis *analysis,
+                    PangoLogAttr  *attrs,
+                    int            attrs_len)
+{
+  PangoLogAttr *start = attrs;
+  PangoLogAttr attr_before = *start;
+
+  if (tailor_break (text, length, analysis, attrs, attrs_len))
+    {
+      /* If tailored, the first attr keeps its pre-tailoring
+       * backspace_deletes_character, and any break or cursor
+       * flag that was set before tailoring stays set. */
+      start->backspace_deletes_character = attr_before.backspace_deletes_character;
+
+      start->is_line_break      |= attr_before.is_line_break;
+      start->is_mandatory_break |= attr_before.is_mandatory_break;
+      start->is_cursor_position |= attr_before.is_cursor_position;
+    }
+}
+
 static int
 tailor_segment (const char      *range_start,
 		const char      *range_end,
@@ -1734,22 +1771,11 @@ tailor_segment (const char      *range_start,
 
   chars_in_range = pango_utf8_strlen (range_start, range_end - range_start);
 
-  if (tailor_break (range_start,
-		    range_end - range_start,
-		    analysis,
-		    start,
-		    chars_in_range + 1))
-    {
-      /* if tailored, we enforce some of the attrs from before tailoring at
-       * the boundary
-       */
-
-     start->backspace_deletes_character  = attr_before.backspace_deletes_character;
-
-     start->is_line_break      |= attr_before.is_line_break;
-     start->is_mandatory_break |= attr_before.is_mandatory_break;
-     start->is_cursor_position |= attr_before.is_cursor_position;
-    }
+  pango_tailor_break (range_start,
+                      range_end - range_start,
+                      analysis,
+                      start,
+                      chars_in_range + 1);
 
   return chars_in_range;
 }
diff --git a/pango/pango-break.h b/pango/pango-break.h
index b035506e..4e1db0bc 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -100,10 +100,7 @@ struct _PangoLogAttr
   guint is_word_boundary            : 1;
 };
 
-/* Determine information about cluster/word/line breaks in a string
- * of Unicode text.
- */
-PANGO_AVAILABLE_IN_ALL
+PANGO_DEPRECATED_IN_1_44
 void pango_break (const gchar   *text,
 		  int            length,
 		  PangoAnalysis *analysis,
@@ -136,6 +133,13 @@ void pango_default_break (const gchar   *text,
 			  PangoLogAttr  *attrs,
 			  int            attrs_len);
 
+PANGO_AVAILABLE_IN_1_44
+void pango_tailor_break  (const gchar   *text,
+			  int            length,
+			  PangoAnalysis *analysis,
+			  PangoLogAttr  *attrs,
+			  int            attrs_len);
+
 G_END_DECLS
 
 #endif /* __PANGO_BREAK_H__ */
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index e0725adc..21897e29 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -3958,51 +3958,61 @@ process_line (PangoLayout    *layout,
   state->line_start_offset = state->start_offset;
 }
 
+/* TRUE when both analyses have the same level, script and language. */
+static gboolean
+can_break_together (PangoAnalysis *analysis1, PangoAnalysis *analysis2)
+{
+  gboolean same_level = analysis1->level == analysis2->level;
+  gboolean same_script = analysis1->script == analysis2->script;
+  return same_level && same_script && analysis1->language == analysis2->language;
+}
+
 static void
 get_items_log_attrs (const char   *text,
+                     int           length,
 		     GList        *items,
 		     PangoLogAttr *log_attrs,
-		     int           para_delimiter_len)
+                     int           log_attrs_len)
 {
   int offset = 0;
   int index = 0;
+  int num_bytes = 0;
+  int num_chars = 0;
+  PangoAnalysis analysis = { NULL };
+  GList *l;
 
-  while (items)
-    {
-      PangoItem tmp_item = *(PangoItem *)items->data;
-
-      /* Accumulate all the consecutive items that match in language
-       * characteristics, ignoring font, style tags, etc.
-       */
-      while (items->next)
-	{
-	  PangoItem *next_item = items->next->data;
-
-	  /* FIXME: Handle language tags */
-	  tmp_item.length += next_item->length;
-	  tmp_item.num_chars += next_item->num_chars;
+  analysis.level = -1;
 
-	  items = items->next;
-	}
+  pango_default_break (text, length, &analysis, log_attrs, log_attrs_len);
 
-      /* Break the paragraph delimiters with the last item */
-      if (items->next == NULL)
-	{
-	  tmp_item.num_chars += pango_utf8_strlen (text + index + tmp_item.length, para_delimiter_len);
-	  tmp_item.length += para_delimiter_len;
-	}
-
-      /* XXX This is wrong.  we should call pango_default_break on the entire
-       * layout text and then tailor_break on each language change, like
-       * pango_get_log_attrs does.
-       */
-      pango_break (text + index, tmp_item.length, &tmp_item.analysis,
-		   log_attrs + offset, tmp_item.num_chars + 1);
+  for (l = items; l; l = l->next)
+    {
+      PangoItem *item = l->data;
 
-      offset += tmp_item.num_chars;
-      index += tmp_item.length;
+      if (l == items)
+        {
+          analysis = item->analysis;
+          index = item->offset;
+          offset = 0;
+        }
 
-      items = items->next;
+      if (can_break_together (&analysis, &item->analysis))
+        {
+          num_bytes += item->length;
+          num_chars += item->num_chars;
+        }
+      else
+        {
+          pango_tailor_break (text + index,
+                              num_bytes,
+                              &analysis,
+                              log_attrs + offset,
+                              num_chars + 1);
+
+          analysis = item->analysis;
+          index += num_bytes;
+          offset += num_chars;
+        }
     }
 }
 
@@ -4243,9 +4253,11 @@ pango_layout_check_lines (PangoLayout *layout)
       if (no_break_attrs)
         apply_no_break_attributes (state.items, no_break_attrs);
 
-      get_items_log_attrs (start, state.items,
+      get_items_log_attrs (start,
+                           delimiter_index + delim_len,
+                           state.items,
 			   layout->log_attrs + start_offset,
-			   delim_len);
+                           layout->n_chars + 1 - start_offset);
 
       state.base_dir = base_dir;
       state.line_of_par = 1;