delete lang engine

2000-11-30  Havoc Pennington  <hp@pobox.com>

	* modules/thai/thai.c: delete lang engine

	* modules/tamil/tamil.c: delete lang engine
	(tamil_engine_x_new): fix type tag for shape engine

	* modules/indic/myanmar.c: delete lang engine
	(pango_engine_x_new): fix type tag for shape engine

	* modules/indic/gurmukhi.c: delete lang engine
	(pango_indic_engine_x_new): fix type tag for shape engine

	* modules/indic/gujarati.c: delete lang engine
	(pango_indic_engine_x_new): fix type tag for shape engine

	* modules/indic/devanagari.c: delete lang engine
	(pango_indic_engine_x_new): fix type tag for shape engine

	* modules/indic/pango-indic-script.h (SCRIPT_ENGINE_DEFINITION):
	delete lang engine

	* modules/indic/bengali.c: delete the lang engine
	(pango_indic_engine_x_new): fix type tag for shape engine

	* modules/hangul/hangul.c: delete the lang engine
	(hangul_engine_x_new): fix type tag for shape engine

	* modules/basic/basic.c: delete the lang engine
	(basic_engine_x_new): fix type tag for shape engine

	* modules/basic/basic-win32.c: delete the lang engine
	(basic_engine_win32_new): this was a shape engine,
	use correct type tag

	* modules/basic/basic-ft2.c: delete the lang engine

	* modules/arabic/arabic.c: Delete the lang engine
	(arabic_engine_x_new): this is a shape
	engine, not a lang engine, fix type tag

	* pango/pango-layout.c (pango_layout_index_to_line_x): handle
	the fact that paragraph delimiters aren't in the layout lines
	(pango_layout_index_to_pos): update to handle paragraph
	delimiters

	* pango/break.c (pango_find_paragraph_boundary): New function
	to find paragraph boundaries

	* pango/pango-layout.c (get_items_log_attrs): don't separate calls
	to pango_break() when directional level changes

	* pango/pango-layout.h (struct _PangoLayoutLine): put start index
	of the line into the struct

	* pango/pango-layout.c (pango_layout_get_cursor_pos): Fixups to
	reflect the fact that paragraph separators are removed from the
	input text.

	* pango/pango-layout.c (can_break_at): don't
	special-case start of line and whitespace-following-alphabetic
	here, because pango_break() already handles that properly

	* tests/testboundaries.c, tests/Makefile.am, tests/runtests.sh:
	Add directory for test programs, and a script to run them all

	* configure.in: Create Makefile in tests

	* pango/break.c (pango_break): Try for a real implementation of
	the Unicode text boundary algorithms
	(pango_get_log_attrs): Allow length to be -1

	* pango/pango-context.c (pango_itemize): use pango_item_new(),
	assert that items added to the list are sane.

	* pango/pango-layout.c (pango_layout_check_lines): Reimplement
	to honor the paragraph boundaries from pango_break()

	* pango/pango-layout.c (process_item): use pango_item_split() here

	* pango/pango-item.c (pango_item_split): New function to split an
	item into two items
author: Havoc Pennington <hp@pobox.com> 2000-12-02 07:49:56 +0000
committer: Havoc Pennington <hp@src.gnome.org> 2000-12-02 07:49:56 +0000
commit: 31832c0f4bcdf3e7c69cd5b8a7ad570a7b60d525 (patch)
tree: d7ed3aa9ac35017fe03d954dd6baa2ccfaf3ed30
parent: e9e84a3f75fbab073ce5488c0e82b3e7fc39bcda (diff)
download: pango-31832c0f4bcdf3e7c69cd5b8a7ad570a7b60d525.tar.gz
43 files changed, 2808 insertions, 1176 deletions
diff --git a/ChangeLog b/ChangeLog
index 20ee0631..6ac4dddc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,86 @@
+2000-11-30  Havoc Pennington  <hp@pobox.com>
+
+	* modules/thai/thai.c: delete lang engine
+
+	* modules/tamil/tamil.c: delete lang engine
+	(tamil_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/myanmar.c: delete lang engine
+	(pango_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gurmukhi.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gujarati.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/devanagari.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/pango-indic-script.h (SCRIPT_ENGINE_DEFINITION):
+	delete lang engine
+
+	* modules/indic/bengali.c: delete the lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/hangul/hangul.c: delete the lang engine
+	(hangul_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic.c: delete the lang engine
+	(basic_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic-win32.c: delete the lang engine
+	(basic_engine_win32_new): this was a shape engine,
+	use correct type tag
+
+	* modules/basic/basic-ft2.c: delete the lang engine
+
+	* modules/arabic/arabic.c: Delete the lang engine
+	(arabic_engine_x_new): this is a shape
+	engine, not a lang engine, fix type tag
+
+	* pango/pango-layout.c (pango_layout_index_to_line_x): handle 
+	the fact that paragraph delimiters aren't in the layout lines
+	(pango_layout_index_to_pos): update to handle paragraph 
+	delimiters
+
+	* pango/break.c (pango_find_paragraph_boundary): New function
+	to find paragraph boundaries
+
+	* pango/pango-layout.c (get_items_log_attrs): don't separate calls
+	to pango_break() when directional level changes
+
+	* pango/pango-layout.h (struct _PangoLayoutLine): put start index
+ 	of the line into the struct
+ 
+      	* pango/pango-layout.c (pango_layout_get_cursor_pos): Fixups to
+ 	reflect the fact that paragraph separators are removed from the
+ 	input text.
+	
+	* pango/pango-layout.c (can_break_at): don't 
+	special-case start of line and whitespace-following-alphabetic
+	here, because pango_break() already handles that properly
+
+	* tests/testboundaries.c, tests/Makefile.am, tests/runtests.sh:
+	Add directory for test programs, and a script to run them all
+
+	* configure.in: Create Makefile in tests
+
+	* pango/break.c (pango_break): Try for a real implementation of
+	the Unicode text boundary algorithms
+	(pango_get_log_attrs): Allow length to be -1
+	
+	* pango/pango-context.c (pango_itemize): use pango_item_new(),
+	assert that items added to the list are sane.
+	
+	* pango/pango-layout.c (pango_layout_check_lines): Reimplement 
+	to honor the paragraph boundaries from pango_break()
+
+	* pango/pango-layout.c (process_item): use pango_item_split() here
+
+	* pango/pango-item.c (pango_item_split): New function to split an
+	item into two items
+
 Fri Dec  1 11:49:50 2000  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-layout.c (get_tab_pos): Make sure that
diff --git a/ChangeLog.pre-1-0 b/ChangeLog.pre-1-0
index 20ee0631..6ac4dddc 100644
--- a/ChangeLog.pre-1-0
+++ b/ChangeLog.pre-1-0
@@ -1,3 +1,86 @@
+2000-11-30  Havoc Pennington  <hp@pobox.com>
+
+	* modules/thai/thai.c: delete lang engine
+
+	* modules/tamil/tamil.c: delete lang engine
+	(tamil_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/myanmar.c: delete lang engine
+	(pango_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gurmukhi.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gujarati.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/devanagari.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/pango-indic-script.h (SCRIPT_ENGINE_DEFINITION):
+	delete lang engine
+
+	* modules/indic/bengali.c: delete the lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/hangul/hangul.c: delete the lang engine
+	(hangul_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic.c: delete the lang engine
+	(basic_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic-win32.c: delete the lang engine
+	(basic_engine_win32_new): this was a shape engine,
+	use correct type tag
+
+	* modules/basic/basic-ft2.c: delete the lang engine
+
+	* modules/arabic/arabic.c: Delete the lang engine
+	(arabic_engine_x_new): this is a shape
+	engine, not a lang engine, fix type tag
+
+	* pango/pango-layout.c (pango_layout_index_to_line_x): handle 
+	the fact that paragraph delimiters aren't in the layout lines
+	(pango_layout_index_to_pos): update to handle paragraph 
+	delimiters
+
+	* pango/break.c (pango_find_paragraph_boundary): New function
+	to find paragraph boundaries
+
+	* pango/pango-layout.c (get_items_log_attrs): don't separate calls
+	to pango_break() when directional level changes
+
+	* pango/pango-layout.h (struct _PangoLayoutLine): put start index
+ 	of the line into the struct
+ 
+      	* pango/pango-layout.c (pango_layout_get_cursor_pos): Fixups to
+ 	reflect the fact that paragraph separators are removed from the
+ 	input text.
+	
+	* pango/pango-layout.c (can_break_at): don't 
+	special-case start of line and whitespace-following-alphabetic
+	here, because pango_break() already handles that properly
+
+	* tests/testboundaries.c, tests/Makefile.am, tests/runtests.sh:
+	Add directory for test programs, and a script to run them all
+
+	* configure.in: Create Makefile in tests
+
+	* pango/break.c (pango_break): Try for a real implementation of
+	the Unicode text boundary algorithms
+	(pango_get_log_attrs): Allow length to be -1
+	
+	* pango/pango-context.c (pango_itemize): use pango_item_new(),
+	assert that items added to the list are sane.
+	
+	* pango/pango-layout.c (pango_layout_check_lines): Reimplement 
+	to honor the paragraph boundaries from pango_break()
+
+	* pango/pango-layout.c (process_item): use pango_item_split() here
+
+	* pango/pango-item.c (pango_item_split): New function to split an
+	item into two items
+
 Fri Dec  1 11:49:50 2000  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-layout.c (get_tab_pos): Make sure that
diff --git a/ChangeLog.pre-1-10 b/ChangeLog.pre-1-10
index 20ee0631..6ac4dddc 100644
--- a/ChangeLog.pre-1-10
+++ b/ChangeLog.pre-1-10
@@ -1,3 +1,86 @@
+2000-11-30  Havoc Pennington  <hp@pobox.com>
+
+	* modules/thai/thai.c: delete lang engine
+
+	* modules/tamil/tamil.c: delete lang engine
+	(tamil_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/myanmar.c: delete lang engine
+	(pango_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gurmukhi.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gujarati.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/devanagari.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/pango-indic-script.h (SCRIPT_ENGINE_DEFINITION):
+	delete lang engine
+
+	* modules/indic/bengali.c: delete the lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/hangul/hangul.c: delete the lang engine
+	(hangul_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic.c: delete the lang engine
+	(basic_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic-win32.c: delete the lang engine
+	(basic_engine_win32_new): this was a shape engine,
+	use correct type tag
+
+	* modules/basic/basic-ft2.c: delete the lang engine
+
+	* modules/arabic/arabic.c: Delete the lang engine
+	(arabic_engine_x_new): this is a shape
+	engine, not a lang engine, fix type tag
+
+	* pango/pango-layout.c (pango_layout_index_to_line_x): handle 
+	the fact that paragraph delimiters aren't in the layout lines
+	(pango_layout_index_to_pos): update to handle paragraph 
+	delimiters
+
+	* pango/break.c (pango_find_paragraph_boundary): New function
+	to find paragraph boundaries
+
+	* pango/pango-layout.c (get_items_log_attrs): don't separate calls
+	to pango_break() when directional level changes
+
+	* pango/pango-layout.h (struct _PangoLayoutLine): put start index
+ 	of the line into the struct
+ 
+      	* pango/pango-layout.c (pango_layout_get_cursor_pos): Fixups to
+ 	reflect the fact that paragraph separators are removed from the
+ 	input text.
+	
+	* pango/pango-layout.c (can_break_at): don't 
+	special-case start of line and whitespace-following-alphabetic
+	here, because pango_break() already handles that properly
+
+	* tests/testboundaries.c, tests/Makefile.am, tests/runtests.sh:
+	Add directory for test programs, and a script to run them all
+
+	* configure.in: Create Makefile in tests
+
+	* pango/break.c (pango_break): Try for a real implementation of
+	the Unicode text boundary algorithms
+	(pango_get_log_attrs): Allow length to be -1
+	
+	* pango/pango-context.c (pango_itemize): use pango_item_new(),
+	assert that items added to the list are sane.
+	
+	* pango/pango-layout.c (pango_layout_check_lines): Reimplement 
+	to honor the paragraph boundaries from pango_break()
+
+	* pango/pango-layout.c (process_item): use pango_item_split() here
+
+	* pango/pango-item.c (pango_item_split): New function to split an
+	item into two items
+
 Fri Dec  1 11:49:50 2000  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-layout.c (get_tab_pos): Make sure that
diff --git a/ChangeLog.pre-1-2 b/ChangeLog.pre-1-2
index 20ee0631..6ac4dddc 100644
--- a/ChangeLog.pre-1-2
+++ b/ChangeLog.pre-1-2
@@ -1,3 +1,86 @@
+2000-11-30  Havoc Pennington  <hp@pobox.com>
+
+	* modules/thai/thai.c: delete lang engine
+
+	* modules/tamil/tamil.c: delete lang engine
+	(tamil_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/myanmar.c: delete lang engine
+	(pango_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gurmukhi.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gujarati.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/devanagari.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/pango-indic-script.h (SCRIPT_ENGINE_DEFINITION):
+	delete lang engine
+
+	* modules/indic/bengali.c: delete the lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/hangul/hangul.c: delete the lang engine
+	(hangul_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic.c: delete the lang engine
+	(basic_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic-win32.c: delete the lang engine
+	(basic_engine_win32_new): this was a shape engine,
+	use correct type tag
+
+	* modules/basic/basic-ft2.c: delete the lang engine
+
+	* modules/arabic/arabic.c: Delete the lang engine
+	(arabic_engine_x_new): this is a shape
+	engine, not a lang engine, fix type tag
+
+	* pango/pango-layout.c (pango_layout_index_to_line_x): handle 
+	the fact that paragraph delimiters aren't in the layout lines
+	(pango_layout_index_to_pos): update to handle paragraph 
+	delimiters
+
+	* pango/break.c (pango_find_paragraph_boundary): New function
+	to find paragraph boundaries
+
+	* pango/pango-layout.c (get_items_log_attrs): don't separate calls
+	to pango_break() when directional level changes
+
+	* pango/pango-layout.h (struct _PangoLayoutLine): put start index
+ 	of the line into the struct
+ 
+      	* pango/pango-layout.c (pango_layout_get_cursor_pos): Fixups to
+ 	reflect the fact that paragraph separators are removed from the
+ 	input text.
+	
+	* pango/pango-layout.c (can_break_at): don't 
+	special-case start of line and whitespace-following-alphabetic
+	here, because pango_break() already handles that properly
+
+	* tests/testboundaries.c, tests/Makefile.am, tests/runtests.sh:
+	Add directory for test programs, and a script to run them all
+
+	* configure.in: Create Makefile in tests
+
+	* pango/break.c (pango_break): Try for a real implementation of
+	the Unicode text boundary algorithms
+	(pango_get_log_attrs): Allow length to be -1
+	
+	* pango/pango-context.c (pango_itemize): use pango_item_new(),
+	assert that items added to the list are sane.
+	
+	* pango/pango-layout.c (pango_layout_check_lines): Reimplement 
+	to honor the paragraph boundaries from pango_break()
+
+	* pango/pango-layout.c (process_item): use pango_item_split() here
+
+	* pango/pango-item.c (pango_item_split): New function to split an
+	item into two items
+
 Fri Dec  1 11:49:50 2000  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-layout.c (get_tab_pos): Make sure that
diff --git a/ChangeLog.pre-1-4 b/ChangeLog.pre-1-4
index 20ee0631..6ac4dddc 100644
--- a/ChangeLog.pre-1-4
+++ b/ChangeLog.pre-1-4
@@ -1,3 +1,86 @@
+2000-11-30  Havoc Pennington  <hp@pobox.com>
+
+	* modules/thai/thai.c: delete lang engine
+
+	* modules/tamil/tamil.c: delete lang engine
+	(tamil_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/myanmar.c: delete lang engine
+	(pango_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gurmukhi.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gujarati.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/devanagari.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/pango-indic-script.h (SCRIPT_ENGINE_DEFINITION):
+	delete lang engine
+
+	* modules/indic/bengali.c: delete the lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/hangul/hangul.c: delete the lang engine
+	(hangul_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic.c: delete the lang engine
+	(basic_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic-win32.c: delete the lang engine
+	(basic_engine_win32_new): this was a shape engine,
+	use correct type tag
+
+	* modules/basic/basic-ft2.c: delete the lang engine
+
+	* modules/arabic/arabic.c: Delete the lang engine
+	(arabic_engine_x_new): this is a shape
+	engine, not a lang engine, fix type tag
+
+	* pango/pango-layout.c (pango_layout_index_to_line_x): handle 
+	the fact that paragraph delimiters aren't in the layout lines
+	(pango_layout_index_to_pos): update to handle paragraph 
+	delimiters
+
+	* pango/break.c (pango_find_paragraph_boundary): New function
+	to find paragraph boundaries
+
+	* pango/pango-layout.c (get_items_log_attrs): don't separate calls
+	to pango_break() when directional level changes
+
+	* pango/pango-layout.h (struct _PangoLayoutLine): put start index
+ 	of the line into the struct
+ 
+      	* pango/pango-layout.c (pango_layout_get_cursor_pos): Fixups to
+ 	reflect the fact that paragraph separators are removed from the
+ 	input text.
+	
+	* pango/pango-layout.c (can_break_at): don't 
+	special-case start of line and whitespace-following-alphabetic
+	here, because pango_break() already handles that properly
+
+	* tests/testboundaries.c, tests/Makefile.am, tests/runtests.sh:
+	Add directory for test programs, and a script to run them all
+
+	* configure.in: Create Makefile in tests
+
+	* pango/break.c (pango_break): Try for a real implementation of
+	the Unicode text boundary algorithms
+	(pango_get_log_attrs): Allow length to be -1
+	
+	* pango/pango-context.c (pango_itemize): use pango_item_new(),
+	assert that items added to the list are sane.
+	
+	* pango/pango-layout.c (pango_layout_check_lines): Reimplement 
+	to honor the paragraph boundaries from pango_break()
+
+	* pango/pango-layout.c (process_item): use pango_item_split() here
+
+	* pango/pango-item.c (pango_item_split): New function to split an
+	item into two items
+
 Fri Dec  1 11:49:50 2000  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-layout.c (get_tab_pos): Make sure that
diff --git a/ChangeLog.pre-1-6 b/ChangeLog.pre-1-6
index 20ee0631..6ac4dddc 100644
--- a/ChangeLog.pre-1-6
+++ b/ChangeLog.pre-1-6
@@ -1,3 +1,86 @@
+2000-11-30  Havoc Pennington  <hp@pobox.com>
+
+	* modules/thai/thai.c: delete lang engine
+
+	* modules/tamil/tamil.c: delete lang engine
+	(tamil_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/myanmar.c: delete lang engine
+	(pango_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gurmukhi.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gujarati.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/devanagari.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/pango-indic-script.h (SCRIPT_ENGINE_DEFINITION):
+	delete lang engine
+
+	* modules/indic/bengali.c: delete the lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/hangul/hangul.c: delete the lang engine
+	(hangul_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic.c: delete the lang engine
+	(basic_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic-win32.c: delete the lang engine
+	(basic_engine_win32_new): this was a shape engine,
+	use correct type tag
+
+	* modules/basic/basic-ft2.c: delete the lang engine
+
+	* modules/arabic/arabic.c: Delete the lang engine
+	(arabic_engine_x_new): this is a shape
+	engine, not a lang engine, fix type tag
+
+	* pango/pango-layout.c (pango_layout_index_to_line_x): handle 
+	the fact that paragraph delimiters aren't in the layout lines
+	(pango_layout_index_to_pos): update to handle paragraph 
+	delimiters
+
+	* pango/break.c (pango_find_paragraph_boundary): New function
+	to find paragraph boundaries
+
+	* pango/pango-layout.c (get_items_log_attrs): don't separate calls
+	to pango_break() when directional level changes
+
+	* pango/pango-layout.h (struct _PangoLayoutLine): put start index
+ 	of the line into the struct
+ 
+      	* pango/pango-layout.c (pango_layout_get_cursor_pos): Fixups to
+ 	reflect the fact that paragraph separators are removed from the
+ 	input text.
+	
+	* pango/pango-layout.c (can_break_at): don't 
+	special-case start of line and whitespace-following-alphabetic
+	here, because pango_break() already handles that properly
+
+	* tests/testboundaries.c, tests/Makefile.am, tests/runtests.sh:
+	Add directory for test programs, and a script to run them all
+
+	* configure.in: Create Makefile in tests
+
+	* pango/break.c (pango_break): Try for a real implementation of
+	the Unicode text boundary algorithms
+	(pango_get_log_attrs): Allow length to be -1
+	
+	* pango/pango-context.c (pango_itemize): use pango_item_new(),
+	assert that items added to the list are sane.
+	
+	* pango/pango-layout.c (pango_layout_check_lines): Reimplement 
+	to honor the paragraph boundaries from pango_break()
+
+	* pango/pango-layout.c (process_item): use pango_item_split() here
+
+	* pango/pango-item.c (pango_item_split): New function to split an
+	item into two items
+
 Fri Dec  1 11:49:50 2000  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-layout.c (get_tab_pos): Make sure that
diff --git a/ChangeLog.pre-1-8 b/ChangeLog.pre-1-8
index 20ee0631..6ac4dddc 100644
--- a/ChangeLog.pre-1-8
+++ b/ChangeLog.pre-1-8
@@ -1,3 +1,86 @@
+2000-11-30  Havoc Pennington  <hp@pobox.com>
+
+	* modules/thai/thai.c: delete lang engine
+
+	* modules/tamil/tamil.c: delete lang engine
+	(tamil_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/myanmar.c: delete lang engine
+	(pango_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gurmukhi.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/gujarati.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/devanagari.c: delete lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/indic/pango-indic-script.h (SCRIPT_ENGINE_DEFINITION):
+	delete lang engine
+
+	* modules/indic/bengali.c: delete the lang engine
+	(pango_indic_engine_x_new): fix type tag for shape engine
+
+	* modules/hangul/hangul.c: delete the lang engine
+	(hangul_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic.c: delete the lang engine
+	(basic_engine_x_new): fix type tag for shape engine
+
+	* modules/basic/basic-win32.c: delete the lang engine
+	(basic_engine_win32_new): this was a shape engine,
+	use correct type tag
+
+	* modules/basic/basic-ft2.c: delete the lang engine
+
+	* modules/arabic/arabic.c: Delete the lang engine
+	(arabic_engine_x_new): this is a shape
+	engine, not a lang engine, fix type tag
+
+	* pango/pango-layout.c (pango_layout_index_to_line_x): handle 
+	the fact that paragraph delimiters aren't in the layout lines
+	(pango_layout_index_to_pos): update to handle paragraph 
+	delimiters
+
+	* pango/break.c (pango_find_paragraph_boundary): New function
+	to find paragraph boundaries
+
+	* pango/pango-layout.c (get_items_log_attrs): don't separate calls
+	to pango_break() when directional level changes
+
+	* pango/pango-layout.h (struct _PangoLayoutLine): put start index
+ 	of the line into the struct
+ 
+      	* pango/pango-layout.c (pango_layout_get_cursor_pos): Fixups to
+ 	reflect the fact that paragraph separators are removed from the
+ 	input text.
+	
+	* pango/pango-layout.c (can_break_at): don't 
+	special-case start of line and whitespace-following-alphabetic
+	here, because pango_break() already handles that properly
+
+	* tests/testboundaries.c, tests/Makefile.am, tests/runtests.sh:
+	Add directory for test programs, and a script to run them all
+
+	* configure.in: Create Makefile in tests
+
+	* pango/break.c (pango_break): Try for a real implementation of
+	the Unicode text boundary algorithms
+	(pango_get_log_attrs): Allow length to be -1
+	
+	* pango/pango-context.c (pango_itemize): use pango_item_new(),
+	assert that items added to the list are sane.
+	
+	* pango/pango-layout.c (pango_layout_check_lines): Reimplement 
+	to honor the paragraph boundaries from pango_break()
+
+	* pango/pango-layout.c (process_item): use pango_item_split() here
+
+	* pango/pango-item.c (pango_item_split): New function to split an
+	item into two items
+
 Fri Dec  1 11:49:50 2000  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-layout.c (get_tab_pos): Make sure that
diff --git a/configure.in b/configure.in
index 00c599e5..8e75121e 100644
--- a/configure.in
+++ b/configure.in
@@ -259,6 +259,7 @@ examples/makefile.mingw
 docs/Makefile
 tools/Makefile
 fonts/Makefile
+tests/Makefile
 pango.spec
 pango-config
 pango.pc
diff --git a/modules/arabic/arabic-x.c b/modules/arabic/arabic-x.c
index 49435b5c..8115e606 100644
--- a/modules/arabic/arabic-x.c
+++ b/modules/arabic/arabic-x.c
@@ -29,12 +29,6 @@ static PangoEngineRange arabic_range[] = {
 
 static PangoEngineInfo script_engines[] = {
     {
-        "ArabicScriptEngineLang",
-        PANGO_ENGINE_TYPE_LANG,
-        PANGO_RENDER_TYPE_NONE,
-        arabic_range, G_N_ELEMENTS(arabic_range)
-    },
-    {
         "ArabicScriptEngineX",
         PANGO_ENGINE_TYPE_SHAPE,
         PANGO_RENDER_TYPE_X,
@@ -44,58 +38,6 @@ static PangoEngineInfo script_engines[] = {
 
 static gint n_script_engines = G_N_ELEMENTS (script_engines);
 
-
-
-
-/*
- * Language script engine
- */
-
-static void 
-arabic_engine_break (const char    *text,
-                     int            len,
-                     PangoAnalysis *analysis,
-                     PangoLogAttr  *attrs)
-{
-    /* Most of the code comes from tamil_engine_break
-     */
-
-    const char *cur = text;
-    gint        i  = 0;
-    gunichar    wc;
-
-    while (*cur && cur - text < len)
-        {
-            wc = g_utf8_get_char (cur);
-            if (wc == (gunichar)-1)
-                break;           /* FIXME: ERROR */
-
-            attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-            attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
-            attrs[i].is_char_stop = 1;
-            attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
-            /* actually, is_word_stop in not correct, but simple and good enough. */
-            
-            i++;
-            cur = g_utf8_next_char (cur);
-        }
-}
-
-static PangoEngine *
-arabic_engine_lang_new ()
-{
-    PangoEngineLang *result;
-  
-    result = g_new (PangoEngineLang, 1);
-
-    result->engine.id = "ArabicScriptEngine";
-    result->engine.type = PANGO_ENGINE_TYPE_LANG;
-    result->engine.length = sizeof (result);
-    result->script_break = arabic_engine_break;
-
-    return (PangoEngine *)result;
-}
-
 /*
  * X window system script engine portion
  */
@@ -377,7 +319,7 @@ arabic_engine_x_new ()
     result = g_new (PangoEngineShape, 1);
 
     result->engine.id = "ArabicScriptEngine";
-    result->engine.type = PANGO_ENGINE_TYPE_LANG;
+    result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
     result->engine.length = sizeof (result);
     result->script_shape = arabic_engine_shape;
     result->get_coverage = arabic_engine_get_coverage;
@@ -408,12 +350,10 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-    if (!strcmp (id, "ArabicScriptEngineLang"))
-        return arabic_engine_lang_new ();
-    else if (!strcmp (id, "ArabicScriptEngineX"))
-        return arabic_engine_x_new ();
-    else
-        return NULL;
+  if (!strcmp (id, "ArabicScriptEngineX"))
+    return arabic_engine_x_new ();
+  else
+    return NULL;
 }
 
 void 
diff --git a/modules/arabic/arabic.c b/modules/arabic/arabic.c
index 49435b5c..8115e606 100644
--- a/modules/arabic/arabic.c
+++ b/modules/arabic/arabic.c
@@ -29,12 +29,6 @@ static PangoEngineRange arabic_range[] = {
 
 static PangoEngineInfo script_engines[] = {
     {
-        "ArabicScriptEngineLang",
-        PANGO_ENGINE_TYPE_LANG,
-        PANGO_RENDER_TYPE_NONE,
-        arabic_range, G_N_ELEMENTS(arabic_range)
-    },
-    {
         "ArabicScriptEngineX",
         PANGO_ENGINE_TYPE_SHAPE,
         PANGO_RENDER_TYPE_X,
@@ -44,58 +38,6 @@ static PangoEngineInfo script_engines[] = {
 
 static gint n_script_engines = G_N_ELEMENTS (script_engines);
 
-
-
-
-/*
- * Language script engine
- */
-
-static void 
-arabic_engine_break (const char    *text,
-                     int            len,
-                     PangoAnalysis *analysis,
-                     PangoLogAttr  *attrs)
-{
-    /* Most of the code comes from tamil_engine_break
-     */
-
-    const char *cur = text;
-    gint        i  = 0;
-    gunichar    wc;
-
-    while (*cur && cur - text < len)
-        {
-            wc = g_utf8_get_char (cur);
-            if (wc == (gunichar)-1)
-                break;           /* FIXME: ERROR */
-
-            attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-            attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
-            attrs[i].is_char_stop = 1;
-            attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
-            /* actually, is_word_stop in not correct, but simple and good enough. */
-            
-            i++;
-            cur = g_utf8_next_char (cur);
-        }
-}
-
-static PangoEngine *
-arabic_engine_lang_new ()
-{
-    PangoEngineLang *result;
-  
-    result = g_new (PangoEngineLang, 1);
-
-    result->engine.id = "ArabicScriptEngine";
-    result->engine.type = PANGO_ENGINE_TYPE_LANG;
-    result->engine.length = sizeof (result);
-    result->script_break = arabic_engine_break;
-
-    return (PangoEngine *)result;
-}
-
 /*
  * X window system script engine portion
  */
@@ -377,7 +319,7 @@ arabic_engine_x_new ()
     result = g_new (PangoEngineShape, 1);
 
     result->engine.id = "ArabicScriptEngine";
-    result->engine.type = PANGO_ENGINE_TYPE_LANG;
+    result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
     result->engine.length = sizeof (result);
     result->script_shape = arabic_engine_shape;
     result->get_coverage = arabic_engine_get_coverage;
@@ -408,12 +350,10 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-    if (!strcmp (id, "ArabicScriptEngineLang"))
-        return arabic_engine_lang_new ();
-    else if (!strcmp (id, "ArabicScriptEngineX"))
-        return arabic_engine_x_new ();
-    else
-        return NULL;
+  if (!strcmp (id, "ArabicScriptEngineX"))
+    return arabic_engine_x_new ();
+  else
+    return NULL;
 }
 
 void 
diff --git a/modules/basic/basic-ft2.c b/modules/basic/basic-ft2.c
index 973e8a43..b252eed0 100644
--- a/modules/basic/basic-ft2.c
+++ b/modules/basic/basic-ft2.c
@@ -98,12 +98,6 @@ static PangoEngineRange basic_ranges[] = {
 
 static PangoEngineInfo script_engines[] = {
   {
-    "BasicScriptEngineLangFT2",
-    PANGO_ENGINE_TYPE_LANG,
-    PANGO_RENDER_TYPE_NONE,
-    basic_ranges, G_N_ELEMENTS(basic_ranges)
-  },
-  {
     "BasicScriptEngineFT2",
     PANGO_ENGINE_TYPE_SHAPE,
     PANGO_RENDER_TYPE_FT2,
@@ -114,33 +108,6 @@ static PangoEngineInfo script_engines[] = {
 static gint n_script_engines = G_N_ELEMENTS (script_engines);
 
 /*
- * Language script engine
- */
-
-static void 
-basic_engine_break (const char     *text,
-		    gint            len,
-		    PangoAnalysis  *analysis,
-		    PangoLogAttr   *attrs)
-{
-}
-
-static PangoEngine *
-basic_engine_lang_new (void)
-{
-  PangoEngineLang *result;
-  
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = "BasicScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = basic_engine_break;
-
-  return (PangoEngine *)result;
-}
-
-/*
  * FT2 system script engine portion
  */
 
diff --git a/modules/basic/basic-win32.c b/modules/basic/basic-win32.c
index 81b0b746..b4599e61 100644
--- a/modules/basic/basic-win32.c
+++ b/modules/basic/basic-win32.c
@@ -41,12 +41,6 @@ static PangoEngineRange basic_ranges[] = {
 
 static PangoEngineInfo script_engines[] = {
   {
-    "BasicScriptEngineLangWin32",
-    PANGO_ENGINE_TYPE_LANG,
-    PANGO_RENDER_TYPE_NONE,
-    basic_ranges, G_N_ELEMENTS(basic_ranges)
-  },
-  {
     "BasicScriptEngineWin32",
     PANGO_ENGINE_TYPE_SHAPE,
     PANGO_RENDER_TYPE_WIN32,
@@ -57,33 +51,6 @@ static PangoEngineInfo script_engines[] = {
 static gint n_script_engines = G_N_ELEMENTS (script_engines);
 
 /*
- * Language script engine
- */
-
-static void 
-basic_engine_break (const char     *text,
-		    gint            len,
-		    PangoAnalysis  *analysis,
-		    PangoLogAttr   *attrs)
-{
-}
-
-static PangoEngine *
-basic_engine_lang_new (void)
-{
-  PangoEngineLang *result;
-  
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = "BasicScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = basic_engine_break;
-
-  return (PangoEngine *)result;
-}
-
-/*
  * Win32 system script engine portion
  */
 
@@ -307,7 +274,7 @@ basic_engine_win32_new (void)
   result = g_new (PangoEngineShape, 1);
 
   result->engine.id = "BasicScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = basic_engine_shape;
   result->get_coverage = basic_engine_get_coverage;
diff --git a/modules/basic/basic-x.c b/modules/basic/basic-x.c
index b49c8480..ab54aa9b 100644
--- a/modules/basic/basic-x.c
+++ b/modules/basic/basic-x.c
@@ -79,12 +79,6 @@ static PangoGlyph conv_ucs4 (CharCache  *cache,
 
 static PangoEngineInfo script_engines[] = {
   {
-    "BasicScriptEngineLang",
-    PANGO_ENGINE_TYPE_LANG,
-    PANGO_RENDER_TYPE_NONE,
-    basic_ranges, G_N_ELEMENTS(basic_ranges)
-  },
-  {
     "BasicScriptEngineX",
     PANGO_ENGINE_TYPE_SHAPE,
     PANGO_RENDER_TYPE_X,
@@ -95,33 +89,6 @@ static PangoEngineInfo script_engines[] = {
 static gint n_script_engines = G_N_ELEMENTS (script_engines);
 
 /*
- * Language script engine
- */
-
-static void 
-basic_engine_break (const char     *text,
-		    gint            len,
-		    PangoAnalysis  *analysis,
-		    PangoLogAttr   *attrs)
-{
-}
-
-static PangoEngine *
-basic_engine_lang_new ()
-{
-  PangoEngineLang *result;
-  
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = "BasicScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = basic_engine_break;
-
-  return (PangoEngine *)result;
-}
-
-/*
  * X window system script engine portion
  */
 
@@ -481,7 +448,7 @@ basic_engine_x_new ()
   result = g_new (PangoEngineShape, 1);
 
   result->engine.id = "BasicScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = basic_engine_shape;
   result->get_coverage = basic_engine_get_coverage;
@@ -508,9 +475,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, gint *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, "BasicScriptEngineLang"))
-    return basic_engine_lang_new ();
-  else if (!strcmp (id, "BasicScriptEngineX"))
+  if (!strcmp (id, "BasicScriptEngineX"))
     return basic_engine_x_new ();
   else
     return NULL;
diff --git a/modules/basic/basic.c b/modules/basic/basic.c
index b49c8480..ab54aa9b 100644
--- a/modules/basic/basic.c
+++ b/modules/basic/basic.c
@@ -79,12 +79,6 @@ static PangoGlyph conv_ucs4 (CharCache  *cache,
 
 static PangoEngineInfo script_engines[] = {
   {
-    "BasicScriptEngineLang",
-    PANGO_ENGINE_TYPE_LANG,
-    PANGO_RENDER_TYPE_NONE,
-    basic_ranges, G_N_ELEMENTS(basic_ranges)
-  },
-  {
     "BasicScriptEngineX",
     PANGO_ENGINE_TYPE_SHAPE,
     PANGO_RENDER_TYPE_X,
@@ -95,33 +89,6 @@ static PangoEngineInfo script_engines[] = {
 static gint n_script_engines = G_N_ELEMENTS (script_engines);
 
 /*
- * Language script engine
- */
-
-static void 
-basic_engine_break (const char     *text,
-		    gint            len,
-		    PangoAnalysis  *analysis,
-		    PangoLogAttr   *attrs)
-{
-}
-
-static PangoEngine *
-basic_engine_lang_new ()
-{
-  PangoEngineLang *result;
-  
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = "BasicScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = basic_engine_break;
-
-  return (PangoEngine *)result;
-}
-
-/*
  * X window system script engine portion
  */
 
@@ -481,7 +448,7 @@ basic_engine_x_new ()
   result = g_new (PangoEngineShape, 1);
 
   result->engine.id = "BasicScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = basic_engine_shape;
   result->get_coverage = basic_engine_get_coverage;
@@ -508,9 +475,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, gint *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, "BasicScriptEngineLang"))
-    return basic_engine_lang_new ();
-  else if (!strcmp (id, "BasicScriptEngineX"))
+  if (!strcmp (id, "BasicScriptEngineX"))
     return basic_engine_x_new ();
   else
     return NULL;
diff --git a/modules/hangul/hangul-x.c b/modules/hangul/hangul-x.c
index eb655ab2..5309b05d 100644
--- a/modules/hangul/hangul-x.c
+++ b/modules/hangul/hangul-x.c
@@ -38,12 +38,6 @@ static PangoEngineRange hangul_ranges[] = {
 
 static PangoEngineInfo script_engines[] = {
   {
-    "HangulScriptEngineLang",
-    PANGO_ENGINE_TYPE_LANG,
-    PANGO_RENDER_TYPE_NONE,
-    hangul_ranges, G_N_ELEMENTS(hangul_ranges)
-  },
-  {
     "HangulScriptEngineX",
     PANGO_ENGINE_TYPE_SHAPE,
     PANGO_RENDER_TYPE_X,
@@ -54,34 +48,6 @@ static PangoEngineInfo script_engines[] = {
 static int n_script_engines = G_N_ELEMENTS (script_engines);
 
 /*
- * Language script engine
- */
-
-static void 
-hangul_engine_break (const char    *text,
-		     int            len,
-		     PangoAnalysis *analysis,
-		     PangoLogAttr  *attrs)
-{
-  /* (FIXME) */
-}
-
-static PangoEngine *
-hangul_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = "HangulScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = hangul_engine_break;
-
-  return (PangoEngine *) result;
-}
-
-/*
  * X window system script engine portion
  */
 
@@ -751,7 +717,7 @@ hangul_engine_x_new ()
   result = g_new (PangoEngineShape, 1);
 
   result->engine.id = "HangulScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = hangul_engine_shape;
   result->get_coverage = hangul_engine_get_coverage;
@@ -781,9 +747,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, "HangulScriptEngineLang"))
-    return hangul_engine_lang_new ();
-  else if (!strcmp (id, "HangulScriptEngineX"))
+  if (!strcmp (id, "HangulScriptEngineX"))
     return hangul_engine_x_new ();
   else
     return NULL;
diff --git a/modules/hangul/hangul.c b/modules/hangul/hangul.c
index eb655ab2..5309b05d 100644
--- a/modules/hangul/hangul.c
+++ b/modules/hangul/hangul.c
@@ -38,12 +38,6 @@ static PangoEngineRange hangul_ranges[] = {
 
 static PangoEngineInfo script_engines[] = {
   {
-    "HangulScriptEngineLang",
-    PANGO_ENGINE_TYPE_LANG,
-    PANGO_RENDER_TYPE_NONE,
-    hangul_ranges, G_N_ELEMENTS(hangul_ranges)
-  },
-  {
     "HangulScriptEngineX",
     PANGO_ENGINE_TYPE_SHAPE,
     PANGO_RENDER_TYPE_X,
@@ -54,34 +48,6 @@ static PangoEngineInfo script_engines[] = {
 static int n_script_engines = G_N_ELEMENTS (script_engines);
 
 /*
- * Language script engine
- */
-
-static void 
-hangul_engine_break (const char    *text,
-		     int            len,
-		     PangoAnalysis *analysis,
-		     PangoLogAttr  *attrs)
-{
-  /* (FIXME) */
-}
-
-static PangoEngine *
-hangul_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = "HangulScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = hangul_engine_break;
-
-  return (PangoEngine *) result;
-}
-
-/*
  * X window system script engine portion
  */
 
@@ -751,7 +717,7 @@ hangul_engine_x_new ()
   result = g_new (PangoEngineShape, 1);
 
   result->engine.id = "HangulScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = hangul_engine_shape;
   result->get_coverage = hangul_engine_get_coverage;
@@ -781,9 +747,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, "HangulScriptEngineLang"))
-    return hangul_engine_lang_new ();
-  else if (!strcmp (id, "HangulScriptEngineX"))
+  if (!strcmp (id, "HangulScriptEngineX"))
     return hangul_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/bengali-x.c b/modules/indic/bengali-x.c
index 05e430f3..1336d4a1 100644
--- a/modules/indic/bengali-x.c
+++ b/modules/indic/bengali-x.c
@@ -278,56 +278,13 @@ pango_indic_engine_x_new ()
   PangoEngineShape *result;
   result = g_new (PangoEngineShape, 1);
   result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = pango_indic_engine_shape;
   result->get_coverage = pango_indic_engine_get_coverage;
   return (PangoEngine *) result;
 }
 
-static void
-pango_indic_engine_break (const char *text,
-			 int len,
-			 PangoAnalysis * analysis, PangoLogAttr * attrs)
-{
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
-	attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
-      /* actually, is_word_stop in not correct, but simple and good enough. */
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-
-static PangoEngine *
-pango_indic_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = pango_indic_engine_break;
-
-  return (PangoEngine *) result;
-}
-
 #ifdef MODULE_PREFIX
 #define MODULE_ENTRY(func) _pango_pango_##func
 #else
@@ -344,9 +301,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo ** engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, SCRIPT_STRING "ScriptEngineLang"))
-    return pango_indic_engine_lang_new ();
-  else if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
+  if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
     return pango_indic_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/bengali.c b/modules/indic/bengali.c
index 05e430f3..1336d4a1 100644
--- a/modules/indic/bengali.c
+++ b/modules/indic/bengali.c
@@ -278,56 +278,13 @@ pango_indic_engine_x_new ()
   PangoEngineShape *result;
   result = g_new (PangoEngineShape, 1);
   result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = pango_indic_engine_shape;
   result->get_coverage = pango_indic_engine_get_coverage;
   return (PangoEngine *) result;
 }
 
-static void
-pango_indic_engine_break (const char *text,
-			 int len,
-			 PangoAnalysis * analysis, PangoLogAttr * attrs)
-{
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
-	attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
-      /* actually, is_word_stop in not correct, but simple and good enough. */
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-
-static PangoEngine *
-pango_indic_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = pango_indic_engine_break;
-
-  return (PangoEngine *) result;
-}
-
 #ifdef MODULE_PREFIX
 #define MODULE_ENTRY(func) _pango_pango_##func
 #else
@@ -344,9 +301,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo ** engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, SCRIPT_STRING "ScriptEngineLang"))
-    return pango_indic_engine_lang_new ();
-  else if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
+  if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
     return pango_indic_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/devanagari-x.c b/modules/indic/devanagari-x.c
index 028c432e..6ff9bcc9 100644
--- a/modules/indic/devanagari-x.c
+++ b/modules/indic/devanagari-x.c
@@ -331,56 +331,13 @@ pango_indic_engine_x_new ()
   PangoEngineShape *result;
   result = g_new (PangoEngineShape, 1);
   result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = pango_indic_engine_shape;
   result->get_coverage = pango_indic_engine_get_coverage;
   return (PangoEngine *) result;
 }
 
-static void
-pango_indic_engine_break (const char *text,
-		    int len,
-		    PangoAnalysis * analysis, PangoLogAttr * attrs)
-{
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
-	attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
-      /* actually, is_word_stop in not correct, but simple and good enough. */
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-
-static PangoEngine *
-pango_indic_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = pango_indic_engine_break;
-
-  return (PangoEngine *) result;
-}
-
 #ifdef MODULE_PREFIX
 #define MODULE_ENTRY(func) _pango_pango_indic_##func
 #else
@@ -397,9 +354,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo ** engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, SCRIPT_STRING "ScriptEngineLang"))
-    return pango_indic_engine_lang_new ();
-  else if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
+  if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
     return pango_indic_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/devanagari.c b/modules/indic/devanagari.c
index 028c432e..6ff9bcc9 100644
--- a/modules/indic/devanagari.c
+++ b/modules/indic/devanagari.c
@@ -331,56 +331,13 @@ pango_indic_engine_x_new ()
   PangoEngineShape *result;
   result = g_new (PangoEngineShape, 1);
   result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = pango_indic_engine_shape;
   result->get_coverage = pango_indic_engine_get_coverage;
   return (PangoEngine *) result;
 }
 
-static void
-pango_indic_engine_break (const char *text,
-		    int len,
-		    PangoAnalysis * analysis, PangoLogAttr * attrs)
-{
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
-	attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
-      /* actually, is_word_stop in not correct, but simple and good enough. */
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-
-static PangoEngine *
-pango_indic_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = pango_indic_engine_break;
-
-  return (PangoEngine *) result;
-}
-
 #ifdef MODULE_PREFIX
 #define MODULE_ENTRY(func) _pango_pango_indic_##func
 #else
@@ -397,9 +354,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo ** engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, SCRIPT_STRING "ScriptEngineLang"))
-    return pango_indic_engine_lang_new ();
-  else if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
+  if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
     return pango_indic_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/gujarati-x.c b/modules/indic/gujarati-x.c
index 0b72babd..639f5e61 100644
--- a/modules/indic/gujarati-x.c
+++ b/modules/indic/gujarati-x.c
@@ -307,56 +307,13 @@ pango_indic_engine_x_new ()
   PangoEngineShape *result;
   result = g_new (PangoEngineShape, 1);
   result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = pango_indic_engine_shape;
   result->get_coverage = pango_indic_engine_get_coverage;
   return (PangoEngine *) result;
 }
 
-static void
-pango_indic_engine_break (const char *text,
-			 int len,
-			 PangoAnalysis * analysis, PangoLogAttr * attrs)
-{
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
-	attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
-      /* actually, is_word_stop in not correct, but simple and good enough. */
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-
-static PangoEngine *
-pango_indic_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = pango_indic_engine_break;
-
-  return (PangoEngine *) result;
-}
-
 #ifdef MODULE_PREFIX
 #define MODULE_ENTRY(func) _pango_pango_indic_##func
 #else
@@ -373,9 +330,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo ** engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, SCRIPT_STRING "ScriptEngineLang"))
-    return pango_indic_engine_lang_new ();
-  else if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
+  if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
     return pango_indic_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/gujarati.c b/modules/indic/gujarati.c
index 0b72babd..639f5e61 100644
--- a/modules/indic/gujarati.c
+++ b/modules/indic/gujarati.c
@@ -307,56 +307,13 @@ pango_indic_engine_x_new ()
   PangoEngineShape *result;
   result = g_new (PangoEngineShape, 1);
   result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = pango_indic_engine_shape;
   result->get_coverage = pango_indic_engine_get_coverage;
   return (PangoEngine *) result;
 }
 
-static void
-pango_indic_engine_break (const char *text,
-			 int len,
-			 PangoAnalysis * analysis, PangoLogAttr * attrs)
-{
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
-	attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
-      /* actually, is_word_stop in not correct, but simple and good enough. */
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-
-static PangoEngine *
-pango_indic_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = pango_indic_engine_break;
-
-  return (PangoEngine *) result;
-}
-
 #ifdef MODULE_PREFIX
 #define MODULE_ENTRY(func) _pango_pango_indic_##func
 #else
@@ -373,9 +330,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo ** engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, SCRIPT_STRING "ScriptEngineLang"))
-    return pango_indic_engine_lang_new ();
-  else if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
+  if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
     return pango_indic_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/gurmukhi-x.c b/modules/indic/gurmukhi-x.c
index 7d75e505..108053b0 100644
--- a/modules/indic/gurmukhi-x.c
+++ b/modules/indic/gurmukhi-x.c
@@ -221,56 +221,13 @@ pango_indic_engine_x_new ()
   PangoEngineShape *result;
   result = g_new (PangoEngineShape, 1);
   result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = pango_indic_engine_shape;
   result->get_coverage = pango_indic_engine_get_coverage;
   return (PangoEngine *) result;
 }
 
-static void
-pango_indic_engine_break (const char *text,
-			 int len,
-			 PangoAnalysis * analysis, PangoLogAttr * attrs)
-{
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
-	attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
-      /* actually, is_word_stop in not correct, but simple and good enough. */
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-
-static PangoEngine *
-pango_indic_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = pango_indic_engine_break;
-
-  return (PangoEngine *) result;
-}
-
 #ifdef MODULE_PREFIX
 #define MODULE_ENTRY(func) _pango_pango_##func
 #else
@@ -287,9 +244,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo ** engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, SCRIPT_STRING "ScriptEngineLang"))
-    return pango_indic_engine_lang_new ();
-  else if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
+  if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
     return pango_indic_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/gurmukhi.c b/modules/indic/gurmukhi.c
index 7d75e505..108053b0 100644
--- a/modules/indic/gurmukhi.c
+++ b/modules/indic/gurmukhi.c
@@ -221,56 +221,13 @@ pango_indic_engine_x_new ()
   PangoEngineShape *result;
   result = g_new (PangoEngineShape, 1);
   result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = pango_indic_engine_shape;
   result->get_coverage = pango_indic_engine_get_coverage;
   return (PangoEngine *) result;
 }
 
-static void
-pango_indic_engine_break (const char *text,
-			 int len,
-			 PangoAnalysis * analysis, PangoLogAttr * attrs)
-{
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
-	attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
-      /* actually, is_word_stop in not correct, but simple and good enough. */
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-
-static PangoEngine *
-pango_indic_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = pango_indic_engine_break;
-
-  return (PangoEngine *) result;
-}
-
 #ifdef MODULE_PREFIX
 #define MODULE_ENTRY(func) _pango_pango_##func
 #else
@@ -287,9 +244,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo ** engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, SCRIPT_STRING "ScriptEngineLang"))
-    return pango_indic_engine_lang_new ();
-  else if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
+  if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
     return pango_indic_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/myanmar-x.c b/modules/indic/myanmar-x.c
index ea3dc3fc..9bcd8b2a 100644
--- a/modules/indic/myanmar-x.c
+++ b/modules/indic/myanmar-x.c
@@ -201,56 +201,13 @@ pango_engine_x_new ()
   PangoEngineShape *result;
   result = g_new (PangoEngineShape, 1);
   result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = pango_engine_shape;
   result->get_coverage = pango_engine_get_coverage;
   return (PangoEngine *) result;
 }
 
-static void
-pango_engine_break (const char *text,
-			 int len,
-			 PangoAnalysis * analysis, PangoLogAttr * attrs)
-{
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
-	attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
-      /* actually, is_word_stop in not correct, but simple and good enough. */
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-
-static PangoEngine *
-pango_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = pango_engine_break;
-
-  return (PangoEngine *) result;
-}
-
 #ifdef MODULE_PREFIX
 #define MODULE_ENTRY(func) _pango_pango_##func
 #else
@@ -267,9 +224,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo ** engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, SCRIPT_STRING "ScriptEngineLang"))
-    return pango_engine_lang_new ();
-  else if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
+  if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
     return pango_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/myanmar.c b/modules/indic/myanmar.c
index ea3dc3fc..9bcd8b2a 100644
--- a/modules/indic/myanmar.c
+++ b/modules/indic/myanmar.c
@@ -201,56 +201,13 @@ pango_engine_x_new ()
   PangoEngineShape *result;
   result = g_new (PangoEngineShape, 1);
   result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = pango_engine_shape;
   result->get_coverage = pango_engine_get_coverage;
   return (PangoEngine *) result;
 }
 
-static void
-pango_engine_break (const char *text,
-			 int len,
-			 PangoAnalysis * analysis, PangoLogAttr * attrs)
-{
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
-	attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
-      /* actually, is_word_stop in not correct, but simple and good enough. */
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-
-static PangoEngine *
-pango_engine_lang_new ()
-{
-  PangoEngineLang *result;
-
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = SCRIPT_STRING "ScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = pango_engine_break;
-
-  return (PangoEngine *) result;
-}
-
 #ifdef MODULE_PREFIX
 #define MODULE_ENTRY(func) _pango_pango_##func
 #else
@@ -267,9 +224,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo ** engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, SCRIPT_STRING "ScriptEngineLang"))
-    return pango_engine_lang_new ();
-  else if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
+  if (!strcmp (id, SCRIPT_STRING "ScriptEngineX"))
     return pango_engine_x_new ();
   else
     return NULL;
diff --git a/modules/indic/pango-indic-script.h b/modules/indic/pango-indic-script.h
index 90c69eca..bade7fd6 100644
--- a/modules/indic/pango-indic-script.h
+++ b/modules/indic/pango-indic-script.h
@@ -15,11 +15,6 @@
   static PangoEngineInfo script_engines[] = \
   { \
     { \
-      SCRIPT_STRING "ScriptEngineLang", \
-      PANGO_ENGINE_TYPE_LANG, \
-      PANGO_RENDER_TYPE_NONE, \
-      pango_indic_range, G_N_ELEMENTS (pango_indic_range)}, \
-    { \
       SCRIPT_STRING "ScriptEngineX", \
       PANGO_ENGINE_TYPE_SHAPE, \
       PANGO_RENDER_TYPE_X, \
diff --git a/modules/tamil/tamil-x.c b/modules/tamil/tamil-x.c
index 98c31548..a8f098d9 100644
--- a/modules/tamil/tamil-x.c
+++ b/modules/tamil/tamil-x.c
@@ -17,12 +17,6 @@ static PangoEngineRange tamil_range[] = {
 
 static PangoEngineInfo script_engines[] = {
   {
-    "TamilScriptEngineLang",
-    PANGO_ENGINE_TYPE_LANG,
-    PANGO_RENDER_TYPE_NONE,
-    tamil_range, G_N_ELEMENTS(tamil_range)
-  },
-  {
     "TamilScriptEngineX",
     PANGO_ENGINE_TYPE_SHAPE,
     PANGO_RENDER_TYPE_X,
@@ -33,55 +27,6 @@ static PangoEngineInfo script_engines[] = {
 static gint n_script_engines = G_N_ELEMENTS (script_engines);
 
 /*
- * Language script engine
- */
-
-static void 
-tamil_engine_break (const char   *text,
-		    int            len,
-		    PangoAnalysis *analysis,
-		    PangoLogAttr  *attrs)
-{
-/* Most of the code comes from pango_break
- * only difference is char stop based on modifiers
- */
-
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;           /* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
-      attrs[i].is_char_stop = (is_uni_modi(wc)) ? 0 : 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-static PangoEngine *
-tamil_engine_lang_new ()
-{
-  PangoEngineLang *result;
-  
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = "TamilScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = tamil_engine_break;
-
-  return (PangoEngine *)result;
-}
-
-/*
  * X window system script engine portion
  */
 
@@ -228,7 +173,7 @@ tamil_engine_x_new ()
   result = g_new (PangoEngineShape, 1);
 
   result->engine.id = "TamilScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = tamil_engine_shape;
   result->get_coverage = tamil_engine_get_coverage;
@@ -256,9 +201,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, "TamilScriptEngineLang"))
-    return tamil_engine_lang_new ();
-  else if (!strcmp (id, "TamilScriptEngineX"))
+  if (!strcmp (id, "TamilScriptEngineX"))
     return tamil_engine_x_new ();
   else
     return NULL;
diff --git a/modules/tamil/tamil.c b/modules/tamil/tamil.c
index 98c31548..a8f098d9 100644
--- a/modules/tamil/tamil.c
+++ b/modules/tamil/tamil.c
@@ -17,12 +17,6 @@ static PangoEngineRange tamil_range[] = {
 
 static PangoEngineInfo script_engines[] = {
   {
-    "TamilScriptEngineLang",
-    PANGO_ENGINE_TYPE_LANG,
-    PANGO_RENDER_TYPE_NONE,
-    tamil_range, G_N_ELEMENTS(tamil_range)
-  },
-  {
     "TamilScriptEngineX",
     PANGO_ENGINE_TYPE_SHAPE,
     PANGO_RENDER_TYPE_X,
@@ -33,55 +27,6 @@ static PangoEngineInfo script_engines[] = {
 static gint n_script_engines = G_N_ELEMENTS (script_engines);
 
 /*
- * Language script engine
- */
-
-static void 
-tamil_engine_break (const char   *text,
-		    int            len,
-		    PangoAnalysis *analysis,
-		    PangoLogAttr  *attrs)
-{
-/* Most of the code comes from pango_break
- * only difference is char stop based on modifiers
- */
-
-  const char *cur = text;
-  gint i = 0;
-  gunichar wc;
-
-  while (*cur && cur - text < len)
-    {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;           /* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
-      attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
-      attrs[i].is_char_stop = (is_uni_modi(wc)) ? 0 : 1;
-      attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
-
-      i++;
-      cur = g_utf8_next_char (cur);
-    }
-}
-
-static PangoEngine *
-tamil_engine_lang_new ()
-{
-  PangoEngineLang *result;
-  
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = "TamilScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = tamil_engine_break;
-
-  return (PangoEngine *)result;
-}
-
-/*
  * X window system script engine portion
  */
 
@@ -228,7 +173,7 @@ tamil_engine_x_new ()
   result = g_new (PangoEngineShape, 1);
 
   result->engine.id = "TamilScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
   result->engine.length = sizeof (result);
   result->script_shape = tamil_engine_shape;
   result->get_coverage = tamil_engine_get_coverage;
@@ -256,9 +201,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, int *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, "TamilScriptEngineLang"))
-    return tamil_engine_lang_new ();
-  else if (!strcmp (id, "TamilScriptEngineX"))
+  if (!strcmp (id, "TamilScriptEngineX"))
     return tamil_engine_x_new ();
   else
     return NULL;
diff --git a/modules/thai/thai-x.c b/modules/thai/thai-x.c
index d6b2c44c..2bb0f120 100644
--- a/modules/thai/thai-x.c
+++ b/modules/thai/thai-x.c
@@ -91,12 +91,6 @@ static PangoEngineRange thai_ranges[] = {
 
 static PangoEngineInfo script_engines[] = {
   {
-    "ThaiScriptEngineLang",
-    PANGO_ENGINE_TYPE_LANG,
-    PANGO_RENDER_TYPE_NONE,
-    thai_ranges, G_N_ELEMENTS(thai_ranges)
-  },
-  {
     "ThaiScriptEngineX",
     PANGO_ENGINE_TYPE_SHAPE,
     PANGO_RENDER_TYPE_X,
@@ -105,33 +99,6 @@ static PangoEngineInfo script_engines[] = {
 };
 
 /*
- * Language script engine
- */
-
-static void 
-thai_engine_break (const char     *text,
-		    gint            len,
-		    PangoAnalysis  *analysis,
-		    PangoLogAttr   *attrs)
-{
-}
-
-static PangoEngine *
-thai_engine_lang_new ()
-{
-  PangoEngineLang *result;
-  
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = "ThaiScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = thai_engine_break;
-
-  return (PangoEngine *)result;
-}
-
-/*
  * X window system script engine portion
  */
 
@@ -840,6 +807,11 @@ thai_engine_shape (PangoFont        *font,
   gunichar cluster[MAX_CLUSTER_CHRS];
   gint num_chrs;
 
+  gunichar base = 0;
+  gunichar group1 = 0;
+  gunichar group2 = 0;
+  int cluster_start = 0;
+
   pango_glyph_string_set_size (glyphs, 0);
 
   font_info = get_font_info (font);
@@ -917,9 +889,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, gint *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, "ThaiScriptEngineLang"))
-    return thai_engine_lang_new ();
-  else if (!strcmp (id, "ThaiScriptEngineX"))
+  if (!strcmp (id, "ThaiScriptEngineX"))
     return thai_engine_x_new ();
   else
     return NULL;
diff --git a/modules/thai/thai.c b/modules/thai/thai.c
index d6b2c44c..2bb0f120 100644
--- a/modules/thai/thai.c
+++ b/modules/thai/thai.c
@@ -91,12 +91,6 @@ static PangoEngineRange thai_ranges[] = {
 
 static PangoEngineInfo script_engines[] = {
   {
-    "ThaiScriptEngineLang",
-    PANGO_ENGINE_TYPE_LANG,
-    PANGO_RENDER_TYPE_NONE,
-    thai_ranges, G_N_ELEMENTS(thai_ranges)
-  },
-  {
     "ThaiScriptEngineX",
     PANGO_ENGINE_TYPE_SHAPE,
     PANGO_RENDER_TYPE_X,
@@ -105,33 +99,6 @@ static PangoEngineInfo script_engines[] = {
 };
 
 /*
- * Language script engine
- */
-
-static void 
-thai_engine_break (const char     *text,
-		    gint            len,
-		    PangoAnalysis  *analysis,
-		    PangoLogAttr   *attrs)
-{
-}
-
-static PangoEngine *
-thai_engine_lang_new ()
-{
-  PangoEngineLang *result;
-  
-  result = g_new (PangoEngineLang, 1);
-
-  result->engine.id = "ThaiScriptEngine";
-  result->engine.type = PANGO_ENGINE_TYPE_LANG;
-  result->engine.length = sizeof (result);
-  result->script_break = thai_engine_break;
-
-  return (PangoEngine *)result;
-}
-
-/*
  * X window system script engine portion
  */
 
@@ -840,6 +807,11 @@ thai_engine_shape (PangoFont        *font,
   gunichar cluster[MAX_CLUSTER_CHRS];
   gint num_chrs;
 
+  gunichar base = 0;
+  gunichar group1 = 0;
+  gunichar group2 = 0;
+  int cluster_start = 0;
+
   pango_glyph_string_set_size (glyphs, 0);
 
   font_info = get_font_info (font);
@@ -917,9 +889,7 @@ MODULE_ENTRY(script_engine_list) (PangoEngineInfo **engines, gint *n_engines)
 PangoEngine *
 MODULE_ENTRY(script_engine_load) (const char *id)
 {
-  if (!strcmp (id, "ThaiScriptEngineLang"))
-    return thai_engine_lang_new ();
-  else if (!strcmp (id, "ThaiScriptEngineX"))
+  if (!strcmp (id, "ThaiScriptEngineX"))
     return thai_engine_x_new ();
   else
     return NULL;
diff --git a/pango/break.c b/pango/break.c
index 3dc0465b..8e63415b 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -22,6 +22,1221 @@
 #include "pango.h"
 #include "pango-modules.h"
 
+/* See http://www.unicode.org/unicode/reports/tr14/ if you hope
+ * to understand the line breaking code.
+ */
+
+typedef enum
+{ /* Line-break decision for a (before, after) pair of break types; BREAK_OP() reads these out of the row_* tables */
+  BREAK_ALREADY_HANDLED,   /* didn't use the table (break_op starts out as this in pango_default_break) */
+  BREAK_PROHIBITED, /* no break, even if spaces intervene */
+  BREAK_IF_SPACES,  /* "indirect break" (only if there are spaces) */
+  BREAK_ALLOWED     /* "direct break" (can always break here) */
+} BreakOpportunity;
+
+enum
+{ /* Row and column positions used to index line_break_rows and the row_* arrays */
+  INDEX_OPEN_PUNCTUATION,
+  INDEX_CLOSE_PUNCTUATION,
+  INDEX_QUOTATION,
+  INDEX_NON_BREAKING_GLUE,
+  INDEX_NON_STARTER,
+  INDEX_EXCLAMATION,
+  INDEX_SYMBOL,
+  INDEX_INFIX_SEPARATOR,
+  INDEX_PREFIX,
+  INDEX_POSTFIX,
+  INDEX_NUMERIC,
+  INDEX_ALPHABETIC,
+  INDEX_IDEOGRAPHIC,
+  INDEX_INSEPARABLE,
+  INDEX_HYPHEN,
+  INDEX_AFTER,
+  INDEX_BEFORE,
+  INDEX_BEFORE_AND_AFTER,
+  INDEX_ZERO_WIDTH_SPACE,
+  INDEX_COMBINING_MARK,
+
+  /* End of the table: this value is the declared size of line_break_rows and of every row_* array */
+  INDEX_END_OF_TABLE,
+
+  /* The following are not in the tables; break types mapped to them fail IN_BREAK_TABLE() */
+  INDEX_MANDATORY,
+  INDEX_CARRIAGE_RETURN,
+  INDEX_LINE_FEED,
+  INDEX_SURROGATE,
+  INDEX_CONTINGENT,
+  INDEX_SPACE,
+  INDEX_COMPLEX_CONTEXT,
+  INDEX_AMBIGUOUS,
+  INDEX_UNKNOWN
+};
+
+static BreakOpportunity row_OPEN_PUNCTUATION[INDEX_END_OF_TABLE] = {
+  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_CLOSE_PUNCTUATION[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_QUOTATION[INDEX_END_OF_TABLE] = {
+  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_NON_BREAKING_GLUE[INDEX_END_OF_TABLE] = {
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_NON_STARTER[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_EXCLAMATION[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_SYMBOL[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_INFIX_SEPARATOR[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_PREFIX[INDEX_END_OF_TABLE] = {
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_POSTFIX[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_NUMERIC[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_ALPHABETIC[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_IDEOGRAPHIC[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_INSEPARABLE[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_HYPHEN[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_AFTER[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_BEFORE[INDEX_END_OF_TABLE] = {
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_BEFORE_AND_AFTER[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_ZERO_WIDTH_SPACE[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity row_COMBINING_MARK[INDEX_END_OF_TABLE] = {
+  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
+  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+};
+
+static BreakOpportunity *line_break_rows[INDEX_END_OF_TABLE] = {
+  row_OPEN_PUNCTUATION, /* INDEX_OPEN_PUNCTUATION */
+  row_CLOSE_PUNCTUATION, /* INDEX_CLOSE_PUNCTUATION */
+  row_QUOTATION, /* INDEX_QUOTATION */
+  row_NON_BREAKING_GLUE, /* INDEX_NON_BREAKING_GLUE */
+  row_NON_STARTER, /* INDEX_NON_STARTER */
+  row_EXCLAMATION, /* INDEX_EXCLAMATION */
+  row_SYMBOL, /* INDEX_SYMBOL */
+  row_INFIX_SEPARATOR, /* INDEX_INFIX_SEPARATOR */
+  row_PREFIX, /* INDEX_PREFIX */
+  row_POSTFIX, /* INDEX_POSTFIX */
+  row_NUMERIC, /* INDEX_NUMERIC */
+  row_ALPHABETIC, /* INDEX_ALPHABETIC */
+  row_IDEOGRAPHIC, /* INDEX_IDEOGRAPHIC */
+  row_INSEPARABLE, /* INDEX_INSEPARABLE */
+  row_HYPHEN, /* INDEX_HYPHEN */
+  row_AFTER, /* INDEX_AFTER */
+  row_BEFORE, /* INDEX_BEFORE */
+  row_BEFORE_AND_AFTER, /* INDEX_BEFORE_AND_AFTER */
+  row_ZERO_WIDTH_SPACE, /* INDEX_ZERO_WIDTH_SPACE */
+  row_COMBINING_MARK /* INDEX_COMBINING_MARK */
+};
+
+/* Map GUnicodeBreakType to table indexes: BREAK_INDEX() subscripts this array with the break type, so entries are positional; the per-entry names below assume GLib's declaration order (verify against gunicode.h) */
+static int line_break_indexes[] = {
+  INDEX_MANDATORY, /* G_UNICODE_BREAK_MANDATORY */
+  INDEX_CARRIAGE_RETURN, /* G_UNICODE_BREAK_CARRIAGE_RETURN */
+  INDEX_LINE_FEED, /* G_UNICODE_BREAK_LINE_FEED */
+  INDEX_COMBINING_MARK, /* G_UNICODE_BREAK_COMBINING_MARK */
+  INDEX_SURROGATE, /* G_UNICODE_BREAK_SURROGATE */
+  INDEX_ZERO_WIDTH_SPACE, /* G_UNICODE_BREAK_ZERO_WIDTH_SPACE */
+  INDEX_INSEPARABLE, /* G_UNICODE_BREAK_INSEPARABLE */
+  INDEX_NON_BREAKING_GLUE, /* G_UNICODE_BREAK_NON_BREAKING_GLUE */
+  INDEX_CONTINGENT, /* G_UNICODE_BREAK_CONTINGENT */
+  INDEX_SPACE, /* G_UNICODE_BREAK_SPACE */
+  INDEX_AFTER, /* G_UNICODE_BREAK_AFTER */
+  INDEX_BEFORE, /* G_UNICODE_BREAK_BEFORE */
+  INDEX_BEFORE_AND_AFTER, /* G_UNICODE_BREAK_BEFORE_AND_AFTER */
+  INDEX_HYPHEN, /* G_UNICODE_BREAK_HYPHEN */
+  INDEX_NON_STARTER, /* G_UNICODE_BREAK_NON_STARTER */
+  INDEX_OPEN_PUNCTUATION, /* G_UNICODE_BREAK_OPEN_PUNCTUATION */
+  INDEX_CLOSE_PUNCTUATION, /* G_UNICODE_BREAK_CLOSE_PUNCTUATION */
+  INDEX_QUOTATION, /* G_UNICODE_BREAK_QUOTATION */
+  INDEX_EXCLAMATION, /* G_UNICODE_BREAK_EXCLAMATION */
+  INDEX_IDEOGRAPHIC, /* G_UNICODE_BREAK_IDEOGRAPHIC */
+  INDEX_NUMERIC, /* G_UNICODE_BREAK_NUMERIC */
+  INDEX_INFIX_SEPARATOR, /* G_UNICODE_BREAK_INFIX_SEPARATOR */
+  INDEX_SYMBOL, /* G_UNICODE_BREAK_SYMBOL */
+  INDEX_ALPHABETIC, /* G_UNICODE_BREAK_ALPHABETIC */
+  INDEX_PREFIX, /* G_UNICODE_BREAK_PREFIX */
+  INDEX_POSTFIX, /* G_UNICODE_BREAK_POSTFIX */
+  INDEX_COMPLEX_CONTEXT, /* G_UNICODE_BREAK_COMPLEX_CONTEXT */
+  INDEX_AMBIGUOUS, /* G_UNICODE_BREAK_AMBIGUOUS */
+  INDEX_UNKNOWN /* G_UNICODE_BREAK_UNKNOWN */
+};
+
+#define BREAK_INDEX(btype)                \
+         (line_break_indexes[(btype)])
+#define BREAK_ROW(before_type)            \
+         (line_break_rows[BREAK_INDEX (before_type)])
+#define BREAK_OP(before_type, after_type) \
+         (BREAK_ROW (before_type)[BREAK_INDEX (after_type)])
+#define IN_BREAK_TABLE(btype)             \
+         (BREAK_INDEX(btype) < INDEX_END_OF_TABLE)
+
+/* Keep these in sync with the same macros in the test program */
+
+#define LEADING_JAMO(wc)  ((wc) >= 0x1100 && (wc) <= 0x115F)
+#define VOWEL_JAMO(wc)    ((wc) >= 0x1160 && (wc) <= 0x11A2)
+#define TRAILING_JAMO(wc) ((wc) >= 0x11A8 && (wc) <= 0x11F9)
+#define JAMO(wc)          ((wc) >= 0x1100 && (wc) <= 0x11FF)
+/* "virama script" is just an optimization; it includes a bunch of
+ * scripts without viramas in them
+ */
+#define VIRAMA_SCRIPT(wc)        ((wc) >= 0x0901 && (wc) <= 0x17FF)
+#define VIRAMA(wc) ((wc) == 0x094D || \
+                    (wc) == 0x09CD || \
+                    (wc) == 0x0A4D || \
+                    (wc) == 0x0ACD || \
+                    (wc) == 0x0B4D || \
+                    (wc) == 0x0BCD || \
+                    (wc) == 0x0C4D || \
+                    (wc) == 0x0CCD || \
+                    (wc) == 0x0D4D || \
+                    (wc) == 0x0DCA || \
+                    (wc) == 0x0E3A || \
+                    (wc) == 0x0F84 || \
+                    (wc) == 0x1039 || \
+                    (wc) == 0x17D2)
+/* Types of Japanese characters */
+#define JAPANESE(wc) ((wc) >= 0x2F00 && (wc) <= 0x30FF)
+#define KANJI(wc)    ((wc) >= 0x2F00 && (wc) <= 0x2FDF)
+#define HIRAGANA(wc) ((wc) >= 0x3040 && (wc) <= 0x309F)
+#define KATAKANA(wc) ((wc) >= 0x30A0 && (wc) <= 0x30FF)
+
+
+/* p. 132-133 of Unicode spec table 5-6 will help understand this */
+typedef enum
+{
+  STATE_SENTENCE_OUTSIDE,
+  STATE_SENTENCE_BODY,
+  STATE_SENTENCE_TERM,
+  STATE_SENTENCE_POST_TERM_CLOSE,
+  STATE_SENTENCE_POST_TERM_SPACE,
+  STATE_SENTENCE_POST_TERM_SEP,
+  STATE_SENTENCE_DOT,
+  STATE_SENTENCE_POST_DOT_CLOSE,
+  STATE_SENTENCE_POST_DOT_SPACE,
+  STATE_SENTENCE_POST_DOT_OPEN,
+  /* never include line/para separators in a sentence for now */
+  /* This isn't in the spec, but I can't figure out why they'd include
+   * one line/para separator in lines ending with Term but not with
+   * period-terminated lines, so I'm doing it for the dot lines also
+   */
+  STATE_SENTENCE_POST_DOT_SEP
+} SentenceState;
+
+/* We call "123" and "foobar" words, but "123foo" is two words;
+ * the Unicode spec just calls "123" a non-word
+ */
+typedef enum
+{
+  WordNone,
+  WordLetters,
+  WordNumbers
+} WordType;
+
+
+/**
+ * pango_default_break:
+ * @text: text to break
+ * @length: length of text in bytes
+ * @analysis: a #PangoAnalysis for the text
+ * @attrs: logical attributes to fill in
+ *
+ * This is the default break algorithm, used if no language
+ * engine overrides it. Normally you should use pango_break()
+ * instead; this function is mostly useful for chaining up
+ * from a language engine override. Unlike pango_break(),
+ * @analysis can be NULL, but only do that if you know what
+ * you're doing. (If you need an analysis to pass to pango_break(),
+ * you need to pango_itemize() or use pango_get_log_attrs().)
+ *
+ **/
+void
+pango_default_break (const gchar   *text,
+                     gint           length,
+                     PangoAnalysis *analysis,
+                     PangoLogAttr  *attrs)
+{  
+  /* The rationale for all this is in section 5.15 of the Unicode 3.0 book */
+
+  /* This is a default break implementation that should work for nearly all
+   * languages. Language engines can override it optionally.
+   */
+
+  /* FIXME one cheesy optimization here would be to memset attrs to 0
+   * before we start, and then never assign FALSE to anything
+   */
+
+  const gchar *next = text;
+  const gchar *end = text + length;
+  gint i = 0;
+  gunichar prev_wc;
+  gunichar next_wc;
+  GUnicodeType prev_type;
+  GUnicodeBreakType prev_break_type; /* skips spaces */
+  gboolean prev_was_break_space;
+  WordType current_word_type = WordNone;
+  gunichar last_word_letter = 0;
+  SentenceState sentence_state = STATE_SENTENCE_OUTSIDE;
+  /* Tracks what will be the end of the sentence if a period is
+   * determined to actually be a sentence-ending period.
+   */
+  gint possible_sentence_end = -1;
+  /* possible sentence break before Open* after a period-ended sentence */
+  gint possible_sentence_boundary = -1;
+
+  g_return_if_fail (text != NULL);
+  g_return_if_fail (attrs != NULL);
+  
+  if (next == end)
+    return;
+
+  prev_type = (GUnicodeType) -1;
+  prev_break_type = G_UNICODE_BREAK_UNKNOWN;
+  prev_was_break_space = FALSE;
+  prev_wc = 0;
+
+  next_wc = g_utf8_get_char (next);
+
+  g_assert (next_wc != 0);
+
+  while (next_wc != 0)
+    {
+      GUnicodeType type;
+      gunichar wc;
+      GUnicodeBreakType break_type;
+      BreakOpportunity break_op;
+
+      wc = next_wc;
+
+      next = g_utf8_next_char (next);
+
+      if (next >= end)
+        next_wc = 0;
+      else
+        {
+          next_wc = g_utf8_get_char (next);
+          g_assert (next_wc != 0);
+        }
+
+      type = g_unichar_type (wc);
+
+      /* Can't just use the type here since isspace() doesn't
+       * correspond to a Unicode character type
+       */
+      attrs[i].is_white = g_unichar_isspace (wc);
+
+
+      /* ---- Cursor position breaks (Grapheme breaks) ---- */
+
+      if (wc == '\n')
+        {
+          /* Break before line feed unless prev char is a CR */
+
+          if (prev_wc != '\r')
+            attrs[i].is_cursor_position = TRUE;
+          else
+            attrs[i].is_cursor_position = FALSE;
+        }
+      else if (i == 0 ||
+               prev_type == G_UNICODE_CONTROL ||
+               prev_type == G_UNICODE_FORMAT)
+        {
+          /* Break at first position (must be special cased, or if the
+           * first char is say a combining mark there won't be a
+           * cursor position at the start, which seems wrong to me
+           * ???? - maybe it makes sense though, who knows)
+           */
+          /* break after all format or control characters */
+          attrs[i].is_cursor_position = TRUE;
+        }
+      else
+        {
+          switch (type)
+            {
+            case G_UNICODE_CONTROL:
+            case G_UNICODE_FORMAT:
+              /* Break before all format or control characters */
+              attrs[i].is_cursor_position = TRUE;
+              break;
+
+            case G_UNICODE_COMBINING_MARK:
+            case G_UNICODE_ENCLOSING_MARK:
+            case G_UNICODE_NON_SPACING_MARK:
+              /* Unicode spec includes "Combining marks plus Tibetan
+               * subjoined characters" as joining chars, but lists the
+               * Tibetan subjoined characters as combining marks, and
+               * g_unichar_type() returns NON_SPACING_MARK for the Tibetan
+               * subjoined characters. So who knows, beats me.
+               */
+
+              /* It's a joining character, break only if preceded by
+               * control or format; we already handled the case where
+               * it was preceded earlier, so here we know it wasn't,
+               * don't break
+               */
+              attrs[i].is_cursor_position = FALSE;
+              break;
+
+            case G_UNICODE_LOWERCASE_LETTER:
+            case G_UNICODE_MODIFIER_LETTER:
+            case G_UNICODE_OTHER_LETTER:
+            case G_UNICODE_TITLECASE_LETTER:
+            case G_UNICODE_UPPERCASE_LETTER:
+              if (JAMO (wc))
+                {
+                  /* Break before Jamo if they are in a broken sequence or
+                   * next to non-Jamo, otherwise don't
+                   */
+                  if (LEADING_JAMO (wc) &&
+                      !LEADING_JAMO (prev_wc))
+                    attrs[i].is_cursor_position = TRUE;
+                  else if (VOWEL_JAMO (wc) &&
+                           !LEADING_JAMO (prev_wc) &&
+                           !VOWEL_JAMO (prev_wc))
+                    attrs[i].is_cursor_position = TRUE;
+                  else if (TRAILING_JAMO (wc) &&
+                           !LEADING_JAMO (prev_wc) &&
+                           !VOWEL_JAMO (prev_wc) &&
+                           !TRAILING_JAMO (prev_wc))
+                    attrs[i].is_cursor_position = TRUE;
+                  else
+                    attrs[i].is_cursor_position = FALSE;
+                }
+              else
+                {
+                  /* Handle non-Jamo non-combining chars */
+
+                  /* Break if preceded by Jamo; don't break if a
+                   * letter is preceded by a virama; break in all
+                   * other cases. No need to check whether we're
+                   * preceded by Jamo explicitly, since a Jamo is not
+                   * a virama, we just break in all cases where we
+                   * aren't preceded by a virama. Don't fool with viramas
+                   * if we aren't part of a script that uses them.
+                   */
+
+                  if (VIRAMA_SCRIPT (wc))
+                    {
+                      /* Check whether we're preceded by a virama; this
+                       * could use some optimization.
+                       */
+                      if (VIRAMA (prev_wc))
+                        attrs[i].is_cursor_position = FALSE;
+                      else
+                        attrs[i].is_cursor_position = TRUE;
+                    }
+                  else
+                    {
+                      attrs[i].is_cursor_position = TRUE;
+                    }
+                }
+              break;
+
+            default:
+              /* Some weirdo char, just break here, why not */
+              attrs[i].is_cursor_position = TRUE;
+              break;
+            }
+        }
+      
+      /* ---- Line breaking ---- */
+
+      break_type = g_unichar_break_type (wc);
+      break_op = BREAK_ALREADY_HANDLED;
+
+      g_assert (prev_break_type != G_UNICODE_BREAK_SPACE);
+
+      attrs[i].is_break = FALSE;
+      attrs[i].is_mandatory_break = FALSE;
+
+      if (attrs[i].is_cursor_position) /* If it's not a grapheme boundary,
+                                        * it's not a line break either
+                                        */
+        {
+          switch (prev_break_type)
+            {
+            case G_UNICODE_BREAK_MANDATORY:
+            case G_UNICODE_BREAK_LINE_FEED:
+              attrs[i].is_break = TRUE;
+              attrs[i].is_mandatory_break = TRUE;
+              break;
+
+            case G_UNICODE_BREAK_CARRIAGE_RETURN:
+              if (wc != '\n')
+                {
+                  attrs[i].is_break = TRUE;
+                  attrs[i].is_mandatory_break = TRUE;
+                }
+              break;
+
+            case G_UNICODE_BREAK_CONTINGENT:
+              /* can break after 0xFFFC by default, though we might want
+               * to eventually have a PangoLayout setting or
+               * PangoAttribute that disables this, if for some
+               * application breaking after objects is not desired.
+               */
+              break_op = BREAK_ALLOWED;
+              break;
+
+            case G_UNICODE_BREAK_SURROGATE:
+              /* FIXME I have no clue what to do with these,
+               * but we should do something with them
+               */
+              break;
+
+            case G_UNICODE_BREAK_AMBIGUOUS:
+              /* FIXME we need to resolve the East Asian width
+               * to decide what to do here
+               */
+            case G_UNICODE_BREAK_COMPLEX_CONTEXT:
+              /* FIXME language engines should handle this case... */
+            case G_UNICODE_BREAK_UNKNOWN:
+              /* treat unknown, complex, ambiguous as if they were
+               * alphabetic for now.
+               */
+              prev_break_type = G_UNICODE_BREAK_ALPHABETIC;
+              /* FALL THRU to use the pair table if appropriate */
+
+            default:
+
+              /* Note that our table assumes that combining marks
+               * are only applied to alphabetic characters;
+               * tech report 14 explains how to remove this assumption
+               * from the code, if anyone ever cares, but it shouldn't
+               * be a problem. Also this issue sort of goes
+               * away since we only look for breaks on grapheme
+               * boundaries.
+               */
+
+              g_assert (IN_BREAK_TABLE (prev_break_type));
+
+              switch (break_type)
+                {
+                case G_UNICODE_BREAK_MANDATORY:
+                case G_UNICODE_BREAK_LINE_FEED:
+                case G_UNICODE_BREAK_CARRIAGE_RETURN:
+                case G_UNICODE_BREAK_SPACE:
+                  /* These types all "pile up" at the end of lines and
+                   * get elided.
+                   */
+                  break_op = BREAK_PROHIBITED;
+                  break;
+
+                case G_UNICODE_BREAK_CONTINGENT:
+                  /* break before 0xFFFC by default, eventually
+                   * make this configurable?
+                   */
+                  break_op = BREAK_ALLOWED;
+                  break;
+
+                case G_UNICODE_BREAK_AMBIGUOUS:
+                  /* FIXME resolve East Asian width to figure out what to do */
+                case G_UNICODE_BREAK_COMPLEX_CONTEXT:
+                  /* FIXME language engine analysis */
+                case G_UNICODE_BREAK_UNKNOWN:
+                case G_UNICODE_BREAK_ALPHABETIC:
+                  /* treat all of the above as alphabetic for now */
+                  break_op = BREAK_OP (prev_break_type, G_UNICODE_BREAK_ALPHABETIC);
+                  break;
+
+                case G_UNICODE_BREAK_SURROGATE:
+                  /* FIXME this case needs to be handled
+                   */
+                  break_op = BREAK_IF_SPACES; /* not right at all */
+                  break;
+
+                default:
+                  g_assert (IN_BREAK_TABLE (prev_break_type));
+                  g_assert (IN_BREAK_TABLE (break_type));
+                  break_op = BREAK_OP (prev_break_type, break_type);
+                  break;
+                }
+              break;
+            }
+
+          if (break_op != BREAK_ALREADY_HANDLED)
+            {
+              switch (break_op)
+                {
+                case BREAK_PROHIBITED:
+                  /* nothing, can't break here */
+                  break;
+
+                case BREAK_IF_SPACES:
+                  /* break if prev char was space */
+                  if (prev_was_break_space)
+                    attrs[i].is_break = TRUE;
+                  break;
+
+                case BREAK_ALLOWED:
+                  attrs[i].is_break = TRUE;
+                  break;
+
+                default:
+                  g_assert_not_reached ();
+                  break;
+                }
+            }
+        }
+      
+      if (break_type != G_UNICODE_BREAK_SPACE)
+        {
+          prev_break_type = break_type;
+          prev_was_break_space = FALSE;
+        }
+      else
+        prev_was_break_space = TRUE;
+
+      /* ---- Word breaks ---- */
+
+      /* default to not a word start/end */
+      attrs[i].is_word_start = FALSE;
+      attrs[i].is_word_end = FALSE;
+
+      if (current_word_type != WordNone)
+        {
+          /* Check for a word end */
+          switch (type)
+            {
+            case G_UNICODE_COMBINING_MARK:
+            case G_UNICODE_ENCLOSING_MARK:
+            case G_UNICODE_NON_SPACING_MARK:
+              /* nothing, we just eat these up as part of the word */
+              break;
+
+            case G_UNICODE_LOWERCASE_LETTER:
+            case G_UNICODE_MODIFIER_LETTER:
+            case G_UNICODE_OTHER_LETTER:
+            case G_UNICODE_TITLECASE_LETTER:
+            case G_UNICODE_UPPERCASE_LETTER:
+              if (current_word_type == WordLetters)
+                {
+                  /* Japanese special cases for ending the word */
+                  if (JAPANESE (last_word_letter) ||
+                      JAPANESE (wc))
+                    {
+                      if ((HIRAGANA (last_word_letter) &&
+                           !HIRAGANA (wc)) ||
+                          (KATAKANA (last_word_letter) &&
+                           !(KATAKANA (wc) || HIRAGANA (wc))) ||
+                          (KANJI (last_word_letter) &&
+                           !(HIRAGANA (wc) || KANJI (wc))) ||
+                          (JAPANESE (last_word_letter) &&
+                           !JAPANESE (wc)) ||
+                          (!JAPANESE (last_word_letter) &&
+                           JAPANESE (wc)))
+                        attrs[i].is_word_end = TRUE;
+                    }
+                }
+              else
+                {
+                  /* end the number word, start the letter word */
+                  attrs[i].is_word_end = TRUE;
+                  attrs[i].is_word_start = TRUE;
+                  current_word_type = WordLetters;
+                }
+
+              last_word_letter = wc;
+              break;
+
+            case G_UNICODE_DECIMAL_NUMBER:
+            case G_UNICODE_LETTER_NUMBER:
+            case G_UNICODE_OTHER_NUMBER:
+              if (current_word_type != WordNumbers)
+                {
+                  attrs[i].is_word_end = TRUE;
+                  attrs[i].is_word_start = TRUE;
+                  current_word_type = WordNumbers;
+                }
+
+              last_word_letter = wc;
+              break;
+
+            default:
+              /* Punctuation, control/format chars, etc. all end a word. */
+              attrs[i].is_word_end = TRUE;
+              break;
+            }
+
+          if (attrs[i].is_word_end)
+            current_word_type = WordNone;
+        }
+      else
+        {
+          /* Check for a word start */
+          switch (type)
+            {
+            case G_UNICODE_LOWERCASE_LETTER:
+            case G_UNICODE_MODIFIER_LETTER:
+            case G_UNICODE_OTHER_LETTER:
+            case G_UNICODE_TITLECASE_LETTER:
+            case G_UNICODE_UPPERCASE_LETTER:
+              current_word_type = WordLetters;
+              last_word_letter = wc;
+              attrs[i].is_word_start = TRUE;
+              break;
+
+            case G_UNICODE_DECIMAL_NUMBER:
+            case G_UNICODE_LETTER_NUMBER:
+            case G_UNICODE_OTHER_NUMBER:
+              current_word_type = WordNumbers;
+              last_word_letter = wc;
+              attrs[i].is_word_start = TRUE;
+              break;
+
+            default:
+              /* No word here */
+              break;
+            }
+        }
+
+      /* ---- Sentence breaks ---- */
+
+      /* The Unicode spec specifies sentence breakpoints, so that a piece of
+       * text would be partitioned into sentences, and all characters would
+       * be inside some sentence. This code implements that for is_sentence_boundary,
+       * but tries to keep leading/trailing whitespace out of sentences for
+       * the start/end flags
+       */
+
+      /* The Unicode spec seems to say that one trailing line/para
+       * separator can be tacked on to a sentence ending in ! or ?,
+       * but not a sentence ending in period; I think they're on crack
+       * so am allowing one to be tacked onto a sentence ending in period.
+       */
+
+      /* No sentence break at the start of the text */
+
+      /* default to not a sentence breakpoint */
+      attrs[i].is_sentence_boundary = FALSE;
+      attrs[i].is_sentence_start = FALSE;
+      attrs[i].is_sentence_end = FALSE;
+
+      /* FIXME the Unicode spec lumps control/format chars with
+       * line/para separators in descriptive text, but not in the
+       * character class specs, in table 5-6, so who knows whether you
+       * are actually supposed to break on control/format
+       * characters. Seems semi-broken to break on tabs...
+       */
+
+      /* Break after line/para separators except carriage return
+       * followed by newline
+       */
+      switch (prev_type)
+        {
+        case G_UNICODE_LINE_SEPARATOR:
+        case G_UNICODE_PARAGRAPH_SEPARATOR:
+        case G_UNICODE_CONTROL:
+        case G_UNICODE_FORMAT:
+          if (wc == '\r')
+            {
+              if (next_wc != '\n')
+                attrs[i].is_sentence_boundary = TRUE;
+            }
+          else
+            attrs[i].is_sentence_boundary = TRUE;
+          break;
+
+        default:
+          break;
+        }
+
+      /* break before para/line separators except newline following
+       * carriage return
+       */
+      switch (type)
+        {
+        case G_UNICODE_LINE_SEPARATOR:
+        case G_UNICODE_PARAGRAPH_SEPARATOR:
+        case G_UNICODE_CONTROL:
+        case G_UNICODE_FORMAT:
+          if (wc == '\n')
+            {
+              if (prev_wc != '\r')
+                attrs[i].is_sentence_boundary = TRUE;
+            }
+          else
+            attrs[i].is_sentence_boundary = TRUE;
+          break;
+
+        default:
+          break;
+        }
+
+      switch (sentence_state)
+        {
+        case STATE_SENTENCE_OUTSIDE:
+          /* Start sentence if we have non-whitespace/format/control */
+          switch (type)
+            {
+            case G_UNICODE_LINE_SEPARATOR:
+            case G_UNICODE_PARAGRAPH_SEPARATOR:
+            case G_UNICODE_CONTROL:
+            case G_UNICODE_FORMAT:
+            case G_UNICODE_SPACE_SEPARATOR:
+              break;
+
+            default:
+              attrs[i].is_sentence_start = TRUE;
+              sentence_state = STATE_SENTENCE_BODY;
+              break;
+            }
+          break;
+
+        case STATE_SENTENCE_BODY:
+          /* If we already broke here due to separators, end the sentence. */
+          if (attrs[i].is_sentence_boundary)
+            {
+              attrs[i].is_sentence_end = TRUE;
+              sentence_state = STATE_SENTENCE_OUTSIDE;
+            }
+          else
+            {
+              if (wc == '.')
+                sentence_state = STATE_SENTENCE_DOT;
+              else if (wc == '?' || wc == '!')
+                sentence_state = STATE_SENTENCE_TERM;
+            }
+          break;
+
+        case STATE_SENTENCE_TERM:
+          /* End sentence on anything but close punctuation and some
+           * loosely-specified OTHER_PUNCTUATION such as period,
+           * comma, etc.; follow Unicode rules for breaks
+           */
+          switch (type)
+            {
+            case G_UNICODE_OTHER_PUNCTUATION:
+            case G_UNICODE_CLOSE_PUNCTUATION:
+              if (type == G_UNICODE_CLOSE_PUNCTUATION ||
+                  wc == '.' ||
+                  wc == ',' ||
+                  wc == '?' ||
+                  wc == '!')
+                sentence_state = STATE_SENTENCE_POST_TERM_CLOSE;
+              else
+                {
+                  attrs[i].is_sentence_end = TRUE;
+                  attrs[i].is_sentence_boundary = TRUE;
+                  sentence_state = STATE_SENTENCE_OUTSIDE;
+                }
+              break;
+
+            case G_UNICODE_SPACE_SEPARATOR:
+              attrs[i].is_sentence_end = TRUE;
+              sentence_state = STATE_SENTENCE_POST_TERM_SPACE;
+              break;
+
+            case G_UNICODE_LINE_SEPARATOR:
+            case G_UNICODE_PARAGRAPH_SEPARATOR:
+              attrs[i].is_sentence_end = TRUE;
+              sentence_state = STATE_SENTENCE_POST_TERM_SEP;
+              break;
+
+            default:
+              attrs[i].is_sentence_end = TRUE;
+              attrs[i].is_sentence_boundary = TRUE;
+              sentence_state = STATE_SENTENCE_OUTSIDE;
+              break;
+            }
+          break;
+
+        case STATE_SENTENCE_POST_TERM_CLOSE:
+          /* End sentence on anything besides more punctuation; follow
+           * rules for breaks
+           */
+          switch (type)
+            {
+            case G_UNICODE_OTHER_PUNCTUATION:
+            case G_UNICODE_CLOSE_PUNCTUATION:
+              if (type == G_UNICODE_CLOSE_PUNCTUATION ||
+                  wc == '.' ||
+                  wc == ',' ||
+                  wc == '?' ||
+                  wc == '!')
+                /* continue in this state */
+                ;
+              else
+                {
+                  attrs[i].is_sentence_end = TRUE;
+                  attrs[i].is_sentence_boundary = TRUE;
+                  sentence_state = STATE_SENTENCE_OUTSIDE;
+                }
+              break;
+
+            case G_UNICODE_SPACE_SEPARATOR:
+              attrs[i].is_sentence_end = TRUE;
+              sentence_state = STATE_SENTENCE_POST_TERM_SPACE;
+              break;
+
+            case G_UNICODE_LINE_SEPARATOR:
+            case G_UNICODE_PARAGRAPH_SEPARATOR:
+              attrs[i].is_sentence_end = TRUE;
+              /* undo the unconditional break-at-all-line/para-separators
+               * from above; I'm not sure this is what the Unicode spec
+               * intends, but it seems right - we get to include
+               * a single line/para separator in the sentence according
+               * to their rules
+               */
+              attrs[i].is_sentence_boundary = FALSE;
+              sentence_state = STATE_SENTENCE_POST_TERM_SEP;
+              break;
+
+            default:
+              attrs[i].is_sentence_end = TRUE;
+              attrs[i].is_sentence_boundary = TRUE;
+              sentence_state = STATE_SENTENCE_OUTSIDE;
+              break;
+            }
+          break;
+
+        case STATE_SENTENCE_POST_TERM_SPACE:
+
+          /* Sentence is definitely already ended; to enter this state
+           * we had to see a space, which ends the sentence.
+           */
+
+          switch (type)
+            {
+            case G_UNICODE_SPACE_SEPARATOR:
+              /* continue in this state */
+              break;
+
+            case G_UNICODE_LINE_SEPARATOR:
+            case G_UNICODE_PARAGRAPH_SEPARATOR:
+              /* undo the unconditional break-at-all-line/para-separators
+               * from above; I'm not sure this is what the Unicode spec
+               * intends, but it seems right
+               */
+              attrs[i].is_sentence_boundary = FALSE;
+              sentence_state = STATE_SENTENCE_POST_TERM_SEP;
+              break;
+
+            default:
+              attrs[i].is_sentence_boundary = TRUE;
+              sentence_state = STATE_SENTENCE_OUTSIDE;
+              break;
+            }
+          break;
+
+        case STATE_SENTENCE_POST_TERM_SEP:
+          /* Break is forced at this point, unless we're a newline
+           * after a CR, then we will break after the newline on the
+           * next iteration. Only a single Sep can be in the
+           * sentence.
+           */
+          if (!(prev_wc == '\r' && wc == '\n'))
+            attrs[i].is_sentence_boundary = TRUE;
+          sentence_state = STATE_SENTENCE_OUTSIDE;
+          break;
+
+        case STATE_SENTENCE_DOT:
+          switch (type)
+            {
+            case G_UNICODE_CLOSE_PUNCTUATION:
+              sentence_state = STATE_SENTENCE_POST_DOT_CLOSE;
+              break;
+
+            case G_UNICODE_SPACE_SEPARATOR:
+              possible_sentence_end = i;
+              sentence_state = STATE_SENTENCE_POST_DOT_SPACE;
+              break;
+
+            default:
+              /* If we broke on a control/format char, end the
+               * sentence; else this was not a sentence end, since
+               * we didn't enter the POST_DOT_SPACE state.
+               */
+              if (attrs[i].is_sentence_boundary)
+                {
+                  attrs[i].is_sentence_end = TRUE;
+
+                  sentence_state = STATE_SENTENCE_OUTSIDE;
+                }
+              else
+                sentence_state = STATE_SENTENCE_BODY;
+              break;
+            }
+          break;
+
+        case STATE_SENTENCE_POST_DOT_CLOSE:
+          switch (type)
+            {
+            case G_UNICODE_SPACE_SEPARATOR:
+              possible_sentence_end = i;
+              sentence_state = STATE_SENTENCE_POST_DOT_SPACE;
+              break;
+
+            default:
+              /* If we broke on a control/format char, end the
+               * sentence; else this was not a sentence end, since
+               * we didn't enter the POST_DOT_SPACE state.
+               */
+              if (attrs[i].is_sentence_boundary)
+                {
+                  attrs[i].is_sentence_end = TRUE;
+
+                  sentence_state = STATE_SENTENCE_OUTSIDE;
+                }
+              else
+                sentence_state = STATE_SENTENCE_BODY;
+              break;
+            }
+          break;
+
+        case STATE_SENTENCE_POST_DOT_SPACE:
+
+          possible_sentence_boundary = i;
+
+          switch (type)
+            {
+            case G_UNICODE_SPACE_SEPARATOR:
+              /* remain in current state */
+              break;
+
+            case G_UNICODE_OPEN_PUNCTUATION:
+              sentence_state = STATE_SENTENCE_POST_DOT_OPEN;
+              break;
+
+            case G_UNICODE_LOWERCASE_LETTER:
+              /* wasn't a sentence-ending period; so re-enter the sentence
+               * body
+               */
+              sentence_state = STATE_SENTENCE_BODY;
+              break;
+
+            default:
+              /* End the sentence, break, maybe start a new one */
+
+              g_assert (possible_sentence_end >= 0);
+              g_assert (possible_sentence_boundary >= 0);
+
+              attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;
+              attrs[possible_sentence_end].is_sentence_end = TRUE;
+
+              possible_sentence_end = -1;
+              possible_sentence_boundary = -1;
+
+              switch (type)
+                {
+                case G_UNICODE_LINE_SEPARATOR:
+                case G_UNICODE_PARAGRAPH_SEPARATOR:
+                case G_UNICODE_CONTROL:
+                case G_UNICODE_FORMAT:
+                  sentence_state = STATE_SENTENCE_OUTSIDE;
+                  break;
+
+                default:
+                  g_assert (type != G_UNICODE_SPACE_SEPARATOR);
+                  sentence_state = STATE_SENTENCE_BODY;
+                  attrs[i].is_sentence_start = TRUE;
+                  break;
+                }
+              break;
+            }
+          break;
+
+        case STATE_SENTENCE_POST_DOT_OPEN:
+          switch (type)
+            {
+            case G_UNICODE_OPEN_PUNCTUATION:
+              /* continue in current state */
+              break;
+
+            case G_UNICODE_LOWERCASE_LETTER:
+              /* wasn't a sentence-ending period; so re-enter the sentence
+               * body
+               */
+              sentence_state = STATE_SENTENCE_BODY;
+              break;
+
+            default:
+              /* End the sentence, break, maybe start a new one */
+
+              g_assert (possible_sentence_end >= 0);
+              g_assert (possible_sentence_boundary >= 0);
+
+              attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;
+              attrs[possible_sentence_end].is_sentence_end = TRUE;
+
+              possible_sentence_end = -1;
+              possible_sentence_boundary = -1;
+
+              switch (type)
+                {
+                case G_UNICODE_LINE_SEPARATOR:
+                case G_UNICODE_PARAGRAPH_SEPARATOR:
+                case G_UNICODE_CONTROL:
+                case G_UNICODE_FORMAT:
+                  sentence_state = STATE_SENTENCE_OUTSIDE;
+                  break;
+
+                default:
+                  g_assert (type != G_UNICODE_SPACE_SEPARATOR);
+                  sentence_state = STATE_SENTENCE_BODY;
+                  attrs[i].is_sentence_start = TRUE;
+                  break;
+                }
+              break;
+            }
+          break;
+
+        case STATE_SENTENCE_POST_DOT_SEP:
+          /* Break is forced at this point, unless we're a newline
+           * after a CR, then we will break after the newline on the
+           * next iteration. Only a single Sep can be in the
+           * sentence.
+           */
+          if (!(prev_wc == '\r' && wc == '\n'))
+            attrs[i].is_sentence_boundary = TRUE;
+          sentence_state = STATE_SENTENCE_OUTSIDE;
+
+          g_assert (possible_sentence_end >= 0);
+          g_assert (possible_sentence_boundary >= 0);
+
+          attrs[possible_sentence_end].is_sentence_end = TRUE;
+
+          possible_sentence_end = -1;
+          possible_sentence_boundary = -1;
+          break;
+
+        default:
+          g_assert_not_reached ();
+          break;
+        }
+
+      prev_type = type;
+      prev_wc = wc;
+      ++i;
+    }
+}
+
 /**
  * pango_break:
  * @text:      the text to process
@@ -32,31 +1247,120 @@
  * Determines possible line, word, and character breaks
  * for a string of Unicode text.
  */
-void pango_break (const gchar   *text, 
-		  gint           length, 
-		  PangoAnalysis *analysis, 
-		  PangoLogAttr  *attrs)
+void
+pango_break (const gchar   *text,
+             gint           length,
+             PangoAnalysis *analysis,
+             PangoLogAttr  *attrs)
 {
-  /* Pseudo-implementation */
+  g_return_if_fail (text != NULL);
+  g_return_if_fail (analysis != NULL);
+  g_return_if_fail (attrs != NULL);
+  
+  if (length < 0)
+    length = strlen (text);
 
-  const gchar *cur = text;
-  gint i = 0;
-  gunichar wc;
+  if (analysis->lang_engine &&
+      analysis->lang_engine->script_break)
+    (* analysis->lang_engine->script_break) (text, length, analysis, attrs);
+  else
+    pango_default_break (text, length, analysis, attrs);
+}
+
+/**
+ * pango_find_paragraph_boundary:
+ * @text: UTF-8 text
+ * @length: length of @text in bytes, or -1 if nul-terminated
+ * @paragraph_delimiter_index: return location for index of delimiter
+ * @next_paragraph_start: return location for start of next paragraph
+ * 
+ * Locates a paragraph boundary in @text. A boundary is caused by
+ * delimiter characters, such as a newline, carriage return, carriage
+ * return-newline pair, or Unicode paragraph separator character.  The
+ * index of the run of delimiters is returned in
+ * @paragraph_delimiter_index. The index of the start of the paragraph
+ * (index after all delimiters) is stored in @next_paragraph_start.
+ *
+ * If no delimiters are found, both @paragraph_delimiter_index and
+ * @next_paragraph_start are filled with the length of @text (an index one
+ * off the end).
+ **/
+void
+pango_find_paragraph_boundary (const gchar *text,
+                               gint         length,
+                               gint        *paragraph_delimiter_index,
+                               gint        *next_paragraph_start)
+{
+  const gchar *p = text;
+  const gchar *end;
+  const gchar *start = NULL;
+  const gchar *delimiter = NULL;
+  gunichar prev_wc;
+
+  /* Only one character has type G_UNICODE_PARAGRAPH_SEPARATOR in
+   * Unicode 3.0; update this if that changes.
+   */
+#define PARAGRAPH_SEPARATOR 0x2029
+  
+  if (length < 0)
+    length = strlen (text);
+
+  end = text + length;
+
+  if (paragraph_delimiter_index)
+    *paragraph_delimiter_index = length;
+
+  if (next_paragraph_start)
+    *next_paragraph_start = length;
+
+  if (length == 0)
+    return;
+
+  /* FIXME there's plenty of room to optimize this; e.g. there's
+   * no real need to g_utf8_get_char() on every char
+   */
   
-  while (*cur && cur - text < length)
+  prev_wc = 0;
+
+  while (p != end)
     {
-      wc = g_utf8_get_char (cur);
-      if (wc == (gunichar)-1)
-	break;			/* FIXME: ERROR */
-
-      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == '\n' || wc == 0x200b) ? 1 : 0;
-      attrs[i].is_break = i == 0 || attrs[i-1].is_white || attrs[i].is_white;
-      attrs[i].is_char_stop = 1;
-      attrs[i].is_word_stop = ((i == 0) || attrs[i-1].is_white) && !attrs[i].is_white;
+      gunichar wc;
+
+      wc = g_utf8_get_char (p);
+      
+      if (prev_wc == '\n' ||
+          prev_wc == PARAGRAPH_SEPARATOR)
+        {
+          g_assert (delimiter);
+          start = p;
+          break;
+        }
+      else if (prev_wc == '\r')
+        {
+          /* don't break between \r and \n */
+          if (wc != '\n')
+            {
+              g_assert (delimiter);
+              start = p;
+              break;
+            }
+        }
       
-      i++;
-      cur = g_utf8_next_char (cur);
+      if ((wc == '\n' ||
+           wc == '\r' ||
+           wc == PARAGRAPH_SEPARATOR) &&
+          delimiter == NULL)
+        delimiter = p;
+      
+      prev_wc = wc;
+      p = g_utf8_next_char (p);
     }
+
+  if (delimiter && paragraph_delimiter_index)
+    *paragraph_delimiter_index = delimiter - text;
+
+  if (start && next_paragraph_start)
+    *next_paragraph_start = start - text;
 }
 
 /**
@@ -85,17 +1389,20 @@ pango_get_log_attrs (const char    *text,
   const char *range_start;
   int chars_in_range;
   static guint engine_type_id = 0;
-  static guint render_type_id = 0;  
+  static guint render_type_id = 0;
   PangoAnalysis analysis = { NULL, NULL, NULL, 0 };
 
   analysis.level = level;
-  
+
   g_return_if_fail (length == 0 || text != NULL);
   g_return_if_fail (log_attrs != NULL);
-  
+
+  if (length < 0)
+    length = strlen (text);
+
   if (length == 0)
     return;
-  
+
   if (engine_type_id == 0)
     {
       engine_type_id = g_quark_from_static_string (PANGO_ENGINE_TYPE_LANG);
@@ -105,23 +1412,27 @@ pango_get_log_attrs (const char    *text,
   n_chars = g_utf8_strlen (text, length);
 
   lang_map = pango_find_map (language, engine_type_id, render_type_id);
-    
+
   range_start = text;
   range_engine = (PangoEngineLang*) pango_map_get_engine (lang_map,
                                                           g_utf8_get_char (text));
   analysis.lang_engine = range_engine;
   chars_broken = 0;
   chars_in_range = 1;
-  
+
   end = text + length;
   pos = g_utf8_next_char (text);
-  
+
   while (pos != end)
     {
+      g_assert (chars_in_range > 0);
+      g_assert (range_start <= end);
+      g_assert (end - pos < length);
+
       analysis.lang_engine =
         (PangoEngineLang*) pango_map_get_engine (lang_map,
                                                  g_utf8_get_char (pos));
-      
+
       if (range_engine != analysis.lang_engine)
         {
           /* Engine has changed; do the breaking for the current range,
@@ -133,7 +1444,7 @@ pango_get_log_attrs (const char    *text,
                        log_attrs + chars_broken);
 
           chars_broken += chars_in_range;
-          
+
           range_start = pos;
           range_engine = analysis.lang_engine;
           chars_in_range = 1;
@@ -142,15 +1453,15 @@ pango_get_log_attrs (const char    *text,
         {
           chars_in_range += 1;
         }
-      
+
       pos = g_utf8_next_char (pos);
     }
-    
+
     g_assert (chars_in_range > 0);
     g_assert (range_start != end);
     g_assert (pos == end);
     g_assert (range_engine == analysis.lang_engine);
-    
+
     pango_break (range_start,
                  end - range_start,
                  &analysis,
diff --git a/pango/pango-context.c b/pango/pango-context.c
index 0e9f7146..c8a7d5c1 100644
--- a/pango/pango-context.c
+++ b/pango/pango-context.c
@@ -510,7 +510,10 @@ pango_context_get_base_dir (PangoContext *context)
  * @cached_iter:      Cached attribute iterator, or NULL
  *
  * Breaks a piece of text into segments with consistent
- * directional level and shaping engine.
+ * directional level and shaping engine. Each byte of @text will
+ * be contained in exactly one of the items in the returned list;
+ * the generated list of items will be in logical order (the start
+ * offsets of the items are ascending).
  *
  * @cached_iter should be an iterator over @attrs currently positioned at a
  * range before or containing @start_index; @cached_iter will be advanced to
@@ -565,7 +568,7 @@ pango_itemize (PangoContext      *context,
   embedding_levels = g_new (guint8, n_chars);
 
   pango_log2vis_get_embedding_levels (text_ucs4, n_chars, &base_dir,
-					embedding_levels);
+                                      embedding_levels);
 
   /* Storing these as ranges would be a lot more efficient,
    * but also more complicated... we take the simple
@@ -603,7 +606,11 @@ pango_itemize (PangoContext      *context,
 	  fonts[i] != fonts[i-1] ||
 	  extra_attr_lists[i] != extra_attr_lists[i-1])
 	{
-	  item = g_new (PangoItem, 1);
+          /* assert that previous item got at least one char */
+          g_assert (item == NULL || item->length > 0);
+          g_assert (item == NULL || item->num_chars > 0);
+          
+	  item = pango_item_new ();
 	  item->offset = p - text;
 	  item->num_chars = 0;
 	  item->analysis.level = embedding_levels[i];
diff --git a/pango/pango-item.c b/pango/pango-item.c
index d3e0dbd9..b13b60e9 100644
--- a/pango/pango-item.c
+++ b/pango/pango-item.c
@@ -88,3 +88,45 @@ pango_item_free (PangoItem *item)
   g_free (item);
 }
 
+/**
+ * pango_item_split:
+ * @orig: a #PangoItem
+ * @split_index: byte index of position to split item, relative to the start of the item
+ * @split_offset: number of chars between start of @orig and @split_index
+ * 
+ * Modifies @orig to cover only the text after @split_index, and
+ * returns a new item that covers the text before @split_index that
+ * used to be in @orig. You can think of @split_index as the length of
+ * the returned item. @split_index may not be 0, and it may not be
+ * greater than or equal to the length of @orig (that is, there must
+ * be at least one byte assigned to each item, you can't create a
+ * zero-length item). @split_offset is the length of the first item in
+ * chars, and must be provided because the text used to generate the
+ * item isn't available, so pango_item_split() can't count the char
+ * length of the split items itself.
+ * 
+ * Return value: new item representing text before @split_index
+ **/
+PangoItem*
+pango_item_split (PangoItem  *orig,
+                  int         split_index,
+                  int         split_offset)
+{
+  PangoItem *new_item;
+
+  g_return_val_if_fail (orig != NULL, NULL);
+  g_return_val_if_fail (orig->length > 0, NULL);
+  g_return_val_if_fail (split_index > 0, NULL);
+  g_return_val_if_fail (split_index < orig->length, NULL);
+  g_return_val_if_fail (split_offset > 0, NULL);
+  g_return_val_if_fail (split_offset < orig->num_chars, NULL);
+  /* Copy only after validation: a failed check must not leak the copy. */
+  new_item = pango_item_copy (orig);
+  new_item->length = split_index;
+  new_item->num_chars = split_offset;
+  /* @orig keeps everything from @split_index onwards. */
+  orig->offset += split_index;
+  orig->length -= split_index;
+  orig->num_chars -= split_offset;
+
+  return new_item;
+}
diff --git a/pango/pango-item.h b/pango/pango-item.h
index 407e5004..d4b067eb 100644
--- a/pango/pango-item.h
+++ b/pango/pango-item.h
@@ -49,9 +49,12 @@ struct _PangoItem
   PangoAnalysis analysis;
 };
 
-PangoItem *pango_item_new  (void);
-PangoItem *pango_item_copy (PangoItem *item);
-void       pango_item_free (PangoItem *item);
+PangoItem *pango_item_new   (void);
+PangoItem *pango_item_copy  (PangoItem  *item);
+void       pango_item_free  (PangoItem  *item);
+PangoItem *pango_item_split (PangoItem  *orig,
+                             int         split_index,
+                             int         split_offset);
 
 #ifdef __cplusplus
 }
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index e0d921ac..3d4f6f8c 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -914,29 +914,49 @@ pango_layout_index_to_line_x (PangoLayout *layout,
 			      int         *x_pos)
 {
   GSList *tmp_list;
-  int tmp_line = 0;
-  int bytes_seen = 0;
-
+  int line_num = 0;
+  PangoLayoutLine *layout_line = NULL;
+  
   g_return_if_fail (layout != NULL);
+  g_return_if_fail (index >= 0);
+  g_return_if_fail (index <= layout->length);
 
   pango_layout_check_lines (layout);
 
   tmp_list = layout->lines;
   while (tmp_list)
     {
-      PangoLayoutLine *layout_line = tmp_list->data;
+      PangoLayoutLine *tmp_line = tmp_list->data;
+
+      /* use end of previous layout_line if index was in the paragraph
+       * delimiters
+       */
+      if (layout_line && layout_line->start_index > index)
+        {
+          if (line)
+	    *line = line_num;
+          
+	  pango_layout_line_index_to_x (layout_line,
+                                        layout_line->start_index + layout_line->length,
+                                        trailing, x_pos);
+	  return;
+
+        }
 
-      if (bytes_seen + layout_line->length > index)
+      layout_line = tmp_line;
+      ++line_num;
+      
+      if ((layout_line->start_index + layout_line->length) > index)
 	{
 	  if (line)
-	    *line = tmp_line;
-
-	  pango_layout_line_index_to_x (layout_line, index, trailing, x_pos);
+	    *line = line_num;
+          
+	  pango_layout_line_index_to_x (layout_line, index,
+                                        trailing, x_pos);
 	  return;
 	}
 
       tmp_list = tmp_list->next;
-      bytes_seen += layout_line->length;
     }
 
   if (line)
@@ -978,7 +998,6 @@ pango_layout_move_cursor_visually (PangoLayout *layout,
 				   int         *new_index,
 				   int         *new_trailing)
 {
-  int bytes_seen = 0;
   PangoDirection base_dir;
   PangoLayoutLine *line = NULL;
   PangoLayoutLine *prev_line = NULL;
@@ -1005,14 +1024,18 @@ pango_layout_move_cursor_visually (PangoLayout *layout,
   tmp_list = layout->lines;
   while (tmp_list)
     {
-      line = tmp_list->data;
+      PangoLayoutLine *tmp_line = tmp_list->data;
 
-      if (bytes_seen + line->length > old_index || !tmp_list->next)
-	break;
+      if (line && line->start_index > old_index)
+        break; /* stick with the previous line */
 
-      tmp_list = tmp_list->next;
       prev_line = line;
-      bytes_seen += line->length;
+      line = tmp_line;
+      
+      if (line->start_index + line->length > old_index || !tmp_list->next)
+	break;
+      
+      tmp_list = tmp_list->next;
     }
 
   if (tmp_list->next)
@@ -1024,9 +1047,13 @@ pango_layout_move_cursor_visually (PangoLayout *layout,
     old_index = g_utf8_next_char (layout->text + old_index) - layout->text;
 
   log2vis_map = pango_layout_line_get_log2vis_map (line, TRUE);
-  n_vis = g_utf8_strlen (layout->text + bytes_seen, line->length);
+  n_vis = g_utf8_strlen (layout->text + line->start_index, line->length);
 
-  vis_pos = log2vis_map[old_index - bytes_seen];
+  /* Clamp old_index to fit on the line */
+  if (old_index > (line->start_index + line->length))
+    old_index = line->start_index + line->length;
+  
+  vis_pos = log2vis_map[old_index - line->start_index];
   g_free (log2vis_map);
   
   if (vis_pos == 0 && direction < 0)
@@ -1040,7 +1067,6 @@ pango_layout_move_cursor_visually (PangoLayout *layout,
 	      return;
 	    }
 	  line = prev_line;
-	  bytes_seen -= line->length;
 	}
       else
 	{
@@ -1050,11 +1076,10 @@ pango_layout_move_cursor_visually (PangoLayout *layout,
 	      *new_trailing = 0;
 	      return;
 	    }
-	  bytes_seen += line->length;
 	  line = next_line;
 	}
       
-      vis_pos = g_utf8_strlen (layout->text + bytes_seen, line->length);
+      vis_pos = g_utf8_strlen (layout->text + line->start_index, line->length);
     }
   else if (vis_pos == n_vis && direction > 0)
     {
@@ -1066,7 +1091,6 @@ pango_layout_move_cursor_visually (PangoLayout *layout,
 	      *new_trailing = 0;
 	      return;
 	    }
-	  bytes_seen += line->length;
 	  line = next_line;
 	}
       else
@@ -1078,7 +1102,6 @@ pango_layout_move_cursor_visually (PangoLayout *layout,
 	      return;
 	    }
 	  line = prev_line;
-	  bytes_seen -= line->length;
 	}
       
       vis_pos = 0;
@@ -1087,10 +1110,10 @@ pango_layout_move_cursor_visually (PangoLayout *layout,
   vis_pos += (direction > 0) ? 1 : -1;
   
   vis2log_map = pango_layout_line_get_vis2log_map (line, TRUE);
-  *new_index = bytes_seen + vis2log_map[vis_pos];
+  *new_index = line->start_index + vis2log_map[vis_pos];
   g_free (vis2log_map);
 
-  if (*new_index == bytes_seen + line->length && line->length > 0)
+  if (*new_index == line->start_index + line->length && line->length > 0)
     {
       *new_index = g_utf8_prev_char (layout->text + *new_index) - layout->text;
       *new_trailing = 1;
@@ -1175,8 +1198,9 @@ pango_layout_index_to_pos (PangoLayout    *layout,
 			   PangoRectangle *pos)
 {
   PangoRectangle logical_rect;
-  int bytes_seen = 0;
   PangoLayoutIter *iter;
+  PangoLayoutLine *layout_line = NULL;
+  gboolean notfound = FALSE;
   
   g_return_if_fail (layout != NULL);
   g_return_if_fail (index >= 0);
@@ -1184,43 +1208,56 @@ pango_layout_index_to_pos (PangoLayout    *layout,
   
   iter = pango_layout_get_iter (layout);
   
-  do
+  while (TRUE)
     {
-      PangoLayoutLine *layout_line = pango_layout_iter_get_line (iter);
-
-      pango_layout_iter_get_line_extents (iter, NULL, &logical_rect);
+      PangoLayoutLine *tmp_line = pango_layout_iter_get_line (iter);
 
-      if (bytes_seen + layout_line->length > index)
-	{
-	  int x_pos;
-
-	  pos->y = logical_rect.y;
-	  pos->height = logical_rect.height;
-
-	  pango_layout_line_index_to_x (layout_line, index, FALSE, &x_pos);
-	  pos->x = logical_rect.x + x_pos;
-	  
-	  pango_layout_line_index_to_x (layout_line, index, TRUE, &x_pos);
-	  pos->width = (logical_rect.x + x_pos) - pos->x;
+      if (layout_line && tmp_line->start_index > index)
+        {
+          /* index is in the paragraph delimiters, move to
+           * end of previous line
+           */
+          index = layout_line->start_index + layout_line->length;
+          break;
+        }
 
-          pango_layout_iter_free (iter);
-          
-	  return;
-	}
+      layout_line = tmp_line;
+      
+      pango_layout_iter_get_line_extents (iter, NULL, &logical_rect);
+      
+      if (layout_line->start_index + layout_line->length > index)
+        break;
 
-      bytes_seen += layout_line->length;
-      if (bytes_seen < layout->length && layout->text[bytes_seen] == '\n')
-	bytes_seen++;
+      if (!pango_layout_iter_next_line (iter))
+        {
+          notfound = TRUE;
+          break;
+        }
     }
-  while (pango_layout_iter_next_line (iter));
 
-  /* Iterator should now be on the "NULL" run at the end of the last
-   * line, which is a zero-width rectangle. Return the extents of
-   * that run.
-   */
+  if (notfound)
+    {
+      /* Iterator should now be on the "NULL" run at the end of the last
+       * line, which is a zero-width rectangle. Return the extents of
+       * that run.
+       */
+      
+      pango_layout_iter_get_run_extents (iter, NULL, pos);
+    }
+  else
+    {
+      int x_pos;
 
-  pango_layout_iter_get_run_extents (iter, NULL, pos);
+      pos->y = logical_rect.y;
+      pos->height = logical_rect.height;
 
+      pango_layout_line_index_to_x (layout_line, index, FALSE, &x_pos);
+      pos->x = logical_rect.x + x_pos;
+	  
+      pango_layout_line_index_to_x (layout_line, index, TRUE, &x_pos);
+      pos->width = (logical_rect.x + x_pos) - pos->x;
+    }
+  
   pango_layout_iter_free (iter);
 }
 
@@ -1409,7 +1446,6 @@ pango_layout_get_cursor_pos (PangoLayout    *layout,
   PangoLayoutLine *layout_line = NULL; /* Quiet GCC */
   int x1_trailing;
   int x2;
-  int bytes_seen = 0;
   PangoLayoutIter *iter;
   
   g_return_if_fail (layout != NULL);
@@ -1420,26 +1456,32 @@ pango_layout_get_cursor_pos (PangoLayout    *layout,
   iter = pango_layout_get_iter (layout);
   
   /* Find the line */
-  do 
+  while (TRUE)
     {
-      layout_line = pango_layout_iter_get_line (iter);
-      
-      pango_layout_iter_get_line_extents (iter, NULL, &line_rect);
+      PangoLayoutLine *tmp_line;
 
-      if (bytes_seen + layout_line->length > index)
-	break;
+      tmp_line = pango_layout_iter_get_line (iter);
 
-      /* Want last line of layout for trailing position */
-      if (!pango_layout_iter_at_last_line (iter))
-        bytes_seen += layout_line->length;
+      if (layout_line && layout_line->start_index > index)
+        break; /* keep previous layout_line and line_rect */
+
+      layout_line = tmp_line;
+      pango_layout_iter_get_line_extents (iter, NULL, &line_rect);
+      
+      if ((layout_line->start_index + layout_line->length) > index)
+        break;
+
+      if (!pango_layout_iter_next_line (iter))
+        break; /* use end of the last line */
     }
-  while (pango_layout_iter_next_line (iter));
 
   pango_layout_iter_free (iter);
   iter = NULL;
+
+  g_assert (index >= layout_line->start_index);
   
   /* Examine the trailing edge of the character before the cursor */
-  if (index == bytes_seen)
+  if (index == layout_line->start_index)
     {
       dir1 = base_dir;
       if (base_dir == PANGO_DIRECTION_LTR)
@@ -1453,9 +1495,9 @@ pango_layout_get_cursor_pos (PangoLayout    *layout,
       dir1 = pango_layout_line_get_char_direction (layout_line, prev_index);
       pango_layout_line_index_to_x (layout_line, prev_index, TRUE, &x1_trailing);
     }
-
+  
   /* Examine the leading edge of the character after the cursor */
-  if (index == bytes_seen + layout_line->length)
+  if (index >= layout_line->start_index + layout_line->length)
     {
       dir2 = base_dir;
       if (base_dir == PANGO_DIRECTION_LTR)
@@ -2137,20 +2179,14 @@ static inline gboolean
 can_break_at (PangoLayout *layout,
 	      gint         offset)
 {
-  /* While a break between a letter and following whitespace is *
-   * legimate, we disallow it here to avoid lines starting with *
-   * whitespace. We probably should have a mode where we treat all
-   * white-space as of fungeable width - appropriate for typography
-   * but not for editing.
+  /* We probably should have a mode where we treat all white-space as
+   * of fungible width - appropriate for typography but not for
+   * editing.
    */
-  if (offset == 0)
-    return FALSE;
-  else if (offset == layout->n_chars)
+  if (offset == layout->n_chars)
     return TRUE;
-  else  
-    return (layout->log_attrs[offset].is_break &&
-	    (layout->log_attrs[offset - 1].is_white ||
-	     !layout->log_attrs[offset].is_white));
+  else
+    return layout->log_attrs[offset].is_break;
 }
 
 static inline gboolean
@@ -2258,15 +2294,10 @@ process_item (PangoLayout     *layout,
 	  else
 	    {
 	      PangoItem *new_item = pango_item_copy (item);
-	      
+
 	      length = g_utf8_offset_to_pointer (text + item->offset, break_num_chars) - (text + item->offset);
-	      
-	      new_item->length = length;
-	      new_item->num_chars = break_num_chars;
-	      
-	      item->offset += length;
-	      item->length -= length;
-	      item->num_chars -= break_num_chars;
+
+              new_item = pango_item_split (item, length, break_num_chars);
 	      
 	      if (shape_set)
 		imposed_shape (item->num_chars, &shape_ink, &shape_logical, glyphs);
@@ -2294,6 +2325,7 @@ struct _ParaBreakState
   gboolean first_line;
   const char *text;
   gint start_offset;
+  gint line_start_index;
 };
 
 static void
@@ -2310,7 +2342,8 @@ process_line (PangoLayout    *layout,
   GSList *break_link = NULL;        /* Link holding run before break */
   
   line = pango_layout_line_new (layout);
-
+  line->start_index = state->line_start_index;
+  
   if (state->first_line)
     remaining_width = (layout->indent >= 0) ? layout->width - layout->indent : layout->width;
   else
@@ -2393,12 +2426,14 @@ process_line (PangoLayout    *layout,
   pango_layout_line_postprocess (line);
   layout->lines = g_slist_prepend (layout->lines, line);
   state->first_line = FALSE;
+  state->line_start_index += line->length;
 }
 
 static void
-get_para_log_attrs (const char   *text,
-		    GList        *items,
-		    PangoLogAttr *log_attrs)
+get_items_log_attrs (const char   *text,
+                     GList        *items,
+                     PangoLogAttr *log_attrs,
+                     int           para_delimiter_len)
 {
   int offset = 0;
   int index = 0;
@@ -2415,11 +2450,10 @@ get_para_log_attrs (const char   *text,
 	  PangoItem *next_item = items->next->data;
 
 	  /* FIXME: Handle language tags */
-	  if (next_item->analysis.level != tmp_item.analysis.level ||
-	      (next_item->analysis.lang_engine != tmp_item.analysis.lang_engine &&
-	       (!next_item->analysis.lang_engine || !tmp_item.analysis.lang_engine ||
+	  if (next_item->analysis.lang_engine != tmp_item.analysis.lang_engine &&
+              (!next_item->analysis.lang_engine || !tmp_item.analysis.lang_engine ||
 		strcmp (next_item->analysis.lang_engine->engine.id,
-			tmp_item.analysis.lang_engine->engine.id) != 0)))
+			tmp_item.analysis.lang_engine->engine.id) != 0))
 	    break;
 	  else
 	    {
@@ -2430,6 +2464,10 @@ get_para_log_attrs (const char   *text,
 	  items = items->next;
 	}
 
+      /* Break the paragraph delimiters with the last item */
+      if (items->next == NULL)
+        tmp_item.length += para_delimiter_len;
+
       pango_break (text + index, tmp_item.length, &tmp_item.analysis, log_attrs + offset);
 
       offset += tmp_item.num_chars;
@@ -2488,21 +2526,33 @@ pango_layout_check_lines (PangoLayout *layout)
   
   start_offset = 0;
   start = layout->text;
+
   do
     {
-      int para_chars = 0;
-      const char *end = start;
+      int delim_len;
+      const char *end;
+      int delimiter_index, next_para_index;
       ParaBreakState state;
-  
-      while (end != layout->text + layout->length && *end != '\n')
-	{
-	  end = g_utf8_next_char (end);
-	  para_chars++;
-	}
       
-      if (end == layout->text + layout->length)
+      pango_find_paragraph_boundary (start,
+                                     (layout->text + layout->length) - start,
+                                     &delimiter_index,
+                                     &next_para_index);
+
+      g_assert (next_para_index >= delimiter_index);
+      
+      end = start + delimiter_index;
+      
+      delim_len = next_para_index - delimiter_index;
+      
+      if ((end + delim_len) == (layout->text + layout->length))
 	done = TRUE;
 
+      g_assert (end <= (layout->text + layout->length));
+      g_assert (start <= (layout->text + layout->length));
+      g_assert (delim_len < 3);
+      g_assert (delim_len >= 0);
+      
       state.items = pango_itemize (layout->context,
 				   layout->text,
 				   start - layout->text,
@@ -2510,34 +2560,35 @@ pango_layout_check_lines (PangoLayout *layout)
 				   attrs,
 				   iter);
 
-      get_para_log_attrs (start, state.items, layout->log_attrs + start_offset);
+      get_items_log_attrs (start, state.items,
+                           layout->log_attrs + start_offset,
+                           delim_len);
 
       if (state.items)
 	{
 	  state.first_line = TRUE;
 	  state.start_offset = start_offset;
 	  state.text = start;
+          state.line_start_index = state.text - layout->text;
 	  
 	  while (state.items)
-	    process_line (layout, &state);
+            process_line (layout, &state);
 	}
       else
-	layout->lines = g_slist_prepend (layout->lines,
-					 pango_layout_line_new (layout));
-      
-      start_offset += para_chars;
+        {
+          PangoLayoutLine *empty_line;
+
+          empty_line = pango_layout_line_new (layout);
+          empty_line->start_index = start - layout->text; 
+
+          layout->lines = g_slist_prepend (layout->lines,
+                                           empty_line);
+        }
 
       if (!done)
-	{
-	  /* Handle newline */
-	  layout->log_attrs[start_offset].is_break = TRUE;
-	  layout->log_attrs[start_offset].is_white = TRUE;
-	  layout->log_attrs[start_offset].is_char_stop = TRUE;
-	  layout->log_attrs[start_offset].is_word_stop = TRUE;
-	  start_offset += 1;
-
-	  start = end + 1;
-	}
+        start_offset += g_utf8_strlen (start, (end - start) + delim_len);
+
+      start = end + delim_len;
     }
   while (!done);
 
@@ -3140,6 +3191,8 @@ pango_layout_line_new (PangoLayout *layout)
   private->line.runs = 0;
   private->line.length = 0;
 
+  /* Note that we leave start_index uninitialized */
+  
   return (PangoLayoutLine *) private;
 }
 
diff --git a/pango/pango-layout.h b/pango/pango-layout.h
index b19ea973..ef87869d 100644
--- a/pango/pango-layout.h
+++ b/pango/pango-layout.h
@@ -45,7 +45,8 @@ typedef enum {
 struct _PangoLayoutLine
 {
   PangoLayout *layout;
-  gint         length;		/* length of line in bytes*/
+  gint         start_index;     /* start of line as byte index into layout->text */
+  gint         length;		/* length of line in bytes */
   GSList      *runs;
 };
 
diff --git a/pango/pango.h b/pango/pango.h
index e0652fd0..3cbc0f59 100644
--- a/pango/pango.h
+++ b/pango/pango.h
@@ -39,14 +39,40 @@ extern "C" {
 #include <pango/pango-layout.h>
 #include <pango/pango-types.h>
 
-/* Logical attributes of a character
+/* Logical attributes of a character.
  */
 struct _PangoLogAttr
 {
-  guint is_break : 1;  /* Break in front of character */
-  guint is_white : 1;
-  guint is_char_stop : 1;
-  guint is_word_stop : 1;
+  guint is_break : 1;           /* Can break line in front of character */
+
+  guint is_mandatory_break : 1; /* Must break line in front of character */
+  
+  guint is_white : 1;           /* Whitespace character */
+
+  /* cursor can appear in front of character (i.e. this is a grapheme
+   * boundary, or the first character in the text)
+   */
+  guint is_cursor_position : 1;
+  
+  /* Note that in degenerate cases, you could have both start/end set on
+   * some text, most likely for sentences (e.g. no space after a period, so
+   * the next sentence starts right away)
+   */
+  
+  guint is_word_start : 1;      /* first character in a word */
+  guint is_word_end   : 1;      /* is first non-word char after a word */
+
+  /* There are two ways to divide sentences. The first assigns all
+   * intersentence whitespace/control/format chars to some sentence,
+   * so all chars are in some sentence; is_sentence_boundary denotes
+   * the boundaries there. The second way doesn't assign
+   * between-sentence spaces, etc. to any sentence, so
+   * is_sentence_start/is_sentence_end mark the boundaries of those
+   * sentences.
+   */
+  guint is_sentence_boundary : 1;
+  guint is_sentence_start : 1;  /* first character in a sentence */
+  guint is_sentence_end : 1;    /* first non-sentence char after a sentence */
 };
 
 /* Determine information about cluster/word/line breaks in a string
@@ -57,6 +83,11 @@ void pango_break (const gchar   *text,
 		  PangoAnalysis *analysis, 
 		  PangoLogAttr  *attrs);
 
+void pango_find_paragraph_boundary (const gchar *text,
+                                    gint         length,
+                                    gint        *paragraph_delimiter_index,
+                                    gint        *next_paragraph_start);
+
 void pango_get_log_attrs (const char    *text,
                           int            length,
                           int            level,
@@ -72,6 +103,16 @@ void pango_shape (const gchar      *text,
 
 GList *pango_reorder_items (GList *logical_items);
 
+/* This is the default break algorithm, used if no language
+ * engine overrides it. Normally you should use pango_break()
+ * instead; this function is mostly useful for chaining up
+ * from a language engine override.
+ */
+void pango_default_break (const gchar   *text,
+                          gint           length,
+                          PangoAnalysis *analysis,
+                          PangoLogAttr  *attrs);
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
diff --git a/tests/Makefile.am b/tests/Makefile.am
new file mode 100644
index 00000000..e15157ab
--- /dev/null
+++ b/tests/Makefile.am
@@ -0,0 +1,14 @@
+## Process this file with automake to create Makefile.in.
+
+INCLUDES = -I$(top_srcdir) $(GLIB_CFLAGS)
+
+TESTS=runtests.sh
+
+check_PROGRAMS =	testboundaries
+
+testboundaries_SOURCES = testboundaries.c
+
+## pangox should not actually be required, this is broken
+testboundaries_LDADD = ../pango/libpango.la ../pango/libpangox.la $(X_LIBS)
+
+
diff --git a/tests/boundaries.utf8 b/tests/boundaries.utf8
new file mode 100644
index 00000000..f70bd0fc
--- /dev/null
+++ b/tests/boundaries.utf8
@@ -0,0 +1,69 @@
+Testing sentence boundaries - this is a sentence ending in several exclamation points!!!    Several spaces there. Abbreviations such as Mr. or Mrs. should not result in sentence breaks, should they?! (Parentheses should be included in a sentence.) (((Even nested parentheses, with funny punctuation inside!!?!!...))) Anyhow, this should be enough testing.
+
+This text has carriage returns
+all over the 
+ freaking place 
+
+ such as here
+here
+and 
+here
+
+     
+but not at the end of this line.
+
+This is some "quoted" text e.g. "this is some stuff in quotes" and 
+'this is some other stuff in single quotes' and ""this is some stuff with 
+two levels of double quotes"" and so on.
+
+Big string of Arabic:
+وقد بدأ ثلاث من أكثر المؤسسات تقدما في شبكة اكسيون برامجها كمنظمات لا تسعى للربح، ثم تحولت في السنوات الخمس الماضية إلى مؤسسات مالية منظمة، وباتت جزءا من النظام المالي في بلدانها، ولكنها تتخصص في خدمة قطاع المشروعات الصغيرة. وأحد أكثر هذه المؤسسات نجاحا هو »بانكوسول« في بوليفيا.
+
+
+Παν語
+
+This is a list of ways to say hello in various languages. Its purpose is to illustrate a number of scripts.
+
+(Converted into UTF-8)
+
+---------------------------------------------------------
+Arabic	السلام عليكم 
+Bengali (বাঙ্লা)	ষাগতোম
+Burmese (မ္ရန္မာ)
+Cherokee	(ᏣᎳᎩ)	ᎣᏏᏲ
+Czech	(česky)	Dobrý den
+Danish	(Dansk)	Hej, Goddag
+English	Hello
+Esperanto	Saluton
+Estonian	Tere, Tervist
+FORTRAN	PROGRAM
+Finnish	(Suomi)	Hei
+French	(Français)	Bonjour, Salut
+German	(Deutsch Nord)	Guten Tag
+German	(Deutsch Süd)	Grüß Gott
+Georgian	(ქართველი)	გამარჯობა
+Gujarati     (ગુજરાતિ)
+Greek	(Ελληνικά)	Γειά σας
+Hebrew	שלום
+Hindi	नमस्ते, नमस्कार।
+Italiano	Ciao, Buon giorno
+ɪŋglɪʃ       hɛləʊ
+Maltese	Ċaw, Saħħa
+Nederlands, Vlaams	Hallo, Dag
+Norwegian	(Norsk)	Hei, God dag
+Punjabi     (ੁਪੁਂਜਾਬਿ)
+Polish	Dzień dobry, Hej
+Russian	(Русский)	Здравствуйте!
+Slovak	Dobrý deň
+Spanish	(Español)	‎¡Hola!‎
+Swedish	(Svenska)	Hej, Goddag
+Thai	(ภาษาไทย)	สวัสดีครับ, สวัสดีค่ะ
+Turkish	(Türkçe)	Merhaba
+Vietnamese	(Tiếng Việt)	Xin Chào
+Yiddish	(ײַדישע) דאָס הײַזעלע 
+
+Japanese	(日本語)	こんにちは, ｺﾝﾆﾁﾊ
+Chinese	(中文,普通话,汉语)	你好
+Cantonese	(粵語,廣東話)	早晨, 你好
+Korean	(한글)	안녕하세요, 안녕하십니까
+
+Difference among chinese characters in GB, JIS, KSC, BIG5:‎
+ GB	--	元气	开发
+ JIS	--	元気	開発
+ KSC	--	元氣	開發
+ BIG5	--	元氣	開發
+
diff --git a/tests/runtests.sh b/tests/runtests.sh
new file mode 100755
index 00000000..aa67ccde
--- /dev/null
+++ b/tests/runtests.sh
@@ -0,0 +1,55 @@
+#! /bin/sh
+# Run every Pango test program found in the current directory, logging
+# their output to $LOGFILE; exit with status 1 on the first failure.
+
+LOGFILE=runtests.log
+POTENTIAL_TESTS='testboundaries'
+TESTS=    # start empty: never inherit TESTS from the environment
+
+for I in $POTENTIAL_TESTS
+do
+    GOOD=yes
+    test -f "$I" || {
+        echo "WARNING: test program $I not found, not running"
+        GOOD=no
+    }
+
+    if test "x$GOOD" = xyes; then
+        test -x "$I" || {
+            echo "WARNING: test program $I is not executable, not running"
+            GOOD=no
+        }
+    fi
+
+    if test "x$GOOD" = xyes; then
+        TESTS="$TESTS$I "
+    fi
+done
+
+echo "Logging to $LOGFILE"
+
+echo "Log file for Pango test programs." > "$LOGFILE"
+echo "" >> "$LOGFILE"
+echo "Tests are: $TESTS" >> "$LOGFILE"
+echo "" >> "$LOGFILE"
+
+for I in $TESTS
+do
+    # printf instead of "echo -n", which is not portable across /bin/sh
+    printf '%s' "Running test program \"$I\", please wait:"
+    echo "" >> "$LOGFILE"
+    echo "Output of $I:" >> "$LOGFILE"
+    if "./$I" >> "$LOGFILE" 2>&1; then
+        echo " passed"
+    else
+        echo
+        echo '***'
+        echo " Test failed: $I"
+        echo " See $LOGFILE for errors"
+        echo
+        exit 1
+    fi
+done
+
+echo
+echo "All tests passed."
diff --git a/tests/runtests.sh.in b/tests/runtests.sh.in
new file mode 100755
index 00000000..aa67ccde
--- /dev/null
+++ b/tests/runtests.sh.in
@@ -0,0 +1,55 @@
+#! /bin/sh
+# Run every Pango test program found in the current directory, logging
+# their output to $LOGFILE; exit with status 1 on the first failure.
+
+LOGFILE=runtests.log
+POTENTIAL_TESTS='testboundaries'
+TESTS=    # start empty: never inherit TESTS from the environment
+
+for I in $POTENTIAL_TESTS
+do
+    GOOD=yes
+    test -f "$I" || {
+        echo "WARNING: test program $I not found, not running"
+        GOOD=no
+    }
+
+    if test "x$GOOD" = xyes; then
+        test -x "$I" || {
+            echo "WARNING: test program $I is not executable, not running"
+            GOOD=no
+        }
+    fi
+
+    if test "x$GOOD" = xyes; then
+        TESTS="$TESTS$I "
+    fi
+done
+
+echo "Logging to $LOGFILE"
+
+echo "Log file for Pango test programs." > "$LOGFILE"
+echo "" >> "$LOGFILE"
+echo "Tests are: $TESTS" >> "$LOGFILE"
+echo "" >> "$LOGFILE"
+
+for I in $TESTS
+do
+    # printf instead of "echo -n", which is not portable across /bin/sh
+    printf '%s' "Running test program \"$I\", please wait:"
+    echo "" >> "$LOGFILE"
+    echo "Output of $I:" >> "$LOGFILE"
+    if "./$I" >> "$LOGFILE" 2>&1; then
+        echo " passed"
+    else
+        echo
+        echo '***'
+        echo " Test failed: $I"
+        echo " See $LOGFILE for errors"
+        echo
+        exit 1
+    fi
+done
+
+echo
+echo "All tests passed."
diff --git a/tests/testboundaries.c b/tests/testboundaries.c
new file mode 100644
index 00000000..c05bc837
--- /dev/null
+++ b/tests/testboundaries.c
@@ -0,0 +1,356 @@
+/* Pango
+ * testboundaries.c: Test text boundary algorithms
+ *
+ * Copyright (C) 1999-2000 Red Hat Software
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <pango/pango.h>
+
+#define CHFORMAT "%0#6x"
+
+/* FIXME for now this just tests that the breaking of some sample
+ * text conforms to certain rules and invariants. But eventually
+ * we should also have test-result pairs, i.e. a string and some
+ * encoding of the correct way to break the string, to check
+ * more precisely that things worked
+ */
+
+
+/* Character-class helpers; keep in sync with the same macros in break.c */
+
+#define LEADING_JAMO(wc)  ((wc) >= 0x1100 && (wc) <= 0x115F)
+#define VOWEL_JAMO(wc)    ((wc) >= 0x1160 && (wc) <= 0x11A2)
+#define TRAILING_JAMO(wc) ((wc) >= 0x11A8 && (wc) <= 0x11F9)
+#define JAMO(wc)          ((wc) >= 0x1100 && (wc) <= 0x11FF)
+/* VIRAMA_SCRIPT ("virama script") is only a cheap range pre-check, i.e. an
+ * optimization: its range also includes scripts without viramas in them
+ */
+#define VIRAMA_SCRIPT(wc)        ((wc) >= 0x0901 && (wc) <= 0x17FF)
+#define VIRAMA(wc) ((wc) == 0x094D || \
+                    (wc) == 0x09CD || \
+                    (wc) == 0x0A4D || \
+                    (wc) == 0x0ACD || \
+                    (wc) == 0x0B4D || \
+                    (wc) == 0x0BCD || \
+                    (wc) == 0x0C4D || \
+                    (wc) == 0x0CCD || \
+                    (wc) == 0x0D4D || \
+                    (wc) == 0x0DCA || \
+                    (wc) == 0x0E3A || \
+                    (wc) == 0x0F84 || \
+                    (wc) == 0x1039 || \
+                    (wc) == 0x17D2)
+/* Code point ranges used to tell apart types of Japanese characters */
+#define JAPANESE(wc) ((wc) >= 0x2F00 && (wc) <= 0x30FF)
+#define KANJI(wc)    ((wc) >= 0x2F00 && (wc) <= 0x2FDF)
+#define HIRAGANA(wc) ((wc) >= 0x3040 && (wc) <= 0x309F)
+#define KATAKANA(wc) ((wc) >= 0x30A0 && (wc) <= 0x30FF)
+
+static int offset = 0;                 /* chars visited since line_start; reset after each '\n' */
+static int line = 0;                   /* number of '\n' characters passed so far */
+static gunichar current_wc = 0;        /* character being visited by log_attr_foreach() */
+static const char *line_start = NULL;  /* start of the current line inside the scanned text */
+static const char *line_end = NULL;    /* just past the last character visited on that line */
+
+/* Report a test failure on stderr, prefixed with the current line, offset
+ * and character context, then exit(1). FORMAT and its args are printf-style. */
+static void
+fail (const char *format,
+      ...)
+{
+  char *str;
+  char *line_text;
+  va_list args;
+
+  va_start (args, format);
+  str = g_strdup_vprintf (format, args);
+  va_end (args);
+
+  /* line_start is still NULL when we fail before any text was scanned. */
+  line_text = line_start ? g_strndup (line_start, line_end - line_start) : g_strdup ("");
+  fprintf (stderr, "line %d offset %d char is " CHFORMAT ": %s\n (line is '%s')\n", line, offset, current_wc, str, line_text);
+  g_free (str);
+  g_free (line_text);
+  exit (1);
+}
+
+typedef void (* CharForeachFunc) (gunichar      wc,         /* character being visited */
+                                  gunichar      prev_wc,    /* previous character, 0 at start of text */
+                                  gunichar      next_wc,    /* following character, 0 at end of text */
+                                  GUnicodeType  type,       /* g_unichar_type() of wc */
+                                  GUnicodeType  prev_type,  /* type of prev_wc; (GUnicodeType) -1 at start */
+                                  GUnicodeType  next_type,  /* type of next_wc; not meaningful when next_wc is 0 */
+                                  PangoLogAttr *attr,       /* log attr belonging to wc */
+                                  PangoLogAttr *prev_attr,  /* attr of prev_wc, NULL at start of text */
+                                  PangoLogAttr *next_attr,  /* attr of next_wc, NULL at end of text */
+                                  gpointer      data);      /* pointer handed through by log_attr_foreach() */
+
+/* Walk TEXT (NUL-terminated UTF-8) one character at a time, calling FUNC
+ * with each character, its neighbours, their Unicode types and the matching
+ * entries of ATTRS (indexed by character). Also keeps the line, offset,
+ * current_wc, line_start and line_end globals up to date for fail().
+ */
+static void
+log_attr_foreach (const char     *text,
+                  PangoLogAttr   *attrs,
+                  CharForeachFunc func,
+                  gpointer        data)
+{
+  const gchar *next = text;
+  gint length = strlen (text);
+  const gchar *end = text + length;
+  gint i = 0;
+  gunichar prev_wc;
+  gunichar next_wc;
+  GUnicodeType prev_type;
+  GUnicodeType next_type;
+
+  if (next == end)
+    return;
+
+  offset = 0;
+  line = 0;
+  prev_type = (GUnicodeType) -1;
+  prev_wc = 0;
+
+  next_wc = g_utf8_get_char (next);
+  next_type = g_unichar_type (next_wc);
+  line_start = text;
+  line_end = text;
+
+  while (next_wc != 0)
+    {
+      GUnicodeType type;
+      gunichar wc;
+      wc = next_wc;
+      type = next_type;
+      current_wc = wc;
+      next = g_utf8_next_char (next);
+      line_end = next;
+
+      if (next >= end)
+        next_wc = 0;
+      else
+        next_wc = g_utf8_get_char (next);
+
+      /* Mirror the start-of-text sentinel instead of leaving a stale type. */
+      next_type = next_wc ? g_unichar_type (next_wc) : (GUnicodeType) -1;
+
+      (* func) (wc, prev_wc, next_wc,
+                type, prev_type, next_type,
+                &attrs[i],
+                i != 0 ? &attrs[i-1] : NULL,
+                next_wc != 0 ? &attrs[i+1] : NULL,
+                data);
+
+      prev_type = type;
+      prev_wc = wc;
+      ++i;
+      ++offset;
+      if (wc == '\n')
+        {
+          ++line;
+          offset = 0;
+          line_start = next;
+          line_end = next;
+        }
+    }
+}
+
+/* CharForeachFunc that verifies line-break invariants for one character.
+ * The first violated invariant is reported through fail(), which exits.
+ */
+static void
+check_line_char (gunichar      wc,
+                 gunichar      prev_wc,
+                 gunichar      next_wc,
+                 GUnicodeType  type,
+                 GUnicodeType  prev_type,
+                 GUnicodeType  next_type,
+                 PangoLogAttr *attr,
+                 PangoLogAttr *prev_attr,
+                 PangoLogAttr *next_attr,
+                 gpointer      data)
+{
+  GUnicodeBreakType cur_class = g_unichar_break_type (wc);
+  GUnicodeBreakType prev_class = G_UNICODE_BREAK_UNKNOWN;
+  gboolean optional_break;
+
+  /* There is no previous character at the very start of the text. */
+  if (prev_wc)
+    prev_class = g_unichar_break_type (prev_wc);
+
+  /* A break opportunity that is not forced by a mandatory break. */
+  optional_break = attr->is_break && !attr->is_mandatory_break;
+
+  if (wc == '\n')
+    {
+      /* \r\n must stay together... */
+      if (prev_wc == '\r' && attr->is_break)
+        fail ("line break between \\r and \\n");
+
+      /* ...and a break must be allowed right after \n. */
+      if (next_attr && !next_attr->is_break)
+        fail ("no line break after \\n");
+    }
+
+  /* Nothing precedes the first character, so it cannot carry a break. */
+  if (prev_wc == 0 && attr->is_break)
+    fail ("first char in string should not be marked as a line break");
+
+  if (cur_class == G_UNICODE_BREAK_SPACE &&
+      prev_attr != NULL &&
+      optional_break)
+    fail ("can't break lines before a space unless a mandatory break char precedes it; prev char was " CHFORMAT, prev_wc);
+
+  /* The two flags must be consistent with each other. */
+  if (attr->is_mandatory_break && !attr->is_break)
+    fail ("mandatory breaks must also be marked as regular breaks");
+
+  /* FIXME use the break tables from break.c to automatically
+   * check invariants for each cell in the table. Shouldn't
+   * be that hard to do.
+   */
+
+  /* The pair rules below only concern optional breaks. */
+  if (!optional_break)
+    return;
+
+  if (cur_class == G_UNICODE_BREAK_OPEN_PUNCTUATION &&
+      prev_class == G_UNICODE_BREAK_OPEN_PUNCTUATION)
+    fail ("can't break between two open punctuation chars");
+
+  if (cur_class == G_UNICODE_BREAK_CLOSE_PUNCTUATION &&
+      prev_class == G_UNICODE_BREAK_CLOSE_PUNCTUATION)
+    fail ("can't break between two close punctuation chars");
+
+  if (cur_class == G_UNICODE_BREAK_QUOTATION &&
+      prev_class == G_UNICODE_BREAK_ALPHABETIC)
+    fail ("can't break letter-quotemark sequence");
+}
+
+static void
+check_line_invariants (const char   *text,
+                       PangoLogAttr *attrs)
+{
+  log_attr_foreach (text, attrs, check_line_char, NULL); /* run check_line_char on every character; no user data */
+}
+
+static void
+check_word_invariants (const char   *text,
+                       PangoLogAttr *attrs)
+{
+  /* Placeholder: no word-boundary invariants are checked yet, so TEXT and
+   * ATTRS are currently unused. */
+}
+
+static void
+check_sentence_invariants (const char   *text,
+                           PangoLogAttr *attrs)
+{
+  /* Placeholder: no sentence-boundary invariants are checked yet, so TEXT
+   * and ATTRS are currently unused. */
+}
+
+static void
+check_grapheme_invariants (const char   *text,
+                           PangoLogAttr *attrs)
+{
+  /* Placeholder: no grapheme-boundary invariants are checked yet, so TEXT
+   * and ATTRS are currently unused. */
+}
+
+/* Debug helper: print TEXT split at the sentence boundaries recorded in
+ * ATTRS (indexed by character, not by byte), one sentence per line. */
+static void
+print_sentences (const char   *text,
+                 PangoLogAttr *attrs)
+{
+  const char *p = text;
+  const char *last = text;
+  int i = 0;
+
+  while (*p)
+    {
+      /* Flush the sentence ending here; skip the empty one at offset 0. */
+      if (attrs[i].is_sentence_boundary && p > last)
+        {
+          printf ("%.*s\n", (int) (p - last), last);
+          last = p;
+        }
+      p = g_utf8_next_char (p);
+      ++i;
+    }
+  /* The loop never sees a boundary after the last char: flush the tail. */
+  if (p > last)
+    printf ("%s\n", last);
+}
+
+/* Validate TEXT as UTF-8, compute its log attrs with pango_get_log_attrs()
+ * and run every boundary invariant checker over the result.
+ */
+static void
+check_invariants (const char *text)
+{
+  PangoLogAttr *log_attrs;
+  int n_chars;
+
+  if (!g_utf8_validate (text, -1, NULL))
+    fail ("Invalid UTF-8 in test text");
+
+  /* One zero-initialized attr per character (not per byte). */
+  n_chars = g_utf8_strlen (text, -1);
+  log_attrs = g_new0 (PangoLogAttr, n_chars);
+
+  pango_get_log_attrs (text, -1, 0, "C", log_attrs);
+
+  check_line_invariants (text, log_attrs);
+  check_sentence_invariants (text, log_attrs);
+  check_grapheme_invariants (text, log_attrs);
+  check_word_invariants (text, log_attrs);
+
+#if 0
+  print_sentences (text, log_attrs);
+#endif
+
+  g_free (log_attrs);
+}
+
+/* Load the sample text (argv[1] if given, else "boundaries.utf8" in the
+ * current directory) and check the boundary invariants on it. */
+int
+main (int    argc,
+      char **argv)
+{
+  const char *filename = argc > 1 ? argv[1] : "boundaries.utf8";
+  GError *error = NULL;
+  gchar *text;
+
+  if (!g_file_get_contents (filename, &text, NULL, &error))
+    fail ("Couldn't open sample text file %s: %s", filename, error->message);
+  check_invariants (text);
+  g_free (text);
+  printf ("testboundaries passed\n");
+  return 0;
+}
+
author	Havoc Pennington <hp@pobox.com>	2000-12-02 07:49:56 +0000
committer	Havoc Pennington <hp@src.gnome.org>	2000-12-02 07:49:56 +0000
commit	31832c0f4bcdf3e7c69cd5b8a7ad570a7b60d525 (patch)
tree	d7ed3aa9ac35017fe03d954dd6baa2ccfaf3ed30
parent	e9e84a3f75fbab073ce5488c0e82b3e7fc39bcda (diff)
download	pango-31832c0f4bcdf3e7c69cd5b8a7ad570a7b60d525.tar.gz