summaryrefslogtreecommitdiff
path: root/pango/pango-break.h
blob: 92af390bf6634272579b8f4fc4543bad0290eb59 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/* Pango
 * pango-break.h:
 *
 * Copyright (C) 1999 Red Hat Software
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#ifndef __PANGO_BREAK_H__
#define __PANGO_BREAK_H__

#include <glib.h>

G_BEGIN_DECLS

#include <pango/pango-item.h>

/* Logical attributes of a character.
 */
/**
 * PangoLogAttr:
 * @is_line_break: if set, can break line in front of character
 * @is_mandatory_break: if set, must break line in front of character
 * @is_char_break: if set, can break here when doing character wrapping
 * @is_white: is whitespace character
 * @is_cursor_position: if set, cursor can appear in front of character,
 * i.e. this is a grapheme boundary, or the first character
 * in the text.
 * This flag implements Unicode's
 * <ulink url="http://www.unicode.org/reports/tr29/">Grapheme
 * Cluster Boundaries</ulink> semantics.
 * @is_word_start: is first character in a word
 * @is_word_end: is first non-word char after a word.
 * Note that in degenerate cases, you could have both @is_word_start
 * and @is_word_end set for some character.
 * @is_sentence_boundary: is a sentence boundary.
 * There are two ways to divide sentences. The first assigns all
 * inter-sentence whitespace/control/format chars to some sentence,
 * so all chars are in some sentence; @is_sentence_boundary denotes
 * the boundaries there. The second way doesn't assign
 * between-sentence spaces, etc. to any sentence, so
 * @is_sentence_start/@is_sentence_end mark the boundaries of those sentences.
 * @is_sentence_start: is first character in a sentence
 * @is_sentence_end: is first char after a sentence.
 * Note that in degenerate cases, you could have both @is_sentence_start
 * and @is_sentence_end set for some character. (e.g. no space after a
 * period, so the next sentence starts right away)
 * @backspace_deletes_character: if set, backspace deletes one character
 * rather than the entire grapheme cluster. This
 * field is only meaningful on grapheme
 * boundaries (where @is_cursor_position is
 * set).  In some languages, the full grapheme
 * (e.g.  letter + diacritics) is considered a
 * unit, while in others, each decomposed
 * character in the grapheme is a unit. In the
 * default implementation of pango_break(), this
 * bit is set on all grapheme boundaries except
 * those following Latin, Cyrillic or Greek base characters.
 * @is_expandable_space: is a whitespace character that can possibly be
 * expanded for justification purposes. (Since: 1.18)
 * @is_word_boundary: is a word boundary.
 * More specifically, means that this is not a position in the middle
 * of a word.  For example, both sides of a punctuation mark are
 * considered word boundaries.  This flag is particularly useful when
 * selecting text word-by-word.
 * This flag implements Unicode's
 * <ulink url="http://www.unicode.org/reports/tr29/">Word
 * Boundaries</ulink> semantics. (Since: 1.22)
 *
 * The #PangoLogAttr structure stores information
 * about the attributes of a single character.
 */
struct _PangoLogAttr
{
  /* Every member below is a one-bit flag; together they describe the
   * logical attributes of a single character.
   */

  guint is_line_break : 1;      /* Can break line in front of character */

  guint is_mandatory_break : 1; /* Must break line in front of character */

  guint is_char_break : 1;      /* Can break here when doing char wrap */

  guint is_white : 1;           /* Whitespace character */

  /* Cursor can appear in front of character (i.e. this is a grapheme
   * boundary, or the first character in the text).
   */
  guint is_cursor_position : 1;

  /* Word start/end markers.
   *
   * Note that in degenerate cases, you could have both the start and the
   * end flag set on the same character. This is most likely for the
   * sentence flags further down (e.g. no space after a period, so the
   * next sentence starts right away), but it can happen for words too.
   */

  guint is_word_start : 1;      /* first character in a word */
  guint is_word_end   : 1;      /* is first non-word char after a word */

  /* There are two ways to divide sentences. The first assigns all
   * inter-sentence whitespace/control/format chars to some sentence,
   * so all chars are in some sentence; is_sentence_boundary denotes
   * the boundaries there. The second way doesn't assign
   * between-sentence spaces, etc. to any sentence, so
   * is_sentence_start/is_sentence_end mark the boundaries of those
   * sentences.
   */
  guint is_sentence_boundary : 1;
  guint is_sentence_start : 1;  /* first character in a sentence */
  guint is_sentence_end : 1;    /* first non-sentence char after a sentence */

  /* If set, backspace deletes one character rather than
   * the entire grapheme cluster. Only meaningful on grapheme
   * boundaries, i.e. where is_cursor_position is set.
   */
  guint backspace_deletes_character : 1;

  /* Whitespace character that can possibly be expanded for
   * justification purposes. Only a few space variants (U+0020 and
   * U+00A0) have variable width during justification.
   */
  guint is_expandable_space : 1;

  /* Word boundary as defined by UAX#29: set when this position is NOT
   * in the middle of a word. For example, both sides of a punctuation
   * mark are considered word boundaries.
   */
  guint is_word_boundary : 1;
};

/* Determine information about cluster/word/line breaks in a string
 * of Unicode text.
 *
 * NOTE(review): the parameter semantics are not visible in this header.
 * Per the Pango reference manual, length is the length of text in bytes
 * (-1 if text is nul-terminated), analysis comes from pango_itemize(),
 * and attrs is a caller-provided array of attrs_len PangoLogAttr entries
 * that receives the result -- confirm against the implementation.
 */
PANGO_AVAILABLE_IN_ALL
void pango_break (const gchar   *text,
		  int            length,
		  PangoAnalysis *analysis,
		  PangoLogAttr  *attrs,
		  int            attrs_len);

/* Find a paragraph boundary in text. The two gint pointers are
 * out-parameters.
 *
 * NOTE(review): behaviour is not visible in this header. Per the Pango
 * reference manual, length is in bytes (-1 if text is nul-terminated),
 * paragraph_delimiter_index receives the index of the delimiter and
 * next_paragraph_start the index where the next paragraph begins; both
 * are presumably byte indices into text -- confirm against the
 * implementation.
 */
PANGO_AVAILABLE_IN_ALL
void pango_find_paragraph_boundary (const gchar *text,
				    gint         length,
				    gint        *paragraph_delimiter_index,
				    gint        *next_paragraph_start);

/* Compute PangoLogAttr values for text in the given language, writing
 * them to the caller-provided log_attrs array.
 *
 * NOTE(review): parameter semantics are not visible in this header. Per
 * the Pango reference manual, length is in bytes (-1 if text is
 * nul-terminated), level is the embedding level (-1 if unknown), and
 * log_attrs must hold one entry per character plus one extra for the
 * end position, with attrs_len giving the array length -- confirm
 * against the implementation.
 */
PANGO_AVAILABLE_IN_ALL
void pango_get_log_attrs (const char    *text,
			  int            length,
			  int            level,
			  PangoLanguage *language,
			  PangoLogAttr  *log_attrs,
			  int            attrs_len);

/* This is the default break algorithm, used if no language
 * engine overrides it. Normally you should use pango_break()
 * instead; this function is mostly useful for chaining up
 * from a language engine override.
 *
 * The parameter list is identical to that of pango_break().
 * NOTE(review): presumably each parameter has the same meaning as in
 * pango_break() -- confirm against the implementation.
 */
PANGO_AVAILABLE_IN_ALL
void pango_default_break (const gchar   *text,
			  int            length,
			  PangoAnalysis *analysis,
			  PangoLogAttr  *attrs,
			  int            attrs_len);

G_END_DECLS

#endif /* __PANGO_BREAK_H__ */