1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
/* Pango
* pango-break.h:
*
* Copyright (C) 1999 Red Hat Software
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#ifndef __PANGO_BREAK_H__
#define __PANGO_BREAK_H__
#include <glib.h>
G_BEGIN_DECLS
#include <pango/pango-item.h>
/* Logical attributes of a character.
*/
/**
* PangoLogAttr:
* @is_line_break: if set, can break line in front of character
* @is_mandatory_break: if set, must break line in front of character
* @is_char_break: if set, can break here when doing character wrapping
* @is_white: is whitespace character
* @is_cursor_position: if set, cursor can appear in front of character.
* i.e. this is a grapheme boundary, or the first character in the text.
* This flag implements Unicode's
* [Grapheme Cluster Boundaries](http://www.unicode.org/reports/tr29/)
* semantics.
* @is_word_start: is first character in a word
* @is_word_end: is first non-word char after a word
* Note that in degenerate cases, you could have both @is_word_start
* and @is_word_end set for some character.
* @is_sentence_boundary: is a sentence boundary.
* There are two ways to divide sentences. The first assigns all
* inter-sentence whitespace/control/format chars to some sentence,
* so all chars are in some sentence; @is_sentence_boundary denotes
* the boundaries there. The second way doesn't assign
* between-sentence spaces, etc. to any sentence, so
* @is_sentence_start/@is_sentence_end mark the boundaries of those sentences.
* @is_sentence_start: is first character in a sentence
* @is_sentence_end: is first char after a sentence.
* Note that in degenerate cases, you could have both @is_sentence_start
* and @is_sentence_end set for some character. (e.g. no space after a
* period, so the next sentence starts right away)
* @backspace_deletes_character: if set, backspace deletes one character
* rather than the entire grapheme cluster. This field is only meaningful
* on grapheme boundaries (where @is_cursor_position is set). In some languages,
* the full grapheme (e.g. letter + diacritics) is considered a unit, while in
* others, each decomposed character in the grapheme is a unit. In the default
* implementation of [func@break], this bit is set on all grapheme boundaries
* except those following Latin, Cyrillic or Greek base characters.
* @is_expandable_space: is a whitespace character that can possibly be
* expanded for justification purposes. (Since: 1.18)
* @is_word_boundary: is a word boundary, as defined by UAX#29.
* More specifically, means that this is not a position in the middle of a word.
* For example, both sides of a punctuation mark are considered word boundaries.
* This flag is particularly useful when selecting text word-by-word. This flag
* implements Unicode's [Word Boundaries](http://www.unicode.org/reports/tr29/)
* semantics. (Since: 1.22)
*
* The `PangoLogAttr` structure stores information about the attributes of a
* single character.
*/
struct _PangoLogAttr
{
/* Thirteen one-bit unsigned flags describing a single character.
* NOTE(review): the doc comment tags the last two fields with
* "Since: 1.18" and "Since: 1.22", which suggests flags are only ever
* appended; presumably the existing order must be kept so the struct
* layout stays stable for callers - confirm before reordering.
*/

/* Line-breaking and whitespace flags. */
guint is_line_break : 1;
guint is_mandatory_break : 1;
guint is_char_break : 1;
guint is_white : 1;

/* Cursor positioning (grapheme boundary). */
guint is_cursor_position : 1;

/* Word start / end. */
guint is_word_start : 1;
guint is_word_end : 1;

/* Sentence boundary, start and end. */
guint is_sentence_boundary : 1;
guint is_sentence_start : 1;
guint is_sentence_end : 1;

/* Editing behaviour of backspace at a grapheme boundary. */
guint backspace_deletes_character : 1;

/* Flags documented as added later (1.18 and 1.22 respectively). */
guint is_expandable_space : 1;
guint is_word_boundary : 1;
};
/* pango_break:
* Marked deprecated as of Pango 1.44 (see the PANGO_DEPRECATED_IN_1_44
* annotation below).
*
* Takes a text buffer and its length, a PangoAnalysis, and a
* caller-supplied PangoLogAttr array together with its length.
* Returns nothing.
*
* NOTE(review): presumably fills @attrs with the logical attributes
* computed for @text. The unit of @length (bytes vs. characters) and
* the minimum required @attrs_len are not visible from this header -
* confirm against the implementation.
*/
PANGO_DEPRECATED_IN_1_44
void pango_break (const gchar *text,
int length,
PangoAnalysis *analysis,
PangoLogAttr *attrs,
int attrs_len);
/* pango_find_paragraph_boundary:
* Takes a text buffer and its length plus two non-const gint pointers.
* Returns nothing.
*
* NOTE(review): since the function returns void, the results are
* presumably written through @paragraph_delimiter_index and
* @next_paragraph_start. Whether these are byte or character offsets,
* and whether either pointer may be NULL, is not visible from this
* header - confirm against the implementation.
*/
PANGO_AVAILABLE_IN_ALL
void pango_find_paragraph_boundary (const gchar *text,
gint length,
gint *paragraph_delimiter_index,
gint *next_paragraph_start);
/* pango_get_log_attrs:
* Takes a text buffer and its length, an integer @level, a
* PangoLanguage, and a caller-supplied PangoLogAttr array together
* with its length. Returns nothing.
*
* NOTE(review): presumably computes the logical attributes of @text
* into @log_attrs without requiring a PangoAnalysis. The meaning of
* @level and the required @attrs_len are not visible from this
* header - confirm against the implementation.
*/
PANGO_AVAILABLE_IN_ALL
void pango_get_log_attrs (const char *text,
int length,
int level,
PangoLanguage *language,
PangoLogAttr *log_attrs,
int attrs_len);
/* pango_default_break:
* This is the default break algorithm, used if no language
* engine overrides it. It is mostly useful for chaining up
* from a language engine override.
*
* Takes the same parameter list as pango_break(): a text buffer and
* its length, a PangoAnalysis, and a caller-supplied PangoLogAttr
* array together with its length. Returns nothing.
*
* NOTE(review): this comment used to say "Normally you should use
* pango_break() instead", but pango_break() is marked
* PANGO_DEPRECATED_IN_1_44 in this header, so that advice looks
* stale. pango_tailor_break() (declared in this header, available
* since 1.44) is presumably the intended companion call - confirm
* against the Pango reference before relying on either statement.
*/
PANGO_AVAILABLE_IN_ALL
void pango_default_break (const gchar *text,
int length,
PangoAnalysis *analysis,
PangoLogAttr *attrs,
int attrs_len);
/* pango_tailor_break:
* Available since Pango 1.44 (see the PANGO_AVAILABLE_IN_1_44
* annotation below).
*
* Takes a text buffer and its length, a PangoAnalysis, an integer
* @offset, and a caller-supplied PangoLogAttr array together with its
* length. Returns nothing.
*
* NOTE(review): presumably adjusts already-computed attributes in
* @log_attrs using language-specific rules taken from @analysis. The
* meaning of @offset and whether @log_attrs must be pre-filled are
* not visible from this header - confirm against the implementation.
*/
PANGO_AVAILABLE_IN_1_44
void pango_tailor_break (const char *text,
int length,
PangoAnalysis *analysis,
int offset,
PangoLogAttr *log_attrs,
int log_attrs_len);
G_END_DECLS
#endif /* __PANGO_BREAK_H__ */
|