1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
/* Character set conversion with error handling.
Copyright (C) 2001-2007 Free Software Foundation, Inc.
Written by Bruno Haible and Simon Josefsson.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef _STRICONVEH_H
#define _STRICONVEH_H
#include <stddef.h>
#if HAVE_ICONV
#include <iconv.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* Strategies for handling characters that cannot be converted.
The numeric values are the ones the compiler would assign implicitly;
they are spelled out here only for readability. */
enum iconv_ilseq_handler
{
/* Give up: return and set errno = EILSEQ. */
iconveh_error = 0,
/* Emit one '?' per unconvertible character. */
iconveh_question_mark = 1,
/* Emit an escape sequence \uxxxx or \Uxxxxxxxx. */
iconveh_escape_sequence = 2
};
#if HAVE_ICONV
/* Convert an entire string from one encoding to another, using iconv.
The original string is at [SRC,...,SRC+SRCLEN-1].
CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if
the system does not support a direct conversion from FROMCODE to TOCODE.
CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or
(iconv_t)(-1) if FROM_CODESET is UTF-8).
CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
if TO_CODESET is UTF-8).
HANDLER selects how unconvertible characters are handled; see
enum iconv_ilseq_handler.
If OFFSETS is not NULL, it should point to an array of SRCLEN elements of
type size_t; this array is filled with offsets into the result, i.e. the
character starting at SRC[i] corresponds to the character starting at
(*RESULTP)[OFFSETS[i]], and other offsets are set to (size_t)(-1).
*RESULTP and *LENGTHP should initially be a scratch buffer and its size,
or *RESULTP can initially be NULL.
May erase the contents of the memory at *RESULTP.
Return value: 0 if successful, otherwise -1 and errno set.
If successful: The resulting string is stored in *RESULTP and its length
in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is
unchanged if no dynamic memory allocation was necessary.
NOTE(review): presumably the caller must free *RESULTP when it differs from
the scratch buffer passed in -- confirm against the implementation. */
extern int
mem_cd_iconveh (const char *src, size_t srclen,
iconv_t cd, iconv_t cd1, iconv_t cd2,
enum iconv_ilseq_handler handler,
size_t *offsets,
char **resultp, size_t *lengthp);
/* Convert an entire string from one encoding to another, using iconv.
The original string is the NUL-terminated string starting at SRC.
CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if
the system does not support a direct conversion from FROMCODE to TOCODE.
Both the "from" and the "to" encoding must use a single NUL byte at the end
of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or
(iconv_t)(-1) if FROM_CODESET is UTF-8).
CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
if TO_CODESET is UTF-8).
HANDLER selects how unconvertible characters are handled; see
enum iconv_ilseq_handler.
Allocate a malloced memory block for the result.
Return value: the freshly allocated resulting NUL-terminated string if
successful, otherwise NULL and errno set.
NOTE(review): since the result is malloced, the caller is presumably
responsible for freeing it -- confirm against the implementation. */
extern char *
str_cd_iconveh (const char *src,
iconv_t cd, iconv_t cd1, iconv_t cd2,
enum iconv_ilseq_handler handler);
#endif
/* Convert an entire string from one encoding to another, using iconv.
The original string is at [SRC,...,SRC+SRCLEN-1].
FROM_CODESET and TO_CODESET name the encoding to convert from and the
encoding to convert to, respectively (presumably encoding names as accepted
by iconv_open -- confirm against the implementation).
HANDLER selects how unconvertible characters are handled; see
enum iconv_ilseq_handler.
If OFFSETS is not NULL, it should point to an array of SRCLEN elements of
type size_t; this array is filled with offsets into the result, i.e. the
character starting at SRC[i] corresponds to the character starting at
(*RESULTP)[OFFSETS[i]], and other offsets are set to (size_t)(-1).
*RESULTP and *LENGTHP should initially be a scratch buffer and its size,
or *RESULTP can initially be NULL.
May erase the contents of the memory at *RESULTP.
Return value: 0 if successful, otherwise -1 and errno set.
If successful: The resulting string is stored in *RESULTP and its length
in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is
unchanged if no dynamic memory allocation was necessary.
NOTE(review): presumably the caller must free *RESULTP when it differs from
the scratch buffer passed in -- confirm against the implementation. */
extern int
mem_iconveh (const char *src, size_t srclen,
const char *from_codeset, const char *to_codeset,
enum iconv_ilseq_handler handler,
size_t *offsets,
char **resultp, size_t *lengthp);
/* Convert an entire string from one encoding to another, using iconv.
The original string is the NUL-terminated string starting at SRC.
FROM_CODESET and TO_CODESET name the encoding to convert from and the
encoding to convert to, respectively (presumably encoding names as accepted
by iconv_open -- confirm against the implementation).
Both the "from" and the "to" encoding must use a single NUL byte at the
end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
HANDLER selects how unconvertible characters are handled; see
enum iconv_ilseq_handler.
Allocate a malloced memory block for the result.
Return value: the freshly allocated resulting NUL-terminated string if
successful, otherwise NULL and errno set.
NOTE(review): since the result is malloced, the caller is presumably
responsible for freeing it -- confirm against the implementation. */
extern char *
str_iconveh (const char *src,
const char *from_codeset, const char *to_codeset,
enum iconv_ilseq_handler handler);
#ifdef __cplusplus
}
#endif
#endif /* _STRICONVEH_H */
|