summaryrefslogtreecommitdiff
path: root/ACEXML/common/Transcode.h
blob: 427784891dbccc2e7c393d3a79080533a0a90ee2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
// -*- C++ -*-

//=============================================================================
/**
 *  @file    Transcode.h
 *
 * This file declares functions to convert character strings among the
 * different Unicode encodings (UTF-8, UTF-16, UTF-32).
 *
 *  $Id$
 *
 *  @author Nanbor Wang <nanbor@cs.wustl.edu>
 */
//=============================================================================

#ifndef _ACEXML_TRANSCODE_H_
#define _ACEXML_TRANSCODE_H_

#include "ace/pre.h"
#include "ACEXML/common/ACEXML_Export.h"

#if !defined (ACE_LACKS_PRAGMA_ONCE)
#pragma once
#endif /* ACE_LACKS_PRAGMA_ONCE */

#include "ACEXML/common/XML_Types.h"

/**
 * @class ACEXML_Transcoder Transcode.h "ACEXML/common/Transcode.h"
 *
 * @brief ACEXML_Transcoder
 *
 * Wrapper class for performing transcoding among different Unicode
 * encodings.
 */
class ACEXML_Export ACEXML_Transcoder
{
public:
  /**
   * Status codes returned by the conversion functions.
   *
   * SUCCESS is zero and every error code is negative, so an error can
   * never be mistaken for the (non-negative) character count that most
   * of the conversion functions return on success.
   *
   * The enumeration is a named type rather than an anonymous enum with
   * a trailing declarator, so that it does not add an unused data
   * member to every ACEXML_Transcoder object.
   */
  enum STATUS
  {
    SUCCESS = 0,
    DESTINATION_TOO_SHORT = -1,
    END_OF_SOURCE = -2,
    INVALID_ARGS = -3,
    IS_SURROGATE = -4,
    NON_UNICODE = -5
  };

  // The following functions translate a single Unicode character into
  // a different encoding.  On success they return the number of
  // characters put into the destination or consumed from the source;
  // otherwise they return the corresponding error code.

  /**
   * Convert a UTF-16 character into a string in UTF-8 encoding.
   *
   * @param src The UTF-16 character to convert.
   * @param dst Buffer that receives the UTF-8 encoded result.
   * @param len Presumably the capacity of @a dst -- TODO confirm
   *            against the implementation.
   *
   * @return number of characters the function uses to store the
   *         converted string if it succeeds, or one of the error
   *         STATUS codes otherwise.
   */
  static int utf162utf8 (ACEXML_UTF16 src,
                         ACEXML_UTF8 *dst,
                         size_t len);

  /**
   * Convert a UCS-4 character into a string in UTF-8 encoding.
   *
   * @param src The UCS-4 character to convert.
   * @param dst Buffer that receives the UTF-8 encoded result.
   * @param len Presumably the capacity of @a dst -- TODO confirm
   *            against the implementation.
   *
   * @return number of characters the function uses to store the
   *         converted string if it succeeds, or one of the error
   *         STATUS codes otherwise.
   */
  static int ucs42utf8 (ACEXML_UCS4 src,
                        ACEXML_UTF8 *dst,
                        size_t len);

  /**
   * Convert a UCS-4 character into a string in UTF-16 encoding.
   *
   * @param src The UCS-4 character to convert.
   * @param dst Buffer that receives the UTF-16 encoded result.
   * @param len Presumably the capacity of @a dst -- TODO confirm
   *            against the implementation.
   *
   * @return number of characters the function uses to store the
   *         converted string if it succeeds, or one of the error
   *         STATUS codes otherwise.
   */
  static int ucs42utf16 (ACEXML_UCS4 src,
                         ACEXML_UTF16 *dst,
                         size_t len);

  /**
   * Convert a UTF-16 surrogate character pair into a string in UTF-8
   * encoding.
   *
   * @param high The high (first) half of the surrogate pair.
   * @param low  The low (second) half of the surrogate pair.
   * @param dst  Buffer that receives the UTF-8 encoded result.
   * @param len  Presumably the capacity of @a dst -- TODO confirm
   *             against the implementation.
   *
   * @return number of characters the function uses to store the
   *         converted string if it succeeds, or one of the error
   *         STATUS codes otherwise.
   */
  static int surrogate2utf8 (ACEXML_UTF16 high,
                             ACEXML_UTF16 low,
                             ACEXML_UTF8 *dst,
                             size_t len);

  /**
   * Convert a UTF-16 surrogate character pair into a UCS-4 character.
   *
   * @param high The high (first) half of the surrogate pair.
   * @param low  The low (second) half of the surrogate pair.
   * @param dst  Receives the resulting UCS-4 character.
   *
   * @return SUCCESS if it succeeds, or one of the error STATUS codes
   *         otherwise.
   */
  static int surrogate2ucs4 (ACEXML_UTF16 high,
                             ACEXML_UTF16 low,
                             ACEXML_UCS4 &dst);

  /**
   * Convert the first Unicode character in a UTF-8 character string
   * into a UCS-4 character.
   *
   * @param src The UTF-8 string to read from.
   * @param len Presumably the number of characters available in
   *            @a src -- TODO confirm against the implementation.
   * @param dst Receives the resulting UCS-4 character.
   *
   * @return number of characters the function consumed from the
   *         UTF-8 string if it succeeds, or one of the error STATUS
   *         codes otherwise.
   */
  static int utf82ucs4 (const ACEXML_UTF8 *src,
                        size_t len,
                        ACEXML_UCS4 &dst);

  /**
   * Convert the first Unicode character in a UTF-16 character string
   * into a UCS-4 character.
   *
   * @param src The UTF-16 string to read from.
   * @param len Presumably the number of characters available in
   *            @a src -- TODO confirm against the implementation.
   * @param dst Receives the resulting UCS-4 character.
   *
   * @return number of characters the function consumed from the
   *         UTF-16 string if it succeeds, or one of the error STATUS
   *         codes otherwise.
   */
  static int utf162ucs4 (const ACEXML_UTF16 *src,
                         size_t len,
                         ACEXML_UCS4 &dst);

//    static int utf82utf16 (const ACEXML_UTF8 *src,
//                           size_t len,
//                           ACEXML_UTF16 &dst);
  // This function does not handle surrogates.

  // = The following functions are non-inlined:

  /**
   * Convert a UTF-8 string into a UTF-16 string.
   *
   * @param src The UTF-8 string to convert.
   * @param dst Buffer that receives the UTF-16 encoded result.
   * @param len The length of @a dst string.
   *
   * @return number of characters the function consumed from the
   *         UTF-8 string if it succeeds, or one of the error STATUS
   *         codes otherwise.
   */
  static int utf8s2utf16s (const ACEXML_UTF8 *src,
                           ACEXML_UTF16 *dst,
                           size_t len);

  /**
   * Convert a UTF-16 string into a UTF-8 string.
   *
   * @param src The UTF-16 string to convert.
   * @param dst Buffer that receives the UTF-8 encoded result.
   * @param len The length of @a dst string.
   *
   * @return number of characters the function uses in the UTF-8
   *         string if it succeeds, or one of the error STATUS codes
   *         otherwise.
   */
  static int utf16s2utf8s (const ACEXML_UTF16 *src,
                           ACEXML_UTF8 *dst,
                           size_t len);
};

#if defined (__ACEXML_INLINE__)
# include "ACEXML/common/Transcode.i"
#endif /* __ACEXML_INLINE__ */

#include "ace/post.h"

#endif /* _ACEXML_TRANSCODE_H_ */