summaryrefslogtreecommitdiff
path: root/ACEXML/common/Transcode.h
blob: 4cfcbb80b273cd89a4d656663dfa0b097eeffe56 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
// -*- C++ -*-

//=============================================================================
/**
 *  @file    Transcode.h
 *
 * This file declares functions to convert character strings among different
 * Unicode encodings (UTF-8, UTF-16, UTF-32).
 *
 *  $Id$
 *
 *  @author Nanbor Wang <nanbor@cs.wustl.edu>
 */
//=============================================================================

#ifndef _ACEXML_TRANSCODE_H_
#define _ACEXML_TRANSCODE_H_
#include "common/XML_Types.h"

/**
 * @class ACEXML_Transcoder Transcode.h "common/Transcode.h"
 *
 * @brief ACEXML_Transcoder
 *
 * Wrapper class for performing transcoding among different Unicode
 * encodings.
 */
class ACEXML_Export ACEXML_Transcoder
{
public:
  /// Status codes shared by every transcoding function in this class.
  /// SUCCESS is zero and every error code is negative, while a
  /// successful conversion is documented to return a character count.
  enum
  {
    SUCCESS               =  0,
    DESTINATION_TOO_SHORT = -1,
    END_OF_SOURCE         = -2,
    INVALID_ARGS          = -3,
    IS_SURROGATE          = -4,
    NON_UNICODE           = -5
  };

  /**
   * @name Single-character conversions
   *
   * Each function translates one Unicode character into a different
   * encoding.  On success the return value is the number of characters
   * put into the destination or consumed from the source; on failure it
   * is the corresponding error code from the enum above.
   *
   * NOTE(review): for the functions that write to a @a dst buffer,
   * @a len is presumably the capacity of @a dst; for the functions that
   * read from a @a src buffer it is presumably the number of units
   * available in @a src.  Confirm against the definitions.
   */
  //@{

  /// UTF-16 unit @a src to UTF-8, written to @a dst.
  static int utf162utf8 (ACEXML_UTF16 src, ACEXML_UTF8 *dst, size_t len);

  /// UCS-4 value @a src to UTF-8, written to @a dst.
  static int ucs42utf8 (ACEXML_UCS4 src, ACEXML_UTF8 *dst, size_t len);

  /// UCS-4 value @a src to UTF-16, written to @a dst.
  static int ucs42utf16 (ACEXML_UCS4 src, ACEXML_UTF16 *dst, size_t len);

  /// Surrogate pair (@a high, @a low) to UTF-8, written to @a dst.
  static int surrogate2utf8 (ACEXML_UTF16 high,
                             ACEXML_UTF16 low,
                             ACEXML_UTF8 *dst,
                             size_t len);

  /// Surrogate pair (@a high, @a low) to UCS-4, delivered through the
  /// @a dst reference.
  static int surrogate2ucs4 (ACEXML_UTF16 high,
                             ACEXML_UTF16 low,
                             ACEXML_UCS4 &dst);

  /// UTF-8 input at @a src to UCS-4, delivered through the @a dst
  /// reference.
  static int utf82ucs4 (const ACEXML_UTF8 *src, size_t len, ACEXML_UCS4 &dst);

  /// UTF-16 input at @a src to UCS-4, delivered through the @a dst
  /// reference.
  static int utf162ucs4 (const ACEXML_UTF16 *src, size_t len, ACEXML_UCS4 &dst);

  //@}

  // Disabled declaration, kept for reference.  The accompanying note in
  // the original header reads "This function does not handle
  // surrogates", which appears to describe this function.
  //
  //    static int utf82utf16 (const ACEXML_UTF8 *src,
  //                           size_t len,
  //                           ACEXML_UTF16 &dst);

  /**
   * @name String conversions
   *
   * The following functions are non-inlined.
   *
   * NOTE(review): judging by their names these convert whole strings
   * rather than single characters, and @a len presumably bounds
   * @a dst -- confirm against the definitions.
   */
  //@{

  /// UTF-8 string @a src to UTF-16, written to @a dst.
  static int utf8s2utf16s (const ACEXML_UTF8 *src, ACEXML_UTF16 *dst, size_t len);

  /// UTF-16 string @a src to UTF-8, written to @a dst.
  static int utf16s2utf8s (const ACEXML_UTF16 *src, ACEXML_UTF8 *dst, size_t len);

  //@}
};

#if defined (__ACEXML_INLINE__)
# include "common/Transcode.i"
#endif /* __ACEXML_INLINE__ */
#endif /* _ACEXML_TRANSCODE_H_ */