charset/fribidi-char-sets-utf8.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

/* FriBidi
 * fribidi-char-sets-utf8.c - UTF-8 character set conversion routines
 *
 * Authors:
 *   Behdad Esfahbod, 2001, 2002, 2004
 *   Dov Grobgeld, 1999, 2000
 *
 * Copyright (C) 2004 Sharif FarsiWeb, Inc
 * Copyright (C) 2001,2002 Behdad Esfahbod
 * Copyright (C) 1999,2000 Dov Grobgeld
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library, in a file named COPYING; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA
 * 
 * For licensing issues, contact <fribidi.license@gmail.com>.
 */

#include <common.h>

#include <fribidi-char-sets-utf8.h>

#include <fribidi-unicode.h>

/* Decode a UTF-8 byte string into an array of FriBidiChar values.
 *
 *   ss   input bytes; exactly len bytes are available to read
 *   len  number of bytes in ss
 *   us   output array; one element is stored per decoded sequence, and
 *        since every sequence consumes at least one byte, at most len
 *        elements are written
 *
 * Returns the number of elements stored in us.
 *
 * The lead byte alone selects the sequence length (<= 0x7f: 1 byte,
 * <= 0xdf: 2 bytes, <= 0xef: 3 bytes, anything else: 4 bytes);
 * continuation bytes are masked but not validated.
 *
 * If the input ends in the middle of a multi-byte sequence, decoding stops
 * at that sequence and the count decoded so far is returned, so no byte at
 * or beyond ss[len] is ever read.
 */
FriBidiStrIndex
fribidi_utf8_to_unicode (
  /* input */
  const char *ss,
  FriBidiStrIndex len,
  /* output */
  FriBidiChar *us
)
{
  FriBidiStrIndex length;
  const unsigned char *s = (unsigned const char *) ss;
  const unsigned char *t = s;

  length = 0;
  while ((FriBidiStrIndex) (s - t) < len)
    {
      /* Bytes still available, including the lead byte at *s. */
      const FriBidiStrIndex remaining = len - (FriBidiStrIndex) (s - t);
      register unsigned char ch = *s;

      if (ch <= 0x7f)		/* one byte */
        {
          *us++ = *s++;
        }
      else if (ch <= 0xdf)	/* 2 byte */
        {
          if (remaining < 2)	/* truncated tail: do not read past the end */
            return (length);
          *us++ = ((*s & 0x1f) << 6) + (*(s + 1) & 0x3f);
          s += 2;
        }
      else if (ch <= 0xef)	/* 3 byte */
        {
          if (remaining < 3)	/* truncated tail: do not read past the end */
            return (length);
          *us++ =
            ((int) (*s & 0x0f) << 12) +
            ((*(s + 1) & 0x3f) << 6) + (*(s + 2) & 0x3f);
          s += 3;
        }
      else			/* 4 byte */
        {
          if (remaining < 4)	/* truncated tail: do not read past the end */
            return (length);
          *us++ =
            ((int) (*s & 0x07) << 18) +
            ((*(s + 1) & 0x3f) << 12) +
            ((*(s + 2) & 0x3f) << 6) +
            ((*(s + 3) & 0x3f) << 0);
          s += 4;
        }
      length++;
    }
  return (length);
}

/* Encode an array of FriBidiChar values as UTF-8.
 *
 *   us   input characters
 *   len  number of elements in us
 *   ss   output buffer; receives the encoded bytes followed by a single
 *        zero byte
 *
 * Returns the number of bytes written, not counting the trailing zero.
 *
 * Each value is emitted as 1, 2, 3 or 4 bytes depending on its magnitude.
 * Values that are not below FRIBIDI_UNICODE_CHARS produce no output.
 */
FriBidiStrIndex
fribidi_unicode_to_utf8 (
  /* input */
  const FriBidiChar *us,
  FriBidiStrIndex len,
  /* output */
  char *ss
)
{
  unsigned char *const start = (unsigned char *) ss;
  unsigned char *out = start;
  FriBidiStrIndex pos = 0;

  while (pos < len)
    {
      const FriBidiChar c = us[pos++];

      if (c <= 0x7F)
        {
          /* Fits in 7 bits: emitted as a single byte. */
          out[0] = c;
          out += 1;
          continue;
        }

      if (c <= 0x7FF)
        {
          /* 11 significant bits: 5 in the lead byte, 6 in the trailer. */
          out[0] = 0xC0 | (unsigned char) (c >> 6);
          out[1] = 0x80 | (unsigned char) (c & 0x3F);
          out += 2;
          continue;
        }

      if (c <= 0xFFFF)
        {
          /* 16 significant bits: 4 + 6 + 6. */
          out[0] = 0xE0 | (unsigned char) (c >> 12);
          out[1] = 0x80 | (unsigned char) ((c >> 6) & 0x3F);
          out[2] = 0x80 | (unsigned char) (c & 0x3F);
          out += 3;
          continue;
        }

      if (c < FRIBIDI_UNICODE_CHARS)
        {
          /* 21 significant bits: 3 + 6 + 6 + 6. */
          out[0] = 0xF0 | (unsigned char) ((c >> 18) & 0x07);
          out[1] = 0x80 | (unsigned char) ((c >> 12) & 0x3F);
          out[2] = 0x80 | (unsigned char) ((c >> 6) & 0x3F);
          out[3] = 0x80 | (unsigned char) (c & 0x3F);
          out += 4;
        }
      /* Anything else is skipped without emitting bytes. */
    }

  /* Terminate the output; the terminator is not part of the length. */
  *out = 0;

  return (out - start);
}

/* Editor directions:
 * vim:textwidth=78:tabstop=8:shiftwidth=2:autoindent:cindent
 */