summaryrefslogtreecommitdiff
path: root/gnulib-local/lib/localcharset.c.diff
blob: 04865a34553aac27b554170ed1574dd61ad2ca24 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
Add a variant of `locale_charset' that returns its result based solely on
information from the environment.  See
http://lists.gnu.org/archive/html/guile-devel/2011-11/msg00040.html for the
rationale.

--- a/lib/localcharset.c
+++ b/lib/localcharset.c
@@ -544,3 +544,120 @@ locale_charset (void)
 
   return codeset;
 }
+
+/* A variant of the above, without calls to `setlocale', `nl_langinfo',
+   etc.  */
+const char *
+environ_locale_charset (void)
+{
+  static char buf[2 + 10 + 1];
+  const char *codeset, *aliases;
+  const char *locale = NULL;
+
+  locale = getenv ("LC_ALL");
+  if (locale == NULL || locale[0] == '\0')
+    {
+      locale = getenv ("LC_CTYPE");
+      if (locale == NULL || locale[0] == '\0')
+	locale = getenv ("LANG");
+    }
+
+  if (locale != NULL && locale[0] != '\0')
+    {
+      /* If the locale name contains an encoding after the dot, return it.  */
+      const char *dot = strchr (locale, '.');
+
+      if (dot != NULL)
+        {
+          const char *modifier;
+
+          dot++;
+          /* Look for the possible @... trailer and remove it, if any.  */
+          modifier = strchr (dot, '@');
+          if (modifier == NULL)
+            return dot;
+          if (modifier - dot < sizeof (buf))
+            {
+              memcpy (buf, dot, modifier - dot);
+              buf [modifier - dot] = '\0';
+              return buf;
+            }
+        }
+      else if (strcmp (locale, "C") == 0)
+	{
+	  strcpy (buf, "ASCII");
+	  return buf;
+	}
+      else
+	codeset = "";
+    }
+  else
+    codeset = "";
+
+  /* Resolve alias. */
+  {
+# ifdef alias_table_defined
+    /* On some platforms, UTF-8 locales are the most frequently used ones.
+       Speed up the common case and slow down the less common cases by
+       testing for this case first.  */
+#  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
+    if (strcmp (codeset, "UTF-8") == 0)
+      goto done_table_lookup;
+    else
+#  endif
+      {
+        const struct table_entry * const table = alias_table;
+        size_t const table_size =
+          sizeof (alias_table) / sizeof (struct table_entry);
+        /* The table is sorted.  Perform a binary search.  */
+        size_t hi = table_size;
+        size_t lo = 0;
+        while (lo < hi)
+          {
+            /* Invariant:
+               for i < lo, strcmp (table[i].alias, codeset) < 0,
+               for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
+            size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
+            int cmp = strcmp (table[mid].alias, codeset);
+            if (cmp < 0)
+              lo = mid + 1;
+            else if (cmp > 0)
+              hi = mid;
+            else
+              {
+                /* Found an i with
+                     strcmp (table[i].alias, codeset) == 0.  */
+                codeset = table[mid].canonical;
+                goto done_table_lookup;
+              }
+          }
+      }
+    if (0)
+      done_table_lookup: ;
+    else
+# endif
+      {
+        /* Did not find it in the table.  */
+        /* On Mac OS X, all modern locales use the UTF-8 encoding.
+           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
+# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
+        codeset = "UTF-8";
+# else
+        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
+           the empty string as denoting "the locale's character encoding",
+           thus GNU libiconv would call this function a second time.  */
+        if (codeset[0] == '\0')
+          codeset = "ASCII";
+# endif
+      }
+  }
+
+  /* Don't return an empty string.  GNU libc and GNU libiconv interpret
+     the empty string as denoting "the locale's character encoding",
+     thus GNU libiconv would call this function a second time.  */
+  if (codeset[0] == '\0')
+    /* Default to Latin-1, for backward compatibility with Guile 1.8.  */
+    codeset = "ISO-8859-1";
+
+  return codeset;
+}