summaryrefslogtreecommitdiff
path: root/pango/pango-language.c
diff options
context:
space:
mode:
Diffstat (limited to 'pango/pango-language.c')
-rw-r--r--pango/pango-language.c382
1 files changed, 382 insertions, 0 deletions
diff --git a/pango/pango-language.c b/pango/pango-language.c
new file mode 100644
index 00000000..a5e32369
--- /dev/null
+++ b/pango/pango-language.c
@@ -0,0 +1,382 @@
+/* Pango
+ * pango-language.c: Language handling routines
+ *
+ * Copyright (C) 2000 Red Hat Software
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include <locale.h>
+
+#include "pango-language.h"
+#include "pango-impl-utils.h"
+
+/* Byte -> canonical character table used by lang_equal(), lang_hash()
+ * and pango_language_from_string().
+ *
+ * ASCII digits map to themselves, ASCII letters of either case map to
+ * the lowercase letter, and both '-' and '_' map to '-'.  Every other
+ * byte maps to 0; only the first 128 entries are spelled out, so bytes
+ * 128..255 are implicitly zero-initialized.  A 0 entry is what the
+ * users of this table treat as "end of the language tag".
+ */
+static const char canon_map[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0,
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-',
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0
+};
+
+/* Equality callback handed to g_hash_table_new() for the language table.
+ *
+ * Walks both strings in lockstep, comparing the canon_map[] image of
+ * each byte.  The walk stops at the first position where the first
+ * string's byte canonicalizes to 0 or the two images differ; the
+ * strings are equal exactly when the images at that position agree.
+ */
+static gboolean
+lang_equal (gconstpointer v1,
+            gconstpointer v2)
+{
+  const guchar *a = v1;
+  const guchar *b = v2;
+
+  for (;; a++, b++)
+    {
+      char canon_a = canon_map[*a];
+      char canon_b = canon_map[*b];
+
+      if (canon_a == 0 || canon_a != canon_b)
+        return (canon_a == canon_b);
+    }
+}
+
+/* Hash callback handed to g_hash_table_new() for the language table.
+ *
+ * Folds the canon_map[] image of each byte into a multiply-by-31 hash
+ * and stops at the first byte whose image is 0, so strings that
+ * lang_equal() considers equal hash identically.
+ */
+static guint
+lang_hash (gconstpointer key)
+{
+  const guchar *cursor;
+  guint hash = 0;
+
+  for (cursor = key; canon_map[*cursor] != 0; cursor++)
+    hash = hash * 31 + canon_map[*cursor];
+
+  return hash;
+}
+
+/* Boxed-type copy function: language tags are immutable, so a "copy"
+ * is simply the same pointer handed back.
+ */
+static PangoLanguage *
+pango_language_copy (PangoLanguage *language)
+{
+  PangoLanguage *same_tag = language;
+
+  return same_tag;
+}
+
+/* Boxed-type free function: intentionally does nothing with @language. */
+static void
+pango_language_free (PangoLanguage *language)
+{
+  /* nothing to release */
+}
+
+/* Returns the boxed GType for PangoLanguage, registering it under the
+ * name "PangoLanguage" on the first call and caching the id in a
+ * function-local static afterwards.
+ */
+GType
+pango_language_get_type (void)
+{
+  static GType language_type = 0;
+
+  if (language_type != 0)
+    return language_type;
+
+  language_type = g_boxed_type_register_static (I_("PangoLanguage"),
+                                                (GBoxedCopyFunc)pango_language_copy,
+                                                (GBoxedFreeFunc)pango_language_free);
+
+  return language_type;
+}
+
+/**
+ * _pango_get_lc_ctype:
+ *
+ * Return the Unix-style locale string for the language currently in
+ * effect.
+ *
+ * On Unix systems this is a copy of the value returned by
+ * <literal>setlocale(LC_CTYPE, NULL)</literal>, which the user can
+ * influence through the environment variables LC_ALL, LC_CTYPE or
+ * LANG (checked in that order). The locale string typically is in
+ * the form lang_COUNTRY, where lang is an ISO-639 language code and
+ * COUNTRY is an ISO-3166 country code: for instance sv_FI for
+ * Swedish as written in Finland, or pt_BR for Portuguese as written
+ * in Brazil.
+ *
+ * On Windows, the C library does not look at those environment
+ * variables, and setting them does not change the behavior of
+ * functions like ctime(); the user chooses the locale through the
+ * Regional Options in the Control Panel, and setlocale() works with
+ * country and language names spelled out in English rather than
+ * codes. This function nevertheless does check LC_ALL, LC_CTYPE and
+ * LANG (in that order) and returns a copy of the first one that is
+ * set; if none is set it returns the result of g_win32_getlocale().
+ *
+ * Return value: a dynamically allocated string, free with g_free().
+ */
+static gchar *
+_pango_get_lc_ctype (void)
+{
+#ifdef G_OS_WIN32
+  /* The Microsoft C library knows nothing about the LANG or LC_
+   * environment variables, so setting them has no effect on
+   * locale-dependent C library functions such as ctime().  Pango
+   * obeys them anyway: it makes it easy to test GTK and Pango under
+   * different default languages by starting the program with
+   * LC_ALL=something instead of going through the Control Panel.
+   */
+  static const char *const locale_env_vars[] = { "LC_ALL", "LC_CTYPE", "LANG" };
+  guint i;
+
+  for (i = 0; i < G_N_ELEMENTS (locale_env_vars); i++)
+    {
+      const gchar *value = getenv (locale_env_vars[i]);
+
+      if (value != NULL)
+        return g_strdup (value);
+    }
+
+  return g_win32_getlocale ();
+#else
+  const char *ctype_locale = setlocale (LC_CTYPE, NULL);
+
+  return g_strdup (ctype_locale);
+#endif
+}
+
+/**
+ * pango_language_get_default:
+ *
+ * Returns the #PangoLanguage for the current locale of the process.
+ * Note that this can change over the life of an application.
+ *
+ * On Unix systems, the return value is derived from
+ * <literal>setlocale(LC_CTYPE, NULL)</literal>, and the user can
+ * affect this through the environment variables LC_ALL, LC_CTYPE or
+ * LANG (checked in that order). The locale string typically is in
+ * the form lang_COUNTRY, where lang is an ISO-639 language code, and
+ * COUNTRY is an ISO-3166 country code. For instance, sv_FI for
+ * Swedish as written in Finland or pt_BR for Portuguese as written in
+ * Brazil.
+ *
+ * On Windows, the C library does not use any such environment
+ * variables, and setting them won't affect the behavior of functions
+ * like ctime(). The user sets the locale through the Regional Options
+ * in the Control Panel. The C library (in the setlocale() function)
+ * does not use country and language codes, but country and language
+ * names spelled out in English.
+ * However, this function does check the above environment
+ * variables, and derives its result from a Unix-style locale string
+ * based on either said environment variables or the thread's current
+ * locale.
+ *
+ * Your application should call <literal>setlocale(LC_ALL, "");</literal>
+ * for the user settings to take effect. Gtk+ does this in its initialization
+ * functions automatically (by calling gtk_set_locale()).
+ * See <literal>man setlocale</literal> for more details.
+ *
+ * Return value: the default language as a #PangoLanguage, must not be
+ *               freed.
+ *
+ * Since: 1.16
+ **/
+PangoLanguage *
+pango_language_get_default (void)
+{
+  gchar *locale_name = _pango_get_lc_ctype ();
+  PangoLanguage *language = pango_language_from_string (locale_name);
+
+  /* The tag returned above does not alias locale_name, so the
+   * temporary locale string can be released here.
+   */
+  g_free (locale_name);
+
+  return language;
+}
+
+/**
+ * pango_language_from_string:
+ * @language: a string representing a language tag, or %NULL
+ *
+ * Take a RFC-3066 format language tag as a string and convert it to a
+ * #PangoLanguage pointer that can be efficiently copied (copy the
+ * pointer) and compared with other language tags (compare the
+ * pointer.)
+ *
+ * This function first canonicalizes the string by converting ASCII
+ * letters to lowercase and mapping '_' to '-'. Digits and '-' are
+ * kept as they are. The canonical tag ends at the first character
+ * that is not an ASCII letter, a digit, '-' or '_'; that character
+ * and everything after it is ignored, so for example "en_US.UTF-8"
+ * yields the tag "en-us".
+ *
+ * Use pango_language_get_default() if you want to get the #PangoLanguage for
+ * the current locale of the process.
+ *
+ * Return value: an opaque pointer to a #PangoLanguage structure, or
+ *               %NULL if @language was %NULL. The returned tag is
+ *               kept in an internal table and is never freed, so it
+ *               stays valid for the life of the process.
+ **/
+PangoLanguage *
+pango_language_from_string (const char *language)
+{
+  static GHashTable *hash = NULL;
+  const guchar *src;
+  char *result;
+  char *dst;
+
+  /* A NULL tag (e.g. a locale string that could not be determined)
+   * would otherwise be dereferenced by lang_hash() or strlen().
+   */
+  if (language == NULL)
+    return NULL;
+
+  if (G_UNLIKELY (!hash))
+    hash = g_hash_table_new (lang_hash, lang_equal);
+  else
+    {
+      /* lang_hash/lang_equal canonicalize on the fly, so the raw
+       * input can be used directly as the lookup key.
+       */
+      result = g_hash_table_lookup (hash, language);
+      if (result)
+        return (PangoLanguage *)result;
+    }
+
+  /* The canonical form is never longer than the input. */
+  result = g_malloc (strlen (language) + 1);
+
+  /* Copy the canonical image of each byte, including the terminating
+   * 0 produced by the first byte that canon_map[] rejects.
+   */
+  src = (const guchar *)language;
+  dst = result;
+  while ((*dst++ = canon_map[*src++]) != 0)
+    ;
+
+  /* The canonical string is both key and value; it is owned by the
+   * table from here on.
+   */
+  g_hash_table_insert (hash, result, result);
+
+  return (PangoLanguage *)result;
+}
+
+/**
+ * pango_language_matches:
+ * @language: a language tag (see pango_language_from_string()),
+ *            %NULL is allowed and matches nothing but '*'
+ * @range_list: a list of language ranges, separated by ';', ':',
+ *   ',', space or tab characters.
+ *   Each element must either be '*', or a RFC 3066 language range
+ *   canonicalized as by pango_language_from_string()
+ *
+ * Checks if a language tag matches one of the elements in a list of
+ * language ranges. A language tag is considered to match a range
+ * in the list if the range is '*', the range is exactly the tag,
+ * or the range is a prefix of the tag, and the character after it
+ * in the tag is '-'.
+ *
+ * Return value: %TRUE if a match was found.
+ **/
+gboolean
+pango_language_matches (PangoLanguage *language,
+                        const char *range_list)
+{
+  const char *lang_str = pango_language_to_string (language);
+  const char *range = range_list;
+
+  for (;;)
+    {
+      const char *separator = strpbrk (range, ";:, \t");
+      gboolean is_last = (separator == NULL);
+      size_t range_len = is_last ? strlen (range) : (size_t) (separator - range);
+
+      /* Only the first character is examined for the wildcard. */
+      if (range[0] == '*')
+        return TRUE;
+
+      /* Prefix match that must end on a tag boundary: either the end
+       * of the tag or a '-' subtag separator.
+       */
+      if (lang_str != NULL &&
+          strncmp (lang_str, range, range_len) == 0 &&
+          (lang_str[range_len] == '\0' || lang_str[range_len] == '-'))
+        return TRUE;
+
+      if (is_last)
+        return FALSE;
+
+      range = separator + 1;
+    }
+}
+
+/* One row of the sample-text table: a language code and the sample
+ * string associated with it.
+ */
+typedef struct {
+ const char lang[4];	/* language code, NUL-terminated; only the first two bytes are compared by lang_info_compare() */
+ const char *str;	/* sample text for that language (string literal) */
+} LangInfo;
+
+/* bsearch() comparison callback: @key is a language string, @val points
+ * to a LangInfo row.  Only the first two characters are compared, so
+ * any key sharing a row's two-letter code as a prefix compares equal
+ * to that row.
+ */
+static int
+lang_info_compare (const void *key, const void *val)
+{
+  const char *wanted = key;
+  const char *candidate = ((const LangInfo *) val)->lang;
+
+  return strncmp (wanted, candidate, 2);
+}
+
+/* The following array is supposed to contain enough text to tickle all necessary fonts for each
+ * of the languages in the following. Yes, it's pretty lame. Not all of the languages
+ * in the following have sufficient text to exercise all the accents for the language, and
+ * there are obviously many more languages to include as well.
+ *
+ * The table is searched with bsearch() using lang_info_compare(), so the rows MUST stay
+ * sorted by language code in strcmp() order. A row that is out of order (as "de" used to
+ * be, listed after "fr") is silently never found. Keep new entries in sorted position.
+ */
+static const LangInfo lang_texts[] = {
+  { "ar", "Arabic \330\247\331\204\330\263\331\204\330\247\331\205 \330\271\331\204\331\212\331\203\331\205" },
+  { "cs", "Czech (\304\215esky) Dobr\303\275 den" },
+  { "da", "Danish (Dansk) Hej, Goddag" },
+  { "de", "German Gr\303\274\303\237 Gott" },
+  { "el", "Greek (\316\225\316\273\316\273\316\267\316\275\316\271\316\272\316\254) \316\223\316\265\316\271\316\254 \317\203\316\261\317\202" },
+  { "en", "English Hello" },
+  { "eo", "Esperanto Saluton" },
+  { "es", "Spanish (Espa\303\261ol) \302\241Hola!" },
+  { "et", "Estonian Tere, Tervist" },
+  { "fi", "Finnish (Suomi) Hei, Hyv\303\244\303\244 p\303\244iv\303\244\303\244" },
+  { "fr", "French (Fran\303\247ais)" },
+  { "he", "Hebrew \327\251\327\234\327\225\327\235" },
+  { "it", "Italiano Ciao, Buon giorno" },
+  { "ja", "Japanese (\346\227\245\346\234\254\350\252\236) \343\201\223\343\202\223\343\201\253\343\201\241\343\201\257, \357\275\272\357\276\235\357\276\206\357\276\201\357\276\212" },
+  { "ko", "Korean (\355\225\234\352\270\200) \354\225\210\353\205\225\355\225\230\354\204\270\354\232\224, \354\225\210\353\205\225\355\225\230\354\213\255\353\213\210\352\271\214" },
+  { "mt", "Maltese \304\212aw, Sa\304\247\304\247a" },
+  { "nl", "Nederlands, Vlaams Hallo, Dag" },
+  { "no", "Norwegian (Norsk) Hei, God dag" },
+  { "pl", "Polish Dzie\305\204 dobry, Hej" },
+  { "ru", "Russian (\320\240\321\203\321\201\321\201\320\272\320\270\320\271)" },
+  { "sk", "Slovak Dobr\303\275 de\305\210" },
+  { "sv", "Swedish (Svenska) Hej p\303\245 dej, Goddag" },
+  { "tr", "Turkish (T\303\274rk\303\247e) Merhaba" },
+  { "zh", "Chinese (\344\270\255\346\226\207,\346\231\256\351\200\232\350\257\235,\346\261\211\350\257\255)" }
+};
+
+/**
+ * pango_language_get_sample_string:
+ * @language: a #PangoLanguage, or %NULL
+ *
+ * Get a string that is representative of the characters needed to
+ * render a particular language. This function is a bad hack for
+ * internal use by renderers and Pango.
+ *
+ * The language is looked up in a built-in table by the first two
+ * characters of its tag. A language that is not in the table gets
+ * the French sample; a %NULL @language gets a fixed string mixing
+ * text from several scripts.
+ *
+ * Return value: the sample string. This value is owned by Pango
+ *   and must not be freed.
+ **/
+G_CONST_RETURN char *
+pango_language_get_sample_string (PangoLanguage *language)
+{
+  const LangInfo *match;
+
+  if (!language)
+    {
+      /* Complete junk
+       */
+      return "\330\247\331\204\330\263\331\204\330\247\331\205 \330\271\331\204\331\212\331\203\331\205 \304\215esky \316\225\316\273\316\273\316\267\316\275\316\271\316\272\316\254 Fran\303\247ais \346\227\245\346\234\254\350\252\236 \355\225\234\352\270\200 \320\240\321\203\321\201\321\201\320\272\320\270\320\271 \344\270\255\346\226\207,\346\231\256\351\200\232\350\257\235,\346\261\211\350\257\255 T\303\274rk\303\247e";
+    }
+
+  match = bsearch (pango_language_to_string (language), lang_texts,
+                   G_N_ELEMENTS (lang_texts), sizeof (LangInfo),
+                   lang_info_compare);
+
+  if (match)
+    return match->str;
+
+  return "French (Fran\303\247ais)"; /* Assume iso-8859-1 */
+}