diff options
Diffstat (limited to 'pango/pango-language.c')
-rw-r--r-- | pango/pango-language.c | 382 |
1 files changed, 382 insertions, 0 deletions
diff --git a/pango/pango-language.c b/pango/pango-language.c new file mode 100644 index 00000000..a5e32369 --- /dev/null +++ b/pango/pango-language.c @@ -0,0 +1,382 @@ +/* Pango + * pango-language.c: Language handling routines + * + * Copyright (C) 2000 Red Hat Software + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include <config.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <math.h> +#include <locale.h> + +#include "pango-language.h" +#include "pango-impl-utils.h" + +static const char canon_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0, + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0, + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-', + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0 +}; + +static gboolean +lang_equal (gconstpointer v1, + gconstpointer v2) +{ + const guchar *p1 = v1; + const guchar *p2 = v2; + + while (canon_map[*p1] && canon_map[*p1] == canon_map[*p2]) + { + p1++, p2++; + } + + return (canon_map[*p1] == canon_map[*p2]); +} + +static guint +lang_hash (gconstpointer key) +{ + const guchar *p = key; + guint h = 0; + while (canon_map[*p]) + { + h = (h << 5) - h + canon_map[*p]; + p++; + } + + return h; +} + +static PangoLanguage * +pango_language_copy (PangoLanguage *language) +{ + return language; /* language tags are const */ +} + +static void +pango_language_free (PangoLanguage *language) +{ + return; /* nothing */ +} + +GType +pango_language_get_type (void) +{ + static GType our_type = 0; + + if (our_type == 0) + our_type = g_boxed_type_register_static (I_("PangoLanguage"), + (GBoxedCopyFunc)pango_language_copy, + (GBoxedFreeFunc)pango_language_free); + return our_type; +} + +/** + * _pango_get_lc_ctype: + * + * Return the Unix-style locale string for the language currently in + * effect. On Unix systems, this is the return value from + * <literal>setlocale(LC_CTYPE, NULL)</literal>, and the user can + * affect this through the environment variables LC_ALL, LC_CTYPE or + * LANG (checked in that order). The locale strings typically is in + * the form lang_COUNTRY, where lang is an ISO-639 language code, and + * COUNTRY is an ISO-3166 country code. For instance, sv_FI for + * Swedish as written in Finland or pt_BR for Portuguese as written in + * Brazil. + * + * On Windows, the C library doesn't use any such environment + * variables, and setting them won't affect the behavior of functions + * like ctime(). The user sets the locale through the Regional Options + * in the Control Panel. The C library (in the setlocale() function) + * does not use country and language codes, but country and language + * names spelled out in English. + * However, this function does check the above environment + * variables, and does return a Unix-style locale string based on + * either said environment variables or the thread's current locale. + * + * Return value: a dynamically allocated string, free with g_free(). + */ +static gchar * +_pango_get_lc_ctype (void) +{ +#ifdef G_OS_WIN32 + /* Somebody might try to set the locale for this process using the + * LANG or LC_ environment variables. The Microsoft C library + * doesn't know anything about them. You set the locale in the + * Control Panel. Setting these env vars won't have any affect on + * locale-dependent C library functions like ctime(). But just for + * kicks, do obey LC_ALL, LC_CTYPE and LANG in Pango. (This also makes + * it easier to test GTK and Pango in various default languages, you + * don't have to clickety-click in the Control Panel, you can simply + * start the program with LC_ALL=something on the command line.) + */ + + gchar *p; + + p = getenv ("LC_ALL"); + if (p != NULL) + return g_strdup (p); + + p = getenv ("LC_CTYPE"); + if (p != NULL) + return g_strdup (p); + + p = getenv ("LANG"); + if (p != NULL) + return g_strdup (p); + + return g_win32_getlocale (); +#else + return g_strdup (setlocale (LC_CTYPE, NULL)); +#endif +} + +/** + * pango_language_get_default: + * + * Returns the #PangoLanguage for the current locale of the process. + * Note that this can change over the life of an application. + * + * On Unix systems, this is the return value is derived from + * <literal>setlocale(LC_CTYPE, NULL)</literal>, and the user can + * affect this through the environment variables LC_ALL, LC_CTYPE or + * LANG (checked in that order). The locale string typically is in + * the form lang_COUNTRY, where lang is an ISO-639 language code, and + * COUNTRY is an ISO-3166 country code. For instance, sv_FI for + * Swedish as written in Finland or pt_BR for Portuguese as written in + * Brazil. + * + * On Windows, the C library does not use any such environment + * variables, and setting them won't affect the behavior of functions + * like ctime(). The user sets the locale through the Regional Options + * in the Control Panel. The C library (in the setlocale() function) + * does not use country and language codes, but country and language + * names spelled out in English. + * However, this function does check the above environment + * variables, and does return a Unix-style locale string based on + * either said environment variables or the thread's current locale. + * + * Your application should call <literal>setlocale(LC_ALL, "");</literal> + * for the user settings to take effect. Gtk+ does this in its initialization + * functions automatically (by calling gtk_set_locale()). + * See <literal>man setlocale</literal> for more details. + * + * Return value: the default language as a #PangoLanguage, must not be + * freed. + * + * Since: 1.16 + **/ +PangoLanguage * +pango_language_get_default (void) +{ + gchar *lang; + PangoLanguage *result; + + lang = _pango_get_lc_ctype (); + + result = pango_language_from_string (lang); + g_free (lang); + + return result; +} + +/** + * pango_language_from_string: + * @language: a string representing a language tag + * + * Take a RFC-3066 format language tag as a string and convert it to a + * #PangoLanguage pointer that can be efficiently copied (copy the + * pointer) and compared with other language tags (compare the + * pointer.) + * + * This function first canonicalizes the string by converting it to + * lowercase, mapping '_' to '-', and stripping all characters other + * than letters and '-'. + * + * Use pango_language_get_default() if you want to get the #PangoLanguage for + * the current locale of the process. + * + * Return value: an opaque pointer to a #PangoLanguage structure. + * this will be valid forever after. + **/ +PangoLanguage * +pango_language_from_string (const char *language) +{ + static GHashTable *hash = NULL; + char *result; + int len; + char *p; + + if (G_UNLIKELY (!hash)) + hash = g_hash_table_new (lang_hash, lang_equal); + else + { + result = g_hash_table_lookup (hash, language); + if (result) + return (PangoLanguage *)result; + } + + len = strlen (language); + result = g_malloc (len + 1); + + p = result; + while ((*(p++) = canon_map[*(guchar *)language++])) + ; + + g_hash_table_insert (hash, result, result); + + return (PangoLanguage *)result; +} + +/** + * pango_language_matches: + * @language: a language tag (see pango_language_from_string()), + * %NULL is allowed and matches nothing but '*' + * @range_list: a list of language ranges, separated by ';', ':', + * ',', or space characters. + * Each element must either be '*', or a RFC 3066 language range + * canonicalized as by pango_language_from_string() + * + * Checks if a language tag matches one of the elements in a list of + * language ranges. A language tag is considered to match a range + * in the list if the range is '*', the range is exactly the tag, + * or the range is a prefix of the tag, and the character after it + * in the tag is '-'. + * + * Return value: %TRUE if a match was found. + **/ +gboolean +pango_language_matches (PangoLanguage *language, + const char *range_list) +{ + const char *lang_str = pango_language_to_string (language); + const char *p = range_list; + gboolean done = FALSE; + + while (!done) + { + const char *end = strpbrk (p, ";:, \t"); + if (!end) + { + end = p + strlen (p); + done = TRUE; + } + + if (strncmp (p, "*", 1) == 0 || + (lang_str && strncmp (lang_str, p, end - p) == 0 && + (lang_str[end - p] == '\0' || lang_str[end - p] == '-'))) + return TRUE; + + if (!done) + p = end + 1; + } + + return FALSE; +} + +typedef struct { + const char lang[4]; + const char *str; +} LangInfo; + +static int +lang_info_compare (const void *key, const void *val) +{ + const LangInfo *lang_info = val; + + return strncmp (key, lang_info->lang, 2); +} + +/* The following array is supposed to contain enough text to tickle all necessary fonts for each + * of the languages in the following. Yes, it's pretty lame. Not all of the languages + * in the following have sufficient text to exercise all the accents for the language, and + * there are obviously many more languages to include as well. + */ +static const LangInfo lang_texts[] = { + { "ar", "Arabic \330\247\331\204\330\263\331\204\330\247\331\205 \330\271\331\204\331\212\331\203\331\205" }, + { "cs", "Czech (\304\215esky) Dobr\303\275 den" }, + { "da", "Danish (Dansk) Hej, Goddag" }, + { "el", "Greek (\316\225\316\273\316\273\316\267\316\275\316\271\316\272\316\254) \316\223\316\265\316\271\316\254 \317\203\316\261\317\202" }, + { "en", "English Hello" }, + { "eo", "Esperanto Saluton" }, + { "es", "Spanish (Espa\303\261ol) \302\241Hola!" }, + { "et", "Estonian Tere, Tervist" }, + { "fi", "Finnish (Suomi) Hei, Hyv\303\244\303\244 p\303\244iv\303\244\303\244" }, + { "fr", "French (Fran\303\247ais)" }, + { "de", "German Gr\303\274\303\237 Gott" }, + { "he", "Hebrew \327\251\327\234\327\225\327\235" }, + { "it", "Italiano Ciao, Buon giorno" }, + { "ja", "Japanese (\346\227\245\346\234\254\350\252\236) \343\201\223\343\202\223\343\201\253\343\201\241\343\201\257, \357\275\272\357\276\235\357\276\206\357\276\201\357\276\212" }, + { "ko", "Korean (\355\225\234\352\270\200) \354\225\210\353\205\225\355\225\230\354\204\270\354\232\224, \354\225\210\353\205\225\355\225\230\354\213\255\353\213\210\352\271\214" }, + { "mt", "Maltese \304\212aw, Sa\304\247\304\247a" }, + { "nl", "Nederlands, Vlaams Hallo, Dag" }, + { "no", "Norwegian (Norsk) Hei, God dag" }, + { "pl", "Polish Dzie\305\204 dobry, Hej" }, + { "ru", "Russian (\320\240\321\203\321\201\321\201\320\272\320\270\320\271)" }, + { "sk", "Slovak Dobr\303\275 de\305\210" }, + { "sv", "Swedish (Svenska) Hej p\303\245 dej, Goddag" }, + { "tr", "Turkish (T\303\274rk\303\247e) Merhaba" }, + { "zh", "Chinese (\344\270\255\346\226\207,\346\231\256\351\200\232\350\257\235,\346\261\211\350\257\255)" } +}; + +/** + * pango_language_get_sample_string: + * @language: a #PangoLanguage + * + * Get a string that is representative of the characters needed to + * render a particular language. This function is a bad hack for + * internal use by renderers and Pango. + * + * Return value: the sample string. This value is owned by Pango + * and must not be freed. + **/ +G_CONST_RETURN char * +pango_language_get_sample_string (PangoLanguage *language) +{ + const char *result; + + if (language) + { + const char *lang_str = pango_language_to_string (language); + + LangInfo *lang_info = bsearch (lang_str, lang_texts, + G_N_ELEMENTS (lang_texts), sizeof (LangInfo), + lang_info_compare); + + if (lang_info) + result = lang_info->str; + else + result = "French (Fran\303\247ais)"; /* Assume iso-8859-1 */ + } + else + { + /* Complete junk + */ + + result = "\330\247\331\204\330\263\331\204\330\247\331\205 \330\271\331\204\331\212\331\203\331\205 \304\215esky \316\225\316\273\316\273\316\267\316\275\316\271\316\272\316\254 Fran\303\247ais \346\227\245\346\234\254\350\252\236 \355\225\234\352\270\200 \320\240\321\203\321\201\321\201\320\272\320\270\320\271 \344\270\255\346\226\207,\346\231\256\351\200\232\350\257\235,\346\261\211\350\257\255 T\303\274rk\303\247e"; + } + + return result; +} |