diff options
author | Behdad Esfahbod <behdad@gnome.org> | 2009-01-06 11:52:45 +0000 |
---|---|---|
committer | Behdad Esfahbod <behdad@src.gnome.org> | 2009-01-06 11:52:45 +0000 |
commit | 8657d36b16ebbd1d9890751bcab654cb9ac67d13 (patch) | |
tree | 46eb249372829e4db6375c23ba44b34c1d5ae369 /tools/gen-script-for-lang.c | |
parent | 07992d0e2c7cca25032aeac650c847a905079b23 (diff) | |
download | pango-8657d36b16ebbd1d9890751bcab654cb9ac67d13.tar.gz |
Remove the old gen-script-for-lang tool, rename the new one to it.
2009-01-06 Behdad Esfahbod <behdad@gnome.org>
* tools/Makefile.am:
* tools/gen-script-for-lang.c:
* tools/gen-script-for-lang-new.c:
Remove the old gen-script-for-lang tool, rename the new one to it.
svn path=/trunk/; revision=2789
Diffstat (limited to 'tools/gen-script-for-lang.c')
-rw-r--r-- | tools/gen-script-for-lang.c | 342 |
1 files changed, 165 insertions, 177 deletions
```diff
diff --git a/tools/gen-script-for-lang.c b/tools/gen-script-for-lang.c
index f7ccdc1a..b3cfa7ba 100644
--- a/tools/gen-script-for-lang.c
+++ b/tools/gen-script-for-lang.c
@@ -24,23 +24,26 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>

 #include <pango/pango-enum-types.h>
 #include <pango/pango-script.h>
 #include <pango/pango-types.h>
-#include <pango/pango-utils.h>
+
+#include <fontconfig/fontconfig.h>

 #define MAX_SCRIPTS 3

 typedef struct {
+  PangoScript script;
+  int freq;
+} ScriptInfo;
+
+typedef struct {
   PangoLanguage *lang;
-  PangoScript scripts[MAX_SCRIPTS];
+  ScriptInfo scripts[MAX_SCRIPTS];
 } LangInfo;

-static void scripts_for_file (const char *base_dir,
-                              const char *file_part,
-                              LangInfo *info);
-
 static const char *get_script_name (PangoScript script)
 {
   static GEnumClass *class = NULL;
@@ -66,181 +69,126 @@ static void fail (const char *format, ...)
   exit (1);
 }

-static gboolean scan_hex (const char **str, gunichar *result)
-{
-  const char *end;
-
-  *result = strtol (*str, (char **)&end, 16);
-  if (end == *str)
-    return FALSE;
-
-  *str = end;
-  return TRUE;
-}
-
 static void
-scripts_for_line (const char *base_dir,
-                  const char *file_part,
-                  const char *str,
-                  LangInfo *info)
+script_for_char (gunichar ch,
+                 LangInfo *info)
 {
-  gunichar start_char;
-  gunichar end_char;
-  gunichar ch;
-  const char *p = str;
-
-  if (g_str_has_prefix (str, "include"))
+  PangoScript script = pango_script_for_unichar (ch);
+  if (script != PANGO_SCRIPT_COMMON &&
+      script != PANGO_SCRIPT_INHERITED)
     {
-      GString *file_part = g_string_new (NULL);
-
-      str += strlen ("include");
-      if (!pango_skip_space (&str))
-        goto err;
-
-      if (!pango_scan_string (&str, file_part) ||
-          pango_skip_space (&str))
-        goto err;
-
-      scripts_for_file (base_dir, file_part->str, info);
-      g_string_free (file_part, TRUE);
-
-      return;
-    }
-
-  /* Format is HEX_DIGITS or HEX_DIGITS-HEX_DIGITS */
-  if (!scan_hex (&p, &start_char))
-    goto err;
-
-  end_char = start_char;
-
-  pango_skip_space (&p);
-  if (*p == '-')
-    {
-      p++;
-      if (!scan_hex (&p, &end_char))
-        goto err;
-
-      pango_skip_space (&p);
-    }
-
-  /* The rest of the line is ignored */
-  /*
-  if (*p != '\0')
-    goto err;
-  */
+      int j;

-  for (ch = start_char; ch <= end_char; ch++)
-    {
-      PangoScript script = pango_script_for_unichar (ch);
-      if (script != PANGO_SCRIPT_COMMON &&
-          script != PANGO_SCRIPT_INHERITED)
+      if (script == PANGO_SCRIPT_UNKNOWN)
         {
-          int j;
+          g_message ("Script unknown for U+%04X", ch);
+          return;
+        }

-          if (script == PANGO_SCRIPT_UNKNOWN)
+      for (j = 0; j < MAX_SCRIPTS; j++)
+        {
+          if (info->scripts[j].script == script)
+            break;
+          if (info->scripts[j].script == PANGO_SCRIPT_COMMON)
             {
-              g_message ("Script unknown for U+%04X", ch);
-              continue;
+              info->scripts[j].script = script;
+              break;
             }
+        }

-          for (j = 0; j < MAX_SCRIPTS; j++)
-            {
-              if (info->scripts[j] == script)
-                break;
-              if (info->scripts[j] == PANGO_SCRIPT_INVALID_CODE)
-                {
-                  info->scripts[j] = script;
-                  break;
-                }
-            }
+      if (j == MAX_SCRIPTS)
+        fail ("More than %d scripts found for %s. Increase MAX_SCRIPTS.\n", MAX_SCRIPTS, pango_language_to_string (info->lang));

-          if (j == MAX_SCRIPTS)
-            fail ("More than %d scripts found for %s\n", MAX_SCRIPTS, file_part);
-        }
+      info->scripts[j].freq++;
     }
-
-  return;
-
- err:
-  fail ("While processing '%s', cannot parse line: '%s'\n", file_part, str);
-  return; /* Not reached */
 }

 static void
-scripts_for_file (const char *base_dir,
-                  const char *file_part,
-                  LangInfo *info)
+scripts_for_lang (LangInfo *info)
 {
-  GError *error = NULL;
-  char *filename = g_build_filename (base_dir, file_part, NULL);
-  GIOChannel *channel = g_io_channel_new_file (filename, "r", &error);
-  GIOStatus status = G_IO_STATUS_NORMAL;
-
-  if (!channel)
-    fail ("Error opening '%s': %s\n", filename, error->message);
-
-  /* The files have ISO-8859-1 copyright signs in them */
-  if (!g_io_channel_set_encoding (channel, "ISO-8859-1", &error))
-    fail ("Cannot set encoding when reading '%s': %s\n", filename, error->message);
-
-  while (status == G_IO_STATUS_NORMAL)
+  const FcCharSet *charset;
+  FcChar32 ucs4, pos;
+  FcChar32 map[FC_CHARSET_MAP_SIZE];
+  int i;
+
+  charset = FcLangGetCharSet ((const FcChar8 *) info->lang);
+  if (!charset)
+    return;
+
+  for (ucs4 = FcCharSetFirstPage (charset, map, &pos);
+       ucs4 != FC_CHARSET_DONE;
+       ucs4 = FcCharSetNextPage (charset, map, &pos))
     {
-      char *str;
-      size_t term;
-      char *comment;
-
-      status = g_io_channel_read_line (channel, &str, NULL, &term, &error);
-      switch (status)
+
+      for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
         {
-        case G_IO_STATUS_NORMAL:
-          str[term] = '\0';
-          comment = strchr (str, '#');
-          if (comment)
-            *comment = '\0';
-          g_strstrip (str);
-          if (str[0] != '\0') /* Empty */
-            scripts_for_line (base_dir, file_part, str, info);
-          g_free (str);
-          break;
-        case G_IO_STATUS_EOF:
-          break;
-        case G_IO_STATUS_ERROR:
-          fail ("Error reading '%s': %s\n", filename, error->message);
-          break;
-        case G_IO_STATUS_AGAIN:
-          g_assert_not_reached ();
-          break;
+          FcChar32 bits = map[i];
+          FcChar32 base = ucs4 + i * 32;
+          int b = 0;
+          bits = map[i];
+          while (bits)
+            {
+              if (bits & 1)
+                script_for_char (base + b, info);
+
+              bits >>= 1;
+              b++;
+            }
         }
     }
-
-  if (!g_io_channel_shutdown (channel, FALSE, &error))
-    fail ("Error closing '%s': %s\n", filename, error->message);
-
-  g_free (filename);
 }

 static void
-do_file (GArray *script_array,
-         const char *base_dir,
-         const char *file_part)
+do_lang (GArray *script_array,
+         const FcChar8 *lang)
 {
-  char *langpart;
   LangInfo info;
   int j;

-  langpart = g_strndup (file_part, strlen (file_part) - strlen (".orth"));
-  info.lang = pango_language_from_string (langpart);
-  g_free (langpart);
+  info.lang = pango_language_from_string ((const char *)lang);

   for (j = 0; j < MAX_SCRIPTS; j++)
-    info.scripts[j] = PANGO_SCRIPT_INVALID_CODE;
+    {
+      info.scripts[j].script = PANGO_SCRIPT_COMMON;
+      info.scripts[j].freq = 0;
+    }

-  scripts_for_file (base_dir, file_part, &info);
+  scripts_for_lang (&info);

   g_array_append_val (script_array, info);
 }

 static int
+compare_script (gconstpointer a,
+                gconstpointer b,
+                gpointer data)
+{
+  const ScriptInfo *info_a = a;
+  const ScriptInfo *info_b = b;
+  G_GNUC_UNUSED LangInfo *lang_info = data;
+
+  /* first compare frequencies, higher first */
+  if (info_a->freq > info_b->freq)
+    return -1;
+  if (info_a->freq < info_b->freq)
+    return +1;
+
+  /* next compare script indices, higher first (it's more specific) */
+  if (info_a->script > info_b->script)
+    return -1;
+  if (info_a->script < info_b->script)
+    return +1;
+
+  /* for stability, next compare pointers themselves, smaller first */
+  if (info_a < info_b)
+    return -1;
+  if (info_a > info_b)
+    return +1;
+
+  return 0;
+}
+
+static int
 compare_lang (gconstpointer a,
               gconstpointer b)
 {
@@ -251,73 +199,113 @@ compare_lang (gconstpointer a,
                  pango_language_to_string (info_b->lang));
 }

-int main (int argc, char **argv)
+int main (void)
 {
-  GDir *dir;
-  GError *error = NULL;
   GArray *script_array;
+  unsigned int i;
   int j;
   int max_lang_len = 0;
+  int max_script_len = 0;

-  g_type_init ();
+  FcStrSet *langs_set;
+  FcStrList *langs;
+  FcChar8* lang;

-  if (argc != 2)
-    fail ("Usage: gen-script-for-lang DIR > script-for-lang.h\n");
+  char date_buf[200];
+  const char *date_str = "unknown";
+  time_t t;
+  struct tm *tmp;
+  int fc_version;

-  dir = g_dir_open (argv[1], 0, &error);
-  if (!dir)
-    fail ("%s\n", error->message);
+  g_type_init ();

   script_array = g_array_new (FALSE, FALSE, sizeof (LangInfo));

-  while (TRUE)
-    {
-      const char *name = g_dir_read_name (dir);
-      if (!name)
-        break;
-      if (g_str_has_suffix (name, ".orth"))
-        do_file (script_array, argv[1], name);
-    }
+  langs_set = FcGetLangs ();
+  langs = FcStrListCreate (langs_set);
+  FcStrSetDestroy (langs_set);
+
+  while ((lang = FcStrListNext (langs)))
+    do_lang (script_array, lang);
+
+  FcStrListDone (langs);
+

   g_array_sort (script_array, compare_lang);

   for (i = 0; i < script_array->len; i++)
     {
       LangInfo *info = &g_array_index (script_array, LangInfo, i);
-
+
       max_lang_len = MAX (max_lang_len,
-                          1 + (int)strlen (pango_language_to_string (info->lang)));
-    }
+                          (int)strlen (pango_language_to_string (info->lang)));
+
+      g_qsort_with_data (info->scripts,
+                         G_N_ELEMENTS (info->scripts),
+                         sizeof (info->scripts[0]),
+                         compare_script,
+                         info);
+
+      for (j = 0; j < MAX_SCRIPTS; j++)
+        if (!info->scripts[j].freq)
+          break;

-  g_print ("typedef struct {\n"
+      max_script_len = MAX (max_script_len, j);
+    }
+
+  if ((t = time(NULL), tmp = localtime (&t)) && strftime(date_buf, sizeof(date_buf), "%F", tmp))
+    date_str = date_buf;
+
+  fc_version = FcGetVersion ();
+
+  g_print ("/* pango-script-lang-table.h:\n"
+           " * \n"
+           " * Generated by %s\n"
+           " * Date: %s\n"
+           " * Source: fontconfig-%d.%d.%d\n"
+           " * \n"
+           " * Do not edit.\n"
+           " */\n",
+           __FILE__,
+           date_str,
+           fc_version / 10000, (fc_version / 100) % 100, fc_version % 100);
+
+  g_print ("typedef struct _PangoScriptForLang {\n"
            " const char lang[%d];\n"
            " PangoScript scripts[%d];\n"
            "} PangoScriptForLang;\n"
            "\n"
            "static const PangoScriptForLang pango_script_for_lang[] = {\n",
-           max_lang_len,
-           MAX_SCRIPTS);
+           max_lang_len + 1,
+           max_script_len);

   for (i = 0; i < script_array->len; i++)
     {
       LangInfo *info = &g_array_index (script_array, LangInfo, i);

-      g_print (" { \"%s\", { ", pango_language_to_string (info->lang));
+      g_print (" { \"%s\", %*s{ ",
+               pango_language_to_string (info->lang),
+               max_lang_len - strlen (pango_language_to_string (info->lang)), "");
       for (j = 0; j < MAX_SCRIPTS; j++)
         {
+          if (!info->scripts[j].freq)
+            break;
+
           if (j != 0)
             g_print (", ");
-          g_print ("%s", get_script_name (info->scripts[j]));
+          g_print ("%s/*%d*/",
+                   get_script_name (info->scripts[j].script),
+                   info->scripts[j].freq);
         }
-      g_print (" } },\n");
+      g_print (" } }");
+      if (i + 1 != script_array->len)
+        g_print (",");
+      g_print ("\n");
     }

   g_print ("};\n");
-
-
-  g_dir_close (dir);
-
+
   return 0;
 }
```