diff options
author | Don Scorgie <dscorgie@src.gnome.org> | 2007-07-24 19:31:14 +0000 |
---|---|---|
committer | Don Scorgie <dscorgie@src.gnome.org> | 2007-07-24 19:31:14 +0000 |
commit | cf5f948f65af93fcc0d6b197cf2b756a28a30823 (patch) | |
tree | 264dd123fc9cd226f295d1dedfd880822071fbc7 | |
parent | d6d767ddebb4d8d17c95d1d425407639acf26637 (diff) | |
download | yelp-cf5f948f65af93fcc0d6b197cf2b756a28a30823.tar.gz |
Convert search across to use Rarian and clean up lots of crud from the
* src/yelp-search-parser.c:
Convert search across to use Rarian
and clean up lots of crud from the file
svn path=/branches/yelp-spoon/; revision=2841
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | src/yelp-search-parser.c | 401 |
2 files changed, 138 insertions, 269 deletions
@@ -1,3 +1,9 @@ +2007-07-24 Don Scorgie <dscorgie@svn.gnome.org> + + * src/yelp-search-parser.c: + Convert search across to use Rarian + and clean up lots of crud from the file + 2007-07-23 Don Scorgie <dscorgie@svn.gnome.org> * configure.ac: diff --git a/src/yelp-search-parser.c b/src/yelp-search-parser.c index 24230fea..646f5d41 100644 --- a/src/yelp-search-parser.c +++ b/src/yelp-search-parser.c @@ -41,6 +41,7 @@ #include <libxslt/extensions.h> #include <libxslt/xsltInternals.h> #include <libxslt/xsltutils.h> +#include <rarian.h> #ifdef ENABLE_BEAGLE #include <beagle/beagle.h> @@ -99,6 +100,17 @@ struct _SearchContainer { gchar * default_snippet; }; +typedef struct { + YelpSearchParser *parser; + gint required_no; + gint terms_number; + gboolean *stop_list; + gint *dup_list; + gchar **terms_list; + SearchContainer *container; +} SearchDocData; + + struct _YelpSearchParser { gchar *search_terms; xmlDocPtr search_doc; @@ -136,7 +148,8 @@ static void s_declEntity (void *data, static xmlEntityPtr s_getEntity (void *data, const xmlChar *name); static gboolean slow_search_setup (YelpSearchParser *parser); -static gboolean slow_search_process (YelpSearchParser *parser); +static gboolean slow_search_process (RrnReg *reg, + SearchDocData *data); static void search_parse_result (YelpSearchParser *parser, SearchContainer *c); static gchar * search_clean_snippet (gchar *snippet, @@ -154,13 +167,12 @@ void process_info_result (YelpSearchParser *parser, gchar * string_append (gchar *current, gchar *new, gchar *suffix); +static void search_free_container (SearchContainer *c); #ifdef ENABLE_BEAGLE static BeagleClient *beagle_client; #endif /* ENABLE_BEAGLE */ -static char const * const * langs; - YelpSearchParser * yelp_search_parser_new (void) @@ -562,36 +574,6 @@ search_parser_process_idle (YelpSearchParser *parser) return FALSE; } -static gboolean sk_docomf = FALSE; -static GSList *omf_pending = NULL; - -static void -sk_startElement (void *empty, const xmlChar *name, - const xmlChar **attrs) -{ - if (xmlStrEqual((const xmlChar*) name, BAD_CAST "docomf")) - sk_docomf = TRUE; -} - -static void -sk_endElement (void *empty, const xmlChar *name) -{ - if (xmlStrEqual((const xmlChar*) name, BAD_CAST "docomf")) - sk_docomf = FALSE; -} - -static void -sk_characters (void *empty, const xmlChar *ch, - int len) -{ - gchar *omf; - - if (sk_docomf) { - omf = g_strndup ((gchar *) ch, len); - omf_pending = g_slist_prepend (omf_pending, omf); - } -} - void s_startElement(void *data, const xmlChar * name, const xmlChar ** attrs) @@ -998,207 +980,150 @@ build_lists (gchar *search_terms, gchar ***terms, gint **dups, return n_terms; } - static gboolean slow_search_setup (YelpSearchParser *parser) { - gchar *content_list; - gchar *stderr_str; - gchar *lang; - gchar *command; - gchar **terms_list = NULL; gint *dup_list = NULL; gboolean *stop_list = NULL; gint terms_number = 0; gint required_no = 0; - - static xmlSAXHandler sk_sax_handler = { 0, }; - xmlParserCtxtPtr xmlparser; - if (langs && langs[0]) - lang = (gchar *) langs[0]; - else - lang = "C"; + SearchDocData *data; - if (!strcmp (parser->search_terms, "")) { - parser->slow_search_setup_process_id = 0; - check_finished (parser); - return FALSE; - } - command = g_strconcat("scrollkeeper-get-content-list ", lang, NULL); - - if (g_spawn_command_line_sync (command, &content_list, &stderr_str, NULL, NULL)) { - if (!sk_sax_handler.startElement) { - sk_sax_handler.startElement = sk_startElement; - sk_sax_handler.endElement = sk_endElement; - sk_sax_handler.characters = sk_characters; - sk_sax_handler.initialized = TRUE; - } - content_list = g_strstrip (content_list); - xmlSAXUserParseFile (&sk_sax_handler, NULL, content_list); - } - - xmlparser = xmlNewParserCtxt (); + terms_number = build_lists (parser->search_terms,&terms_list, + &dup_list, &stop_list, + &required_no); + data = g_new0 (SearchDocData, 1); + data->container = g_new0 (SearchContainer, 1); + data->parser = parser; + data->required_no = required_no; + data->terms_number = terms_number; + data->stop_list = stop_list; + data->dup_list = dup_list; + data->terms_list = terms_list; + data->terms_number = terms_number; + + rrn_for_each ((RrnForeachFunc) slow_search_process, data); + + search_process_man (parser, terms_list); + search_process_info (parser, terms_list); + + check_finished (parser); + + return FALSE; +} - g_free (content_list); - g_free (stderr_str); - g_free (command); +static gboolean +slow_search_process (RrnReg *reg, SearchDocData *data) +{ + gint i, j=0; + SearchContainer *container = data->container; + gchar *ptr, *path; + gchar *fname; - terms_number = build_lists (parser->search_terms,&terms_list, - &dup_list, &stop_list, - &required_no); + /* Set up the container with the new data */ + if (g_str_has_prefix (reg->uri, "file:")) { + fname = &(reg->uri[5]); + } else { + fname = reg->uri; + } - while (omf_pending) { - GSList *first = NULL; - gchar *file = NULL; - xmlDocPtr omf_doc = NULL; - xmlXPathContextPtr omf_xpath = NULL; - xmlXPathObjectPtr omf_url = NULL; - xmlXPathObjectPtr omf_title = NULL; - xmlXPathObjectPtr omf_mime = NULL; - xmlXPathObjectPtr omf_desc = NULL; - - SearchContainer *container; - gchar *ptr; - gchar *path; - gchar *fname; - gchar *realfname; - gchar *mime_type; - int i = 0; - - first = omf_pending; - omf_pending = g_slist_remove_link (omf_pending, first); - file = (gchar *) first->data; - - - omf_doc = xmlCtxtReadFile (xmlparser, (const char *) file, NULL, - XML_PARSE_NOBLANKS | XML_PARSE_NOCDATA | - XML_PARSE_NOENT | XML_PARSE_NOERROR | - XML_PARSE_NONET ); - - if (!omf_doc) { - g_warning (_("Could not load the OMF file '%s'."), file); - continue; - } + while (fname[0] == '/' && fname[1] == '/') { + fname++; + } - omf_xpath = xmlXPathNewContext (omf_doc); - omf_url = - xmlXPathEvalExpression (BAD_CAST - "string(/omf/resource/identifier/@url)", - omf_xpath); - omf_title = - xmlXPathEvalExpression (BAD_CAST - "string(/omf/resource/title)", - omf_xpath); - omf_mime = - xmlXPathEvalExpression (BAD_CAST - "string(/omf/resource/format/@mime)", - omf_xpath); - omf_desc = - xmlXPathEvalExpression (BAD_CAST - "string(/omf/resource/description)", - omf_xpath); - - mime_type = g_strdup ((gchar *) omf_mime->stringval); - - fname = g_strdup ((gchar *) omf_url->stringval); - if (g_str_has_prefix (fname, "file:")) { - realfname = &fname[5]; - } else { - realfname = fname; - } + container->base_filename = g_strdup (fname); + fname = g_strdup (container->base_filename); - if (!g_file_test (realfname, G_FILE_TEST_EXISTS)) { - continue; - } + container->entities = g_hash_table_new (g_str_hash, g_str_equal); + container->doc_title = g_strdup ((gchar *) reg->name); + container->score=0; + container->html = FALSE; + container->default_snippet = g_strdup ((gchar *) reg->comment); + container->current_subsection = NULL; + container->elem_type = NULL; - container = g_new0 (SearchContainer, 1); + ptr = g_strrstr (container->base_filename, "/"); + + path = g_strndup (container->base_filename, + ptr - container->base_filename); + + /* BEGIN HTML special block */ + if (g_str_equal (reg->type, "text/html") || + g_str_has_suffix (fname, "html")) { + GDir *dir; + gchar *filename; + container->html = TRUE; + ptr++; - container->base_filename = g_strdup (realfname); - container->entities = g_hash_table_new (g_str_hash, g_str_equal); - container->doc_title = g_strdup ((gchar *) omf_title->stringval); - container->score=0; - container->html = FALSE; - container->default_snippet = g_strdup ((gchar *) omf_desc->stringval); - - ptr = g_strrstr (container->base_filename, "/"); - - path = g_strndup (container->base_filename, - ptr - container->base_filename); - - /* BEGIN HTML special block */ - if (g_str_equal (mime_type, "text/html")) { - GDir *dir; - gchar *filename; - container->html = TRUE; - ptr++; - - dir = g_dir_open (path, 0, NULL); - - while ((filename = (gchar *) g_dir_read_name (dir))) { - if ((g_str_has_suffix (filename, ".html") || - g_str_has_suffix (filename, ".htm")) && - !g_str_equal (filename, ptr)) { - container->components = - g_slist_append (container->components, - g_strconcat (path, "/", filename, - NULL)); - - } + dir = g_dir_open (path, 0, NULL); + + while ((filename = (gchar *) g_dir_read_name (dir))) { + if ((g_str_has_suffix (filename, ".html") || + g_str_has_suffix (filename, ".htm")) && + !g_str_equal (filename, ptr)) { + container->components = + g_slist_append (container->components, + g_strconcat (path, "/", filename, + NULL)); + } - /* END HTML special blcok */ } + /* END HTML special blcok */ + } - container->base_path = g_strdup (path); - - container->required_words = required_no; - container->grab_text = FALSE; - container->sect_name = NULL; + container->base_path = g_strdup (path); + + container->required_words = data->required_no; + container->grab_text = FALSE; + container->sect_name = NULL; + + container->search_term = g_strdupv (data->terms_list); + container->stop_word = g_new0 (gboolean, data->terms_number); + container->dup_of = g_new0 (gint, data->terms_number); + container->found_terms = g_new0 (gboolean, data->terms_number); + container->score_per_word = g_new0 (gfloat, data->terms_number); + container->found_terms = g_new0 (gboolean, data->terms_number); + container->result_subsection = NULL; + container->search_status = NOT_SEARCHING; + container->snippet_score = 0; + container->snippet = NULL; + + for (i=0; i< data->terms_number; i++) { + container->stop_word[i] = data->stop_list[i]; + container->dup_of[i] = data->dup_list[i]; + } - container->search_term = g_strdupv (terms_list); - container->stop_word = g_new0 (gboolean, terms_number); - container->dup_of = g_new0 (gint, terms_number); - container->found_terms = g_new0 (gboolean, terms_number); - container->score_per_word = g_new0 (gfloat, terms_number); - container->found_terms = g_new0 (gboolean, terms_number); + xmlSAXUserParseFile (&handlers, container, fname); + for (i=0; i< g_strv_length (container->search_term); ++i) { + if (container->found_terms[i]) { + j++; + } + } + if (j >= container->required_words) { + search_parse_result (data->parser, container); + } else while (container->components) { + GSList *next = container->components; + container->components = g_slist_remove_link (container->components, next); container->search_status = NOT_SEARCHING; - container->snippet_score = 0; - - for (i=0; i< terms_number; i++) { - container->stop_word[i] = stop_list[i]; - container->dup_of[i] = dup_list[i]; + xmlSAXUserParseFile (&handlers, container, (gchar *) next->data); + j = 0; + for (i=0; i< g_strv_length (container->search_term); ++i) { + if (container->found_terms[i]) + j++; + } + if (j >= container->required_words) { + search_parse_result (data->parser, container); + break; } - - parser->pending_searches = - g_slist_prepend (parser->pending_searches, container); - - g_free (fname); - g_free (path); - if (omf_url) - xmlXPathFreeObject (omf_url); - if (omf_title) - xmlXPathFreeObject (omf_title); - if (omf_xpath) - xmlXPathFreeContext (omf_xpath); - if (omf_doc) - xmlFreeDoc (omf_doc); - } - g_return_val_if_fail (parser->slow_search_process_id == 0, FALSE); - parser->slow_search_process_id = - g_idle_add ((GSourceFunc) slow_search_process, parser); - - if (xmlparser) - xmlFreeParserCtxt (xmlparser); - /* returning false removes this idle function from the main loop; - * we also set our slow search _setup_ process id to zero to - * indicate it has been removed */ - parser->slow_search_setup_process_id = 0; - return FALSE; + search_free_container (container); + g_free (path); + return TRUE; } @@ -1221,68 +1146,6 @@ search_free_container (SearchContainer *c) g_free (c->base_filename); g_free (c->snippet); g_hash_table_destroy (c->entities); - g_free (c); -} - - -static gboolean -slow_search_process (YelpSearchParser *parser) -{ - SearchContainer *c; - GSList *first = parser->pending_searches; - gint i, j=0; - - parser->pending_searches = - g_slist_remove_link (parser->pending_searches, first); - - if (first == NULL) { - parser->slow_search_process_id = 0; - check_finished (parser); - return FALSE; - } - - c = (SearchContainer *) first->data; - - xmlSAXUserParseFile (&handlers, c, c->base_filename); - for (i=0; i< g_strv_length (c->search_term); ++i) { - if (c->found_terms[i]) { - j++; - } - } - if (j >= c->required_words) { - search_parse_result (parser, c); - } else while (c->components) { - GSList *next = c->components; - c->components = g_slist_remove_link (c->components, next); - c->search_status = NOT_SEARCHING; - xmlSAXUserParseFile (&handlers, c, (gchar *) next->data); - j = 0; - for (i=0; i< g_strv_length (c->search_term); ++i) { - if (c->found_terms[i]) - j++; - } - if (j >= c->required_words) { - search_parse_result (parser, c); - break; - } - } - - if (parser->pending_searches) { - search_free_container (c); - return TRUE; - } - else { - search_process_man (parser, c->search_term); - search_process_info (parser, c->search_term); - search_free_container (c); - - check_finished (parser); - /* returning false removes this idle function from the main loop; - * we also set our slow search process id to zero to - * indicate it has been removed */ - parser->slow_search_process_id = 0; - return FALSE; - } } gchar * |