diff options
author | Don Scorgie <dscorgie@src.gnome.org> | 2007-07-08 21:22:41 +0000 |
---|---|---|
committer | Don Scorgie <dscorgie@src.gnome.org> | 2007-07-08 21:22:41 +0000 |
commit | 509f907abf08880f4ba9d8f900ab6001b114db4e (patch) | |
tree | f3035392ab393283a96ace2e084eff11fb48c221 | |
parent | 0a0cb63b284761d0e80f79dfb224dc4e4b8c220b (diff) | |
download | yelp-509f907abf08880f4ba9d8f900ab6001b114db4e.tar.gz |
First pass at searching. Search still relies on scrollkeeper. Other than that, it seems to all work (with basic search at least)
* src/yelp-window.c:
* src/yelp-search.c:
* src/yelp-search.h:
* src/yelp-search-parser.c:
* src/yelp-search-parser.h:
* src/yelp-utils.c:
* src/Makefile.am:
First pass at searching.
Search still relies on scrollkeeper. Other
than that, it seems to all work (with basic search at least)
svn path=/branches/yelp-spoon/; revision=2835
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | src/Makefile.am | 11 | ||||
-rw-r--r-- | src/yelp-search-parser.c | 1600 | ||||
-rw-r--r-- | src/yelp-search-parser.h | 36 | ||||
-rw-r--r-- | src/yelp-search.c | 397 | ||||
-rw-r--r-- | src/yelp-search.h | 53 | ||||
-rw-r--r-- | src/yelp-utils.c | 11 | ||||
-rw-r--r-- | src/yelp-window.c | 22 |
8 files changed, 2119 insertions, 21 deletions
@@ -1,6 +1,16 @@ 2007-07-08 Don Scorgie <dscorgie@svn.gnome.org> * src/yelp-window.c: + * src/yelp-search.c: + * src/yelp-search.h: + * src/yelp-search-parser.c: + * src/yelp-search-parser.h: + * src/yelp-utils.c: + * src/Makefile.am: + First pass at searching. + Search still relies on scrollkeeper. Other + than that, it seems to all work (with basic search at least) + * src/yelp-db-print.c: * src/yelp-db-print.h: * src/Makefile.am: diff --git a/src/Makefile.am b/src/Makefile.am index 2de64163..fc48974d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -35,11 +35,12 @@ yelp_SOURCES += \ yelp-info-parser.c yelp-info-parser.h endif -#if ENABLE_SEARCH -#yelp_SOURCES += \ -# gtkentryaction.c gtkentryaction.h \ -# yelp-search-pager.c yelp-search-pager.h -#endif +if ENABLE_SEARCH +yelp_SOURCES += \ + gtkentryaction.c gtkentryaction.h \ + yelp-search.c yelp-search.h \ + yelp-search-parser.c yelp-search-parser.h +endif YELP_DEFINES = \ -DG_LOG_DOMAIN=\"Yelp\" \ diff --git a/src/yelp-search-parser.c b/src/yelp-search-parser.c new file mode 100644 index 00000000..8fae4968 --- /dev/null +++ b/src/yelp-search-parser.c @@ -0,0 +1,1600 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 4 -*- */ +/* + * Copyright (C) 2007 Don Scorgie <Don@Scorgie.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Author: Don Scorgie <Don@Scorgie.org> + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <string.h> +#include <glib.h> +#include <glib/gi18n.h> +#include <libgnomevfs/gnome-vfs.h> +#include <libxml/parser.h> +#include <libxml/parserInternals.h> +#include <libxml/xmlreader.h> +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> +#include <libxml/HTMLtree.h> +#include <libxml/tree.h> +#include <libxslt/xslt.h> +#include <libxslt/templates.h> +#include <libxslt/transform.h> +#include <libxslt/extensions.h> +#include <libxslt/xsltInternals.h> +#include <libxslt/xsltutils.h> + +#ifdef ENABLE_BEAGLE +#include <beagle/beagle.h> +#endif /* ENABLE_BEAGLE */ + +#include "yelp-error.h" +#include "yelp-settings.h" +#include "yelp-search-parser.h" +#include "yelp-utils.h" +#include "yelp-debug.h" + +#define DESKTOP_ENTRY_GROUP "Desktop Entry" +#define KDE_DESKTOP_ENTRY_GROUP "KDE Desktop Entry" + +#define YELP_NAMESPACE "http://www.gnome.org/yelp/ns" + +typedef gboolean (*ProcessFunction) (YelpSearchParser *parser); + +typedef struct _SearchContainer SearchContainer; + +#define ONLINE_URL "http://api.gnome.org/yelp/forums?query=%s" + +#define ONLINE_NAME N_("the GNOME Support Forums") + +enum { + NOT_SEARCHING = 0, + SEARCH_1, + SEARCH_CHILD, + SEARCH_DOC = 99 +}; + +struct _SearchContainer { + gchar * current_subsection; + gchar * result_subsection; + gchar * doc_title; + gchar * base_path; + gchar * base_filename; + gchar * snippet; + GSList * components; + GHashTable *entities; + gchar ** search_term; + gint required_words; + gint * dup_of; + gboolean * found_terms; + gboolean * stop_word; + gfloat * score_per_word; + gchar * top_element; + gint search_status; + gchar * elem_type; + GSList * elem_stack; + gfloat score; + 
gfloat snippet_score; + gboolean html; + gchar * sect_name; + gboolean grab_text; + gchar * default_snippet; +}; + +struct _YelpSearchParser { + gchar *search_terms; + xmlDocPtr search_doc; + xmlNodePtr root; + + GPtrArray *hits; + int snippet_request_count; + GSList * pending_searches; + + guint search_process_id; + guint slow_search_setup_process_id; + guint slow_search_process_id; + + gboolean finished; + +}; + + +static gboolean search_parser_process_idle (YelpSearchParser *parser); + +static void s_startElement (void *data, + const xmlChar *name, + const xmlChar **attrs); +static void s_endElement (void *data, + const xmlChar *name); +static void s_characters (void *data, + const xmlChar *ch, + int len); +static void s_declEntity (void *data, + const xmlChar *name, + int type, + const xmlChar *pID, + const xmlChar *sID, + xmlChar *content); +static xmlEntityPtr s_getEntity (void *data, + const xmlChar *name); +static gboolean slow_search_setup (YelpSearchParser *parser); +static gboolean slow_search_process (YelpSearchParser *parser); +static void search_parse_result (YelpSearchParser *parser, + SearchContainer *c); +static gchar * search_clean_snippet (gchar *snippet, + gchar **terms); +static void search_process_man (YelpSearchParser *parser, + gchar **terms); +static void search_process_info (YelpSearchParser *parser, + gchar **terms); +static void process_man_result (YelpSearchParser *parser, + gchar *result, + gchar **terms); +void process_info_result (YelpSearchParser *parser, + gchar *result, + gchar **terms); +gchar * string_append (gchar *current, + gchar *new, + gchar *suffix); + + +#ifdef ENABLE_BEAGLE +static BeagleClient *beagle_client; +#endif /* ENABLE_BEAGLE */ +static char const * const * langs; + + +YelpSearchParser * +yelp_search_parser_new (void) +{ + YelpSearchParser *parser = g_new0 (YelpSearchParser, 1); + + return parser; +} + +void +yelp_search_parser_free (YelpSearchParser *parser) +{ + g_free (parser); +} + +static gboolean 
+check_hex (char check) +{ + if (check >= '0' && check <= '9') + return TRUE; + if (check >= 'a' && check <= 'f') + return TRUE; + if (check >= 'A' && check <= 'F') + return TRUE; + return FALSE; +} + +static int +conv_hex (char conv) +{ + if (conv >= '0' && conv <= '9') + return conv - '0'; + if (conv >= 'a' && conv <= 'f') + return conv - 'a' + 10; + if (conv >= 'A' && conv <= 'F') + return conv - 'A' + 10; + return 0; +} + +static char * +decode_uri (const char *uri) +{ + char *decoded = g_strdup (uri); + char *iterator; + + for (iterator = decoded; *iterator; iterator ++) { + if (*iterator == '%' && check_hex (iterator[1]) && check_hex(iterator[2])) { + *iterator = conv_hex (iterator[1]) * 16 + conv_hex (iterator[2]); + memmove (iterator + 1, iterator + 3, strlen (iterator + 3)); + } + } + + return decoded; +} + +xmlDocPtr +yelp_search_parser_process (YelpSearchParser *parser, gchar *search_terms) +{ + debug_print (DB_FUNCTION, "entering\n"); + + parser->search_terms = search_terms; + + parser->search_process_id = + g_idle_add_full (G_PRIORITY_LOW, + (GSourceFunc) search_parser_process_idle, + parser, NULL); + + while (!parser->finished) + g_thread_yield (); + return parser->search_doc; +} + +/******************************************************************************/ +static void +check_finished (YelpSearchParser *parser) +{ + gchar *tmp; + gchar **split; + xmlNodePtr online = NULL; + gchar *check; + gchar *title; + gchar *text = NULL; + xmlXPathContextPtr results_xpath_ctx = NULL; + xmlXPathObjectPtr results_xpath = NULL; + gint number_of_results = 0; + + results_xpath_ctx = xmlXPathNewContext(parser->search_doc); + results_xpath = xmlXPathEvalExpression(BAD_CAST "/search/result", results_xpath_ctx); + if (results_xpath && results_xpath->nodesetval && results_xpath->nodesetval->nodeNr) { + number_of_results = results_xpath->nodesetval->nodeNr; + } else { + number_of_results = 0; + } + xmlXPathFreeObject(results_xpath); + 
xmlXPathFreeContext(results_xpath_ctx); + + if (number_of_results == 0) { + title = g_strdup_printf( _("No results for \"%s\""), parser->search_terms); + text = g_strdup(_("Try using different words to describe the problem " + "you're having or the topic you want help with.")); + } else { + title = g_strdup_printf( _("Search results for \"%s\""), parser->search_terms); + } + xmlNewTextChild (parser->root, NULL, BAD_CAST "title", BAD_CAST title); + + if (text) { + xmlNewTextChild (parser->root, NULL, BAD_CAST "text", BAD_CAST text); + g_free(text); + } + + /* TRANSLATORS: Please don't do anything funny with the + * format arguement. It isn't really going through a printf + * The %s is used to indicate where the name of the site (linked) + * should be. This is done in the XSLT + */ + tmp = g_strdup (_("Repeat the search online at %s")); + split = g_strsplit (tmp, "%s", 2); + check = g_strdup_printf (ONLINE_URL, parser->search_terms); + + online = xmlNewTextChild (parser->root, NULL, BAD_CAST "online", BAD_CAST split[0]); + g_free (tmp); + xmlNewProp (online, BAD_CAST "name", + BAD_CAST ONLINE_NAME); + xmlNewProp (online, BAD_CAST "href", + BAD_CAST check); + g_free (check); + xmlNewTextChild (parser->root, NULL, BAD_CAST "online1", BAD_CAST split[1]); + + parser->finished = TRUE; + +} + +#ifdef ENABLE_BEAGLE +typedef struct +{ + YelpSearchParser *parser; + xmlNode *node; +} SnippetLocation; + +static void snippet_closed (BeagleSnippetRequest *request, + SnippetLocation *snippet_location); +static void snippet_response (BeagleSnippetRequest *request, + BeagleSnippetResponse *response, + SnippetLocation *snippet_location); +static void snippet_error (BeagleSnippetRequest *request, + GError *error, + SnippetLocation *snippet_location); + +static void +snippet_closed (BeagleSnippetRequest *request, SnippetLocation *snippet_location) +{ + YelpSearchParser *parser = snippet_location->parser; + + debug_print (DB_FUNCTION, "entering\n"); + + parser->snippet_request_count --; 
+ check_finished (parser); + + g_signal_handlers_disconnect_by_func (request, + G_CALLBACK (snippet_response), + snippet_location); + g_signal_handlers_disconnect_by_func (request, + G_CALLBACK (snippet_error), + snippet_location); + g_signal_handlers_disconnect_by_func (request, + G_CALLBACK (snippet_closed), + snippet_location); + + g_free (snippet_location); + g_object_unref (request); +} + +static void +snippet_response (BeagleSnippetRequest *request, BeagleSnippetResponse *response, SnippetLocation *snippet_location) +{ + xmlDoc *snippet_doc; + xmlNode *node; + char *xmldoc; + + const char *xml = beagle_snippet_response_get_snippet (response); + + if (xml == NULL) { + debug_print (DB_DEBUG, "snippet_response empty\n"); + return; + } + debug_print (DB_DEBUG, "snippet_response: %s\n", xml); + + xmldoc = g_strdup_printf ("<snippet>%s</snippet>", xml); + snippet_doc = xmlParseDoc (BAD_CAST xmldoc); + g_free (xmldoc); + if (!snippet_doc) + return; + node = xmlDocGetRootElement (snippet_doc); + xmlUnlinkNode (node); + xmlAddChild (snippet_location->node, node); + xmlFreeDoc (snippet_doc); +} + +static void +snippet_error (BeagleSnippetRequest *request, GError *error, SnippetLocation *snippet_location) +{ + debug_print (DB_FUNCTION, "entering\n"); +} + + +static void +hits_added_cb (BeagleQuery *query, BeagleHitsAddedResponse *response, YelpSearchParser *parser) +{ + GSList *hits, *l; + + debug_print (DB_FUNCTION, "hits_added\n"); + + hits = beagle_hits_added_response_get_hits (response); + + for (l = hits; l; l = l->next) { + BeagleHit *hit = l->data; + beagle_hit_ref (hit); + debug_print (DB_DEBUG, "%f\n", beagle_hit_get_score (hit)); + g_ptr_array_add (parser->hits, hit); + } +} + +static gboolean +check_lang (const char *lang) { + int i; + for (i = 0; langs[i]; i++) { + if (!strncmp (lang, langs[i], 2)) { + debug_print (DB_DEBUG, "%s preferred\n", lang); + return TRUE; + } + } + debug_print (DB_DEBUG, "%s not preferred\n", lang); + return FALSE; +} + +static gint 
+compare_hits (gconstpointer a, + gconstpointer b) +{ + BeagleHit **hita = (BeagleHit **) a; + BeagleHit **hitb = (BeagleHit **) b; + const char *langa, *langb; + gboolean a_preferred = TRUE, b_preferred = TRUE; + + if (beagle_hit_get_one_property (*hita, "fixme:language", &langa)) + a_preferred = check_lang(langa); + if (beagle_hit_get_one_property (*hitb, "fixme:language", &langb)) + b_preferred = check_lang(langb); + + if (a_preferred != b_preferred) { + if (a_preferred) + return -1; + if (b_preferred) + return 1; + } + + double scorea = beagle_hit_get_score (*hita); + double scoreb = beagle_hit_get_score (*hitb); + + /* The values here are inverted so that it's a descending sort. */ + if (scorea < scoreb) + return 1; + if (scoreb < scorea) + return -1; + return 0; +} + +static void +finished_cb (BeagleQuery *query, + BeagleFinishedResponse *response, + YelpSearchParser *parser) +{ + int i; + + debug_print (DB_FUNCTION, "entering\n"); + + g_ptr_array_sort (parser->hits, compare_hits); + + for (i = 0; i < 10 && i < parser->hits->len; i++) { + BeagleHit *hit = g_ptr_array_index (parser->hits, i); + xmlNode *child; + /* static float score_fake = 0; */ + char *score; + const char *property; + BeagleSnippetRequest *request; + SnippetLocation *snippet_location; + + child = xmlNewTextChild (parser->root, NULL, BAD_CAST "result", NULL); + xmlSetProp (child, BAD_CAST "uri", BAD_CAST beagle_hit_get_uri (hit)); + xmlSetProp (child, BAD_CAST "parent_uri", + BAD_CAST beagle_hit_get_parent_uri (hit)); + if (beagle_hit_get_one_property (hit, "dc:title", &property)) + xmlSetProp (child, BAD_CAST "title", BAD_CAST property); + if (beagle_hit_get_one_property (hit, "fixme:base_title", &property)) + xmlSetProp (child, BAD_CAST "base_title", BAD_CAST property); + + score = g_strdup_printf ("%f", beagle_hit_get_score (hit)); + debug_print (DB_DEBUG, "%f\n", beagle_hit_get_score (hit)); + /*xmlSetProp (child, BAD_CAST "score", BAD_CAST score);*/ + g_free (score); + + 
parser->snippet_request_count ++; + + snippet_location = g_new (SnippetLocation, 1); + + snippet_location->parser = parser; + snippet_location->node = child; + + request = beagle_snippet_request_new (); + beagle_snippet_request_set_hit (request, hit); + beagle_snippet_request_set_query (request, query); + + g_signal_connect (request, "response", + G_CALLBACK (snippet_response), snippet_location); + g_signal_connect (request, "error", + G_CALLBACK (snippet_error), snippet_location); + g_signal_connect (request, "closed", + G_CALLBACK (snippet_closed), snippet_location); + + debug_print (DB_DEBUG, "Requesting snippet\n"); + beagle_client_send_request_async (beagle_client, BEAGLE_REQUEST (request), + NULL); + } + + g_signal_handlers_disconnect_by_func (query, + G_CALLBACK (hits_added_cb), + parser); + g_signal_handlers_disconnect_by_func (query, + G_CALLBACK (finished_cb), + parser); + g_object_unref (query); + + g_ptr_array_foreach (parser->hits, (GFunc) beagle_hit_unref, NULL); + g_ptr_array_free (parser->hits, TRUE); + parser->hits = NULL; + + check_finished (parser); +} +#endif /* ENABLE_BEAGLE */ + +static gboolean +search_parser_process_idle (YelpSearchParser *parser) +{ +#ifdef ENABLE_BEAGLE + BeagleQuery *query; + GError *error = NULL; +#endif /* ENABLE_BEAGLE */ + + parser->search_doc = xmlNewDoc (BAD_CAST "1.0"); + parser->root = xmlNewNode (NULL, BAD_CAST "search"); + xmlSetProp (parser->root, BAD_CAST "title", BAD_CAST parser->search_terms); + xmlDocSetRootElement (parser->search_doc, parser->root); + +#ifdef ENABLE_BEAGLE + if (beagle_client != NULL) { + query = beagle_query_new (); + + beagle_query_set_max_hits (query, 10000); + beagle_query_add_text (query, parser->search_terms); + beagle_query_add_source (query, "documentation"); + + parser->hits = g_ptr_array_new (); + + g_signal_connect (query, "hits-added", + G_CALLBACK (hits_added_cb), + parser); + + g_signal_connect (query, "finished", + G_CALLBACK (finished_cb), + parser); + + 
beagle_client_send_request_async (beagle_client, BEAGLE_REQUEST (query), &error); + + if (error) { + debug_print (DB_DEBUG, "error: %s\n", error->message); + } + + g_clear_error (&error); + } else { + g_warning ("beagled not running, using basic search support."); + } +#endif /* ENABLE_BEAGLE */ + +#ifdef ENABLE_BEAGLE + if (beagle_client == NULL) { +#endif + g_return_val_if_fail (parser->slow_search_setup_process_id == 0, FALSE); + + parser->slow_search_setup_process_id = + g_idle_add ((GSourceFunc) slow_search_setup, + parser); +#ifdef ENABLE_BEAGLE + } +#endif + + /* returning false removes this idle function from the main loop; + * we also set our search process id to zero */ + parser->search_process_id = 0; + return FALSE; +} + +static gboolean sk_docomf = FALSE; +static GSList *omf_pending = NULL; + +static void +sk_startElement (void *empty, const xmlChar *name, + const xmlChar **attrs) +{ + if (xmlStrEqual((const xmlChar*) name, BAD_CAST "docomf")) + sk_docomf = TRUE; +} + +static void +sk_endElement (void *empty, const xmlChar *name) +{ + if (xmlStrEqual((const xmlChar*) name, BAD_CAST "docomf")) + sk_docomf = FALSE; +} + +static void +sk_characters (void *empty, const xmlChar *ch, + int len) +{ + gchar *omf; + + if (sk_docomf) { + omf = g_strndup ((gchar *) ch, len); + omf_pending = g_slist_prepend (omf_pending, omf); + } +} + +void s_startElement(void *data, + const xmlChar * name, + const xmlChar ** attrs) +{ + SearchContainer *c = (SearchContainer *) data; + + if (g_str_equal (name, "xi:include") || g_str_equal (name, "include")) { + gint i=0; + while (attrs[i]) { + if (g_str_equal (attrs[i], "href")) { + + c->components = g_slist_append (c->components, + g_strconcat (c->base_path, + "/", + attrs[i+1], + NULL)); + break; + } + i+=2; + } + } + + if (attrs) { + gint i=0; + while (attrs[i]) { + if (g_str_equal (attrs[i], "id")) { + g_free (c->current_subsection); + c->current_subsection = g_strdup ((gchar *) attrs[i+1]); + } + i+=2; + } + } + /* Do we 
need to grab the title of the document? + * used in snippets when displaying results from an indexterm etc. + */ + if (c->search_status != NOT_SEARCHING && g_str_equal (name, "title")) { + c->grab_text = TRUE; + } + + /* Are we allowed to search this element? */ + if (c->search_status == NOT_SEARCHING) { + if (c->html && g_str_equal (name, "html")) { + c->search_status = SEARCH_DOC; + return; + } + + if (g_str_equal (name, "title")) { + c->search_status = SEARCH_1; + } + else if (g_str_equal (name, "indexterm")) + c->search_status = SEARCH_1; + else if (g_str_equal (name, "sect1") || + g_str_equal (name, "section") || + g_str_equal (name, "chapter") || + g_str_equal (name, "body")) + c->search_status = SEARCH_DOC; + } else if (c->search_status == SEARCH_1) { + c->search_status = SEARCH_CHILD; + } + + if (c->elem_type) { + c->elem_stack = g_slist_prepend (c->elem_stack, + g_strdup (c->elem_type)); + g_free (c->elem_type); + } + + c->elem_type = g_strdup ((gchar *) name); + + return; +} + +void s_endElement(void * data, + const xmlChar * name) +{ + SearchContainer *c = (SearchContainer *) data; + + if (c->search_status == SEARCH_CHILD) { + c->search_status = SEARCH_1; + } else if (c->search_status == SEARCH_1) { + c->search_status = NOT_SEARCHING; + } + + g_free (c->elem_type); + c->elem_type = NULL; + + if (c->elem_stack) { + GSList *top = c->elem_stack; + c->elem_type = g_strdup ((gchar *) top->data); + c->elem_stack = g_slist_delete_link (c->elem_stack, top); + } + c->grab_text = FALSE; + return; +} + +void s_characters(void * data, + const xmlChar * ch, + int len) +{ + SearchContainer *c = (SearchContainer *) data; + if (c->grab_text) { + g_free (c->sect_name); + c->sect_name = g_strndup ((gchar *) ch, len); + } + + /* Sometimes html docs don't trigger the "startElement" method + * I don't know why. Instead, we just search the entire + * html file, hoping to find something. 
+ */ + if (c->html && c->search_status != SEARCH_DOC) + c->search_status = SEARCH_DOC; + if (c->search_status != NOT_SEARCHING) { + gchar *tmp = g_utf8_casefold ((gchar *) ch, len); + gint i = 0; + gchar *s_term = c->search_term[i]; + while (s_term && c->score_per_word[i] < 1.0) { + if (c->stop_word[i] || c->score_per_word[c->dup_of[i]] == 1.0) { + i++; + s_term = c->search_term[i]; + continue; + } + + gchar *location = strstr (tmp, s_term); + if (location) { + gchar before = *(location-1); + gchar after = *(location+strlen(s_term)); + gfloat local_score = 0.0; + gboolean use_text = TRUE; + if (location == tmp) + before = ' '; + if (strlen(location) == strlen(s_term)) + after = ' '; + + if ((g_ascii_ispunct (before) || g_ascii_isspace (before)) + && (g_ascii_ispunct (after) || g_ascii_isspace (after))) { + if (!c->elem_type) { + /* Stupid HTML. Treat like its a normal tag */ + local_score = 0.1; + } else if (g_str_equal(c->elem_type, "primary")) { + local_score = 1.0; + use_text = FALSE; + } else if (g_str_equal (c->elem_type, "secondary")) { + local_score = 0.9; + use_text = FALSE; + } else if (g_str_equal (c->elem_type, "title") || + g_str_equal (c->elem_type, "titleabbrev")) { + local_score = 0.8; + } else { + local_score = 0.1; + } + c->score += local_score; + c->found_terms[c->dup_of[i]] = TRUE; + if (local_score > c->snippet_score) { + g_free (c->snippet); + if (use_text) { + c->snippet = g_strndup (g_utf8_casefold ((gchar *) ch, + len), + len); + } else { + c->snippet = g_strdup (c->sect_name); + } + c->result_subsection = g_strdup (c->current_subsection); + c->snippet_score = local_score; + c->score_per_word[c->dup_of[i]] = local_score; + } + } + } + i++; + s_term = c->search_term[i]; + } + g_free (tmp); + } + return; +} + +void s_declEntity (void *data, const xmlChar *name, int type, + const xmlChar *pID, const xmlChar *sID, + xmlChar *content) +{ + SearchContainer *c = (SearchContainer *) data; + if (type == 2) { + g_hash_table_insert (c->entities, + 
g_strdup ((gchar *) name), + g_strdup ((gchar *) sID)); + + } + return; +} + +xmlEntityPtr s_getEntity (void *data, const xmlChar *name) +{ + SearchContainer *c = (SearchContainer *) data; + xmlEntityPtr t = xmlGetPredefinedEntity(name); + + if (!t) { + gchar * lookup = g_hash_table_lookup (c->entities, name); + if (lookup) { + c->components = g_slist_append (c->components, + g_strconcat (c->base_path, + "/", + lookup, NULL)); + } + } + + return t; + +} + + + + + +static xmlSAXHandler handlers = { + NULL, NULL, NULL, NULL, NULL, + s_getEntity, + s_declEntity, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, + s_startElement, s_endElement, NULL, s_characters, + NULL, NULL, NULL, NULL, NULL, NULL +}; + + +/* Parse the omfs and build the list of files to be searched */ + +/* A common bit of code used below. Chucked in a function for easy */ +gchar * +string_append (gchar *current, gchar *new, gchar *suffix) +{ + gchar *ret; + + if (suffix) { + ret = g_strconcat (current, ":", new, suffix, NULL); + } else { + ret = g_strconcat (current, ":", new, NULL); + } + g_free (current); + return ret; +} + +static gint +build_lists (gchar *search_terms, gchar ***terms, gint **dups, + gboolean ** stops, gint *req) +{ + gchar *ignore_words, *common_prefixes, *common_suffixes; + gchar **prefixes, **suffixes, **ignore; + gchar **list_copy; + gchar **iter, **iter1 = NULL; + gchar *term_str = NULL; + gchar *dup_str = NULL; + gint n_terms = 0, i=-1; + gint orig_term = 0; + gint non_stop = 0; + + + /* Translators: Do not translate this list exactly. These are + * colon-separated words that aren't useful for choosing search + * results; they will be different for each language. Include + * pronouns, articles, very common verbs and prepositions, + * words from question structures like "tell me about" and + * "how do I", and words for functional states like "not", + * "work", and "broken". 
+ */ + ignore_words = g_strdup (_("a:about:an:are:as:at:be:broke:broken:by" + ":can:can't:dialog:dialogue:do:doesn't" + ":doesnt:don't:dont:explain:for:from:get" + ":gets:got:make:makes:not:when:has" + ":have:help:how:i:in:is:it:item:me:my:of" + ":on:or:tell:that:the:thing:this:to:what" + ":where:who:will:with:won't:wont:why:work" + ":working:works")); + /* Translators: This is a list of common prefixes for words. + * Do not translate this directly. Instead, use a colon + * seperated list of word-starts. In English, an example + * is re-. If there is none, please use the term NULL + * If there is only one, please put a colon after. + * E.g. if the common prefix is re then the string would be + * "re:" + */ + common_prefixes = g_strdup (_("re")); + + /* Translators: This is a list of (guess what?) common suffixes + * to words. Things that may be put at ends of words to slightly + * alter their meaning (like -ing and -s in English). This is a + * colon seperated list (I like colons). If there are none, + * please use the strig NULL. 
If there is only 1, please + * add a colon at the end of the list + */ + common_suffixes = g_strdup (_("ers:er:ing:es:s:'s")); + + ignore = g_strsplit (ignore_words, ":", -1); + if (strchr (common_prefixes, ':')) { + prefixes = g_strsplit (common_prefixes, ":", -1); + } else { + prefixes = NULL; + } + if (strchr (common_suffixes, ':')) { + suffixes = g_strsplit (common_suffixes, ":", -1); + } else { + suffixes = NULL; + } + search_terms = g_strdelimit (search_terms, ":", ' '); + list_copy = g_strsplit (g_utf8_casefold (g_strstrip ( + search_terms), -1), + " ", -1); + + for (iter = list_copy; *iter != NULL; iter++) { + gboolean ignoring = FALSE; + if (strlen (*iter) == 0) { + continue; + } + if (g_str_has_suffix (*iter, "?")) { + gchar *tmp; + tmp = g_strndup (*iter, strlen (*iter) - 1); + g_free (*iter); + *iter = g_strdup (tmp); + g_free (tmp); + } + if (!term_str) { + term_str = g_strdup (*iter); + } else { + term_str = string_append (term_str, *iter, NULL); + } + + for (iter1 = ignore; *iter1; iter1++) { + if (g_str_equal (*iter, *iter1)) { + ignoring = TRUE; + break; + } + } + if (ignoring) { + if (!dup_str) { + dup_str = g_strdup ("I"); + } else { + dup_str = string_append (dup_str, "I", NULL); + } + continue; + } + non_stop++; + + if (!dup_str) { + dup_str = g_strdup ("O"); + } else { + dup_str = string_append (dup_str, "O", NULL); + } + (*req)++; + if (prefixes) { + for (iter1 = prefixes; *iter1; iter1++) { + if (g_str_has_prefix (*iter, *iter1)) { + term_str = string_append (term_str, + (*iter+strlen(*iter1)), NULL); + } else { + term_str = string_append (term_str, *iter, *iter1); + } + dup_str = string_append (dup_str, "D", NULL); + } + } + if (suffixes) { + for (iter1 = suffixes; *iter1; iter1++) { + if (g_str_has_suffix (*iter, *iter1)) { + gchar *tmp; + tmp = g_strndup (*iter, (strlen(*iter)-strlen(*iter1))); + term_str = string_append (term_str, tmp, NULL); + g_free (tmp); + } else { + term_str = string_append (term_str, *iter, *iter1); + } + dup_str = 
string_append (dup_str, "D", NULL); + } + } + } + g_strfreev (list_copy); + *terms = g_strsplit (term_str, ":", -1); + n_terms = g_strv_length (*terms); + (*dups) = g_new0 (gint, n_terms); + (*stops) = g_new0 (gboolean, n_terms); + list_copy = g_strsplit (dup_str, ":", -1); + + for (iter = *terms; *iter; iter++) { + i++; + if (g_str_equal (list_copy[i], "O")) { + orig_term = i; + } + (*dups)[i] = orig_term; + + for (iter1 = ignore; *iter1; iter1++) { + if (non_stop > 0 && g_str_equal (*iter, *iter1)) { + (*stops)[i] = TRUE; + (*dups)[i] = -2; + break; + } + } + } + + /* Clean up all those pesky strings */ + g_free (ignore_words); + g_free (common_prefixes); + g_free (common_suffixes); + g_free (term_str); + g_free (dup_str); + g_strfreev (prefixes); + g_strfreev (suffixes); + g_strfreev (ignore); + g_strfreev (list_copy); + + return n_terms; +} + + +static gboolean +slow_search_setup (YelpSearchParser *parser) +{ + gchar *content_list; + gchar *stderr_str; + gchar *lang; + gchar *command; + + gchar **terms_list = NULL; + gint *dup_list = NULL; + gboolean *stop_list = NULL; + gint terms_number = 0; + gint required_no = 0; + + static xmlSAXHandler sk_sax_handler = { 0, }; + xmlParserCtxtPtr xmlparser; + if (langs && langs[0]) + lang = (gchar *) langs[0]; + else + lang = "C"; + + if (!strcmp (parser->search_terms, "")) { + parser->slow_search_setup_process_id = 0; + check_finished (parser); + return FALSE; + } + + command = g_strconcat("scrollkeeper-get-content-list ", lang, NULL); + + if (g_spawn_command_line_sync (command, &content_list, &stderr_str, NULL, NULL)) { + if (!sk_sax_handler.startElement) { + sk_sax_handler.startElement = sk_startElement; + sk_sax_handler.endElement = sk_endElement; + sk_sax_handler.characters = sk_characters; + sk_sax_handler.initialized = TRUE; + } + content_list = g_strstrip (content_list); + xmlSAXUserParseFile (&sk_sax_handler, NULL, content_list); + } + + xmlparser = xmlNewParserCtxt (); + + g_free (content_list); + g_free 
(stderr_str); + g_free (command); + + + terms_number = build_lists (parser->search_terms,&terms_list, + &dup_list, &stop_list, + &required_no); + + while (omf_pending) { + GSList *first = NULL; + gchar *file = NULL; + xmlDocPtr omf_doc = NULL; + xmlXPathContextPtr omf_xpath = NULL; + xmlXPathObjectPtr omf_url = NULL; + xmlXPathObjectPtr omf_title = NULL; + xmlXPathObjectPtr omf_mime = NULL; + xmlXPathObjectPtr omf_desc = NULL; + + SearchContainer *container; + gchar *ptr; + gchar *path; + gchar *fname; + gchar *realfname; + gchar *mime_type; + int i = 0; + + first = omf_pending; + omf_pending = g_slist_remove_link (omf_pending, first); + file = (gchar *) first->data; + + + omf_doc = xmlCtxtReadFile (xmlparser, (const char *) file, NULL, + XML_PARSE_NOBLANKS | XML_PARSE_NOCDATA | + XML_PARSE_NOENT | XML_PARSE_NOERROR | + XML_PARSE_NONET ); + + if (!omf_doc) { + g_warning (_("Could not load the OMF file '%s'."), file); + continue; + } + + omf_xpath = xmlXPathNewContext (omf_doc); + omf_url = + xmlXPathEvalExpression (BAD_CAST + "string(/omf/resource/identifier/@url)", + omf_xpath); + omf_title = + xmlXPathEvalExpression (BAD_CAST + "string(/omf/resource/title)", + omf_xpath); + omf_mime = + xmlXPathEvalExpression (BAD_CAST + "string(/omf/resource/format/@mime)", + omf_xpath); + omf_desc = + xmlXPathEvalExpression (BAD_CAST + "string(/omf/resource/description)", + omf_xpath); + + mime_type = g_strdup ((gchar *) omf_mime->stringval); + + fname = g_strdup ((gchar *) omf_url->stringval); + if (g_str_has_prefix (fname, "file:")) { + realfname = &fname[5]; + } else { + realfname = fname; + } + + if (!g_file_test (realfname, G_FILE_TEST_EXISTS)) { + continue; + } + + container = g_new0 (SearchContainer, 1); + + container->base_filename = g_strdup (realfname); + container->entities = g_hash_table_new (g_str_hash, g_str_equal); + container->doc_title = g_strdup ((gchar *) omf_title->stringval); + container->score=0; + container->html = FALSE; + container->default_snippet = 
g_strdup ((gchar *) omf_desc->stringval); + + ptr = g_strrstr (container->base_filename, "/"); + + path = g_strndup (container->base_filename, + ptr - container->base_filename); + + /* BEGIN HTML special block */ + if (g_str_equal (mime_type, "text/html")) { + GDir *dir; + gchar *filename; + container->html = TRUE; + ptr++; + + dir = g_dir_open (path, 0, NULL); + + while ((filename = (gchar *) g_dir_read_name (dir))) { + if ((g_str_has_suffix (filename, ".html") || + g_str_has_suffix (filename, ".htm")) && + !g_str_equal (filename, ptr)) { + container->components = + g_slist_append (container->components, + g_strconcat (path, "/", filename, + NULL)); + + } + } + /* END HTML special blcok */ + } + + container->base_path = g_strdup (path); + + container->required_words = required_no; + container->grab_text = FALSE; + container->sect_name = NULL; + + container->search_term = g_strdupv (terms_list); + container->stop_word = g_new0 (gboolean, terms_number); + container->dup_of = g_new0 (gint, terms_number); + container->found_terms = g_new0 (gboolean, terms_number); + container->score_per_word = g_new0 (gfloat, terms_number); + container->found_terms = g_new0 (gboolean, terms_number); + + container->search_status = NOT_SEARCHING; + container->snippet_score = 0; + + for (i=0; i< terms_number; i++) { + container->stop_word[i] = stop_list[i]; + container->dup_of[i] = dup_list[i]; + } + + parser->pending_searches = + g_slist_prepend (parser->pending_searches, container); + + g_free (fname); + g_free (path); + if (omf_url) + xmlXPathFreeObject (omf_url); + if (omf_title) + xmlXPathFreeObject (omf_title); + if (omf_xpath) + xmlXPathFreeContext (omf_xpath); + if (omf_doc) + xmlFreeDoc (omf_doc); + + } + g_return_val_if_fail (parser->slow_search_process_id == 0, FALSE); + parser->slow_search_process_id = + g_idle_add ((GSourceFunc) slow_search_process, parser); + + if (xmlparser) + xmlFreeParserCtxt (xmlparser); + + /* returning false removes this idle function from the main 
loop; + * we also set our slow search _setup_ process id to zero to + * indicate it has been removed */ + parser->slow_search_setup_process_id = 0; + return FALSE; + +} + +static void +search_free_container (SearchContainer *c) +{ + g_strfreev (c->search_term); + g_free (c->dup_of); + g_free (c->found_terms); + g_free (c->stop_word); + g_free (c->score_per_word); + g_free (c->top_element); + g_free (c->elem_type); + g_free (c->sect_name); + g_free (c->default_snippet); + g_free (c->current_subsection); + g_free (c->result_subsection); + g_free (c->doc_title); + g_free (c->base_path); + g_free (c->base_filename); + g_free (c->snippet); + g_hash_table_destroy (c->entities); + g_free (c); +} + + +static gboolean +slow_search_process (YelpSearchParser *parser) +{ + SearchContainer *c; + GSList *first = parser->pending_searches; + gint i, j=0; + + parser->pending_searches = + g_slist_remove_link (parser->pending_searches, first); + + if (first == NULL) { + parser->slow_search_process_id = 0; + check_finished (parser); + return FALSE; + } + + c = (SearchContainer *) first->data; + + xmlSAXUserParseFile (&handlers, c, c->base_filename); + for (i=0; i< g_strv_length (c->search_term); ++i) { + if (c->found_terms[i]) { + j++; + } + } + if (j >= c->required_words) { + search_parse_result (parser, c); + } else while (c->components) { + GSList *next = c->components; + c->components = g_slist_remove_link (c->components, next); + c->search_status = NOT_SEARCHING; + xmlSAXUserParseFile (&handlers, c, (gchar *) next->data); + j = 0; + for (i=0; i< g_strv_length (c->search_term); ++i) { + if (c->found_terms[i]) + j++; + } + if (j >= c->required_words) { + search_parse_result (parser, c); + break; + } + } + + if (parser->pending_searches) { + search_free_container (c); + return TRUE; + } + else { +#ifdef ENABLE_MAN + search_process_man (parser, c->search_term); +#endif +#ifdef ENABLE_INFO + search_process_info (parser, c->search_term); +#endif + search_free_container (c); + + 
check_finished (parser); + /* returning false removes this idle function from the main loop; + * we also set our slow search process id to zero to + * indicate it has been removed */ + parser->slow_search_process_id = 0; + return FALSE; + } +} + +gchar * +search_clean_snippet (gchar *snippet, gchar **terms) +{ + /* This is probably what you want to change */ + gint len_before_term = 47; + gint len_after_term = 47; + gchar **iteration; + gboolean am_cutting = FALSE; + gchar *result = NULL; + gboolean found_terms = FALSE; + + + if (!snippet) + return NULL; + + if (strlen(snippet) > (len_before_term+len_after_term)) { + am_cutting = TRUE; + } + result = g_strdup (snippet); + + for (iteration = terms; *iteration; iteration++) { + gchar *before, *after, *tmp; + gchar *str; + gchar before_c, after_c; + gint count = 0; + + while ((str = strstr (result, (*iteration)))) { + gboolean breaking = FALSE; + gint i; + for (i=0; i< count; i++) { + str++; + str = strstr (str, (*iteration)); + if (!str) { + breaking = TRUE; + break; + } + } + count++; + if (breaking) + break; + + before_c = *(str-1); + after_c = *(str+strlen(*iteration)); + + if (g_ascii_isalpha (before_c) || g_ascii_isalpha (after_c)) { + continue; + } + + tmp = g_strndup (result, (str-result)); + /* If we have to chop the snippet down to size, here is the + * place to do it. 
Only the first time through though + */ + if (am_cutting && !found_terms && strlen (tmp) > len_before_term) { + gchar *tmp1; + gchar *tmp2; + gint cut_by; + + tmp1 = tmp; + cut_by = strlen(tmp) - len_before_term; + + tmp1 += cut_by; + tmp2 = g_strdup (tmp1); + g_free (tmp); + tmp = g_strconcat ("...",tmp2, NULL); + g_free (tmp2); + } + + before = g_strconcat (tmp, "<em>", NULL); + g_free (tmp); + + str += strlen (*iteration); + + if (am_cutting && !found_terms && strlen (str) > len_after_term) { + gchar *tmp1; + + tmp1 = g_strndup (str, len_after_term); + tmp = g_strconcat (tmp1, "...", NULL); + g_free (tmp1); + } else { + tmp = g_strdup (str); + } + + after = g_strconcat ((*iteration), "</em>", tmp, NULL); + + + + g_free (result); + result = g_strconcat (before, after, NULL); + found_terms = TRUE; + } + } + return result; +} + +void +search_parse_result (YelpSearchParser *parser, SearchContainer *c) +{ + xmlNode *child; + gchar *new_uri; + xmlDoc *snippet_doc; + xmlNode *node; + char *xmldoc; + + new_uri = g_strconcat (c->base_filename, "#", c->result_subsection, + NULL); + child = xmlNewTextChild (parser->root, NULL, + BAD_CAST "result", NULL); + xmlSetProp (child, BAD_CAST "uri", BAD_CAST new_uri); + xmlSetProp (child, BAD_CAST "title", BAD_CAST g_strstrip (c->doc_title)); + xmlSetProp (child, BAD_CAST "score", + BAD_CAST g_strdup_printf ("%f", c->score)); + /* Fix up the snippet to show the break_term in bold */ + if (!c->snippet) + c->snippet = g_strdup (c->default_snippet); + xmldoc = g_strdup_printf ("<snippet>%s</snippet>", + search_clean_snippet (c->snippet, c->search_term)); + snippet_doc = xmlParseDoc (BAD_CAST xmldoc); + g_free (xmldoc); + + if (!snippet_doc) + return; + + node = xmlDocGetRootElement (snippet_doc); + xmlUnlinkNode (node); + xmlAddChild (child, node); + xmlFreeDoc (snippet_doc); +} + +void +process_man_result (YelpSearchParser *parser, gchar *result, gchar **terms) +{ + gchar ** split = g_strsplit (result, "\n", -1); + gint i; + + for 
(i=0;split[i];i++) { + gchar ** line = g_strsplit (split[i], "(", 2); + gchar *filename = NULL; + gchar *desc = NULL; + xmlNode *child; + gchar *tmp = NULL; + gchar *after = NULL; + /*gchar *before = NULL;*/ + gchar *title = NULL; + /*gint i;*/ + + if (line == NULL || line[0] == NULL || line[1] == NULL) + continue; + + title = g_strdup (g_strstrip (line[0])); + after = strstr (line[1], ")"); + + tmp = g_strndup (line[1], after-line[1]); + + filename = g_strconcat ("man:", title, "(", tmp,")", NULL); + + after++; + g_free (tmp); + + tmp = g_strdup (g_strchug (after)); + after = tmp; after++; + desc = g_strdup (g_strchug (after)); + + child = xmlNewTextChild (parser->root, NULL, + BAD_CAST "result", NULL); + xmlSetProp (child, BAD_CAST "uri", BAD_CAST filename); + xmlSetProp (child, BAD_CAST "title", + BAD_CAST g_strconcat (title, + " manual page", NULL)); + + xmlNewChild (child, NULL, BAD_CAST "snippet", + BAD_CAST desc); + xmlNewChild (child, NULL, BAD_CAST "score", + BAD_CAST "0.1"); + g_free (tmp); + g_strfreev (line); + } + +} + +void +process_info_result (YelpSearchParser *parser, gchar *result, gchar **terms) +{ + gchar ** split = NULL; + gint i; + + split = g_strsplit (result, "\n", -1); + if (split == NULL) + return; + + for (i=0;split[i];i++) { + gchar ** line = NULL; + gchar *filename = NULL; + gchar *desc = NULL; + gchar *title = NULL; + xmlNode *child; + gchar *tmp; + gchar *tmp1; + gchar *file_name; + + line = g_strsplit (split[i], "--", 3); + if (g_strv_length (line) != 2) { + g_strfreev (line); + continue; + } + + /* First is the filename + * We gotta do some fiddling to get the actual filename + * we can use + */ + tmp = g_strdup (g_strchomp (line[0])); + tmp++; + tmp1 = strstr (tmp, "\""); + if (!tmp1) { + g_strfreev (line); + g_free (tmp); + continue; + } + file_name = g_strndup (tmp, tmp1-tmp); + tmp++; + tmp1 = strstr (tmp, ")"); + if (tmp1) + title = g_strndup (tmp, tmp1-tmp); + else { + title = g_strdup (++file_name); + --file_name; + } + 
tmp--; + tmp--; + filename = g_strconcat ("info:", file_name, NULL); + g_free (tmp); + g_free (file_name); + + /* Then the description */ + desc = g_strdup (g_strchug (line[1])); + + /* Now we add the result to the page */ + child = xmlNewTextChild (parser->root, NULL, + BAD_CAST "result", NULL); + xmlSetProp (child, BAD_CAST "uri", BAD_CAST filename); + xmlSetProp (child, BAD_CAST "title", + BAD_CAST g_strconcat (title, + " info page", NULL)); + + xmlNewChild (child, NULL, BAD_CAST "snippet", + BAD_CAST desc); + xmlNewChild (child, NULL, BAD_CAST "score", + BAD_CAST "0.05"); + g_strfreev (line); + g_free (title); + } + +} + +void +search_process_man (YelpSearchParser *parser, gchar **terms) +{ + gchar *command; + gchar *stdout_str = NULL; + gint exit_code; + gchar *tmp = NULL; + gchar *search = NULL; + + tmp = g_strescape (parser->search_terms, NULL); + tmp = g_strdelimit (tmp, "\'", '\''); + search = g_strconcat ("\"",tmp,"\"", NULL); + + command = g_strconcat("apropos ", search, NULL); + + if (g_spawn_command_line_sync (command, &stdout_str, NULL, + &exit_code, NULL) && exit_code == 0) { + process_man_result (parser, stdout_str, terms); + + } + g_free (tmp); + g_free (search); + g_free (stdout_str); + g_free (command); + + return; +} + +void +search_process_info (YelpSearchParser *parser, gchar **terms) +{ + gchar *command; + gchar *stdout_str = NULL; + gchar *stderr_str = NULL; + gchar *tmp; + gint exit_code; + + gchar *search = NULL; + + tmp = g_strescape (parser->search_terms, NULL); + tmp = g_strdelimit (tmp, "\'", '\''); + search = g_strconcat ("\"",tmp,"\"", NULL); + command = g_strconcat("info --apropos ", search, NULL); + + if (g_spawn_command_line_sync (command, &stdout_str, &stderr_str, + &exit_code, NULL) && + stdout_str != NULL) { + process_info_result (parser, stdout_str, terms); + } + g_free (tmp); + g_free (stdout_str); + g_free (stderr_str); + g_free (command); + + return; +} diff --git a/src/yelp-search-parser.h b/src/yelp-search-parser.h new 
file mode 100644 index 00000000..d0d25ac7 --- /dev/null +++ b/src/yelp-search-parser.h @@ -0,0 +1,36 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 4 -*- */ +/* + * Copyright (C) 2007 Don Scorgie <Don@Scorgie.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Author: Don Scorgie <Don@Scorgie.org> + */ + +#ifndef __YELP_SEARCH_PARSER_H__ +#define __YELP_SEARCH_PARSER_H__ + +#include <glib.h> +#include <libxml/tree.h> + +typedef struct _YelpSearchParser YelpSearchParser; + +YelpSearchParser * yelp_search_parser_new (void); +xmlDocPtr yelp_search_parser_process (YelpSearchParser *parser, + gchar *terms); +void yelp_search_parser_free (YelpSearchParser *parser); + +#endif /* __YELP_SEARCH_PARSER_H__ */ diff --git a/src/yelp-search.c b/src/yelp-search.c new file mode 100644 index 00000000..ebbc2174 --- /dev/null +++ b/src/yelp-search.c @@ -0,0 +1,397 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 4 -*- */ +/* + * Copyright (C) 2007 Don Scorgie <Don@Scorgie.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Author: Don Scorgie <Don@Scorgie.org> + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <glib.h> +#include <glib/gi18n.h> +#include <gtk/gtk.h> +#include <libxml/tree.h> + +#include "yelp-error.h" +#include "yelp-search.h" +#include "yelp-search-parser.h" +#include "yelp-transform.h" +#include "yelp-debug.h" + +#define STYLESHEET DATADIR"/yelp/xslt/search2html.xsl" + +#define YELP_SEARCH_GET_PRIVATE(object) (G_TYPE_INSTANCE_GET_PRIVATE ((object), YELP_TYPE_SEARCH, YelpSearchPriv)) + +typedef enum { + SEARCH_STATE_BLANK, /* Brand new, run transform as needed */ + SEARCH_STATE_PARSING, /* Parsing/transforming document, please wait */ + SEARCH_STATE_PARSED, /* All done, if we ain't got it, it ain't here */ + SEARCH_STATE_STOP /* Stop everything now, object to be disposed */ +} SearchState; + +struct _YelpSearchPriv { + gchar *search_terms; + SearchState state; + + GMutex *mutex; + GThread *thread; + + xmlDocPtr xmldoc; + + gboolean process_running; + gboolean transform_running; + + YelpTransform *transform; +}; + + +static void search_class_init (YelpSearchClass *klass); +static void search_init (YelpSearch *search); +static void search_try_dispose (GObject *object); +static void search_dispose (GObject *object); + +/* YelpDocument */ +static void search_request (YelpDocument *document, + gint req_id, + gboolean handled, + gchar *page_id, + YelpDocumentFunc func, + gpointer user_data); + +/* YelpTransform */ +static void transform_func (YelpTransform *transform, + 
YelpTransformSignal signal, + gpointer func_data, + YelpSearch *search); +static void transform_page_func (YelpTransform *transform, + gchar *page_id, + YelpSearch *search); +static void transform_final_func (YelpTransform *transform, + YelpSearch *search); + +/* Threaded */ +static void search_process (YelpSearch *search); + +static YelpDocumentClass *parent_class; + +GType +yelp_search_get_type (void) +{ + static GType type = 0; + if (!type) { + static const GTypeInfo info = { + sizeof (YelpSearchClass), + NULL, NULL, + (GClassInitFunc) search_class_init, + NULL, NULL, + sizeof (YelpSearch), + 0, + (GInstanceInitFunc) search_init, + }; + type = g_type_register_static (YELP_TYPE_DOCUMENT, + "YelpSearch", + &info, 0); + } + return type; +} + +static void +search_class_init (YelpSearchClass *klass) +{ + GObjectClass *object_class = G_OBJECT_CLASS (klass); + YelpDocumentClass *document_class = YELP_DOCUMENT_CLASS (klass); + + parent_class = g_type_class_peek_parent (klass); + + object_class->dispose = search_try_dispose; + + document_class->request = search_request; + document_class->cancel = NULL; + + g_type_class_add_private (klass, sizeof (YelpSearchPriv)); +} + +static void +search_init (YelpSearch *search) +{ + YelpSearchPriv *priv; + + priv = search->priv = YELP_SEARCH_GET_PRIVATE (search); + + priv->state = SEARCH_STATE_BLANK; + + priv->mutex = g_mutex_new (); +} + +static void +search_try_dispose (GObject *object) +{ + YelpSearchPriv *priv; + + g_assert (object != NULL && YELP_IS_SEARCH (object)); + priv = YELP_SEARCH (object)->priv; + + g_mutex_lock (priv->mutex); + if (priv->process_running || priv->transform_running) { + priv->state = SEARCH_STATE_STOP; + g_idle_add ((GSourceFunc) search_try_dispose, object); + g_mutex_unlock (priv->mutex); + } else { + g_mutex_unlock (priv->mutex); + search_dispose (object); + } +} + +static void +search_dispose (GObject *object) +{ + YelpSearch *search = YELP_SEARCH (object); + + g_free (search->priv->search_terms); + + 
if (search->priv->xmldoc) + xmlFreeDoc (search->priv->xmldoc); + + g_mutex_free (search->priv->mutex); + + G_OBJECT_CLASS (parent_class)->dispose (object); +} + +/******************************************************************************/ + +YelpDocument * +yelp_search_new (gchar *filename) +{ + YelpSearch *search; + + g_return_val_if_fail (filename != NULL, NULL); + + search = (YelpSearch *) g_object_new (YELP_TYPE_SEARCH, NULL); + search->priv->search_terms = g_strdup (filename); + + debug_print (DB_FUNCTION, "entering\n"); + debug_print (DB_ARG, " filename = \"%s\"\n", filename); + + yelp_document_add_page_id (YELP_DOCUMENT (search), "x-yelp-index", "index"); + + return (YelpDocument *) search; +} + + +/******************************************************************************/ +/** YelpDocument **************************************************************/ + +static void +search_request (YelpDocument *document, + gint req_id, + gboolean handled, + gchar *page_id, + YelpDocumentFunc func, + gpointer user_data) +{ + YelpSearch *search; + YelpSearchPriv *priv; + YelpError *error; + + debug_print (DB_FUNCTION, "entering\n"); + debug_print (DB_ARG, " req_id = %i\n", req_id); + debug_print (DB_ARG, " page_id = \"%s\"\n", page_id); + + g_assert (document != NULL && YELP_IS_SEARCH (document)); + + if (handled) + return; + + search = YELP_SEARCH (document); + priv = search->priv; + + g_mutex_lock (priv->mutex); + + switch (priv->state) { + case SEARCH_STATE_BLANK: + priv->state = SEARCH_STATE_PARSING; + priv->process_running = TRUE; + priv->thread = g_thread_create ((GThreadFunc) search_process, search, FALSE, NULL); + break; + case SEARCH_STATE_PARSING: + break; + case SEARCH_STATE_PARSED: + case SEARCH_STATE_STOP: + /* Much bigger problems */ + error = yelp_error_new (_("Page not found"), + _("Could not process search")); + yelp_document_error_request (document, req_id, error); + break; + } + + g_mutex_unlock (priv->mutex); +} + + 
+/******************************************************************************/ +/** YelpTransform *************************************************************/ + +static void +transform_func (YelpTransform *transform, + YelpTransformSignal signal, + gpointer func_data, + YelpSearch *search) +{ + YelpSearchPriv *priv; + + debug_print (DB_FUNCTION, "entering\n"); + + g_assert (search != NULL && YELP_IS_SEARCH (search)); + + priv = search->priv; + + g_assert (transform == priv->transform); + + if (priv->state == SEARCH_STATE_STOP) { + switch (signal) { + case YELP_TRANSFORM_CHUNK: + g_free (func_data); + break; + case YELP_TRANSFORM_ERROR: + yelp_error_free ((YelpError *) func_data); + break; + case YELP_TRANSFORM_FINAL: + break; + } + yelp_transform_release (transform); + priv->transform = NULL; + priv->transform_running = FALSE; + return; + } + + switch (signal) { + case YELP_TRANSFORM_CHUNK: + transform_page_func (transform, (gchar *) func_data, search); + break; + case YELP_TRANSFORM_ERROR: + yelp_document_error_pending (YELP_DOCUMENT (search), (YelpError *) func_data); + yelp_transform_release (transform); + priv->transform = NULL; + priv->transform_running = FALSE; + break; + case YELP_TRANSFORM_FINAL: + transform_final_func (transform, search); + break; + } +} + +static void +transform_page_func (YelpTransform *transform, + gchar *page_id, + YelpSearch *search) +{ + YelpSearchPriv *priv; + gchar *content; + + debug_print (DB_FUNCTION, "entering\n"); + + priv = search->priv; + g_mutex_lock (priv->mutex); + + content = yelp_transform_eat_chunk (transform, page_id); + + yelp_document_add_page (YELP_DOCUMENT (search), page_id, content); + + g_free (page_id); + + g_mutex_unlock (priv->mutex); +} + +static void +transform_final_func (YelpTransform *transform, YelpSearch *search) +{ + YelpError *error; + YelpSearchPriv *priv = search->priv; + + debug_print (DB_FUNCTION, "entering\n"); + + g_mutex_lock (priv->mutex); + + /*error = yelp_error_new (_("Page not 
found"), + _("There are big problems afoot")); + yelp_document_error_pending (YELP_DOCUMENT (search), error);*/ + + yelp_transform_release (transform); + priv->transform = NULL; + priv->transform_running = FALSE; + + if (priv->xmldoc) + xmlFreeDoc (priv->xmldoc); + priv->xmldoc = NULL; + + g_mutex_unlock (priv->mutex); +} + + +/******************************************************************************/ +/** Threaded ******************************************************************/ + +static void +search_process (YelpSearch *search) +{ + YelpSearchPriv *priv; + YelpSearchParser *parser; + YelpError *error = NULL; + YelpDocument *document; + gint i; + + debug_print (DB_FUNCTION, "entering\n"); + + g_assert (search != NULL && YELP_IS_SEARCH (search)); + g_object_ref (search); + priv = search->priv; + document = YELP_DOCUMENT (search); + + parser = yelp_search_parser_new (); + priv->xmldoc = yelp_search_parser_process (parser, priv->search_terms); + yelp_search_parser_free (parser); + + if (priv->xmldoc == NULL) { + error = yelp_error_new (_("Could not parse file"), + _("Bigger problems still")); + yelp_document_error_pending (document, error); + } + + g_mutex_lock (priv->mutex); + if (priv->state == SEARCH_STATE_STOP) { + g_mutex_unlock (priv->mutex); + goto done; + } + + priv->transform = yelp_transform_new (STYLESHEET, + (YelpTransformFunc) transform_func, + search); + priv->transform_running = TRUE; + + /* FIXME: we probably need to set our own params */ + yelp_transform_start (priv->transform, + priv->xmldoc, + NULL); + g_mutex_unlock (priv->mutex); + + done: + priv->process_running = FALSE; + g_object_unref (search); +} diff --git a/src/yelp-search.h b/src/yelp-search.h new file mode 100644 index 00000000..a6cec17e --- /dev/null +++ b/src/yelp-search.h @@ -0,0 +1,53 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 4 -*- */ +/* + * Copyright (C) 2007 Don Scorgie <Don@Scorgie.org> + * + * This program is free software; you can 
redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Author: Don Scorgie <Don@Scorgie.org> + */ + +#ifndef __YELP_SEARCH_H__ +#define __YELP_SEARCH_H__ + +#include <glib-object.h> + +#include "yelp-document.h" + +#define YELP_TYPE_SEARCH (yelp_search_get_type ()) +#define YELP_SEARCH(o) (G_TYPE_CHECK_INSTANCE_CAST ((o), YELP_TYPE_SEARCH, YelpSearch)) +#define YELP_SEARCH_CLASS(k) (G_TYPE_CHECK_CLASS_CAST ((k), YELP_TYPE_SEARCH, YelpSearchClass)) +#define YELP_IS_SEARCH(o) (G_TYPE_CHECK_INSTANCE_TYPE ((o), YELP_TYPE_SEARCH)) +#define YELP_IS_SEARCH_CLASS(k) (G_TYPE_CHECK_CLASS_TYPE ((k), YELP_TYPE_SEARCH)) +#define YELP_SEARCH_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o), YELP_TYPE_SEARCH, YelpSearchClass)) + +typedef struct _YelpSearch YelpSearch; +typedef struct _YelpSearchClass YelpSearchClass; +typedef struct _YelpSearchPriv YelpSearchPriv; + +struct _YelpSearch { + YelpDocument parent; + YelpSearchPriv *priv; +}; + +struct _YelpSearchClass { + YelpDocumentClass parent_class; +}; + +GType yelp_search_get_type (void); +YelpDocument * yelp_search_new (gchar *uri); + +#endif /* __YELP_SEARCH_H__ */ diff --git a/src/yelp-utils.c b/src/yelp-utils.c index 94b53b2c..78e6f999 100644 --- a/src/yelp-utils.c +++ b/src/yelp-utils.c @@ -1226,15 +1226,12 @@ resolve_man_page (const gchar *name, gchar **result, gchar **section) } } } - printf 
("Checking %s\n", real_name); if (g_file_test (real_name, G_FILE_TEST_EXISTS)) { /* Full filename */ - printf ("Exists\n"); *result = g_strdup (real_name); return YELP_RRN_TYPE_MAN; } else if (g_file_test (name, G_FILE_TEST_EXISTS)) { /* Full filename */ - printf ("Exists\n"); *result = g_strdup (name); return YELP_RRN_TYPE_MAN; } @@ -1260,7 +1257,7 @@ resolve_man_page (const gchar *name, gchar **result, gchar **section) gchar * resolve_remove_section (const gchar *uri, const gchar *sect) { - if (sect && *sect) + if (sect) return (g_strndup (uri, (strlen(uri) - strlen(sect) - 1 /*for the delimiter char */))); else return (g_strdup (uri)); @@ -1289,6 +1286,10 @@ yelp_uri_resolve (gchar *uri, gchar **result, gchar **section) intern_section = resolve_get_section(uri); intern_uri = resolve_remove_section (uri, intern_section); + if (intern_section && g_str_equal (intern_section, "")) { + intern_section = NULL; + } + if (!strncmp (uri, "ghelp:", 6) || !strncmp (uri, "gnome-help:", 11)) { ret = resolve_process_ghelp (intern_uri, result); if (*result) { @@ -1381,7 +1382,7 @@ yelp_uri_resolve (gchar *uri, gchar **result, gchar **section) } else if (!strncmp (uri, "x-yelp-search:", 14)) { /* Search pager request. 
*result contains the search terms */ *result = g_strdup (uri); - *section = g_strdup (uri+14); + *section = g_strdup ("results"); ret = YELP_RRN_TYPE_SEARCH; } else if (g_file_test (intern_uri, G_FILE_TEST_EXISTS)) { /* Full path */ diff --git a/src/yelp-window.c b/src/yelp-window.c index bd9e49e2..e02b5758 100644 --- a/src/yelp-window.c +++ b/src/yelp-window.c @@ -57,10 +57,10 @@ #ifdef ENABLE_INFO #include "yelp-info.h" #endif -/*#ifdef ENABLE_SEARCH -#include "yelp-search-pager.h" +#ifdef ENABLE_SEARCH +#include "yelp-search.h" #include "gtkentryaction.h" -#endif*/ +#endif #define YELP_CONFIG_WIDTH "/yelp/Geometry/width" #define YELP_CONFIG_HEIGHT "/yelp/Geometry/height" @@ -975,17 +975,15 @@ window_setup_window (YelpWindow *window, YelpRrnType type, priv = window->priv; - window_set_loading (window); - if (priv->current_request != -1) { yelp_document_cancel_page (priv->current_document, priv->current_request); priv->current_request = -1; - } - - if (add_history) { + } else if (add_history) { history_push_back(window); } + window_set_loading (window); + priv->current_type = type; priv->uri = loading_uri; priv->current_frag = g_strdup (frag); @@ -1075,6 +1073,9 @@ yelp_window_load (YelpWindow *window, const gchar *uri) priv->base_uri = g_strdup (uri); doc = yelp_docbook_new (real_uri); break; + case YELP_RRN_TYPE_SEARCH: + doc = yelp_search_new (&real_uri[14]); //remove x-yelp-search: + break; case YELP_RRN_TYPE_HTML: case YELP_RRN_TYPE_XHTML: window_do_load_html (window, real_uri, frag_id, type, TRUE); @@ -1423,7 +1424,6 @@ search_activated (GtkAction *action, } else { uri = encode_search_uri (search_terms); } - yelp_window_load (window, uri); g_free (uri); @@ -1481,7 +1481,7 @@ window_populate (YelpWindow *window) gtk_menu_tool_button_set_menu (GTK_MENU_TOOL_BUTTON (f_proxy), priv->forward_menu); -#if 0//def ENABLE_SEARCH +#ifdef ENABLE_SEARCH action = gtk_entry_action_new ("Search", _("_Search:"), _("Search for other documentation"), @@ -1507,7 +1507,7 @@ 
window_populate (YelpWindow *window) window_error (window, error, FALSE); } -#if 0//def ENABLE_SEARCH +#ifdef ENABLE_SEARCH if (!gtk_ui_manager_add_ui_from_file (priv->ui_manager, DATADIR "/yelp/ui/yelp-search-ui.xml", &error)) { |