diff options
author | Shaun McCance <shaunm@gnome.org> | 2011-02-10 13:20:31 -0500 |
---|---|---|
committer | Shaun McCance <shaunm@gnome.org> | 2011-02-10 13:20:31 -0500 |
commit | 4d880e18527367ef0adcb5f9b0cfa25cda34ae6b (patch) | |
tree | 7cd0057207f71b3fd827c9d04b0c223c5a8c80c4 | |
parent | 70bc039b40482243830363e54f87b6a49647ee30 (diff) | |
download | yelp-4d880e18527367ef0adcb5f9b0cfa25cda34ae6b.tar.gz |
yelp-mallard-document: Added indexing code for Mallard
-rw-r--r-- | libyelp/yelp-mallard-document.c | 293 | ||||
-rw-r--r-- | libyelp/yelp-uri.c | 27 | ||||
-rw-r--r-- | libyelp/yelp-uri.h | 1 |
3 files changed, 282 insertions, 39 deletions
diff --git a/libyelp/yelp-mallard-document.c b/libyelp/yelp-mallard-document.c index 9c286584..b3845b48 100644 --- a/libyelp/yelp-mallard-document.c +++ b/libyelp/yelp-mallard-document.c @@ -77,6 +77,7 @@ static void yelp_mallard_document_init (YelpMallardDocument static void yelp_mallard_document_dispose (GObject *object); static void yelp_mallard_document_finalize (GObject *object); +static void mallard_index (YelpDocument *document); static gboolean mallard_request_page (YelpDocument *document, const gchar *page_id, GCancellable *cancellable, @@ -103,6 +104,7 @@ static void mallard_page_data_info (MallardPageData *page_data static void mallard_page_data_run (MallardPageData *page_data); static void mallard_page_data_free (MallardPageData *page_data); +static const char * xml_node_get_icon (xmlNodePtr node); static gboolean xml_node_is_ns_name (xmlNodePtr node, const xmlChar *ns, const xmlChar *name); @@ -119,6 +121,8 @@ struct _YelpMallardDocumentPrivate { GMutex *mutex; GThread *thread; gboolean thread_running; + GThread *index; + gboolean index_running; GSList *pending; xmlDocPtr cache; @@ -140,6 +144,7 @@ yelp_mallard_document_class_init (YelpMallardDocumentClass *klass) object_class->finalize = yelp_mallard_document_finalize; document_class->request_page = mallard_request_page; + document_class->index = mallard_index; g_type_class_add_private (klass, sizeof (YelpMallardDocumentPrivate)); } @@ -153,6 +158,7 @@ yelp_mallard_document_init (YelpMallardDocument *mallard) priv->mutex = g_mutex_new (); priv->thread_running = FALSE; + priv->index_running = FALSE; priv->cache = xmlNewDoc (BAD_CAST "1.0"); priv->cache_ns = xmlNewNs (NULL, BAD_CAST MALLARD_NS, BAD_CAST "mal"); @@ -212,7 +218,6 @@ yelp_mallard_document_new (YelpUri *uri) return (YelpDocument *) mallard; } - static gboolean mallard_request_page (YelpDocument *document, const gchar *page_id, @@ -285,7 +290,6 @@ mallard_think (YelpMallardDocument *mallard) YelpMallardDocumentPrivate *priv = GET_PRIV (mallard); GError *error = NULL; YelpDocument *document; - gchar **search_path; gboolean editor_mode; gchar **path; @@ -296,22 +300,20 @@ mallard_think (YelpMallardDocument *mallard) editor_mode = yelp_settings_get_editor_mode (yelp_settings_get_default ()); - search_path = yelp_uri_get_search_path (priv->uri); - - if (!search_path || search_path[0] == NULL || - !g_file_test (search_path[0], G_FILE_TEST_IS_DIR)) { + path = yelp_uri_get_search_path (priv->uri); + if (!path || path[0] == NULL || + !g_file_test (path[0], G_FILE_TEST_IS_DIR)) { /* This basically only happens when someone passes an actual directory manually, which will have a singleton search path. */ error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND, _("The directory ā%sā does not exist."), - search_path[0]); + path[0]); yelp_document_error_pending ((YelpDocument *) document, error); g_error_free (error); goto done; } - path = yelp_uri_get_search_path (priv->uri); for (path_i = 0; path[path_i] != NULL; path_i++) { gfile = g_file_new_for_path (path[path_i]); children = g_file_enumerate_children (gfile, @@ -497,40 +499,14 @@ mallard_page_data_walk (MallardPageData *page_data) NULL); if (ispage) { - xmlChar *style; - gchar **styles; - gchar *icon = "help-contents"; page_data->page_id = g_strdup ((gchar *) id); xmlSetProp (page_data->cache, BAD_CAST "id", id); yelp_document_set_page_id ((YelpDocument *) page_data->mallard, g_strrstr (page_data->filename, G_DIR_SEPARATOR_S), page_data->page_id); - style = xmlGetProp (page_data->cur, BAD_CAST "style"); - if (style) { - gint i; - styles = g_strsplit (style, " ", -1); - for (i = 0; styles[i] != NULL; i++) { - if (g_str_equal (styles[i], "task")) { - icon = "yelp-page-task"; - break; - } - else if (g_str_equal (styles[i], "tip")) { - icon = "yelp-page-tip"; - break; - } - else if (g_str_equal (styles[i], "ui")) { - icon = "yelp-page-ui"; - break; - } - else if (g_str_equal (styles[i], "video")) { - icon = "yelp-page-video"; - break; - } - } - xmlFree (style); - } yelp_document_set_page_icon ((YelpDocument *) page_data->mallard, - page_data->page_id, icon); + page_data->page_id, + xml_node_get_icon (page_data->cur)); } else { gchar *newid = g_strdup_printf ("%s#%s", page_data->page_id, id); xmlSetProp (page_data->cache, BAD_CAST "id", BAD_CAST newid); @@ -823,6 +799,40 @@ transform_error (YelpTransform *transform, mallard_page_data_cancel (page_data); } +static const char * +xml_node_get_icon (xmlNodePtr node) +{ + xmlChar *style; + gchar **styles; + gchar *icon = "help-contents"; + style = xmlGetProp (node, BAD_CAST "style"); + if (style) { + gint i; + styles = g_strsplit (style, " ", -1); + for (i = 0; styles[i] != NULL; i++) { + if (g_str_equal (styles[i], "task")) { + icon = "yelp-page-task"; + break; + } + else if (g_str_equal (styles[i], "tip")) { + icon = "yelp-page-tip"; + break; + } + else if (g_str_equal (styles[i], "ui")) { + icon = "yelp-page-ui"; + break; + } + else if (g_str_equal (styles[i], "video")) { + icon = "yelp-page-video"; + break; + } + } + g_strfreev (styles); + xmlFree (style); + } + return icon; +} + static gboolean xml_node_is_ns_name (xmlNodePtr node, const xmlChar *ns, @@ -834,3 +844,214 @@ xml_node_is_ns_name (xmlNodePtr node, return (xmlStrEqual (ns, node->ns->href) && xmlStrEqual (name, node->name)); return FALSE; } + +/******************************************************************************/ + +typedef struct { + YelpMallardDocument *mallard; + xmlDocPtr doc; + xmlNodePtr cur; + GString *str; + gboolean is_inline; +} MallardIndexData; + +static void +mallard_index_node (MallardIndexData *index) +{ + xmlNodePtr orig, child; + gboolean was_inline; + + orig = index->cur; + was_inline = index->is_inline; + + for (child = index->cur->children; child; child = child->next) { + if (index->is_inline) { + if ((xml_node_is_ns_name (child->parent, MALLARD_NS, BAD_CAST "guiseq") || + xml_node_is_ns_name (child->parent, MALLARD_NS, BAD_CAST "keyseq")) && + child->prev != NULL) { + g_string_append_c (index->str, ' '); + } + if (child->type == XML_TEXT_NODE) { + g_string_append (index->str, child->content); + continue; + } + } + + if (child->type != XML_ELEMENT_NODE || + xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "info")) + continue; + + if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "p") || + xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "code") || + xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "screen") || + xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "title") || + xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "desc") || + xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "cite")) { + index->is_inline = TRUE; + } + + index->cur = child; + mallard_index_node (index); + + if (index->is_inline && !was_inline) { + g_string_append_c (index->str, '\n'); + } + + index->cur = orig; + index->is_inline = was_inline; + } +} + +static gboolean +mallard_index_done (YelpMallardDocument *mallard) +{ + g_object_set (mallard, "indexed", TRUE, NULL); + return FALSE; +} + +static void +mallard_index_threaded (YelpMallardDocument *mallard) +{ + gchar **path; + gint path_i; + GHashTable *ids; + gchar *doc_uri; + YelpMallardDocumentPrivate *priv = GET_PRIV (mallard); + + doc_uri = yelp_uri_get_document_uri (priv->uri); + ids = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL); + path = yelp_uri_get_search_path (priv->uri); + for (path_i = 0; path[path_i] != NULL; path_i++) { + GFile *gfile; + GFileEnumerator *children; + GFileInfo *pageinfo; + gfile = g_file_new_for_path (path[path_i]); + children = g_file_enumerate_children (gfile, + G_FILE_ATTRIBUTE_STANDARD_NAME, + G_FILE_QUERY_INFO_NONE, + NULL, NULL); + while ((pageinfo = g_file_enumerator_next_file (children, NULL, NULL))) { + gchar *filename; + GFile *pagefile; + xmlParserCtxtPtr parserCtxt = NULL; + xmlXPathContextPtr xpath = NULL; + xmlXPathObjectPtr obj; + MallardIndexData *index = NULL; + xmlChar *id; + YelpUri *uri; + gchar *title, *desc, *fulltext, *tmp, *full_uri; + + filename = g_file_info_get_attribute_as_string (pageinfo, + G_FILE_ATTRIBUTE_STANDARD_NAME); + if (!g_str_has_suffix (filename, ".page")) { + g_free (filename); + g_object_unref (pageinfo); + continue; + } + pagefile = g_file_resolve_relative_path (gfile, filename); + g_free (filename); + filename = g_file_get_path (pagefile); + + index = g_new0 (MallardIndexData, 1); + index->mallard = mallard; + + parserCtxt = xmlNewParserCtxt (); + index->doc = xmlCtxtReadFile (parserCtxt, filename, NULL, + XML_PARSE_DTDLOAD | XML_PARSE_NOCDATA | + XML_PARSE_NOENT | XML_PARSE_NONET ); + if (index->doc == NULL) + goto done; + if (xmlXIncludeProcessFlags (index->doc, + XML_PARSE_DTDLOAD | XML_PARSE_NOCDATA | + XML_PARSE_NOENT | XML_PARSE_NONET ) + < 0) + goto done; + + index->cur = xmlDocGetRootElement (index->doc); + + id = xmlGetProp (index->cur, BAD_CAST "id"); + if (id == NULL) + goto done; + if (g_hash_table_lookup (ids, id)) + goto done; + /* We never use the value. Just need something non-NULL. */ + g_hash_table_insert (ids, g_strdup (id), id); + + index->str = g_string_new (""); + + xpath = xmlXPathNewContext (index->doc); + xmlXPathRegisterNs (xpath, BAD_CAST "mal", BAD_CAST MALLARD_NS); + obj = xmlXPathEvalExpression (BAD_CAST + "normalize-space((/mal:page/mal:info/mal:title[@type='text'] |" + "/mal:page/mal:title)[1])", + xpath); + title = g_strdup (obj->stringval); + xmlXPathFreeObject (obj); + obj = xmlXPathEvalExpression (BAD_CAST + "normalize-space(/mal:page/mal:info/mal:desc[1])", + xpath); + desc = g_strdup (obj->stringval); + xmlXPathFreeObject (obj); + + mallard_index_node (index); + + tmp = g_strconcat ("xref:", id, NULL); + uri = yelp_uri_new_relative (priv->uri, tmp); + yelp_uri_resolve_sync (uri); + full_uri = yelp_uri_get_canonical_uri (uri); + g_free (tmp); + g_object_unref (uri); + + fulltext = g_string_free (index->str, FALSE); + + yelp_storage_update (yelp_storage_get_default (), + doc_uri, full_uri, + title, desc, + xml_node_get_icon (xmlDocGetRootElement (index->doc)), + fulltext); + g_free (full_uri); + g_free (title); + g_free (desc); + g_free (fulltext); + + done: + if (id) + xmlFree (id); + if (xpath != NULL) + xmlXPathFreeContext (xpath); + if (index->doc != NULL) + xmlFreeDoc (index->doc); + if (index != NULL) + g_free (index); + if (parserCtxt != NULL) + xmlFreeParserCtxt (parserCtxt); + g_object_unref (pagefile); + g_free (filename); + g_object_unref (pageinfo); + } + } + g_strfreev (path); + g_hash_table_destroy (ids); + priv->index_running = FALSE; + g_free (doc_uri); + g_object_unref (mallard); + g_idle_add ((GSourceFunc) mallard_index_done, mallard); +} + +static void +mallard_index (YelpDocument *document) +{ + YelpMallardDocumentPrivate *priv; + gboolean done; + + g_object_get (document, "indexed", &done, NULL); + if (done) + return; + + priv = GET_PRIV (document); + g_object_ref (document); + priv->index = g_thread_create ((GThreadFunc) mallard_index_threaded, + document, FALSE, NULL); + priv->index_running = TRUE; +} + diff --git a/libyelp/yelp-uri.c b/libyelp/yelp-uri.c index 511de8f1..0e16982a 100644 --- a/libyelp/yelp-uri.c +++ b/libyelp/yelp-uri.c @@ -40,6 +40,7 @@ static void yelp_uri_dispose (GObject *object); static void yelp_uri_finalize (GObject *object); static void resolve_start (YelpUri *uri); +static void resolve_sync (YelpUri *uri); static void resolve_async (YelpUri *uri); static gboolean resolve_final (YelpUri *uri); @@ -243,6 +244,22 @@ yelp_uri_resolve (YelpUri *uri) } } +void +yelp_uri_resolve_sync (YelpUri *uri) +{ + YelpUriPrivate *priv = GET_PRIV (uri); + + if (priv->doctype != YELP_URI_DOCUMENT_TYPE_UNRESOLVED) + return; + + if (priv->res_base) + yelp_uri_resolve_sync (priv->res_base); + + g_object_ref (uri); + resolve_sync (uri); + resolve_final (uri); +} + /* We want code to be able to do something like this: * * if (yelp_uri_get_document_type (uri) != YELP_URI_DOCUMENT_TYPE_UNRESOLVED) { @@ -279,7 +296,7 @@ resolve_start (YelpUri *uri) } static void -resolve_async (YelpUri *uri) +resolve_sync (YelpUri *uri) { gchar *tmp; YelpUriPrivate *priv = GET_PRIV (uri); @@ -307,7 +324,7 @@ resolve_async (YelpUri *uri) YelpUriPrivate *base_priv; if (priv->res_base == NULL) { priv->tmptype = YELP_URI_DOCUMENT_TYPE_ERROR; - goto done; + return; } base_priv = GET_PRIV (priv->res_base); switch (base_priv->doctype) { @@ -352,8 +369,12 @@ resolve_async (YelpUri *uri) if (!priv->fulluri) { priv->fulluri = g_strdup (priv->res_arg); } +} - done: +static void +resolve_async (YelpUri *uri) +{ + resolve_sync (uri); g_idle_add ((GSourceFunc) resolve_final, uri); } diff --git a/libyelp/yelp-uri.h b/libyelp/yelp-uri.h index ab80bab2..5ebbd4a9 100644 --- a/libyelp/yelp-uri.h +++ b/libyelp/yelp-uri.h @@ -70,6 +70,7 @@ YelpUri * yelp_uri_new_search (YelpUri *base, const gchar *text); void yelp_uri_resolve (YelpUri *uri); +void yelp_uri_resolve_sync (YelpUri *uri); gboolean yelp_uri_is_resolved (YelpUri *uri); YelpUriDocumentType yelp_uri_get_document_type (YelpUri *uri); |