diff options
author | Shaun McCance <shaunm@gnome.org> | 2018-12-03 09:28:56 -0500 |
---|---|---|
committer | Shaun McCance <shaunm@gnome.org> | 2018-12-03 09:28:56 -0500 |
commit | c9b1ca3ae200b424f0bc67ceb1a06fb598a839c3 (patch) | |
tree | 18b9c3e71954c9190a01267a8ba4ce3e849cbce4 | |
parent | 7dbcea94263c024845c0679fbbe695b9eea61c7a (diff) | |
download | yelp-c9b1ca3ae200b424f0bc67ceb1a06fb598a839c3.tar.gz |
Support for keywords in search results for Mallard and DocBook
-rw-r--r-- | libyelp/yelp-docbook-document.c | 82 | ||||
-rw-r--r-- | libyelp/yelp-document.c | 38 | ||||
-rw-r--r-- | libyelp/yelp-document.h | 6 | ||||
-rw-r--r-- | libyelp/yelp-mallard-document.c | 35 | ||||
-rw-r--r-- | libyelp/yelp-search-entry.c | 25 |
5 files changed, 172 insertions, 14 deletions
diff --git a/libyelp/yelp-docbook-document.c b/libyelp/yelp-docbook-document.c index 6ecaf316..893732c0 100644 --- a/libyelp/yelp-docbook-document.c +++ b/libyelp/yelp-docbook-document.c @@ -81,6 +81,8 @@ static gboolean docbook_walk_divisionQ (YelpDocbookDocument *docbook, xmlNodePtr cur); static gchar * docbook_walk_get_title (YelpDocbookDocument *docbook, xmlNodePtr cur); +static gchar * docbook_walk_get_keywords (YelpDocbookDocument *docbook, + xmlNodePtr cur); static void transform_chunk_ready (YelpTransform *transform, gchar *chunk_id, @@ -123,6 +125,8 @@ struct _YelpDocbookDocumentPrivate { GFileMonitor **monitors; gint64 reload_time; + + GHashTable *autoids; }; /******************************************************************************/ @@ -161,6 +165,7 @@ yelp_docbook_document_init (YelpDocbookDocument *docbook) YelpDocbookDocumentPrivate *priv = GET_PRIV (docbook); priv->state = DOCBOOK_STATE_BLANK; + priv->autoids = NULL; g_mutex_init (&priv->mutex); } @@ -194,6 +199,8 @@ yelp_docbook_document_finalize (GObject *object) g_free (priv->cur_prev_id); g_free (priv->root_id); + g_hash_table_destroy (priv->autoids); + g_mutex_clear (&priv->mutex); G_OBJECT_CLASS (yelp_docbook_document_parent_class)->finalize (object); @@ -532,6 +539,7 @@ docbook_walk (YelpDocbookDocument *docbook) gchar autoidstr[20]; xmlChar *id = NULL; xmlChar *title = NULL; + xmlChar *keywords = NULL; xmlNodePtr cur, old_cur; gboolean chunkQ; YelpDocbookDocumentPrivate *priv = GET_PRIV (docbook); @@ -558,7 +566,7 @@ docbook_walk (YelpDocbookDocument *docbook) if (docbook_walk_divisionQ (docbook, priv->xmlcur) && !id) { /* If id attribute is not present, autogenerate a * unique value, and insert it into the in-memory tree */ - g_snprintf (autoidstr, 20, "//autoid-%d", ++autoid); + g_snprintf (autoidstr, 20, "//yelp-autoid-%d", ++autoid); if (priv->xmlcur->ns) { xmlNewNsProp (priv->xmlcur, xmlNewNs (priv->xmlcur, XML_XML_NAMESPACE, BAD_CAST "xml"), @@ -569,15 +577,20 @@ docbook_walk (YelpDocbookDocument *docbook) xmlNewProp (priv->xmlcur, BAD_CAST "id", BAD_CAST autoidstr); id = xmlGetProp (priv->xmlcur, BAD_CAST "id"); } + if (!priv->autoids) + priv->autoids = g_hash_table_new_full (g_str_hash, g_str_equal, xmlFree, xmlFree); + g_hash_table_insert (priv->autoids, xmlGetNodePath(priv->xmlcur), xmlStrdup (id)); } if (docbook_walk_chunkQ (docbook, priv->xmlcur, priv->cur_depth, priv->max_depth)) { title = BAD_CAST docbook_walk_get_title (docbook, priv->xmlcur); + keywords = BAD_CAST docbook_walk_get_keywords (docbook, priv->xmlcur); debug_print (DB_DEBUG, " id: \"%s\"\n", id); debug_print (DB_DEBUG, " title: \"%s\"\n", title); yelp_document_set_page_title (document, (gchar *) id, (gchar *) title); + yelp_document_set_page_keywords (document, (gchar *) id, (gchar *) keywords); if (priv->cur_prev_id) { yelp_document_set_prev_id (document, (gchar *) id, priv->cur_prev_id); @@ -626,6 +639,8 @@ docbook_walk (YelpDocbookDocument *docbook) xmlFree (id); if (title != NULL) xmlFree (title); + if (keywords != NULL) + xmlFree (keywords); } static gboolean @@ -791,6 +806,42 @@ docbook_walk_get_title (YelpDocbookDocument *docbook, return g_strdup (_("Unknown")); } +static gchar * +docbook_walk_get_keywords (YelpDocbookDocument *docbook, + xmlNodePtr cur) +{ + xmlNodePtr info, keywordset, keyword; + GString *ret = NULL; + + for (info = cur->children; info; info = info->next) { + if (g_str_has_suffix ((const gchar *) info->name, "info")) { + for (keywordset = info->children; keywordset; keywordset = keywordset->next) { + if (!xmlStrcmp (keywordset->name, BAD_CAST "keywordset")) { + for (keyword = keywordset->children; keyword; keyword = keyword->next) { + if (!xmlStrcmp (keyword->name, BAD_CAST "keyword")) { + xmlChar *content; + if (ret) + g_string_append(ret, ", "); + else + ret = g_string_new (""); + /* FIXME: try this with just ->children->text */ + content = xmlNodeGetContent (keyword); + g_string_append (ret, (gchar *) content); + xmlFree (content); + } + } + } + } + break; + } + } + + if (ret) + return g_string_free (ret, FALSE); + else + return NULL; +} + /******************************************************************************/ static void @@ -910,6 +961,7 @@ typedef struct { GString *str; gint depth; gint max_depth; + gboolean in_info; } DocbookIndexData; static void @@ -926,10 +978,15 @@ docbook_index_node (DocbookIndexData *index) g_string_append (index->str, (const gchar *) index->cur->content); return; } - if (index->cur->type != XML_ELEMENT_NODE || - g_str_has_suffix ((const gchar *) index->cur->name, "info") || - g_str_equal (index->cur->name, "remark")) + if (index->cur->type != XML_ELEMENT_NODE) { + return; + } + if (g_str_equal (index->cur->name, "remark")) { + return; + } + if (g_str_has_suffix ((const gchar *) index->cur->name, "info")) { return; + } oldcur = index->cur; for (child = index->cur->children; child; child = child->next) { index->cur = child; @@ -944,15 +1001,32 @@ docbook_index_chunk (DocbookIndexData *index) xmlChar *id; xmlNodePtr child; gchar *title = NULL; + gchar *keywords; GSList *chunks = NULL; + YelpDocbookDocumentPrivate *priv = GET_PRIV (index->docbook); id = xmlGetProp (index->cur, BAD_CAST "id"); + if (!id) + id = xmlGetNsProp (index->cur, XML_XML_NAMESPACE, BAD_CAST "id"); + if (!id) { + xmlChar *path = xmlGetNodePath (index->cur); + id = g_hash_table_lookup (priv->autoids, path); + if (id) + id = xmlStrdup (id); + xmlFree (path); + } + if (id != NULL) { title = docbook_walk_get_title (index->docbook, index->cur); if (index->cur->parent->parent == NULL) yelp_storage_set_root_title (yelp_storage_get_default (), index->doc_uri, title); index->str = g_string_new (""); + keywords = docbook_walk_get_keywords (index->docbook, index->cur); + if (keywords) { + g_string_append (index->str, keywords); + g_free (keywords); + } } for (child = index->cur->children; child; child = child->next) { diff --git a/libyelp/yelp-document.c b/libyelp/yelp-document.c index c1eec9cc..83582eaf 100644 --- a/libyelp/yelp-document.c +++ b/libyelp/yelp-document.c @@ -84,6 +84,7 @@ struct _YelpDocumentPriv { Hash *page_ids; /* Mapping of fragment IDs to real page IDs */ Hash *titles; /* Mapping of page IDs to titles */ Hash *descs; /* Mapping of page IDs to descs */ + Hash *keywords; /* Mapping of page IDs to keywords */ Hash *icons; /* Mapping of page IDs to icons */ Hash *mime_types; /* Mapping of page IDs to mime types */ Hash *contents; /* Mapping of page IDs to string content */ @@ -313,6 +314,7 @@ yelp_document_init (YelpDocument *document) priv->page_ids = hash_new (g_free ); priv->titles = hash_new (g_free); priv->descs = hash_new (g_free); + priv->keywords = hash_new (g_free); priv->icons = hash_new (g_free); priv->mime_types = hash_new (g_free); priv->contents = hash_new ((GDestroyNotify) str_unref); @@ -361,6 +363,7 @@ yelp_document_finalize (GObject *object) hash_free (document->priv->page_ids); hash_free (document->priv->titles); hash_free (document->priv->descs); + hash_free (document->priv->keywords); hash_free (document->priv->icons); hash_free (document->priv->mime_types); @@ -751,6 +754,41 @@ yelp_document_set_page_desc (YelpDocument *document, } gchar * +yelp_document_get_page_keywords (YelpDocument *document, + const gchar *page_id) +{ + gchar *real, *ret = NULL; + + g_assert (document != NULL && YELP_IS_DOCUMENT (document)); + + if (page_id != NULL && g_str_has_prefix (page_id, "search=")) + return NULL; + + g_mutex_lock (&document->priv->mutex); + real = hash_lookup (document->priv->page_ids, page_id); + if (real) { + ret = hash_lookup (document->priv->keywords, real); + if (ret) + ret = g_strdup (ret); + } + g_mutex_unlock (&document->priv->mutex); + + return ret; +} + +void +yelp_document_set_page_keywords (YelpDocument *document, + const gchar *page_id, + const gchar *keywords) +{ + g_assert (document != NULL && YELP_IS_DOCUMENT (document)); + + g_mutex_lock (&document->priv->mutex); + hash_replace (document->priv->keywords, page_id, g_strdup (keywords)); + g_mutex_unlock (&document->priv->mutex); +} + +gchar * yelp_document_get_page_icon (YelpDocument *document, const gchar *page_id) { diff --git a/libyelp/yelp-document.h b/libyelp/yelp-document.h index 175b281a..8ee9203e 100644 --- a/libyelp/yelp-document.h +++ b/libyelp/yelp-document.h @@ -151,6 +151,12 @@ void yelp_document_set_page_desc (YelpDocument *docum const gchar *page_id, const gchar *desc); +gchar * yelp_document_get_page_keywords (YelpDocument *document, + const gchar *page_id); +void yelp_document_set_page_keywords (YelpDocument *document, + const gchar *page_id, + const gchar *keywords); + gchar * yelp_document_get_page_icon (YelpDocument *document, const gchar *page_id); void yelp_document_set_page_icon (YelpDocument *document, diff --git a/libyelp/yelp-mallard-document.c b/libyelp/yelp-mallard-document.c index 74fc80ee..b72dc9a5 100644 --- a/libyelp/yelp-mallard-document.c +++ b/libyelp/yelp-mallard-document.c @@ -69,6 +69,7 @@ typedef struct { gchar *page_title; gchar *page_desc; + gchar *page_keywords; gchar *next_page; } MallardPageData; @@ -393,6 +394,10 @@ mallard_think (YelpMallardDocument *mallard) yelp_document_set_page_desc ((YelpDocument *) mallard, page_data->page_id, page_data->page_desc); + yelp_document_set_page_keywords ((YelpDocument *) mallard, + page_data->page_id, + page_data->page_keywords); + if (page_data->next_page != NULL) { yelp_document_set_next_id ((YelpDocument *) mallard, page_data->page_id, @@ -662,11 +667,25 @@ mallard_page_data_info (MallardPageData *page_data, xmlXPathObjectPtr obj; page_data->xpath->node = child; obj = xmlXPathCompiledEval (priv->normalize, page_data->xpath); + g_free(page_data->page_desc); page_data->page_desc = g_strdup ((const gchar *) obj->stringval); xmlXPathFreeObject (obj); xmlAddChild (cache_node, xmlCopyNode (child, 1)); } + else if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "keywords")) { + /* FIXME: multiple keywords? same for desc/title */ + + YelpMallardDocumentPrivate *priv = GET_PRIV (page_data->mallard); + xmlXPathObjectPtr obj; + page_data->xpath->node = child; + obj = xmlXPathCompiledEval (priv->normalize, page_data->xpath); + g_free(page_data->page_keywords); + page_data->page_keywords = g_strdup ((const gchar *) obj->stringval); + xmlXPathFreeObject (obj); + + xmlAddChild (cache_node, xmlCopyNode (child, 1)); + } else if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "link")) { xmlChar *type, *next; @@ -749,6 +768,7 @@ mallard_page_data_free (MallardPageData *page_data) xmlXPathFreeContext (page_data->xpath); g_free (page_data->page_title); g_free (page_data->page_desc); + g_free (page_data->page_keywords); g_free (page_data->next_page); g_free (page_data); } @@ -896,16 +916,18 @@ typedef struct { xmlNodePtr cur; GString *str; gboolean is_inline; + gboolean in_info; } MallardIndexData; static void mallard_index_node (MallardIndexData *index) { xmlNodePtr orig, child; - gboolean was_inline; + gboolean was_inline, was_info; orig = index->cur; was_inline = index->is_inline; + was_info = index->in_info; for (child = index->cur->children; child; child = child->next) { if (index->is_inline) { @@ -921,18 +943,24 @@ mallard_index_node (MallardIndexData *index) } if (child->type != XML_ELEMENT_NODE || - xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "info") || xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "comment")) continue; - if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "p") || + if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "info")) { + index->in_info = TRUE; + } + else if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "p") || xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "code") || xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "screen") || xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "title") || xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "desc") || + xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "keywords") || xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "cite")) { index->is_inline = TRUE; } + else if (index->in_info && !index->is_inline) { + continue; + } index->cur = child; mallard_index_node (index); @@ -943,6 +971,7 @@ mallard_index_node (MallardIndexData *index) index->cur = orig; index->is_inline = was_inline; + index->in_info = was_info; } } diff --git a/libyelp/yelp-search-entry.c b/libyelp/yelp-search-entry.c index 55be49cc..2b5a8196 100644 --- a/libyelp/yelp-search-entry.c +++ b/libyelp/yelp-search-entry.c @@ -100,7 +100,8 @@ enum { COMPLETION_COL_DESC, COMPLETION_COL_ICON, COMPLETION_COL_PAGE, - COMPLETION_COL_FLAGS + COMPLETION_COL_FLAGS, + COMPLETION_COL_KEYWORDS }; enum { @@ -451,7 +452,7 @@ entry_match_func (GtkEntryCompletion *completion, YelpSearchEntry *entry) { gint stri; - gchar *title, *desc, *titlecase = NULL, *desccase = NULL; + gchar *title, *desc, *keywords, *titlecase = NULL, *desccase = NULL, *keywordscase = NULL; gboolean ret = FALSE; gchar **strs; gint flags; @@ -470,6 +471,7 @@ entry_match_func (GtkEntryCompletion *completion, gtk_tree_model_get (model, iter, COMPLETION_COL_TITLE, &title, COMPLETION_COL_DESC, &desc, + COMPLETION_COL_KEYWORDS, &keywords, -1); if (title) { titlecase = g_utf8_casefold (title, -1); @@ -479,14 +481,20 @@ entry_match_func (GtkEntryCompletion *completion, desccase = g_utf8_casefold (desc, -1); g_free (desc); } + if (keywords) { + keywordscase = g_utf8_casefold (keywords, -1); + g_free (keywords); + } strs = g_regex_split (nonword, key, 0); ret = TRUE; for (stri = 0; strs[stri]; stri++) { if (!titlecase || !strstr (titlecase, strs[stri])) { if (!desccase || !strstr (desccase, strs[stri])) { - ret = FALSE; - break; + if (!keywordscase || !strstr (keywordscase, strs[stri])) { + ret = FALSE; + break; + } } } } @@ -592,12 +600,13 @@ view_loaded (YelpView *view, !g_str_equal (doc_uri, priv->completion_uri)) { completion = (GtkTreeModel *) g_hash_table_lookup (completions, doc_uri); if (completion == NULL) { - GtkListStore *base = gtk_list_store_new (5, + GtkListStore *base = gtk_list_store_new (6, G_TYPE_STRING, /* title */ G_TYPE_STRING, /* desc */ G_TYPE_STRING, /* icon */ G_TYPE_STRING, /* uri */ - G_TYPE_INT /* flags */ + G_TYPE_INT, /* flags */ + G_TYPE_STRING /* keywords */ ); completion = gtk_tree_model_sort_new_with_model (GTK_TREE_MODEL (base)); gtk_tree_sortable_set_default_sort_func (GTK_TREE_SORTABLE (completion), @@ -607,15 +616,17 @@ view_loaded (YelpView *view, if (document != NULL) { ids = yelp_document_list_page_ids (document); for (i = 0; ids[i]; i++) { - gchar *title, *desc, *icon; + gchar *title, *desc, *icon, *keywords; gtk_list_store_insert (GTK_LIST_STORE (base), &iter, 0); title = yelp_document_get_page_title (document, ids[i]); desc = yelp_document_get_page_desc (document, ids[i]); icon = yelp_document_get_page_icon (document, ids[i]); + keywords = yelp_document_get_page_keywords (document, ids[i]); gtk_list_store_set (base, &iter, COMPLETION_COL_TITLE, title, COMPLETION_COL_DESC, desc, COMPLETION_COL_ICON, icon, + COMPLETION_COL_KEYWORDS, keywords, COMPLETION_COL_PAGE, ids[i], -1); g_free (icon); |