summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Shaun McCance <shaunm@gnome.org> 2018-12-03 09:28:56 -0500
committer Shaun McCance <shaunm@gnome.org> 2018-12-03 09:28:56 -0500
commit c9b1ca3ae200b424f0bc67ceb1a06fb598a839c3 (patch)
tree 18b9c3e71954c9190a01267a8ba4ce3e849cbce4
parent 7dbcea94263c024845c0679fbbe695b9eea61c7a (diff)
download yelp-c9b1ca3ae200b424f0bc67ceb1a06fb598a839c3.tar.gz
Support for keywords in search results for Mallard and DocBook
-rw-r--r-- libyelp/yelp-docbook-document.c | 82
-rw-r--r-- libyelp/yelp-document.c | 38
-rw-r--r-- libyelp/yelp-document.h | 6
-rw-r--r-- libyelp/yelp-mallard-document.c | 35
-rw-r--r-- libyelp/yelp-search-entry.c | 25
5 files changed, 172 insertions, 14 deletions
diff --git a/libyelp/yelp-docbook-document.c b/libyelp/yelp-docbook-document.c
index 6ecaf316..893732c0 100644
--- a/libyelp/yelp-docbook-document.c
+++ b/libyelp/yelp-docbook-document.c
@@ -81,6 +81,8 @@ static gboolean docbook_walk_divisionQ (YelpDocbookDocument *docbook,
xmlNodePtr cur);
static gchar * docbook_walk_get_title (YelpDocbookDocument *docbook,
xmlNodePtr cur);
+static gchar * docbook_walk_get_keywords (YelpDocbookDocument *docbook,
+ xmlNodePtr cur);
static void transform_chunk_ready (YelpTransform *transform,
gchar *chunk_id,
@@ -123,6 +125,8 @@ struct _YelpDocbookDocumentPrivate {
GFileMonitor **monitors;
gint64 reload_time;
+
+ GHashTable *autoids;
};
/******************************************************************************/
@@ -161,6 +165,7 @@ yelp_docbook_document_init (YelpDocbookDocument *docbook)
YelpDocbookDocumentPrivate *priv = GET_PRIV (docbook);
priv->state = DOCBOOK_STATE_BLANK;
+ priv->autoids = NULL;
g_mutex_init (&priv->mutex);
}
@@ -194,6 +199,8 @@ yelp_docbook_document_finalize (GObject *object)
g_free (priv->cur_prev_id);
g_free (priv->root_id);
+ g_hash_table_destroy (priv->autoids);
+
g_mutex_clear (&priv->mutex);
G_OBJECT_CLASS (yelp_docbook_document_parent_class)->finalize (object);
@@ -532,6 +539,7 @@ docbook_walk (YelpDocbookDocument *docbook)
gchar autoidstr[20];
xmlChar *id = NULL;
xmlChar *title = NULL;
+ xmlChar *keywords = NULL;
xmlNodePtr cur, old_cur;
gboolean chunkQ;
YelpDocbookDocumentPrivate *priv = GET_PRIV (docbook);
@@ -558,7 +566,7 @@ docbook_walk (YelpDocbookDocument *docbook)
if (docbook_walk_divisionQ (docbook, priv->xmlcur) && !id) {
/* If id attribute is not present, autogenerate a
* unique value, and insert it into the in-memory tree */
- g_snprintf (autoidstr, 20, "//autoid-%d", ++autoid);
+ g_snprintf (autoidstr, 20, "//yelp-autoid-%d", ++autoid);
if (priv->xmlcur->ns) {
xmlNewNsProp (priv->xmlcur,
xmlNewNs (priv->xmlcur, XML_XML_NAMESPACE, BAD_CAST "xml"),
@@ -569,15 +577,20 @@ docbook_walk (YelpDocbookDocument *docbook)
xmlNewProp (priv->xmlcur, BAD_CAST "id", BAD_CAST autoidstr);
id = xmlGetProp (priv->xmlcur, BAD_CAST "id");
}
+ if (!priv->autoids)
+ priv->autoids = g_hash_table_new_full (g_str_hash, g_str_equal, xmlFree, xmlFree);
+ g_hash_table_insert (priv->autoids, xmlGetNodePath(priv->xmlcur), xmlStrdup (id));
}
if (docbook_walk_chunkQ (docbook, priv->xmlcur, priv->cur_depth, priv->max_depth)) {
title = BAD_CAST docbook_walk_get_title (docbook, priv->xmlcur);
+ keywords = BAD_CAST docbook_walk_get_keywords (docbook, priv->xmlcur);
debug_print (DB_DEBUG, " id: \"%s\"\n", id);
debug_print (DB_DEBUG, " title: \"%s\"\n", title);
yelp_document_set_page_title (document, (gchar *) id, (gchar *) title);
+ yelp_document_set_page_keywords (document, (gchar *) id, (gchar *) keywords);
if (priv->cur_prev_id) {
yelp_document_set_prev_id (document, (gchar *) id, priv->cur_prev_id);
@@ -626,6 +639,8 @@ docbook_walk (YelpDocbookDocument *docbook)
xmlFree (id);
if (title != NULL)
xmlFree (title);
+ if (keywords != NULL)
+ xmlFree (keywords);
}
static gboolean
@@ -791,6 +806,42 @@ docbook_walk_get_title (YelpDocbookDocument *docbook,
return g_strdup (_("Unknown"));
}
+/*
+ * docbook_walk_get_keywords:
+ * @docbook: the document being walked (currently unused here)
+ * @cur: the division node whose keywords are wanted
+ *
+ * Looks at the first child element of @cur whose name ends in "info" and
+ * joins the text content of every keywordset/keyword element found directly
+ * inside it, separated by ", ".  Keywords with missing or empty content are
+ * skipped so they cannot leave dangling separators in the result.
+ *
+ * Returns: a newly allocated string owned by the caller, or NULL if no
+ * non-empty keyword was found.
+ */
+static gchar *
+docbook_walk_get_keywords (YelpDocbookDocument *docbook,
+                           xmlNodePtr           cur)
+{
+    xmlNodePtr info, keywordset, keyword;
+    GString *ret = NULL;
+
+    for (info = cur->children; info; info = info->next) {
+        /* Only elements can be info containers.  Checking the node type
+         * first also keeps a possibly NULL name (non-element nodes) away
+         * from g_str_has_suffix(). */
+        if (info->type != XML_ELEMENT_NODE ||
+            !g_str_has_suffix ((const gchar *) info->name, "info"))
+            continue;
+
+        for (keywordset = info->children; keywordset; keywordset = keywordset->next) {
+            if (xmlStrcmp (keywordset->name, BAD_CAST "keywordset"))
+                continue;
+
+            for (keyword = keywordset->children; keyword; keyword = keyword->next) {
+                xmlChar *content;
+
+                if (xmlStrcmp (keyword->name, BAD_CAST "keyword"))
+                    continue;
+
+                /* FIXME: try this with just ->children->text */
+                content = xmlNodeGetContent (keyword);
+
+                /* Fetch the content before touching the separator, so a
+                 * NULL or empty keyword contributes nothing at all. */
+                if (content != NULL && content[0] != '\0') {
+                    if (ret)
+                        g_string_append (ret, ", ");
+                    else
+                        ret = g_string_new ("");
+                    g_string_append (ret, (gchar *) content);
+                }
+                if (content != NULL)
+                    xmlFree (content);
+            }
+        }
+
+        /* Only the first info element is consulted. */
+        break;
+    }
+
+    return ret ? g_string_free (ret, FALSE) : NULL;
+}
+
/******************************************************************************/
static void
@@ -910,6 +961,7 @@ typedef struct {
GString *str;
gint depth;
gint max_depth;
+ gboolean in_info;
} DocbookIndexData;
static void
@@ -926,10 +978,15 @@ docbook_index_node (DocbookIndexData *index)
g_string_append (index->str, (const gchar *) index->cur->content);
return;
}
- if (index->cur->type != XML_ELEMENT_NODE ||
- g_str_has_suffix ((const gchar *) index->cur->name, "info") ||
- g_str_equal (index->cur->name, "remark"))
+ if (index->cur->type != XML_ELEMENT_NODE) {
+ return;
+ }
+ if (g_str_equal (index->cur->name, "remark")) {
+ return;
+ }
+ if (g_str_has_suffix ((const gchar *) index->cur->name, "info")) {
return;
+ }
oldcur = index->cur;
for (child = index->cur->children; child; child = child->next) {
index->cur = child;
@@ -944,15 +1001,32 @@ docbook_index_chunk (DocbookIndexData *index)
xmlChar *id;
xmlNodePtr child;
gchar *title = NULL;
+ gchar *keywords;
GSList *chunks = NULL;
+ YelpDocbookDocumentPrivate *priv = GET_PRIV (index->docbook);
id = xmlGetProp (index->cur, BAD_CAST "id");
+ if (!id)
+ id = xmlGetNsProp (index->cur, XML_XML_NAMESPACE, BAD_CAST "id");
+ if (!id) {
+ xmlChar *path = xmlGetNodePath (index->cur);
+ id = g_hash_table_lookup (priv->autoids, path);
+ if (id)
+ id = xmlStrdup (id);
+ xmlFree (path);
+ }
+
if (id != NULL) {
title = docbook_walk_get_title (index->docbook, index->cur);
if (index->cur->parent->parent == NULL)
yelp_storage_set_root_title (yelp_storage_get_default (),
index->doc_uri, title);
index->str = g_string_new ("");
+ keywords = docbook_walk_get_keywords (index->docbook, index->cur);
+ if (keywords) {
+ g_string_append (index->str, keywords);
+ g_free (keywords);
+ }
}
for (child = index->cur->children; child; child = child->next) {
diff --git a/libyelp/yelp-document.c b/libyelp/yelp-document.c
index c1eec9cc..83582eaf 100644
--- a/libyelp/yelp-document.c
+++ b/libyelp/yelp-document.c
@@ -84,6 +84,7 @@ struct _YelpDocumentPriv {
Hash *page_ids; /* Mapping of fragment IDs to real page IDs */
Hash *titles; /* Mapping of page IDs to titles */
Hash *descs; /* Mapping of page IDs to descs */
+ Hash *keywords; /* Mapping of page IDs to keywords */
Hash *icons; /* Mapping of page IDs to icons */
Hash *mime_types; /* Mapping of page IDs to mime types */
Hash *contents; /* Mapping of page IDs to string content */
@@ -313,6 +314,7 @@ yelp_document_init (YelpDocument *document)
priv->page_ids = hash_new (g_free );
priv->titles = hash_new (g_free);
priv->descs = hash_new (g_free);
+ priv->keywords = hash_new (g_free);
priv->icons = hash_new (g_free);
priv->mime_types = hash_new (g_free);
priv->contents = hash_new ((GDestroyNotify) str_unref);
@@ -361,6 +363,7 @@ yelp_document_finalize (GObject *object)
hash_free (document->priv->page_ids);
hash_free (document->priv->titles);
hash_free (document->priv->descs);
+ hash_free (document->priv->keywords);
hash_free (document->priv->icons);
hash_free (document->priv->mime_types);
@@ -751,6 +754,41 @@ yelp_document_set_page_desc (YelpDocument *document,
}
+/*
+ * yelp_document_get_page_keywords:
+ * @document: the document to query
+ * @page_id: a page or fragment ID
+ *
+ * Resolves @page_id through the page_ids table and returns a copy of the
+ * keywords stored for the resolved page.  IDs starting with "search=" are
+ * answered with NULL before the document mutex is taken.
+ *
+ * Returns: a newly allocated copy of the keywords, or NULL if the ID does
+ * not resolve or no keywords are stored for it.
+ */
gchar *
+yelp_document_get_page_keywords (YelpDocument *document,
+                                 const gchar  *page_id)
+{
+    gchar *real_id;
+    gchar *keywords = NULL;
+
+    g_assert (document != NULL && YELP_IS_DOCUMENT (document));
+
+    if (page_id != NULL && g_str_has_prefix (page_id, "search="))
+        return NULL;
+
+    g_mutex_lock (&document->priv->mutex);
+    real_id = hash_lookup (document->priv->page_ids, page_id);
+    if (real_id != NULL) {
+        /* g_strdup() maps NULL to NULL, so a missing entry stays NULL. */
+        keywords = g_strdup (hash_lookup (document->priv->keywords, real_id));
+    }
+    g_mutex_unlock (&document->priv->mutex);
+
+    return keywords;
+}
+
+/*
+ * yelp_document_set_page_keywords:
+ * @document: the document to update
+ * @page_id: the page ID the keywords belong to
+ * @keywords: the keyword string to store; it is duplicated with g_strdup()
+ *
+ * Stores a copy of @keywords for @page_id in the keywords table while
+ * holding the document mutex.
+ */
+void
+yelp_document_set_page_keywords (YelpDocument *document,
+                                 const gchar  *page_id,
+                                 const gchar  *keywords)
+{
+    GMutex *lock;
+
+    g_assert (document != NULL && YELP_IS_DOCUMENT (document));
+
+    lock = &document->priv->mutex;
+
+    g_mutex_lock (lock);
+    hash_replace (document->priv->keywords, page_id, g_strdup (keywords));
+    g_mutex_unlock (lock);
+}
+
+gchar *
yelp_document_get_page_icon (YelpDocument *document,
const gchar *page_id)
{
diff --git a/libyelp/yelp-document.h b/libyelp/yelp-document.h
index 175b281a..8ee9203e 100644
--- a/libyelp/yelp-document.h
+++ b/libyelp/yelp-document.h
@@ -151,6 +151,12 @@ void yelp_document_set_page_desc (YelpDocument *docum
const gchar *page_id,
const gchar *desc);
+gchar * yelp_document_get_page_keywords (YelpDocument *document,
+ const gchar *page_id);
+void yelp_document_set_page_keywords (YelpDocument *document,
+ const gchar *page_id,
+ const gchar *keywords);
+
gchar * yelp_document_get_page_icon (YelpDocument *document,
const gchar *page_id);
void yelp_document_set_page_icon (YelpDocument *document,
diff --git a/libyelp/yelp-mallard-document.c b/libyelp/yelp-mallard-document.c
index 74fc80ee..b72dc9a5 100644
--- a/libyelp/yelp-mallard-document.c
+++ b/libyelp/yelp-mallard-document.c
@@ -69,6 +69,7 @@ typedef struct {
gchar *page_title;
gchar *page_desc;
+ gchar *page_keywords;
gchar *next_page;
} MallardPageData;
@@ -393,6 +394,10 @@ mallard_think (YelpMallardDocument *mallard)
yelp_document_set_page_desc ((YelpDocument *) mallard,
page_data->page_id,
page_data->page_desc);
+ yelp_document_set_page_keywords ((YelpDocument *) mallard,
+ page_data->page_id,
+ page_data->page_keywords);
+
if (page_data->next_page != NULL) {
yelp_document_set_next_id ((YelpDocument *) mallard,
page_data->page_id,
@@ -662,11 +667,25 @@ mallard_page_data_info (MallardPageData *page_data,
xmlXPathObjectPtr obj;
page_data->xpath->node = child;
obj = xmlXPathCompiledEval (priv->normalize, page_data->xpath);
+ g_free(page_data->page_desc);
page_data->page_desc = g_strdup ((const gchar *) obj->stringval);
xmlXPathFreeObject (obj);
xmlAddChild (cache_node, xmlCopyNode (child, 1));
}
+ else if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "keywords")) {
+ /* FIXME: multiple keywords? same for desc/title */
+
+ YelpMallardDocumentPrivate *priv = GET_PRIV (page_data->mallard);
+ xmlXPathObjectPtr obj;
+ page_data->xpath->node = child;
+ obj = xmlXPathCompiledEval (priv->normalize, page_data->xpath);
+ g_free(page_data->page_keywords);
+ page_data->page_keywords = g_strdup ((const gchar *) obj->stringval);
+ xmlXPathFreeObject (obj);
+
+ xmlAddChild (cache_node, xmlCopyNode (child, 1));
+ }
else if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "link")) {
xmlChar *type, *next;
@@ -749,6 +768,7 @@ mallard_page_data_free (MallardPageData *page_data)
xmlXPathFreeContext (page_data->xpath);
g_free (page_data->page_title);
g_free (page_data->page_desc);
+ g_free (page_data->page_keywords);
g_free (page_data->next_page);
g_free (page_data);
}
@@ -896,16 +916,18 @@ typedef struct {
xmlNodePtr cur;
GString *str;
gboolean is_inline;
+ gboolean in_info;
} MallardIndexData;
static void
mallard_index_node (MallardIndexData *index)
{
xmlNodePtr orig, child;
- gboolean was_inline;
+ gboolean was_inline, was_info;
orig = index->cur;
was_inline = index->is_inline;
+ was_info = index->in_info;
for (child = index->cur->children; child; child = child->next) {
if (index->is_inline) {
@@ -921,18 +943,24 @@ mallard_index_node (MallardIndexData *index)
}
if (child->type != XML_ELEMENT_NODE ||
- xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "info") ||
xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "comment"))
continue;
- if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "p") ||
+ if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "info")) {
+ index->in_info = TRUE;
+ }
+ else if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "p") ||
xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "code") ||
xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "screen") ||
xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "title") ||
xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "desc") ||
+ xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "keywords") ||
xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "cite")) {
index->is_inline = TRUE;
}
+ else if (index->in_info && !index->is_inline) {
+ continue;
+ }
index->cur = child;
mallard_index_node (index);
@@ -943,6 +971,7 @@ mallard_index_node (MallardIndexData *index)
index->cur = orig;
index->is_inline = was_inline;
+ index->in_info = was_info;
}
}
diff --git a/libyelp/yelp-search-entry.c b/libyelp/yelp-search-entry.c
index 55be49cc..2b5a8196 100644
--- a/libyelp/yelp-search-entry.c
+++ b/libyelp/yelp-search-entry.c
@@ -100,7 +100,8 @@ enum {
COMPLETION_COL_DESC,
COMPLETION_COL_ICON,
COMPLETION_COL_PAGE,
- COMPLETION_COL_FLAGS
+ COMPLETION_COL_FLAGS,
+ COMPLETION_COL_KEYWORDS
};
enum {
@@ -451,7 +452,7 @@ entry_match_func (GtkEntryCompletion *completion,
YelpSearchEntry *entry)
{
gint stri;
- gchar *title, *desc, *titlecase = NULL, *desccase = NULL;
+ gchar *title, *desc, *keywords, *titlecase = NULL, *desccase = NULL, *keywordscase = NULL;
gboolean ret = FALSE;
gchar **strs;
gint flags;
@@ -470,6 +471,7 @@ entry_match_func (GtkEntryCompletion *completion,
gtk_tree_model_get (model, iter,
COMPLETION_COL_TITLE, &title,
COMPLETION_COL_DESC, &desc,
+ COMPLETION_COL_KEYWORDS, &keywords,
-1);
if (title) {
titlecase = g_utf8_casefold (title, -1);
@@ -479,14 +481,20 @@ entry_match_func (GtkEntryCompletion *completion,
desccase = g_utf8_casefold (desc, -1);
g_free (desc);
}
+ if (keywords) {
+ keywordscase = g_utf8_casefold (keywords, -1);
+ g_free (keywords);
+ }
strs = g_regex_split (nonword, key, 0);
ret = TRUE;
for (stri = 0; strs[stri]; stri++) {
if (!titlecase || !strstr (titlecase, strs[stri])) {
if (!desccase || !strstr (desccase, strs[stri])) {
- ret = FALSE;
- break;
+ if (!keywordscase || !strstr (keywordscase, strs[stri])) {
+ ret = FALSE;
+ break;
+ }
}
}
}
@@ -592,12 +600,13 @@ view_loaded (YelpView *view,
!g_str_equal (doc_uri, priv->completion_uri)) {
completion = (GtkTreeModel *) g_hash_table_lookup (completions, doc_uri);
if (completion == NULL) {
- GtkListStore *base = gtk_list_store_new (5,
+ GtkListStore *base = gtk_list_store_new (6,
G_TYPE_STRING, /* title */
G_TYPE_STRING, /* desc */
G_TYPE_STRING, /* icon */
G_TYPE_STRING, /* uri */
- G_TYPE_INT /* flags */
+ G_TYPE_INT, /* flags */
+ G_TYPE_STRING /* keywords */
);
completion = gtk_tree_model_sort_new_with_model (GTK_TREE_MODEL (base));
gtk_tree_sortable_set_default_sort_func (GTK_TREE_SORTABLE (completion),
@@ -607,15 +616,17 @@ view_loaded (YelpView *view,
if (document != NULL) {
ids = yelp_document_list_page_ids (document);
for (i = 0; ids[i]; i++) {
- gchar *title, *desc, *icon;
+ gchar *title, *desc, *icon, *keywords;
gtk_list_store_insert (GTK_LIST_STORE (base), &iter, 0);
title = yelp_document_get_page_title (document, ids[i]);
desc = yelp_document_get_page_desc (document, ids[i]);
icon = yelp_document_get_page_icon (document, ids[i]);
+ keywords = yelp_document_get_page_keywords (document, ids[i]);
gtk_list_store_set (base, &iter,
COMPLETION_COL_TITLE, title,
COMPLETION_COL_DESC, desc,
COMPLETION_COL_ICON, icon,
+ COMPLETION_COL_KEYWORDS, keywords,
COMPLETION_COL_PAGE, ids[i],
-1);
g_free (icon);