summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Alexander Steffens (heftig) <jan.steffens@gmail.com>2022-07-15 21:18:47 +0200
committerRichard Hughes <richard@hughsie.com>2022-07-18 19:43:07 +0100
commit674490bd54ff206f213ca4547db7fdb591a0fb3d (patch)
tree754f13870582908910ebb30fff6ea2daa360ab72
parent8d39640032752bf81d648d018ff115aa8d495957 (diff)
downloadappstream-glib-674490bd54ff206f213ca4547db7fdb591a0fb3d.tar.gz
Improve handling of <em> and <code> tags
This is still not great code but at least somewhat an improvement. Tests were expanded to showcase the new behavior. I think, ideally, we would append opening/closing tags to the ancestor `p` or `li` node's cdata as soon as we encounter the start/end of an `em` or `code` element. This would then also handle empty elements correctly.
-rw-r--r--libappstream-glib/as-node.c108
-rw-r--r--libappstream-glib/as-self-test.c39
2 files changed, 101 insertions, 46 deletions
diff --git a/libappstream-glib/as-node.c b/libappstream-glib/as-node.c
index 9c6d5d4..5cf9dcc 100644
--- a/libappstream-glib/as-node.c
+++ b/libappstream-glib/as-node.c
@@ -674,6 +674,7 @@ as_node_end_element_cb (GMarkupParseContext *context,
GError **error)
{
AsNodeToXmlHelper *helper = (AsNodeToXmlHelper *) user_data;
+ AsNodeData *data = helper->current->data;
/* do not create a child node for em and code tags */
if (g_strcmp0 (element_name, "em") == 0) {
@@ -684,6 +685,42 @@ as_node_end_element_cb (GMarkupParseContext *context,
helper->is_code_text = 0;
return;
}
+
+ if (data->cdata != NULL) {
+ /* split up into lines and add each with spaces stripped */
+ if ((helper->flags & AS_NODE_FROM_XML_FLAG_LITERAL_TEXT) == 0) {
+ AsRefString *cdata = data->cdata;
+ data->cdata = as_node_reflow_text (cdata, strlen (cdata));
+ as_ref_string_unref (cdata);
+ }
+
+ /* intern commonly duplicated tag values and save a bit of memory */
+ if (data->is_tag_valid) {
+ AsNode *root = g_node_get_root (helper->current);
+ switch (data->tag) {
+ case AS_TAG_CATEGORY:
+ case AS_TAG_COMPULSORY_FOR_DESKTOP:
+ case AS_TAG_CONTENT_ATTRIBUTE:
+ case AS_TAG_DEVELOPER_NAME:
+ case AS_TAG_EXTENDS:
+ case AS_TAG_ICON:
+ case AS_TAG_ID:
+ case AS_TAG_KUDO:
+ case AS_TAG_LANG:
+ case AS_TAG_METADATA_LICENSE:
+ case AS_TAG_MIMETYPE:
+ case AS_TAG_PROJECT_GROUP:
+ case AS_TAG_PROJECT_LICENSE:
+ case AS_TAG_SOURCE_PKGNAME:
+ case AS_TAG_URL:
+ as_node_cdata_to_intern (root, data);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
helper->current = helper->current->parent;
}
@@ -715,22 +752,9 @@ as_node_text_cb (GMarkupParseContext *context,
if (i >= text_len)
return;
- /* split up into lines and add each with spaces stripped */
- if (data->cdata != NULL) {
- /* support em and code tags */
- if (g_strcmp0 (as_tag_data_get_name (data), "p") == 0 ||
- g_strcmp0 (as_tag_data_get_name (data), "li") == 0) {
- g_autoptr(GString) str = g_string_new (data->cdata);
- as_ref_string_unref (data->cdata);
- if (helper->is_em_text)
- g_string_append_printf (str, "<em>%s</em>", text);
- else if (helper->is_code_text)
- g_string_append_printf (str, "<code>%s</code>", text);
- else
- g_string_append (str, text);
- data->cdata = as_ref_string_new_with_length (str->str, str->len);
- return;
- }
+ if (data->cdata != NULL &&
+ g_strcmp0 (as_tag_data_get_name (data), "p") != 0 &&
+ g_strcmp0 (as_tag_data_get_name (data), "li") != 0) {
g_set_error (error,
AS_NODE_ERROR,
AS_NODE_ERROR_INVALID_MARKUP,
@@ -739,37 +763,33 @@ as_node_text_cb (GMarkupParseContext *context,
data->cdata, text);
return;
}
- if ((helper->flags & AS_NODE_FROM_XML_FLAG_LITERAL_TEXT) > 0) {
- data->cdata = as_ref_string_new_with_length (text, text_len + 1);
- } else {
- data->cdata = as_node_reflow_text (text, (gssize) text_len);
- }
- /* intern commonly duplicated tag values and save a bit of memory */
- if (data->is_tag_valid && data->cdata != NULL) {
- AsNode *root = g_node_get_root (helper->current);
- switch (data->tag) {
- case AS_TAG_CATEGORY:
- case AS_TAG_COMPULSORY_FOR_DESKTOP:
- case AS_TAG_CONTENT_ATTRIBUTE:
- case AS_TAG_DEVELOPER_NAME:
- case AS_TAG_EXTENDS:
- case AS_TAG_ICON:
- case AS_TAG_ID:
- case AS_TAG_KUDO:
- case AS_TAG_LANG:
- case AS_TAG_METADATA_LICENSE:
- case AS_TAG_MIMETYPE:
- case AS_TAG_PROJECT_GROUP:
- case AS_TAG_PROJECT_LICENSE:
- case AS_TAG_SOURCE_PKGNAME:
- case AS_TAG_URL:
- as_node_cdata_to_intern (root, data);
- break;
- default:
- break;
+ /* support em and code tags */
+ if (helper->is_em_text || helper->is_code_text || data->cdata != NULL) {
+ g_autoptr(GString) str = g_string_new (NULL);
+
+ if (data->cdata != NULL) {
+ g_string_append (str, data->cdata);
+ as_ref_string_unref (data->cdata);
}
+
+ if (helper->is_em_text)
+ g_string_append (str, "<em>");
+ if (helper->is_code_text)
+ g_string_append (str, "<code>");
+
+ g_string_append_len (str, text, text_len);
+
+ if (helper->is_code_text)
+ g_string_append (str, "</code>");
+ if (helper->is_em_text)
+ g_string_append (str, "</em>");
+
+ data->cdata = as_ref_string_new_with_length (str->str, str->len);
+ return;
}
+
+ data->cdata = as_ref_string_new_with_length (text, text_len);
}
static void
diff --git a/libappstream-glib/as-self-test.c b/libappstream-glib/as-self-test.c
index 11b51b6..aade35c 100644
--- a/libappstream-glib/as-self-test.c
+++ b/libappstream-glib/as-self-test.c
@@ -2870,6 +2870,15 @@ as_test_node_xml_func (void)
"It now also supports <em>em</em> and <code>code</code> tags."
"</p>"
"</description>";
+ const gchar *valid_em_code_2 = "<description>"
+ "<p><em>Emphasis</em> at the start of the paragraph</p>"
+ "</description>";
+ const gchar *valid_em_code_empty = "<description>"
+ "<p><em></em></p>"
+ "</description>";
+ const gchar *valid_em_code_empty_2 = "<description>"
+ "<p>empty <em></em> emphasis</p>"
+ "</description>";
GError *error = NULL;
AsNode *n2;
AsNode *root;
@@ -2940,8 +2949,34 @@ as_test_node_xml_func (void)
n2 = as_node_find (root, "description/p");
g_assert (n2 != NULL);
- printf ("<%s>\n", as_node_get_data (n2));
- g_assert_cmpstr (as_node_get_data (n2), ==, "It now also supports<em>em</em> and <code>code</code> tags.");
+ g_assert_cmpstr (as_node_get_data (n2), ==, "It now also supports <em>em</em> and <code>code</code> tags.");
+ as_node_unref (root);
+
+ root = as_node_from_xml (valid_em_code_2, 0, &error);
+ g_assert_no_error (error);
+ g_assert (root != NULL);
+
+ n2 = as_node_find (root, "description/p");
+ g_assert (n2 != NULL);
+ g_assert_cmpstr (as_node_get_data (n2), ==, "<em>Emphasis</em> at the start of the paragraph");
+ as_node_unref (root);
+
+ root = as_node_from_xml (valid_em_code_empty, 0, &error);
+ g_assert_no_error (error);
+ g_assert (root != NULL);
+
+ n2 = as_node_find (root, "description/p");
+ g_assert (n2 != NULL);
+ g_assert_cmpstr (as_node_get_data (n2), ==, NULL);
+ as_node_unref (root);
+
+ root = as_node_from_xml (valid_em_code_empty_2, 0, &error);
+ g_assert_no_error (error);
+ g_assert (root != NULL);
+
+ n2 = as_node_find (root, "description/p");
+ g_assert (n2 != NULL);
+ g_assert_cmpstr (as_node_get_data (n2), ==, "empty emphasis");
as_node_unref (root);
/* keep comments */