// Copyright 2017 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "services/data_decoder/xml_parser.h" #include #include #include "base/strings/string_util.h" #include "base/values.h" #include "third_party/libxml/chromium/libxml_utils.h" namespace data_decoder { using AttributeMap = std::map; using NamespaceMap = std::map; namespace { void ReportError(XmlParser::ParseCallback callback, const std::string& error) { std::move(callback).Run(/*result=*/nullptr, base::make_optional(error)); } enum class TextNodeType { kText, kCData }; // Returns false if the current node in |xml_reader| is not text or CData. // Otherwise returns true and sets |text| to the text/CData of the current node // and |node_type| to kText or kCData. bool GetTextFromNode(XmlReader* xml_reader, std::string* text, TextNodeType* node_type) { if (xml_reader->GetTextIfTextElement(text)) { *node_type = TextNodeType::kText; return true; } if (xml_reader->GetTextIfCDataElement(text)) { *node_type = TextNodeType::kCData; return true; } return false; } base::Value CreateTextNode(const std::string& text, TextNodeType node_type) { base::Value element(base::Value::Type::DICTIONARY); element.SetKey(mojom::XmlParser::kTypeKey, base::Value(node_type == TextNodeType::kText ? mojom::XmlParser::kTextNodeType : mojom::XmlParser::kCDataNodeType)); element.SetKey(mojom::XmlParser::kTextKey, base::Value(text)); return element; } // Creates and returns new element node with the tag name |name|. base::Value CreateNewElement(const std::string& name) { base::Value element(base::Value::Type::DICTIONARY); element.SetKey(mojom::XmlParser::kTypeKey, base::Value(mojom::XmlParser::kElementType)); element.SetKey(mojom::XmlParser::kTagKey, base::Value(name)); return element; } // Adds |child| as a child of |element|, creating the children list if // necessary. Returns a ponter to |child|. base::Value* AddChildToElement(base::Value* element, base::Value child) { DCHECK(element->is_dict()); base::Value* children = element->FindKey(mojom::XmlParser::kChildrenKey); DCHECK(!children || children->is_list()); if (!children) children = element->SetKey(mojom::XmlParser::kChildrenKey, base::Value(base::Value::Type::LIST)); children->GetList().push_back(std::move(child)); return &children->GetList().back(); } void PopulateNamespaces(base::Value* node_value, XmlReader* xml_reader) { DCHECK(node_value->is_dict()); NamespaceMap namespaces; if (!xml_reader->GetAllDeclaredNamespaces(&namespaces) || namespaces.empty()) return; base::Value namespace_dict(base::Value::Type::DICTIONARY); for (auto ns : namespaces) namespace_dict.SetKey(ns.first, base::Value(ns.second)); node_value->SetKey(mojom::XmlParser::kNamespacesKey, std::move(namespace_dict)); } void PopulateAttributes(base::Value* node_value, XmlReader* xml_reader) { DCHECK(node_value->is_dict()); AttributeMap attributes; if (!xml_reader->GetAllNodeAttributes(&attributes) || attributes.empty()) return; base::Value attribute_dict(base::Value::Type::DICTIONARY); for (auto attribute : attributes) attribute_dict.SetKey(attribute.first, base::Value(attribute.second)); node_value->SetKey(mojom::XmlParser::kAttributesKey, std::move(attribute_dict)); } } // namespace XmlParser::XmlParser( std::unique_ptr service_ref) : service_ref_(std::move(service_ref)) {} XmlParser::~XmlParser() = default; void XmlParser::Parse(const std::string& xml, ParseCallback callback) { XmlReader xml_reader; if (!xml_reader.Load(xml)) { ReportError(std::move(callback), "Invalid XML: failed to load"); return; } base::Value root_element; std::vector element_stack; while (xml_reader.Read()) { if (xml_reader.IsClosingElement()) { if (element_stack.empty()) { ReportError(std::move(callback), "Invalid XML: unbalanced elements"); return; } element_stack.pop_back(); continue; } std::string text; TextNodeType text_node_type = TextNodeType::kText; base::Value* current_element = element_stack.empty() ? nullptr : element_stack.back(); bool push_new_node_to_stack = false; base::Value new_element; if (GetTextFromNode(&xml_reader, &text, &text_node_type)) { if (!base::IsStringUTF8(text)) { ReportError(std::move(callback), "Invalid XML: invalid UTF8 text."); return; } new_element = CreateTextNode(text, text_node_type); } else if (xml_reader.IsElement()) { new_element = CreateNewElement(xml_reader.NodeFullName()); PopulateNamespaces(&new_element, &xml_reader); PopulateAttributes(&new_element, &xml_reader); // Self-closing (empty) element have no close tag (or children); don't // push them on the element stack. push_new_node_to_stack = !xml_reader.IsEmptyElement(); } else { // Ignore all other node types (spaces, comments, processing instructions, // DTDs...). continue; } base::Value* new_element_ptr; if (current_element) { new_element_ptr = AddChildToElement(current_element, std::move(new_element)); } else { // First element we are parsing, it becomes the root element. DCHECK(xml_reader.IsElement()); DCHECK(root_element.is_none()); root_element = std::move(new_element); new_element_ptr = &root_element; } if (push_new_node_to_stack) element_stack.push_back(new_element_ptr); } if (!element_stack.empty()) { ReportError(std::move(callback), "Invalid XML: unbalanced elements"); return; } base::DictionaryValue* dictionary = nullptr; root_element.GetAsDictionary(&dictionary); if (!dictionary || dictionary->empty()) { ReportError(std::move(callback), "Invalid XML: bad content"); return; } std::move(callback).Run( base::Value::ToUniquePtrValue(std::move(root_element)), base::Optional()); } } // namespace data_decoder