summaryrefslogtreecommitdiff
path: root/chromium/third_party/libxml/chromium/xml_reader.h
blob: eb1ae63807f60a79b390a7dca502c362b00c38ea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
#define THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_

#include <map>
#include <string>

// Forward declaration of libxml's reader struct. Only a pointer to it is
// stored by XmlReader below, so an incomplete type is sufficient here and this
// header does not include any libxml header itself.
extern "C" {
struct _xmlTextReader;
}

// XmlReader is a wrapper class around libxml's xmlReader,
// providing a simplified C++ API.
class XmlReader {
 public:
  XmlReader();
  ~XmlReader();

  // Not copyable. |reader_| is a raw pointer managed by this class, so a
  // memberwise copy would leave two XmlReader objects referring to the same
  // underlying libxml reader.
  // NOTE(review): assumes ~XmlReader() releases |reader_| -- confirm in the
  // implementation file.
  XmlReader(const XmlReader&) = delete;
  XmlReader& operator=(const XmlReader&) = delete;

  // Load a document into the reader from memory.  |input| must be UTF-8 and
  // exist for the lifetime of this object.  Returns false on error.
  // TODO(evanm): handle encodings other than UTF-8?
  bool Load(const std::string& input);

  // Load a document into the reader from a file.  Returns false on error.
  bool LoadFile(const std::string& file_path);

  // Wrappers around libxml functions -----------------------------------------

  // Read() advances to the next node.  Returns false on EOF or error.
  bool Read();

  // Next(), when pointing at an opening tag, advances to the node after
  // the matching closing tag.  Returns false on EOF or error.
  bool Next();

  // Return the depth in the tree of the current node.
  int Depth();

  // Returns the "local" name of the current node.
  // For a tag like <foo:bar>, this is the string "bar".
  std::string NodeName();

  // Returns the name of the current node.
  // For a tag like <foo:bar>, this is the string "foo:bar".
  std::string NodeFullName();

  // When pointing at a tag, retrieves the value of an attribute.
  // Returns false on failure.
  // E.g. for <foo bar:baz="a">, NodeAttribute("bar:baz", &value)
  // returns true and |value| is set to "a".
  bool NodeAttribute(const char* name, std::string* value);

  // Populates |attributes| with all the attributes of the current tag and
  // returns true. Note that namespace declarations are not reported.
  // Returns false if there are no attributes in the current tag.
  bool GetAllNodeAttributes(std::map<std::string, std::string>* attributes);

  // Populates |namespaces| with all the namespaces (prefix/URI pairs) declared
  // in the current tag and returns true. Note that the default namespace, if
  // declared in the tag, is populated with an empty prefix.
  // Returns false if there are no namespaces declared in the current tag.
  bool GetAllDeclaredNamespaces(std::map<std::string, std::string>* namespaces);

  // Sets |content| to the content of the current node if it is a #text node.
  // Returns true if the current node is a #text node, false otherwise.
  bool GetTextIfTextElement(std::string* content);

  // Sets |content| to the content of the current node if it is a #cdata node.
  // Returns true if the current node is a #cdata node, false otherwise.
  bool GetTextIfCDataElement(std::string* content);

  // Returns true if the node is an element (e.g. <foo>). Note this returns
  // false for self-closing elements (e.g. <foo/>). Use IsEmptyElement() to
  // check for those.
  bool IsElement();

  // Returns true if the node is a closing element (e.g. </foo>).
  bool IsClosingElement();

  // Returns true if the current node is an empty (self-closing) element (e.g.
  // <foo/>).
  bool IsEmptyElement();

  // Helper functions not provided by libxml ----------------------------------

  // Return the string content within an element.
  // "<foo>bar</foo>" is a sequence of three nodes:
  // (1) open tag, (2) text, (3) close tag.
  // With the reader currently at (1), this returns the text of (2),
  // and advances past (3).
  // Returns false on error.
  bool ReadElementContent(std::string* content);

  // Skip to the next opening tag, returning false if we reach a closing
  // tag or EOF first.
  // If currently on an opening tag, doesn't advance at all.
  bool SkipToElement();

 private:
  // Returns the libxml node type of the current node.
  int NodeType();

  // The underlying libxml xmlTextReader.  Defaults to null so the pointer is
  // never left indeterminate, whatever the constructor chooses to initialize.
  // Spelled with the |struct| keyword so the declaration names the
  // namespace-scope libxml type even where no earlier declaration is visible.
  struct _xmlTextReader* reader_ = nullptr;
};

#endif  // THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_