summaryrefslogtreecommitdiff
path: root/lib/banzai/querying.rb
blob: a19a05e8c0ded2ae648c8feeb7efe085023e1550 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
module Banzai
  # Helpers for running CSS queries against Nokogiri documents.
  module Querying
    module_function

    # Searches a Nokogiri document using a CSS query, optionally optimizing it
    # whenever possible.
    #
    # document          - A document/element to search.
    # query             - The CSS query to use.
    # reference_options - A hash with nodes filter options. Only
    #                     `location: :beginning` is interpreted here.
    #
    # Returns an array of Nokogiri::XML::Element objects if
    # `location: :beginning` is given in reference_options. Otherwise it
    # returns the result of `document.xpath` unchanged (a
    # Nokogiri::XML::NodeSet).
    def css(document, query, reference_options = {})
      # When using "a.foo" Nokogiri compiles this to "//a[...]" but
      # "descendant::a[...]" is quite a bit faster and achieves the same result.
      #
      # Only the first expression returned by Nokogiri is used, and only its
      # leading "//" is rewritten: `\A` anchors to the start of the string,
      # whereas `^` would also match after any newline embedded in the
      # expression (for example inside a quoted attribute value).
      xpath = Nokogiri::CSS.xpath_for(query).first.sub(%r{\A//}, 'descendant::')
      xpath = restrict_to_p_nodes_at_root(xpath) if filter_nodes_at_beginning?(reference_options)

      filter_nodes(document.xpath(xpath), reference_options)
    end

    # Rewrites every "descendant::" axis in the expression to "./p/", so the
    # search starts from `p` elements that are direct children of the context
    # node.
    #
    # xpath - The XPath expression (a String) to rewrite.
    #
    # Returns a new String; the argument is not modified.
    def restrict_to_p_nodes_at_root(xpath)
      xpath.gsub('descendant::', './p/')
    end

    # Applies the filtering requested through reference_options.
    #
    # nodes             - The nodes found by the query.
    # reference_options - A hash with nodes filter options (may be nil).
    #
    # Returns the filtered array when `location: :beginning` is requested,
    # otherwise returns `nodes` untouched.
    def filter_nodes(nodes, reference_options)
      return nodes unless filter_nodes_at_beginning?(reference_options)

      filter_nodes_at_beginning(nodes)
    end

    # Tells whether only nodes at the beginning of their parent are wanted.
    #
    # reference_options - A hash with nodes filter options (may be nil).
    #
    # Returns a truthy value only when reference_options[:location] is
    # :beginning.
    def filter_nodes_at_beginning?(reference_options)
      reference_options && reference_options[:location] == :beginning
    end

    # Selects child nodes if they are present in the beginning among other siblings.
    #
    # nodes - A Nokogiri::XML::NodeSet.
    #
    # Returns an array of Nokogiri::XML::Element objects.
    def filter_nodes_at_beginning(nodes)
      filtered_nodes = []

      nodes.group_by(&:parent).each do |parent, matched|
        # Work on a private copy so the grouped array is left intact.
        pending = matched.dup

        parent.children.each do |child|
          # Children whose text is blank (e.g. whitespace-only) neither count
          # as a match nor interrupt the leading run.
          next if child.text.blank?

          # Walk the parent's children and the matched nodes in lockstep; the
          # leading run ends at the first non-blank child that is not the next
          # matched node (or when the matched nodes run out, as nil never
          # equals a child).
          node = pending.shift
          break unless node == child

          filtered_nodes << node
        end
      end

      filtered_nodes
    end
  end
end