From ecd535d7fb4036baf949833f7f88cfe0911ddca7 Mon Sep 17 00:00:00 2001 From: Steven Danna Date: Mon, 5 Oct 2015 14:18:52 +0100 Subject: Fix search result pagination The start and rows parameter that are passed as part of the search request are passed directly to Solr on the backend. Results from Solr may contain deleted nodes no longer in the erchef database. Erchef will filter such nodes from the results. Thus, a user may receive fewer rows than they asked for. Incrementing 'start' only by the number of rows received will then result in the next Solr response overlapping the first, which can lead to duplicate results. In the case of a Solr results page that was completely filtered, it would lead to an infinite loop. This commit changes the code to always increment by the requested page size (args[:rows]) when it is non-nil. Incrementing by the length of the response set is still wrong in the case when the args[:rows] is nil, but the server doesn't give us anything else to increment by. Fixes #4027 --- lib/chef/search/query.rb | 18 ++++++++++++++++-- spec/unit/search/query_spec.rb | 20 +++++++++++++++++++- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/lib/chef/search/query.rb b/lib/chef/search/query.rb index 6469a18c49..658af8779c 100644 --- a/lib/chef/search/query.rb +++ b/lib/chef/search/query.rb @@ -88,8 +88,21 @@ WARNDEP if block response["rows"].each { |row| block.call(row) if row } - unless (response["start"] + response["rows"].length) >= response["total"] - args_h[:start] = response["start"] + response["rows"].length + # + # args_h[:rows] and args_h[:start] are the page size and + # start position requested of the search index backing the + # search API. + # + # The response may contain fewer rows than arg_h[:rows] if + # the page of index results included deleted nodes which + # have been filtered from the returned data. In this case, + # we still want to start the next page at start + + # args_h[:rows] to avoid asking the search backend for + # overlapping pages (which could result in duplicates). + # + next_start = response["start"] + (args_h[:rows] || response["rows"].length) + unless next_start >= response["total"] + args_h[:start] = next_start search(type, query, args_h, &block) end true @@ -99,6 +112,7 @@ WARNDEP end private + def validate_type(t) unless t.kind_of?(String) || t.kind_of?(Symbol) msg = "Invalid search object type #{t.inspect} (#{t.class}), must be a String or Symbol." + diff --git a/spec/unit/search/query_spec.rb b/spec/unit/search/query_spec.rb index 59ac80f228..f85b1760d4 100644 --- a/spec/unit/search/query_spec.rb +++ b/spec/unit/search/query_spec.rb @@ -83,6 +83,8 @@ describe Chef::Search::Query do describe "search" do let(:query_string) { "search/node?q=platform:rhel&sort=X_CHEF_id_CHEF_X%20asc&start=0" } let(:query_string_continue) { "search/node?q=platform:rhel&sort=X_CHEF_id_CHEF_X%20asc&start=4" } + let(:query_string_with_rows) { "search/node?q=platform:rhel&sort=X_CHEF_id_CHEF_X%20asc&start=0&rows=4" } + let(:query_string_continue_with_rows) { "search/node?q=platform:rhel&sort=X_CHEF_id_CHEF_X%20asc&start=4&rows=4" } let(:response) { { "rows" => [ @@ -149,6 +151,14 @@ describe Chef::Search::Query do r } + let(:big_response_empty) { + { + "start" => 0, + "total" => 8, + "rows" => [] + } + } + let(:big_response_end) { r = response.dup r["start"] = 4 @@ -208,7 +218,7 @@ describe Chef::Search::Query do it "pages through the responses" do @call_me = double("blocky") response["rows"].each { |r| expect(@call_me).to receive(:do).with(r) } - query.search(:node, "*:*", sort: nil, start: 0, rows: 1) { |r| @call_me.do(r) } + query.search(:node, "*:*", sort: nil, start: 0, rows: 4) { |r| @call_me.do(r) } end it "sends multiple API requests when the server indicates there is more data" do @@ -219,6 +229,14 @@ describe Chef::Search::Query do end end + it "paginates correctly in the face of filtered nodes" do + expect(rest).to receive(:get_rest).with(query_string_with_rows).and_return(big_response_empty) + expect(rest).to receive(:get_rest).with(query_string_continue_with_rows).and_return(big_response_end) + query.search(:node, "platform:rhel", rows: 4) do |r| + nil + end + end + context "when :filter_result is provided as a result" do include_context "filtered search" do let(:filter_key) { :filter_result } -- cgit v1.2.1