summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNickNorth <North.N@gmail.com>2013-12-03 20:58:53 +0000
committerNickNorth <North.N@gmail.com>2014-01-25 16:19:50 +0000
commit0bf823c6e5d6a7054e7cb1979eabc7695c2707e2 (patch)
tree3872cb6f796aeee38b637a47224640962f3a420b
parent37c8459693dbf55cd4683c7e288dcc9ca8899e97 (diff)
downloadcouchdb-0bf823c6e5d6a7054e7cb1979eabc7695c2707e2.tar.gz
Speed up and move couch_httpd:find_in_binary.
See https://issues.apache.org/jira/browse/COUCHDB-1953
-rw-r--r--src/couchdb/couch_httpd.erl30
-rw-r--r--src/couchdb/couch_util.erl32
-rwxr-xr-xtest/etap/043-find-in-binary.t68
3 files changed, 101 insertions, 29 deletions
diff --git a/src/couchdb/couch_httpd.erl b/src/couchdb/couch_httpd.erl
index 465bc7a41..9245f4b20 100644
--- a/src/couchdb/couch_httpd.erl
+++ b/src/couchdb/couch_httpd.erl
@@ -1003,7 +1003,7 @@ split_header(Line) ->
mochiweb_util:parse_header(Value)}].
read_until(#mp{data_fun=DataFun, buffer=Buffer}=Mp, Pattern, Callback) ->
- case find_in_binary(Pattern, Buffer) of
+ case couch_util:find_in_binary(Pattern, Buffer) of
not_found ->
Callback2 = Callback(Buffer),
{Buffer2, DataFun2} = DataFun(),
@@ -1079,34 +1079,6 @@ check_for_last(#mp{buffer=Buffer, data_fun=DataFun}=Mp) ->
data_fun = DataFun2})
end.
-find_in_binary(_B, <<>>) ->
- not_found;
-
-find_in_binary(B, Data) ->
- case binary:match(Data, [B], []) of
- nomatch ->
- partial_find(binary:part(B, {0, byte_size(B) - 1}),
- binary:part(Data, {byte_size(Data), -byte_size(Data) + 1}), 1);
- {Pos, _Len} ->
- {exact, Pos}
- end.
-
-partial_find(<<>>, _Data, _Pos) ->
- not_found;
-
-partial_find(B, Data, N) when byte_size(Data) > 0 ->
- case binary:match(Data, [B], []) of
- nomatch ->
- partial_find(binary:part(B, {0, byte_size(B) - 1}),
- binary:part(Data, {byte_size(Data), -byte_size(Data) + 1}), N + 1);
- {Pos, _Len} ->
- {partial, N + Pos}
- end;
-
-partial_find(_B, _Data, _N) ->
- not_found.
-
-
validate_bind_address(Address) ->
case inet_parse:address(Address) of
{ok, _} -> ok;
diff --git a/src/couchdb/couch_util.erl b/src/couchdb/couch_util.erl
index afe3528a6..2509bef92 100644
--- a/src/couchdb/couch_util.erl
+++ b/src/couchdb/couch_util.erl
@@ -29,6 +29,7 @@
-export([encode_doc_id/1]).
-export([with_db/2]).
-export([rfc1123_date/0, rfc1123_date/1]).
+-export([find_in_binary/2]).
-include("couch_db.hrl").
@@ -487,3 +488,34 @@ month(9) -> "Sep";
month(10) -> "Oct";
month(11) -> "Nov";
month(12) -> "Dec".
+
+
+find_in_binary(_B, <<>>) ->
+ not_found;
+
+find_in_binary(B, Data) ->
+ case binary:match(Data, [B], []) of
+ nomatch ->
+ MatchLength = erlang:min(byte_size(B), byte_size(Data)),
+ match_prefix_at_end(binary:part(B, {0, MatchLength}),
+ binary:part(Data, {byte_size(Data), -MatchLength}),
+ MatchLength, byte_size(Data) - MatchLength);
+ {Pos, _Len} ->
+ {exact, Pos}
+ end.
+
+match_prefix_at_end(Prefix, Data, PrefixLength, N) ->
+ FirstCharMatches = binary:matches(Data, [binary:part(Prefix, {0, 1})], []),
+ match_rest_of_prefix(FirstCharMatches, Prefix, Data, PrefixLength, N).
+
+match_rest_of_prefix([], _Prefix, _Data, _PrefixLength, _N) ->
+ not_found;
+
+match_rest_of_prefix([{Pos, _Len} | Rest], Prefix, Data, PrefixLength, N) ->
+ case binary:match(binary:part(Data, {PrefixLength, Pos - PrefixLength}),
+ [binary:part(Prefix, {0, PrefixLength - Pos})], []) of
+ nomatch ->
+ match_rest_of_prefix(Rest, Prefix, Data, PrefixLength, N);
+ {_Pos, _Len1} ->
+ {partial, N + Pos}
+ end.
diff --git a/test/etap/043-find-in-binary.t b/test/etap/043-find-in-binary.t
new file mode 100755
index 000000000..dca1d9c72
--- /dev/null
+++ b/test/etap/043-find-in-binary.t
@@ -0,0 +1,68 @@
+#!/usr/bin/env escript
+%% -*- erlang -*-
+
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+main(_) ->
+ test_util:init_code_path(),
+
+ etap:plan(length(cases())),
+ case (catch test()) of
+ ok ->
+ etap:end_tests();
+ Other ->
+ etap:diag(io_lib:format("Test died abnormally: ~p", [Other])),
+ etap:bail(Other)
+ end,
+ ok.
+
+
+test() ->
+ lists:foreach(fun({Needle, Haystack, Result}) ->
+ try
+ Msg = io_lib:format("Looking for ~s in ~s", [Needle, Haystack]),
+ etap:is(couch_util:find_in_binary(Needle, Haystack), Result, Msg)
+ catch _T:_R ->
+ etap:diag("~p", [{_T, _R}])
+ end
+ end, cases()),
+ ok.
+
+
+cases() ->
+ [
+ {<<"foo">>, <<"foobar">>, {exact, 0}},
+ {<<"foo">>, <<"foofoo">>, {exact, 0}},
+ {<<"foo">>, <<"barfoo">>, {exact, 3}},
+ {<<"foo">>, <<"barfo">>, {partial, 3}},
+ {<<"f">>, <<"fobarfff">>, {exact, 0}},
+ {<<"f">>, <<"obarfff">>, {exact, 4}},
+ {<<"f">>, <<"obarggf">>, {exact, 6}},
+ {<<"f">>, <<"f">>, {exact, 0}},
+ {<<"f">>, <<"g">>, not_found},
+ {<<"foo">>, <<"f">>, {partial, 0}},
+ {<<"foo">>, <<"g">>, not_found},
+ {<<"foo">>, <<"">>, not_found},
+ {<<"fofo">>, <<"foofo">>, {partial, 3}},
+ {<<"foo">>, <<"gfobarfo">>, {partial, 6}},
+ {<<"foo">>, <<"gfobarf">>, {partial, 6}},
+ {<<"foo">>, <<"gfobar">>, not_found},
+ {<<"fog">>, <<"gbarfogquiz">>, {exact, 4}},
+ {<<"ggg">>, <<"ggg">>, {exact, 0}},
+ {<<"ggg">>, <<"ggggg">>, {exact, 0}},
+ {<<"ggg">>, <<"bggg">>, {exact, 1}},
+ {<<"ggg">>, <<"bbgg">>, {partial, 2}},
+ {<<"ggg">>, <<"bbbg">>, {partial, 3}},
+ {<<"ggg">>, <<"bgbggbggg">>, {exact, 6}},
+ {<<"ggg">>, <<"bgbggb">>, not_found}
+ ].