diff options
author | NickNorth <North.N@gmail.com> | 2013-12-03 20:58:53 +0000 |
---|---|---|
committer | NickNorth <North.N@gmail.com> | 2014-01-25 16:19:50 +0000 |
commit | 0bf823c6e5d6a7054e7cb1979eabc7695c2707e2 (patch) | |
tree | 3872cb6f796aeee38b637a47224640962f3a420b | |
parent | 37c8459693dbf55cd4683c7e288dcc9ca8899e97 (diff) | |
download | couchdb-0bf823c6e5d6a7054e7cb1979eabc7695c2707e2.tar.gz |
Speed up and move couch_httpd:find_in_binary.
See https://issues.apache.org/jira/browse/COUCHDB-1953
-rw-r--r-- | src/couchdb/couch_httpd.erl | 30 | ||||
-rw-r--r-- | src/couchdb/couch_util.erl | 32 | ||||
-rwxr-xr-x | test/etap/043-find-in-binary.t | 68 |
3 files changed, 101 insertions, 29 deletions
diff --git a/src/couchdb/couch_httpd.erl b/src/couchdb/couch_httpd.erl index 465bc7a41..9245f4b20 100644 --- a/src/couchdb/couch_httpd.erl +++ b/src/couchdb/couch_httpd.erl @@ -1003,7 +1003,7 @@ split_header(Line) -> mochiweb_util:parse_header(Value)}]. read_until(#mp{data_fun=DataFun, buffer=Buffer}=Mp, Pattern, Callback) -> - case find_in_binary(Pattern, Buffer) of + case couch_util:find_in_binary(Pattern, Buffer) of not_found -> Callback2 = Callback(Buffer), {Buffer2, DataFun2} = DataFun(), @@ -1079,34 +1079,6 @@ check_for_last(#mp{buffer=Buffer, data_fun=DataFun}=Mp) -> data_fun = DataFun2}) end. -find_in_binary(_B, <<>>) -> - not_found; - -find_in_binary(B, Data) -> - case binary:match(Data, [B], []) of - nomatch -> - partial_find(binary:part(B, {0, byte_size(B) - 1}), - binary:part(Data, {byte_size(Data), -byte_size(Data) + 1}), 1); - {Pos, _Len} -> - {exact, Pos} - end. - -partial_find(<<>>, _Data, _Pos) -> - not_found; - -partial_find(B, Data, N) when byte_size(Data) > 0 -> - case binary:match(Data, [B], []) of - nomatch -> - partial_find(binary:part(B, {0, byte_size(B) - 1}), - binary:part(Data, {byte_size(Data), -byte_size(Data) + 1}), N + 1); - {Pos, _Len} -> - {partial, N + Pos} - end; - -partial_find(_B, _Data, _N) -> - not_found. - - validate_bind_address(Address) -> case inet_parse:address(Address) of {ok, _} -> ok; diff --git a/src/couchdb/couch_util.erl b/src/couchdb/couch_util.erl index afe3528a6..2509bef92 100644 --- a/src/couchdb/couch_util.erl +++ b/src/couchdb/couch_util.erl @@ -29,6 +29,7 @@ -export([encode_doc_id/1]). -export([with_db/2]). -export([rfc1123_date/0, rfc1123_date/1]). +-export([find_in_binary/2]). -include("couch_db.hrl"). @@ -487,3 +488,34 @@ month(9) -> "Sep"; month(10) -> "Oct"; month(11) -> "Nov"; month(12) -> "Dec". + + +find_in_binary(_B, <<>>) -> + not_found; + +find_in_binary(B, Data) -> + case binary:match(Data, [B], []) of + nomatch -> + MatchLength = erlang:min(byte_size(B), byte_size(Data)), + match_prefix_at_end(binary:part(B, {0, MatchLength}), + binary:part(Data, {byte_size(Data), -MatchLength}), + MatchLength, byte_size(Data) - MatchLength); + {Pos, _Len} -> + {exact, Pos} + end. + +match_prefix_at_end(Prefix, Data, PrefixLength, N) -> + FirstCharMatches = binary:matches(Data, [binary:part(Prefix, {0, 1})], []), + match_rest_of_prefix(FirstCharMatches, Prefix, Data, PrefixLength, N). + +match_rest_of_prefix([], _Prefix, _Data, _PrefixLength, _N) -> + not_found; + +match_rest_of_prefix([{Pos, _Len} | Rest], Prefix, Data, PrefixLength, N) -> + case binary:match(binary:part(Data, {PrefixLength, Pos - PrefixLength}), + [binary:part(Prefix, {0, PrefixLength - Pos})], []) of + nomatch -> + match_rest_of_prefix(Rest, Prefix, Data, PrefixLength, N); + {_Pos, _Len1} -> + {partial, N + Pos} + end. diff --git a/test/etap/043-find-in-binary.t b/test/etap/043-find-in-binary.t new file mode 100755 index 000000000..dca1d9c72 --- /dev/null +++ b/test/etap/043-find-in-binary.t @@ -0,0 +1,68 @@ +#!/usr/bin/env escript +%% -*- erlang -*- + +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +main(_) -> + test_util:init_code_path(), + + etap:plan(length(cases())), + case (catch test()) of + ok -> + etap:end_tests(); + Other -> + etap:diag(io_lib:format("Test died abnormally: ~p", [Other])), + etap:bail(Other) + end, + ok. + + +test() -> + lists:foreach(fun({Needle, Haystack, Result}) -> + try + Msg = io_lib:format("Looking for ~s in ~s", [Needle, Haystack]), + etap:is(couch_util:find_in_binary(Needle, Haystack), Result, Msg) + catch _T:_R -> + etap:diag("~p", [{_T, _R}]) + end + end, cases()), + ok. + + +cases() -> + [ + {<<"foo">>, <<"foobar">>, {exact, 0}}, + {<<"foo">>, <<"foofoo">>, {exact, 0}}, + {<<"foo">>, <<"barfoo">>, {exact, 3}}, + {<<"foo">>, <<"barfo">>, {partial, 3}}, + {<<"f">>, <<"fobarfff">>, {exact, 0}}, + {<<"f">>, <<"obarfff">>, {exact, 4}}, + {<<"f">>, <<"obarggf">>, {exact, 6}}, + {<<"f">>, <<"f">>, {exact, 0}}, + {<<"f">>, <<"g">>, not_found}, + {<<"foo">>, <<"f">>, {partial, 0}}, + {<<"foo">>, <<"g">>, not_found}, + {<<"foo">>, <<"">>, not_found}, + {<<"fofo">>, <<"foofo">>, {partial, 3}}, + {<<"foo">>, <<"gfobarfo">>, {partial, 6}}, + {<<"foo">>, <<"gfobarf">>, {partial, 6}}, + {<<"foo">>, <<"gfobar">>, not_found}, + {<<"fog">>, <<"gbarfogquiz">>, {exact, 4}}, + {<<"ggg">>, <<"ggg">>, {exact, 0}}, + {<<"ggg">>, <<"ggggg">>, {exact, 0}}, + {<<"ggg">>, <<"bggg">>, {exact, 1}}, + {<<"ggg">>, <<"bbgg">>, {partial, 2}}, + {<<"ggg">>, <<"bbbg">>, {partial, 3}}, + {<<"ggg">>, <<"bgbggbggg">>, {exact, 6}}, + {<<"ggg">>, <<"bgbggb">>, not_found} + ]. |