diff options
author | antirez <antirez@gmail.com> | 2020-04-06 13:51:55 +0200 |
---|---|---|
committer | antirez <antirez@gmail.com> | 2020-04-06 13:51:55 +0200 |
commit | 121c51f4f338dcb160398c3254024867807aa112 (patch) | |
tree | a9486ab9ac6e7fea00fe27f4e6f5df7f4dc8dbb8 /src/t_string.c | |
parent | af5c11874c4e534db0ca1c44c2ca20fec1ab2700 (diff) | |
parent | af3c722feccb2d002e57f02ee5ef623f86e7ea2f (diff) | |
download | redis-121c51f4f338dcb160398c3254024867807aa112.tar.gz |
Merge branch 'lcs' into unstable
Diffstat (limited to 'src/t_string.c')
-rw-r--r-- | src/t_string.c | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/src/t_string.c b/src/t_string.c index 3174e9ccd..335bda404 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -479,3 +479,207 @@ void strlenCommand(client *c) { checkType(c,o,OBJ_STRING)) return; addReplyLongLong(c,stringObjectLen(o)); } + +/* LCS -- Longest common subsequence. + * + * LCS [IDX] [MINMATCHLEN <len>] + * STRINGS <string> <string> | KEYS <keya> <keyb> */ +void lcsCommand(client *c) { + uint32_t i, j; + long long minmatchlen = 0; + sds a = NULL, b = NULL; + int getlen = 0, getidx = 0, withmatchlen = 0; + robj *obja = NULL, *objb = NULL; + + for (j = 1; j < (uint32_t)c->argc; j++) { + char *opt = c->argv[j]->ptr; + int moreargs = (c->argc-1) - j; + + if (!strcasecmp(opt,"IDX")) { + getidx = 1; + } else if (!strcasecmp(opt,"LEN")) { + getlen = 1; + } else if (!strcasecmp(opt,"WITHMATCHLEN")) { + withmatchlen = 1; + } else if (!strcasecmp(opt,"MINMATCHLEN") && moreargs) { + if (getLongLongFromObjectOrReply(c,c->argv[j+1],&minmatchlen,NULL) + != C_OK) return; + if (minmatchlen < 0) minmatchlen = 0; + j++; + } else if (!strcasecmp(opt,"STRINGS")) { + if (a != NULL) { + addReplyError(c,"Either use STRINGS or KEYS"); + return; + } + a = c->argv[j+1]->ptr; + b = c->argv[j+2]->ptr; + j += 2; + } else if (!strcasecmp(opt,"KEYS")) { + if (a != NULL) { + addReplyError(c,"Either use STRINGS or KEYS"); + return; + } + obja = lookupKeyRead(c->db,c->argv[j+1]); + objb = lookupKeyRead(c->db,c->argv[j+2]); + obja = obja ? getDecodedObject(obja) : createStringObject("",0); + objb = objb ? getDecodedObject(objb) : createStringObject("",0); + a = obja->ptr; + b = objb->ptr; + j += 2; + } else { + addReply(c,shared.syntaxerr); + return; + } + } + + /* Complain if the user passed ambiguous parameters. */ + if (a == NULL) { + addReplyError(c,"Please specify two strings: " + "STRINGS or KEYS options are mandatory"); + return; + } else if (getlen && getidx) { + addReplyError(c, + "If you want both the length and indexes, please " + "just use IDX."); + return; + } + + /* Compute the LCS using the vanilla dynamic programming technique of + * building a table of LCS(x,y) substrings. */ + uint32_t alen = sdslen(a); + uint32_t blen = sdslen(b); + + /* Setup an uint32_t array to store at LCS[i,j] the length of the + * LCS A0..i-1, B0..j-1. Note that we have a linear array here, so + * we index it as LCS[j+(blen+1)*j] */ + uint32_t *lcs = zmalloc((alen+1)*(blen+1)*sizeof(uint32_t)); + #define LCS(A,B) lcs[(B)+((A)*(blen+1))] + + /* Start building the LCS table. */ + for (uint32_t i = 0; i <= alen; i++) { + for (uint32_t j = 0; j <= blen; j++) { + if (i == 0 || j == 0) { + /* If one substring has length of zero, the + * LCS length is zero. */ + LCS(i,j) = 0; + } else if (a[i-1] == b[j-1]) { + /* The len LCS (and the LCS itself) of two + * sequences with the same final character, is the + * LCS of the two sequences without the last char + * plus that last char. */ + LCS(i,j) = LCS(i-1,j-1)+1; + } else { + /* If the last character is different, take the longest + * between the LCS of the first string and the second + * minus the last char, and the reverse. */ + uint32_t lcs1 = LCS(i-1,j); + uint32_t lcs2 = LCS(i,j-1); + LCS(i,j) = lcs1 > lcs2 ? lcs1 : lcs2; + } + } + } + + /* Store the actual LCS string in "result" if needed. We create + * it backward, but the length is already known, we store it into idx. */ + uint32_t idx = LCS(alen,blen); + sds result = NULL; /* Resulting LCS string. */ + void *arraylenptr = NULL; /* Deffered length of the array for IDX. */ + uint32_t arange_start = alen, /* alen signals that values are not set. */ + arange_end = 0, + brange_start = 0, + brange_end = 0; + + /* Do we need to compute the actual LCS string? Allocate it in that case. */ + int computelcs = getidx || !getlen; + if (computelcs) result = sdsnewlen(SDS_NOINIT,idx); + + /* Start with a deferred array if we have to emit the ranges. */ + uint32_t arraylen = 0; /* Number of ranges emitted in the array. */ + if (getidx) { + addReplyMapLen(c,2); + addReplyBulkCString(c,"matches"); + arraylenptr = addReplyDeferredLen(c); + } + + i = alen, j = blen; + while (computelcs && i > 0 && j > 0) { + int emit_range = 0; + if (a[i-1] == b[j-1]) { + /* If there is a match, store the character and reduce + * the indexes to look for a new match. */ + result[idx-1] = a[i-1]; + + /* Track the current range. */ + if (arange_start == alen) { + arange_start = i-1; + arange_end = i-1; + brange_start = j-1; + brange_end = j-1; + } else { + /* Let's see if we can extend the range backward since + * it is contiguous. */ + if (arange_start == i && brange_start == j) { + arange_start--; + brange_start--; + } else { + emit_range = 1; + } + } + /* Emit the range if we matched with the first byte of + * one of the two strings. We'll exit the loop ASAP. */ + if (arange_start == 0 || brange_start == 0) emit_range = 1; + idx--; i--; j--; + } else { + /* Otherwise reduce i and j depending on the largest + * LCS between, to understand what direction we need to go. */ + uint32_t lcs1 = LCS(i-1,j); + uint32_t lcs2 = LCS(i,j-1); + if (lcs1 > lcs2) + i--; + else + j--; + if (arange_start != alen) emit_range = 1; + } + + /* Emit the current range if needed. */ + uint32_t match_len = arange_end - arange_start + 1; + if (emit_range) { + if (minmatchlen == 0 || match_len >= minmatchlen) { + if (arraylenptr) { + addReplyArrayLen(c,2+withmatchlen); + addReplyArrayLen(c,2); + addReplyLongLong(c,arange_start); + addReplyLongLong(c,arange_end); + addReplyArrayLen(c,2); + addReplyLongLong(c,brange_start); + addReplyLongLong(c,brange_end); + if (withmatchlen) addReplyLongLong(c,match_len); + arraylen++; + } + } + arange_start = alen; /* Restart at the next match. */ + } + } + + /* Signal modified key, increment dirty, ... */ + + /* Reply depending on the given options. */ + if (arraylenptr) { + addReplyBulkCString(c,"len"); + addReplyLongLong(c,LCS(alen,blen)); + setDeferredArrayLen(c,arraylenptr,arraylen); + } else if (getlen) { + addReplyLongLong(c,LCS(alen,blen)); + } else { + addReplyBulkSds(c,result); + result = NULL; + } + + /* Cleanup. */ + if (obja) decrRefCount(obja); + if (objb) decrRefCount(objb); + sdsfree(result); + zfree(lcs); + return; +} + |