/* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2009-2014 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "groonga_in.h" #include #include #include #include #include "ii.h" #include "ctx_impl.h" #include "token.h" #include "pat.h" #include "db.h" #include "output.h" #include "util.h" #define MAX_PSEG 0x20000 #define S_CHUNK (1 << GRN_II_W_CHUNK) #define W_SEGMENT 18 #define S_SEGMENT (1 << W_SEGMENT) #define N_CHUNKS_PER_FILE (GRN_IO_FILE_SIZE >> W_SEGMENT) #define W_ARRAY_ELEMENT 3 #define S_ARRAY_ELEMENT (1 << W_ARRAY_ELEMENT) #define W_ARRAY (W_SEGMENT - W_ARRAY_ELEMENT) #define ARRAY_MASK_IN_A_SEGMENT ((1 << W_ARRAY) - 1) #define NOT_ASSIGNED 0xffffffff #define S_GARBAGE (1<<12) #define CHUNK_SPLIT 0x80000000 #define CHUNK_SPLIT_THRESHOLD 0x60000 #define MAX_N_ELEMENTS 5 #define LSEG(pos) ((pos) >> 16) #define LPOS(pos) (((pos) & 0xffff) << 2) #define SEG2POS(seg,pos) ((((uint32_t)(seg)) << 16) + (((uint32_t)(pos)) >> 2)) #define NEXT_ADDR(p) (((byte *)(p)) + sizeof(*(p))) #ifndef S_IRUSR # define S_IRUSR 0400 #endif /* S_IRUSR */ #ifndef S_IWUSR # define S_IWUSR 0200 #endif /* S_IWUSR */ /* segment */ inline static uint32_t segment_get(grn_ctx *ctx, grn_ii *ii) { uint32_t pseg; if (ii->header->bgqtail == ((ii->header->bgqhead + 1) & (GRN_II_BGQSIZE - 1))) { pseg = ii->header->bgqbody[ii->header->bgqtail]; ii->header->bgqtail = (ii->header->bgqtail + 1) & (GRN_II_BGQSIZE - 1); } else { pseg = ii->header->pnext; #ifndef CUT_OFF_COMPATIBILITY if (!pseg) { int i; uint32_t pmax = 0; char *used = GRN_CALLOC(MAX_PSEG); if (!used) { return MAX_PSEG; } for (i = 0; i < GRN_II_MAX_LSEG; i++) { if ((pseg = ii->header->ainfo[i]) != NOT_ASSIGNED) { if (pseg > pmax) { pmax = pseg; } used[pseg] = 1; } if ((pseg = ii->header->binfo[i]) != NOT_ASSIGNED) { if (pseg > pmax) { pmax = pseg; } used[pseg] = 1; } } for (pseg = 0; pseg < MAX_PSEG && used[pseg]; pseg++) ; GRN_FREE(used); ii->header->pnext = pmax + 1; } else #endif /* CUT_OFF_COMPATIBILITY */ if (ii->header->pnext < MAX_PSEG) { ii->header->pnext++; } } return pseg; } inline static grn_rc segment_get_clear(grn_ctx *ctx, grn_ii *ii, uint32_t *pseg) { uint32_t seg = segment_get(ctx, ii); if (seg < MAX_PSEG) { void *p = NULL; GRN_IO_SEG_REF(ii->seg, seg, p); if (!p) { return GRN_NO_MEMORY_AVAILABLE; } memset(p, 0, S_SEGMENT); GRN_IO_SEG_UNREF(ii->seg, seg); *pseg = seg; return GRN_SUCCESS; } else { return GRN_NO_MEMORY_AVAILABLE; } } inline static grn_rc buffer_segment_new(grn_ctx *ctx, grn_ii *ii, uint32_t *segno) { uint32_t lseg, pseg; if (*segno < GRN_II_MAX_LSEG) { if (ii->header->binfo[*segno] != NOT_ASSIGNED) { return GRN_INVALID_ARGUMENT; } lseg = *segno; } else { for (lseg = 0; lseg < GRN_II_MAX_LSEG; lseg++) { if (ii->header->binfo[lseg] == NOT_ASSIGNED) { break; } } if (lseg == GRN_II_MAX_LSEG) { return GRN_NO_MEMORY_AVAILABLE; } *segno = lseg; } pseg = segment_get(ctx, ii); if (pseg < MAX_PSEG) { ii->header->binfo[lseg] = pseg; if (lseg >= ii->header->bmax) { ii->header->bmax = lseg + 1; } return GRN_SUCCESS; } else { return GRN_NO_MEMORY_AVAILABLE; } } static grn_rc buffer_segment_reserve(grn_ctx *ctx, grn_ii *ii, uint32_t *lseg0, uint32_t *pseg0, uint32_t *lseg1, uint32_t *pseg1) { uint32_t i = 0; for (;; i++) { if (i == GRN_II_MAX_LSEG) { return GRN_NO_MEMORY_AVAILABLE; } if (ii->header->binfo[i] == NOT_ASSIGNED) { break; } } *lseg0 = i++; for (;; i++) { if (i == GRN_II_MAX_LSEG) { return GRN_NO_MEMORY_AVAILABLE; } if (ii->header->binfo[i] == NOT_ASSIGNED) { break; } } *lseg1 = i; if ((*pseg0 = segment_get(ctx, ii)) == MAX_PSEG) { return GRN_NO_MEMORY_AVAILABLE; } if ((*pseg1 = segment_get(ctx, ii)) == MAX_PSEG) { return GRN_NO_MEMORY_AVAILABLE; } /* { uint32_t pseg; char *used = GRN_CALLOC(MAX_PSEG); if (!used) { return GRN_NO_MEMORY_AVAILABLE; } for (i = 0; i < GRN_II_MAX_LSEG; i++) { if ((pseg = ii->header->ainfo[i]) != NOT_ASSIGNED) { used[pseg] = 1; } if ((pseg = ii->header->binfo[i]) != NOT_ASSIGNED) { used[pseg] = 1; } } for (pseg = 0;; pseg++) { if (pseg == MAX_PSEG) { GRN_FREE(used); return GRN_NO_MEMORY_AVAILABLE; } if (!used[pseg]) { break; } } *pseg0 = pseg++; for (;; pseg++) { if (pseg == MAX_PSEG) { GRN_FREE(used); return GRN_NO_MEMORY_AVAILABLE; } if (!used[pseg]) { break; } } *pseg1 = pseg; GRN_FREE(used); } */ return GRN_SUCCESS; } #define BGQENQUE(lseg) do {\ if (ii->header->binfo[lseg] != NOT_ASSIGNED) {\ ii->header->bgqbody[ii->header->bgqhead] = ii->header->binfo[lseg];\ ii->header->bgqhead = (ii->header->bgqhead + 1) & (GRN_II_BGQSIZE - 1);\ GRN_ASSERT(ii->header->bgqhead != ii->header->bgqtail);\ }\ } while (0) inline static void buffer_segment_update(grn_ii *ii, uint32_t lseg, uint32_t pseg) { BGQENQUE(lseg); // smb_wmb(); ii->header->binfo[lseg] = pseg; if (lseg >= ii->header->bmax) { ii->header->bmax = lseg + 1; } } inline static void buffer_segment_clear(grn_ii *ii, uint32_t lseg) { BGQENQUE(lseg); // smb_wmb(); ii->header->binfo[lseg] = NOT_ASSIGNED; } /* chunk */ #define HEADER_CHUNK_AT(ii,offset) \ ((((ii)->header->chunks[((offset) >> 3)]) >> ((offset) & 7)) & 1) #define HEADER_CHUNK_ON(ii,offset) \ (((ii)->header->chunks[((offset) >> 3)]) |= (1 << ((offset) & 7))) #define HEADER_CHUNK_OFF(ii,offset) \ (((ii)->header->chunks[((offset) >> 3)]) &= ~(1 << ((offset) & 7))) #define N_GARBAGES_TH 1 #define N_GARBAGES ((S_GARBAGE - (sizeof(uint32_t) * 4))/(sizeof(uint32_t))) typedef struct { uint32_t head; uint32_t tail; uint32_t nrecs; uint32_t next; uint32_t recs[N_GARBAGES]; } grn_ii_ginfo; #define WIN_MAP2(chunk,ctx,iw,seg,pos,size,mode)\ grn_io_win_map2(chunk, ctx, iw,\ ((seg) >> GRN_II_N_CHUNK_VARIATION),\ (((seg) & ((1 << GRN_II_N_CHUNK_VARIATION) - 1)) << GRN_II_W_LEAST_CHUNK) + (pos),\ size,mode) /* static int new_histogram[32]; static int free_histogram[32]; */ static grn_rc chunk_new(grn_ctx *ctx, grn_ii *ii, uint32_t *res, uint32_t size) { /* if (size) { int m, es = size - 1; GRN_BIT_SCAN_REV(es, m); m++; new_histogram[m]++; } */ if (size > S_CHUNK) { int i, j; uint32_t n = (size + S_CHUNK - 1) >> GRN_II_W_CHUNK; for (i = 0, j = -1; i < GRN_II_MAX_CHUNK; i++) { if (HEADER_CHUNK_AT(ii, i)) { j = i; } else { if (i == j + n) { j++; *res = j << GRN_II_N_CHUNK_VARIATION; for (; j <= i; j++) { HEADER_CHUNK_ON(ii, j); } return GRN_SUCCESS; } } } GRN_LOG(ctx, GRN_LOG_CRIT, "index full. requested chunk_size=%d.", size); return GRN_NO_MEMORY_AVAILABLE; } else { uint32_t *vp; int m, aligned_size; if (size > (1 << GRN_II_W_LEAST_CHUNK)) { int es = size - 1; GRN_BIT_SCAN_REV(es, m); m++; } else { m = GRN_II_W_LEAST_CHUNK; } aligned_size = 1 << (m - GRN_II_W_LEAST_CHUNK); if (ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK] > N_GARBAGES_TH) { grn_ii_ginfo *ginfo; uint32_t *gseg; grn_io_win iw, iw_; iw_.addr = NULL; gseg = &ii->header->garbages[m - GRN_II_W_LEAST_CHUNK]; while (*gseg != NOT_ASSIGNED) { ginfo = WIN_MAP2(ii->chunk, ctx, &iw, *gseg, 0, S_GARBAGE, grn_io_rdwr); //GRN_IO_SEG_MAP2(ii->chunk, *gseg, ginfo); if (!ginfo) { if (iw_.addr) { grn_io_win_unmap2(&iw_); } return GRN_NO_MEMORY_AVAILABLE; } if (ginfo->next != NOT_ASSIGNED || ginfo->nrecs > N_GARBAGES_TH) { *res = ginfo->recs[ginfo->tail]; if (++ginfo->tail == N_GARBAGES) { ginfo->tail = 0; } ginfo->nrecs--; ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK]--; if (!ginfo->nrecs) { HEADER_CHUNK_OFF(ii, *gseg); *gseg = ginfo->next; } if (iw_.addr) { grn_io_win_unmap2(&iw_); } grn_io_win_unmap2(&iw); return GRN_SUCCESS; } if (iw_.addr) { grn_io_win_unmap2(&iw_); } iw_ = iw; gseg = &ginfo->next; } if (iw_.addr) { grn_io_win_unmap2(&iw_); } } vp = &ii->header->free_chunks[m - GRN_II_W_LEAST_CHUNK]; if (*vp == NOT_ASSIGNED) { int i = 0; while (HEADER_CHUNK_AT(ii, i)) { if (++i >= GRN_II_MAX_CHUNK) { return GRN_NO_MEMORY_AVAILABLE; } } HEADER_CHUNK_ON(ii, i); *vp = i << GRN_II_N_CHUNK_VARIATION; } *res = *vp; *vp += 1 << (m - GRN_II_W_LEAST_CHUNK); if (!(*vp & ((1 << GRN_II_N_CHUNK_VARIATION) - 1))) { *vp = NOT_ASSIGNED; } return GRN_SUCCESS; } } static grn_rc chunk_free(grn_ctx *ctx, grn_ii *ii, uint32_t offset, uint32_t dummy, uint32_t size) { /* if (size) { int m, es = size - 1; GRN_BIT_SCAN_REV(es, m); m++; free_histogram[m]++; } */ grn_io_win iw, iw_; grn_ii_ginfo *ginfo; uint32_t seg, m, *gseg; seg = offset >> GRN_II_N_CHUNK_VARIATION; if (size > S_CHUNK) { int n = (size + S_CHUNK - 1) >> GRN_II_W_CHUNK; for (; n--; seg++) { HEADER_CHUNK_OFF(ii, seg); } return GRN_SUCCESS; } if (size > (1 << GRN_II_W_LEAST_CHUNK)) { int es = size - 1; GRN_BIT_SCAN_REV(es, m); m++; } else { m = GRN_II_W_LEAST_CHUNK; } gseg = &ii->header->garbages[m - GRN_II_W_LEAST_CHUNK]; iw_.addr = NULL; while (*gseg != NOT_ASSIGNED) { ginfo = WIN_MAP2(ii->chunk, ctx, &iw, *gseg, 0, S_GARBAGE, grn_io_rdwr); // GRN_IO_SEG_MAP2(ii->chunk, *gseg, ginfo); if (!ginfo) { if (iw_.addr) { grn_io_win_unmap2(&iw_); } return GRN_NO_MEMORY_AVAILABLE; } if (ginfo->nrecs < N_GARBAGES) { break; } if (iw_.addr) { grn_io_win_unmap2(&iw_); } iw_ = iw; gseg = &ginfo->next; } if (*gseg == NOT_ASSIGNED) { grn_rc rc; if ((rc = chunk_new(ctx, ii, gseg, S_GARBAGE))) { if (iw_.addr) { grn_io_win_unmap2(&iw_); } return rc; } ginfo = WIN_MAP2(ii->chunk, ctx, &iw, *gseg, 0, S_GARBAGE, grn_io_rdwr); /* uint32_t i = 0; while (HEADER_CHUNK_AT(ii, i)) { if (++i >= GRN_II_MAX_CHUNK) { return GRN_NO_MEMORY_AVAILABLE; } } HEADER_CHUNK_ON(ii, i); *gseg = i; GRN_IO_SEG_MAP2(ii->chunk, *gseg, ginfo); */ if (!ginfo) { if (iw_.addr) { grn_io_win_unmap2(&iw_); } return GRN_NO_MEMORY_AVAILABLE; } ginfo->head = 0; ginfo->tail = 0; ginfo->nrecs = 0; ginfo->next = NOT_ASSIGNED; } if (iw_.addr) { grn_io_win_unmap2(&iw_); } ginfo->recs[ginfo->head] = offset; if (++ginfo->head == N_GARBAGES) { ginfo->head = 0; } ginfo->nrecs++; grn_io_win_unmap2(&iw); ii->header->ngarbages[m - GRN_II_W_LEAST_CHUNK]++; return GRN_SUCCESS; } /* inline static grn_rc chunk_new(grn_ii *ii, uint32_t *res, uint32_t size) { int i, j; uint32_t n = (size + S_CHUNK - 1) >> GRN_II_W_CHUNK; uint32_t base_seg = grn_io_base_seg(ii->chunk); for (i = 0, j = -1; i < GRN_II_MAX_CHUNK; i++) { if (HEADER_CHUNK_AT(ii, i)) { j = i; } else { if (i == j + n) { j++; if (res) { *res = j; } for (; j <= i; j++) { HEADER_CHUNK_ON(ii, j); } return GRN_SUCCESS; } // todo : cut off if ((i + base_seg)/ N_CHUNKS_PER_FILE != (i + base_seg + 1) / N_CHUNKS_PER_FILE) { j = i; } } } GRN_LOG(ctx, GRN_LOG_CRIT, "index full."); return GRN_NO_MEMORY_AVAILABLE; } static void chunk_free(grn_ii *ii, int offset, uint32_t size1, uint32_t size2) { uint32_t i = offset + ((size1 + S_CHUNK - 1) >> GRN_II_W_CHUNK); uint32_t n = offset + ((size2 + S_CHUNK - 1) >> GRN_II_W_CHUNK); for (; i < n; i++) { HEADER_CHUNK_OFF(ii, i); } } */ #define UNIT_SIZE 0x80 #define UNIT_MASK (UNIT_SIZE - 1) /* */ static uint8_t * pack_1(uint32_t *p, uint8_t *rp) { uint8_t v; v = *p++ << 7; v += *p++ << 6; v += *p++ << 5; v += *p++ << 4; v += *p++ << 3; v += *p++ << 2; v += *p++ << 1; *rp++ = v + *p++; return rp; } static uint8_t * unpack_1(uint32_t *p, uint8_t *dp) { *p++ = (*dp >> 7); *p++ = ((*dp >> 6) & 0x1); *p++ = ((*dp >> 5) & 0x1); *p++ = ((*dp >> 4) & 0x1); *p++ = ((*dp >> 3) & 0x1); *p++ = ((*dp >> 2) & 0x1); *p++ = ((*dp >> 1) & 0x1); *p++ = (*dp++ & 0x1); return dp; } static uint8_t * pack_2(uint32_t *p, uint8_t *rp) { uint8_t v; v = *p++ << 6; v += *p++ << 4; v += *p++ << 2; *rp++ = v + *p++; v = *p++ << 6; v += *p++ << 4; v += *p++ << 2; *rp++ = v + *p++; return rp; } static uint8_t * unpack_2(uint32_t *p, uint8_t *dp) { *p++ = (*dp >> 6); *p++ = ((*dp >> 4) & 0x3); *p++ = ((*dp >> 2) & 0x3); *p++ = (*dp++ & 0x3); *p++ = (*dp >> 6); *p++ = ((*dp >> 4) & 0x3); *p++ = ((*dp >> 2) & 0x3); *p++ = (*dp++ & 0x3); return dp; } static uint8_t * pack_3(uint32_t *p, uint8_t *rp) { uint8_t v; v = *p++ << 5; v += *p++ << 2; *rp++ = v + (*p >> 1); v = *p++ << 7; v += *p++ << 4; v += *p++ << 1; *rp++ = v + (*p >> 2); v = *p++ << 6; v += *p++ << 3; *rp++ = v + *p++; return rp; } static uint8_t * unpack_3(uint32_t *p, uint8_t *dp) { uint32_t v; *p++ = (*dp >> 5); *p++ = ((*dp >> 2) & 0x7); v = ((*dp++ << 1) & 0x7); *p++ = v + (*dp >> 7); *p++ = ((*dp >> 4) & 0x7); *p++ = ((*dp >> 1) & 0x7); v = ((*dp++ << 2) & 0x7); *p++ = v + (*dp >> 6); *p++ = ((*dp >> 3) & 0x7); *p++ = (*dp++ & 0x7); return dp; } static uint8_t * pack_4(uint32_t *p, uint8_t *rp) { uint8_t v; v = *p++ << 4; *rp++ = v + *p++; v = *p++ << 4; *rp++ = v + *p++; v = *p++ << 4; *rp++ = v + *p++; v = *p++ << 4; *rp++ = v + *p++; return rp; } static uint8_t * unpack_4(uint32_t *p, uint8_t *dp) { *p++ = (*dp >> 4); *p++ = (*dp++ & 0xf); *p++ = (*dp >> 4); *p++ = (*dp++ & 0xf); *p++ = (*dp >> 4); *p++ = (*dp++ & 0xf); *p++ = (*dp >> 4); *p++ = (*dp++ & 0xf); return dp; } static uint8_t * pack_5(uint32_t *p, uint8_t *rp) { uint8_t v; v = *p++ << 3; *rp++ = v + (*p >> 2); v = *p++ << 6; v += *p++ << 1; *rp++ = v + (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 1); v = *p++ << 7; v += *p++ << 2; *rp++ = v + (*p >> 3); v = *p++ << 5; *rp++ = v + *p++; return rp; } static uint8_t * unpack_5(uint32_t *p, uint8_t *dp) { uint32_t v; *p++ = (*dp >> 3); v = ((*dp++ << 2) & 0x1f); *p++ = v + (*dp >> 6); *p++ = ((*dp >> 1) & 0x1f); v = ((*dp++ << 4) & 0x1f); *p++ = v + (*dp >> 4); v = ((*dp++ << 1) & 0x1f); *p++ = v + (*dp >> 7); *p++ = ((*dp >> 2) & 0x1f); v = ((*dp++ << 3) & 0x1f); *p++ = v + (*dp >> 5); *p++ = (*dp++ & 0x1f); return dp; } static uint8_t * pack_6(uint32_t *p, uint8_t *rp) { uint8_t v; v = *p++ << 2; *rp++ = v + (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 2); v = *p++ << 6; *rp++ = v + *p++; v = *p++ << 2; *rp++ = v + (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 2); v = *p++ << 6; *rp++ = v + *p++; return rp; } static uint8_t * unpack_6(uint32_t *p, uint8_t *dp) { uint32_t v; *p++ = (*dp >> 2); v = ((*dp++ << 4) & 0x3f); *p++ = v + (*dp >> 4); v = ((*dp++ << 2) & 0x3f); *p++ = v + (*dp >> 6); *p++ = (*dp++ & 0x3f); *p++ = (*dp >> 2); v = ((*dp++ << 4) & 0x3f); *p++ = v + (*dp >> 4); v = ((*dp++ << 2) & 0x3f); *p++ = v + (*dp >> 6); *p++ = (*dp++ & 0x3f); return dp; } static uint8_t * pack_7(uint32_t *p, uint8_t *rp) { uint8_t v; v = *p++ << 1; *rp++ = v + (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 1); v = *p++ << 7; *rp++ = v + *p++; return rp; } static uint8_t * unpack_7(uint32_t *p, uint8_t *dp) { uint32_t v; *p++ = (*dp >> 1); v = ((*dp++ << 6) & 0x7f); *p++ = v + (*dp >> 2); v = ((*dp++ << 5) & 0x7f); *p++ = v + (*dp >> 3); v = ((*dp++ << 4) & 0x7f); *p++ = v + (*dp >> 4); v = ((*dp++ << 3) & 0x7f); *p++ = v + (*dp >> 5); v = ((*dp++ << 2) & 0x7f); *p++ = v + (*dp >> 6); v = ((*dp++ << 1) & 0x7f); *p++ = v + (*dp >> 7); *p++ = (*dp++ & 0x7f); return dp; } static uint8_t * pack_8(uint32_t *p, uint8_t *rp) { *rp++ = *p++; *rp++ = *p++; *rp++ = *p++; *rp++ = *p++; *rp++ = *p++; *rp++ = *p++; *rp++ = *p++; *rp++ = *p++; return rp; } static uint8_t * unpack_8(uint32_t *p, uint8_t *dp) { *p++ = *dp++; *p++ = *dp++; *p++ = *dp++; *p++ = *dp++; *p++ = *dp++; *p++ = *dp++; *p++ = *dp++; *p++ = *dp++; return dp; } static uint8_t * pack_9(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_9(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 2) & 0x1ff); *p++ = v + (*dp >> 6); v = ((*dp++ << 3) & 0x1ff); *p++ = v + (*dp >> 5); v = ((*dp++ << 4) & 0x1ff); *p++ = v + (*dp >> 4); v = ((*dp++ << 5) & 0x1ff); *p++ = v + (*dp >> 3); v = ((*dp++ << 6) & 0x1ff); *p++ = v + (*dp >> 2); v = ((*dp++ << 7) & 0x1ff); *p++ = v + (*dp >> 1); v = ((*dp++ << 8) & 0x1ff); *p++ = v + *dp++; return dp; } static uint8_t * pack_10(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_10(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 4) & 0x3ff); *p++ = v + (*dp >> 4); v = ((*dp++ << 6) & 0x3ff); *p++ = v + (*dp >> 2); v = ((*dp++ << 8) & 0x3ff); *p++ = v + *dp++; v = *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 4) & 0x3ff); *p++ = v + (*dp >> 4); v = ((*dp++ << 6) & 0x3ff); *p++ = v + (*dp >> 2); v = ((*dp++ << 8) & 0x3ff); *p++ = v + *dp++; return dp; } static uint8_t * pack_11(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_11(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 6) & 0x7ff); *p++ = v + (*dp >> 2); v = ((*dp++ << 9) & 0x7ff); v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 4) & 0x7ff); *p++ = v + (*dp >> 4); v = ((*dp++ << 7) & 0x7ff); *p++ = v + (*dp >> 1); v = ((*dp++ << 10) & 0x7ff); v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 5) & 0x7ff); *p++ = v + (*dp >> 3); v = ((*dp++ << 8) & 0x7ff); *p++ = v + *dp++; return dp; } static uint8_t * pack_12(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_12(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 8) & 0xfff); *p++ = v + *dp++; v = *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 8) & 0xfff); *p++ = v + *dp++; v = *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 8) & 0xfff); *p++ = v + *dp++; v = *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 8) & 0xfff); *p++ = v + *dp++; return dp; } static uint8_t * pack_13(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_13(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 5; *p++ = v + (*dp >> 3); v = ((*dp++ << 10) & 0x1fff); v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 7) & 0x1fff); *p++ = v + (*dp >> 1); v = ((*dp++ << 12) & 0x1fff); v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 9) & 0x1fff); v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 6) & 0x1fff); *p++ = v + (*dp >> 2); v = ((*dp++ << 11) & 0x1fff); v += *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 8) & 0x1fff); *p++ = v + *dp++; return dp; } static uint8_t * pack_14(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_14(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 12) & 0x3fff); v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 10) & 0x3fff); v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 8) & 0x3fff); *p++ = v + *dp++; v = *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 12) & 0x3fff); v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 10) & 0x3fff); v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 8) & 0x3fff); *p++ = v + *dp++; return dp; } static uint8_t * pack_15(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_15(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 7; *p++ = v + (*dp >> 1); v = ((*dp++ << 14) & 0x7fff); v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 13) & 0x7fff); v += *dp++ << 5; *p++ = v + (*dp >> 3); v = ((*dp++ << 12) & 0x7fff); v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 11) & 0x7fff); v += *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 10) & 0x7fff); v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 9) & 0x7fff); v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 8) & 0x7fff); *p++ = v + *dp++; return dp; } static uint8_t * pack_16(uint32_t *p, uint8_t *rp) { *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_16(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_17(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_17(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 9; v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 10) & 0x1ffff); v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 11) & 0x1ffff); v += *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 12) & 0x1ffff); v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 13) & 0x1ffff); v += *dp++ << 5; *p++ = v + (*dp >> 3); v = ((*dp++ << 14) & 0x1ffff); v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 15) & 0x1ffff); v += *dp++ << 7; *p++ = v + (*dp >> 1); v = ((*dp++ << 16) & 0x1ffff); v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_18(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_18(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 12) & 0x3ffff); v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 14) & 0x3ffff); v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 16) & 0x3ffff); v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 12) & 0x3ffff); v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 14) & 0x3ffff); v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 16) & 0x3ffff); v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_19(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_19(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 11; v += *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 14) & 0x7ffff); v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 17) & 0x7ffff); v += *dp++ << 9; v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 12) & 0x7ffff); v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 15) & 0x7ffff); v += *dp++ << 7; *p++ = v + (*dp >> 1); v = ((*dp++ << 18) & 0x7ffff); v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 13) & 0x7ffff); v += *dp++ << 5; *p++ = v + (*dp >> 3); v = ((*dp++ << 16) & 0x7ffff); v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_20(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_20(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 16) & 0xfffff); v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 16) & 0xfffff); v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 16) & 0xfffff); v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 16) & 0xfffff); v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_21(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 19); *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_21(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 13; v += *dp++ << 5; *p++ = v + (*dp >> 3); v = ((*dp++ << 18) & 0x1fffff); v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 15) & 0x1fffff); v += *dp++ << 7; *p++ = v + (*dp >> 1); v = ((*dp++ << 20) & 0x1fffff); v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 17) & 0x1fffff); v += *dp++ << 9; v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 14) & 0x1fffff); v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 19) & 0x1fffff); v += *dp++ << 11; v += *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 16) & 0x1fffff); v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_22(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_22(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 20) & 0x3fffff); v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 18) & 0x3fffff); v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 16) & 0x3fffff); v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 20) & 0x3fffff); v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 18) & 0x3fffff); v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 16) & 0x3fffff); v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_23(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 21); *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 19); *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_23(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 15; v += *dp++ << 7; *p++ = v + (*dp >> 1); v = ((*dp++ << 22) & 0x7fffff); v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 21) & 0x7fffff); v += *dp++ << 13; v += *dp++ << 5; *p++ = v + (*dp >> 3); v = ((*dp++ << 20) & 0x7fffff); v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 19) & 0x7fffff); v += *dp++ << 11; v += *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 18) & 0x7fffff); v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 17) & 0x7fffff); v += *dp++ << 9; v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 16) & 0x7fffff); v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_24(uint32_t *p, uint8_t *rp) { *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_24(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_25(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 19); *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 21); *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 23); *rp++ = (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_25(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 17; v += *dp++ << 9; v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 18) & 0x1ffffff); v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 19) & 0x1ffffff); v += *dp++ << 11; v += *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 20) & 0x1ffffff); v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 21) & 0x1ffffff); v += *dp++ << 13; v += *dp++ << 5; *p++ = v + (*dp >> 3); v = ((*dp++ << 22) & 0x1ffffff); v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 23) & 0x1ffffff); v += *dp++ << 15; v += *dp++ << 7; *p++ = v + (*dp >> 1); v = ((*dp++ << 24) & 0x1ffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_26(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_26(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 18; v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 20) & 0x3ffffff); v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 22) & 0x3ffffff); v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 24) & 0x3ffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 18; v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 20) & 0x3ffffff); v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 22) & 0x3ffffff); v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 24) & 0x3ffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_27(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 19); *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 25); *rp++ = (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 23); *rp++ = (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 26); *rp++ = (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 21); *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_27(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 19; v += *dp++ << 11; v += *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 22) & 0x7ffffff); v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 25) & 0x7ffffff); v += *dp++ << 17; v += *dp++ << 9; v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 20) & 0x7ffffff); v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 23) & 0x7ffffff); v += *dp++ << 15; v += *dp++ << 7; *p++ = v + (*dp >> 1); v = ((*dp++ << 26) & 0x7ffffff); v += *dp++ << 18; v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 21) & 0x7ffffff); v += *dp++ << 13; v += *dp++ << 5; *p++ = v + (*dp >> 3); v = ((*dp++ << 24) & 0x7ffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_28(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_28(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 24) & 0xfffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 24) & 0xfffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 24) & 0xfffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 24) & 0xfffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_29(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 21); *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 26); *rp++ = (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 23); *rp++ = (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 28); *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 25); *rp++ = (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 27); *rp++ = (*p >> 19); *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_29(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 21; v += *dp++ << 13; v += *dp++ << 5; *p++ = v + (*dp >> 3); v = ((*dp++ << 26) & 0x1fffffff); v += *dp++ << 18; v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 23) & 0x1fffffff); v += *dp++ << 15; v += *dp++ << 7; *p++ = v + (*dp >> 1); v = ((*dp++ << 28) & 0x1fffffff); v += *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 25) & 0x1fffffff); v += *dp++ << 17; v += *dp++ << 9; v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 22) & 0x1fffffff); v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 27) & 0x1fffffff); v += *dp++ << 19; v += *dp++ << 11; v += *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 24) & 0x1fffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_30(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 28); *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 26); *rp++ = (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 28); *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 26); *rp++ = (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_30(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 22; v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 28) & 0x3fffffff); v += *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 26) & 0x3fffffff); v += *dp++ << 18; v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 24) & 0x3fffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 22; v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 28) & 0x3fffffff); v += *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 26) & 0x3fffffff); v += *dp++ << 18; v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 24) & 0x3fffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_31(uint32_t *p, uint8_t *rp) { uint8_t v; *rp++ = (*p >> 23); *rp++ = (*p >> 15); *rp++ = (*p >> 7); v = *p++ << 1; *rp++ = v + (*p >> 30); *rp++ = (*p >> 22); *rp++ = (*p >> 14); *rp++ = (*p >> 6); v = *p++ << 2; *rp++ = v + (*p >> 29); *rp++ = (*p >> 21); *rp++ = (*p >> 13); *rp++ = (*p >> 5); v = *p++ << 3; *rp++ = v + (*p >> 28); *rp++ = (*p >> 20); *rp++ = (*p >> 12); *rp++ = (*p >> 4); v = *p++ << 4; *rp++ = v + (*p >> 27); *rp++ = (*p >> 19); *rp++ = (*p >> 11); *rp++ = (*p >> 3); v = *p++ << 5; *rp++ = v + (*p >> 26); *rp++ = (*p >> 18); *rp++ = (*p >> 10); *rp++ = (*p >> 2); v = *p++ << 6; *rp++ = v + (*p >> 25); *rp++ = (*p >> 17); *rp++ = (*p >> 9); *rp++ = (*p >> 1); v = *p++ << 7; *rp++ = v + (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_31(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 23; v += *dp++ << 15; v += *dp++ << 7; *p++ = v + (*dp >> 1); v = ((*dp++ << 30) & 0x7fffffff); v += *dp++ << 22; v += *dp++ << 14; v += *dp++ << 6; *p++ = v + (*dp >> 2); v = ((*dp++ << 29) & 0x7fffffff); v += *dp++ << 21; v += *dp++ << 13; v += *dp++ << 5; *p++ = v + (*dp >> 3); v = ((*dp++ << 28) & 0x7fffffff); v += *dp++ << 20; v += *dp++ << 12; v += *dp++ << 4; *p++ = v + (*dp >> 4); v = ((*dp++ << 27) & 0x7fffffff); v += *dp++ << 19; v += *dp++ << 11; v += *dp++ << 3; *p++ = v + (*dp >> 5); v = ((*dp++ << 26) & 0x7fffffff); v += *dp++ << 18; v += *dp++ << 10; v += *dp++ << 2; *p++ = v + (*dp >> 6); v = ((*dp++ << 25) & 0x7fffffff); v += *dp++ << 17; v += *dp++ << 9; v += *dp++ << 1; *p++ = v + (*dp >> 7); v = ((*dp++ << 24) & 0x7fffffff); v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; return dp; } static uint8_t * pack_32(uint32_t *p, uint8_t *rp) { *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; *rp++ = (*p >> 24); *rp++ = (*p >> 16); *rp++ = (*p >> 8); *rp++ = *p++; return rp; } static uint8_t * unpack_32(uint32_t *p, uint8_t *dp) { uint32_t v; v = *dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; v = *dp++ << 24; v += *dp++ << 16; v += *dp++ << 8; *p++ = v + *dp++; return dp; } /* */ static uint8_t * pack_(uint32_t *p, uint32_t i, int w, uint8_t *rp) { while (i >= 8) { switch (w) { case 0 : break; case 1 : rp = pack_1(p, rp); break; case 2 : rp = pack_2(p, rp); break; case 3 : rp = pack_3(p, rp); break; case 4 : rp = pack_4(p, rp); break; case 5 : rp = pack_5(p, rp); break; case 6 : rp = pack_6(p, rp); break; case 7 : rp = pack_7(p, rp); break; case 8 : rp = pack_8(p, rp); break; case 9 : rp = pack_9(p, rp); break; case 10 : rp = pack_10(p, rp); break; case 11 : rp = pack_11(p, rp); break; case 12 : rp = pack_12(p, rp); break; case 13 : rp = pack_13(p, rp); break; case 14 : rp = pack_14(p, rp); break; case 15 : rp = pack_15(p, rp); break; case 16 : rp = pack_16(p, rp); break; case 17 : rp = pack_17(p, rp); break; case 18 : rp = pack_18(p, rp); break; case 19 : rp = pack_19(p, rp); break; case 20 : rp = pack_20(p, rp); break; case 21 : rp = pack_21(p, rp); break; case 22 : rp = pack_22(p, rp); break; case 23 : rp = pack_23(p, rp); break; case 24 : rp = pack_24(p, rp); break; case 25 : rp = pack_25(p, rp); break; case 26 : rp = pack_26(p, rp); break; case 27 : rp = pack_27(p, rp); break; case 28 : rp = pack_28(p, rp); break; case 29 : rp = pack_29(p, rp); break; case 30 : rp = pack_30(p, rp); break; case 31 : rp = pack_31(p, rp); break; case 32 : rp = pack_32(p, rp); break; } p += 8; i -= 8; } { int b; uint8_t v; uint32_t *pe = p + i; for (b = 8 - w, v = 0; p < pe;) { if (b > 0) { v += *p++ << b; b -= w; } else if (b < 0) { *rp++ = v + (*p >> -b); b += 8; v = 0; } else { *rp++ = v + *p++; b = 8 - w; v = 0; } } if (b + w != 8) { *rp++ = v; } return rp; } } static uint8_t * pack(uint32_t *p, uint32_t i, uint8_t *freq, uint8_t *rp) { int32_t k, w; uint8_t ebuf[UNIT_SIZE], *ep = ebuf; uint32_t s, *pe = p + i, r, th = i - (i >> 3); for (w = 0, s = 0; w <= 32; w++) { if ((s += freq[w]) >= th) { break; } } if (i == s) { *rp++ = w; return pack_(p, i, w, rp); } r = 1 << w; *rp++ = w + 0x80; *rp++ = i - s; if (r >= UNIT_SIZE) { uint32_t first, *last = &first; for (k = 0; p < pe; p++, k++) { if (*p >= r) { GRN_B_ENC(*p - r, ep); *last = k; last = p; } } *last = 0; *rp++ = (uint8_t) first; } else { for (k = 0; p < pe; p++, k++) { if (*p >= r) { *ep++ = k; GRN_B_ENC(*p - r, ep); *p = 0; } } } rp = pack_(p - i, i, w, rp); memcpy(rp, ebuf, ep - ebuf); return rp + (ep - ebuf); } int grn_p_enc(grn_ctx *ctx, uint32_t *data, uint32_t data_size, uint8_t **res) { uint8_t *rp, freq[33]; uint32_t j, *dp, *dpe, d, w, buf[UNIT_SIZE]; *res = rp = GRN_MALLOC(data_size * sizeof(uint32_t) * 2); GRN_B_ENC(data_size, rp); memset(freq, 0, 33); for (j = 0, dp = data, dpe = dp + data_size; dp < dpe; j++, dp++) { if (j == UNIT_SIZE) { rp = pack(buf, j, freq, rp); memset(freq, 0, 33); j = 0; } if ((d = buf[j] = *dp)) { GRN_BIT_SCAN_REV(d, w); freq[w + 1]++; } else { freq[0]++; } } if (j) { rp = pack(buf, j, freq, rp); } return rp - *res; } #define USE_P_ENC (1<<0) #define CUT_OFF (1<<1) #define ODD (1<<2) typedef struct { uint32_t *data; uint32_t data_size; uint32_t flags; } datavec; static grn_rc datavec_reset(grn_ctx *ctx, datavec *dv, uint32_t dvlen, size_t unitsize, size_t totalsize) { int i; if (!dv[0].data || dv[dvlen].data < dv[0].data + totalsize) { if (dv[0].data) { GRN_FREE(dv[0].data); } if (!(dv[0].data = GRN_MALLOC(totalsize * sizeof(uint32_t)))) { return GRN_NO_MEMORY_AVAILABLE; } dv[dvlen].data = dv[0].data + totalsize; } for (i = 1; i < dvlen; i++) { dv[i].data = dv[i - 1].data + unitsize; } return GRN_SUCCESS; } static grn_rc datavec_init(grn_ctx *ctx, datavec *dv, uint32_t dvlen, size_t unitsize, size_t totalsize) { int i; if (!totalsize) { memset(dv, 0, sizeof(datavec) * (dvlen + 1)); return GRN_SUCCESS; } if (!(dv[0].data = GRN_MALLOC(totalsize * sizeof(uint32_t)))) { return GRN_NO_MEMORY_AVAILABLE; } dv[dvlen].data = dv[0].data + totalsize; for (i = 1; i < dvlen; i++) { dv[i].data = dv[i - 1].data + unitsize; } return GRN_SUCCESS; } static void datavec_fin(grn_ctx *ctx, datavec *dv) { if (dv[0].data) { GRN_FREE(dv[0].data); } } size_t grn_p_encv(grn_ctx *ctx, datavec *dv, uint32_t dvlen, uint8_t *res) { uint8_t *rp = res, freq[33]; uint32_t pgap, usep, l, df, data_size, *dp, *dpe; if (!dvlen || !(df = dv[0].data_size)) { return 0; } for (usep = 0, data_size = 0, l = 0; l < dvlen; l++) { uint32_t dl = dv[l].data_size; if (dl < df || ((dl > df) && (l != dvlen - 1))) { /* invalid argument */ return 0; } usep += (dv[l].flags & USE_P_ENC) << l; data_size += dl; } pgap = data_size - df * dvlen; if (!usep) { GRN_B_ENC((df << 1) + 1, rp); for (l = 0; l < dvlen; l++) { for (dp = dv[l].data, dpe = dp + dv[l].data_size; dp < dpe; dp++) { GRN_B_ENC(*dp, rp); } } } else { uint32_t buf[UNIT_SIZE]; GRN_B_ENC((usep << 1), rp); GRN_B_ENC(df, rp); if (dv[dvlen - 1].flags & ODD) { GRN_B_ENC(pgap, rp); } else { GRN_ASSERT(!pgap); } for (l = 0; l < dvlen; l++) { dp = dv[l].data; dpe = dp + dv[l].data_size; if ((dv[l].flags & USE_P_ENC)) { uint32_t j = 0, d; memset(freq, 0, 33); while (dp < dpe) { if (j == UNIT_SIZE) { rp = pack(buf, j, freq, rp); memset(freq, 0, 33); j = 0; } if ((d = buf[j++] = *dp++)) { uint32_t w; GRN_BIT_SCAN_REV(d, w); freq[w + 1]++; } else { freq[0]++; } } if (j) { rp = pack(buf, j, freq, rp); } } else { while (dp < dpe) { GRN_B_ENC(*dp++, rp); } } } } return rp - res; } #define GRN_B_DEC_CHECK(v,p,pe) do { \ uint8_t *_p = (uint8_t *)p; \ uint32_t _v; \ if (_p >= pe) { return 0; } \ _v = *_p++; \ switch (_v >> 4) { \ case 0x08 : \ if (_v == 0x8f) { \ if (_p + sizeof(uint32_t) > pe) { return 0; } \ memcpy(&_v, _p, sizeof(uint32_t)); \ _p += sizeof(uint32_t); \ } \ break; \ case 0x09 : \ if (_p + 3 > pe) { return 0; } \ _v = (_v - 0x90) * 0x100 + *_p++; \ _v = _v * 0x100 + *_p++; \ _v = _v * 0x100 + *_p++ + 0x20408f; \ break; \ case 0x0a : \ case 0x0b : \ if (_p + 2 > pe) { return 0; } \ _v = (_v - 0xa0) * 0x100 + *_p++; \ _v = _v * 0x100 + *_p++ + 0x408f; \ break; \ case 0x0c : \ case 0x0d : \ case 0x0e : \ case 0x0f : \ if (_p + 1 > pe) { return 0; } \ _v = (_v - 0xc0) * 0x100 + *_p++ + 0x8f; \ break; \ } \ v = _v; \ p = _p; \ } while (0) static uint8_t * unpack(uint8_t *dp, uint8_t *dpe, int i, uint32_t *rp) { uint8_t ne = 0, k = 0, w = *dp++; uint32_t m, *p = rp; if (w & 0x80) { ne = *dp++; w -= 0x80; m = (1 << w) - 1; if (m >= UNIT_MASK) { k = *dp++; } } else { m = (1 << w) - 1; } if (w) { while (i >= 8) { if (dp + w > dpe) { return NULL; } switch (w) { case 1 : dp = unpack_1(p, dp); break; case 2 : dp = unpack_2(p, dp); break; case 3 : dp = unpack_3(p, dp); break; case 4 : dp = unpack_4(p, dp); break; case 5 : dp = unpack_5(p, dp); break; case 6 : dp = unpack_6(p, dp); break; case 7 : dp = unpack_7(p, dp); break; case 8 : dp = unpack_8(p, dp); break; case 9 : dp = unpack_9(p, dp); break; case 10 : dp = unpack_10(p, dp); break; case 11 : dp = unpack_11(p, dp); break; case 12 : dp = unpack_12(p, dp); break; case 13 : dp = unpack_13(p, dp); break; case 14 : dp = unpack_14(p, dp); break; case 15 : dp = unpack_15(p, dp); break; case 16 : dp = unpack_16(p, dp); break; case 17 : dp = unpack_17(p, dp); break; case 18 : dp = unpack_18(p, dp); break; case 19 : dp = unpack_19(p, dp); break; case 20 : dp = unpack_20(p, dp); break; case 21 : dp = unpack_21(p, dp); break; case 22 : dp = unpack_22(p, dp); break; case 23 : dp = unpack_23(p, dp); break; case 24 : dp = unpack_24(p, dp); break; case 25 : dp = unpack_25(p, dp); break; case 26 : dp = unpack_26(p, dp); break; case 27 : dp = unpack_27(p, dp); break; case 28 : dp = unpack_28(p, dp); break; case 29 : dp = unpack_29(p, dp); break; case 30 : dp = unpack_30(p, dp); break; case 31 : dp = unpack_31(p, dp); break; case 32 : dp = unpack_32(p, dp); break; } i -= 8; p += 8; } { int b; uint32_t v, *pe; for (b = 8 - w, v = 0, pe = p + i; p < pe && dp < dpe;) { if (b > 0) { *p++ = v + ((*dp >> b) & m); b -= w; v = 0; } else if (b < 0) { v += (*dp++ << -b) & m; b += 8; } else { *p++ = v + (*dp++ & m); b = 8 - w; v = 0; } } if (b + w != 8) { dp++; } } } else { memset(p, 0, sizeof(uint32_t) * i); } if (ne) { if (m >= UNIT_MASK) { uint32_t *pp; while (ne--) { pp = &rp[k]; k = *pp; GRN_B_DEC_CHECK(*pp, dp, dpe); *pp += (m + 1); } } else { while (ne--) { k = *dp++; GRN_B_DEC_CHECK(rp[k], dp, dpe); rp[k] += (m + 1); } } } return dp; } int grn_p_dec(grn_ctx *ctx, uint8_t *data, uint32_t data_size, uint32_t nreq, uint32_t **res) { uint8_t *dp = data, *dpe = data + data_size; uint32_t rest, orig_size, *rp, *rpe; GRN_B_DEC(orig_size, dp); if (!orig_size) { if (!nreq || nreq > data_size) { nreq = data_size; } if ((*res = rp = GRN_MALLOC(nreq * 4))) { for (rpe = rp + nreq; dp < data + data_size && rp < rpe; rp++) { GRN_B_DEC(*rp, dp); } } return rp - *res; } else { if (!(*res = rp = GRN_MALLOC(orig_size * sizeof(uint32_t)))) { return 0; } if (!nreq || nreq > orig_size) { nreq = orig_size; } for (rest = nreq; rest >= UNIT_SIZE; rest -= UNIT_SIZE) { if (!(dp = unpack(dp, dpe, UNIT_SIZE, rp))) { return 0; } rp += UNIT_SIZE; } if (rest) { if (!(dp = unpack(dp, dpe, rest, rp))) { return 0; } } GRN_ASSERT(data + data_size == dp); return nreq; } } int grn_p_decv(grn_ctx *ctx, uint8_t *data, uint32_t data_size, datavec *dv, uint32_t dvlen) { size_t size; uint32_t df, l, i, *rp, nreq; uint8_t *dp = data, *dpe = data + data_size; if (!data_size) { dv[0].data_size = 0; return 0; } for (nreq = 0; nreq < dvlen; nreq++) { if (dv[nreq].flags & CUT_OFF) { break; } } if (!nreq) { return 0; } GRN_B_DEC_CHECK(df, dp, dpe); if ((df & 1)) { df >>= 1; size = nreq == dvlen ? data_size : df * nreq; if (dv[dvlen].data < dv[0].data + size) { if (dv[0].data) { GRN_FREE(dv[0].data); } if (!(rp = GRN_MALLOC(size * sizeof(uint32_t)))) { return 0; } dv[dvlen].data = rp + size; } else { rp = dv[0].data; } for (l = 0; l < dvlen; l++) { if (dv[l].flags & CUT_OFF) { break; } dv[l].data = rp; if (l < dvlen - 1) { for (i = 0; i < df; i++, rp++) { GRN_B_DEC_CHECK(*rp, dp, dpe); } } else { for (i = 0; dp < dpe; i++, rp++) { GRN_B_DEC_CHECK(*rp, dp, dpe); } } dv[l].data_size = i; } } else { uint32_t n, rest, usep = df >> 1; GRN_B_DEC_CHECK(df, dp, dpe); if (dv[dvlen -1].flags & ODD) { GRN_B_DEC_CHECK(rest, dp, dpe); } else { rest = 0; } size = df * nreq + (nreq == dvlen ? rest : 0); if (dv[dvlen].data < dv[0].data + size) { if (dv[0].data) { GRN_FREE(dv[0].data); } if (!(rp = GRN_MALLOC(size * sizeof(uint32_t)))) { return 0; } dv[dvlen].data = rp + size; } else { rp = dv[0].data; } for (l = 0; l < dvlen; l++) { if (dv[l].flags & CUT_OFF) { break; } dv[l].data = rp; dv[l].data_size = n = (l < dvlen - 1) ? df : df + rest; if (usep & (1 << l)) { for (; n >= UNIT_SIZE; n -= UNIT_SIZE) { if (!(dp = unpack(dp, dpe, UNIT_SIZE, rp))) { return 0; } rp += UNIT_SIZE; } if (n) { if (!(dp = unpack(dp, dpe, n, rp))) { return 0; } rp += n; } dv[l].flags |= USE_P_ENC; } else { for (; n; n--, rp++) { GRN_B_DEC_CHECK(*rp, dp, dpe); } } } GRN_ASSERT(dp == dpe); if (dp != dpe) { GRN_LOG(ctx, GRN_LOG_NOTICE, "data_size=%d, %" GRN_FMT_LLD, data_size, (long long int)(dpe - dp)); } } return rp - dv[0].data; } int grn_b_enc(grn_ctx *ctx, uint32_t *data, uint32_t data_size, uint8_t **res) { uint8_t *rp; uint32_t *dp, i; *res = rp = GRN_MALLOC(data_size * sizeof(uint32_t) * 2); GRN_B_ENC(data_size, rp); for (i = data_size, dp = data; i; i--, dp++) { GRN_B_ENC(*dp, rp); } return rp - *res; } int grn_b_dec(grn_ctx *ctx, uint8_t *data, uint32_t data_size, uint32_t **res) { uint32_t i, *rp, orig_size; uint8_t *dp = data; GRN_B_DEC(orig_size, dp); *res = rp = GRN_MALLOC(orig_size * sizeof(uint32_t)); for (i = orig_size; i; i--, rp++) { GRN_B_DEC(*rp, dp); } return orig_size; } /* buffer */ typedef struct { uint32_t tid; uint32_t size_in_chunk; uint32_t pos_in_chunk; uint16_t size_in_buffer; uint16_t pos_in_buffer; } buffer_term; typedef struct { uint16_t step; uint16_t jump; } buffer_rec; typedef struct { uint32_t chunk; uint32_t chunk_size; uint32_t buffer_free; uint16_t nterms; uint16_t nterms_void; } buffer_header; struct grn_ii_buffer { buffer_header header; buffer_term terms[(S_SEGMENT - sizeof(buffer_header))/sizeof(buffer_term)]; }; typedef struct grn_ii_buffer buffer; inline static uint32_t buffer_open(grn_ctx *ctx, grn_ii *ii, uint32_t pos, buffer_term **bt, buffer **b) { byte *p = NULL; uint16_t lseg = (uint16_t) (LSEG(pos)); uint32_t pseg = ii->header->binfo[lseg]; if (pseg != NOT_ASSIGNED) { GRN_IO_SEG_REF(ii->seg, pseg, p); if (!p) { return NOT_ASSIGNED; } if (b) { *b = (buffer *)p; } if (bt) { *bt = (buffer_term *)(p + LPOS(pos)); } } return pseg; } inline static grn_rc buffer_close(grn_ctx *ctx, grn_ii *ii, uint32_t pseg) { if (pseg >= MAX_PSEG) { GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid pseg buffer_close(%d)", pseg); return GRN_INVALID_ARGUMENT; } GRN_IO_SEG_UNREF(ii->seg, pseg); return GRN_SUCCESS; } inline static uint32_t buffer_open_if_capable(grn_ctx *ctx, grn_ii *ii, int32_t seg, int size, buffer **b) { uint32_t pseg, pos = SEG2POS(seg, 0); if ((pseg = buffer_open(ctx, ii, pos, NULL, b)) != NOT_ASSIGNED) { uint16_t nterms = (*b)->header.nterms - (*b)->header.nterms_void; if (!((nterms < 4096 || (ii->header->total_chunk_size >> ((nterms >> 8) - 6)) > (*b)->header.chunk_size) && ((*b)->header.buffer_free >= size + sizeof(buffer_term)))) { buffer_close(ctx, ii, pseg); return NOT_ASSIGNED; } } return pseg; } typedef struct { uint32_t rid; uint32_t sid; } docid; #define BUFFER_REC_DEL(r) ((r)->jump = 1) #define BUFFER_REC_DELETED(r) ((r)->jump == 1) #define BUFFER_REC_AT(b,pos) ((buffer_rec *)(b) + (pos)) #define BUFFER_REC_POS(b,rec) ((uint16_t)((rec) - (buffer_rec *)(b))) inline static void buffer_term_dump(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_term *bt) { int pos, rid, sid; uint8_t *p; buffer_rec *r; GRN_LOG(ctx, GRN_LOG_DEBUG, "b=(%x %u %u %u)", b->header.chunk, b->header.chunk_size, b->header.buffer_free, b->header.nterms); GRN_LOG(ctx, GRN_LOG_DEBUG, "bt=(%u %u %u %u %u)", bt->tid, bt->size_in_chunk, bt->pos_in_chunk, bt->size_in_buffer, bt->pos_in_buffer); for (pos = bt->pos_in_buffer; pos; pos = r->step) { r = BUFFER_REC_AT(b, pos); p = NEXT_ADDR(r); GRN_B_DEC(rid, p); if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { GRN_B_DEC(sid, p); } else { sid = 1; } GRN_LOG(ctx, GRN_LOG_DEBUG, "%d=(%d:%d),(%d:%d)", pos, r->jump, r->step, rid, sid); } } inline static grn_rc check_jump(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_rec *r, int j) { uint16_t i = BUFFER_REC_POS(b, r); uint8_t *p; buffer_rec *r2; docid id, id2; if (!j) { return GRN_SUCCESS; } p = NEXT_ADDR(r); GRN_B_DEC(id.rid, p); if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { GRN_B_DEC(id.sid, p); } else { id.sid = 1; } if (j == 1) { GRN_LOG(ctx, GRN_LOG_DEBUG, "deleting! %d(%d:%d)", i, id.rid, id.sid); return GRN_SUCCESS; } r2 = BUFFER_REC_AT(b, j); p = NEXT_ADDR(r2); GRN_B_DEC(id2.rid, p); if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { GRN_B_DEC(id2.sid, p); } else { id2.sid = 1; } if (r2->step == i) { GRN_LOG(ctx, GRN_LOG_EMERG, "cycle! %d(%d:%d)<->%d(%d:%d)", i, id.rid, id.sid, j, id2.rid, id2.sid); return GRN_FILE_CORRUPT; } if (id2.rid < id.rid || (id2.rid == id.rid && id2.sid <= id.sid)) { GRN_LOG(ctx, GRN_LOG_CRIT, "invalid jump! %d(%d:%d)(%d:%d)->%d(%d:%d)(%d:%d)", i, r->jump, r->step, id.rid, id.sid, j, r2->jump, r2->step, id2.rid, id2.sid); return GRN_FILE_CORRUPT; } return GRN_SUCCESS; } inline static grn_rc set_jump_r(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_rec *from, int to) { int i, j, max_jump = 100; buffer_rec *r, *r2; for (r = from, j = to; j > 1 && max_jump--; r = BUFFER_REC_AT(b, r->step)) { r2 = BUFFER_REC_AT(b, j); if (r == r2) { break; } if (BUFFER_REC_DELETED(r2)) { break; } if (j == (i = r->jump)) { break; } if (j == r->step) { break; } if (check_jump(ctx, ii, b, r, j)) { ERR(GRN_FILE_CORRUPT, "check_jump failed"); return ctx->rc; } r->jump = j; j = i; if (!r->step) { return GRN_FILE_CORRUPT; } } return GRN_SUCCESS; } #define GET_NUM_BITS(x,n) do {\ n = x;\ n = (n & 0x55555555) + ((n >> 1) & 0x55555555);\ n = (n & 0x33333333) + ((n >> 2) & 0x33333333);\ n = (n & 0x0F0F0F0F) + ((n >> 4) & 0x0F0F0F0F);\ n = (n & 0x00FF00FF) + ((n >> 8) & 0x00FF00FF);\ n = (n & 0x0000FFFF) + ((n >>16) & 0x0000FFFF);\ } while (0) inline static grn_rc buffer_put(grn_ctx *ctx, grn_ii *ii, buffer *b, buffer_term *bt, buffer_rec *rnew, uint8_t *bs, grn_ii_updspec *u, int size) { uint8_t *p; grn_rc rc = GRN_SUCCESS; docid id_curr = {0, 0}, id_start = {0, 0}, id_post = {0, 0}; buffer_rec *r_curr, *r_start = NULL; uint16_t last = 0, *lastp = &bt->pos_in_buffer, pos = BUFFER_REC_POS(b, rnew); int vdelta = 0, delta, delta0 = 0, vhops = 0, nhops = 0, reset = 1; memcpy(NEXT_ADDR(rnew), bs, size - sizeof(buffer_rec)); for (;;) { if (!*lastp) { rnew->step = 0; rnew->jump = 0; // smb_wmb(); *lastp = pos; if (bt->size_in_buffer++ > 1) { buffer_rec *rhead = BUFFER_REC_AT(b, bt->pos_in_buffer); rhead->jump = pos; if (!(bt->size_in_buffer & 1)) { int n; buffer_rec *r = BUFFER_REC_AT(b, rhead->step), *r2; GET_NUM_BITS(bt->size_in_buffer, n); while (n-- && (r->jump > 1)) { r2 = BUFFER_REC_AT(b, r->jump); if (BUFFER_REC_DELETED(r2)) { break; } r = r2; } if (r != rnew) { set_jump_r(ctx, ii, b, r, last); } } } break; } r_curr = BUFFER_REC_AT(b, *lastp); p = NEXT_ADDR(r_curr); GRN_B_DEC(id_curr.rid, p); if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { GRN_B_DEC(id_curr.sid, p); } else { id_curr.sid = 1; } if (id_curr.rid < id_post.rid || (id_curr.rid == id_post.rid && id_curr.sid < id_post.sid)) { rc = GRN_FILE_CORRUPT; ERRSET(ctx, GRN_CRIT, rc, "loop found!!! (%d:%d)->(%d:%d)", id_post.rid, id_post.sid, id_curr.rid, id_curr.sid); buffer_term_dump(ctx, ii, b, bt); bt->pos_in_buffer = 0; bt->size_in_buffer = 0; lastp = &bt->pos_in_buffer; continue; } id_post.rid = id_curr.rid; id_post.sid = id_curr.sid; if (u->rid < id_curr.rid || (u->rid == id_curr.rid && u->sid <= id_curr.sid)) { uint16_t step = *lastp, jump = r_curr->jump; if (u->rid == id_curr.rid) { if (u->sid == 0) { while (id_curr.rid == u->rid) { BUFFER_REC_DEL(r_curr); if (!(step = r_curr->step)) { break; } r_curr = BUFFER_REC_AT(b, step); p = NEXT_ADDR(r_curr); GRN_B_DEC(id_curr.rid, p); if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { GRN_B_DEC(id_curr.sid, p); } else { id_curr.sid = 1; } } } else if (u->sid == id_curr.sid) { BUFFER_REC_DEL(r_curr); step = r_curr->step; } } rnew->step = step; rnew->jump = check_jump(ctx, ii, b, rnew, jump) ? 0 : jump; // smb_wmb(); *lastp = pos; break; } if (reset) { r_start = r_curr; id_start.rid = id_curr.rid; id_start.sid = id_curr.sid; if (!(delta0 = u->rid - id_start.rid)) { delta0 = u->sid - id_start.sid; } nhops = 0; vhops = 1; vdelta = delta0 >> 1; } else { if (!(delta = id_curr.rid - id_start.rid)) { delta = id_curr.sid - id_start.sid; } if (vdelta < delta) { vdelta += (delta0 >> ++vhops); r_start = r_curr; } if (nhops > vhops) { set_jump_r(ctx, ii, b, r_start, *lastp); } else { nhops++; } } last = *lastp; lastp = &r_curr->step; reset = 0; { uint16_t posj = r_curr->jump; if (posj > 1) { buffer_rec *rj = BUFFER_REC_AT(b, posj); if (!BUFFER_REC_DELETED(rj)) { docid idj; p = NEXT_ADDR(rj); GRN_B_DEC(idj.rid, p); if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { GRN_B_DEC(idj.sid, p); } else { idj.sid = 1; } if (idj.rid < u->rid || (idj.rid == u->rid && idj.sid < u->sid)) { last = posj; lastp = &rj->step; } else { reset = 1; } } } } } return rc; } /* array */ inline static uint32_t * array_at(grn_ctx *ctx, grn_ii *ii, uint32_t id) { byte *p = NULL; uint32_t seg, pseg; if (id > GRN_ID_MAX) { return NULL; } seg = id >> W_ARRAY; if ((pseg = ii->header->ainfo[seg]) == NOT_ASSIGNED) { return NULL; } GRN_IO_SEG_REF(ii->seg, pseg, p); if (!p) { return NULL; } return (uint32_t *)(p + (id & ARRAY_MASK_IN_A_SEGMENT) * S_ARRAY_ELEMENT); } inline static uint32_t * array_get(grn_ctx *ctx, grn_ii *ii, uint32_t id) { byte *p = NULL; uint16_t seg; uint32_t pseg; if (id > GRN_ID_MAX) { return NULL; } seg = id >> W_ARRAY; if ((pseg = ii->header->ainfo[seg]) == NOT_ASSIGNED) { if (segment_get_clear(ctx, ii, &pseg)) { return NULL; } ii->header->ainfo[seg] = pseg; if (seg >= ii->header->amax) { ii->header->amax = seg + 1; } } GRN_IO_SEG_REF(ii->seg, pseg, p); if (!p) { return NULL; } return (uint32_t *)(p + (id & ARRAY_MASK_IN_A_SEGMENT) * S_ARRAY_ELEMENT); } inline static void array_unref(grn_ii *ii, uint32_t id) { GRN_IO_SEG_UNREF(ii->seg, ii->header->ainfo[id >> W_ARRAY]); } /* updspec */ grn_ii_updspec * grn_ii_updspec_open(grn_ctx *ctx, uint32_t rid, uint32_t sid) { grn_ii_updspec *u; if (!(u = GRN_MALLOC(sizeof(grn_ii_updspec)))) { return NULL; } u->rid = rid; u->sid = sid; u->weight = 0; u->tf = 0; u->atf = 0; u->pos = NULL; u->tail = NULL; // u->vnodes = NULL; return u; } #define GRN_II_MAX_TF 0x1ffff grn_rc grn_ii_updspec_add(grn_ctx *ctx, grn_ii_updspec *u, int pos, int32_t weight) { struct _grn_ii_pos *p; u->atf++; if (u->tf >= GRN_II_MAX_TF) { return GRN_SUCCESS; } if (!(p = GRN_MALLOC(sizeof(struct _grn_ii_pos)))) { return GRN_NO_MEMORY_AVAILABLE; } u->weight += weight; p->pos = pos; p->next = NULL; if (u->tail) { u->tail->next = p; } else { u->pos = p; } u->tail = p; u->tf++; return GRN_SUCCESS; } int grn_ii_updspec_cmp(grn_ii_updspec *a, grn_ii_updspec *b) { struct _grn_ii_pos *pa, *pb; if (a->rid != b->rid) { return a->rid - b->rid; } if (a->sid != b->sid) { return a->sid - b->sid; } if (a->weight != b->weight) { return a->weight - b->weight; } if (a->tf != b->tf) { return a->tf - b->tf; } for (pa = a->pos, pb = b->pos; pa && pb; pa = pa->next, pb = pb->next) { if (pa->pos != pb->pos) { return pa->pos - pb->pos; } } if (pa) { return 1; } if (pb) { return -1; } return 0; } grn_rc grn_ii_updspec_close(grn_ctx *ctx, grn_ii_updspec *u) { struct _grn_ii_pos *p = u->pos, *q; while (p) { q = p->next; GRN_FREE(p); p = q; } GRN_FREE(u); return GRN_SUCCESS; } inline static uint8_t * encode_rec(grn_ctx *ctx, grn_ii *ii, grn_ii_updspec *u, unsigned int *size, int deletep) { uint8_t *br, *p; struct _grn_ii_pos *pp; uint32_t lpos, tf, weight; if (deletep) { tf = 0; weight = 0; } else { tf = u->tf; weight = u->weight; } if (!(br = GRN_MALLOC((tf + 4) * 5))) { return NULL; } p = br; GRN_B_ENC(u->rid, p); if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { GRN_B_ENC(u->sid, p); } else { u->sid = 1; } GRN_B_ENC(tf, p); if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { GRN_B_ENC(weight, p); } if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) { for (lpos = 0, pp = u->pos; pp && tf--; lpos = pp->pos, pp = pp->next) { GRN_B_ENC(pp->pos - lpos, p); } } while (((intptr_t)p & 0x03)) { *p++ = 0; } *size = (unsigned int) ((p - br) + sizeof(buffer_rec)); return br; } typedef struct { grn_ii *ii; grn_hash *h; } lexicon_deletable_arg; #ifdef CASCADE_DELETE_LEXICON static int lexicon_deletable(grn_ctx *ctx, grn_obj *lexicon, grn_id tid, void *arg) { uint32_t *a; grn_hash *h = ((lexicon_deletable_arg *)arg)->h; grn_ii *ii = ((lexicon_deletable_arg *)arg)->ii; if (!h) { return 0; } if ((a = array_at(ctx, ii, tid))) { if (a[0]) { array_unref(ii, tid); return 0; } array_unref(ii, tid); } { grn_ii_updspec **u; if (!grn_hash_get(ctx, h, &tid, sizeof(grn_id), (void **) &u)) { return (ERRP(ctx, GRN_ERROR)) ? 0 : 1; } if (!(*u)->tf || !(*u)->sid) { return 1; } return 0; } } #endif /* CASCADE_DELETE_LEXICON */ inline static void lexicon_delete(grn_ctx *ctx, grn_ii *ii, uint32_t tid, grn_hash *h) { #ifdef CASCADE_DELETE_LEXICON lexicon_deletable_arg arg = {ii, h}; grn_table_delete_optarg optarg = {0, lexicon_deletable, &arg}; _grn_table_delete_by_id(ctx, ii->lexicon, tid, &optarg); #endif /* CASCADE_DELETE_LEXICON */ } typedef struct { grn_id rid; uint32_t sid; uint32_t tf; uint32_t weight; uint32_t flags; } docinfo; #define GETNEXTC() do {\ if (sdf) {\ uint32_t dgap = *srp++;\ cid.rid += dgap;\ if (dgap) { cid.sid = 0; }\ snp += cid.tf;\ cid.tf = 1 + *stp++;\ if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { cid.weight = *sop++; }\ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {\ cid.sid += 1 + *ssp++;\ } else {\ cid.sid = 1;\ }\ sdf--;\ } else {\ cid.rid = 0;\ }\ } while (0) #define PUTNEXT_(id) do {\ uint32_t dgap = id.rid - lid.rid;\ uint32_t sgap = (dgap ? id.sid : id.sid - lid.sid) - 1;\ *ridp++ = dgap;\ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {\ *sidp++ = sgap;\ }\ *tfp++ = id.tf - 1;\ if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { *weightp++ = id.weight; }\ lid.rid = id.rid;\ lid.sid = id.sid;\ } while (0) #define PUTNEXTC() do {\ if (cid.rid) {\ if (cid.tf) {\ if (lid.rid > cid.rid || (lid.rid == cid.rid && lid.sid >= cid.sid)) {\ GRN_LOG(ctx, GRN_LOG_CRIT, "brokenc!! (%d:%d) -> (%d:%d)", lid.rid, lid.sid, bid.rid, bid.sid);\ rc = GRN_FILE_CORRUPT;\ break;\ }\ PUTNEXT_(cid);\ if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {\ uint32_t i;\ for (i = 0; i < cid.tf; i++) {\ *posp++ = snp[i];\ spos += snp[i];\ }\ }\ } else {\ GRN_LOG(ctx, GRN_LOG_CRIT, "invalid chunk(%d,%d)", bt->tid, cid.rid);\ rc = GRN_FILE_CORRUPT;\ break;\ }\ }\ GETNEXTC();\ } while (0) #define GETNEXTB() do {\ if (nextb) {\ uint32_t lrid = bid.rid, lsid = bid.sid;\ buffer_rec *br = BUFFER_REC_AT(sb, nextb);\ sbp = NEXT_ADDR(br);\ GRN_B_DEC(bid.rid, sbp);\ if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) {\ GRN_B_DEC(bid.sid, sbp);\ } else {\ bid.sid = 1;\ }\ if (lrid > bid.rid || (lrid == bid.rid && lsid >= bid.sid)) {\ GRN_LOG(ctx, GRN_LOG_CRIT, "brokeng!! (%d:%d) -> (%d:%d)", lrid, lsid, bid.rid, bid.sid);\ rc = GRN_FILE_CORRUPT;\ break;\ }\ nextb = br->step;\ } else {\ bid.rid = 0;\ }\ } while (0) #define PUTNEXTB() do {\ if (bid.rid && bid.sid) {\ GRN_B_DEC(bid.tf, sbp);\ if (bid.tf > 0) {\ if (lid.rid > bid.rid || (lid.rid == bid.rid && lid.sid >= bid.sid)) {\ GRN_LOG(ctx, GRN_LOG_CRIT, "brokenb!! (%d:%d) -> (%d:%d)", lid.rid, lid.sid, bid.rid, bid.sid);\ rc = GRN_FILE_CORRUPT;\ break;\ }\ if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { GRN_B_DEC(bid.weight, sbp); }\ PUTNEXT_(bid);\ if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) {\ while (bid.tf--) { GRN_B_DEC(*posp, sbp); spos += *posp++; }\ }\ }\ }\ GETNEXTB();\ } while (0) #define MERGE_BC(cond) do {\ if (bid.rid) {\ if (cid.rid) {\ if (cid.rid < bid.rid) {\ PUTNEXTC();\ } else {\ if (bid.rid < cid.rid) {\ PUTNEXTB();\ } else {\ if (bid.sid) {\ if (cid.sid < bid.sid) {\ PUTNEXTC();\ } else {\ if (bid.sid == cid.sid) { GETNEXTC(); }\ PUTNEXTB();\ }\ } else {\ GETNEXTC();\ }\ }\ }\ } else {\ PUTNEXTB();\ }\ } else {\ if (cid.rid) {\ PUTNEXTC();\ } else {\ break;\ }\ }\ } while (cond) typedef struct { uint32_t segno; uint32_t size; uint32_t dgap; } chunk_info; static grn_rc chunk_flush(grn_ctx *ctx, grn_ii *ii, chunk_info *cinfo, uint8_t *enc, uint32_t encsize) { grn_rc rc; uint8_t *dc; uint32_t dcn; grn_io_win dw; if (!(rc = chunk_new(ctx, ii, &dcn, encsize))) { if ((dc = WIN_MAP2(ii->chunk, ctx, &dw, dcn, 0, encsize, grn_io_wronly))) { memcpy(dc, enc, encsize); grn_io_win_unmap2(&dw); cinfo->segno = dcn; cinfo->size = encsize; rc = GRN_SUCCESS; } else { chunk_free(ctx, ii, dcn, 0, encsize); rc = GRN_NO_MEMORY_AVAILABLE; } } return rc; } static grn_rc chunk_merge(grn_ctx *ctx, grn_ii *ii, buffer *sb, buffer_term *bt, chunk_info *cinfo, grn_id rid, datavec *dv, uint16_t *nextbp, uint8_t **sbpp, docinfo *bidp, int32_t *balance) { grn_rc rc; grn_io_win sw; uint64_t spos = 0; uint32_t segno = cinfo->segno, size = cinfo->size, sdf = 0, ndf = 0; uint32_t *ridp = NULL, *sidp = NULL, *tfp, *weightp = NULL, *posp = NULL; docinfo cid = {0, 0, 0, 0, 0}, lid = {0, 0, 0, 0, 0}, bid = *bidp; uint8_t *scp = WIN_MAP2(ii->chunk, ctx, &sw, segno, 0, size, grn_io_rdonly); if (scp) { uint16_t nextb = *nextbp; uint32_t snn = 0, *srp, *ssp = NULL, *stp, *sop = NULL, *snp; uint8_t *sbp = *sbpp; datavec rdv[MAX_N_ELEMENTS + 1]; size_t bufsize = S_SEGMENT * ii->n_elements; datavec_init(ctx, rdv, ii->n_elements, 0, 0); if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) { rdv[ii->n_elements - 1].flags = ODD; } bufsize += grn_p_decv(ctx, scp, cinfo->size, rdv, ii->n_elements); // (df in chunk list) = a[1] - sdf; { int j = 0; sdf = rdv[j].data_size; srp = rdv[j++].data; if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; } stp = rdv[j++].data; if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; } snn = rdv[j].data_size; snp = rdv[j].data; } if (!(rc = datavec_reset(ctx, dv, ii->n_elements, sdf + S_SEGMENT, bufsize))) { { int j = 0; ridp = dv[j++].data; if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { sidp = dv[j++].data; } tfp = dv[j++].data; if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { weightp = dv[j++].data; } posp = dv[j].data; } GETNEXTC(); MERGE_BC(bid.rid <= rid || cid.rid); *sbpp = sbp; *nextbp = nextb; *bidp = bid; GRN_ASSERT(posp < dv[ii->n_elements].data); ndf = ridp - dv[0].data; } datavec_fin(ctx, rdv); grn_io_win_unmap2(&sw); } else { rc = GRN_NO_MEMORY_AVAILABLE; } if (!rc) { int j = 0; uint8_t *enc; uint32_t encsize; uint32_t np = posp - dv[ii->n_elements - 1].data; uint32_t f_s = (ndf < 3) ? 0 : USE_P_ENC; uint32_t f_d = ((ndf < 16) || (ndf <= (lid.rid >> 8))) ? 0 : USE_P_ENC; dv[j].data_size = ndf; dv[j++].flags = f_d; if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { dv[j].data_size = ndf; dv[j++].flags = f_s; } dv[j].data_size = ndf; dv[j++].flags = f_s; if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { dv[j].data_size = ndf; dv[j++].flags = f_s; } if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) { uint32_t f_p = ((np < 32) || (np <= (spos >> 13))) ? 0 : USE_P_ENC; dv[j].data_size = np; dv[j].flags = f_p|ODD; } if ((enc = GRN_MALLOC((ndf * 4 + np) * 2))) { encsize = grn_p_encv(ctx, dv, ii->n_elements, enc); if (!(rc = chunk_flush(ctx, ii, cinfo, enc, encsize))) { chunk_free(ctx, ii, segno, 0, size); } GRN_FREE(enc); } else { rc = GRN_NO_MEMORY_AVAILABLE; } } *balance += (ndf - sdf); return rc; } static grn_rc buffer_merge(grn_ctx *ctx, grn_ii *ii, uint32_t seg, grn_hash *h, buffer *sb, uint8_t *sc, buffer *db, uint8_t *dc) { buffer_term *bt; grn_rc rc = GRN_SUCCESS; uint8_t *sbp = NULL, *dcp = dc; datavec dv[MAX_N_ELEMENTS + 1]; datavec rdv[MAX_N_ELEMENTS + 1]; uint16_t n = db->header.nterms, nterms_void = 0; size_t unitsize = (S_SEGMENT + sb->header.chunk_size / sb->header.nterms) * 2; // size_t unitsize = (S_SEGMENT + sb->header.chunk_size) * 2 + (1<<24); size_t totalsize = unitsize * ii->n_elements; //todo : realloc if ((rc = datavec_init(ctx, dv, ii->n_elements, unitsize, totalsize))) { return rc; } datavec_init(ctx, rdv, ii->n_elements, 0, 0); if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) { rdv[ii->n_elements - 1].flags = ODD; } for (bt = db->terms; n; n--, bt++) { uint16_t nextb; uint64_t spos = 0; int32_t balance = 0; uint32_t *ridp, *sidp = NULL, *tfp, *weightp = NULL, *posp, nchunks = 0; chunk_info *cinfo = NULL; grn_id crid = GRN_ID_NIL; docinfo cid = {0, 0, 0, 0, 0}, lid = {0, 0, 0, 0, 0}, bid = {0, 0}; uint32_t sdf = 0, snn = 0, ndf; uint32_t *srp = NULL, *ssp = NULL, *stp = NULL, *sop = NULL, *snp = NULL; if (!bt->tid) { nterms_void++; continue; } if (!bt->pos_in_buffer) { GRN_ASSERT(!bt->size_in_buffer); if (bt->size_in_chunk) { memcpy(dcp, sc + bt->pos_in_chunk, bt->size_in_chunk); bt->pos_in_chunk = (uint32_t)(dcp - dc); dcp += bt->size_in_chunk; } continue; } nextb = bt->pos_in_buffer; GETNEXTB(); if (sc && bt->size_in_chunk) { uint8_t *scp = sc + bt->pos_in_chunk; uint8_t *sce = scp + bt->size_in_chunk; size_t size = S_SEGMENT * ii->n_elements; if ((bt->tid & CHUNK_SPLIT)) { int i; GRN_B_DEC(nchunks, scp); if (!(cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) { datavec_fin(ctx, dv); datavec_fin(ctx, rdv); return GRN_NO_MEMORY_AVAILABLE; } for (i = 0; i < nchunks; i++) { GRN_B_DEC(cinfo[i].segno, scp); GRN_B_DEC(cinfo[i].size, scp); GRN_B_DEC(cinfo[i].dgap, scp); crid += cinfo[i].dgap; if (bid.rid <= crid) { rc = chunk_merge(ctx, ii, sb, bt, &cinfo[i], crid, dv, &nextb, &sbp, &bid, &balance); if (rc) { datavec_fin(ctx, dv); datavec_fin(ctx, rdv); return rc; } } } } if (sce > scp) { size += grn_p_decv(ctx, scp, sce - scp, rdv, ii->n_elements); { int j = 0; sdf = rdv[j].data_size; srp = rdv[j++].data; if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; } stp = rdv[j++].data; if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; } snn = rdv[j].data_size; snp = rdv[j].data; } if ((rc = datavec_reset(ctx, dv, ii->n_elements, sdf + S_SEGMENT, size))) { datavec_fin(ctx, dv); datavec_fin(ctx, rdv); return rc; } } } { int j = 0; ridp = dv[j++].data; if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { sidp = dv[j++].data; } tfp = dv[j++].data; if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { weightp = dv[j++].data; } posp = dv[j].data; } GETNEXTC(); MERGE_BC(1); GRN_ASSERT(posp < dv[ii->n_elements].data); ndf = ridp - dv[0].data; /* { grn_obj buf; uint32_t rid, sid, tf, i, pos, *pp; GRN_TEXT_INIT(&buf, 0); rid = 0; pp = dv[3].data; for (i = 0; i < ndf; i++) { GRN_BULK_REWIND(&buf); rid += dv[0].data[i]; if (dv[0].data[i]) { sid = 0; } sid += dv[1].data[i] + 1; tf = dv[2].data[i] + 1; pos = 0; grn_text_itoa(ctx, &buf, rid); GRN_TEXT_PUTC(ctx, &buf, ':'); grn_text_itoa(ctx, &buf, sid); GRN_TEXT_PUTC(ctx, &buf, ':'); grn_text_itoa(ctx, &buf, tf); GRN_TEXT_PUTC(ctx, &buf, ':'); while (tf--) { pos += *pp++; grn_text_itoa(ctx, &buf, pos); if (tf) { GRN_TEXT_PUTC(ctx, &buf, ','); } } GRN_TEXT_PUTC(ctx, &buf, '\0'); GRN_LOG(ctx, GRN_LOG_NOTICE, "Posting:%s", GRN_TEXT_VALUE(&buf)); } GRN_OBJ_FIN(ctx, &buf); } */ { grn_id tid = bt->tid & GRN_ID_MAX; uint32_t *a = array_at(ctx, ii, tid); if (!a) { GRN_LOG(ctx, GRN_LOG_NOTICE, "array_entry not found tid=%d", tid); memset(bt, 0, sizeof(buffer_term)); nterms_void++; } else { if (!ndf && !nchunks) { a[0] = 0; a[1] = 0; lexicon_delete(ctx, ii, tid, h); memset(bt, 0, sizeof(buffer_term)); nterms_void++; } else if ((ii->header->flags & GRN_OBJ_WITH_SECTION) && !nchunks && ndf == 1 && lid.rid < 0x100000 && lid.sid < 0x800 && lid.tf == 1 && lid.weight == 0) { a[0] = (lid.rid << 12) + (lid.sid << 1) + 1; a[1] = (ii->header->flags & GRN_OBJ_WITH_POSITION) ? posp[-1] : 0; memset(bt, 0, sizeof(buffer_term)); nterms_void++; } else if (!(ii->header->flags & GRN_OBJ_WITH_SECTION) && !nchunks && ndf == 1 && lid.tf == 1 && lid.weight == 0) { a[0] = (lid.rid << 1) + 1; a[1] = (ii->header->flags & GRN_OBJ_WITH_POSITION) ? posp[-1] : 0; memset(bt, 0, sizeof(buffer_term)); nterms_void++; } else { int j = 0; uint8_t *dcp0; uint32_t encsize; uint32_t f_s = (ndf < 3) ? 0 : USE_P_ENC; uint32_t f_d = ((ndf < 16) || (ndf <= (lid.rid >> 8))) ? 0 : USE_P_ENC; dv[j].data_size = ndf; dv[j++].flags = f_d; if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { dv[j].data_size = ndf; dv[j++].flags = f_s; } dv[j].data_size = ndf; dv[j++].flags = f_s; if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { dv[j].data_size = ndf; dv[j++].flags = f_s; } if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) { uint32_t np = posp - dv[ii->n_elements - 1].data; uint32_t f_p = ((np < 32) || (np <= (spos >> 13))) ? 0 : USE_P_ENC; dv[j].data_size = np; dv[j].flags = f_p|ODD; } dcp0 = dcp; a[1] = (bt->size_in_chunk ? a[1] : 0) + (ndf - sdf) + balance; if (nchunks) { int i; GRN_B_ENC(nchunks, dcp); for (i = 0; i < nchunks; i++) { GRN_B_ENC(cinfo[i].segno, dcp); GRN_B_ENC(cinfo[i].size, dcp); GRN_B_ENC(cinfo[i].dgap, dcp); } } encsize = grn_p_encv(ctx, dv, ii->n_elements, dcp); if (sb->header.chunk_size + S_SEGMENT <= (dcp - dc) + encsize) { int i; char buf[255], *bufp; GRN_LOG(ctx, GRN_LOG_NOTICE, "cs(%d)+(%d)=(%d)<=(%" GRN_FMT_LLD ")+(%d)=(%" GRN_FMT_LLD ")", sb->header.chunk_size, S_SEGMENT, sb->header.chunk_size + S_SEGMENT, (long long int)(dcp - dc), encsize, (long long int)((dcp - dc) + encsize)); for (j = 0; j < ii->n_elements; j++) { GRN_LOG(ctx, GRN_LOG_NOTICE, "rdv[%d] data_size=%d, flags=%d", j, rdv[j].data_size, rdv[j].flags); for (i = 0, bufp = buf; i < rdv[j].data_size;) { bufp += sprintf(bufp, " %d", rdv[j].data[i]); i++; if (!(i % 32) || i == rdv[j].data_size) { GRN_LOG(ctx, GRN_LOG_NOTICE, "rdv[%d].data[%d]%s", j, i, buf); bufp = buf; } } } for (j = 0; j < ii->n_elements; j++) { GRN_LOG(ctx, GRN_LOG_NOTICE, "dv[%d] data_size=%d, flags=%d", j, dv[j].data_size, dv[j].flags); for (i = 0, bufp = buf; i < dv[j].data_size;) { bufp += sprintf(bufp, " %d", dv[j].data[i]); i++; if (!(i % 32) || i == dv[j].data_size) { GRN_LOG(ctx, GRN_LOG_NOTICE, "dv[%d].data[%d]%s", j, i, buf); bufp = buf; } } } } if (encsize > CHUNK_SPLIT_THRESHOLD && (cinfo || (cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) && !chunk_flush(ctx, ii, &cinfo[nchunks], dcp, encsize)) { int i; cinfo[nchunks].dgap = lid.rid - crid; nchunks++; dcp = dcp0; GRN_B_ENC(nchunks, dcp); for (i = 0; i < nchunks; i++) { GRN_B_ENC(cinfo[i].segno, dcp); GRN_B_ENC(cinfo[i].size, dcp); GRN_B_ENC(cinfo[i].dgap, dcp); } GRN_LOG(ctx, GRN_LOG_NOTICE, "split (%d) encsize=%d", tid, encsize); bt->tid |= CHUNK_SPLIT; } else { dcp += encsize; } bt->pos_in_chunk = (uint32_t)(dcp0 - dc); bt->size_in_chunk = (uint32_t)(dcp - dcp0); bt->size_in_buffer = 0; bt->pos_in_buffer = 0; } array_unref(ii, tid); } } if (cinfo) { GRN_FREE(cinfo); } } datavec_fin(ctx, rdv); datavec_fin(ctx, dv); db->header.chunk_size = (uint32_t)(dcp - dc); db->header.buffer_free = S_SEGMENT - sizeof(buffer_header) - db->header.nterms * sizeof(buffer_term); db->header.nterms_void = nterms_void; return rc; } static void fake_map2(grn_ctx *ctx, grn_io *io, grn_io_win *iw, void *addr, uint32_t seg, uint32_t size) { iw->ctx = ctx; iw->diff = 0; iw->io = io; iw->mode = grn_io_wronly; iw->segment = ((seg) >> GRN_II_N_CHUNK_VARIATION); iw->offset = (((seg) & ((1 << GRN_II_N_CHUNK_VARIATION) - 1)) << GRN_II_W_LEAST_CHUNK); iw->size = size; iw->cached = 0; iw->addr = addr; } static grn_rc buffer_flush(grn_ctx *ctx, grn_ii *ii, uint32_t seg, grn_hash *h) { grn_rc rc; grn_io_win sw, dw; buffer *sb, *db = NULL; uint8_t *dc, *sc = NULL; uint32_t ds, pseg, scn, dcn = 0; if (ii->header->binfo[seg] == NOT_ASSIGNED) { return GRN_FILE_CORRUPT; } if ((ds = segment_get(ctx, ii)) == MAX_PSEG) { return GRN_NO_MEMORY_AVAILABLE; } pseg = buffer_open(ctx, ii, SEG2POS(seg, 0), NULL, &sb); if (pseg != NOT_ASSIGNED) { GRN_IO_SEG_REF(ii->seg, ds, db); if (db) { uint32_t actual_chunk_size = 0; uint32_t max_dest_chunk_size = sb->header.chunk_size + S_SEGMENT; if ((dc = GRN_MALLOC(max_dest_chunk_size * 2))) { if ((scn = sb->header.chunk) == NOT_ASSIGNED || (sc = WIN_MAP2(ii->chunk, ctx, &sw, scn, 0, sb->header.chunk_size, grn_io_rdonly))) { uint16_t n = sb->header.nterms; memset(db, 0, S_SEGMENT); memcpy(db->terms, sb->terms, n * sizeof(buffer_term)); db->header.nterms = n; if (!(rc = buffer_merge(ctx, ii, seg, h, sb, sc, db, dc))) { actual_chunk_size = db->header.chunk_size; if (actual_chunk_size >= max_dest_chunk_size) { GRN_LOG(ctx, GRN_LOG_WARNING, "actual_chunk_size(%d) >= max_dest_chunk_size(%d)", actual_chunk_size, max_dest_chunk_size); } if (!actual_chunk_size || !(rc = chunk_new(ctx, ii, &dcn, actual_chunk_size))) { db->header.chunk = actual_chunk_size ? dcn : NOT_ASSIGNED; fake_map2(ctx, ii->chunk, &dw, dc, dcn, actual_chunk_size); if (!(rc = grn_io_win_unmap2(&dw))) { buffer_segment_update(ii, seg, ds); ii->header->total_chunk_size += actual_chunk_size; if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); chunk_free(ctx, ii, scn, 0, sb->header.chunk_size); ii->header->total_chunk_size -= sb->header.chunk_size; } } else { GRN_FREE(dc); if (actual_chunk_size) { chunk_free(ctx, ii, dcn, 0, actual_chunk_size); } if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); } } } else { GRN_FREE(dc); if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); } } } else { GRN_FREE(dc); if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); } } } else { GRN_FREE(dc); rc = GRN_NO_MEMORY_AVAILABLE; } } else { rc = GRN_NO_MEMORY_AVAILABLE; } GRN_IO_SEG_UNREF(ii->seg, ds); } else { rc = GRN_NO_MEMORY_AVAILABLE; } buffer_close(ctx, ii, pseg); } else { rc = GRN_NO_MEMORY_AVAILABLE; } return rc; } void grn_ii_buffer_check(grn_ctx *ctx, grn_ii *ii, uint32_t seg) { grn_rc rc; grn_io_win sw; buffer *sb; uint8_t *sc = NULL; uint32_t pseg, scn, nterms_with_corrupt_chunk = 0, nterm_with_chunk = 0; uint32_t ndeleted_terms_with_value = 0; buffer_term *bt; uint8_t *sbp = NULL; datavec rdv[MAX_N_ELEMENTS + 1]; uint16_t n; int nterms_void = 0; int size_in_buffer = 0; grn_obj buf; size_t lower_bound; int64_t nloops = 0, nviolations = 0; if (ii->header->binfo[seg] == NOT_ASSIGNED) { GRN_OUTPUT_BOOL(GRN_FALSE); return; } pseg = buffer_open(ctx, ii, SEG2POS(seg, 0), NULL, &sb); if (pseg == NOT_ASSIGNED) { GRN_OUTPUT_BOOL(GRN_FALSE); return; } lower_bound = (sb->header.buffer_free + sizeof(buffer_term) * sb->header.nterms) / sizeof(buffer_rec); datavec_init(ctx, rdv, ii->n_elements, 0, 0); if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) { rdv[ii->n_elements - 1].flags = ODD; } GRN_OUTPUT_MAP_OPEN("BUFFER", -1); GRN_OUTPUT_CSTR("buffer id"); GRN_OUTPUT_INT64(seg); if ((scn = sb->header.chunk) == NOT_ASSIGNED) { GRN_OUTPUT_CSTR("void chunk size"); GRN_OUTPUT_INT64(sb->header.chunk_size); } else { if ((sc = WIN_MAP2(ii->chunk, ctx, &sw, scn, 0, sb->header.chunk_size, grn_io_rdonly))) { GRN_OUTPUT_CSTR("chunk size"); GRN_OUTPUT_INT64(sb->header.chunk_size); } else { GRN_OUTPUT_CSTR("unmappable chunk size"); GRN_OUTPUT_INT64(sb->header.chunk_size); } } GRN_OUTPUT_CSTR("buffer term"); GRN_OUTPUT_ARRAY_OPEN("TERMS", sb->header.nterms); GRN_OBJ_INIT(&buf, GRN_BULK, 0, ii->lexicon->header.domain); for (bt = sb->terms, n = sb->header.nterms; n; n--, bt++) { grn_id tid, tid_; char key[GRN_TABLE_MAX_KEY_SIZE]; int key_size; uint16_t nextb; uint32_t nchunks = 0; chunk_info *cinfo = NULL; grn_id crid = GRN_ID_NIL; docinfo bid = {0, 0}; uint32_t sdf = 0, snn = 0; uint32_t *srp = NULL, *ssp = NULL, *stp = NULL, *sop = NULL, *snp = NULL; if (!bt->tid && !bt->pos_in_buffer && !bt->size_in_buffer) { nterms_void++; continue; } GRN_OUTPUT_ARRAY_OPEN("TERM", -1); tid = (bt->tid & GRN_ID_MAX); key_size = grn_table_get_key(ctx, ii->lexicon, tid, key, GRN_TABLE_MAX_KEY_SIZE); tid_ = grn_table_get(ctx, ii->lexicon, key, key_size); GRN_TEXT_SET(ctx, &buf, key, key_size); GRN_OUTPUT_OBJ(&buf, NULL); GRN_OUTPUT_INT64(bt->tid); GRN_OUTPUT_INT64(tid_); nextb = bt->pos_in_buffer; size_in_buffer += bt->size_in_buffer; if (tid != tid_ && (bt->size_in_buffer || bt->size_in_chunk)) { ndeleted_terms_with_value++; } GETNEXTB(); GRN_OUTPUT_INT64(bt->size_in_buffer); GRN_OUTPUT_INT64(bt->size_in_chunk); if (sc && bt->size_in_chunk) { uint8_t *scp = sc + bt->pos_in_chunk; uint8_t *sce = scp + bt->size_in_chunk; size_t size = S_SEGMENT * ii->n_elements; if ((bt->tid & CHUNK_SPLIT)) { int i; GRN_B_DEC(nchunks, scp); if (!(cinfo = GRN_MALLOCN(chunk_info, nchunks + 1))) { datavec_fin(ctx, rdv); GRN_OBJ_FIN(ctx, &buf); return; } for (i = 0; i < nchunks; i++) { GRN_B_DEC(cinfo[i].segno, scp); GRN_B_DEC(cinfo[i].size, scp); GRN_B_DEC(cinfo[i].dgap, scp); crid += cinfo[i].dgap; } } if (sce > scp) { size += grn_p_decv(ctx, scp, sce - scp, rdv, ii->n_elements); { int j = 0; sdf = rdv[j].data_size; GRN_OUTPUT_INT64(sdf); srp = rdv[j++].data; if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { ssp = rdv[j++].data; } if (sdf != rdv[j].data_size) { nterms_with_corrupt_chunk++; } stp = rdv[j++].data; if ((ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { sop = rdv[j++].data; } GRN_OUTPUT_INT64(rdv[j].data_size); snn = rdv[j].data_size; snp = rdv[j].data; } nterm_with_chunk++; } } { uint16_t pos; grn_id rid, sid, rid_ = 0, sid_ = 0; uint8_t *p; buffer_rec *r; for (pos = bt->pos_in_buffer; pos; pos = r->step) { if (pos < lower_bound) { nviolations++; } r = BUFFER_REC_AT(sb, pos); p = NEXT_ADDR(r); GRN_B_DEC(rid, p); if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { GRN_B_DEC(sid, p); } else { sid = 1; } if (rid < rid_ || (rid == rid_ && sid < sid_)) { nloops++; } rid_ = rid; sid_ = sid; } } GRN_OUTPUT_ARRAY_CLOSE(); if (cinfo) { GRN_FREE(cinfo); } } GRN_OBJ_FIN(ctx, &buf); GRN_OUTPUT_ARRAY_CLOSE(); GRN_OUTPUT_CSTR("buffer free"); GRN_OUTPUT_INT64(sb->header.buffer_free); GRN_OUTPUT_CSTR("size in buffer"); GRN_OUTPUT_INT64(size_in_buffer); GRN_OUTPUT_CSTR("nterms"); GRN_OUTPUT_INT64(sb->header.nterms); if (nterms_void != sb->header.nterms_void) { GRN_OUTPUT_CSTR("nterms void gap"); GRN_OUTPUT_INT64(nterms_void - sb->header.nterms_void); } GRN_OUTPUT_CSTR("nterms with chunk"); GRN_OUTPUT_INT64(nterm_with_chunk); if (nterms_with_corrupt_chunk) { GRN_OUTPUT_CSTR("nterms with corrupt chunk"); GRN_OUTPUT_INT64(nterms_with_corrupt_chunk); } if (ndeleted_terms_with_value) { GRN_OUTPUT_CSTR("number of deleted terms with value"); GRN_OUTPUT_INT64(ndeleted_terms_with_value); } if (nloops) { GRN_OUTPUT_CSTR("number of loops"); GRN_OUTPUT_INT64(nloops); } if (nviolations) { GRN_OUTPUT_CSTR("number of violations"); GRN_OUTPUT_INT64(nviolations); } GRN_OUTPUT_MAP_CLOSE(); datavec_fin(ctx, rdv); if (sc) { grn_io_win_unmap2(&sw); } buffer_close(ctx, ii, pseg); } typedef struct { buffer_term *bt; const char *key; uint32_t key_size; } term_sort; static int term_compar(const void *t1, const void *t2) { int r; const term_sort *x = (term_sort *)t1, *y = (term_sort *)t2; if (x->key_size > y->key_size) { return (r = memcmp(x->key, y->key, y->key_size)) ? r : x->key_size - y->key_size; } else { return (r = memcmp(x->key, y->key, x->key_size)) ? r : x->key_size - y->key_size; } } static grn_rc term_split(grn_ctx *ctx, grn_obj *lexicon, buffer *sb, buffer *db0, buffer *db1) { uint16_t i, n, *nt; buffer_term *bt; uint32_t s, th = (sb->header.chunk_size + sb->header.nterms) >> 1; term_sort *ts = GRN_MALLOC(sb->header.nterms * sizeof(term_sort)); if (!ts) { return GRN_NO_MEMORY_AVAILABLE; } for (i = 0, n = sb->header.nterms, bt = sb->terms; n; bt++, n--) { if (bt->tid) { grn_id tid = bt->tid & GRN_ID_MAX; ts[i].key = _grn_table_key(ctx, lexicon, tid, &ts[i].key_size); ts[i].bt = bt; i++; } } qsort(ts, i, sizeof(term_sort), term_compar); memset(db0, 0, S_SEGMENT); bt = db0->terms; nt = &db0->header.nterms; for (s = 0; n + 1 < i && s <= th; n++, bt++) { memcpy(bt, ts[n].bt, sizeof(buffer_term)); (*nt)++; s += ts[n].bt->size_in_chunk + 1; } memset(db1, 0, S_SEGMENT); bt = db1->terms; nt = &db1->header.nterms; for (; n < i; n++, bt++) { memcpy(bt, ts[n].bt, sizeof(buffer_term)); (*nt)++; } GRN_FREE(ts); GRN_LOG(ctx, GRN_LOG_NOTICE, "d0=%d d1=%d", db0->header.nterms, db1->header.nterms); return GRN_SUCCESS; } static void array_update(grn_ctx *ctx, grn_ii *ii, uint32_t dls, buffer *db) { uint16_t n; buffer_term *bt; uint32_t *a, pos = SEG2POS(dls, sizeof(buffer_header)); for (n = db->header.nterms, bt = db->terms; n; n--, bt++) { if (bt->tid) { grn_id tid = bt->tid & GRN_ID_MAX; if ((a = array_at(ctx, ii, tid))) { a[0] = pos; array_unref(ii, tid); } else { GRN_LOG(ctx, GRN_LOG_WARNING, "array_at failed (%d)", tid); } } pos += sizeof(buffer_term) >> 2; } } static grn_rc buffer_split(grn_ctx *ctx, grn_ii *ii, uint32_t seg, grn_hash *h) { grn_rc rc; grn_io_win sw, dw0, dw1; buffer *sb, *db0 = NULL, *db1 = NULL; uint8_t *sc = NULL, *dc0, *dc1; uint32_t dps0, dps1, dls0, dls1, sps, scn, dcn0 = 0, dcn1 = 0; if (ii->header->binfo[seg] == NOT_ASSIGNED) { return GRN_FILE_CORRUPT; } if ((rc = buffer_segment_reserve(ctx, ii, &dls0, &dps0, &dls1, &dps1))) { return rc; } sps = buffer_open(ctx, ii, SEG2POS(seg, 0), NULL, &sb); if (sps != NOT_ASSIGNED) { GRN_IO_SEG_REF(ii->seg, dps0, db0); if (db0) { GRN_IO_SEG_REF(ii->seg, dps1, db1); if (db1) { uint32_t actual_db0_chunk_size = 0; uint32_t actual_db1_chunk_size = 0; uint32_t max_dest_chunk_size = sb->header.chunk_size + S_SEGMENT; if ((dc0 = GRN_MALLOC(max_dest_chunk_size * 2))) { if ((dc1 = GRN_MALLOC(max_dest_chunk_size * 2))) { if ((scn = sb->header.chunk) == NOT_ASSIGNED || (sc = WIN_MAP2(ii->chunk, ctx, &sw, scn, 0, sb->header.chunk_size, grn_io_rdonly))) { term_split(ctx, ii->lexicon, sb, db0, db1); if (!(rc = buffer_merge(ctx, ii, seg, h, sb, sc, db0, dc0))) { actual_db0_chunk_size = db0->header.chunk_size; if (actual_db0_chunk_size >= max_dest_chunk_size) { GRN_LOG(ctx, GRN_LOG_WARNING, "actual_db0_chunk_size(%d) >= max_dest_chunk_size(%d)", actual_db0_chunk_size, max_dest_chunk_size); } if (!actual_db0_chunk_size || !(rc = chunk_new(ctx, ii, &dcn0, actual_db0_chunk_size))) { db0->header.chunk = actual_db0_chunk_size ? dcn0 : NOT_ASSIGNED; fake_map2(ctx, ii->chunk, &dw0, dc0, dcn0, actual_db0_chunk_size); if (!(rc = grn_io_win_unmap2(&dw0))) { if (!(rc = buffer_merge(ctx, ii, seg, h, sb, sc, db1, dc1))) { actual_db1_chunk_size = db1->header.chunk_size; if (actual_db1_chunk_size >= max_dest_chunk_size) { GRN_LOG(ctx, GRN_LOG_WARNING, "actual_db1_chunk_size(%d) >= max_dest_chunk_size(%d)", actual_db1_chunk_size, max_dest_chunk_size); } if (!actual_db1_chunk_size || !(rc = chunk_new(ctx, ii, &dcn1, actual_db1_chunk_size))) { fake_map2(ctx, ii->chunk, &dw1, dc1, dcn1, actual_db1_chunk_size); if (!(rc = grn_io_win_unmap2(&dw1))) { db1->header.chunk = actual_db1_chunk_size ? dcn1 : NOT_ASSIGNED; buffer_segment_update(ii, dls0, dps0); buffer_segment_update(ii, dls1, dps1); array_update(ctx, ii, dls0, db0); array_update(ctx, ii, dls1, db1); buffer_segment_clear(ii, seg); ii->header->total_chunk_size += actual_db0_chunk_size; ii->header->total_chunk_size += actual_db1_chunk_size; if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); chunk_free(ctx, ii, scn, 0, sb->header.chunk_size); ii->header->total_chunk_size -= sb->header.chunk_size; } } else { if (actual_db1_chunk_size) { chunk_free(ctx, ii, dcn1, 0, actual_db1_chunk_size); } if (actual_db0_chunk_size) { chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size); } GRN_FREE(dc1); if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); } } } else { if (actual_db0_chunk_size) { chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size); } GRN_FREE(dc1); if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); } } } else { if (actual_db0_chunk_size) { chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size); } GRN_FREE(dc1); if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); } } } else { if (actual_db0_chunk_size) { chunk_free(ctx, ii, dcn0, 0, actual_db0_chunk_size); } GRN_FREE(dc1); GRN_FREE(dc0); if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); } } } else { GRN_FREE(dc1); GRN_FREE(dc0); if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); } } } else { GRN_FREE(dc1); GRN_FREE(dc0); if (scn != NOT_ASSIGNED) { grn_io_win_unmap2(&sw); } } } else { GRN_FREE(dc1); GRN_FREE(dc0); rc = GRN_NO_MEMORY_AVAILABLE; } } else { GRN_FREE(dc0); rc = GRN_NO_MEMORY_AVAILABLE; } } else { rc = GRN_NO_MEMORY_AVAILABLE; } GRN_IO_SEG_UNREF(ii->seg, dps1); } else { rc = GRN_NO_MEMORY_AVAILABLE; } GRN_IO_SEG_UNREF(ii->seg, dps0); } else { rc = GRN_NO_MEMORY_AVAILABLE; } buffer_close(ctx, ii, sps); } else { rc = GRN_NO_MEMORY_AVAILABLE; } return rc; } #define SCALE_FACTOR 2048 #define MAX_NTERMS 8192 #define SPLIT_COND (b->header.nterms > 1024 ||\ (b->header.nterms > 1 &&\ b->header.chunk_size * 100 > ii->header->total_chunk_size)) inline static uint32_t buffer_new(grn_ctx *ctx, grn_ii *ii, int size, uint32_t *pos, buffer_term **bt, buffer_rec **br, buffer **bp, grn_id id, grn_hash *h) { buffer *b = NULL; grn_id tid; uint16_t offset; char key[GRN_TABLE_MAX_KEY_SIZE]; // unsigned int key_size; // const char *key = _grn_table_key(ctx, ii->lexicon, id, &key_size); int key_size = grn_table_get_key(ctx, ii->lexicon, id, key, GRN_TABLE_MAX_KEY_SIZE); uint32_t *a, lseg = NOT_ASSIGNED, pseg = NOT_ASSIGNED; grn_table_cursor *tc = NULL; if (S_SEGMENT - sizeof(buffer_header) < size + sizeof(buffer_term)) { GRN_LOG(ctx, GRN_LOG_CRIT, "requested size(%d) is too large", size); return NOT_ASSIGNED; } if (ii->lexicon->header.type == GRN_TABLE_PAT_KEY) { if (ii->lexicon->header.flags & GRN_OBJ_KEY_VAR_SIZE) { tc = grn_table_cursor_open(ctx, ii->lexicon, key, key_size, NULL, 0, 0, -1, GRN_CURSOR_ASCENDING|GRN_CURSOR_GT); } else { tc = grn_table_cursor_open(ctx, ii->lexicon, NULL, 0, key, key_size, 0, -1, GRN_CURSOR_PREFIX); } } else { tc = grn_table_cursor_open(ctx, ii->lexicon, NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_ASCENDING); } if (tc) { while (lseg == NOT_ASSIGNED && (tid = grn_table_cursor_next(ctx, tc))) { if ((a = array_at(ctx, ii, tid))) { for (;;) { uint32_t pos = a[0]; if (!pos || (pos & 1)) { break; } if ((pseg = buffer_open(ctx, ii, pos, NULL, &b)) == NOT_ASSIGNED) { break; } if (b->header.buffer_free >= size + sizeof(buffer_term)) { lseg = LSEG(pos); break; } buffer_close(ctx, ii, pseg); if (SPLIT_COND) /* ((S_SEGMENT - sizeof(buffer_header) + ii->header->bmax - b->header.nterms * sizeof(buffer_term)) * 4 < b->header.chunk_size) */ { GRN_LOG(ctx, GRN_LOG_NOTICE, "nterms=%d chunk=%d total=%" GRN_FMT_INT64U, b->header.nterms, b->header.chunk_size, ii->header->total_chunk_size >> 10); if (buffer_split(ctx, ii, LSEG(pos), h)) { break; } } else { if (S_SEGMENT - sizeof(buffer_header) - b->header.nterms * sizeof(buffer_term) < size + sizeof(buffer_term)) { break; } if (buffer_flush(ctx, ii, LSEG(pos), h)) { break; } } } array_unref(ii, tid); } } grn_table_cursor_close(ctx, tc); } if (lseg == NOT_ASSIGNED) { if (buffer_segment_new(ctx, ii, &lseg) || (pseg = buffer_open(ctx, ii, SEG2POS(lseg, 0), NULL, &b)) == NOT_ASSIGNED) { return NOT_ASSIGNED; } memset(b, 0, S_SEGMENT); b->header.buffer_free = S_SEGMENT - sizeof(buffer_header); b->header.chunk = NOT_ASSIGNED; } if (b->header.nterms_void) { for (offset = 0; offset < b->header.nterms; offset++) { if (!b->terms[offset].tid) { break; } } if (offset == b->header.nterms) { GRN_LOG(ctx, GRN_LOG_NOTICE, "inconsistent buffer(%d)", lseg); b->header.nterms_void = 0; b->header.nterms++; b->header.buffer_free -= size + sizeof(buffer_term); } else { b->header.nterms_void--; b->header.buffer_free -= size; } } else { offset = b->header.nterms++; b->header.buffer_free -= size + sizeof(buffer_term); } *pos = SEG2POS(lseg, (sizeof(buffer_header) + sizeof(buffer_term) * offset)); *bt = &b->terms[offset]; *br = (buffer_rec *)(((byte *)&b->terms[b->header.nterms]) + b->header.buffer_free); *bp = b; return pseg; } /* ii */ static grn_ii * _grn_ii_create(grn_ctx *ctx, grn_ii *ii, const char *path, grn_obj *lexicon, uint32_t flags) { int i; grn_io *seg, *chunk; char path2[PATH_MAX]; struct grn_ii_header *header; grn_obj_flags lflags; grn_encoding encoding; grn_obj *tokenizer; /* for (i = 0; i < 32; i++) { new_histogram[i] = 0; free_histogram[i] = 0; } */ if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer, NULL, NULL)) { return NULL; } if (path && strlen(path) + 6 >= PATH_MAX) { return NULL; } seg = grn_io_create(ctx, path, sizeof(struct grn_ii_header), S_SEGMENT, MAX_PSEG, grn_io_auto, GRN_IO_EXPIRE_SEGMENT); if (!seg) { return NULL; } if (path) { strcpy(path2, path); strcat(path2, ".c"); chunk = grn_io_create(ctx, path2, 0, S_CHUNK, GRN_II_MAX_CHUNK, grn_io_auto, GRN_IO_EXPIRE_SEGMENT); } else { chunk = grn_io_create(ctx, NULL, 0, S_CHUNK, GRN_II_MAX_CHUNK, grn_io_auto, 0); } if (!chunk) { grn_io_close(ctx, seg); return NULL; } header = grn_io_header(seg); grn_io_set_type(seg, GRN_COLUMN_INDEX); for (i = 0; i < GRN_II_MAX_LSEG; i++) { header->ainfo[i] = NOT_ASSIGNED; header->binfo[i] = NOT_ASSIGNED; } for (i = 0; i <= GRN_II_N_CHUNK_VARIATION; i++) { header->free_chunks[i] = NOT_ASSIGNED; header->garbages[i] = NOT_ASSIGNED; } header->flags = flags; ii->seg = seg; ii->chunk = chunk; ii->lexicon = lexicon; ii->lflags = lflags; ii->encoding = encoding; ii->header = header; ii->n_elements = 2; if ((flags & GRN_OBJ_WITH_SECTION)) { ii->n_elements++; } if ((flags & GRN_OBJ_WITH_WEIGHT)) { ii->n_elements++; } if ((flags & GRN_OBJ_WITH_POSITION)) { ii->n_elements++; } return ii; } grn_ii * grn_ii_create(grn_ctx *ctx, const char *path, grn_obj *lexicon, uint32_t flags) { grn_ii *ii = NULL; if (!(ii = GRN_GMALLOC(sizeof(grn_ii)))) { return NULL; } GRN_DB_OBJ_SET_TYPE(ii, GRN_COLUMN_INDEX); if (!_grn_ii_create(ctx, ii, path, lexicon, flags)) { GRN_FREE(ii); return NULL; } return ii; } grn_rc grn_ii_remove(grn_ctx *ctx, const char *path) { grn_rc rc; char buffer[PATH_MAX]; if (!path || strlen(path) > PATH_MAX - 4) { return GRN_INVALID_ARGUMENT; } if ((rc = grn_io_remove(ctx, path))) { goto exit; } snprintf(buffer, PATH_MAX, "%s.c", path); rc = grn_io_remove(ctx, buffer); exit : return rc; } grn_rc grn_ii_truncate(grn_ctx *ctx, grn_ii *ii) { grn_rc rc; const char *io_segpath, *io_chunkpath; char *segpath, *chunkpath = NULL; grn_obj *lexicon; uint32_t flags; if ((io_segpath = grn_io_path(ii->seg)) && *io_segpath != '\0') { if (!(segpath = GRN_STRDUP(io_segpath))) { ERR(GRN_NO_MEMORY_AVAILABLE, "cannot duplicate path: <%s>", io_segpath); return GRN_NO_MEMORY_AVAILABLE; } if ((io_chunkpath = grn_io_path(ii->chunk)) && *io_chunkpath != '\0') { if (!(chunkpath = GRN_STRDUP(io_chunkpath))) { ERR(GRN_NO_MEMORY_AVAILABLE, "cannot duplicate path: <%s>", io_chunkpath); return GRN_NO_MEMORY_AVAILABLE; } } else { chunkpath = NULL; } } else { segpath = NULL; } lexicon = ii->lexicon; flags = ii->header->flags; if ((rc = grn_io_close(ctx, ii->seg))) { goto exit; } if ((rc = grn_io_close(ctx, ii->chunk))) { goto exit; } ii->seg = NULL; ii->chunk = NULL; if (segpath && (rc = grn_io_remove(ctx, segpath))) { goto exit; } if (chunkpath && (rc = grn_io_remove(ctx, chunkpath))) { goto exit; } if (!_grn_ii_create(ctx, ii, segpath, lexicon, flags)) { rc = GRN_UNKNOWN_ERROR; } exit: if (segpath) { GRN_FREE(segpath); } if (chunkpath) { GRN_FREE(chunkpath); } return rc; } grn_ii * grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon) { grn_io *seg, *chunk; grn_ii *ii; char path2[PATH_MAX]; struct grn_ii_header *header; grn_obj_flags lflags; grn_encoding encoding; grn_obj *tokenizer; if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer, NULL, NULL)) { return NULL; } if (strlen(path) + 6 >= PATH_MAX) { return NULL; } strcpy(path2, path); strcat(path2, ".c"); seg = grn_io_open(ctx, path, grn_io_auto); if (!seg) { return NULL; } chunk = grn_io_open(ctx, path2, grn_io_auto); if (!chunk) { grn_io_close(ctx, seg); return NULL; } header = grn_io_header(seg); if (grn_io_get_type(seg) != GRN_COLUMN_INDEX) { ERR(GRN_INVALID_FORMAT, "file type unmatch"); grn_io_close(ctx, seg); grn_io_close(ctx, chunk); return NULL; } if (!(ii = GRN_GMALLOC(sizeof(grn_ii)))) { grn_io_close(ctx, seg); grn_io_close(ctx, chunk); return NULL; } GRN_DB_OBJ_SET_TYPE(ii, GRN_COLUMN_INDEX); ii->seg = seg; ii->chunk = chunk; ii->lexicon = lexicon; ii->lflags = lflags; ii->encoding = encoding; ii->header = header; ii->n_elements = 2; if ((header->flags & GRN_OBJ_WITH_SECTION)) { ii->n_elements++; } if ((header->flags & GRN_OBJ_WITH_WEIGHT)) { ii->n_elements++; } if ((header->flags & GRN_OBJ_WITH_POSITION)) { ii->n_elements++; } return ii; } grn_rc grn_ii_close(grn_ctx *ctx, grn_ii *ii) { grn_rc rc; if (!ii) { return GRN_INVALID_ARGUMENT; } if ((rc = grn_io_close(ctx, ii->seg))) { return rc; } if ((rc = grn_io_close(ctx, ii->chunk))) { return rc; } GRN_GFREE(ii); /* { int i; for (i = 0; i < 32; i++) { GRN_LOG(ctx, GRN_LOG_NOTICE, "new[%d]=%d free[%d]=%d", i, new_histogram[i], i, free_histogram[i]); } } */ return rc; } grn_rc grn_ii_info(grn_ctx *ctx, grn_ii *ii, uint64_t *seg_size, uint64_t *chunk_size) { grn_rc rc; if (seg_size) { if ((rc = grn_io_size(ctx, ii->seg, seg_size))) { return rc; } } if (chunk_size) { if ((rc = grn_io_size(ctx, ii->chunk, chunk_size))) { return rc; } } return GRN_SUCCESS; } void grn_ii_expire(grn_ctx *ctx, grn_ii *ii) { /* grn_io_expire(ctx, ii->seg, 128, 1000000); */ grn_io_expire(ctx, ii->chunk, 0, 1000000); } #define BIT11_01(x) ((x >> 1) & 0x7ff) #define BIT31_12(x) (x >> 12) grn_rc grn_ii_update_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_hash *h) { grn_rc rc = GRN_SUCCESS; buffer *b; uint8_t *bs; buffer_rec *br = NULL; buffer_term *bt; uint32_t pseg = 0, pos = 0, size, *a; if (!tid) { return rc; } if (!u->tf || !u->sid) { return grn_ii_delete_one(ctx, ii, tid, u, h); } if (u->sid > ii->header->smax) { ii->header->smax = u->sid; } if (!(a = array_get(ctx, ii, tid))) { return GRN_NO_MEMORY_AVAILABLE; } if (!(bs = encode_rec(ctx, ii, u, &size, 0))) { rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } for (;;) { if (a[0]) { if (!(a[0] & 1)) { pos = a[0]; if ((pseg = buffer_open(ctx, ii, pos, &bt, &b)) == NOT_ASSIGNED) { rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } if (b->header.buffer_free < size) { int bfb = b->header.buffer_free; GRN_LOG(ctx, GRN_LOG_DEBUG, "flushing a[0]=%d seg=%d(%p) free=%d", a[0], LSEG(a[0]), b, b->header.buffer_free); buffer_close(ctx, ii, pseg); if (SPLIT_COND) /*((S_SEGMENT - sizeof(buffer_header) + ii->header->bmax - b->header.nterms * sizeof(buffer_term)) * 4 < b->header.chunk_size)*/ { GRN_LOG(ctx, GRN_LOG_NOTICE, "nterms=%d chunk=%d total=%" GRN_FMT_INT64U, b->header.nterms, b->header.chunk_size, ii->header->total_chunk_size >> 10); if ((rc = buffer_split(ctx, ii, LSEG(pos), h))) { goto exit; } continue; } if ((rc = buffer_flush(ctx, ii, LSEG(pos), h))) { goto exit; } if (a[0] != pos) { GRN_LOG(ctx, GRN_LOG_DEBUG, "grn_ii_update_one: a[0] changed %d->%d", a[0], pos); continue; } if ((pseg = buffer_open(ctx, ii, pos, &bt, &b)) == NOT_ASSIGNED) { GRN_LOG(ctx, GRN_LOG_CRIT, "buffer not found a[0]=%d", a[0]); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } GRN_LOG(ctx, GRN_LOG_DEBUG, "flushed a[0]=%d seg=%d(%p) free=%d->%d nterms=%d v=%d", a[0], LSEG(a[0]), b, bfb, b->header.buffer_free, b->header.nterms, b->header.nterms_void); if (b->header.buffer_free < size) { buffer_close(ctx, ii, pseg); GRN_LOG(ctx, GRN_LOG_CRIT, "buffer(%d) is full (%d < %d) in grn_ii_update_one", a[0], b->header.buffer_free, size); /* todo: direct merge */ rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } } b->header.buffer_free -= size; br = (buffer_rec *)(((byte *)&b->terms[b->header.nterms]) + b->header.buffer_free); } else { grn_ii_updspec u2; uint32_t size2 = 0, v = a[0]; struct _grn_ii_pos pos2; pos2.pos = a[1]; pos2.next = NULL; u2.pos = &pos2; if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { u2.rid = BIT31_12(v); u2.sid = BIT11_01(v); } else { u2.rid = v >> 1; u2.sid = 1; } u2.tf = 1; u2.weight = 0; if (u2.rid != u->rid || u2.sid != u->sid) { uint8_t *bs2 = encode_rec(ctx, ii, &u2, &size2, 0); if (!bs2) { GRN_LOG(ctx, GRN_LOG_ALERT, "encode_rec on grn_ii_update_one failed !"); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } pseg = buffer_new(ctx, ii, size + size2, &pos, &bt, &br, &b, tid, h); if (pseg == NOT_ASSIGNED) { GRN_FREE(bs2); goto exit; } bt->tid = tid; bt->size_in_chunk = 0; bt->pos_in_chunk = 0; bt->size_in_buffer = 0; bt->pos_in_buffer = 0; if ((rc = buffer_put(ctx, ii, b, bt, br, bs2, &u2, size2))) { GRN_FREE(bs2); buffer_close(ctx, ii, pseg); goto exit; } br = (buffer_rec *)(((byte *)br) + size2); GRN_FREE(bs2); } } } break; } if (!br) { if (u->tf == 1 && u->weight == 0) { if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { if (u->rid < 0x100000 && u->sid < 0x800) { a[0] = (u->rid << 12) + (u->sid << 1) + 1; a[1] = u->pos->pos; goto exit; } } else { a[0] = (u->rid << 1) + 1; a[1] = u->pos->pos; goto exit; } } pseg = buffer_new(ctx, ii, size, &pos, &bt, &br, &b, tid, h); if (pseg == NOT_ASSIGNED) { goto exit; } bt->tid = tid; bt->size_in_chunk = 0; bt->pos_in_chunk = 0; bt->size_in_buffer = 0; bt->pos_in_buffer = 0; } rc = buffer_put(ctx, ii, b, bt, br, bs, u, size); buffer_close(ctx, ii, pseg); if (!a[0] || (a[0] & 1)) { a[0] = pos; } exit : array_unref(ii, tid); if (bs) { GRN_FREE(bs); } if (u->tf != u->atf) { char term[GRN_TABLE_MAX_KEY_SIZE]; int term_size; term_size = grn_table_get_key(ctx, ii->lexicon, tid, term, GRN_TABLE_MAX_KEY_SIZE); GRN_LOG(ctx, GRN_LOG_WARNING, "too many postings(%d). %d postings are discarded. " "term: <%d>(<%.*s>)", u->atf, u->atf - u->tf, tid, term_size, term); } grn_ii_expire(ctx, ii); return rc; } grn_rc grn_ii_delete_one(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_ii_updspec *u, grn_hash *h) { grn_rc rc = GRN_SUCCESS; buffer *b; uint8_t *bs = NULL; buffer_rec *br; buffer_term *bt; uint32_t pseg, size, *a; if (!tid) { return rc; } if (!(a = array_at(ctx, ii, tid))) { return GRN_INVALID_ARGUMENT; } for (;;) { if (!a[0]) { goto exit; } if (a[0] & 1) { if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { uint32_t rid = BIT31_12(a[0]); uint32_t sid = BIT11_01(a[0]); if (u->rid == rid && (!u->sid || u->sid == sid)) { a[0] = 0; lexicon_delete(ctx, ii, tid, h); } } else { uint32_t rid = a[0] >> 1; if (u->rid == rid) { a[0] = 0; lexicon_delete(ctx, ii, tid, h); } } goto exit; } if (!(bs = encode_rec(ctx, ii, u, &size, 1))) { rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } if ((pseg = buffer_open(ctx, ii, a[0], &bt, &b)) == NOT_ASSIGNED) { rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } if (b->header.buffer_free < size) { uint32_t _a = a[0]; GRN_LOG(ctx, GRN_LOG_DEBUG, "flushing! b=%p free=%d, seg(%d)", b, b->header.buffer_free, LSEG(a[0])); buffer_close(ctx, ii, pseg); if ((rc = buffer_flush(ctx, ii, LSEG(a[0]), h))) { goto exit; } if (a[0] != _a) { GRN_LOG(ctx, GRN_LOG_DEBUG, "grn_ii_delete_one: a[0] changed %d->%d)", a[0], _a); continue; } if ((pseg = buffer_open(ctx, ii, a[0], &bt, &b)) == NOT_ASSIGNED) { rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } GRN_LOG(ctx, GRN_LOG_DEBUG, "flushed! b=%p free=%d, seg(%d)", b, b->header.buffer_free, LSEG(a[0])); if (b->header.buffer_free < size) { GRN_LOG(ctx, GRN_LOG_CRIT, "buffer(%d) is full (%d < %d) in grn_ii_delete_one", a[0], b->header.buffer_free, size); rc = GRN_NO_MEMORY_AVAILABLE; buffer_close(ctx, ii, pseg); goto exit; } } b->header.buffer_free -= size; br = (buffer_rec *)(((byte *)&b->terms[b->header.nterms]) + b->header.buffer_free); rc = buffer_put(ctx, ii, b, bt, br, bs, u, size); buffer_close(ctx, ii, pseg); break; } exit : array_unref(ii, tid); if (bs) { GRN_FREE(bs); } return rc; } #define CHUNK_USED 1 #define BUFFER_USED 2 #define SOLE_DOC_USED 4 #define SOLE_POS_USED 8 struct _grn_ii_cursor { grn_db_obj obj; grn_ctx *ctx; grn_ii *ii; grn_id id; grn_ii_posting *post; grn_id min; grn_id max; grn_ii_posting pc; grn_ii_posting pb; uint32_t cdf; uint32_t *cdp; uint32_t *crp; uint32_t *csp; uint32_t *ctp; uint32_t *cwp; uint32_t *cpp; uint8_t *bp; int nelements; uint32_t nchunks; uint32_t curr_chunk; chunk_info *cinfo; grn_io_win iw; uint8_t *cp; uint8_t *cpe; datavec rdv[MAX_N_ELEMENTS + 1]; struct grn_ii_buffer *buf; uint16_t stat; uint16_t nextb; uint32_t buffer_pseg; int flags; uint32_t *ppseg; }; static int buffer_is_reused(grn_ctx *ctx, grn_ii *ii, grn_ii_cursor *c) { if (*c->ppseg != c->buffer_pseg) { uint32_t i; for (i = ii->header->bgqtail; i != ii->header->bgqhead; i = (i + 1) & (GRN_II_BGQSIZE - 1)) { if (ii->header->bgqbody[i] == c->buffer_pseg) { return 0; } } return 1; } return 0; } static int chunk_is_reused(grn_ctx *ctx, grn_ii *ii, grn_ii_cursor *c, uint32_t offset, uint32_t size) { if (*c->ppseg != c->buffer_pseg) { uint32_t i, m, gseg; if (size > S_CHUNK) { return 1; } if (size > (1 << GRN_II_W_LEAST_CHUNK)) { int es = size - 1; GRN_BIT_SCAN_REV(es, m); m++; } else { m = GRN_II_W_LEAST_CHUNK; } gseg = ii->header->garbages[m - GRN_II_W_LEAST_CHUNK]; while (gseg != NOT_ASSIGNED) { grn_io_win iw; grn_ii_ginfo *ginfo = WIN_MAP2(ii->chunk, ctx, &iw, gseg, 0, S_GARBAGE, grn_io_rdwr); if (!ginfo) { break; } for (i = 0; i < ginfo->nrecs; i++) { if (ginfo->recs[i] == offset) { grn_io_win_unmap2(&iw); return 0; } } gseg = ginfo->next; grn_io_win_unmap2(&iw); } return 1; } return 0; } #define GRN_II_CURSOR_CMP(c1,c2) \ (((c1)->post->rid > (c2)->post->rid) || \ (((c1)->post->rid == (c2)->post->rid) && \ (((c1)->post->sid > (c2)->post->sid) || \ (((c1)->post->sid == (c2)->post->sid) && \ ((c1)->post->pos > (c2)->post->pos))))) grn_ii_cursor * grn_ii_cursor_open(grn_ctx *ctx, grn_ii *ii, grn_id tid, grn_id min, grn_id max, int nelements, int flags) { grn_ii_cursor *c = NULL; uint32_t pos, *a; if (!(a = array_at(ctx, ii, tid))) { return NULL; } for (;;) { if (!(pos = a[0])) { goto exit; } if (!(c = GRN_MALLOC(sizeof(grn_ii_cursor)))) { goto exit; } memset(c, 0, sizeof(grn_ii_cursor)); c->ctx = ctx; c->ii = ii; c->id = tid; c->min = min; c->max = max; c->nelements = nelements; c->flags = flags; if (pos & 1) { c->stat = 0; if ((ii->header->flags & GRN_OBJ_WITH_SECTION)) { c->pb.rid = BIT31_12(pos); c->pb.sid = BIT11_01(pos); } else { c->pb.rid = pos >> 1; c->pb.sid = 1; } c->pb.tf = 1; c->pb.weight = 0; c->pb.pos = a[1]; } else { uint32_t chunk; buffer_term *bt; if ((c->buffer_pseg = buffer_open(ctx, ii, pos, &bt, &c->buf)) == NOT_ASSIGNED) { GRN_FREE(c); c = NULL; goto exit; } c->ppseg = &ii->header->binfo[LSEG(pos)]; if (bt->size_in_chunk && (chunk = c->buf->header.chunk) != NOT_ASSIGNED) { if (!(c->cp = WIN_MAP2(ii->chunk, ctx, &c->iw, chunk, bt->pos_in_chunk, bt->size_in_chunk, grn_io_rdonly))) { buffer_close(ctx, ii, c->buffer_pseg); GRN_FREE(c); c = NULL; goto exit; } if (buffer_is_reused(ctx, ii, c)) { grn_ii_cursor_close(ctx, c); continue; } c->cpe = c->cp + bt->size_in_chunk; if ((bt->tid & CHUNK_SPLIT)) { int i; grn_id crid; GRN_B_DEC(c->nchunks, c->cp); if (chunk_is_reused(ctx, ii, c, chunk, c->buf->header.chunk_size)) { grn_ii_cursor_close(ctx, c); continue; } if (!(c->cinfo = GRN_MALLOCN(chunk_info, c->nchunks))) { buffer_close(ctx, ii, c->buffer_pseg); grn_io_win_unmap2(&c->iw); GRN_FREE(c); c = NULL; goto exit; } for (i = 0, crid = GRN_ID_NIL; i < c->nchunks; i++) { GRN_B_DEC(c->cinfo[i].segno, c->cp); GRN_B_DEC(c->cinfo[i].size, c->cp); GRN_B_DEC(c->cinfo[i].dgap, c->cp); crid += c->cinfo[i].dgap; if (crid < min) { c->curr_chunk = i + 1; } } if (chunk_is_reused(ctx, ii, c, chunk, c->buf->header.chunk_size)) { grn_ii_cursor_close(ctx, c); continue; } } if ((ii->header->flags & GRN_OBJ_WITH_POSITION)) { c->rdv[ii->n_elements - 1].flags = ODD; } } c->nextb = bt->pos_in_buffer; c->stat = CHUNK_USED|BUFFER_USED; } if (pos == a[0]) { break; } grn_ii_cursor_close(ctx, c); } exit : array_unref(ii, tid); return c; } grn_ii_posting * grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c) { if (c->buf) { for (;;) { if (c->stat & CHUNK_USED) { for (;;) { if (c->crp < c->cdp + c->cdf) { uint32_t dgap = *c->crp++; c->pc.rid += dgap; if (dgap) { c->pc.sid = 0; } if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) { c->pc.sid += 1 + *c->csp++; } else { c->pc.sid = 1; } c->cpp += c->pc.rest; c->pc.rest = c->pc.tf = 1 + *c->ctp++; if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { c->pc.weight = *c->cwp++; } else { c->pc.weight = 0; } c->pc.pos = 0; /* { static int count = 0; int tf = c->pc.tf, pos = 0, *pp = (int *)c->cpp; grn_obj buf; GRN_TEXT_INIT(&buf, 0); grn_text_itoa(ctx, &buf, c->pc.rid); GRN_TEXT_PUTC(ctx, &buf, ':'); grn_text_itoa(ctx, &buf, c->pc.sid); GRN_TEXT_PUTC(ctx, &buf, ':'); grn_text_itoa(ctx, &buf, c->pc.tf); GRN_TEXT_PUTC(ctx, &buf, '('); while (tf--) { pos += *pp++; count++; grn_text_itoa(ctx, &buf, pos); if (tf) { GRN_TEXT_PUTC(ctx, &buf, ':'); } } GRN_TEXT_PUTC(ctx, &buf, ')'); GRN_TEXT_PUTC(ctx, &buf, '\0'); GRN_LOG(ctx, GRN_LOG_NOTICE, "posting(%d):%s", count, GRN_TEXT_VALUE(&buf)); GRN_OBJ_FIN(ctx, &buf); } */ } else { if (c->curr_chunk <= c->nchunks) { if (c->curr_chunk == c->nchunks) { if (c->cp < c->cpe) { grn_p_decv(ctx, c->cp, c->cpe - c->cp, c->rdv, c->ii->n_elements); } else { c->pc.rid = 0; break; } } else { uint8_t *cp; grn_io_win iw; uint32_t size = c->cinfo[c->curr_chunk].size; if (size && (cp = WIN_MAP2(c->ii->chunk, ctx, &iw, c->cinfo[c->curr_chunk].segno, 0, size, grn_io_rdonly))) { grn_p_decv(ctx, cp, size, c->rdv, c->ii->n_elements); grn_io_win_unmap2(&iw); if (chunk_is_reused(ctx, c->ii, c, c->cinfo[c->curr_chunk].segno, size)) { GRN_LOG(ctx, GRN_LOG_WARNING, "chunk(%d) is reused by another thread", c->cinfo[c->curr_chunk].segno); c->pc.rid = 0; break; } } else { c->pc.rid = 0; break; } } { int j = 0; c->cdf = c->rdv[j].data_size; c->crp = c->cdp = c->rdv[j++].data; if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) { c->csp = c->rdv[j++].data; } c->ctp = c->rdv[j++].data; if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { c->cwp = c->rdv[j++].data; } c->cpp = c->rdv[j].data; } c->pc.rid = 0; c->pc.sid = 0; c->pc.rest = 0; c->curr_chunk++; continue; } else { c->pc.rid = 0; } } break; } } if (c->stat & BUFFER_USED) { if (c->nextb) { uint32_t lrid = c->pb.rid, lsid = c->pb.sid; /* for check */ buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb); if (buffer_is_reused(ctx, c->ii, c)) { GRN_LOG(ctx, GRN_LOG_NOTICE, "buffer reused(%d,%d)", c->buffer_pseg, *c->ppseg); // todo : rewind; } c->bp = NEXT_ADDR(br); GRN_B_DEC(c->pb.rid, c->bp); if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) { GRN_B_DEC(c->pb.sid, c->bp); } else { c->pb.sid = 1; } if (lrid > c->pb.rid || (lrid == c->pb.rid && lsid >= c->pb.sid)) { ERR(GRN_FILE_CORRUPT, "brokend!! (%d:%d) -> (%d:%d) (%d->%d)", lrid, lsid, c->pb.rid, c->pb.sid, c->buffer_pseg, *c->ppseg); } c->nextb = br->step; GRN_B_DEC(c->pb.tf, c->bp); if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { GRN_B_DEC(c->pb.weight, c->bp); } else { c->pb.weight = 0; } c->pb.rest = c->pb.tf; c->pb.pos = 0; } else { c->pb.rid = 0; } } if (c->pb.rid) { if (c->pc.rid) { if (c->pc.rid < c->pb.rid) { c->stat = CHUNK_USED; if (c->pc.tf && c->pc.sid) { c->post = &c->pc; break; } } else { if (c->pb.rid < c->pc.rid) { c->stat = BUFFER_USED; if (c->pb.tf && c->pb.sid) { c->post = &c->pb; break; } } else { if (c->pb.sid) { if (c->pc.sid < c->pb.sid) { c->stat = CHUNK_USED; if (c->pc.tf && c->pc.sid) { c->post = &c->pc; break; } } else { c->stat = BUFFER_USED; if (c->pb.sid == c->pc.sid) { c->stat |= CHUNK_USED; } if (c->pb.tf) { c->post = &c->pb; break; } } } else { c->stat = CHUNK_USED; } } } } else { c->stat = BUFFER_USED; if (c->pb.tf && c->pb.sid) { c->post = &c->pb; break; } } } else { if (c->pc.rid) { c->stat = CHUNK_USED; if (c->pc.tf && c->pc.sid) { c->post = &c->pc; break; } } else { c->post = NULL; return NULL; } } } } else { if (c->stat & SOLE_DOC_USED) { c->post = NULL; return NULL; } else { c->post = &c->pb; c->stat |= SOLE_DOC_USED; } } return c->post; } grn_ii_posting * grn_ii_cursor_next_pos(grn_ctx *ctx, grn_ii_cursor *c) { uint32_t gap; if ((c->ii->header->flags & GRN_OBJ_WITH_POSITION)) { if (c->nelements == c->ii->n_elements) { if (c->buf) { if (c->post == &c->pc) { if (c->pc.rest) { c->pc.rest--; c->pc.pos += *c->cpp++; } else { return NULL; } } else if (c->post == &c->pb) { if (buffer_is_reused(ctx, c->ii, c)) { GRN_LOG(ctx, GRN_LOG_NOTICE, "buffer reused(%d,%d)", c->buffer_pseg, *c->ppseg); // todo : rewind; } if (c->pb.rest) { c->pb.rest--; GRN_B_DEC(gap, c->bp); c->pb.pos += gap; } else { return NULL; } } else { return NULL; } } else { if (c->stat & SOLE_POS_USED) { return NULL; } else { c->stat |= SOLE_POS_USED; } } } } else { if (c->stat & SOLE_POS_USED) { return NULL; } else { c->stat |= SOLE_POS_USED; } } return c->post; } grn_rc grn_ii_cursor_close(grn_ctx *ctx, grn_ii_cursor *c) { if (!c) { return GRN_INVALID_ARGUMENT; } datavec_fin(ctx, c->rdv); if (c->cinfo) { GRN_FREE(c->cinfo); } if (c->buf) { buffer_close(ctx, c->ii, c->buffer_pseg); } if (c->cp) { grn_io_win_unmap2(&c->iw); } GRN_FREE(c); return GRN_SUCCESS; } uint32_t grn_ii_get_chunksize(grn_ctx *ctx, grn_ii *ii, grn_id tid) { uint32_t res, pos, *a; a = array_at(ctx, ii, tid); if (!a) { return 0; } if ((pos = a[0])) { if (pos & 1) { res = 0; } else { buffer *buf; uint32_t pseg; buffer_term *bt; if ((pseg = buffer_open(ctx, ii, pos, &bt, &buf)) == NOT_ASSIGNED) { res = 0; } else { res = bt->size_in_chunk; buffer_close(ctx, ii, pseg); } } } else { res = 0; } array_unref(ii, tid); return res; } uint32_t grn_ii_estimate_size(grn_ctx *ctx, grn_ii *ii, grn_id tid) { uint32_t res, pos, *a; a = array_at(ctx, ii, tid); if (!a) { return 0; } if ((pos = a[0])) { if (pos & 1) { res = 1; } else { buffer *buf; uint32_t pseg; buffer_term *bt; if ((pseg = buffer_open(ctx, ii, pos, &bt, &buf)) == NOT_ASSIGNED) { res = 0; } else { res = a[1] + bt->size_in_buffer + 2; buffer_close(ctx, ii, pseg); } } } else { res = 0; } array_unref(ii, tid); return res; } int grn_ii_entry_info(grn_ctx *ctx, grn_ii *ii, grn_id tid, unsigned int *a, unsigned int *chunk, unsigned int *chunk_size, unsigned int *buffer_free, unsigned int *nterms, unsigned int *nterms_void, unsigned int *bt_tid, unsigned int *size_in_chunk, unsigned int *pos_in_chunk, unsigned int *size_in_buffer, unsigned int *pos_in_buffer) { buffer *b; buffer_term *bt; uint32_t pseg, *ap; ERRCLR(NULL); ap = array_at(ctx, ii, tid); if (!ap) { return 0; } a[0] = *ap; array_unref(ii, tid); if (!a[0]) { return 1; } if (a[0] & 1) { return 2; } if ((pseg = buffer_open(ctx, ii, a[0], &bt, &b)) == NOT_ASSIGNED) { return 3; } *chunk = b->header.chunk; *chunk_size = b->header.chunk_size; *buffer_free = b->header.buffer_free; *nterms = b->header.nterms; *bt_tid = bt->tid; *size_in_chunk = bt->size_in_chunk; *pos_in_chunk = bt->pos_in_chunk; *size_in_buffer = bt->size_in_buffer; *pos_in_buffer = bt->pos_in_buffer; buffer_close(ctx, ii, pseg); return 4; } const char * grn_ii_path(grn_ii *ii) { return grn_io_path(ii->seg); } uint32_t grn_ii_max_section(grn_ii *ii) { return ii->header->smax; } grn_obj * grn_ii_lexicon(grn_ii *ii) { return ii->lexicon; } /* private classes */ /* b-heap */ typedef struct { int n_entries; int n_bins; grn_ii_cursor **bins; } cursor_heap; static inline cursor_heap * cursor_heap_open(grn_ctx *ctx, int max) { cursor_heap *h = GRN_MALLOC(sizeof(cursor_heap)); if (!h) { return NULL; } h->bins = GRN_MALLOC(sizeof(grn_ii_cursor *) * max); if (!h->bins) { GRN_FREE(h); return NULL; } h->n_entries = 0; h->n_bins = max; return h; } static inline grn_rc cursor_heap_push(grn_ctx *ctx, cursor_heap *h, grn_ii *ii, grn_id tid, uint32_t offset2) { int n, n2; grn_ii_cursor *c, *c2; if (h->n_entries >= h->n_bins) { int max = h->n_bins * 2; grn_ii_cursor **bins = GRN_REALLOC(h->bins, sizeof(grn_ii_cursor *) * max); GRN_LOG(ctx, GRN_LOG_DEBUG, "expanded cursor_heap to %d,%p", max, bins); if (!bins) { return GRN_NO_MEMORY_AVAILABLE; } h->n_bins = max; h->bins = bins; } { if (!(c = grn_ii_cursor_open(ctx, ii, tid, GRN_ID_NIL, GRN_ID_MAX, ii->n_elements, 0))) { GRN_LOG(ctx, GRN_LOG_ERROR, "cursor open failed"); return ctx->rc; } if (!grn_ii_cursor_next(ctx, c)) { grn_ii_cursor_close(ctx, c); return GRN_END_OF_DATA; } if (!grn_ii_cursor_next_pos(ctx, c)) { GRN_LOG(ctx, GRN_LOG_ERROR, "invalid ii_cursor b"); grn_ii_cursor_close(ctx, c); return GRN_END_OF_DATA; } n = h->n_entries++; while (n) { n2 = (n - 1) >> 1; c2 = h->bins[n2]; if (GRN_II_CURSOR_CMP(c, c2)) { break; } h->bins[n] = c2; n = n2; } h->bins[n] = c; } return GRN_SUCCESS; } static inline grn_rc cursor_heap_push2(cursor_heap *h) { grn_rc rc = GRN_SUCCESS; return rc; } static inline grn_ii_cursor * cursor_heap_min(cursor_heap *h) { return h->n_entries ? h->bins[0] : NULL; } static inline void cursor_heap_recalc_min(cursor_heap *h) { int n = 0, n1, n2, m; if ((m = h->n_entries) > 1) { grn_ii_cursor *c = h->bins[0], *c1, *c2; for (;;) { n1 = n * 2 + 1; n2 = n1 + 1; c1 = n1 < m ? h->bins[n1] : NULL; c2 = n2 < m ? h->bins[n2] : NULL; if (c1 && GRN_II_CURSOR_CMP(c, c1)) { if (c2 && GRN_II_CURSOR_CMP(c, c2) && GRN_II_CURSOR_CMP(c1, c2)) { h->bins[n] = c2; n = n2; } else { h->bins[n] = c1; n = n1; } } else { if (c2 && GRN_II_CURSOR_CMP(c, c2)) { h->bins[n] = c2; n = n2; } else { h->bins[n] = c; break; } } } } } static inline void cursor_heap_pop(grn_ctx *ctx, cursor_heap *h) { if (h->n_entries) { grn_ii_cursor *c = h->bins[0]; if (!grn_ii_cursor_next(ctx, c)) { grn_ii_cursor_close(ctx, c); h->bins[0] = h->bins[--h->n_entries]; } else if (!grn_ii_cursor_next_pos(ctx, c)) { GRN_LOG(ctx, GRN_LOG_ERROR, "invalid ii_cursor c"); grn_ii_cursor_close(ctx, c); h->bins[0] = h->bins[--h->n_entries]; } if (h->n_entries > 1) { cursor_heap_recalc_min(h); } } } static inline void cursor_heap_pop_pos(grn_ctx *ctx, cursor_heap *h) { if (h->n_entries) { grn_ii_cursor *c = h->bins[0]; if (!grn_ii_cursor_next_pos(ctx, c)) { if (!grn_ii_cursor_next(ctx, c)) { grn_ii_cursor_close(ctx, c); h->bins[0] = h->bins[--h->n_entries]; } else if (!grn_ii_cursor_next_pos(ctx, c)) { GRN_LOG(ctx, GRN_LOG_ERROR, "invalid ii_cursor d"); grn_ii_cursor_close(ctx, c); h->bins[0] = h->bins[--h->n_entries]; } } if (h->n_entries > 1) { cursor_heap_recalc_min(h); } } } static inline void cursor_heap_close(grn_ctx *ctx, cursor_heap *h) { int i; if (!h) { return; } for (i = h->n_entries; i--;) { grn_ii_cursor_close(ctx, h->bins[i]); } GRN_FREE(h->bins); GRN_FREE(h); } /* update */ #ifdef USE_VGRAM inline static grn_rc index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgram, const char *value, size_t value_len) { grn_hash *h; unsigned int token_flags = 0; grn_token_cursor *token_cursor; grn_ii_updspec **u; grn_id tid, *tp; grn_rc r, rc = GRN_SUCCESS; grn_vgram_buf *sbuf = NULL; if (!rid) { return GRN_INVALID_ARGUMENT; } if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, value, value_len, GRN_TOKEN_ADD, token_flags))) { return GRN_NO_MEMORY_AVAILABLE; } if (vgram) { sbuf = grn_vgram_buf_open(value_len); } h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!h) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on index_add failed !"); grn_token_cursor_close(ctx, token_cursor); if (sbuf) { grn_vgram_buf_close(sbuf); } return GRN_NO_MEMORY_AVAILABLE; } while (!token_cursor->status) { (tid = grn_token_cursor_next(ctx, token_cursor)); if (tid) { if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, 1))) { GRN_LOG(ctx, GRN_LOG_ERROR, "grn_ii_updspec_open on index_add failed!"); goto exit; } } if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, 0)) { GRN_LOG(ctx, GRN_LOG_ERROR, "grn_ii_updspec_add on index_add failed!"); goto exit; } if (sbuf) { grn_vgram_buf_add(sbuf, tid); } } } grn_token_cursor_close(ctx, token_cursor); // todo : support vgram // if (sbuf) { grn_vgram_update(vgram, rid, sbuf, (grn_set *)h); } GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, { if ((r = grn_ii_update_one(ctx, ii, *tp, *u, h))) { rc = r; } grn_ii_updspec_close(ctx, *u); }); grn_hash_close(ctx, h); if (sbuf) { grn_vgram_buf_close(sbuf); } return rc; exit: grn_hash_close(ctx, h); grn_token_cursor_close(ctx, token_cursor); if (sbuf) { grn_vgram_buf_close(sbuf); } return GRN_NO_MEMORY_AVAILABLE; } inline static grn_rc index_del(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgram, const char *value, size_t value_len) { grn_hash *h; unsigned int token_flags = 0; grn_token_cursor *token_cursor; grn_ii_updspec **u; grn_id tid, *tp; if (!rid) { return GRN_INVALID_ARGUMENT; } if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, value, value_len, GRN_TOKEN_DEL, token_flags))) { return GRN_NO_MEMORY_AVAILABLE; } h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!h) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on index_del failed !"); grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } while (!token_cursor->status) { if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, 0))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on index_del failed !"); grn_hash_close(ctx, h); grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } } } } grn_token_cursor_close(ctx, token_cursor); GRN_HASH_EACH(ctx, h, id, &tp, NULL, &u, { if (*tp) { grn_ii_delete_one(ctx, ii, *tp, *u, NULL); } grn_ii_updspec_close(ctx, *u); }); grn_hash_close(ctx, h); return GRN_SUCCESS; } grn_rc grn_ii_upd(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, const char *oldvalue, unsigned int oldvalue_len, const char *newvalue, unsigned int newvalue_len) { grn_rc rc; grn_obj *lexicon = ii->lexicon; if (!rid) { return GRN_INVALID_ARGUMENT; } if (oldvalue && *oldvalue) { if ((rc = index_del(ctx, rid, lexicon, ii, vgram, oldvalue, oldvalue_len))) { GRN_LOG(ctx, GRN_LOG_ERROR, "index_del on grn_ii_upd failed !"); goto exit; } } if (newvalue && *newvalue) { rc = index_add(ctx, rid, lexicon, ii, vgram, newvalue, newvalue_len); } exit : return rc; } grn_rc grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned int section, grn_values *oldvalues, grn_values *newvalues) { int j; grn_value *v; unsigned int token_flags = 0; grn_token_cursor *token_cursor; grn_rc rc = GRN_SUCCESS; grn_hash *old, *new; grn_id tid, *tp; grn_ii_updspec **u, **un; grn_obj *lexicon = ii->lexicon; if (!lexicon || !ii || !rid) { GRN_LOG(ctx, GRN_LOG_WARNING, "grn_ii_update: invalid argument"); return GRN_INVALID_ARGUMENT; } if (newvalues) { new = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!new) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on grn_ii_update failed !"); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } for (j = newvalues->n_values, v = newvalues->values; j; j--, v++) { if ((token_cursor = grn_token_cursor_open(ctx, lexicon, v->str, v->str_len, GRN_TOKEN_ADD, token_flags))) { while (!token_cursor->status) { if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (!grn_hash_add(ctx, new, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, section))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on grn_ii_update failed!"); grn_token_cursor_close(ctx, token_cursor); grn_hash_close(ctx, new); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } } if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); grn_token_cursor_close(ctx, token_cursor); grn_hash_close(ctx, new); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } } } grn_token_cursor_close(ctx, token_cursor); } } if (!GRN_HASH_SIZE(new)) { grn_hash_close(ctx, new); new = NULL; } } else { new = NULL; } if (oldvalues) { old = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!old) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create(ctx, NULL, old) on grn_ii_update failed!"); if (new) { grn_hash_close(ctx, new); } rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } for (j = oldvalues->n_values, v = oldvalues->values; j; j--, v++) { if ((token_cursor = grn_token_cursor_open(ctx, lexicon, v->str, v->str_len, GRN_TOKEN_DEL, token_flags))) { while (!token_cursor->status) { if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (!grn_hash_add(ctx, old, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, section))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on grn_ii_update failed!"); grn_token_cursor_close(ctx, token_cursor); if (new) { grn_hash_close(ctx, new); }; grn_hash_close(ctx, old); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } } if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); grn_token_cursor_close(ctx, token_cursor); if (new) { grn_hash_close(ctx, new); }; grn_hash_close(ctx, old); rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } } } grn_token_cursor_close(ctx, token_cursor); } } } else { old = NULL; } if (old) { grn_id eid; GRN_HASH_EACH(ctx, old, id, &tp, NULL, &u, { if (new && (eid = grn_hash_get(ctx, new, tp, sizeof(grn_id), (void **) &un))) { if (!grn_ii_updspec_cmp(*u, *un)) { grn_ii_updspec_close(ctx, *un); grn_hash_delete_by_id(ctx, new, eid, NULL); } } else { grn_ii_delete_one(ctx, ii, *tp, *u, new); } grn_ii_updspec_close(ctx, *u); }); grn_hash_close(ctx, old); } if (new) { GRN_HASH_EACH(ctx, new, id, &tp, NULL, &u, { grn_rc r; if ((r = grn_ii_update_one(ctx, ii, *tp, *u, new))) { rc = r; } grn_ii_updspec_close(ctx, *u); }); grn_hash_close(ctx, new); } else { if (!section) { /* todo: delete key when all sections deleted */ } } exit : return rc; } #endif /* USE_VGRAM */ static grn_rc grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, grn_obj *in, grn_obj *out, grn_token_mode mode, grn_obj *posting) { int j; grn_id tid; grn_section *v; grn_token_cursor *token_cursor; grn_ii_updspec **u; grn_hash *h = (grn_hash *)out; grn_obj *lexicon = ii->lexicon; if (in->u.v.body) { const char *head = GRN_BULK_HEAD(in->u.v.body); for (j = in->u.v.n_sections, v = in->u.v.sections; j; j--, v++) { unsigned int token_flags = 0; if (v->length && (token_cursor = grn_token_cursor_open(ctx, lexicon, head + v->offset, v->length, mode, token_flags))) { while (!token_cursor->status) { if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (posting) { GRN_RECORD_PUT(ctx, posting, tid); } if (!grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **) &u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, section))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on grn_ii_update failed!"); grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } } if (grn_ii_updspec_add(ctx, *u, token_cursor->pos, v->weight)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); grn_token_cursor_close(ctx, token_cursor); return GRN_NO_MEMORY_AVAILABLE; } } } grn_token_cursor_close(ctx, token_cursor); } } } return GRN_SUCCESS; } static grn_rc grn_uvector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, grn_obj *in, grn_obj *out) { int i, n; grn_ii_updspec **u; grn_hash *h = (grn_hash *)out; n = grn_vector_size(ctx, in); for (i = 0; i < n; i++) { grn_id id; unsigned int weight; id = grn_uvector_get_element(ctx, in, i, &weight); if (!grn_hash_add(ctx, h, &id, sizeof(grn_id), (void **)&u, NULL)) { break; } if (!*u) { if (!(*u = grn_ii_updspec_open(ctx, rid, section))) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_open on grn_ii_update failed!"); return GRN_NO_MEMORY_AVAILABLE; } } if (grn_ii_updspec_add(ctx, *u, i, weight)) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_ii_updspec_add on grn_ii_update failed!"); return GRN_NO_MEMORY_AVAILABLE; } } return GRN_SUCCESS; } grn_rc grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section, grn_obj *oldvalue, grn_obj *newvalue, grn_obj *posting) { grn_id *tp; grn_bool do_grn_ii_updspec_cmp = GRN_TRUE; grn_rc rc = GRN_SUCCESS; grn_ii_updspec **u, **un; grn_obj *old_, *old = oldvalue, *new_, *new = newvalue, oldv, newv, buf, *post = NULL; if (!ii || !ii->lexicon || !rid) { ERR(GRN_INVALID_ARGUMENT, "grn_ii_column_update: invalid argument"); return GRN_INVALID_ARGUMENT; } if (posting) { GRN_RECORD_INIT(&buf, GRN_OBJ_VECTOR, grn_obj_id(ctx, ii->lexicon)); post = &buf; } if (grn_io_lock(ctx, ii->seg, grn_lock_timeout)) { return ctx->rc; } if (new) { unsigned char type = (ii->obj.header.domain == new->header.domain) ? GRN_UVECTOR : new->header.type; switch (type) { case GRN_BULK : { if (grn_bulk_is_zero(ctx, new)) { do_grn_ii_updspec_cmp = GRN_FALSE; } new_ = new; GRN_OBJ_INIT(&newv, GRN_VECTOR, GRN_OBJ_DO_SHALLOW_COPY, GRN_DB_TEXT); newv.u.v.body = new; new = &newv; grn_vector_delimit(ctx, new, 0, GRN_ID_NIL); if (new_ != newvalue) { grn_obj_close(ctx, new_); } } /* fallthru */ case GRN_VECTOR : new_ = new; new = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!new) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on grn_ii_update failed !"); rc = GRN_NO_MEMORY_AVAILABLE; } else { rc = grn_vector2updspecs(ctx, ii, rid, section, new_, new, GRN_TOKEN_ADD, post); } if (new_ != newvalue) { grn_obj_close(ctx, new_); } if (rc) { goto exit; } break; case GRN_UVECTOR : new_ = new; new = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!new) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on grn_ii_update failed !"); rc = GRN_NO_MEMORY_AVAILABLE; } else { if (new_->header.type == GRN_UVECTOR) { rc = grn_uvector2updspecs(ctx, ii, rid, section, new_, new); } else { grn_obj uvector; unsigned int weight = 0; GRN_VALUE_FIX_SIZE_INIT(&uvector, GRN_OBJ_VECTOR, new_->header.domain); if (new_->header.impl_flags & GRN_OBJ_WITH_WEIGHT) { uvector.header.impl_flags |= GRN_OBJ_WITH_WEIGHT; } grn_uvector_add_element(ctx, &uvector, GRN_RECORD_VALUE(new_), weight); rc = grn_uvector2updspecs(ctx, ii, rid, section, &uvector, new); GRN_OBJ_FIN(ctx, &uvector); } } if (new_ != newvalue) { grn_obj_close(ctx, new_); } if (rc) { goto exit; } break; case GRN_TABLE_HASH_KEY : break; default : ERR(GRN_INVALID_ARGUMENT, "invalid object assigned as newvalue"); goto exit; } } if (posting) { grn_ii_updspec *u_; uint32_t offset = 0; grn_id tid_ = 0, gap, tid, *tpe; grn_table_sort_optarg arg = {GRN_TABLE_SORT_ASC| GRN_TABLE_SORT_AS_NUMBER| GRN_TABLE_SORT_AS_UNSIGNED, NULL, NULL,0 }; grn_array *sorted = grn_array_create(ctx, NULL, sizeof(grn_id), 0); grn_hash_sort(ctx, (grn_hash *)new, -1, sorted, &arg); GRN_TEXT_PUT(ctx, posting, ((grn_hash *)new)->n_entries, sizeof(uint32_t)); GRN_ARRAY_EACH(ctx, sorted, 0, 0, id, &tp, { grn_hash_get_key(ctx, (grn_hash *)new, *tp, &tid, sizeof(grn_id)); gap = tid - tid_; GRN_TEXT_PUT(ctx, posting, &gap, sizeof(grn_id)); tid_ = tid; }); GRN_ARRAY_EACH(ctx, sorted, 0, 0, id, &tp, { grn_hash_get_value(ctx, (grn_hash *)new, *tp, &u_); u_->offset = offset++; GRN_TEXT_PUT(ctx, posting, &u_->tf, sizeof(int32_t)); }); tpe = (grn_id *)GRN_BULK_CURR(post); for (tp = (grn_id *)GRN_BULK_HEAD(post); tp < tpe; tp++) { grn_hash_get(ctx, (grn_hash *)new, (void *)tp, sizeof(grn_id), (void **)&u); GRN_TEXT_PUT(ctx, posting, &(*u)->offset, sizeof(int32_t)); } GRN_OBJ_FIN(ctx, post); grn_array_close(ctx, sorted); } if (old) { unsigned char type = (ii->obj.header.domain == old->header.domain) ? GRN_UVECTOR : old->header.type; switch (type) { case GRN_BULK : { // const char *str = GRN_BULK_HEAD(old); // unsigned int str_len = GRN_BULK_VSIZE(old); old_ = old; GRN_OBJ_INIT(&oldv, GRN_VECTOR, GRN_OBJ_DO_SHALLOW_COPY, GRN_DB_TEXT); oldv.u.v.body = old; old = &oldv; grn_vector_delimit(ctx, old, 0, GRN_ID_NIL); if (old_ != oldvalue) { grn_obj_close(ctx, old_); } } /* fallthru */ case GRN_VECTOR : old_ = old; old = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!old) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create(ctx, NULL, old) on grn_ii_update failed!"); rc = GRN_NO_MEMORY_AVAILABLE; } else { rc = grn_vector2updspecs(ctx, ii, rid, section, old_, old, GRN_TOKEN_DEL, NULL); } if (old_ != oldvalue) { grn_obj_close(ctx, old_); } if (rc) { goto exit; } break; case GRN_UVECTOR : old_ = old; old = (grn_obj *)grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY); if (!old) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create(ctx, NULL, old) on grn_ii_update failed!"); rc = GRN_NO_MEMORY_AVAILABLE; } else { if (old_->header.type == GRN_UVECTOR) { rc = grn_uvector2updspecs(ctx, ii, rid, section, old_, old); } else { grn_obj uvector; unsigned int weight = 0; GRN_VALUE_FIX_SIZE_INIT(&uvector, GRN_OBJ_VECTOR, old_->header.domain); if (old_->header.impl_flags & GRN_OBJ_WITH_WEIGHT) { uvector.header.impl_flags |= GRN_OBJ_WITH_WEIGHT; } grn_uvector_add_element(ctx, &uvector, GRN_RECORD_VALUE(old_), weight); rc = grn_uvector2updspecs(ctx, ii, rid, section, &uvector, old); GRN_OBJ_FIN(ctx, &uvector); } } if (old_ != oldvalue) { grn_obj_close(ctx, old_); } if (rc) { goto exit; } break; case GRN_TABLE_HASH_KEY : break; default : ERR(GRN_INVALID_ARGUMENT, "invalid object assigned as oldvalue"); goto exit; } } if (old) { grn_id eid; grn_hash *o = (grn_hash *)old; grn_hash *n = (grn_hash *)new; GRN_HASH_EACH(ctx, o, id, &tp, NULL, &u, { if (n && (eid = grn_hash_get(ctx, n, tp, sizeof(grn_id), (void **) &un))) { if (do_grn_ii_updspec_cmp && !grn_ii_updspec_cmp(*u, *un)) { grn_ii_updspec_close(ctx, *un); grn_hash_delete_by_id(ctx, n, eid, NULL); } } else { grn_ii_delete_one(ctx, ii, *tp, *u, n); } grn_ii_updspec_close(ctx, *u); }); } if (new) { grn_hash *n = (grn_hash *)new; GRN_HASH_EACH(ctx, n, id, &tp, NULL, &u, { grn_rc r; if ((r = grn_ii_update_one(ctx, ii, *tp, *u, n))) { rc = r; } grn_ii_updspec_close(ctx, *u); }); } else { if (!section) { /* todo: delete key when all sections deleted */ } } exit : grn_io_unlock(ii->seg); if (old && old != oldvalue) { grn_obj_close(ctx, old); } if (new && new != newvalue) { grn_obj_close(ctx, new); } return ctx->rc; } /* token_info */ typedef struct { cursor_heap *cursors; int offset; int pos; int size; int ntoken; grn_ii_posting *p; } token_info; #define EX_NONE 0 #define EX_PREFIX 1 #define EX_SUFFIX 2 #define EX_BOTH 3 inline static void token_info_expand_both(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *key, unsigned int key_size, token_info *ti) { int s = 0; grn_hash *h, *g; uint32_t *offset2; grn_hash_cursor *c; grn_id *tp, *tq; if ((h = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0))) { grn_table_search(ctx, lexicon, key, key_size, GRN_OP_PREFIX, (grn_obj *)h, GRN_OP_OR); if (GRN_HASH_SIZE(h)) { if ((ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(h) + 256))) { if ((c = grn_hash_cursor_open(ctx, h, NULL, 0, NULL, 0, 0, -1, 0))) { uint32_t key2_size; const char *key2; while (grn_hash_cursor_next(ctx, c)) { grn_hash_cursor_get_key(ctx, c, (void **) &tp); key2 = _grn_table_key(ctx, lexicon, *tp, &key2_size); if (!key2) { break; } if ((lexicon->header.type != GRN_TABLE_PAT_KEY) || !(lexicon->header.flags & GRN_OBJ_KEY_WITH_SIS) || key2_size <= 2) { // todo: refine if ((s = grn_ii_estimate_size(ctx, ii, *tp))) { cursor_heap_push(ctx, ti->cursors, ii, *tp, 0); ti->ntoken++; ti->size += s; } } else { if ((g = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, GRN_HASH_TINY))) { grn_pat_suffix_search(ctx, (grn_pat *)lexicon, key2, key2_size, g); GRN_HASH_EACH(ctx, g, id, &tq, NULL, &offset2, { if ((s = grn_ii_estimate_size(ctx, ii, *tq))) { cursor_heap_push(ctx, ti->cursors, ii, *tq, /* *offset2 */ 0); ti->ntoken++; ti->size += s; } }); grn_hash_close(ctx, g); } } } grn_hash_cursor_close(ctx, c); } } } grn_hash_close(ctx, h); } } inline static grn_rc token_info_close(grn_ctx *ctx, token_info *ti) { cursor_heap_close(ctx, ti->cursors); GRN_FREE(ti); return GRN_SUCCESS; } inline static token_info * token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *key, unsigned int key_size, uint32_t offset, int mode) { int s = 0; grn_hash *h; token_info *ti; grn_id tid; grn_id *tp; if (!key) { return NULL; } if (!(ti = GRN_MALLOC(sizeof(token_info)))) { return NULL; } ti->cursors = NULL; ti->size = 0; ti->ntoken = 0; ti->offset = offset; switch (mode) { case EX_BOTH : token_info_expand_both(ctx, lexicon, ii, key, key_size, ti); break; case EX_NONE : if ((tid = grn_table_get(ctx, lexicon, key, key_size)) && (s = grn_ii_estimate_size(ctx, ii, tid)) && (ti->cursors = cursor_heap_open(ctx, 1))) { cursor_heap_push(ctx, ti->cursors, ii, tid, 0); ti->ntoken++; ti->size = s; } break; case EX_PREFIX : if ((h = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0))) { grn_table_search(ctx, lexicon, key, key_size, GRN_OP_PREFIX, (grn_obj *)h, GRN_OP_OR); if (GRN_HASH_SIZE(h)) { if ((ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(h)))) { GRN_HASH_EACH(ctx, h, id, &tp, NULL, NULL, { if ((s = grn_ii_estimate_size(ctx, ii, *tp))) { cursor_heap_push(ctx, ti->cursors, ii, *tp, 0); ti->ntoken++; ti->size += s; } }); } } grn_hash_close(ctx, h); } break; case EX_SUFFIX : if ((h = grn_hash_create(ctx, NULL, sizeof(grn_id), 0, 0))) { grn_table_search(ctx, lexicon, key, key_size, GRN_OP_SUFFIX, (grn_obj *)h, GRN_OP_OR); if (GRN_HASH_SIZE(h)) { if ((ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(h)))) { uint32_t *offset2; GRN_HASH_EACH(ctx, h, id, &tp, NULL, &offset2, { if ((s = grn_ii_estimate_size(ctx, ii, *tp))) { cursor_heap_push(ctx, ti->cursors, ii, *tp, /* *offset2 */ 0); ti->ntoken++; ti->size += s; } }); } } grn_hash_close(ctx, h); } break; } if (cursor_heap_push2(ti->cursors)) { token_info_close(ctx, ti); return NULL; } { grn_ii_cursor *ic; if (ti->cursors && (ic = cursor_heap_min(ti->cursors))) { grn_ii_posting *p = ic->post; ti->pos = p->pos - ti->offset; ti->p = p; } else { token_info_close(ctx, ti); ti = NULL; } } return ti; } static inline grn_rc token_info_skip(grn_ctx *ctx, token_info *ti, uint32_t rid, uint32_t sid) { grn_ii_cursor *c; grn_ii_posting *p; for (;;) { if (!(c = cursor_heap_min(ti->cursors))) { return GRN_END_OF_DATA; } p = c->post; if (p->rid > rid || (p->rid == rid && p->sid >= sid)) { break; } cursor_heap_pop(ctx, ti->cursors); } ti->pos = p->pos - ti->offset; ti->p = p; return GRN_SUCCESS; } static inline grn_rc token_info_skip_pos(grn_ctx *ctx, token_info *ti, uint32_t rid, uint32_t sid, uint32_t pos) { grn_ii_cursor *c; grn_ii_posting *p; pos += ti->offset; for (;;) { if (!(c = cursor_heap_min(ti->cursors))) { return GRN_END_OF_DATA; } p = c->post; if (p->rid != rid || p->sid != sid || p->pos >= pos) { break; } cursor_heap_pop_pos(ctx, ti->cursors); } ti->pos = p->pos - ti->offset; ti->p = p; return GRN_SUCCESS; } inline static int token_compare(const void *a, const void *b) { const token_info *t1 = *((token_info **)a), *t2 = *((token_info **)b); return t1->size - t2->size; } inline static grn_rc token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, unsigned int string_len, token_info **tis, uint32_t *n, grn_bool *only_skip_token, grn_operator mode) { token_info *ti; const char *key; uint32_t size; grn_rc rc = GRN_END_OF_DATA; unsigned int token_flags = GRN_TOKEN_ENABLE_TOKENIZED_DELIMITER; grn_token_cursor *token_cursor = grn_token_cursor_open(ctx, lexicon, string, string_len, GRN_TOKEN_GET, token_flags); *only_skip_token = GRN_FALSE; if (!token_cursor) { return GRN_NO_MEMORY_AVAILABLE; } if (mode == GRN_OP_UNSPLIT) { if ((ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, token_cursor->orig_blen, 0, EX_BOTH))) { tis[(*n)++] = ti; rc = GRN_SUCCESS; } } else { grn_id tid; int ef; switch (mode) { case GRN_OP_PREFIX : ef = EX_PREFIX; break; case GRN_OP_SUFFIX : ef = EX_SUFFIX; break; case GRN_OP_PARTIAL : ef = EX_BOTH; break; default : ef = EX_NONE; break; } tid = grn_token_cursor_next(ctx, token_cursor); if (token_cursor->force_prefix) { ef |= EX_PREFIX; } switch (token_cursor->status) { case GRN_TOKEN_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_SUFFIX); break; case GRN_TOKEN_DONE : ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr, token_cursor->curr_size, 0, ef); /* key = _grn_table_key(ctx, lexicon, tid, &size); ti = token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef); ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, token_cursor->orig_blen, token_cursor->pos, ef); */ break; case GRN_TOKEN_NOT_FOUND : ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, token_cursor->orig_blen, 0, ef); break; case GRN_TOKEN_DONE_SKIP : *only_skip_token = GRN_TRUE; goto exit; default : goto exit; } if (!ti) { goto exit ; } tis[(*n)++] = ti; while (token_cursor->status == GRN_TOKEN_DOING) { tid = grn_token_cursor_next(ctx, token_cursor); switch (token_cursor->status) { case GRN_TOKEN_DONE_SKIP : continue; case GRN_TOKEN_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, EX_NONE); break; case GRN_TOKEN_DONE : if (tid) { key = _grn_table_key(ctx, lexicon, tid, &size); ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, ef & EX_PREFIX); break; } /* else fallthru */ default : ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef & EX_PREFIX); break; } if (!ti) { goto exit; } tis[(*n)++] = ti; } rc = GRN_SUCCESS; } exit : grn_token_cursor_close(ctx, token_cursor); return rc; } static void token_info_clear_offset(token_info **tis, uint32_t n) { token_info **tie; for (tie = tis + n; tis < tie; tis++) { (*tis)->offset = 0; } } /* select */ inline static void res_add(grn_ctx *ctx, grn_hash *s, grn_rset_posinfo *pi, uint32_t score, grn_operator op) { grn_rset_recinfo *ri; switch (op) { case GRN_OP_OR : if (grn_hash_add(ctx, s, pi, s->key_size, (void **)&ri, NULL)) { if (s->obj.header.flags & GRN_OBJ_WITH_SUBREC) { grn_table_add_subrec((grn_obj *)s, ri, score, pi, 1); } } break; case GRN_OP_AND : if (grn_hash_get(ctx, s, pi, s->key_size, (void **)&ri)) { if (s->obj.header.flags & GRN_OBJ_WITH_SUBREC) { ri->n_subrecs |= GRN_RSET_UTIL_BIT; grn_table_add_subrec((grn_obj *)s, ri, score, pi, 1); } } break; case GRN_OP_AND_NOT : { grn_id id; if ((id = grn_hash_get(ctx, s, pi, s->key_size, (void **)&ri))) { grn_hash_delete_by_id(ctx, s, id, NULL); } } break; case GRN_OP_ADJUST : if (grn_hash_get(ctx, s, pi, s->key_size, (void **)&ri)) { if (s->obj.header.flags & GRN_OBJ_WITH_SUBREC) { ri->score += score; } } break; default : break; } } grn_rc grn_ii_posting_add(grn_ctx *ctx, grn_ii_posting *pos, grn_hash *s, grn_operator op) { res_add(ctx, s, (grn_rset_posinfo *)(pos), (1 + pos->weight), op); return ctx->rc; } #ifdef USE_BHEAP /* todo */ #else /* USE_BHEAP */ struct _btr_node { struct _btr_node *car; struct _btr_node *cdr; token_info *ti; }; typedef struct _btr_node btr_node; typedef struct { int n; token_info *min; token_info *max; btr_node *root; btr_node *nodes; } btr; inline static void bt_zap(btr *bt) { bt->n = 0; bt->min = NULL; bt->max = NULL; bt->root = NULL; } inline static btr * bt_open(grn_ctx *ctx, int size) { btr *bt = GRN_MALLOC(sizeof(btr)); if (bt) { bt_zap(bt); if (!(bt->nodes = GRN_MALLOC(sizeof(btr_node) * size))) { GRN_FREE(bt); bt = NULL; } } return bt; } inline static void bt_close(grn_ctx *ctx, btr *bt) { if (!bt) { return; } GRN_FREE(bt->nodes); GRN_FREE(bt); } inline static void bt_push(btr *bt, token_info *ti) { int pos = ti->pos, minp = 1, maxp = 1; btr_node *node, *new, **last; new = bt->nodes + bt->n++; new->ti = ti; new->car = NULL; new->cdr = NULL; for (last = &bt->root; (node = *last);) { if (pos < node->ti->pos) { last = &node->car; maxp = 0; } else { last = &node->cdr; minp = 0; } } *last = new; if (minp) { bt->min = ti; } if (maxp) { bt->max = ti; } } inline static void bt_pop(btr *bt) { btr_node *node, *min, *newmin, **last; for (last = &bt->root; (min = *last) && min->car; last = &min->car) ; if (min) { int pos = min->ti->pos, minp = 1, maxp = 1; *last = min->cdr; min->cdr = NULL; for (last = &bt->root; (node = *last);) { if (pos < node->ti->pos) { last = &node->car; maxp = 0; } else { last = &node->cdr; minp = 0; } } *last = min; if (maxp) { bt->max = min->ti; } if (!minp) { for (newmin = bt->root; newmin->car; newmin = newmin->car) ; bt->min = newmin->ti; } } } #endif /* USE_BHEAP */ typedef enum { grn_wv_none = 0, grn_wv_static, grn_wv_dynamic, grn_wv_constant } grn_wv_mode; inline static int get_weight(grn_ctx *ctx, grn_hash *s, grn_id rid, int sid, grn_wv_mode wvm, grn_select_optarg *optarg) { switch (wvm) { case grn_wv_none : return 1; case grn_wv_static : return sid <= optarg->vector_size ? optarg->weight_vector[sid - 1] : 0; case grn_wv_dynamic : /* todo : support hash with keys if (s->keys) { uint32_t key_size; const char *key = _grn_table_key(ctx, s->keys, rid, &key_size); // todo : change grn_select_optarg return key ? optarg->func(s, key, key_size, sid, optarg->func_arg) : 0; } */ /* todo : cast */ return optarg->func(ctx, (void *)s, (void *)(intptr_t)rid, sid, optarg->func_arg); case grn_wv_constant : return optarg->vector_size; default : return 1; } } grn_rc grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len, grn_hash *s, grn_operator op, grn_select_optarg *optarg) { int *w1, limit; grn_id tid, *tp, max_size; grn_rc rc = GRN_SUCCESS; grn_hash *h; grn_token_cursor *token_cursor; unsigned int token_flags = GRN_TOKEN_ENABLE_TOKENIZED_DELIMITER; grn_obj *lexicon = ii->lexicon; if (!lexicon || !ii || !string || !string_len || !s || !optarg) { return GRN_INVALID_ARGUMENT; } if (!(h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(int), 0))) { return GRN_NO_MEMORY_AVAILABLE; } if (!(token_cursor = grn_token_cursor_open(ctx, lexicon, string, string_len, GRN_TOKEN_GET, token_flags))) { grn_hash_close(ctx, h); return GRN_NO_MEMORY_AVAILABLE; } if (!(max_size = optarg->max_size)) { max_size = 1048576; } while (token_cursor->status != GRN_TOKEN_DONE && token_cursor->status != GRN_TOKEN_DONE_SKIP) { if ((tid = grn_token_cursor_next(ctx, token_cursor))) { if (grn_hash_add(ctx, h, &tid, sizeof(grn_id), (void **)&w1, NULL)) { (*w1)++; } } if (tid && token_cursor->curr_size) { if (optarg->max_interval == GRN_OP_UNSPLIT) { grn_table_search(ctx, lexicon, token_cursor->curr, token_cursor->curr_size, GRN_OP_PREFIX, (grn_obj *)h, GRN_OP_OR); } if (optarg->max_interval == GRN_OP_PARTIAL) { grn_table_search(ctx, lexicon, token_cursor->curr, token_cursor->curr_size, GRN_OP_SUFFIX, (grn_obj *)h, GRN_OP_OR); } } } grn_token_cursor_close(ctx, token_cursor); { grn_hash_cursor *c = grn_hash_cursor_open(ctx, h, NULL, 0, NULL, 0, 0, -1, 0); if (!c) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_cursor_open on grn_ii_similar_search failed !"); grn_hash_close(ctx, h); return GRN_NO_MEMORY_AVAILABLE; } while (grn_hash_cursor_next(ctx, c)) { uint32_t es; grn_hash_cursor_get_key_value(ctx, c, (void **) &tp, NULL, (void **) &w1); if ((es = grn_ii_estimate_size(ctx, ii, *tp))) { *w1 += max_size / es; } else { grn_hash_cursor_delete(ctx, c, NULL); } } grn_hash_cursor_close(ctx, c); } limit = optarg->similarity_threshold ? (optarg->similarity_threshold > GRN_HASH_SIZE(h) ? GRN_HASH_SIZE(h) : optarg->similarity_threshold) : (GRN_HASH_SIZE(h) >> 3) + 1; if (GRN_HASH_SIZE(h)) { grn_id j, id; int w2, rep; grn_ii_cursor *c; grn_ii_posting *pos; grn_wv_mode wvm = grn_wv_none; grn_table_sort_optarg arg = { GRN_TABLE_SORT_DESC|GRN_TABLE_SORT_BY_VALUE|GRN_TABLE_SORT_AS_NUMBER, NULL, NULL, 0 }; grn_array *sorted = grn_array_create(ctx, NULL, sizeof(grn_id), 0); if (!sorted) { GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_sort on grn_ii_similar_search failed !"); grn_hash_close(ctx, h); return GRN_NO_MEMORY_AVAILABLE; } grn_hash_sort(ctx, h, limit, sorted, &arg); /* todo support subrec rep = (s->record_unit == grn_rec_position || s->subrec_unit == grn_rec_position); */ rep = 0; if (optarg->func) { wvm = grn_wv_dynamic; } else if (optarg->vector_size) { wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant; } for (j = 1; j <= limit; j++) { grn_array_get_value(ctx, sorted, j, &id); _grn_hash_get_key_value(ctx, h, id, (void **) &tp, (void **) &w1); if (!*tp || !(c = grn_ii_cursor_open(ctx, ii, *tp, GRN_ID_NIL, GRN_ID_MAX, rep ? ii->n_elements : ii->n_elements - 1, 0))) { GRN_LOG(ctx, GRN_LOG_ERROR, "cursor open failed (%d)", *tp); continue; } if (rep) { while (grn_ii_cursor_next(ctx, c)) { pos = c->post; if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg))) { while (grn_ii_cursor_next_pos(ctx, c)) { res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * (1 + pos->weight), op); } } } } else { while (grn_ii_cursor_next(ctx, c)) { pos = c->post; if ((w2 = get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg))) { res_add(ctx, s, (grn_rset_posinfo *) pos, *w1 * w2 * (pos->tf + pos->weight), op); } } } grn_ii_cursor_close(ctx, c); } grn_array_close(ctx, sorted); } grn_hash_close(ctx, h); grn_ii_resolve_sel_and(ctx, s, op); // grn_hash_cursor_clear(r); return rc; } #define TERM_EXTRACT_EACH_POST 0 #define TERM_EXTRACT_EACH_TERM 1 grn_rc grn_ii_term_extract(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len, grn_hash *s, grn_operator op, grn_select_optarg *optarg) { grn_rset_posinfo pi; grn_id tid; const char *p, *pe; grn_obj *nstr; const char *normalized; unsigned int normalized_length_in_bytes; grn_ii_cursor *c; grn_ii_posting *pos; int skip, rep, policy; grn_rc rc = GRN_SUCCESS; grn_wv_mode wvm = grn_wv_none; if (!ii || !string || !string_len || !s || !optarg) { return GRN_INVALID_ARGUMENT; } if (!(nstr = grn_string_open(ctx, string, string_len, NULL, 0))) { return GRN_INVALID_ARGUMENT; } policy = optarg->max_interval; if (optarg->func) { wvm = grn_wv_dynamic; } else if (optarg->vector_size) { wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant; } /* todo support subrec if (policy == TERM_EXTRACT_EACH_POST) { if ((rc = grn_records_reopen(s, grn_rec_section, grn_rec_none, 0))) { goto exit; } } rep = (s->record_unit == grn_rec_position || s->subrec_unit == grn_rec_position); */ rep = 0; grn_string_get_normalized(ctx, nstr, &normalized, &normalized_length_in_bytes, NULL); for (p = normalized, pe = p + normalized_length_in_bytes; p < pe; p += skip) { if ((tid = grn_table_lcp_search(ctx, ii->lexicon, p, pe - p))) { if (policy == TERM_EXTRACT_EACH_POST) { if (!(skip = grn_table_get_key(ctx, ii->lexicon, tid, NULL, 0))) { break; } } else { if (!(skip = (int)grn_charlen(ctx, p, pe))) { break; } } if (!(c = grn_ii_cursor_open(ctx, ii, tid, GRN_ID_NIL, GRN_ID_MAX, rep ? ii->n_elements : ii->n_elements - 1, 0))) { GRN_LOG(ctx, GRN_LOG_ERROR, "cursor open failed (%d)", tid); continue; } if (rep) { while (grn_ii_cursor_next(ctx, c)) { pos = c->post; while (grn_ii_cursor_next_pos(ctx, c)) { res_add(ctx, s, (grn_rset_posinfo *) pos, get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg), op); } } } else { while (grn_ii_cursor_next(ctx, c)) { if (policy == TERM_EXTRACT_EACH_POST) { pi.rid = c->post->rid; pi.sid = p - normalized; res_add(ctx, s, &pi, pi.sid + 1, op); } else { pos = c->post; res_add(ctx, s, (grn_rset_posinfo *) pos, get_weight(ctx, s, pos->rid, pos->sid, wvm, optarg), op); } } } grn_ii_cursor_close(ctx, c); } else { if (!(skip = (int)grn_charlen(ctx, p, pe))) { break; } } } grn_obj_close(ctx, nstr); return rc; } grn_rc grn_ii_select(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len, grn_hash *s, grn_operator op, grn_select_optarg *optarg) { btr *bt = NULL; grn_rc rc = GRN_SUCCESS; int rep, orp, weight, max_interval = 0; token_info *ti, **tis = NULL, **tip, **tie; uint32_t n = 0, rid, sid, nrid, nsid; grn_bool only_skip_token = GRN_FALSE; grn_operator mode = GRN_OP_EXACT; grn_wv_mode wvm = grn_wv_none; grn_obj *lexicon = ii->lexicon; if (!lexicon || !ii || !s) { return GRN_INVALID_ARGUMENT; } if (optarg) { mode = optarg->mode; if (optarg->func) { wvm = grn_wv_dynamic; } else if (optarg->vector_size) { wvm = optarg->weight_vector ? grn_wv_static : grn_wv_constant; } } if (mode == GRN_OP_SIMILAR) { return grn_ii_similar_search(ctx, ii, string, string_len, s, op, optarg); } if (mode == GRN_OP_TERM_EXTRACT) { return grn_ii_term_extract(ctx, ii, string, string_len, s, op, optarg); } /* todo : support subrec rep = (s->record_unit == grn_rec_position || s->subrec_unit == grn_rec_position); orp = (s->record_unit == grn_rec_position || op == GRN_OP_OR); */ rep = 0; orp = op == GRN_OP_OR; if (!string_len) { goto exit; } if (!(tis = GRN_MALLOC(sizeof(token_info *) * string_len * 2))) { return GRN_NO_MEMORY_AVAILABLE; } if (token_info_build(ctx, lexicon, ii, string, string_len, tis, &n, &only_skip_token, mode) || !n) { goto exit; } switch (mode) { case GRN_OP_NEAR2 : token_info_clear_offset(tis, n); mode = GRN_OP_NEAR; /* fallthru */ case GRN_OP_NEAR : if (!(bt = bt_open(ctx, n))) { rc = GRN_NO_MEMORY_AVAILABLE; goto exit; } max_interval = optarg->max_interval; break; default : break; } qsort(tis, n, sizeof(token_info *), token_compare); tie = tis + n; /* for (tip = tis; tip < tie; tip++) { ti = *tip; grn_log("o=%d n=%d s=%d r=%d", ti->offset, ti->ntoken, ti->size, ti->rid); } */ GRN_LOG(ctx, GRN_LOG_INFO, "n=%d (%.*s)", n, string_len, string); /* todo : array as result if (n == 1 && (*tis)->cursors->n_entries == 1 && op == GRN_OP_OR && !GRN_HASH_SIZE(s) && !s->garbages && s->record_unit == grn_rec_document && !s->max_n_subrecs && grn_ii_max_section(ii) == 1) { grn_ii_cursor *c = (*tis)->cursors->bins[0]; if ((rc = grn_hash_array_init(s, (*tis)->size + 32768))) { goto exit; } do { grn_rset_recinfo *ri; grn_ii_posting *p = c->post; if ((weight = get_weight(ctx, s, p->rid, p->sid, wvm, optarg))) { GRN_HASH_INT_ADD(s, p, ri); ri->score = (p->tf + p->score) * weight; ri->n_subrecs = 1; } } while (grn_ii_cursor_next(ctx, c)); goto exit; } */ for (;;) { rid = (*tis)->p->rid; sid = (*tis)->p->sid; for (tip = tis + 1, nrid = rid, nsid = sid + 1; tip < tie; tip++) { ti = *tip; if (token_info_skip(ctx, ti, rid, sid)) { goto exit; } if (ti->p->rid != rid || ti->p->sid != sid) { nrid = ti->p->rid; nsid = ti->p->sid; break; } } weight = get_weight(ctx, s, rid, sid, wvm, optarg); if (tip == tie && weight) { grn_rset_posinfo pi = {rid, sid, 0}; if (orp || grn_hash_get(ctx, s, &pi, s->key_size, NULL)) { int count = 0, noccur = 0, pos = 0, score = 0, tscore = 0, min, max; #define SKIP_OR_BREAK(pos) {\ if (token_info_skip_pos(ctx, ti, rid, sid, pos)) { break; } \ if (ti->p->rid != rid || ti->p->sid != sid) { \ nrid = ti->p->rid; \ nsid = ti->p->sid; \ break; \ } \ } if (n == 1 && !rep) { noccur = (*tis)->p->tf; tscore = (*tis)->p->weight; } else if (mode == GRN_OP_NEAR) { bt_zap(bt); for (tip = tis; tip < tie; tip++) { ti = *tip; SKIP_OR_BREAK(pos); bt_push(bt, ti); } if (tip == tie) { for (;;) { ti = bt->min; min = ti->pos; max = bt->max->pos; if (min > max) { exit(0); } if (max - min <= max_interval) { if (rep) { pi.pos = min; res_add(ctx, s, &pi, weight, op); } noccur++; if (ti->pos == max + 1) { break; } SKIP_OR_BREAK(max + 1); } else { if (ti->pos == max - max_interval) { break; } SKIP_OR_BREAK(max - max_interval); } bt_pop(bt); } } } else { for (tip = tis; ; tip++) { if (tip == tie) { tip = tis; } ti = *tip; SKIP_OR_BREAK(pos); if (ti->pos == pos) { score += ti->p->weight; count++; } else { score = ti->p->weight; count = 1; pos = ti->pos; } if (count == n) { if (rep) { pi.pos = pos; res_add(ctx, s, &pi, (score + 1) * weight, op); } tscore += score; score = 0; count = 0; pos++; noccur++; } } } if (noccur && !rep) { res_add(ctx, s, &pi, (noccur + tscore) * weight, op); } #undef SKIP_OR_BREAK } } if (token_info_skip(ctx, *tis, nrid, nsid)) { goto exit; } } exit : for (tip = tis; tip < tis + n; tip++) { if (*tip) { token_info_close(ctx, *tip); } } if (tis) { GRN_FREE(tis); } if (!only_skip_token) { grn_ii_resolve_sel_and(ctx, s, op); } // grn_hash_cursor_clear(r); bt_close(ctx, bt); #ifdef DEBUG { uint32_t segno = GRN_II_MAX_LSEG, nnref = 0; grn_io_mapinfo *info = ii->seg->maps; for (; segno; segno--, info++) { if (info->nref) { nnref++; } } GRN_LOG(ctx, GRN_LOG_INFO, "nnref=%d", nnref); } #endif /* DEBUG */ return rc; } grn_rc grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len, grn_hash *s, grn_operator op, grn_search_optarg *optarg) { ERRCLR(ctx); GRN_LOG(ctx, GRN_LOG_INFO, "grn_ii_sel > (%.*s)", string_len, string); { grn_select_optarg arg = {GRN_OP_EXACT, 0, 0, NULL, 0, NULL, NULL, 0}; if (!s) { return GRN_INVALID_ARGUMENT; } if (optarg) { switch (optarg->mode) { case GRN_OP_NEAR : case GRN_OP_NEAR2 : arg.mode = optarg->mode; arg.max_interval = optarg->max_interval; break; case GRN_OP_SIMILAR : arg.mode = optarg->mode; arg.similarity_threshold = optarg->similarity_threshold; break; default : break; } if (optarg->vector_size > 0) { arg.weight_vector = optarg->weight_vector; arg.vector_size = optarg->vector_size; } } /* todo : support subrec grn_rset_init(ctx, s, grn_rec_document, 0, grn_rec_none, 0, 0); */ if (grn_ii_select(ctx, ii, string, string_len, s, op, &arg)) { GRN_LOG(ctx, GRN_LOG_ERROR, "grn_ii_select on grn_ii_sel(1) failed !"); return ctx->rc; } GRN_LOG(ctx, GRN_LOG_INFO, "exact: %d", GRN_HASH_SIZE(s)); if (op == GRN_OP_OR) { if ((int64_t)GRN_HASH_SIZE(s) <= ctx->impl->match_escalation_threshold) { arg.mode = GRN_OP_UNSPLIT; if (grn_ii_select(ctx, ii, string, string_len, s, op, &arg)) { GRN_LOG(ctx, GRN_LOG_ERROR, "grn_ii_select on grn_ii_sel(2) failed !"); return ctx->rc; } GRN_LOG(ctx, GRN_LOG_INFO, "unsplit: %d", GRN_HASH_SIZE(s)); } if ((int64_t)GRN_HASH_SIZE(s) <= ctx->impl->match_escalation_threshold) { arg.mode = GRN_OP_PARTIAL; if (grn_ii_select(ctx, ii, string, string_len, s, op, &arg)) { GRN_LOG(ctx, GRN_LOG_ERROR, "grn_ii_select on grn_ii_sel(3) failed !"); return ctx->rc; } GRN_LOG(ctx, GRN_LOG_INFO, "partial: %d", GRN_HASH_SIZE(s)); } } GRN_LOG(ctx, GRN_LOG_INFO, "hits=%d", GRN_HASH_SIZE(s)); return GRN_SUCCESS; } } grn_rc grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator op) { int rep = 0; grn_ii_cursor *c; grn_ii_posting *pos; if ((c = grn_ii_cursor_open(ctx, ii, id, GRN_ID_NIL, GRN_ID_MAX, rep ? ii->n_elements : ii->n_elements - 1, 0))) { while ((pos = grn_ii_cursor_next(ctx, c))) { res_add(ctx, s, (grn_rset_posinfo *) pos, (1 + pos->weight), op); } grn_ii_cursor_close(ctx, c); } return ctx->rc; } void grn_ii_resolve_sel_and(grn_ctx *ctx, grn_hash *s, grn_operator op) { if (op == GRN_OP_AND && !(ctx->flags & GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND)) { grn_id eid; grn_rset_recinfo *ri; grn_hash_cursor *c = grn_hash_cursor_open(ctx, s, NULL, 0, NULL, 0, 0, -1, 0); if (c) { while ((eid = grn_hash_cursor_next(ctx, c))) { grn_hash_cursor_get_value(ctx, c, (void **) &ri); if ((ri->n_subrecs & GRN_RSET_UTIL_BIT)) { ri->n_subrecs &= ~GRN_RSET_UTIL_BIT; } else { grn_hash_delete_by_id(ctx, s, eid, NULL); } } grn_hash_cursor_close(ctx, c); } } } /* just for inspect */ static grn_ii_posting * grn_ii_cursor_next_all(grn_ctx *ctx, grn_ii_cursor *c) { if (c->buf) { for (;;) { if (c->stat & CHUNK_USED) { for (;;) { if (c->crp < c->cdp + c->cdf) { uint32_t dgap = *c->crp++; c->pc.rid += dgap; if (dgap) { c->pc.sid = 0; } if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) { c->pc.sid += 1 + *c->csp++; } else { c->pc.sid = 1; } c->cpp += c->pc.rest; c->pc.rest = c->pc.tf = 1 + *c->ctp++; if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { c->pc.weight = *c->cwp++; } else { c->pc.weight = 0; } c->pc.pos = 0; /* { static int count = 0; int tf = c->pc.tf, pos = 0, *pp = (int *)c->cpp; grn_obj buf; GRN_TEXT_INIT(&buf, 0); grn_text_itoa(ctx, &buf, c->pc.rid); GRN_TEXT_PUTC(ctx, &buf, ':'); grn_text_itoa(ctx, &buf, c->pc.sid); GRN_TEXT_PUTC(ctx, &buf, ':'); grn_text_itoa(ctx, &buf, c->pc.tf); GRN_TEXT_PUTC(ctx, &buf, '('); while (tf--) { pos += *pp++; count++; grn_text_itoa(ctx, &buf, pos); if (tf) { GRN_TEXT_PUTC(ctx, &buf, ':'); } } GRN_TEXT_PUTC(ctx, &buf, ')'); GRN_TEXT_PUTC(ctx, &buf, '\0'); GRN_LOG(ctx, GRN_LOG_NOTICE, "posting(%d):%s", count, GRN_TEXT_VALUE(&buf)); GRN_OBJ_FIN(ctx, &buf); } */ } else { if (c->curr_chunk <= c->nchunks) { if (c->curr_chunk == c->nchunks) { if (c->cp < c->cpe) { grn_p_decv(ctx, c->cp, c->cpe - c->cp, c->rdv, c->ii->n_elements); } else { c->pc.rid = 0; break; } } else { uint8_t *cp; grn_io_win iw; uint32_t size = c->cinfo[c->curr_chunk].size; if (size && (cp = WIN_MAP2(c->ii->chunk, ctx, &iw, c->cinfo[c->curr_chunk].segno, 0, size, grn_io_rdonly))) { grn_p_decv(ctx, cp, size, c->rdv, c->ii->n_elements); grn_io_win_unmap2(&iw); } else { c->pc.rid = 0; break; } } { int j = 0; c->cdf = c->rdv[j].data_size; c->crp = c->cdp = c->rdv[j++].data; if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) { c->csp = c->rdv[j++].data; } c->ctp = c->rdv[j++].data; if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { c->cwp = c->rdv[j++].data; } c->cpp = c->rdv[j].data; } c->pc.rid = 0; c->pc.sid = 0; c->pc.rest = 0; c->curr_chunk++; continue; } else { c->pc.rid = 0; } } break; } } if (c->stat & BUFFER_USED) { if (c->nextb) { uint32_t lrid = c->pb.rid, lsid = c->pb.sid; /* for check */ buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb); if (buffer_is_reused(ctx, c->ii, c)) { GRN_LOG(ctx, GRN_LOG_NOTICE, "buffer reused(%d,%d)", c->buffer_pseg, *c->ppseg); // todo : rewind; } c->bp = NEXT_ADDR(br); GRN_B_DEC(c->pb.rid, c->bp); if ((c->ii->header->flags & GRN_OBJ_WITH_SECTION)) { GRN_B_DEC(c->pb.sid, c->bp); } else { c->pb.sid = 1; } if (lrid > c->pb.rid || (lrid == c->pb.rid && lsid >= c->pb.sid)) { ERR(GRN_FILE_CORRUPT, "brokend!! (%d:%d) -> (%d:%d) (%d->%d)", lrid, lsid, c->pb.rid, c->pb.sid, c->buffer_pseg, *c->ppseg); } c->nextb = br->step; GRN_B_DEC(c->pb.tf, c->bp); if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { GRN_B_DEC(c->pb.weight, c->bp); } else { c->pb.weight = 0; } c->pb.rest = c->pb.tf; c->pb.pos = 0; } else { c->pb.rid = 0; } } if (c->pb.rid) { if (c->pc.rid) { if (c->pc.rid < c->pb.rid) { c->stat = CHUNK_USED; c->post = &c->pc; break; } else { if (c->pb.rid < c->pc.rid) { c->stat = BUFFER_USED; c->post = &c->pb; break; } else { if (c->pb.sid) { if (c->pc.sid < c->pb.sid) { c->stat = CHUNK_USED; c->post = &c->pc; break; } else { c->stat = BUFFER_USED; if (c->pb.sid == c->pc.sid) { c->stat |= CHUNK_USED; } c->post = &c->pb; break; } } else { c->stat = CHUNK_USED; } } } } else { c->stat = BUFFER_USED; c->post = &c->pb; break; } } else { if (c->pc.rid) { c->stat = CHUNK_USED; c->post = &c->pc; break; } else { c->post = NULL; return NULL; } } } } else { if (c->stat & SOLE_DOC_USED) { c->post = NULL; return NULL; } else { c->post = &c->pb; c->stat |= SOLE_DOC_USED; } } return c->post; } void grn_ii_cursor_inspect(grn_ctx *ctx, grn_ii_cursor *c, grn_obj *buf) { grn_obj key_buf; char key[GRN_TABLE_MAX_KEY_SIZE]; int key_size; int i = 0; GRN_TEXT_PUTS(ctx, buf, " #<"); key_size = grn_table_get_key(ctx, c->ii->lexicon, c->id, key, GRN_TABLE_MAX_KEY_SIZE); GRN_OBJ_INIT(&key_buf, GRN_BULK, 0, c->ii->lexicon->header.domain); GRN_TEXT_SET(ctx, &key_buf, key, key_size); grn_inspect(ctx, buf, &key_buf); GRN_OBJ_FIN(ctx, &key_buf); GRN_TEXT_PUTS(ctx, buf, "\n elements:[\n "); while (grn_ii_cursor_next_all(ctx, c)) { grn_ii_posting *pos = c->post; if (i > 0) { GRN_TEXT_PUTS(ctx, buf, ",\n "); } i++; GRN_TEXT_PUTS(ctx, buf, "{status:"); if (pos->tf && pos->sid) { GRN_TEXT_PUTS(ctx, buf, "available"); } else { GRN_TEXT_PUTS(ctx, buf, "garbage"); } GRN_TEXT_PUTS(ctx, buf, ", rid:"); grn_text_lltoa(ctx, buf, pos->rid); GRN_TEXT_PUTS(ctx, buf, ", sid:"); grn_text_lltoa(ctx, buf, pos->sid); GRN_TEXT_PUTS(ctx, buf, ", pos:"); grn_text_lltoa(ctx, buf, pos->pos); GRN_TEXT_PUTS(ctx, buf, ", tf:"); grn_text_lltoa(ctx, buf, pos->tf); GRN_TEXT_PUTS(ctx, buf, ", weight:"); grn_text_lltoa(ctx, buf, pos->weight); GRN_TEXT_PUTS(ctx, buf, ", rest:"); grn_text_lltoa(ctx, buf, pos->rest); GRN_TEXT_PUTS(ctx, buf, "}"); } GRN_TEXT_PUTS(ctx, buf, "\n ]\n >"); } void grn_ii_inspect_elements(grn_ctx *ctx, grn_ii *ii, grn_obj *buf) { grn_table_cursor *tc; GRN_TEXT_PUTS(ctx, buf, "["); if ((tc = grn_table_cursor_open(ctx, ii->lexicon, NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_ASCENDING))) { int i = 0; grn_id tid; grn_ii_cursor *c; while ((tid = grn_table_cursor_next(ctx, tc))) { if (i > 0) { GRN_TEXT_PUTS(ctx, buf, ","); } i++; GRN_TEXT_PUTS(ctx, buf, "\n"); if ((c = grn_ii_cursor_open(ctx, ii, tid, GRN_ID_NIL, GRN_ID_MAX, ii->n_elements, GRN_OBJ_WITH_POSITION|GRN_OBJ_WITH_SECTION))) { grn_ii_cursor_inspect(ctx, c, buf); grn_ii_cursor_close(ctx, c); } } grn_table_cursor_close(ctx, tc); } GRN_TEXT_PUTS(ctx, buf, "]"); } /********************** buffered index builder ***********************/ const grn_id II_BUFFER_RID_FLAG = 0x80000000; const grn_id II_BUFFER_WEIGHT_FLAG = 0x40000000; #ifdef II_BUFFER_ORDER_BY_ID const int II_BUFFER_ORDER = GRN_CURSOR_BY_ID; #else /* II_BUFFER_ORDER_BY_ID */ const int II_BUFFER_ORDER = GRN_CURSOR_BY_KEY; #endif /* II_BUFFER_ORDER_BY_ID */ const uint16_t II_BUFFER_NTERMS_PER_BUFFER = 16380; const uint32_t II_BUFFER_PACKED_BUF_SIZE = 0x4000000; const char *TMPFILE_PATH = "grn_ii_buffer_tmp"; const uint32_t II_BUFFER_NCOUNTERS_MARGIN = 0x100000; const size_t II_BUFFER_BLOCK_SIZE = 0x1000000; const uint32_t II_BUFFER_BLOCK_READ_UNIT_SIZE = 0x200000; typedef struct { uint32_t nrecs; uint32_t nposts; grn_id last_rid; uint32_t last_sid; uint32_t last_tf; uint32_t last_weight; uint32_t last_pos; uint32_t offset_rid; uint32_t offset_sid; uint32_t offset_tf; uint32_t offset_weight; uint32_t offset_pos; } ii_buffer_counter; typedef struct { off_t head; off_t tail; uint32_t nextsize; uint8_t *buffer; uint32_t buffersize; uint8_t *bufcur; uint32_t rest; grn_id tid; uint32_t nrecs; uint32_t nposts; grn_id *recs; uint32_t *tfs; uint32_t *posts; } ii_buffer_block; struct _grn_ii_buffer { grn_obj *lexicon; grn_obj *tmp_lexicon; ii_buffer_block *blocks; uint32_t nblocks; int tmpfd; char tmpfpath[PATH_MAX]; uint64_t update_buffer_size; // stuff for parsing off_t filepos; grn_id *block_buf; size_t block_buf_size; size_t block_pos; ii_buffer_counter *counters; uint32_t ncounters; size_t total_size; size_t curr_size; // stuff for merging grn_ii *ii; uint32_t lseg; uint32_t dseg; buffer *term_buffer; datavec data_vectors[MAX_N_ELEMENTS + 1]; uint8_t *packed_buf; size_t packed_buf_size; size_t packed_len; size_t total_chunk_size; }; static ii_buffer_block * block_new(grn_ctx *ctx, grn_ii_buffer *ii_buffer) { ii_buffer_block *block; if (!(ii_buffer->nblocks & 0x3ff)) { ii_buffer_block *blocks; if (!(blocks = GRN_REALLOC(ii_buffer->blocks, (ii_buffer->nblocks + 0x400) * sizeof(ii_buffer_block)))) { return NULL; } ii_buffer->blocks = blocks; } block = &ii_buffer->blocks[ii_buffer->nblocks]; block->head = ii_buffer->filepos; block->rest = 0; block->buffer = NULL; block->buffersize = 0; return block; } static uint8_t * allocate_outbuf(grn_ctx *ctx, grn_ii_buffer *ii_buffer) { size_t bufsize = 0, bufsize_ = 0; uint32_t flags = ii_buffer->ii->header->flags; ii_buffer_counter *counter = ii_buffer->counters; grn_id tid, tid_max = grn_table_size(ctx, ii_buffer->tmp_lexicon); for (tid = 1; tid <= tid_max; counter++, tid++) { counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1); counter->last_rid = 0; counter->last_tf = 0; bufsize += 5; bufsize += GRN_B_ENC_SIZE(counter->nrecs); bufsize += GRN_B_ENC_SIZE(counter->nposts); bufsize += counter->offset_rid; if ((flags & GRN_OBJ_WITH_SECTION)) { bufsize += counter->offset_sid; } bufsize += counter->offset_tf; if ((flags & GRN_OBJ_WITH_WEIGHT)) { bufsize += counter->offset_weight; } if ((flags & GRN_OBJ_WITH_POSITION)) { bufsize += counter->offset_pos; } if (bufsize_ + II_BUFFER_BLOCK_READ_UNIT_SIZE < bufsize) { bufsize += sizeof(uint32_t); bufsize_ = bufsize; } } GRN_LOG(ctx, GRN_LOG_INFO, "flushing:%d bufsize:%zu", ii_buffer->nblocks, bufsize); return (uint8_t *)GRN_MALLOC(bufsize); } static size_t encode_terms(grn_ctx *ctx, grn_ii_buffer *ii_buffer, uint8_t *outbuf, ii_buffer_block *block) { grn_id tid; uint8_t *outbufp = outbuf; uint8_t *outbufp_ = outbuf; grn_table_cursor *tc; uint8_t *pnext = (uint8_t *)&block->nextsize; uint32_t flags = ii_buffer->ii->header->flags; tc = grn_table_cursor_open(ctx, ii_buffer->tmp_lexicon, NULL, 0, NULL, 0, 0, -1, II_BUFFER_ORDER); while ((tid = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) { char key[GRN_TABLE_MAX_KEY_SIZE]; int key_size = grn_table_get_key(ctx, ii_buffer->tmp_lexicon, tid, key, GRN_TABLE_MAX_KEY_SIZE); grn_id gtid = grn_table_add(ctx, ii_buffer->lexicon, key, key_size, NULL); ii_buffer_counter *counter = &ii_buffer->counters[tid - 1]; if (counter->nrecs) { uint32_t offset_rid = counter->offset_rid; uint32_t offset_sid = counter->offset_sid; uint32_t offset_tf = counter->offset_tf; uint32_t offset_weight = counter->offset_weight; uint32_t offset_pos = counter->offset_pos; GRN_B_ENC(gtid, outbufp); GRN_B_ENC(counter->nrecs, outbufp); GRN_B_ENC(counter->nposts, outbufp); ii_buffer->total_size += counter->nrecs + counter->nposts; counter->offset_rid = outbufp - outbuf; outbufp += offset_rid; if ((flags & GRN_OBJ_WITH_SECTION)) { counter->offset_sid = outbufp - outbuf; outbufp += offset_sid; } counter->offset_tf = outbufp - outbuf; outbufp += offset_tf; if ((flags & GRN_OBJ_WITH_WEIGHT)) { counter->offset_weight = outbufp - outbuf; outbufp += offset_weight; } if ((flags & GRN_OBJ_WITH_POSITION)) { counter->offset_pos = outbufp - outbuf; outbufp += offset_pos; } } if (outbufp_ + II_BUFFER_BLOCK_READ_UNIT_SIZE < outbufp) { uint32_t size = outbufp - outbufp_ + sizeof(uint32_t); memcpy(pnext, &size, sizeof(uint32_t)); pnext = outbufp; outbufp += sizeof(uint32_t); outbufp_ = outbufp; } } grn_table_cursor_close(ctx, tc); if (outbufp_ < outbufp) { uint32_t size = outbufp - outbufp_; memcpy(pnext, &size, sizeof(uint32_t)); } return outbufp - outbuf; } static void encode_postings(grn_ctx *ctx, grn_ii_buffer *ii_buffer, uint8_t *outbuf) { grn_id rid = 0; unsigned int sid = 1; unsigned int weight = 0; uint32_t pos = 0; uint32_t rest; grn_id *bp = ii_buffer->block_buf; uint32_t flags = ii_buffer->ii->header->flags; for (rest = ii_buffer->block_pos; rest; bp++, rest--) { grn_id id = *bp; if (id & II_BUFFER_RID_FLAG) { rid = id - II_BUFFER_RID_FLAG; if ((flags & GRN_OBJ_WITH_SECTION) && rest) { sid = *++bp; rest--; } weight = 0; pos = 0; } else if (id & II_BUFFER_WEIGHT_FLAG) { weight = id - II_BUFFER_WEIGHT_FLAG; } else { ii_buffer_counter *counter = &ii_buffer->counters[id - 1]; if (counter->last_rid == rid && counter->last_sid == sid) { counter->last_tf++; counter->last_weight += weight; } else { if (counter->last_tf) { uint8_t *p = outbuf + counter->offset_tf; GRN_B_ENC(counter->last_tf - 1, p); counter->offset_tf = p - outbuf; if (flags & GRN_OBJ_WITH_WEIGHT) { p = outbuf + counter->offset_weight; GRN_B_ENC(counter->last_weight, p); counter->offset_weight = p - outbuf; } } { uint8_t *p = outbuf + counter->offset_rid; GRN_B_ENC(rid - counter->last_rid, p); counter->offset_rid = p - outbuf; } if (flags & GRN_OBJ_WITH_SECTION) { uint8_t *p = outbuf + counter->offset_sid; if (counter->last_rid != rid) { GRN_B_ENC(sid - 1, p); } else { GRN_B_ENC(sid - counter->last_sid - 1, p); } counter->offset_sid = p - outbuf; } counter->last_rid = rid; counter->last_sid = sid; counter->last_tf = 1; counter->last_weight = weight; counter->last_pos = 0; } if (flags & GRN_OBJ_WITH_POSITION) { uint8_t *p = outbuf + counter->offset_pos; GRN_B_ENC(pos - counter->last_pos, p); counter->offset_pos = p - outbuf; counter->last_pos = pos; } pos++; } } } static void encode_last_tf(grn_ctx *ctx, grn_ii_buffer *ii_buffer, uint8_t *outbuf) { ii_buffer_counter *counter = ii_buffer->counters; grn_id tid, tid_max = grn_table_size(ctx, ii_buffer->tmp_lexicon); for (tid = 1; tid <= tid_max; counter++, tid++) { uint8_t *p = outbuf + counter->offset_tf; GRN_B_ENC(counter->last_tf - 1, p); } if ((ii_buffer->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { for (tid = 1; tid <= tid_max; counter++, tid++) { uint8_t *p = outbuf + counter->offset_weight; GRN_B_ENC(counter->last_weight, p); } } } static void grn_ii_buffer_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer) { size_t encsize; uint8_t *outbuf; ii_buffer_block *block; GRN_LOG(ctx, GRN_LOG_NOTICE, "flushing:%d npostings:%zu", ii_buffer->nblocks, ii_buffer->block_pos); if (!(block = block_new(ctx, ii_buffer))) { return; } if (!(outbuf = allocate_outbuf(ctx, ii_buffer))) { return; } encsize = encode_terms(ctx, ii_buffer, outbuf, block); encode_postings(ctx, ii_buffer, outbuf); encode_last_tf(ctx, ii_buffer, outbuf); { ssize_t r = GRN_WRITE(ii_buffer->tmpfd, outbuf, encsize); if (r != encsize) { ERR(GRN_INPUT_OUTPUT_ERROR, "write returned %" GRN_FMT_LLD " != %" GRN_FMT_LLU, (long long int)r, (unsigned long long int)encsize); return; } ii_buffer->filepos += r; block->tail = ii_buffer->filepos; } GRN_FREE(outbuf); memset(ii_buffer->counters, 0, grn_table_size(ctx, ii_buffer->tmp_lexicon) * sizeof(ii_buffer_counter)); grn_obj_close(ctx, ii_buffer->tmp_lexicon); GRN_LOG(ctx, GRN_LOG_NOTICE, "flushed: %d encsize:%zu", ii_buffer->nblocks, encsize); ii_buffer->tmp_lexicon = NULL; ii_buffer->nblocks++; ii_buffer->block_pos = 0; } const uint32_t PAT_CACHE_SIZE = 1<<20; static grn_obj * get_tmp_lexicon(grn_ctx *ctx, grn_ii_buffer *ii_buffer) { grn_obj *tmp_lexicon = ii_buffer->tmp_lexicon; if (!tmp_lexicon) { grn_obj *domain = grn_ctx_at(ctx, ii_buffer->lexicon->header.domain); grn_obj *range = grn_ctx_at(ctx, DB_OBJ(ii_buffer->lexicon)->range); grn_obj *tokenizer; grn_obj *normalizer; grn_obj_flags flags; grn_table_get_info(ctx, ii_buffer->lexicon, &flags, NULL, &tokenizer, &normalizer, NULL); flags &= ~GRN_OBJ_PERSISTENT; tmp_lexicon = grn_table_create(ctx, NULL, 0, NULL, flags, domain, range); if (tmp_lexicon) { ii_buffer->tmp_lexicon = tmp_lexicon; grn_obj_set_info(ctx, tmp_lexicon, GRN_INFO_DEFAULT_TOKENIZER, tokenizer); grn_obj_set_info(ctx, tmp_lexicon, GRN_INFO_NORMALIZER, normalizer); if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) { grn_pat_cache_enable(ctx, (grn_pat *)tmp_lexicon, PAT_CACHE_SIZE); } } } return tmp_lexicon; } static ii_buffer_counter * get_buffer_counter(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_obj *tmp_lexicon, grn_id tid) { if (tid > ii_buffer->ncounters) { ii_buffer_counter *counters; uint32_t ncounters = grn_table_size(ctx, tmp_lexicon) + II_BUFFER_NCOUNTERS_MARGIN; counters = GRN_REALLOC(ii_buffer->counters, ncounters * sizeof(ii_buffer_counter)); if (!counters) { return NULL; } memset(&counters[ii_buffer->ncounters], 0, (ncounters - ii_buffer->ncounters) * sizeof(ii_buffer_counter)); ii_buffer->ncounters = ncounters; ii_buffer->counters = counters; } return &ii_buffer->counters[tid - 1]; } static void grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid, unsigned int sid, unsigned int weight, const char *value, uint32_t value_len) { if (value_len) { grn_obj *tmp_lexicon; uint32_t est_len = value_len + 2; if (ii_buffer->block_buf_size < ii_buffer->block_pos + est_len) { grn_ii_buffer_flush(ctx, ii_buffer); } if (ii_buffer->block_buf_size < est_len) { grn_id *block_buf = (grn_id *)GRN_REALLOC(ii_buffer->block_buf, est_len * sizeof(grn_id)); if (!block_buf) { return; } ii_buffer->block_buf = block_buf; ii_buffer->block_buf_size = est_len; } if ((tmp_lexicon = get_tmp_lexicon(ctx, ii_buffer))) { unsigned int token_flags = 0; grn_token_cursor *token_cursor; grn_id *buffer = ii_buffer->block_buf; uint32_t block_pos = ii_buffer->block_pos; buffer[block_pos++] = rid + II_BUFFER_RID_FLAG; if ((ii_buffer->ii->header->flags & GRN_OBJ_WITH_SECTION)) { buffer[block_pos++] = sid; } if (weight) { buffer[block_pos++] = weight + II_BUFFER_WEIGHT_FLAG; } if ((token_cursor = grn_token_cursor_open(ctx, tmp_lexicon, value, value_len, GRN_TOKEN_ADD, token_flags))) { uint32_t pos; for (pos = 0; !token_cursor->status; pos++) { grn_id tid; if ((tid = grn_token_cursor_next(ctx, token_cursor))) { ii_buffer_counter *counter; counter = get_buffer_counter(ctx, ii_buffer, tmp_lexicon, tid); if (!counter) { return; } buffer[block_pos++] = tid; if (counter->last_rid != rid) { counter->offset_rid += GRN_B_ENC_SIZE(rid - counter->last_rid); counter->last_rid = rid; counter->offset_sid += GRN_B_ENC_SIZE(sid - 1); counter->last_sid = sid; if (counter->last_tf) { counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1); counter->last_tf = 0; counter->offset_weight += GRN_B_ENC_SIZE(counter->last_weight); counter->last_weight = 0; } counter->last_pos = 0; counter->nrecs++; } else if (counter->last_sid != sid) { counter->offset_rid += GRN_B_ENC_SIZE(0); counter->offset_sid += GRN_B_ENC_SIZE(sid - counter->last_sid - 1); counter->last_sid = sid; if (counter->last_tf) { counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1); counter->last_tf = 0; counter->offset_weight += GRN_B_ENC_SIZE(counter->last_weight); counter->last_weight = 0; } counter->last_pos = 0; counter->nrecs++; } counter->offset_pos += GRN_B_ENC_SIZE(pos - counter->last_pos); counter->last_pos = pos; counter->last_tf++; counter->last_weight += weight; counter->nposts++; } } grn_token_cursor_close(ctx, token_cursor); } ii_buffer->block_pos = block_pos; } } } static void grn_ii_buffer_fetch(grn_ctx *ctx, grn_ii_buffer *ii_buffer, ii_buffer_block *block) { if (!block->rest) { if (block->head < block->tail) { size_t bytesize = block->nextsize; if (block->buffersize < block->nextsize) { void *r = GRN_REALLOC(block->buffer, bytesize); if (r) { block->buffer = (uint8_t *)r; block->buffersize = block->nextsize; } else { GRN_LOG(ctx, GRN_LOG_WARNING, "realloc: %" GRN_FMT_LLU, (unsigned long long int)bytesize); return; } } if (lseek(ii_buffer->tmpfd, block->head, SEEK_SET) != block->head) { SERR("lseek"); return; } if (read(ii_buffer->tmpfd, block->buffer, bytesize) != bytesize) { SERR("read"); return; } block->head += bytesize; block->bufcur = block->buffer; if (block->head >= block->tail) { if (block->head > block->tail) { GRN_LOG(ctx, GRN_LOG_WARNING, "fetch error: %jd > %jd", block->head, block->tail); } block->rest = block->nextsize; block->nextsize = 0; } else { block->rest = block->nextsize - sizeof(uint32_t); memcpy(&block->nextsize, &block->buffer[block->rest], sizeof(uint32_t)); } } } if (block->rest) { uint8_t *p = block->bufcur; GRN_B_DEC(block->tid, p); GRN_B_DEC(block->nrecs, p); GRN_B_DEC(block->nposts, p); block->rest -= (p - block->bufcur); block->bufcur = p; } else { block->tid = 0; } } static void grn_ii_buffer_chunk_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer) { grn_io_win io_win; uint32_t chunk_number; chunk_new(ctx, ii_buffer->ii, &chunk_number, ii_buffer->packed_len); GRN_LOG(ctx, GRN_LOG_INFO, "chunk:%d, packed_len:%zu", chunk_number, ii_buffer->packed_len); fake_map2(ctx, ii_buffer->ii->chunk, &io_win, ii_buffer->packed_buf, chunk_number, ii_buffer->packed_len); grn_io_win_unmap2(&io_win); ii_buffer->term_buffer->header.chunk = chunk_number; ii_buffer->term_buffer->header.chunk_size = ii_buffer->packed_len; ii_buffer->term_buffer->header.buffer_free = S_SEGMENT - sizeof(buffer_header) - ii_buffer->term_buffer->header.nterms * sizeof(buffer_term); ii_buffer->term_buffer->header.nterms_void = 0; buffer_segment_update(ii_buffer->ii, ii_buffer->lseg, ii_buffer->dseg); ii_buffer->ii->header->total_chunk_size += ii_buffer->packed_len; ii_buffer->total_chunk_size += ii_buffer->packed_len; GRN_LOG(ctx, GRN_LOG_NOTICE, "nterms=%d chunk=%d total=%" GRN_FMT_INT64U "KB", ii_buffer->term_buffer->header.nterms, ii_buffer->term_buffer->header.chunk_size, ii_buffer->ii->header->total_chunk_size >> 10); ii_buffer->term_buffer = NULL; ii_buffer->packed_buf = NULL; ii_buffer->packed_len = 0; ii_buffer->packed_buf_size = 0; ii_buffer->curr_size = 0; } static size_t merge_hit_blocks(grn_ctx *ctx, grn_ii_buffer *ii_buffer, ii_buffer_block *hits[], int nhits) { uint64_t nrecs = 0; uint64_t nposts = 0; size_t max_size; uint64_t flags = ii_buffer->ii->header->flags; int i; for (i = 0; i < nhits; i++) { ii_buffer_block *block = hits[i]; nrecs += block->nrecs; nposts += block->nposts; } ii_buffer->curr_size += nrecs + nposts; max_size = nrecs * (ii_buffer->ii->n_elements); if (flags & GRN_OBJ_WITH_POSITION) { max_size += nposts - nrecs; } datavec_reset(ctx, ii_buffer->data_vectors, ii_buffer->ii->n_elements, nrecs, max_size); { int i; uint32_t lr = 0; uint64_t spos = 0; uint32_t *ridp, *sidp = NULL, *tfp, *weightp = NULL, *posp = NULL; { int j = 0; ridp = ii_buffer->data_vectors[j++].data; if (flags & GRN_OBJ_WITH_SECTION) { sidp = ii_buffer->data_vectors[j++].data; } tfp = ii_buffer->data_vectors[j++].data; if (flags & GRN_OBJ_WITH_WEIGHT) { weightp = ii_buffer->data_vectors[j++].data; } if (flags & GRN_OBJ_WITH_POSITION) { posp = ii_buffer->data_vectors[j++].data; } } for (i = 0; i < nhits; i++) { ii_buffer_block *block = hits[i]; uint8_t *p = block->bufcur; uint32_t n = block->nrecs; if (n) { GRN_B_DEC(*ridp, p); *ridp -= lr; lr += *ridp++; while (--n) { GRN_B_DEC(*ridp, p); lr += *ridp++; } } if ((flags & GRN_OBJ_WITH_SECTION)) { for (n = block->nrecs; n; n--) { GRN_B_DEC(*sidp++, p); } } for (n = block->nrecs; n; n--) { GRN_B_DEC(*tfp++, p); } if ((flags & GRN_OBJ_WITH_WEIGHT)) { for (n = block->nrecs; n; n--) { GRN_B_DEC(*weightp++, p); } } if ((flags & GRN_OBJ_WITH_POSITION)) { for (n = block->nposts; n; n--) { GRN_B_DEC(*posp, p); spos += *posp++; } } block->rest -= (p - block->bufcur); block->bufcur = p; grn_ii_buffer_fetch(ctx, ii_buffer, block); } { int j = 0; uint32_t f_s = (nrecs < 3) ? 0 : USE_P_ENC; uint32_t f_d = ((nrecs < 16) || (nrecs <= (lr >> 8))) ? 0 : USE_P_ENC; ii_buffer->data_vectors[j].data_size = nrecs; ii_buffer->data_vectors[j++].flags = f_d; if ((flags & GRN_OBJ_WITH_SECTION)) { ii_buffer->data_vectors[j].data_size = nrecs; ii_buffer->data_vectors[j++].flags = f_s; } ii_buffer->data_vectors[j].data_size = nrecs; ii_buffer->data_vectors[j++].flags = f_s; if ((flags & GRN_OBJ_WITH_WEIGHT)) { ii_buffer->data_vectors[j].data_size = nrecs; ii_buffer->data_vectors[j++].flags = f_s; } if ((flags & GRN_OBJ_WITH_POSITION)) { uint32_t f_p = (((nposts < 32) || (nposts <= (spos >> 13))) ? 0 : USE_P_ENC); ii_buffer->data_vectors[j].data_size = nposts; ii_buffer->data_vectors[j++].flags = f_p|ODD; } } } return (max_size + ii_buffer->ii->n_elements) * 4; } static buffer * get_term_buffer(grn_ctx *ctx, grn_ii_buffer *ii_buffer) { if (!ii_buffer->term_buffer) { uint32_t lseg; void *term_buffer; for (lseg = 0; lseg < GRN_II_MAX_LSEG; lseg++) { if (ii_buffer->ii->header->binfo[lseg] == NOT_ASSIGNED) { break; } } if (lseg == GRN_II_MAX_LSEG) { ERR(GRN_NO_MEMORY_AVAILABLE, "segment allocate failed"); return NULL; } ii_buffer->lseg = lseg; ii_buffer->dseg = segment_get(ctx, ii_buffer->ii); GRN_IO_SEG_REF(ii_buffer->ii->seg, ii_buffer->dseg, term_buffer); ii_buffer->term_buffer = (buffer *)term_buffer; } return ii_buffer->term_buffer; } static grn_bool try_in_place_packing(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id tid, ii_buffer_block *hits[], int nhits) { if (nhits == 1 && hits[0]->nrecs == 1 && hits[0]->nposts == 1) { grn_id rid; uint32_t sid = 1, tf, pos = 0, weight = 0; ii_buffer_block *block = hits[0]; uint8_t *p = block->bufcur; uint32_t flags = ii_buffer->ii->header->flags; GRN_B_DEC(rid, p); if (flags & GRN_OBJ_WITH_SECTION) { GRN_B_DEC(sid, p); sid++; } GRN_B_DEC(tf, p); if (tf != 0) { GRN_LOG(ctx, GRN_LOG_WARNING, "tf=%d", tf); } if (flags & GRN_OBJ_WITH_WEIGHT) { GRN_B_DEC(weight, p); } if (flags & GRN_OBJ_WITH_POSITION) { GRN_B_DEC(pos, p); } if (!weight) { if (flags & GRN_OBJ_WITH_SECTION) { if (rid < 0x100000 && sid < 0x800) { uint32_t *a = array_get(ctx, ii_buffer->ii, tid); a[0] = (rid << 12) + (sid << 1) + 1; a[1] = pos; } else { return GRN_FALSE; } } else { uint32_t *a = array_get(ctx, ii_buffer->ii, tid); a[0] = (rid << 1) + 1; a[1] = pos; } block->rest -= (p - block->bufcur); block->bufcur = p; grn_ii_buffer_fetch(ctx, ii_buffer, block); return GRN_TRUE; } } return GRN_FALSE; } static void grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id tid, ii_buffer_block *hits[], int nhits) { if (!try_in_place_packing(ctx, ii_buffer, tid, hits, nhits)) { size_t max_size = merge_hit_blocks(ctx, ii_buffer, hits, nhits); if (ii_buffer->packed_buf && ii_buffer->packed_buf_size < ii_buffer->packed_len + max_size) { grn_ii_buffer_chunk_flush(ctx, ii_buffer); } if (!ii_buffer->packed_buf) { size_t buf_size = (max_size > II_BUFFER_PACKED_BUF_SIZE) ? max_size : II_BUFFER_PACKED_BUF_SIZE; if ((ii_buffer->packed_buf = GRN_MALLOC(buf_size))) { ii_buffer->packed_buf_size = buf_size; } } { uint16_t nterm; size_t packed_len; buffer_term *bt; uint32_t *a = array_get(ctx, ii_buffer->ii, tid); buffer *term_buffer = get_term_buffer(ctx, ii_buffer); if (!term_buffer) { return; } nterm = term_buffer->header.nterms++; bt = &term_buffer->terms[nterm]; a[0] = SEG2POS(ii_buffer->lseg, (sizeof(buffer_header) + sizeof(buffer_term) * nterm)); packed_len = grn_p_encv(ctx, ii_buffer->data_vectors, ii_buffer->ii->n_elements, ii_buffer->packed_buf + ii_buffer->packed_len); a[1] = ii_buffer->data_vectors[0].data_size; bt->tid = tid; bt->size_in_buffer = 0; bt->pos_in_buffer = 0; bt->size_in_chunk = packed_len; bt->pos_in_chunk = ii_buffer->packed_len; ii_buffer->packed_len += packed_len; if (((ii_buffer->curr_size * ii_buffer->update_buffer_size) + (ii_buffer->total_size * term_buffer->header.nterms * 16)) >= (ii_buffer->total_size * II_BUFFER_NTERMS_PER_BUFFER * 16)) { grn_ii_buffer_chunk_flush(ctx, ii_buffer); } } } } grn_ii_buffer * grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii, long long unsigned int update_buffer_size) { if (ii && ii->lexicon) { grn_ii_buffer *ii_buffer = GRN_MALLOCN(grn_ii_buffer, 1); if (ii_buffer) { ii_buffer->ii = ii; ii_buffer->lexicon = ii->lexicon; ii_buffer->tmp_lexicon = NULL; ii_buffer->nblocks = 0; ii_buffer->blocks = NULL; ii_buffer->ncounters = II_BUFFER_NCOUNTERS_MARGIN; ii_buffer->block_pos = 0; ii_buffer->filepos = 0; ii_buffer->curr_size = 0; ii_buffer->total_size = 0; ii_buffer->update_buffer_size = update_buffer_size; ii_buffer->counters = GRN_CALLOC(ii_buffer->ncounters * sizeof(ii_buffer_counter)); ii_buffer->term_buffer = NULL; ii_buffer->packed_buf = NULL; ii_buffer->packed_len = 0; ii_buffer->packed_buf_size = 0; ii_buffer->total_chunk_size = 0; if (ii_buffer->counters) { ii_buffer->block_buf = GRN_MALLOCN(grn_id, II_BUFFER_BLOCK_SIZE); if (ii_buffer->block_buf) { snprintf(ii_buffer->tmpfpath, PATH_MAX, "%sXXXXXX", grn_io_path(ii->seg)); ii_buffer->block_buf_size = II_BUFFER_BLOCK_SIZE; ii_buffer->tmpfd = GRN_MKOSTEMP(ii_buffer->tmpfpath, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR); if (ii_buffer->tmpfd != -1) { grn_obj_flags flags; grn_table_get_info(ctx, ii->lexicon, &flags, NULL, NULL, NULL, NULL); if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) { grn_pat_cache_enable(ctx, (grn_pat *)ii->lexicon, PAT_CACHE_SIZE); } return ii_buffer; } else { SERR("mkostemp"); } GRN_FREE(ii_buffer->block_buf); } GRN_FREE(ii_buffer->counters); } GRN_FREE(ii_buffer); } } else { ERR(GRN_INVALID_ARGUMENT, "ii or ii->lexicon is NULL"); } return NULL; } grn_rc grn_ii_buffer_append(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid, unsigned int sid, grn_obj *value) { grn_ii_buffer_tokenize(ctx, ii_buffer, rid, sid, 0, GRN_TEXT_VALUE(value), GRN_TEXT_LEN(value)); return ctx->rc; } grn_rc grn_ii_buffer_commit(grn_ctx *ctx, grn_ii_buffer *ii_buffer) { if (ii_buffer->block_pos) { grn_ii_buffer_flush(ctx, ii_buffer); } if (ii_buffer->tmpfd != -1) { GRN_CLOSE(ii_buffer->tmpfd); } if (ii_buffer->block_buf) { GRN_FREE(ii_buffer->block_buf); ii_buffer->block_buf = NULL; } if (ii_buffer->counters) { GRN_FREE(ii_buffer->counters); ii_buffer->counters = NULL; } if (ii_buffer->update_buffer_size && ii_buffer->update_buffer_size < 20) { if (ii_buffer->update_buffer_size < 10) { ii_buffer->update_buffer_size = ii_buffer->total_size >> (10 - ii_buffer->update_buffer_size); } else { ii_buffer->update_buffer_size = ii_buffer->total_size << (ii_buffer->update_buffer_size - 10); } } GRN_LOG(ctx, GRN_LOG_NOTICE, "nblocks=%d, update_buffer_size=%" GRN_FMT_INT64U, ii_buffer->nblocks, ii_buffer->update_buffer_size); datavec_init(ctx, ii_buffer->data_vectors, ii_buffer->ii->n_elements, 0, 0); #ifdef WIN32 ii_buffer->tmpfd = GRN_OPEN(ii_buffer->tmpfpath, O_RDONLY|O_BINARY); #else /* WIN32 */ ii_buffer->tmpfd = GRN_OPEN(ii_buffer->tmpfpath, O_RDONLY); #endif /* WIN32 */ if (ii_buffer->tmpfd == -1) { SERR("oepn"); return ctx->rc; } { uint32_t i; for (i = 0; i < ii_buffer->nblocks; i++) { grn_ii_buffer_fetch(ctx, ii_buffer, &ii_buffer->blocks[i]); } } { ii_buffer_block **hits; if ((hits = GRN_MALLOCN(ii_buffer_block *, ii_buffer->nblocks))) { grn_id tid; grn_table_cursor *tc; tc = grn_table_cursor_open(ctx, ii_buffer->lexicon, NULL, 0, NULL, 0, 0, -1, II_BUFFER_ORDER); if (tc) { while ((tid = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) { int nrests = 0; int nhits = 0; uint32_t i; for (i = 0; i < ii_buffer->nblocks; i++) { if (ii_buffer->blocks[i].tid == tid) { hits[nhits++] = &ii_buffer->blocks[i]; } if (ii_buffer->blocks[i].tid) { nrests++; } } if (nhits) { grn_ii_buffer_merge(ctx, ii_buffer, tid, hits, nhits); } if (!nrests) { break; } } if (ii_buffer->packed_len) { grn_ii_buffer_chunk_flush(ctx, ii_buffer); } grn_table_cursor_close(ctx, tc); } GRN_FREE(hits); } } datavec_fin(ctx, ii_buffer->data_vectors); GRN_LOG(ctx, GRN_LOG_NOTICE, "tmpfile_size:%jd > total_chunk_size:%" GRN_FMT_INT64U, ii_buffer->filepos, ii_buffer->total_chunk_size); GRN_CLOSE(ii_buffer->tmpfd); unlink(ii_buffer->tmpfpath); ii_buffer->tmpfd = -1; return ctx->rc; } grn_rc grn_ii_buffer_close(grn_ctx *ctx, grn_ii_buffer *ii_buffer) { uint32_t i; grn_obj_flags flags; grn_table_get_info(ctx, ii_buffer->ii->lexicon, &flags, NULL, NULL, NULL, NULL); if ((flags & GRN_OBJ_TABLE_TYPE_MASK) == GRN_OBJ_TABLE_PAT_KEY) { grn_pat_cache_disable(ctx, (grn_pat *)ii_buffer->ii->lexicon); } if (ii_buffer->tmp_lexicon) { grn_obj_close(ctx, ii_buffer->tmp_lexicon); } if (ii_buffer->tmpfd != -1) { GRN_CLOSE(ii_buffer->tmpfd); unlink(ii_buffer->tmpfpath); } if (ii_buffer->block_buf) { GRN_FREE(ii_buffer->block_buf); } if (ii_buffer->counters) { GRN_FREE(ii_buffer->counters); } if (ii_buffer->blocks) { for (i = 0; i < ii_buffer->nblocks; i++) { if (ii_buffer->blocks[i].buffer) { GRN_FREE(ii_buffer->blocks[i].buffer); } } GRN_FREE(ii_buffer->blocks); } GRN_FREE(ii_buffer); return ctx->rc; } static void grn_ii_buffer_parse(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_obj *target, int ncols, grn_obj **cols) { grn_table_cursor *tc; if ((tc = grn_table_cursor_open(ctx, target, NULL, 0, NULL, 0, 0, -1, GRN_CURSOR_BY_ID))) { grn_id rid; grn_obj rv; GRN_TEXT_INIT(&rv, 0); while ((rid = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) { int sid; grn_obj **col; for (sid = 1, col = cols; sid <= ncols; sid++, col++) { grn_obj_reinit_for(ctx, &rv, *col); if (GRN_OBJ_TABLEP(*col)) { grn_table_get_key2(ctx, *col, rid, &rv); } else { grn_obj_get_value(ctx, *col, rid, &rv); } switch (rv.header.type) { case GRN_BULK : grn_ii_buffer_tokenize(ctx, ii_buffer, rid, sid, 0, GRN_TEXT_VALUE(&rv), GRN_TEXT_LEN(&rv)); break; case GRN_VECTOR : if (rv.u.v.body) { int i; int n_sections = rv.u.v.n_sections; grn_section *sections = rv.u.v.sections; const char *head = GRN_BULK_HEAD(rv.u.v.body); for (i = 0; i < n_sections; i++) { grn_section *section = sections + i; if (section->length == 0) { continue; } grn_ii_buffer_tokenize(ctx, ii_buffer, rid, sid, section->weight, head + section->offset, section->length); } } break; default : ERR(GRN_INVALID_ARGUMENT, "[index] invalid object assigned as value"); break; } } } GRN_OBJ_FIN(ctx, &rv); grn_table_cursor_close(ctx, tc); } } grn_rc grn_ii_build(grn_ctx *ctx, grn_ii *ii, uint64_t sparsity) { grn_ii_buffer *ii_buffer = grn_ii_buffer_open(ctx, ii, sparsity); if (ii_buffer) { grn_id *s = ii->obj.source; if ((ii->obj.source_size) && s) { int ncols = ii->obj.source_size / sizeof(grn_id); grn_obj **cols = GRN_MALLOCN(grn_obj *, ncols); if (cols) { int i; for (i = 0; i < ncols; i++) { if (!(cols[i] = grn_ctx_at(ctx, s[i]))) { break; } } if (i == ncols) { grn_obj *target = cols[0]; if (!GRN_OBJ_TABLEP(target)) { target = grn_ctx_at(ctx, target->header.domain); } if (target) { grn_ii_buffer_parse(ctx, ii_buffer, target, ncols, cols); grn_ii_buffer_commit(ctx, ii_buffer); } else { ERR(GRN_INVALID_ARGUMENT, "failed to resolve the target"); } } else { ERR(GRN_INVALID_ARGUMENT, "failed to resolve a column (%d)", i); } GRN_FREE(cols); } } else { ERR(GRN_INVALID_ARGUMENT, "ii->obj.source is void"); } grn_ii_buffer_close(ctx, ii_buffer); } return ctx->rc; }