summaryrefslogtreecommitdiff
path: root/src/lazyfree.c
blob: 84d4b37527190d6c4d1b96021868c9539a99c0e7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#include "server.h"

/* Initialization of the lazy free engine. Must be called only once at server
 * startup. */
void initLazyfreeEngine(void) {
    server.lazyfree_dbs = listCreate();
    server.lazyfree_obj = listCreate();
    server.lazyfree_elements = 0;
}

/* Return the amount of work needed in order to free an object.
 * The return value is not always the actual number of allocations the
 * object is compoesd of, but a number proportional to it.
 *
 * For strings the function always returns 1.
 *
 * For aggregated objects represented by hash tables or other data structures
 * the function just returns the number of elements the object is composed of.
 *
 * Objects composed of single allocations are always reported as having a
 * single item even if they are actaully logical composed of multiple
 * elements.
 *
 * For lists the funciton returns the number of elements in the quicklist
 * representing the list. */
size_t lazyfreeGetFreeEffort(robj *obj) {
    if (obj->type == OBJ_LIST) {
        quicklist *ql = obj->ptr;
        return ql->len;
    } else if (obj->type == OBJ_SET && obj->encoding == OBJ_ENCODING_HT) {
        dict *ht = obj->ptr;
        return dictSize(ht);
    } else if (obj->type == OBJ_ZSET && obj->encoding == OBJ_ENCODING_SKIPLIST){
        zset *zs = obj->ptr;
        return zs->zsl->length;
    } else if (obj->type == OBJ_HASH && obj->encoding == OBJ_ENCODING_HT) {
        dict *ht = obj->ptr;
        return dictSize(ht);
    } else {
        return 1; /* Everything else is a single allocation. */
    }
}

/* This callback is used together with dictScan() in order to free a dict.c
 * hash table incrementally. */
void lazyfreeScanCallback(void *privdata, const dictEntry *de) {
    dict *ht = privdata;
    long saved_iterators = ht->iterators;
    ht->iterators = 1; /* Make sure no rehashing happens. */
    dictDelete(ht,dictGetKey(de));
    ht->iterators = saved_iterators;
}

/* Free some object from the lazy free list. */
#define LAZYFREE_ITER_PER_STEP 100
size_t lazyfreeFastStep(void) {
    size_t maxiter = LAZYFREE_ITER_PER_STEP;
    size_t workdone = 0;
    robj *current = NULL;

    while(maxiter--) {
        if (current == NULL) {
            listNode *ln = listFirst(server.lazyfree_obj);
            if (ln == NULL) break; /* Nothing more to free. */
            current = ln->value;
        }
        if ((current->type == OBJ_SET ||
            current->type == OBJ_HASH) &&
            current->encoding == OBJ_ENCODING_HT)
        {
            dict *ht = current->ptr;
            size_t origsize = dictSize(ht);
            ht->iterators = dictScan(ht,ht->iterators,lazyfreeScanCallback,ht);
            workdone++; /* We are not sure how many elements we freed, even if
                           zero, the free list is non empty so we don't return
                           0 to the caller. */
            server.lazyfree_elements -= (origsize - dictSize(ht));
            if (dictSize(ht) == 0) {
                decrRefCount(current);
                listNode *ln = listFirst(server.lazyfree_obj);
                listDelNode(server.lazyfree_obj,ln);
                current = NULL;
            }
        } else {
            /* Not handled type or encoding. Do a blocking free. */
            size_t effort = lazyfreeGetFreeEffort(current);
            server.lazyfree_elements -= effort;
            workdone += effort;
            decrRefCount(current);
            listNode *ln = listFirst(server.lazyfree_obj);
            listDelNode(server.lazyfree_obj,ln);
            current = NULL;
        }
    }
    return workdone;
}

/* Handles slow or fast collection steps. */
size_t lazyfreeStep(int type) {
    if (type == LAZYFREE_STEP_FAST) return lazyfreeFastStep();

    size_t totalwork = 0;
    mstime_t end = mstime()+2;
    do {
        size_t workdone = lazyfreeFastStep();
        if (workdone == 0) break;
        totalwork += workdone;
    } while(mstime() < end);
    return totalwork;
}

/* Delete a key, value, and associated expiration entry if any, from the DB.
 * If there are enough allocations to free the value object may be put into
 * a lazy free list instead of being freed synchronously. The lazy free list
 * will be reclaimed incrementally in a non blocking way. */
#define LAZYFREE_THRESHOLD 64
int dbAsyncDelete(redisDb *db, robj *key) {
    /* Deleting an entry from the expires dict will not free the sds of
     * the key, because it is shared with the main dictionary. */
    if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);

    /* If the value is composed of a few allocations, to free in a lazy way
     * is actually just slower... So under a certain limit we just free
     * the object synchronously. */
    dictEntry *de = dictFind(db->dict,key->ptr);
    if (de) {
        robj *val = dictGetVal(de);
        size_t free_effort = lazyfreeGetFreeEffort(val);

        /* If releasing the object is too much work, let's put it into the
         * lazy free list. */
        if (free_effort > LAZYFREE_THRESHOLD) {
            listAddNodeTail(server.lazyfree_obj,val);
            server.lazyfree_elements += free_effort;
            dictSetVal(db->dict,de,NULL);
        }
    }

    /* Release the key-val pair, or just the key if we set the val
     * field to NULL in order to lazy free it later. */
    if (dictDelete(db->dict,key->ptr) == DICT_OK) {
        if (server.cluster_enabled) slotToKeyDel(key);
        return 1;
    } else {
        return 0;
    }
}

/* This is the timer handler we use to incrementally perform collection
 * into the lazy free lists. We can't use serverCron since we need a
 * very high timer frequency when there are many objects to collect, while
 * we lower the frequency to just 1HZ when there is nothing to do.
 *
 * Since a slow lazy free step will take 1.5 milliseconds and we modulate
 * the timer frequency from 1 to 333 HZ in an adaptive way, the CPU
 * used is between 0% (nothing in the lazy free list) to 50%.
 *
 * The frequency is obtained as follows: if the lazy free list is empty
 * it is set to 1HZ. If the lazy free has elements the call period starts
 * at 20 (50HZ) and is decremented (up to 3 ms = 333HZ) each time the server
 * used memory raises between calls of this function. */
int lazyfreeCron(struct aeEventLoop *eventLoop, long long id, void *clientData)
{
    UNUSED(eventLoop);
    UNUSED(id);
    UNUSED(clientData);

    static size_t prev_mem;
    static int timer_period = 1000; /* Defauls to 1HZ */
    static double mem_trend = 0;
    size_t mem = zmalloc_used_memory();

    /* Compute the memory trend, biased towards thinking memory is raising
     * for a few calls every time previous and current memory raise. */
    if (prev_mem < mem) mem_trend = 1;
    mem_trend *= 0.9; /* Make it slowly forget. */
    int mem_is_raising = mem_trend > .1;

    /* Free a few items. */
    size_t workdone = lazyfreeStep(LAZYFREE_STEP_SLOW);

    /* Adjust this timer call frequency according to the current state. */
    if (workdone) {
        if (timer_period == 1000) timer_period = 20;
        if (mem_is_raising && timer_period > 3)
            timer_period--; /* Raise call frequency. */
        else if (!mem_is_raising && timer_period < 20)
            timer_period++; /* Lower call frequency. */
    } else {
        timer_period = 1000;    /* 1 HZ */
    }
    prev_mem = mem;
#if 0
    printf("%llu (%d hz) %s (%f)\n",
        (unsigned long long)server.lazyfree_elements,
        1000/timer_period,
        mem_is_raising ? "RAISING" : "lowering",
        mem_trend);
#endif
    return timer_period;
}