summaryrefslogtreecommitdiff
path: root/scripts/memcached-automove
blob: a5b531c0631613587a974e85a1ffe371213c8828 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#!/usr/bin/python3
# Copyright 2017 Facebook.
# Licensed under the same terms as memcached itself.

import argparse
import socket
import sys
import re
import traceback
from time import sleep

parser = argparse.ArgumentParser(description="daemon for rebalancing slabs")
parser.add_argument("--host", help="host to connect to",
        default="localhost:11211", metavar="HOST:PORT")
parser.add_argument("-s", "--sleep", help="seconds between runs",
                    type=int, default="1")
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-a", "--automove", action="store_true", default=False,
                    help="enable automatic page rebalancing")
parser.add_argument("-w", "--window", type=int, default="30",
                    help="rolling window size for decision history")
parser.add_argument("-r", "--ratio", type=float, default=0.8,
                    help="ratio limiting distance between low/high class ages")

# TODO:
# - age adjuster function
#   - by ratio of get_hits
#   - by ratio of chunk size

args = parser.parse_args()

host, port = args.host.split(':')

def window_check_count(history, sid, key):
    total = 0
    for window in history['w']:
        s = window.get(sid)
        if s and s.get(key):
            total += s.get(key) > 0 and 1 or 0
    return total

def window_check_sum(history, sid, key):
    total = 0
    for window in history['w']:
        s = window.get(sid)
        if s and s.get(key):
            total += s.get(key)
    return total

def determine_move(history, diffs, totals):
    """ Figure out of a page move is in order.

    - if > 2.5 pages of free space without free chunks reducing for N trials,
      and no evictions for N trials, free to global.
    - use ratio of how far apart age can be between slab classes
    - TODO: if get_hits exists, allowable distance in age from the *youngest* slab
      class is based on the percentage of get_hits the class gets, against the
      factored max distance, ie:
      1% max delta. youngest is 900, oldest allowable is 900+90
      if class has 30% of get_hits, it can be 930
    - youngest evicting slab class gets page moved to it, if outside ratio max
    - use age as average over window. smooths over items falling out of WARM.
      also gives a little weight: if still evicting, a few more pages than
      necessary may be moved, pulling the classes closer together. Hopefully
      avoiding ping-ponging.
    """
    # rotate windows
    history['w'].append({})
    if (len(history['w']) > args.window):
        history['w'].pop(0)
    w = history['w'][-1]
    oldest = (-1, 0)
    youngest = (-1, sys.maxsize)
    decision = (-1, -1)
    for sid, slab in diffs.items():

        w[sid] = {}
        if 'evicted_d' not in slab or 'total_pages_d' not in slab:
            continue
        # mark this window as dirty if total pages increases or evictions
        # happened
        if slab['total_pages_d'] > 0:
            w[sid]['dirty'] = 1
        if slab['evicted_d'] > 0:
            w[sid]['dirty'] = 1
            w[sid]['ev'] = slab['evicted_d'] / totals['evicted_d']
        w[sid]['age'] = slab['age']
        age = window_check_sum(history, sid, 'age') / len(history['w'])

        # if > 2.5 pages free, and not dirty, reassign to global page pool and
        # break.
        if slab['free_chunks'] > slab['chunks_per_page'] * 2.5:
            if window_check_sum(history, sid, 'dirty') == 0:
                decision = (sid, 0)
                break

        # are we the oldest slab class? (and a valid target)
        if age > oldest[1] and slab['total_pages'] > 2:
            oldest = (sid, age)

        # are we the youngest evicting slab class?
        ev_total = window_check_count(history, sid, 'ev')
        ev_total_sum = window_check_sum(history, sid, 'ev') / args.window
        window_min = args.window / 2
        if args.verbose:
            print("sid {} age {} ev_total {} window_min {} ev_total_sum {}".format(sid, age, ev_total, window_min, ev_total_sum))
        # If youngest and evicted in more than 50% of the window interval, or more than 25% of the total evictions in the window
        if age < youngest[1] and ( ev_total > window_min or ev_total_sum > 0.25 ):
            youngest = (sid, age)
            #if args.verbose:
            #    print("window: {} range: {}".format(ev_total, window_min))

    # is the youngest slab class too young?
    if youngest[0] != -1 and oldest[0] != -1:
        if args.verbose:
            print("old:   [class: {}] [age: {:.2f}]\nyoung: [class: {}] [age: {:.2f}]".format(
                int(oldest[0]), oldest[1], int(youngest[0]), youngest[1]))
        if youngest[1] < oldest[1] * args.ratio and w[youngest[0]].get('ev'):
            decision = (oldest[0], youngest[0])

    if (len(history['w']) >= args.window):
        return decision
    return (-1, -1)


def run_move(s, decision):
    s.write("slabs reassign " + str(decision[0]) + " " + str(decision[1]) + "\r\n")
    line = s.readline().rstrip()
    if args.verbose:
        print("move result:", line)


def diff_stats(before, after):
    """ fills out "diffs" as deltas between before/after,
    and "totals" as the sum of all slab classes.
    "_d" postfix to keys means the delta between before/after.
    non-postfix keys are total as of 'after's reading.
    """
    diffs = {}
    totals = {}
    for slabid in after.keys():
        sb = before.get(slabid)
        sa = after.get(slabid)
        if not (sb and sa):
            continue
        slab = sa.copy()
        for k in sa.keys():
            if k not in sb:
                continue
            if k not in totals:
                totals[k] = 0
                totals[k + '_d'] = 0
            if k + '_d' not in slab:
                slab[k + '_d'] = 0
            if re.search(r"^\d+$", sa[k]):
                totals[k] += int(sa[k])
                slab[k] = int(sa[k])
                slab[k + '_d'] = int(sa[k]) - int(sb[k])
                totals[k + '_d'] += int(sa[k]) - int(sb[k])
        slab['slab'] = slabid
        diffs[slabid] = slab
    return (diffs, totals)


def read_stats(s):
    slabs = {}
    for statcmd in ['items', 'slabs']:
        #print("stat cmd: " + statcmd)
        # FIXME: Formatting
        s.write("stats " + statcmd + "\r\n")
        while True:
            line = s.readline().rstrip()
            if line.startswith("END"):
                break

            m = re.match(r"^STAT (?:items:)?(\d+):(\S+) (\S+)", line)
            if m:
                (slab, var, val) = m.groups()
                if slab not in slabs:
                    slabs[slab] = {}
                slabs[slab][var] = val
            #print("line: " + line)
    return slabs


def pct(num, divisor):
    if not divisor:
        return 0
    return (num / divisor)


def show_detail(diffs, totals):
    """ just a pretty printer for some extra data """
    print("\n  {:2s}: {:8s} (pct  ) {:10s} (pct    ) {:6s} (pct)   {:6s}".format('sb',
                'evicted', 'items', 'pages', 'age'))

    for sid, slab in diffs.items():
        if 'evicted_d' not in slab:
            continue
        print("  {:2d}: {:8d} ({:.2f}%) {:10d} ({:.4f}%) {:6d} ({:.2f}%) {:6d}".format(
              int(sid), slab['evicted_d'], pct(slab['evicted_d'], totals['evicted_d']),
              slab['number'], pct(slab['number'], totals['number']),
              slab['total_pages'], pct(slab['total_pages'],
              totals['total_pages']),
              slab['age']))


stats_pre = {}
history = { 'w': [{}] }
while True:
    try:
        with socket.create_connection((host, port), 5) as c:
            s = c.makefile(mode="rw", buffering=1)
            s.write("slabs automove 0\r\n")
            print(s.readline().rstrip())
            while True:
                stats_post = read_stats(s)
                (diffs, totals) = diff_stats(stats_pre, stats_post)
                if args.verbose:
                    show_detail(diffs, totals)
                decision = determine_move(history, diffs, totals)
                if decision[0] != -1 and decision[1] != -1:
                    print("moving page from, to:", decision)
                    if args.automove:
                        run_move(s, decision)

                # Minimize sleeping if we just moved a page to global pool.
                # Improves responsiveness during flushes/quick changes.
                if decision[1] == 0:
                    sleep(0.05)
                else:
                    sleep(args.sleep)
                stats_pre = stats_post
    except:
        err = sys.exc_info()
        print("disconnected:", err[0], err[1])
        traceback.print_exc()
        stats_pre = {}
        history = { 'w': [{}] }
        sleep(args.sleep)