summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--items.c9
-rw-r--r--items.h1
-rw-r--r--memcached.c94
-rwxr-xr-xscripts/mc_slab_mover260
-rw-r--r--slabs.c74
5 files changed, 402 insertions, 36 deletions
diff --git a/items.c b/items.c
index a743fb5..7ce6b15 100644
--- a/items.c
+++ b/items.c
@@ -398,6 +398,15 @@ char *do_item_cachedump(const unsigned int slabs_clsid, const unsigned int limit
return buffer;
}
+/* Copy the per-class eviction counters out of itemstats into the caller's
+ * array. `evicted` must have room for LARGEST_ID entries. The copy is made
+ * between taking and releasing cache_lock.
+ */
+void item_stats_evictions(uint64_t *evicted) {
+    int clsid = 0;
+
+    mutex_lock(&cache_lock);
+    while (clsid < LARGEST_ID) {
+        evicted[clsid] = itemstats[clsid].evicted;
+        clsid++;
+    }
+    pthread_mutex_unlock(&cache_lock);
+}
+
void do_item_stats(ADD_STAT add_stats, void *c) {
int i;
for (i = 0; i < LARGEST_ID; i++) {
diff --git a/items.h b/items.h
index fc7b85e..2ec142d 100644
--- a/items.h
+++ b/items.h
@@ -24,3 +24,4 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv);
item *do_item_touch(const char *key, const size_t nkey, uint32_t exptime, const uint32_t hv);
void item_stats_reset(void);
extern pthread_mutex_t cache_lock;
+void item_stats_evictions(uint64_t *evicted);
diff --git a/memcached.c b/memcached.c
index e89c555..496ec13 100644
--- a/memcached.c
+++ b/memcached.c
@@ -3189,6 +3189,26 @@ static void process_verbosity_command(conn *c, token_t *tokens, const size_t nto
return;
}
+/* Handle "slabs automove <0|1>": switch the slab automover off or on.
+ *
+ * tokens[2] carries the requested level and must be a plain decimal number.
+ * 0 clears settings.slab_automove and 1 sets it; both reply "OK". Any other
+ * number is answered with "ERROR". Text that is not a number, or that
+ * overflows, is rejected with a CLIENT_ERROR rather than being read as 0,
+ * which would silently disable the automover.
+ */
+static void process_slabs_automove_command(conn *c, token_t *tokens, const size_t ntokens) {
+    unsigned long level;
+    char *endptr = NULL;
+
+    assert(c != NULL);
+
+    set_noreply_maybe(c, tokens, ntokens);
+
+    /* strtoul() reports overflow only through errno, so clear it first. */
+    errno = 0;
+    level = strtoul(tokens[2].value, &endptr, 10);
+    if (endptr == tokens[2].value || *endptr != '\0' || errno == ERANGE) {
+        out_string(c, "CLIENT_ERROR bad command line format");
+        return;
+    }
+
+    /* Compare the full-width value; narrowing it first could wrap to 0. */
+    if (level == 0) {
+        settings.slab_automove = false;
+    } else if (level == 1) {
+        settings.slab_automove = true;
+    } else {
+        out_string(c, "ERROR");
+        return;
+    }
+    out_string(c, "OK");
+    return;
+}
+
static void process_command(conn *c, char *command) {
token_t tokens[MAX_TOKENS];
@@ -3303,45 +3323,51 @@ static void process_command(conn *c, char *command) {
conn_set_state(c, conn_closing);
- } else if (ntokens == 5 && (strcmp(tokens[COMMAND_TOKEN].value, "slabs") == 0 &&
- strcmp(tokens[COMMAND_TOKEN + 1].value, "reassign") == 0)) {
- int src, dst, rv;
+ } else if (strcmp(tokens[COMMAND_TOKEN].value, "slabs") == 0) {
+ if (ntokens == 5 && strcmp(tokens[COMMAND_TOKEN + 1].value, "reassign") == 0) {
+ int src, dst, rv;
- if (settings.slab_reassign == false) {
- out_string(c, "CLIENT_ERROR slab reassignment disabled");
- return;
- }
+ if (settings.slab_reassign == false) {
+ out_string(c, "CLIENT_ERROR slab reassignment disabled");
+ return;
+ }
- src = strtol(tokens[2].value, NULL, 10);
- dst = strtol(tokens[3].value, NULL, 10);
+ src = strtol(tokens[2].value, NULL, 10);
+ dst = strtol(tokens[3].value, NULL, 10);
- if (errno == ERANGE) {
- out_string(c, "CLIENT_ERROR bad command line format");
- return;
- }
+ if (errno == ERANGE) {
+ out_string(c, "CLIENT_ERROR bad command line format");
+ return;
+ }
- rv = slabs_reassign(src, dst);
- switch (rv) {
- case REASSIGN_OK:
- out_string(c, "OK");
- break;
- case REASSIGN_RUNNING:
- out_string(c, "BUSY");
- break;
- case REASSIGN_BADCLASS:
- out_string(c, "BADCLASS");
- break;
- case REASSIGN_NOSPARE:
- out_string(c, "NOSPARE");
- break;
- case REASSIGN_DEST_NOT_FULL:
- out_string(c, "NOTFULL");
- break;
- case REASSIGN_SRC_NOT_SAFE:
- out_string(c, "UNSAFE");
- break;
+ rv = slabs_reassign(src, dst);
+ switch (rv) {
+ case REASSIGN_OK:
+ out_string(c, "OK");
+ break;
+ case REASSIGN_RUNNING:
+ out_string(c, "BUSY");
+ break;
+ case REASSIGN_BADCLASS:
+ out_string(c, "BADCLASS");
+ break;
+ case REASSIGN_NOSPARE:
+ out_string(c, "NOSPARE");
+ break;
+ case REASSIGN_DEST_NOT_FULL:
+ out_string(c, "NOTFULL");
+ break;
+ case REASSIGN_SRC_NOT_SAFE:
+ out_string(c, "UNSAFE");
+ break;
+ }
+ return;
+ } else if (ntokens == 4 &&
+ (strcmp(tokens[COMMAND_TOKEN + 1].value, "automove") == 0)) {
+ process_slabs_automove_command(c, tokens, ntokens);
+ } else {
+ out_string(c, "ERROR");
}
- return;
} else if ((ntokens == 3 || ntokens == 4) && (strcmp(tokens[COMMAND_TOKEN].value, "verbosity") == 0)) {
process_verbosity_command(c, tokens, ntokens);
} else {
diff --git a/scripts/mc_slab_mover b/scripts/mc_slab_mover
new file mode 100755
index 0000000..d7bd5e4
--- /dev/null
+++ b/scripts/mc_slab_mover
@@ -0,0 +1,260 @@
+#! /usr/bin/perl
+# See memcached for LICENSE
+# Copyright 2011 Dormando (dormando@rydia.net)
+
+=head1 NAME
+
+mc_slab_mover -- example utility for slab page reassignment for memcached
+
+=head1 SYNOPSIS
+
+ $ mc_slab_mover --host="127.0.0.1:11211" --verbose
+ $ mc_slab_mover --host="127.0.0.1:11211" --automove
+ $ mc_slab_mover --host="127.0.0.1:11211" --sleep=60 --loops=4 --automove
+
+=head1 DESCRIPTION
+
+This utility is an example implementation of an algorithm for reassigning
+slab memory in a running memcached instance. If memcached's built-in
+automover isn't working for you, you may use this script as an example
+base and expand on it. We welcome modifications or alternatives on the
+mailing list.
+
+=head1 ALGORITHM
+
+The default algorithm is simple, and may serve for a common case: over
+time one slab may grow in use compared to others, and as evictions stop
+in one slab and start in another it will reassign memory.
+
+If a slab has the most evictions three times in a row, it will pull a page
+from a slab which has had zero evictions three times in a row.
+
+There are many traffic patterns where this does not work well. For example,
+if you never use expirations and rely on the LRU (so all slabs always evict),
+it will not be as likely to find source pages to move.
+
+=head1 OPTIONS
+
+=over
+
+=item --host="IP:PORT"
+
+The hostname to connect to. NOTE: If the connection to the host breaks, the
+script will stop.
+
+=item --sleep=10
+
+How long to wait between loops for gathering stats.
+
+=item --loops=3
+
+How many loops to run before making a decision for a move.
+
+=item --verbose
+
+Prints a formatted dump of some common statistics per loop.
+
+=item --automove
+
+Enables the automover, and will attempt to move memory around if it finds
+viable candidates.
+
+=back
+
+=head1 AUTHOR
+
+Dormando E<lt>L<dormando@rydia.net>E<gt>
+
+=head1 LICENSE
+
+Licensed for use and redistribution under the same terms as Memcached itself.
+
+=cut
+
+use warnings;
+use strict;
+
+use IO::Socket::INET;
+
+use FindBin;
+use Data::Dumper qw/Dumper/;
+use Getopt::Long;
+
+# Defaults for every option except --host, which has none and is required.
+my %opts = (
+    'sleep'  => 10,
+    loops    => 3,
+    automove => 0,
+    verbose  => 0,
+);
+usage() unless GetOptions(
+    "host=s"   => \$opts{host},
+    "sleep=i"  => \$opts{'sleep'},
+    "loops=i"  => \$opts{loops},
+    "automove" => \$opts{automove},
+    "verbose"  => \$opts{verbose},
+);
+
+unless ($opts{host}) {
+    die "Must specify at least --host='127.0.0.1:11211'";
+}
+
+# One connection is opened up front and shared by the subs below that talk
+# to the server.
+my $sock = IO::Socket::INET->new(
+    PeerAddr => $opts{host},
+    Timeout  => 3,
+);
+$sock or die "$!\n";
+
+# %stats counts how often each slab class had the most evictions;
+# %move carries the automover's state between loops.
+my %stats = ();
+my %move  = (winner => 0, wins => 0);
+
+# SIGUSR1 dumps the win counters; SIGINT dumps them and then exits.
+my $dump_stats = sub {
+    print "STATS: ", Dumper(\%stats), "\n";
+};
+$SIG{USR1} = $dump_stats;
+$SIG{INT}  = sub {
+    $dump_stats->();
+    exit;
+};
+run();
+
+# Print a short usage summary and exit with status 1.
+# The text names this script (mc_slab_mover) so the perldoc hint resolves.
+sub usage {
+    print qq{Usage:
+    mc_slab_mover --host="127.0.0.1:11211" --verbose --automove
+    run `perldoc mc_slab_mover` for full information
+
+};
+    exit 1;
+}
+
+# Main loop; never returns. Every $opts{sleep} seconds it takes a fresh stats
+# sample, compares it with the previous one, optionally prints a per-slab
+# table (--verbose), records which slab class had the most evictions, and
+# calls automove_basic() when --automove is set.
+sub run {
+    my $slabs_before = grab_stats();
+
+    # Share of $num in $divisor as a percentage (0..100); 0 when the divisor
+    # is zero or missing. The table prints the result followed by a '%' sign.
+    my $pct = sub {
+        my ($num, $divisor) = @_;
+        return 0 unless $divisor;
+        return ($num / $divisor) * 100;
+    };
+
+    while (1) {
+        sleep $opts{'sleep'};
+        my $slabs_after = grab_stats();
+
+        # Alternative ranking, kept for experimentation:
+        #   calc_results_numratio($slabs_before, $slabs_after)
+        my ($totals, $sorted) = calc_results_evicted($slabs_before, $slabs_after);
+
+        if ($opts{verbose}) {
+            printf " %02s: %-8s (pct ) %-10s (pct ) %-6s (pct ) get_hits (pct ) cmd_set (pct )\n",
+                'sb', 'evicted', 'items', 'pages';
+            for my $slab (@$sorted) {
+                printf " %02d: %-8d (%.2f%%) %-10s (%.4f%%) %-6d (%.2f%%) %-8d (%.3f%%) %-7d (%.2f%%)\n",
+                    $slab->{slab}, $slab->{evicted_d},
+                    $pct->($slab->{evicted_d}, $totals->{evicted_d}),
+                    $slab->{number},
+                    $pct->($slab->{number}, $totals->{number}),
+                    $slab->{total_pages},
+                    $pct->($slab->{total_pages}, $totals->{total_pages}),
+                    $slab->{get_hits_d},
+                    $pct->($slab->{get_hits_d}, $totals->{get_hits_d}),
+                    $slab->{cmd_set_d},
+                    $pct->($slab->{cmd_set_d}, $totals->{cmd_set_d});
+            }
+        }
+
+        # $sorted is ascending by evictions, so the last entry is the winner.
+        if (@$sorted) {
+            my $highest = $sorted->[-1];
+            $stats{$highest->{slab}}++;
+            print " (winner: ", $highest->{slab}, " wins: ", $stats{$highest->{slab}}, ")\n";
+            automove_basic($totals, $sorted) if ($opts{automove});
+        }
+
+        # Always advance the baseline, even when no slab class was present in
+        # both samples. Otherwise a server that started out with no slab
+        # classes would never produce a usable comparison.
+        $slabs_before = $slabs_after;
+    }
+}
+
+# Send "stats items" and "stats slabs" over the shared socket and gather the
+# per-slab values. Returns a hashref: { slab id => { stat name => value } }.
+# Reply lines that do not carry a slab id are ignored; each reply is read up
+# to its END line.
+sub grab_stats {
+    my %by_slab;
+    foreach my $section ('items', 'slabs') {
+        print $sock "stats $section\r\n";
+        while (defined(my $reply = <$sock>)) {
+            chomp $reply;
+            last if $reply =~ m/^END/;
+            next unless $reply =~ m/^STAT (?:items:)?(\d+):(\S+) (\S+)/;
+            $by_slab{$1}->{$2} = $3;
+        }
+    }
+
+    return \%by_slab;
+}
+
+# Deliberately naive policy, the same as the initial one built into memcached.
+# A slab class that has the most evictions $opts{loops} times in a row becomes
+# the destination. A class with more than two pages and zero evictions
+# $opts{loops} times in a row becomes the source. When both exist, a
+# "slabs reassign" is sent and the server's reply is printed.
+sub automove_basic {
+    my ($totals, $sorted) = @_;
+
+    my $top = $sorted->[-1];
+    return unless $top->{evicted_d} > 0;
+
+    my ($source, $dest) = (0, 0);
+
+    # Count consecutive rounds won by the same class.
+    if ($move{winner} != $top->{slab}) {
+        $move{winner} = $top->{slab};
+        $move{wins}   = 1;
+    } else {
+        $move{wins}++;
+        $dest = $move{winner} if $move{wins} >= $opts{loops};
+    }
+
+    # Count consecutive quiet rounds per class; any eviction (or too few
+    # pages) resets that class's streak.
+    foreach my $entry (@$sorted) {
+        my $id = $entry->{slab};
+        unless ($entry->{evicted_d} == 0 && $entry->{total_pages} > 2) {
+            delete $move{zeroes}->{$id};
+            next;
+        }
+        $move{zeroes}->{$id}++;
+        next if $source;
+        $source = $id if $move{zeroes}->{$id} >= $opts{loops};
+    }
+
+    if ($dest && $source) {
+        print " slabs reassign $source $dest\n";
+        print $sock "slabs reassign $source $dest\r\n";
+        my $res = <$sock>;
+        print " RES: ", $res;
+    } elsif ($dest) {
+        print "FAIL: want to move memory to $dest but no valid source slab available\n";
+    }
+}
+
+# Rank slab classes purely by evictions since the previous sample, fewest
+# first. Returns (totals hashref, arrayref of per-slab hashrefs).
+sub calc_results_evicted {
+    my ($per_slab, $totals) = calc_slabs(@_);
+    my $ranked = [
+        sort { $a->{evicted_d} <=> $b->{evicted_d} } values %$per_slab
+    ];
+    return ($totals, $ranked);
+}
+
+# Rank slab classes by evictions relative to the number of stored items,
+# lowest ratio first. Kept as an experiment: it tries to weight the stats,
+# but evictions in underused classes tend to get vastly inflated.
+# Returns (totals hashref, arrayref of per-slab hashrefs).
+sub calc_results_numratio {
+    # Hook run by calc_slabs for every slab: attach the ratio to the entry.
+    my $add_ratio = sub {
+        my ($before, $after, $entry) = @_;
+        $entry->{numratio} = $entry->{evicted_d}
+            ? $entry->{evicted_d} / $entry->{number}
+            : 0;
+    };
+
+    my ($per_slab, $totals) = calc_slabs(@_, $add_ratio);
+    my @ranked = sort { $a->{numratio} <=> $b->{numratio} } values %$per_slab;
+    return ($totals, \@ranked);
+}
+
+# Combine two stats samples into per-slab entries plus grand totals.
+#
+# For each slab id present in both samples, the entry starts as a copy of the
+# newer sample. Every all-digit value gets a companion "<name>_d" key holding
+# the change since the older sample, and both the value and the change are
+# added into the totals. The optional $code callback is then invoked with
+# (older sample, newer sample, entry) so callers can add derived fields.
+# Returns (hashref of entries keyed by slab id, hashref of totals).
+sub calc_slabs {
+    my ($slabs_before, $slabs_after, $code) = @_;
+    my (%merged, %totals);
+
+    foreach my $id (keys %$slabs_after) {
+        my ($old, $new) = ($slabs_before->{$id}, $slabs_after->{$id});
+        next unless $old && $new;
+
+        my %entry = %$new;
+        # The key list is taken before the loop runs, so the "_d" keys added
+        # below are not themselves visited.
+        foreach my $name (keys %entry) {
+            next unless $entry{$name} =~ m/^\d+$/;
+            my $delta = $new->{$name} - $old->{$name};
+            $totals{$name} += $entry{$name};
+            $entry{$name . '_d'} = $delta;
+            $totals{$name . '_d'} += $delta;
+        }
+
+        $code->($old, $new, \%entry) if $code;
+        $entry{slab} = $id;
+        $merged{$id} = \%entry;
+    }
+
+    return (\%merged, \%totals);
+}
diff --git a/slabs.c b/slabs.c
index eddd59e..10b12b9 100644
--- a/slabs.c
+++ b/slabs.c
@@ -627,16 +627,83 @@ static void slab_rebalance_finish(void) {
}
}
+/* Decide whether a slab page should be moved, and between which classes.
+ *
+ * Does nothing until current_time has advanced by 10 since the last run.
+ * A class that shows the largest eviction increase for three runs in a row
+ * becomes the destination. A class with more than two pages and no new
+ * evictions for three runs in a row becomes the source. State is kept in
+ * function-local statics between calls.
+ *
+ * Returns 1 and fills *src / *dst when both were found; otherwise returns 0
+ * and leaves them untouched.
+ *
+ * Move to its own thread (created/destroyed as needed) once automover is more
+ * complex.
+ */
+static int slab_automove_decision(int *src, int *dst) {
+    static uint64_t prev_evicted[POWER_LARGEST];
+    static unsigned int zero_streak[POWER_LARGEST];
+    static unsigned int leader = 0;
+    static unsigned int leader_streak = 0;
+    static rel_time_t next_run;
+    uint64_t cur_evicted[POWER_LARGEST];
+    unsigned int pages[POWER_LARGEST];
+    uint64_t best_delta = 0;
+    unsigned int best_class = 0;
+    int from = 0;
+    int to = 0;
+    int id;
+
+    /* Run less frequently than the slabmove tester. */
+    if (current_time < next_run) {
+        return 0;
+    }
+    next_run = current_time + 10;
+
+    item_stats_evictions(cur_evicted);
+    pthread_mutex_lock(&cache_lock);
+    for (id = POWER_SMALLEST; id < power_largest; id++) {
+        pages[id] = slabclass[id].slabs;
+    }
+    pthread_mutex_unlock(&cache_lock);
+
+    for (id = POWER_SMALLEST; id < power_largest; id++) {
+        uint64_t delta = cur_evicted[id] - prev_evicted[id];
+
+        prev_evicted[id] = cur_evicted[id];
+        if (delta == 0 && pages[id] > 2) {
+            /* Quiet class with pages to spare: candidate source. */
+            zero_streak[id]++;
+            if (from == 0 && zero_streak[id] >= 3) {
+                from = id;
+            }
+            continue;
+        }
+        /* Anything else breaks the quiet streak and competes for the lead. */
+        zero_streak[id] = 0;
+        if (delta > best_delta) {
+            best_delta = delta;
+            best_class = id;
+        }
+    }
+
+    /* Pick a valid destination: the same leader three runs in a row. */
+    if (leader == 0 || leader != best_class) {
+        leader_streak = 1;
+        leader = best_class;
+    } else if (++leader_streak >= 3) {
+        to = leader;
+    }
+
+    if (from == 0 || to == 0) {
+        return 0;
+    }
+    *src = from;
+    *dst = to;
+    return 1;
+}
+
/* Slab rebalancer thread.
* Does not use spinlocks since it is not timing sensitive. Burn less CPU and
* go to sleep if locks are contended
*/
static void *slab_maintenance_thread(void *arg) {
int was_busy = 0;
+ int src, dest;
while (do_run_slab_thread) {
- /* TODO: Call code to make a calculated decision */
-
if (slab_rebalance_signal == 1) {
if (slab_rebalance_start() < 0) {
/* Handle errors with more specifity as required. */
@@ -646,6 +713,9 @@ static void *slab_maintenance_thread(void *arg) {
} else if (slab_rebalance_signal && slab_rebal.slab_start != NULL) {
/* If we have a decision to continue, continue it */
was_busy = slab_rebalance_move();
+ } else if (settings.slab_automove && slab_automove_decision(&src, &dest) == 1) {
+ /* Blind to the return codes. It will retry on its own */
+ slabs_reassign(src, dest);
}
if (slab_rebal.done) {