Reclaim page cache of RDB file (#11248)

# Background The RDB file is usually generated and used once and seldom used again, but its content resides in the page cache until the OS evicts it. A potential problem is that once free memory is exhausted, the OS has to reclaim some memory from the page cache or swap anonymous pages out, which may cause jitter in the Redis service. Consider a concrete scenario: a high-capacity machine hosts many Redis instances, and we are upgrading them all together. The page cache on the host machine grows as RDB files are generated. Once free memory drops to the low watermark (which is more likely to happen on older Linux kernels such as 3.10, before [watermark_scale_factor](https://lore.kernel.org/lkml/1455813719-2395-1-git-send-email-hannes@cmpxchg.org/) was introduced: there the `low watermark` is linear to the `min watermark`, and there is not much buffer space for `kswapd` to be woken up to reclaim memory), a `direct reclaim` happens, which means the process stalls waiting for memory allocation. # What the PR does The PR introduces the ability to reclaim the page cache when an RDB file is read or written. Generally there are two cases: reading and writing the RDB. For reads, incremental reclaim is a little messy to implement, so the reclaim is done in one go in the background after the load is finished, to avoid blocking the work thread. For writes, incremental reclaim amortizes the reclaim work, so there is no need to move it to the background, and the peak cache watermark is reduced this way. Two cases are handled specially, replication and restart: in both, the cache is leveraged to speed up processing, so the reclaim is postponed to an appropriate time. To do this, a flag is added to `rdbSave` and `rdbLoad` to control whether the cache needs to be kept, with a default value of false. # Things worth noting 1. Although `posix_fadvise` is part of the POSIX standard, only a few platforms support it, e.g. Linux and FreeBSD 10.0. 2. 
On Linux, `posix_fadvise` only takes effect on pages that have already been written back, so a `sync` (or `fsync`, `fdatasync`) is needed to flush dirty pages before calling `posix_fadvise` when reclaiming the write cache. # About tests A unit test is added to verify the effect of `posix_fadvise`. In the integration test, the overall cache increase is checked, as well as the cache backed by the RDB file, because a specific TCL test is executed in an isolated GitHub Actions job.
author: Tian <skylypig@gmail.com> 2023-02-12 15:23:29 +0800
committer: GitHub <noreply@github.com> 2023-02-12 09:23:29 +0200
commit: 7dae142a2ebf909a63df13e5813c073c79be521f (patch)
tree: 28df352d6c4711a669acf171d5a07319528474df /.github
parent: 5c3938d5cc08b42acc99f314d92f9e0d5671f96e (diff)
download: redis-7dae142a2ebf909a63df13e5813c073c79be521f.tar.gz
1 files changed, 71 insertions, 1 deletions
diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml
index 414f99f67..f1108ff6e 100644
--- a/.github/workflows/daily.yml
+++ b/.github/workflows/daily.yml
@@ -11,7 +11,7 @@ on:
     inputs:
       skipjobs:
         description: 'jobs to skip (delete the ones you wanna keep, do not leave empty)'
-        default: 'valgrind,sanitizer,tls,freebsd,macos,alpine,32bit,iothreads,ubuntu,centos,malloc'
+        default: 'valgrind,sanitizer,tls,freebsd,macos,alpine,32bit,iothreads,ubuntu,centos,malloc,specific'
       skiptests:
         description: 'tests to skip (delete the ones you wanna keep, do not leave empty)'
         default: 'redis,modules,sentinel,cluster,unittest'
@@ -282,6 +282,76 @@ jobs:
       if: true && !contains(github.event.inputs.skiptests, 'cluster')
       run: ./runtest-cluster --config io-threads 4 --config io-threads-do-reads yes ${{github.event.inputs.cluster_test_args}}
 
+  test-ubuntu-reclaim-cache:  # checks that SAVE, replication and restart do not leave the RDB file in the page cache
+    runs-on: ubuntu-latest
+    if: |
+      (github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) &&
+      !contains(github.event.inputs.skipjobs, 'specific')
+    timeout-minutes: 14400
+    steps:
+    - name: prep  # manual dispatch only: point the checkout below at the requested repo/ref
+      if: github.event_name == 'workflow_dispatch'
+      run: |
+        echo "GITHUB_REPOSITORY=${{github.event.inputs.use_repo}}" >> $GITHUB_ENV
+        echo "GITHUB_HEAD_REF=${{github.event.inputs.use_git_ref}}" >> $GITHUB_ENV
+    - uses: actions/checkout@v3
+      with:
+        repository: ${{ env.GITHUB_REPOSITORY }}
+        ref: ${{ env.GITHUB_HEAD_REF }}
+    - name: make
+      run: |
+        make REDIS_CFLAGS='-Werror'
+    - name: testprep  # vmtouch is used by the later steps to inspect dump.rdb; one data dir per server
+      run: |
+        sudo apt-get install vmtouch
+        mkdir /tmp/master
+        mkdir /tmp/slave
+    - name: warm up  # run server, benchmark, SAVE and vmtouch once before the baseline is sampled in the next step
+      run: |
+        ./src/redis-server --daemonize yes --logfile /dev/null
+        ./src/redis-benchmark -n 1 > /dev/null
+        ./src/redis-cli save | grep OK > /dev/null
+        vmtouch -v ./dump.rdb > /dev/null
+    - name: test
+      run: |
+        echo "test SAVE doesn't increase cache"
+        CACHE0=$(grep -w file /sys/fs/cgroup/memory.stat | awk '{print $2}')  # baseline: "file" counter of the cgroup
+        ./src/redis-server --daemonize yes --logfile /dev/null --dir /tmp/master --port 8080 --repl-diskless-sync no --pidfile /tmp/master/redis.pid
+        ./src/redis-server --daemonize yes --logfile /dev/null --dir /tmp/slave --port 8081 --repl-diskless-load disabled
+        ./src/redis-benchmark -p 8080 -d 102400 -t set -r 100000 -n 10000 > /dev/null  # populate the master before SAVE
+        ./src/redis-cli -p 8080 save > /dev/null
+        VMOUT=$(vmtouch -v /tmp/master/dump.rdb)
+        echo $VMOUT
+        grep -q " 0%" <<< $VMOUT  # leading space is required: a bare "0%" would also match "100%" or "50%"
+        CACHE=$(grep -w file /sys/fs/cgroup/memory.stat | awk '{print $2}')
+        if [ "$(( $CACHE-$CACHE0 ))" -gt "500000" ]; then echo "$CACHE0 $CACHE"; exit 1; fi  # fail if the counter grew by more than 500000
+
+        echo "test replication doesn't increase cache"
+        ./src/redis-cli -p 8081 REPLICAOF 127.0.0.1 8080 > /dev/null
+        while [ $(./src/redis-cli -p 8081 info replication | grep "master_link_status:down") ]; do sleep 1; done;  # poll until the link is no longer reported down
+        sleep 1 # wait for the completion of cache reclaim bio
+        VMOUT=$(vmtouch -v /tmp/master/dump.rdb)
+        echo $VMOUT
+        grep -q " 0%" <<< $VMOUT  # master's dump.rdb must not be resident (space-anchored, see above)
+        VMOUT=$(vmtouch -v /tmp/slave/dump.rdb)
+        echo $VMOUT
+        grep -q " 0%" <<< $VMOUT  # replica's dump.rdb must not be resident either
+        CACHE=$(grep -w file /sys/fs/cgroup/memory.stat | awk '{print $2}')
+        if [ "$(( $CACHE-$CACHE0 ))" -gt "500000" ]; then echo "$CACHE0 $CACHE"; exit 1; fi
+
+        echo "test reboot doesn't increase cache"
+        PID=$(cat /tmp/master/redis.pid)
+        kill -15 $PID
+        while [ -x /proc/${PID} ]; do sleep 1; done  # wait for the old master process to disappear
+        ./src/redis-server --daemonize yes --logfile /dev/null --dir /tmp/master --port 8080
+        while [ $(./src/redis-cli -p 8080 info persistence | grep "loading:1") ]; do sleep 1; done;  # poll until loading is no longer reported
+        sleep 1 # wait for the completion of cache reclaim bio
+        VMOUT=$(vmtouch -v /tmp/master/dump.rdb)
+        echo $VMOUT
+        grep -q " 0%" <<< $VMOUT  # dump.rdb must not be resident after the reload (space-anchored, see above)
+        CACHE=$(grep -w file /sys/fs/cgroup/memory.stat | awk '{print $2}')
+        if [ "$(( $CACHE-$CACHE0 ))" -gt "500000" ]; then echo "$CACHE0 $CACHE"; exit 1; fi
+
   test-valgrind-test:
     runs-on: ubuntu-latest
     if: |
author	Tian <skylypig@gmail.com>	2023-02-12 15:23:29 +0800
committer	GitHub <noreply@github.com>	2023-02-12 09:23:29 +0200
commit	7dae142a2ebf909a63df13e5813c073c79be521f (patch)
tree	28df352d6c4711a669acf171d5a07319528474df /.github
parent	5c3938d5cc08b42acc99f314d92f9e0d5671f96e (diff)
download	redis-7dae142a2ebf909a63df13e5813c073c79be521f.tar.gz