summaryrefslogtreecommitdiff
path: root/content/doc/r3.8.1/zookeeperTools.html
blob: 271a7a706714726dc6da32ef74384b0d40b09cf6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686

<!DOCTYPE html>
<html>
<head>
    <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
    <title>ZooKeeper: Because Coordinating Distributed Systems is a Zoo</title>
    <link type="text/css" href="skin/basic.css" rel="stylesheet">
    <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
    <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
    <link type="text/css" href="skin/profile.css" rel="stylesheet">
    <script src="skin/getBlank.js" language="javascript" type="text/javascript"></script>
    <script src="skin/getMenu.js" language="javascript" type="text/javascript"></script>
    <script src="skin/init.js" language="javascript" type="text/javascript"></script>
    <link rel="shortcut icon" href="images/favicon.ico">
</head>
<body onload="init();">
<div id="top">
    <div class="breadtrail">
        <a href="http://www.apache.org/">Apache</a> &gt; <a href="http://zookeeper.apache.org/">ZooKeeper</a>
    </div>
    <div class="header">
        <div class="projectlogo">
            <a href="http://zookeeper.apache.org/"><img class="logoImage" alt="ZooKeeper" src="images/zookeeper_small.gif" title="ZooKeeper: distributed coordination"></a>
        </div>
        <div class="searchbox">
            <form action="http://www.google.com/search" method="get">
                <input value="zookeeper.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp;
                <input name="Search" value="Search" type="submit">
            </form>
        </div>
        <ul id="tabs">
            <li>
                <a class="unselected" href="http://zookeeper.apache.org/">Project</a>
            </li>
            <li>
                <a class="unselected" href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/">Wiki</a>
            </li>
            <li class="current">
                <a class="selected" href="index.html">ZooKeeper 3.8 Documentation</a>
            </li>
        </ul>
    </div>
</div>
<div id="main">
    <div id="publishedStrip">
        <div id="level2tabs"></div>
        <script type="text/javascript"><!--
document.write("Last Published: " + document.lastModified);
//  --></script>
    </div>
    <div class="breadtrail">
        &nbsp;
    </div>
    <div id="menu">
        <div onclick="SwitchMenu('menu_1', 'skin/')" id="menu_1Title" class="menutitle">Overview</div>
        <div id="menu_1" class="menuitemgroup">
            <div class="menuitem">
                <a href="index.html">Welcome</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperOver.html">Overview</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperStarted.html">Getting Started</a>
            </div>
            <div class="menuitem">
                <a href="releasenotes.html">Release Notes</a>
            </div>
        </div>
        <div onclick="SwitchMenu('menu_2', 'skin/')" id="menu_2Title" class="menutitle">Developer</div>
        <div id="menu_2" class="menuitemgroup">
            <div class="menuitem">
                <a href="apidocs/zookeeper-server/index.html">API Docs</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperProgrammers.html">Programmer's Guide</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperUseCases.html">Use Cases</a>
            </div>
            <div class="menuitem">
                <a href="javaExample.html">Java Example</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperTutorial.html">Barrier and Queue Tutorial</a>
            </div>
            <div class="menuitem">
                <a href="recipes.html">Recipes</a>
            </div>
        </div>
        <div onclick="SwitchMenu('menu_3', 'skin/')" id="menu_3Title" class="menutitle">Admin &amp; Ops</div>
        <div id="menu_3" class="menuitemgroup">
            <div class="menuitem">
                <a href="zookeeperAdmin.html">Administrator's Guide</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperQuotas.html">Quota Guide</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperJMX.html">JMX</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperHierarchicalQuorums.html">Hierarchical Quorums</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperOracleQuorums.html">Oracle Quorum</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperObservers.html">Observers Guide</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperReconfig.html">Dynamic Reconfiguration</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperCLI.html">ZooKeeper CLI</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperTools.html">ZooKeeper Tools</a>
            </div>
            <div class="menuitem">
                <a href="zookeeperMonitor.html">ZooKeeper Monitor</a>
            </div>
			<div class="menuitem">
                <a href="zookeeperAuditLogs.html">Audit Logs</a>
            </div>
        </div>
        <div onclick="SwitchMenu('menu_4', 'skin/')" id="menu_4Title" class="menutitle">Contributor</div>
        <div id="menu_4" class="menuitemgroup">
            <div class="menuitem">
                <a href="zookeeperInternals.html">ZooKeeper Internals</a>
            </div>
        </div>
        <div onclick="SwitchMenu('menu_5', 'skin/')" id="menu_5Title" class="menutitle">Miscellaneous</div>
        <div id="menu_5" class="menuitemgroup">
            <div class="menuitem">
                <a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER">Wiki</a>
            </div>
            <div class="menuitem">
                <a href="https://cwiki.apache.org/confluence/display/ZOOKEEPER/FAQ">FAQ</a>
            </div>
            <div class="menuitem">
                <a href="http://zookeeper.apache.org/mailing_lists.html">Mailing Lists</a>
            </div>
        </div>
    </div>
    <div id="content">
<!--
Copyright 2002-2022 The Apache Software Foundation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
//-->
<h1>A series of tools for ZooKeeper</h1>
<ul>
<li>
<p><a href="#Scripts">Scripts</a></p>
<ul>
<li><a href="#zkServer">zkServer.sh</a></li>
<li><a href="#zkCli">zkCli.sh</a></li>
<li><a href="#zkEnv">zkEnv.sh</a></li>
<li><a href="#zkCleanup">zkCleanup.sh</a></li>
<li><a href="#zkTxnLogToolkit">zkTxnLogToolkit.sh</a></li>
<li><a href="#zkSnapShotToolkit">zkSnapShotToolkit.sh</a></li>
<li><a href="#zkSnapshotComparer">zkSnapshotComparer.sh</a></li>
</ul>
</li>
<li>
<p><a href="#Benchmark">Benchmark</a></p>
<ul>
<li><a href="#YCSB">YCSB</a></li>
<li><a href="#zk-smoketest">zk-smoketest</a></li>
</ul>
</li>
<li>
<p><a href="#Testing">Testing</a></p>
<ul>
<li><a href="#fault-injection">Fault Injection Framework</a>
<ul>
<li><a href="#Byteman">Byteman</a></li>
</ul>
</li>
<li><a href="#jepsen-test">Jepsen Test</a></li>
</ul>
</li>
</ul>
<p><a name="Scripts"></a></p>
<h2>Scripts</h2>
<p><a name="zkServer"></a></p>
<h3>zkServer.sh</h3>
<p>A command for operating the ZooKeeper server (start, stop, restart, status, and so on).</p>
<pre><code class="language-bash">Usage: ./zkServer.sh {start|start-foreground|stop|version|restart|status|upgrade|print-cmd}
# start the server
./zkServer.sh start

# start the server in the foreground for debugging
./zkServer.sh start-foreground

# stop the server
./zkServer.sh stop

# restart the server
./zkServer.sh restart

# show the status,mode,role of the server
./zkServer.sh status
JMX enabled by default
Using config: /data/software/zookeeper/conf/zoo.cfg
Mode: standalone

# Deprecated
./zkServer.sh upgrade

# print the parameters of the start-up
./zkServer.sh print-cmd

# show the version of the ZooKeeper server
./zkServer.sh version
Apache ZooKeeper, version 3.6.0-SNAPSHOT 06/11/2019 05:39 GMT

</code></pre>
<p>The <code>status</code> command establishes a client connection to the server to execute diagnostic commands. When the ZooKeeper cluster is started in client SSL only mode (by omitting the clientPort from the zoo.cfg), then additional SSL related configuration has to be provided before using the <code>./zkServer.sh status</code> command to find out if the ZooKeeper server is running. An example:</p>
<pre><code>CLIENT_JVMFLAGS=&quot;-Dzookeeper.clientCnxnSocket=org.apache.zookeeper.ClientCnxnSocketNetty -Dzookeeper.ssl.trustStore.location=/tmp/clienttrust.jks -Dzookeeper.ssl.trustStore.password=password -Dzookeeper.ssl.keyStore.location=/tmp/client.jks -Dzookeeper.ssl.keyStore.password=password -Dzookeeper.client.secure=true&quot; ./zkServer.sh status
</code></pre>
<p><a name="zkCli"></a></p>
<h3>zkCli.sh</h3>
<p>See the <a href="zookeeperCLI.html">ZooKeeper CLI</a> guide.</p>
<p><a name="zkEnv"></a></p>
<h3>zkEnv.sh</h3>
<p>The environment setting for the ZooKeeper server</p>
<pre><code class="language-bash"># the setting of log property
ZOO_LOG_DIR: the directory to store the logs
</code></pre>
<p><a name="zkCleanup"></a></p>
<h3>zkCleanup.sh</h3>
<p>Clean up the old snapshots and transaction logs.</p>
<pre><code class="language-bash">Usage:
     * args dataLogDir [snapDir] -n count
     * dataLogDir -- path to the txn log directory
     * snapDir -- path to the snapshot directory
     * count -- the number of old snaps/logs you want to keep, value should be greater than or equal to 3
# Keep the latest 5 logs and snapshots
./zkCleanup.sh -n 5
</code></pre>
<p><a name="zkTxnLogToolkit"></a></p>
<h3>zkTxnLogToolkit.sh</h3>
<p>TxnLogToolkit is a command line tool shipped with ZooKeeper which is capable of recovering transaction log entries with broken CRC.</p>
<p>When run without any command line parameters or with the <code>-h,--help</code> argument, it outputs the following help page:</p>
<pre><code>$ bin/zkTxnLogToolkit.sh
usage: TxnLogToolkit [-dhrv] txn_log_file_name
-d,--dump      Dump mode. Dump all entries of the log file. (this is the default)
-h,--help      Print help message
-r,--recover   Recovery mode. Re-calculate CRC for broken entries.
-v,--verbose   Be verbose in recovery mode: print all entries, not just fixed ones.
-y,--yes       Non-interactive mode: repair all CRC errors without asking
</code></pre>
<p>The default behaviour is safe: it dumps the entries of the given transaction log file to the screen: (same as using <code>-d,--dump</code> parameter)</p>
<pre><code>$ bin/zkTxnLogToolkit.sh log.100000001
ZooKeeper Transactional Log File with dbid 0 txnlog format version 2
4/5/18 2:15:58 PM CEST session 0x16295bafcc40000 cxid 0x0 zxid 0x100000001 createSession 30000
CRC ERROR - 4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
4/5/18 2:16:12 PM CEST session 0x26295bafcc90000 cxid 0x0 zxid 0x100000003 createSession 30000
4/5/18 2:17:34 PM CEST session 0x26295bafcc90000 cxid 0x0 zxid 0x200000001 closeSession null
4/5/18 2:17:34 PM CEST session 0x16295bd23720000 cxid 0x0 zxid 0x200000002 createSession 30000
4/5/18 2:18:02 PM CEST session 0x16295bd23720000 cxid 0x2 zxid 0x200000003 create '/andor,#626262,v{s{31,s{'world,'anyone}}},F,1
EOF reached after 6 txns.
</code></pre>
<p>There's a CRC error in the 2nd entry of the above transaction log file. In <strong>dump</strong> mode, the toolkit only prints this information to the screen without touching the original file. In <strong>recovery</strong> mode (<code>-r,--recover</code> flag) the original file still remains untouched and all transactions are copied over to a new txn log file with the &quot;.fixed&quot; suffix. The tool recalculates the CRC values and writes the recalculated value if it doesn't match the one in the original txn entry. By default, the tool works interactively: it asks for confirmation whenever a CRC error is encountered.</p>
<pre><code>$ bin/zkTxnLogToolkit.sh -r log.100000001
ZooKeeper Transactional Log File with dbid 0 txnlog format version 2
CRC ERROR - 4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
Would you like to fix it (Yes/No/Abort) ?
</code></pre>
<p>Answering <strong>Yes</strong> means the newly calculated CRC value will be written to the new file. <strong>No</strong> means that the original CRC value will be copied over. <strong>Abort</strong> aborts the entire operation and exits. (In this case the &quot;.fixed&quot; file is not deleted and is left in a half-complete state: it contains only the entries which have already been processed, or only the header if the operation was aborted at the first entry.)</p>
<pre><code>$ bin/zkTxnLogToolkit.sh -r log.100000001
ZooKeeper Transactional Log File with dbid 0 txnlog format version 2
CRC ERROR - 4/5/18 2:16:05 PM CEST session 0x16295bafcc40000 cxid 0x1 zxid 0x100000002 closeSession null
Would you like to fix it (Yes/No/Abort) ? y
EOF reached after 6 txns.
Recovery file log.100000001.fixed has been written with 1 fixed CRC error(s)
</code></pre>
<p>The default behaviour of recovery is to be silent: only entries with CRC error get printed to the screen. One can turn on verbose mode with the <code>-v,--verbose</code> parameter to see all records. Interactive mode can be turned off with the <code>-y,--yes</code> parameter. In this case all CRC errors will be fixed in the new transaction file.</p>
<p><a name="zkSnapShotToolkit"></a></p>
<h3>zkSnapShotToolkit.sh</h3>
<p>Dump a snapshot file to stdout, showing detailed information for each znode.</p>
<pre><code class="language-bash"># help
./zkSnapShotToolkit.sh
/usr/bin/java
USAGE: SnapshotFormatter [-d|-json] snapshot_file
       -d dump the data for each znode
       -json dump znode info in json format

# show the each zk-node info without data content
./zkSnapShotToolkit.sh /data/zkdata/version-2/snapshot.fa01000186d
/zk-latencies_4/session_946
  cZxid = 0x00000f0003110b
  ctime = Wed Sep 19 21:58:22 CST 2018
  mZxid = 0x00000f0003110b
  mtime = Wed Sep 19 21:58:22 CST 2018
  pZxid = 0x00000f0003110b
  cversion = 0
  dataVersion = 0
  aclVersion = 0
  ephemeralOwner = 0x00000000000000
  dataLength = 100

# [-d] show the each zk-node info with data content
./zkSnapShotToolkit.sh -d /data/zkdata/version-2/snapshot.fa01000186d
/zk-latencies2/session_26229
  cZxid = 0x00000900007ba0
  ctime = Wed Aug 15 20:13:52 CST 2018
  mZxid = 0x00000900007ba0
  mtime = Wed Aug 15 20:13:52 CST 2018
  pZxid = 0x00000900007ba0
  cversion = 0
  dataVersion = 0
  aclVersion = 0
  ephemeralOwner = 0x00000000000000
  data = eHh4eHh4eHh4eHh4eA==

# [-json] show the each zk-node info with json format
./zkSnapShotToolkit.sh -json /data/zkdata/version-2/snapshot.fa01000186d
[[1,0,{&quot;progname&quot;:&quot;SnapshotFormatter.java&quot;,&quot;progver&quot;:&quot;0.01&quot;,&quot;timestamp&quot;:1559788148637},[{&quot;name&quot;:&quot;\/&quot;,&quot;asize&quot;:0,&quot;dsize&quot;:0,&quot;dev&quot;:0,&quot;ino&quot;:1001},[{&quot;name&quot;:&quot;zookeeper&quot;,&quot;asize&quot;:0,&quot;dsize&quot;:0,&quot;dev&quot;:0,&quot;ino&quot;:1002},{&quot;name&quot;:&quot;config&quot;,&quot;asize&quot;:0,&quot;dsize&quot;:0,&quot;dev&quot;:0,&quot;ino&quot;:1003},[{&quot;name&quot;:&quot;quota&quot;,&quot;asize&quot;:0,&quot;dsize&quot;:0,&quot;dev&quot;:0,&quot;ino&quot;:1004},[{&quot;name&quot;:&quot;test&quot;,&quot;asize&quot;:0,&quot;dsize&quot;:0,&quot;dev&quot;:0,&quot;ino&quot;:1005},{&quot;name&quot;:&quot;zookeeper_limits&quot;,&quot;asize&quot;:52,&quot;dsize&quot;:52,&quot;dev&quot;:0,&quot;ino&quot;:1006},{&quot;name&quot;:&quot;zookeeper_stats&quot;,&quot;asize&quot;:15,&quot;dsize&quot;:15,&quot;dev&quot;:0,&quot;ino&quot;:1007}]]],{&quot;name&quot;:&quot;test&quot;,&quot;asize&quot;:0,&quot;dsize&quot;:0,&quot;dev&quot;:0,&quot;ino&quot;:1008}]]
</code></pre>
<p><a name="zkSnapshotComparer"></a></p>
<h3>zkSnapshotComparer.sh</h3>
<p>SnapshotComparer is a tool that loads and compares two snapshots with configurable threshold and various filters, and outputs information about the delta.</p>
<p>The delta includes the specific znode paths that were added, updated, or deleted when comparing one snapshot to another.</p>
<p>It's useful in use cases that involve snapshot analysis, such as offline data consistency checking and data trending analysis (e.g. what is growing under which znode path, and when).</p>
<p>This tool only outputs information about permanent nodes, ignoring both sessions and ephemeral nodes.</p>
<p>It provides two tuning parameters to help filter out noise: 1. <code>--nodes</code> Threshold number of children added/removed; 2. <code>--bytes</code> Threshold number of bytes added/removed.</p>
<h4>Locate Snapshots</h4>
<p>Snapshots can be found in the <a href="zookeeperAdmin.html#The+Data+Directory">ZooKeeper Data Directory</a>, which is configured in <a href="zookeeperStarted.html#sc_InstallingSingleMode">conf/zoo.cfg</a> when setting up the ZooKeeper server.</p>
<h4>Supported Snapshot Formats</h4>
<p>This tool supports uncompressed snapshot format, and compressed snapshot file formats: <code>snappy</code> and <code>gz</code>. Snapshots with different formats can be compared using this tool directly without decompression.</p>
<h4>Running the Tool</h4>
<p>When run with no command line argument or with an unrecognized argument, the tool outputs the following help page:</p>
<pre><code>usage: java -cp &lt;classPath&gt; org.apache.zookeeper.server.SnapshotComparer
 -b,--bytes &lt;BYTETHRESHOLD&gt;   (Required) The node data delta size threshold, in bytes, for printing the node.
 -d,--debug                   Use debug output.
 -i,--interactive             Enter interactive mode.
 -l,--left &lt;LEFT&gt;             (Required) The left snapshot file.
 -n,--nodes &lt;NODETHRESHOLD&gt;   (Required) The descendant node delta size threshold, in nodes, for printing the node.
 -r,--right &lt;RIGHT&gt;           (Required) The right snapshot file.
</code></pre>
<p>Example Command:</p>
<pre><code>./bin/zkSnapshotComparer.sh -l /zookeeper-data/backup/snapshot.d.snappy -r /zookeeper-data/backup/snapshot.44 -b 2 -n 1
</code></pre>
<p>Example Output:</p>
<pre><code>...
Deserialized snapshot in snapshot.44 in 0.002741 seconds
Processed data tree in 0.000361 seconds
Node count: 10
Total size: 0
Max depth: 4
Count of nodes at depth 0: 1
Count of nodes at depth 1: 2
Count of nodes at depth 2: 4
Count of nodes at depth 3: 3

Node count: 22
Total size: 2903
Max depth: 5
Count of nodes at depth 0: 1
Count of nodes at depth 1: 2
Count of nodes at depth 2: 4
Count of nodes at depth 3: 7
Count of nodes at depth 4: 8

Printing analysis for nodes difference larger than 2 bytes or node count difference larger than 1.
Analysis for depth 0
Node  found in both trees. Delta: 2903 bytes, 12 descendants
Analysis for depth 1
Node /zk_test found in both trees. Delta: 2903 bytes, 12 descendants
Analysis for depth 2
Node /zk_test/gz found in both trees. Delta: 730 bytes, 3 descendants
Node /zk_test/snappy found in both trees. Delta: 2173 bytes, 9 descendants
Analysis for depth 3
Node /zk_test/gz/12345 found in both trees. Delta: 9 bytes, 1 descendants
Node /zk_test/gz/a found only in right tree. Descendant size: 721. Descendant count: 0
Node /zk_test/snappy/anotherTest found in both trees. Delta: 1738 bytes, 2 descendants
Node /zk_test/snappy/test_1 found only in right tree. Descendant size: 344. Descendant count: 3
Node /zk_test/snappy/test_2 found only in right tree. Descendant size: 91. Descendant count: 2
Analysis for depth 4
Node /zk_test/gz/12345/abcdef found only in right tree. Descendant size: 9. Descendant count: 0
Node /zk_test/snappy/anotherTest/abc found only in right tree. Descendant size: 1738. Descendant count: 0
Node /zk_test/snappy/test_1/a found only in right tree. Descendant size: 93. Descendant count: 0
Node /zk_test/snappy/test_1/b found only in right tree. Descendant size: 251. Descendant count: 0
Node /zk_test/snappy/test_2/xyz found only in right tree. Descendant size: 33. Descendant count: 0
Node /zk_test/snappy/test_2/y found only in right tree. Descendant size: 58. Descendant count: 0
All layers compared.
</code></pre>
<h4>Interactive Mode</h4>
<p>Use &quot;-i&quot; or &quot;--interactive&quot; to enter interactive mode:</p>
<pre><code>./bin/zkSnapshotComparer.sh -l /zookeeper-data/backup/snapshot.d.snappy -r /zookeeper-data/backup/snapshot.44 -b 2 -n 1 -i
</code></pre>
<p>There are three options to proceed:</p>
<pre><code>- Press enter to move to print current depth layer;
- Type a number to jump to and print all nodes at a given depth;
- Enter an ABSOLUTE path to print the immediate subtree of a node. Path must start with '/'.
</code></pre>
<p>Note: As indicated by the interactive messages, the tool only shows analysis for the results that remain after filtering by the tuning parameters (bytes threshold and nodes threshold).</p>
<p>Press enter to print current depth layer:</p>
<pre><code>Current depth is 0
Press enter to move to print current depth layer;
...
Printing analysis for nodes difference larger than 2 bytes or node count difference larger than 1.
Analysis for depth 0
Node  found in both trees. Delta: 2903 bytes, 12 descendants
</code></pre>
<p>Type a number to jump to and print all nodes at a given depth:</p>
<p>(Jump forward)</p>
<pre><code>Current depth is 1
...
Type a number to jump to and print all nodes at a given depth;
...
3
Printing analysis for nodes difference larger than 2 bytes or node count difference larger than 1.
Analysis for depth 3
Node /zk_test/gz/12345 found in both trees. Delta: 9 bytes, 1 descendants
Node /zk_test/gz/a found only in right tree. Descendant size: 721. Descendant count: 0
Filtered node /zk_test/gz/anotherOne of left size 0, right size 0
Filtered right node /zk_test/gz/b of size 0
Node /zk_test/snappy/anotherTest found in both trees. Delta: 1738 bytes, 2 descendants
Node /zk_test/snappy/test_1 found only in right tree. Descendant size: 344. Descendant count: 3
Node /zk_test/snappy/test_2 found only in right tree. Descendant size: 91. Descendant count: 2
</code></pre>
<p>(Jump back)</p>
<pre><code>Current depth is 3
...
Type a number to jump to and print all nodes at a given depth;
...
0
Printing analysis for nodes difference larger than 2 bytes or node count difference larger than 1.
Analysis for depth 0
Node  found in both trees. Delta: 2903 bytes, 12 descendants
</code></pre>
<p>Out of range depth is handled:</p>
<pre><code>Current depth is 1
...
Type a number to jump to and print all nodes at a given depth;
...
10
Printing analysis for nodes difference larger than 2 bytes or node count difference larger than 1.
Depth must be in range [0, 4]
</code></pre>
<p>Enter an ABSOLUTE path to print the immediate subtree of a node:</p>
<pre><code>Current depth is 3
...
Enter an ABSOLUTE path to print the immediate subtree of a node.
/zk_test
Printing analysis for nodes difference larger than 2 bytes or node count difference larger than 1.
Analysis for node /zk_test
Node /zk_test/gz found in both trees. Delta: 730 bytes, 3 descendants
Node /zk_test/snappy found in both trees. Delta: 2173 bytes, 9 descendants
</code></pre>
<p>Invalid path is handled:</p>
<pre><code>Current depth is 3
...
Enter an ABSOLUTE path to print the immediate subtree of a node.
/non-exist-path
Printing analysis for nodes difference larger than 2 bytes or node count difference larger than 1.
Analysis for node /non-exist-path
Path /non-exist-path is neither found in left tree nor right tree.
</code></pre>
<p>Invalid input is handled:</p>
<pre><code>Current depth is 1
- Press enter to move to print current depth layer;
- Type a number to jump to and print all nodes at a given depth;
- Enter an ABSOLUTE path to print the immediate subtree of a node. Path must start with '/'.
12223999999999999999999999999999999999999
Printing analysis for nodes difference larger than 2 bytes or node count difference larger than 1.
Input 12223999999999999999999999999999999999999 is not valid. Depth must be in range [0, 4]. Path must be an absolute path which starts with '/'.
</code></pre>
<p>Exit interactive mode automatically when all layers are compared:</p>
<pre><code>Printing analysis for nodes difference larger than 2 bytes or node count difference larger than 1.
Analysis for depth 4
Node /zk_test/gz/12345/abcdef found only in right tree. Descendant size: 9. Descendant count: 0
Node /zk_test/snappy/anotherTest/abc found only in right tree. Descendant size: 1738. Descendant count: 0
Filtered right node /zk_test/snappy/anotherTest/abcd of size 0
Node /zk_test/snappy/test_1/a found only in right tree. Descendant size: 93. Descendant count: 0
Node /zk_test/snappy/test_1/b found only in right tree. Descendant size: 251. Descendant count: 0
Filtered right node /zk_test/snappy/test_1/c of size 0
Node /zk_test/snappy/test_2/xyz found only in right tree. Descendant size: 33. Descendant count: 0
Node /zk_test/snappy/test_2/y found only in right tree. Descendant size: 58. Descendant count: 0
All layers compared.
</code></pre>
<p>Or use <code>^c</code> to exit interactive mode anytime.</p>
<p><a name="Benchmark"></a></p>
<h2>Benchmark</h2>
<p><a name="YCSB"></a></p>
<h3>YCSB</h3>
<h4>Quick Start</h4>
<p>This section describes how to run YCSB on ZooKeeper.</p>
<h4>1. Start ZooKeeper Server(s)</h4>
<h4>2. Install Java and Maven</h4>
<h4>3. Set Up YCSB</h4>
<p>Git clone YCSB and compile:</p>
<pre><code>git clone https://github.com/brianfrankcooper/YCSB.git
# for more details, see the YCSB getting-started instructions (https://github.com/brianfrankcooper/YCSB#getting-started).
cd YCSB
mvn -pl site.ycsb:zookeeper-binding -am clean package -DskipTests
</code></pre>
<h4>4. Provide ZooKeeper Connection Parameters</h4>
<p>Set connectString, sessionTimeout, watchFlag in the workload you plan to run.</p>
<ul>
<li><code>zookeeper.connectString</code></li>
<li><code>zookeeper.sessionTimeout</code></li>
<li><code>zookeeper.watchFlag</code></li>
<li>A parameter for enabling ZooKeeper's watch. Optional values: true or false. The default value is false.</li>
<li>This parameter is not meant for testing watch performance itself, but for testing what effect enabling the watch has on read/write requests.
<pre><code class="language-bash">./bin/ycsb run zookeeper -s -P workloads/workloadb -p zookeeper.connectString=127.0.0.1:2181/benchmark -p zookeeper.watchFlag=true
</code></pre>
</li>
</ul>
<p>Or, you can set configs with the shell command, e.g.:</p>
<pre><code># create a /benchmark namespace for sake of cleaning up the workspace after test.
# e.g the CLI:create /benchmark
./bin/ycsb run zookeeper -s -P workloads/workloadb -p zookeeper.connectString=127.0.0.1:2181/benchmark -p zookeeper.sessionTimeout=30000
</code></pre>
<h4>5. Load data and run tests</h4>
<p>Load the data:</p>
<pre><code># -p recordcount,the count of records/paths you want to insert
./bin/ycsb load zookeeper -s -P workloads/workloadb -p zookeeper.connectString=127.0.0.1:2181/benchmark -p recordcount=10000 &gt; outputLoad.txt
</code></pre>
<p>Run the workload test:</p>
<pre><code># YCSB workloadb is the most suitable workload for read-heavy workload for the ZooKeeper in the real world.

# -p fieldlength: test the effect of the value/data-content length on performance
./bin/ycsb run zookeeper -s -P workloads/workloadb -p zookeeper.connectString=127.0.0.1:2181/benchmark -p fieldlength=1000

# -p fieldcount
./bin/ycsb run zookeeper -s -P workloads/workloadb -p zookeeper.connectString=127.0.0.1:2181/benchmark -p fieldcount=20

# -p hdrhistogram.percentiles,show the hdrhistogram benchmark result
./bin/ycsb run zookeeper -threads 1 -P workloads/workloadb -p zookeeper.connectString=127.0.0.1:2181/benchmark -p hdrhistogram.percentiles=10,25,50,75,90,95,99,99.9 -p histogram.buckets=500

# -threads: multi-clients test, increase the **maxClientCnxns** in the zoo.cfg to handle more connections.
./bin/ycsb run zookeeper -threads 10 -P workloads/workloadb -p zookeeper.connectString=127.0.0.1:2181/benchmark

# show the timeseries benchmark result
./bin/ycsb run zookeeper -threads 1 -P workloads/workloadb -p zookeeper.connectString=127.0.0.1:2181/benchmark -p measurementtype=timeseries -p timeseries.granularity=50

# cluster test
./bin/ycsb run zookeeper -P workloads/workloadb -p zookeeper.connectString=192.168.10.43:2181,192.168.10.45:2181,192.168.10.27:2181/benchmark

# test leader's read/write performance by setting zookeeper.connectString to leader's(192.168.10.43:2181)
./bin/ycsb run zookeeper -P workloads/workloadb -p zookeeper.connectString=192.168.10.43:2181/benchmark

# test for large znodes (by default, jute.maxbuffer is 1048575 bytes, i.e. just under 1 MB). Note: jute.maxbuffer must also be set to the same value on all the zk servers.
./bin/ycsb run zookeeper -jvm-args=&quot;-Djute.maxbuffer=4194304&quot; -s -P workloads/workloadc -p zookeeper.connectString=127.0.0.1:2181/benchmark

# Cleaning up the workspace after finishing the benchmark.
# e.g the CLI:deleteall /benchmark
</code></pre>
<p><a name="zk-smoketest"></a></p>
<h3>zk-smoketest</h3>
<p><strong>zk-smoketest</strong> provides a simple smoketest client for a ZooKeeper ensemble. Useful for verifying new, updated, existing installations. More details are <a href="https://github.com/phunt/zk-smoketest">here</a>.</p>
<p><a name="Testing"></a></p>
<h2>Testing</h2>
<p><a name="fault-injection"></a></p>
<h3>Fault Injection Framework</h3>
<p><a name="Byteman"></a></p>
<h4>Byteman</h4>
<ul>
<li><strong>Byteman</strong> is a tool which makes it easy to trace, monitor and test the behaviour of Java application and JDK runtime code. It injects Java code into your application methods or into Java runtime methods without the need for you to recompile, repackage or even redeploy your application. Injection can be performed at JVM startup or after startup while the application is still running.</li>
<li>Visit the official <a href="https://byteman.jboss.org/">website</a> to download the latest release</li>
<li>A brief tutorial can be found <a href="https://developer.jboss.org/wiki/ABytemanTutorial">here</a>
<pre><code class="language-bash">Preparations:
# attach the byteman to 3 zk servers during runtime
# 55001,55002,55003 is byteman binding port; 714,740,758 is the zk server pid
./bminstall.sh -b -Dorg.jboss.byteman.transform.all -Dorg.jboss.byteman.verbose -p 55001 714
./bminstall.sh -b -Dorg.jboss.byteman.transform.all -Dorg.jboss.byteman.verbose -p 55002 740
./bminstall.sh -b -Dorg.jboss.byteman.transform.all -Dorg.jboss.byteman.verbose -p 55003 758

# load the fault injection script
./bmsubmit.sh -p 55002 -l my_zk_fault_injection.btm
# unload the fault injection script
./bmsubmit.sh -p 55002 -u my_zk_fault_injection.btm
</code></pre>
</li>
</ul>
<p>Look at the below examples to customize your byteman fault injection script</p>
<p>Example 1: This script makes leader's zxid roll over, to force re-election.</p>
<pre><code class="language-bash">cat zk_leader_zxid_roll_over.btm

RULE trace zk_leader_zxid_roll_over
CLASS org.apache.zookeeper.server.quorum.Leader
METHOD propose
IF true
DO
  traceln(&quot;*** Leader zxid has rolled over, forcing re-election ***&quot;);
  $1.zxid = 4294967295L
ENDRULE
</code></pre>
<p>Example 2: This script makes the leader drop the ping packet to a specific follower. The leader will close the <strong>LearnerHandler</strong> with that follower, and the follower will enter the state:LOOKING then re-enter the quorum with the state:FOLLOWING</p>
<pre><code class="language-bash">cat zk_leader_drop_ping_packet.btm

RULE trace zk_leader_drop_ping_packet
CLASS org.apache.zookeeper.server.quorum.LearnerHandler
METHOD ping
AT ENTRY
IF $0.sid == 2
DO
  traceln(&quot;*** Leader drops ping packet to sid: 2 ***&quot;);
  return;
ENDRULE
</code></pre>
<p>Example 3: This script makes one follower drop the ACK packet, which has no big effect in the broadcast phase, since after receiving ACKs from a majority of the followers, the leader can commit that proposal.</p>
<pre><code class="language-bash">cat zk_follower_drop_ack_packet.btm

RULE trace zk_follower_drop_ack_packet
CLASS org.apache.zookeeper.server.quorum.SendAckRequestProcessor
METHOD processRequest
AT ENTRY
IF true
DO
  traceln(&quot;*** Follower drops ACK packet ***&quot;);
  return;
ENDRULE
</code></pre>
<p><a name="jepsen-test"></a></p>
<h3>Jepsen Test</h3>
<p>A framework for distributed systems verification, with fault injection. Jepsen has been used to verify everything from eventually-consistent commutative databases to linearizable coordination systems to distributed task schedulers. More details can be found in <a href="https://github.com/jepsen-io/jepsen">jepsen-io</a>.</p>
<p>Running the <a href="https://github.com/jepsen-io/jepsen/blob/master/docker/README.md">Dockerized Jepsen</a> is the simplest way to use Jepsen.</p>
<p>Installation:</p>
<pre><code class="language-bash">git clone git@github.com:jepsen-io/jepsen.git
cd jepsen/docker
# maybe a long time for the first init.
./up.sh
# docker ps to check one control node and five db nodes are up
docker ps
     CONTAINER ID        IMAGE               COMMAND                 CREATED             STATUS              PORTS                     NAMES
     8265f1d3f89c        docker_control      &quot;/bin/sh -c /init.sh&quot;   9 hours ago         Up 4 hours          0.0.0.0:32769-&gt;8080/tcp   jepsen-control
     8a646102da44        docker_n5           &quot;/run.sh&quot;               9 hours ago         Up 3 hours          22/tcp                    jepsen-n5
     385454d7e520        docker_n1           &quot;/run.sh&quot;               9 hours ago         Up 9 hours          22/tcp                    jepsen-n1
     a62d6a9d5f8e        docker_n2           &quot;/run.sh&quot;               9 hours ago         Up 9 hours          22/tcp                    jepsen-n2
     1485e89d0d9a        docker_n3           &quot;/run.sh&quot;               9 hours ago         Up 9 hours          22/tcp                    jepsen-n3
     27ae01e1a0c5        docker_node         &quot;/run.sh&quot;               9 hours ago         Up 9 hours          22/tcp                    jepsen-node
     53c444b00ebd        docker_n4           &quot;/run.sh&quot;               9 hours ago         Up 9 hours          22/tcp                    jepsen-n4
</code></pre>
<p>Running &amp; Test</p>
<pre><code class="language-bash"># Enter into the container:jepsen-control
docker exec -it jepsen-control bash
# Test
cd zookeeper &amp;&amp; lein run test --concurrency 10
# See something like the following to assert that ZooKeeper has passed the Jepsen test
INFO [2019-04-01 11:25:23,719] jepsen worker 8 - jepsen.util 8	:ok	:read	2
INFO [2019-04-01 11:25:23,722] jepsen worker 3 - jepsen.util 3	:invoke	:cas	[0 4]
INFO [2019-04-01 11:25:23,760] jepsen worker 3 - jepsen.util 3	:fail	:cas	[0 4]
INFO [2019-04-01 11:25:23,791] jepsen worker 1 - jepsen.util 1	:invoke	:read	nil
INFO [2019-04-01 11:25:23,794] jepsen worker 1 - jepsen.util 1	:ok	:read	2
INFO [2019-04-01 11:25:24,038] jepsen worker 0 - jepsen.util 0	:invoke	:write	4
INFO [2019-04-01 11:25:24,073] jepsen worker 0 - jepsen.util 0	:ok	:write	4
...............................................................................
Everything looks good! ヽ(‘ー`)ノ

</code></pre>
<p>Reference: read <a href="https://aphyr.com/posts/291-call-me-maybe-zookeeper">this blog</a> to learn more about the Jepsen test for ZooKeeper.</p>
</div>
<div class="clearboth">&nbsp;</div>
</div>
<div id="footer">
    <div class="lastmodified">
        <script type="text/javascript">
        <!--
            document.write("Last Published: " + document.lastModified);
        //  -->
        </script>
    </div>
    <div class="copyright">
        Copyright &copy; <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
    </div>
    <div id="logos"></div>
</div>
</body>
</html>