1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
|
## TiKV config template
## Human-readable big numbers:
## File size(based on byte): KB, MB, GB, TB, PB
## e.g.: 1_048_576 = "1MB"
## Time(based on ms): ms, s, m, h
## e.g.: 78_000 = "1.3m"
## Log levels: trace, debug, info, warning, error, critical.
## Note that `debug` and `trace` are only available in development builds.
# log-level = "info"
## File to store logs.
## If it is not set, logs will be appended to stderr.
# log-file = ""
## Log format, one of json, text. Default to text.
# log-format = "text"
## File to store slow logs.
## If "log-file" is set, but this is not set, the slow logs will be appeneded
## to "log-file". If both "log-file" and "slow-log-file" are not set, all logs
## will be appended to stderr.
# slow-log-file = ""
## The minimum operation cost to output relative logs.
# slow-log-threshold = "1s"
## Timespan between rotating the log files.
## Once this timespan passes, log files will be rotated, i.e. existing log file will have a
## timestamp appended to its name and a new file will be created.
# log-rotation-timespan = "24h"
## Size of log file that triggers the log rotation.
## Once the size of log file exceeds the threshold value, the log file will be rotated
## and place the old log file in a new file named by orginal file name subbfixed by a timestamp.
# log-rotation-size = "300MB"
## Enable io snoop which utilize eBPF to get accurate disk io of TiKV
## It won't take effect when compiling without BCC_IOSNOOP=1.
# enable-io-snoop = true
# Configurations for the single thread pool serving read requests.
[readpool.unified]
## The minimal working thread count of the thread pool.
# min-thread-count = 1
## The maximum working thread count of the thread pool.
## The default value is max(4, LOGICAL_CPU_NUM * 0.8).
# max-thread-count = 8
## Size of the stack for each thread in the thread pool.
# stack-size = "10MB"
## Max running tasks of each worker, reject if exceeded.
# max-tasks-per-worker = 2000
[readpool.storage]
## Whether to use the unified read pool to handle storage requests.
# use-unified-pool = false
## The following configurations only take effect when `use-unified-pool` is false.
## Size of the thread pool for high-priority operations.
# high-concurrency = 4
## Size of the thread pool for normal-priority operations.
# normal-concurrency = 4
## Size of the thread pool for low-priority operations.
# low-concurrency = 4
## Max running high-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-high = 2000
## Max running normal-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-normal = 2000
## Max running low-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-low = 2000
## Size of the stack for each thread in the thread pool.
# stack-size = "10MB"
[readpool.coprocessor]
## Whether to use the unified read pool to handle coprocessor requests.
# use-unified-pool = true
## The following configurations only take effect when `use-unified-pool` is false.
## Most read requests from TiDB are sent to the coprocessor of TiKV. high/normal/low-concurrency is
## used to set the number of threads of the coprocessor.
## If there are many read requests, you can increase these config values (but keep it within the
## number of system CPU cores). For example, for a 32-core machine deployed with TiKV, you can even
## set these config to 30 in heavy read scenarios.
## If CPU_NUM > 8, the default thread pool size for coprocessors is set to CPU_NUM * 0.8.
# high-concurrency = 8
# normal-concurrency = 8
# low-concurrency = 8
# max-tasks-per-worker-high = 2000
# max-tasks-per-worker-normal = 2000
# max-tasks-per-worker-low = 2000
[server]
## Listening address.
# addr = "127.0.0.1:20160"
## Advertise listening address for client communication.
## If not set, `addr` will be used.
# advertise-addr = ""
## Status address.
## This is used for reporting the status of TiKV directly through
## the HTTP address. Notice that there is a risk of leaking status
## information if this port is exposed to the public.
## Empty string means disabling it.
# status-addr = "127.0.0.1:20180"
## Set the maximum number of worker threads for the status report HTTP service.
# status-thread-pool-size = 1
## Compression type for gRPC channel: none, deflate or gzip.
# grpc-compression-type = "none"
## Size of the thread pool for the gRPC server.
# grpc-concurrency = 4
## The number of max concurrent streams/requests on a client connection.
# grpc-concurrent-stream = 1024
## Limit the memory size can be used by gRPC. Default is unlimited.
## gRPC usually works well to reclaim memory by itself. Limit the memory in case OOM
## is observed. Note that limit the usage can lead to potential stall.
# grpc-memory-pool-quota = "32G"
## The number of connections with each TiKV server to send Raft messages.
# grpc-raft-conn-num = 1
## Amount to read ahead on individual gRPC streams.
# grpc-stream-initial-window-size = "2MB"
## Time to wait before sending out a ping to check if server is still alive.
## This is only for communications between TiKV instances.
# grpc-keepalive-time = "10s"
## Time to wait before closing the connection without receiving KeepAlive ping Ack.
# grpc-keepalive-timeout = "3s"
## How many snapshots can be sent concurrently.
# concurrent-send-snap-limit = 32
## How many snapshots can be received concurrently.
# concurrent-recv-snap-limit = 32
## Max allowed recursion level when decoding Coprocessor DAG expression.
# end-point-recursion-limit = 1000
## Max time to handle Coprocessor requests before timeout.
# end-point-request-max-handle-duration = "60s"
## Max bytes that snapshot can be written to disk in one second.
## It should be set based on your disk performance.
# snap-max-write-bytes-per-sec = "100MB"
## Whether to enable request batch.
# enable-request-batch = true
## Attributes about this server, e.g. `{ zone = "us-west-1", disk = "ssd" }`.
# labels = {}
## The working thread count of the background pool, which include the endpoint of and br, split-check,
## region thread and other thread of delay-insensitive tasks.
## The default value is 2 if the number of CPU cores is less than 16, otherwise 3.
# background-thread-count = 2
## If handle time is larger than the threshold, it will print slow log in endpoint.
## The default value is 1s.
# end-point-slow-log-threshold = "1s"
[storage]
## The path to RocksDB directory.
# data-dir = "/tmp/tikv/store"
## The number of slots in Scheduler latches, which controls write concurrency.
## In most cases you can use the default value. When importing data, you can set it to a larger
## value.
# scheduler-concurrency = 2048000
## Scheduler's worker pool size, i.e. the number of write threads.
## It should be less than total CPU cores. When there are frequent write operations, set it to a
## higher value. More specifically, you can run `top -H -p tikv-pid` to check whether the threads
## named `sched-worker-pool` are busy.
# scheduler-worker-pool-size = 4
## When the pending write bytes exceeds this threshold, the "scheduler too busy" error is displayed.
# scheduler-pending-write-threshold = "100MB"
## For async commit transactions, it's possible to response to the client before applying prewrite
## requests. Enabling this can ease reduce latency when apply duration is significant, or reduce
## latency jittering when apply duration is not stable.
# enable-async-apply-prewrite = false
## Reserve some space to ensure recovering the store from `no space left` must succeed.
## `max(reserve-space, capacity * 5%)` will be reserved exactly.
##
## Set it to 0 will cause no space is reserved at all. It's generally used for tests.
# reserve-space = "5GB"
[storage.block-cache]
## Whether to create a shared block cache for all RocksDB column families.
##
## Block cache is used by RocksDB to cache uncompressed blocks. Big block cache can speed up read.
## It is recommended to turn on shared block cache. Since only the total cache size need to be
## set, it is easier to config. In most cases it should be able to auto-balance cache usage
## between column families with standard LRU algorithm.
##
## The rest of config in the storage.block-cache session is effective only when shared block cache
## is on.
# shared = true
## Size of the shared block cache. Normally it should be tuned to 30%-50% of system's total memory.
## When the config is not set, it is decided by the sum of the following fields or their default
## value:
## * rocksdb.defaultcf.block-cache-size or 25% of system's total memory
## * rocksdb.writecf.block-cache-size or 15% of system's total memory
## * rocksdb.lockcf.block-cache-size or 2% of system's total memory
## * raftdb.defaultcf.block-cache-size or 2% of system's total memory
##
## To deploy multiple TiKV nodes on a single physical machine, configure this parameter explicitly.
## Otherwise, the OOM problem might occur in TiKV.
# capacity = "1GB"
[pd]
## PD endpoints.
# endpoints = []
## The interval at which to retry a PD connection initialization.
## Default is 300ms.
# retry-interval = "300ms"
## If the client observes an error, it can can skip reporting it except every `n` times.
## Set to 1 to disable this feature.
## Default is 10.
# retry-log-every = 10
## The maximum number of times to retry a PD connection initialization.
## Set to 0 to disable retry.
## Default is -1, meaning isize::MAX times.
# retry-max-count = -1
[raftstore]
## Whether to enable Raft prevote.
## Prevote minimizes disruption when a partitioned node rejoins the cluster by using a two phase
## election.
# prevote = true
## The path to RaftDB directory.
## If not set, it will be `{data-dir}/raft`.
## If there are multiple disks on the machine, storing the data of Raft RocksDB on differen disks
## can improve TiKV performance.
# raftdb-path = ""
## Store capacity, i.e. max data size allowed.
## If it is not set, disk capacity is used.
# capacity = 0
## Internal notify capacity.
## 40960 is suitable for about 7000 Regions. It is recommended to use the default value.
# notify-capacity = 40960
## Maximum number of internal messages to process in a tick.
# messages-per-tick = 4096
## Region heartbeat tick interval for reporting to PD.
# pd-heartbeat-tick-interval = "60s"
## Store heartbeat tick interval for reporting to PD.
# pd-store-heartbeat-tick-interval = "10s"
## The threshold of triggering Region split check.
## When Region size change exceeds this config, TiKV will check whether the Region should be split
## or not. To reduce the cost of scanning data in the checking process, you can set the value to
## 32MB during checking and set it back to the default value in normal operations.
# region-split-check-diff = "6MB"
## The interval of triggering Region split check.
# split-region-check-tick-interval = "10s"
## When the number of Raft entries exceeds the max size, TiKV rejects to propose the entry.
# raft-entry-max-size = "8MB"
## Interval to GC unnecessary Raft log.
# raft-log-gc-tick-interval = "10s"
## Threshold to GC stale Raft log, must be >= 1.
# raft-log-gc-threshold = 50
## When the entry count exceeds this value, GC will be forced to trigger.
# raft-log-gc-count-limit = 72000
## When the approximate size of Raft log entries exceeds this value, GC will be forced trigger.
## It's recommanded to set it to 3/4 of `region-split-size`.
# raft-log-gc-size-limit = "72MB"
## Raft engine is a replaceable component. For some implementations, it's necessary to purge
## old log files to recycle disk space ASAP.
# raft-engine-purge-interval = "10s"
## How long the peer will be considered down and reported to PD when it hasn't been active for this
## time.
# max-peer-down-duration = "5m"
## Interval to check whether to start manual compaction for a Region.
# region-compact-check-interval = "5m"
## Number of Regions for each time to check.
# region-compact-check-step = 100
## The minimum number of delete tombstones to trigger manual compaction.
# region-compact-min-tombstones = 10000
## The minimum percentage of delete tombstones to trigger manual compaction.
## It should be set between 1 and 100. Manual compaction is only triggered when the number of
## delete tombstones exceeds `region-compact-min-tombstones` and the percentage of delete tombstones
## exceeds `region-compact-tombstones-percent`.
# region-compact-tombstones-percent = 30
## Interval to check whether to start a manual compaction for Lock Column Family.
## If written bytes reach `lock-cf-compact-bytes-threshold` for Lock Column Family, TiKV will
## trigger a manual compaction for Lock Column Family.
# lock-cf-compact-interval = "10m"
# lock-cf-compact-bytes-threshold = "256MB"
## Interval (s) to check Region whether the data are consistent.
# consistency-check-interval = 0
## Interval to clean up import SST files.
# cleanup-import-sst-interval = "10m"
## Use how many threads to handle log apply
# apply-pool-size = 2
## Use how many threads to handle raft messages
# store-pool-size = 2
[coprocessor]
## When it is set to `true`, TiKV will try to split a Region with table prefix if that Region
## crosses tables.
## It is recommended to turn off this option if there will be a large number of tables created.
# split-region-on-table = false
## One split check produces several split keys in batch. This config limits the number of produced
## split keys in one batch.
# batch-split-limit = 10
## When Region [a,e) size exceeds `region_max_size`, it will be split into several Regions [a,b),
## [b,c), [c,d), [d,e) and the size of [a,b), [b,c), [c,d) will be `region_split_size` (or a
## little larger).
# region-max-size = "144MB"
# region-split-size = "96MB"
## When the number of keys in Region [a,e) exceeds the `region_max_keys`, it will be split into
## several Regions [a,b), [b,c), [c,d), [d,e) and the number of keys in [a,b), [b,c), [c,d) will be
## `region_split_keys`.
# region-max-keys = 1440000
# region-split-keys = 960000
## Set to "mvcc" to do consistency check for MVCC data, or "raw" for raw data.
# consistency-check-method = "mvcc"
[rocksdb]
## Maximum number of threads of RocksDB background jobs.
## The background tasks include compaction and flush. For detailed information why RocksDB needs to
## do compaction, see RocksDB-related materials.
## When write traffic (like the importing data size) is big, it is recommended to enable more
## threads. But set the number of the enabled threads smaller than that of CPU cores. For example,
## when importing data, for a machine with a 32-core CPU, set the value to 28.
## The default value is set to 8 or CPU_NUM - 1, whichever is smaller.
# max-background-jobs = 8
## Maximum number of threads of RocksDB background memtable flush jobs.
## The default value is set to 2 or max_background_jobs / 4, whichever is bigger.
# max-background-flushes = 2
## Represents the maximum number of threads that will concurrently perform a sub-compaction job by
## breaking it into multiple, smaller ones running simultaneously.
## The default value is set to 3 or the largest number to allow for two compactions, whichever is
## smaller.
# max-sub-compactions = 3
## Number of open files that can be used by the DB.
## Value -1 means files opened are always kept open and RocksDB will prefetch index and filter
## blocks into block cache at startup. So if your database has a large working set, it will take
## several minutes to open the DB. You may need to increase this if your database has a large
## working set. You can estimate the number of files based on `target-file-size-base` and
## `target_file_size_multiplier` for level-based compaction.
# max-open-files = 40960
## Max size of RocksDB's MANIFEST file.
## For detailed explanation, please refer to https://github.com/facebook/rocksdb/wiki/MANIFEST
# max-manifest-file-size = "128MB"
## If the value is `true`, the database will be created if it is missing.
# create-if-missing = true
## RocksDB Write-Ahead Logs (WAL) recovery mode.
## 0 : TolerateCorruptedTailRecords, tolerate incomplete record in trailing data on all logs;
## 1 : AbsoluteConsistency, We don't expect to find any corruption in the WAL;
## 2 : PointInTimeRecovery, Recover to point-in-time consistency;
## 3 : SkipAnyCorruptedRecords, Recovery after a disaster;
# wal-recovery-mode = 2
## RocksDB WAL directory.
## This config specifies the absolute directory path for WAL.
## If it is not set, the log files will be in the same directory as data. When you set the path to
## RocksDB directory in memory like in `/dev/shm`, you may want to set`wal-dir` to a directory on a
## persistent storage. See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database .
## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can
## improve performance.
# wal-dir = "/tmp/tikv/store"
## The following two fields affect how archived WAL will be deleted.
## 1. If both values are set to 0, logs will be deleted ASAP and will not get into the archive.
## 2. If `wal-ttl-seconds` is 0 and `wal-size-limit` is not 0, WAL files will be checked every 10
## min and if total size is greater than `wal-size-limit`, they will be deleted starting with the
## earliest until `wal-size-limit` is met. All empty files will be deleted.
## 3. If `wal-ttl-seconds` is not 0 and `wal-size-limit` is 0, then WAL files will be checked every
## `wal-ttl-seconds / 2` and those that are older than `wal-ttl-seconds` will be deleted.
## 4. If both are not 0, WAL files will be checked every 10 min and both checks will be performed
## with ttl being first.
## When you set the path to RocksDB directory in memory like in `/dev/shm`, you may want to set
## `wal-ttl-seconds` to a value greater than 0 (like 86400) and backup your DB on a regular basis.
## See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database .
# wal-ttl-seconds = 0
# wal-size-limit = 0
## Max RocksDB WAL size in total
# max-total-wal-size = "4GB"
## RocksDB Statistics provides cumulative stats over time.
## Turning statistics on will introduce about 5%-10% overhead for RocksDB, but it can help you to
## know the internal status of RocksDB.
# enable-statistics = true
## Dump statistics periodically in information logs.
## Same as RocksDB's default value (10 min).
# stats-dump-period = "10m"
## Refer to: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ
## If you want to use RocksDB on multi disks or spinning disks, you should set value at least 2MB.
# compaction-readahead-size = 0
## Max buffer size that is used by WritableFileWrite.
# writable-file-max-buffer-size = "1MB"
## Use O_DIRECT for both reads and writes in background flush and compactions.
# use-direct-io-for-flush-and-compaction = false
## Limit the disk IO of compaction and flush.
## Compaction and flush can cause terrible spikes if they exceed a certain threshold. Consider
## setting this to 50% ~ 80% of the disk throughput for a more stable result. However, in heavy
## write workload, limiting compaction and flush speed can cause write stalls too.
## 1. rate-bytes-per-sec is the only parameter you want to set most of the time. It controls the
## total write rate of compaction and flush in bytes per second. Currently, RocksDB does not
## enforce rate limit for anything other than flush and compaction, e.g. write to WAL.
## 2. rate-limiter-refill-period controls how often IO tokens are refilled. Smaller value will flatten
## IO bursts while introducing more CPU overhead.
## 3. rate-limiter-mode indicates which types of operations count against the limit.
## 1 : ReadOnly
## 2 : WriteOnly
## 3 : AllIo
## 4. rate-limiter-auto_tuned enables dynamic adjustment of rate limit within the range
## [10MB/s, rate_bytes_per_sec], according to the recent demand for background I/O.
# rate-bytes-per-sec = "10GB"
# rate-limiter-refill-period = "100ms"
# rate-limiter-mode = 2
# rate-limiter-auto-tuned = true
## Enable or disable the pipelined write.
# enable-pipelined-write = true
## Allows OS to incrementally sync files to disk while they are being written, asynchronously,
## in the background.
# bytes-per-sync = "1MB"
## Allows OS to incrementally sync WAL to disk while it is being written.
# wal-bytes-per-sync = "512KB"
## Specify the maximal size of the RocksDB info log file.
## If the log file is larger than this config, a new info log file will be created.
## If it is set to 0, all logs will be written to one log file.
# info-log-max-size = "1GB"
## Time for the RocksDB info log file to roll (in seconds).
## If the log file has been active longer than this config, it will be rolled.
## If it is set to 0, rolling will be disabled.
# info-log-roll-time = "0s"
## Maximal RocksDB info log files to be kept.
# info-log-keep-log-file-num = 10
## Specifies the RocksDB info log directory.
## If it is empty, the log files will be in the same directory as data.
## If it is not empty, the log files will be in the specified directory, and the DB data directory's
## absolute path will be used as the log file name's prefix.
# info-log-dir = ""
# RocksDB log levels
# info-log-level = "info"
## Options for `Titan`.
[rocksdb.titan]
## Enables or disables `Titan`. Note that Titan is still an experimental feature. Once
## enabled, it can't fall back. Forced fallback may result in data loss.
## default: false
# enabled = false
## Maximum number of threads of `Titan` background gc jobs.
# default: 4
# max-background-gc = 4
## Options for "Default" Column Family, which stores actual user data.
[rocksdb.defaultcf]
## Compression method (if any) is used to compress a block.
## no: kNoCompression
## snappy: kSnappyCompression
## zlib: kZlibCompression
## bzip2: kBZip2Compression
## lz4: kLZ4Compression
## lz4hc: kLZ4HCCompression
## zstd: kZSTD
## `lz4` is a compression algorithm with moderate speed and compression ratio. The compression
## ratio of `zlib` is high. It is friendly to the storage space, but its compression speed is
## slow. This compression occupies many CPU resources.
## Per level compression.
## This config should be chosen carefully according to CPU and I/O resources. For example, if you
## use the compression mode of "no:no:lz4:lz4:lz4:zstd:zstd" and find much I/O pressure of the
## system (run the `iostat` command to find %util lasts 100%, or run the `top` command to find many
## iowaits) when writing (importing) a lot of data while the CPU resources are adequate, you can
## compress level-0 and level-1 and exchange CPU resources for I/O resources. If you use the
## compression mode of "no:no:lz4:lz4:lz4:zstd:zstd" and you find the I/O pressure of the system is
## not big when writing a lot of data, but CPU resources are inadequate. Then run the `top` command
## and choose the `-H` option. If you find a lot of bg threads (namely the compression thread of
## RocksDB) are running, you can exchange I/O resources for CPU resources and change the compression
## mode to "no:no:no:lz4:lz4:zstd:zstd". In a word, it aims at making full use of the existing
## resources of the system and improving TiKV performance in terms of the current resources.
# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
## Set zstd compression for the bottommost level.
## This config overrides compression-per-level. It use zstd for bottommost level to ensure
## consistent compression ratio, regardless of overall data size. If explicitly setting compression type
## for the bottommost level is not desired, "disable" should be used.
# bottommost-level-compression = "zstd"
## The data block size. RocksDB compresses data based on the unit of block.
## Similar to page in other databases, block is the smallest unit cached in block-cache. Note that
## the block size specified here corresponds to uncompressed data.
# block-size = "64KB"
## If you're doing point lookups you definitely want to turn bloom filters on. We use bloom filters
## to avoid unnecessary disk reads. Default bits_per_key is 10, which yields ~1% false positive
## rate. Larger `bloom-filter-bits-per-key` values will reduce false positive rate, but increase
## memory usage and space amplification.
# bloom-filter-bits-per-key = 10
## `false` means one SST file one bloom filter, `true` means every block has a corresponding bloom
## filter.
# block-based-bloom-filter = false
# level0-file-num-compaction-trigger = 4
## Soft limit on number of level-0 files.
## When the number of SST files of level-0 reaches the limit of `level0-slowdown-writes-trigger`,
## RocksDB tries to slow down the write operation, because too many SST files of level-0 can cause
## higher read pressure of RocksDB.
# level0-slowdown-writes-trigger = 20
## Maximum number of level-0 files.
## When the number of SST files of level-0 reaches the limit of `level0-stop-writes-trigger`,
## RocksDB stalls the new write operation.
# level0-stop-writes-trigger = 36
## Amount of data to build up in memory (backed by an unsorted log on disk) before converting to a
## sorted on-disk file. It is the RocksDB MemTable size.
# write-buffer-size = "128MB"
## The maximum number of the MemTables. The data written into RocksDB is first recorded in the WAL
## log, and then inserted into MemTables. When the MemTable reaches the size limit of
## `write-buffer-size`, it turns into read only and generates a new MemTable receiving new write
## operations. The flush threads of RocksDB will flush the read only MemTable to the disks to become
## an SST file of level0. `max-background-flushes` controls the maximum number of flush threads.
## When the flush threads are busy, resulting in the number of the MemTables waiting to be flushed
## to the disks reaching the limit of `max-write-buffer-number`, RocksDB stalls the new operation.
## "Stall" is a flow control mechanism of RocksDB. When importing data, you can set the
## `max-write-buffer-number` value higher, like 10.
# max-write-buffer-number = 5
## The minimum number of write buffers that will be merged together before writing to storage.
# min-write-buffer-number-to-merge = 1
## Control maximum total data size for base level (level 1).
## When the level-1 data size reaches the limit value of `max-bytes-for-level-base`, the SST files
## of level-1 and their overlap SST files of level-2 will be compacted. The golden rule: the first
## reference principle of setting `max-bytes-for-level-base` is guaranteeing that the
## `max-bytes-for-level-base` value is roughly equal to the data volume of level-0. Thus
## unnecessary compaction is reduced. For example, if the compression mode is
## "no:no:lz4:lz4:lz4:lz4:lz4", the `max-bytes-for-level-base` value can be `write-buffer-size * 4`,
## because there is no compression of level-0 and level-1 and the trigger condition of compaction
## for level-0 is that the number of the SST files reaches 4 (the default value). When both level-0
## and level-1 adopt compaction, it is necessary to analyze RocksDB logs to know the size of an SST
## file compressed from a MemTable. For example, if the file size is 32MB, the proposed value of
## `max-bytes-for-level-base` is 32MB * 4 = 128MB.
# max-bytes-for-level-base = "512MB"
## Target file size for compaction.
## The SST file size of level-0 is influenced by the compaction algorithm of `write-buffer-size`
## and level0. `target-file-size-base` is used to control the size of a single SST file of level1 to
## level6.
# target-file-size-base = "8MB"
## Max bytes for `compaction.max_compaction_bytes`.
## If it's necessary to enlarge value of this entry, it's better to also enlarge `reserve-space`
## in `storage` to ensure that a restarted TiKV instance can perform compactions successfully.
# max-compaction-bytes = "2GB"
## There are four different compaction priorities.
## 0 : ByCompensatedSize
## 1 : OldestLargestSeqFirst
## 2 : OldestSmallestSeqFirst
## 3 : MinOverlappingRatio
# compaction-pri = 3
## Indicating if we'd put index/filter blocks to the block cache.
## If not specified, each "table reader" object will pre-load index/filter block during table
## initialization.
# cache-index-and-filter-blocks = true
## Pin level-0 filter and index blocks in cache.
# pin-l0-filter-and-index-blocks = true
## Enable read amplification statistics.
## value => memory usage (percentage of loaded blocks memory)
## 1 => 12.50 %
## 2 => 06.25 %
## 4 => 03.12 %
## 8 => 01.56 %
## 16 => 00.78 %
# read-amp-bytes-per-bit = 0
## Pick target size of each level dynamically.
# dynamic-level-bytes = true
## Optimizes bloom filters. If true, RocksDB won't create bloom filters for the max level of
## the LSM to reduce metadata that should fit in RAM.
## This value is setted to true for `default` cf by default because its kv data could be determined
## whether really exists by upper logic instead of bloom filters. But we suggest to set it to false
## while using `Raw` mode.
# optimize-filters-for-hits = true
## Enable compaction guard, which is an optimization to split SST files at TiKV region boundaries.
## The optimization can help reduce compaction IO, and allow us to use larger SST file size
## (thus less SST files overall) while making sure we can still efficiently cleanup stale data on
## region migration.
## This config is available to default CF and write CF.
# enable-compaction-guard = true
## The lower bound of SST file size when compaction guard is enabled. The config prevent SST files
## being too small when compaction guard is enabled.
# compaction-guard-min-output-file-size = "8M"
## The upper bound of SST file size when compaction guard is enabled. The config prevent SST files
## being too large when compaction guard is enabled. This config overrides target-file-size-base
## for the same CF.
# compaction-guard-max-output-file-size = "128M"
## Options for "Default" Column Family for `Titan`.
[rocksdb.defaultcf.titan]
## The smallest value to store in blob files. Value smaller than
## this threshold will be inlined in base DB.
## default: 1KB
# min-blob-size = "1KB"
## The compression algorithm used to compress data in blob files.
## Compression method.
## no: kNoCompression
## snappy: kSnappyCompression
## zlib: kZlibCompression
## bzip2: kBZip2Compression
## lz4: kLZ4Compression
## lz4hc: kLZ4HCCompression
## zstd: kZSTD
# default: lz4
# blob-file-compression = "lz4"
## Specifics cache size for blob records
# default: 0
# blob-cache-size = "0GB"
## If the ratio of discardable size of a blob file is larger than
## this threshold, the blob file will be GCed out.
# default: 0.5
# discardable-ratio = 0.5
## The mode used to process blob files. In read-only mode Titan
## stops writing value into blob log. In fallback mode Titan
## converts blob index into real value on flush and compaction.
## This option is especially useful for downgrading Titan.
## default: kNormal
## read-only: kReadOnly
## fallback: kFallback
# default: normal
# blob-run-mode = "normal"
## If set true, values in blob file will be merged to a new blob file while
## their corresponding keys are compacted to last two level in LSM-Tree.
##
## With this feature enabled, Titan could get better scan performance, and
## better write performance during GC, but will suffer around 1.1 space
## amplification and 3 more write amplification if no GC needed (eg. uniformly
## distributed keys) under default rocksdb setting.
##
## Requirement: level_compaction_dynamic_level_base = true
## default: false
# level_merge = false
## Use merge operator to rewrite GC blob index.
## default: false
# gc-merge-rewrite = false
## Options for "Write" Column Family, which stores MVCC commit information
[rocksdb.writecf]
## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`.
# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
# block-size = "64KB"
## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`.
# write-buffer-size = "128MB"
# max-write-buffer-number = 5
# min-write-buffer-number-to-merge = 1
## Recommend to set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`.
# max-bytes-for-level-base = "512MB"
# target-file-size-base = "8MB"
# level0-file-num-compaction-trigger = 4
# level0-slowdown-writes-trigger = 20
# level0-stop-writes-trigger = 36
# cache-index-and-filter-blocks = true
# pin-l0-filter-and-index-blocks = true
# compaction-pri = 3
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
# optimize-filters-for-hits = false
# enable-compaction-guard = true
# compaction-guard-min-output-file-size = "8M"
# compaction-guard-max-output-file-size = "128M"
[rocksdb.lockcf]
# compression-per-level = ["no", "no", "no", "no", "no", "no", "no"]
# block-size = "16KB"
# write-buffer-size = "32MB"
# max-write-buffer-number = 5
# min-write-buffer-number-to-merge = 1
# max-bytes-for-level-base = "128MB"
# target-file-size-base = "8MB"
# level0-file-num-compaction-trigger = 1
# level0-slowdown-writes-trigger = 20
# level0-stop-writes-trigger = 36
# cache-index-and-filter-blocks = true
# pin-l0-filter-and-index-blocks = true
# compaction-pri = 0
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
# optimize-filters-for-hits = false
# enable-compaction-guard = false
[raftdb]
# max-background-jobs = 4
# max-sub-compactions = 2
# max-open-files = 40960
# max-manifest-file-size = "20MB"
# create-if-missing = true
# enable-statistics = true
# stats-dump-period = "10m"
# compaction-readahead-size = 0
# writable-file-max-buffer-size = "1MB"
# use-direct-io-for-flush-and-compaction = false
# enable-pipelined-write = true
# allow-concurrent-memtable-write = false
# bytes-per-sync = "1MB"
# wal-bytes-per-sync = "512KB"
# info-log-max-size = "1GB"
# info-log-roll-time = "0s"
# info-log-keep-log-file-num = 10
# info-log-dir = ""
# info-log-level = "info"
# optimize-filters-for-hits = true
[raftdb.defaultcf]
## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`.
# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
# block-size = "64KB"
## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`.
# write-buffer-size = "128MB"
# max-write-buffer-number = 5
# min-write-buffer-number-to-merge = 1
## Recommend to set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`.
# max-bytes-for-level-base = "512MB"
# target-file-size-base = "8MB"
# level0-file-num-compaction-trigger = 4
# level0-slowdown-writes-trigger = 20
# level0-stop-writes-trigger = 36
# cache-index-and-filter-blocks = true
# pin-l0-filter-and-index-blocks = true
# compaction-pri = 0
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
# optimize-filters-for-hits = true
# enable-compaction-guard = false
[raft-engine]
## Enable raft-engine will ignore all settings about `raftdb`.
# enable = false
## Recovery mode. Candidates are `tolerate-corrupted-tail-records` and `absolute-consistency`.
# recovery_mode = "tolerate-corrupted-tail-records"
# bytes-per-sync = "256KB"
# target-file-size = "128MB"
# purge-threshold = "10GB"
## Raft engine has builtin entry cache. `cache-limit` limits the memory usage of the cache.
# cache-limit = "1GB"
[security]
## The path for TLS certificates. Empty string means disabling secure connections.
# ca-path = ""
# cert-path = ""
# key-path = ""
# cert-allowed-cn = []
#
## Avoid outputing data (e.g. user keys) to info log. It currently does not avoid printing
## user data altogether, but greatly reduce those logs.
## Default is false.
# redact-info-log = false
# Configurations for encryption at rest. Experimental.
[security.encryption]
## Encryption method to use for data files.
## Possible values are "plaintext", "aes128-ctr", "aes192-ctr" and "aes256-ctr". Value other than
## "plaintext" means encryption is enabled, in which case master key must be specified.
# data-encryption-method = "plaintext"
## Specifies how often TiKV rotates data encryption key.
# data-key-rotation-period = "7d"
## Enable an optimization to reduce IO and mutex contention for encryption metadata management.
## Once the option is turned on (which is the default after 4.0.9), the data format is not
## compatible with TiKV <= 4.0.8. In order to downgrade to TiKV <= 4.0.8, one can turn off this
## option and restart TiKV, after which TiKV will convert the data format to be compatible with
## previous versions.
# enable-file-dictionary-log = true
## Specifies master key if encryption is enabled. There are three types of master key:
##
## * "plaintext":
##
## Plaintext as master key means no master key is given and only applicable when
## encryption is not enabled, i.e. data-encryption-method = "plaintext". This type doesn't
## have sub-config items. Example:
##
## [security.encryption.master-key]
## type = "plaintext"
##
## * "kms":
##
## Use a KMS service to supply master key. Currently only AWS KMS is supported. This type of
## master key is recommended for production use. Example:
##
## [security.encryption.master-key]
## type = "kms"
## ## KMS CMK key id. Must be a valid KMS CMK where the TiKV process has access to.
## ## In production is recommended to grant access of the CMK to TiKV using IAM.
## key-id = "1234abcd-12ab-34cd-56ef-1234567890ab"
## ## AWS region of the KMS CMK.
## region = "us-west-2"
## ## (Optional) AWS KMS service endpoint. Only required when non-default KMS endpoint is
## ## desired.
## endpoint = "https://kms.us-west-2.amazonaws.com"
##
## * "file":
##
## Supply a custom encryption key stored in a file. It is recommended NOT to use in production,
## as it breaks the purpose of encryption at rest, unless the file is stored in tempfs.
## The file must contain a 256-bits (32 bytes, regardless of key length implied by
## data-encryption-method) key encoded as hex string and end with newline ("\n"). Example:
##
## [security.encryption.master-key]
## type = "file"
## path = "/path/to/master/key/file"
##
# [security.encryption.master-key]
# type = "plaintext"
## Specifies the old master key when rotating master key. Same config format as master-key.
## The key is only access once during TiKV startup, after that TiKV do not need access to the key.
## And it is okay to leave the stale previous-master-key config after master key rotation.
# [security.encryption.previous-master-key]
# type = "plaintext"
[import]
## Number of threads to handle RPC requests.
# num-threads = 8
## Stream channel window size, stream will be blocked on channel full.
# stream-channel-window = 128
[backup]
## Number of threads to perform backup tasks.
## The default value is set to min(CPU_NUM * 0.75, 32).
# num-threads = 24
## Number of ranges to backup in one batch.
# batch = 8
## When Backup region [a,e) size exceeds `sst-max-size`, it will be backuped into several Files [a,b),
## [b,c), [c,d), [d,e) and the size of [a,b), [b,c), [c,d) will be `sst-max-size` (or a
## little larger).
# sst-max-size = "144MB"
[pessimistic-txn]
## The default and maximum delay before responding to TiDB when pessimistic
## transactions encounter locks
# wait-for-lock-timeout = "1s"
## If more than one transaction is waiting for the same lock, only the one with smallest
## start timestamp will be waked up immediately when the lock is released. Others will
## be waked up after `wake_up_delay_duration` to reduce contention and make the oldest
## one more likely acquires the lock.
# wake-up-delay-duration = "20ms"
## Enable pipelined pessimistic lock, only effect when processing perssimistic transactions
## Enabled this will improve performance, but slightly increase the transcation failure rate
# pipelined = true
[gc]
## The number of keys to GC in one batch.
# batch-keys = 512
## Max bytes that GC worker can write to rocksdb in one second.
## If it is set to 0, there is no limit.
# max-write-bytes-per-sec = "0"
## Enable GC by compaction filter or not.
# enable-compaction-filter = true
## Garbage ratio threshold to trigger a GC.
# ratio-threshold = 1.1
|