summarylogtreecommitdiffstats
path: root/tikv.toml
diff options
context:
space:
mode:
Diffstat (limited to 'tikv.toml')
-rw-r--r--tikv.toml649
1 files changed, 516 insertions, 133 deletions
diff --git a/tikv.toml b/tikv.toml
index 4d698ef5d303..57421a2cf865 100644
--- a/tikv.toml
+++ b/tikv.toml
@@ -1,21 +1,10 @@
## TiKV config template
## Human-readable big numbers:
-## File size(based on byte): KB, MB, GB, TB, PB
+## File size(based on byte, binary units): KB, MB, GB, TB, PB
## e.g.: 1_048_576 = "1MB"
## Time(based on ms): ms, s, m, h
## e.g.: 78_000 = "1.3m"
-## Log levels: trace, debug, info, warning, error, critical.
-## Note that `debug` and `trace` are only available in development builds.
-# log-level = "info"
-
-## File to store logs.
-## If it is not set, logs will be appended to stderr.
-# log-file = ""
-
-## Log format, one of json, text. Default to text.
-# log-format = "text"
-
## File to store slow logs.
## If "log-file" is set, but this is not set, the slow logs will be appended
## to "log-file". If both "log-file" and "slow-log-file" are not set, all logs
@@ -25,28 +14,95 @@
## The minimum operation cost to output relative logs.
# slow-log-threshold = "1s"
-## Timespan between rotating the log files.
-## Once this timespan passes, log files will be rotated, i.e. existing log file will have a
-## timestamp appended to its name and a new file will be created.
-# log-rotation-timespan = "24h"
-
-## Size of log file that triggers the log rotation.
-## Once the size of log file exceeds the threshold value, the log file will be rotated
-## and place the old log file in a new file named by orginal file name subbfixed by a timestamp.
-# log-rotation-size = "300MB"
-
## Enable io snoop which utilizes eBPF to get accurate disk io of TiKV
-## It won't take effect when compiling without BCC_IOSNOOP=1.
+## It won't take effect when compiling without BCC_IOSNOOP=1.
# enable-io-snoop = true
-# Configurations for the single thread pool serving read requests.
+## Use abort when TiKV panics. By default TiKV will use _exit() on panic, in which case
+## a core dump file will not be generated, regardless of system settings.
+## If this config is enabled, core dump files need to be cleaned up to avoid disk space
+## being filled up.
+# abort-on-panic = false
+
+## Memory usage limit for the TiKV instance. Generally it's unnecessary to configure it
+## explicitly, in which case it will be set to 75% of total available system memory.
+## Considering the behavior of `block-cache.capacity`, it means 25% memory is reserved for
+## OS page cache.
+##
+## It's still unnecessary to configure it for deploying multiple TiKV nodes on a single
+## physical machine. It will be calculated as `5/3 * block-cache.capacity`.
+##
+## For different system memory capacity, the default memory quota will be:
+## * system=8G block-cache=3.6G memory-usage-limit=6G page-cache=2G.
+## * system=16G block-cache=7.2G memory-usage-limit=12G page-cache=4G
+## * system=32G block-cache=14.4G memory-usage-limit=24G page-cache=8G
+##
+## So how can `memory-usage-limit` influence TiKV? When a TiKV's memory usage almost reaches
+## this threshold, it can squeeze some internal components (e.g. evicting cached Raft entries)
+## to release memory.
+# memory-usage-limit = "0B"
+
+[quota]
+## Quota is used to add some limitation for the read write flow and thereby
+## gain predictable, stable performance.
+## CPU quota that foreground requests can use; the default value is 0, which means unlimited.
+## The unit is millicpu, but for now this config is an approximate, soft limit.
+# foreground-cpu-time = 0
+## Write bandwidth limitation for this TiKV instance, default value is 0 which means unlimited.
+# foreground-write-bandwidth = "0B"
+## Read bandwidth limitation for this TiKV instance, default value is 0 which means unlimited.
+# foreground-read-bandwidth = "0B"
+## CPU quota that background requests can use; the default value is 0, which means unlimited.
+## The unit is millicpu, but for now this config is an approximate, soft limit.
+# background-cpu-time = 0
+## Write bandwidth limitation for background requests for this TiKV instance, default value is 0 which means unlimited.
+# background-write-bandwidth = "0B"
+## Read bandwidth limitation for background request for this TiKV instance, default value is 0 which means unlimited.
+# background-read-bandwidth = "0B"
+## Limitation of max delay duration, default value is 500ms; 0 means unlimited.
+# max-delay-duration = "500ms"
+## Whether to enable quota auto tune
+# enable-auto-tune = false
+
+[log]
+## Log levels: debug, info, warn, error, fatal.
+## Note that `debug` is only available in development builds.
+# level = "info"
+## log format, one of json, text. Default to text.
+# format = "text"
+## Enable automatic timestamps in log output, if not set, it will be defaulted to true.
+# enable-timestamp = true
+
+[log.file]
+## Usually it is set through command line.
+# filename = ""
+## max log file size in MB (upper limit to 4096MB)
+# max-size = 300
+## max log file keep days
+# max-days = 0
+## maximum number of old log files to retain
+# max-backups = 0
+
+[memory]
+## Whether to enable heap profiling, which may have a small performance overhead of about 2% for the
+## default sample rate.
+# enable-heap-profiling = true
+
+## Average interval between allocation samples, as measured in bytes of allocation activity.
+## Increasing the sampling interval decreases profile fidelity, but also decreases the
+## computational overhead.
+## The default sample interval is 512 KB. It only accepts power of two, otherwise it will be
+## rounded up to the next power of two.
+# profiling-sample-per-bytes = "512KB"
+
+## Configurations for the single thread pool serving read requests.
[readpool.unified]
## The minimal working thread count of the thread pool.
# min-thread-count = 1
## The maximum working thread count of the thread pool.
## The default value is max(4, LOGICAL_CPU_NUM * 0.8).
-# max-thread-count = 8
+# max-thread-count = 4
## Size of the stack for each thread in the thread pool.
# stack-size = "10MB"
@@ -56,7 +112,7 @@
[readpool.storage]
## Whether to use the unified read pool to handle storage requests.
-# use-unified-pool = false
+# use-unified-pool = true
## The following configurations only take effect when `use-unified-pool` is false.
@@ -110,7 +166,7 @@
# advertise-addr = ""
## Status address.
-## This is used for reporting the status of TiKV directly through
+## This is used for reporting the status of TiKV directly through
## the HTTP address. Notice that there is a risk of leaking status
## information if this port is exposed to the public.
## Empty string means disabling it.
@@ -123,7 +179,7 @@
# grpc-compression-type = "none"
## Size of the thread pool for the gRPC server.
-# grpc-concurrency = 4
+# grpc-concurrency = 5
## The number of max concurrent streams/requests on a client connection.
# grpc-concurrent-stream = 1024
@@ -146,6 +202,9 @@
## Time to wait before closing the connection without receiving KeepAlive ping Ack.
# grpc-keepalive-timeout = "3s"
+## Set maximum message length in bytes that gRPC can send. `-1` means unlimited.
+# max-grpc-send-msg-len = 10485760
+
## How many snapshots can be sent concurrently.
# concurrent-send-snap-limit = 32
@@ -158,9 +217,11 @@
## Max time to handle Coprocessor requests before timeout.
# end-point-request-max-handle-duration = "60s"
-## Max bytes that snapshot can be written to disk in one second.
-## It should be set based on your disk performance.
-# snap-max-write-bytes-per-sec = "100MB"
+## Max bytes that snapshot can interact with disk in one second. It should be
+## set based on your disk performance. Only write flow is considered; if
+## partitioned-raft-kv is used, read flow is also considered and it will be estimated
+## as read_size * 0.5 to get around errors from page cache.
+# snap-io-max-bytes-per-sec = "100MB"
## Whether to enable request batch.
# enable-request-batch = true
@@ -179,12 +240,20 @@
[storage]
## The path to RocksDB directory.
-# data-dir = "/tmp/tikv/store"
+# data-dir = "./"
+
+## Specifies the engine type. This configuration can only be specified when creating a new cluster
+## and cannot be modified once specified.
+##
+## Available types are:
+## "raft-kv": The default engine type in versions earlier than TiDB v6.6.0.
+## "partitioned-raft-kv": The new storage engine type introduced in TiDB v6.6.0.
+# engine = "raft-kv"
## The number of slots in Scheduler latches, which controls write concurrency.
## In most cases you can use the default value. When importing data, you can set it to a larger
## value.
-# scheduler-concurrency = 2048000
+# scheduler-concurrency = 524288
## Scheduler's worker pool size, i.e. the number of write threads.
## It should be less than total CPU cores. When there are frequent write operations, set it to a
@@ -206,35 +275,83 @@
## Set it to 0 will cause no space is reserved at all. It's generally used for tests.
# reserve-space = "5GB"
-[storage.block-cache]
-## Whether to create a shared block cache for all RocksDB column families.
+## Reserve some space for raft disk if raft disk is deployed separately from kv disk.
+## `max(reserve-raft-space, raft disk capacity * 5%)` will be reserved exactly.
##
+## Set it to 0 will cause no space is reserved at all. It's generally used for tests.
+# reserve-raft-space = "1GB"
+
+## The maximum recovery time after rocksdb detects restorable background errors. When the data belonging
+## to the data range is damaged, it will be reported to PD through heartbeat, and PD will add `remove-peer`
+## operator to remove this damaged peer. When the damaged peer still exists in the current store, the
+## corruption SST files remain, and the KV storage engine can still put new content normally, but it
+## will return error when reading corrupt data range.
+##
+## If after this time, the peer where the corrupted data range located has not been removed from the
+## current store, TiKV will panic.
+##
+## Set to 0 to disable this feature if you want to panic immediately when encountering such an error.
+# background-error-recovery-window = "1h"
+
## Block cache is used by RocksDB to cache uncompressed blocks. Big block cache can speed up read.
## It is recommended to turn on shared block cache. Since only the total cache size need to be
## set, it is easier to config. In most cases it should be able to auto-balance cache usage
## between column families with standard LRU algorithm.
-##
-## The rest of config in the storage.block-cache session is effective only when shared block cache
-## is on.
-# shared = true
+[storage.block-cache]
## Size of the shared block cache. Normally it should be tuned to 30%-50% of system's total memory.
-## When the config is not set, it is decided by the sum of the following fields or their default
-## value:
-## * rocksdb.defaultcf.block-cache-size or 25% of system's total memory
-## * rocksdb.writecf.block-cache-size or 15% of system's total memory
-## * rocksdb.lockcf.block-cache-size or 2% of system's total memory
-## * raftdb.defaultcf.block-cache-size or 2% of system's total memory
##
## To deploy multiple TiKV nodes on a single physical machine, configure this parameter explicitly.
## Otherwise, the OOM problem might occur in TiKV.
-# capacity = "1GB"
+##
+## When storage.engine is "raft-kv", default value is 45% of available system memory.
+## When storage.engine is "partitioned-raft-kv", default value is 30% of available system memory.
+# capacity = "0B"
+
+[storage.flow-control]
+## Flow controller is used to throttle the write rate at scheduler level, aiming
+## to substitute the write stall mechanism of RocksDB. It features in two points:
+## * throttle at scheduler, so raftstore and apply won't be blocked anymore
+## * better control on the throttle rate to avoid QPS drop under heavy write
+##
+## Support change dynamically.
+## When enabled, it disables kvdb's write stall and raftdb's write stall(except memtable) and vice versa.
+# enable = true
+
+## When the number of immutable memtables of kvdb reaches the threshold, the flow controller begins to work
+# memtables-threshold = 5
+
+## When the number of SST files of level-0 of kvdb reaches the threshold, the flow controller begins to work
+# l0-files-threshold = 20
+
+## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to
+## reject some write requests with `ServerIsBusy` error.
+# soft-pending-compaction-bytes-limit = "192GB"
+
+## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to
+## reject all write requests with `ServerIsBusy` error.
+# hard-pending-compaction-bytes-limit = "1024GB"
+
+[storage.io-rate-limit]
+## Maximum I/O bytes that this server can write to or read from disk (determined by mode)
+## in one second. Internally it prefers throttling background operations over foreground
+## ones. This value should be set to the disk's optimal IO bandwidth, e.g. maximum IO
+## bandwidth specified by cloud disk vendors.
+##
+## When set to zero, disk IO operations are not limited.
+# max-bytes-per-sec = "0MB"
+
+## Determine which types of IO operations are counted and restrained below threshold.
+## Three different modes are: write-only, read-only, all-io.
+##
+## Only write-only mode is supported for now.
+# mode = "write-only"
[pd]
## PD endpoints.
-# endpoints = []
+endpoints = ["127.0.0.1:2379"]
-## The interval at which to retry a PD connection initialization.
+## The interval at which to retry a PD connection.
## Default is 300ms.
# retry-interval = "300ms"
@@ -256,7 +373,7 @@
## The path to RaftDB directory.
## If not set, it will be `{data-dir}/raft`.
-## If there are multiple disks on the machine, storing the data of Raft RocksDB on differen disks
+## If there are multiple disks on the machine, storing the data of Raft RocksDB on a different disk
## can improve TiKV performance.
# raftdb-path = ""
@@ -277,6 +394,9 @@
## Store heartbeat tick interval for reporting to PD.
# pd-store-heartbeat-tick-interval = "10s"
+## Store min resolved ts tick interval for reporting to PD.
+# pd-report-min-resolved-ts-interval = "1s"
+
## The threshold of triggering Region split check.
## When Region size change exceeds this config, TiKV will check whether the Region should be split
## or not. To reduce the cost of scanning data in the checking process, you can set the value to
@@ -289,26 +409,33 @@
## When the number of Raft entries exceeds the max size, TiKV rejects to propose the entry.
# raft-entry-max-size = "8MB"
+## Interval to compact unnecessary Raft log.
+# raft-log-compact-sync-interval = "2s"
+
## Interval to GC unnecessary Raft log.
-# raft-log-gc-tick-interval = "10s"
+# raft-log-gc-tick-interval = "3s"
## Threshold to GC stale Raft log, must be >= 1.
# raft-log-gc-threshold = 50
## When the entry count exceeds this value, GC will be forced to trigger.
-# raft-log-gc-count-limit = 72000
+# raft-log-gc-count-limit = 73728
## When the approximate size of Raft log entries exceeds this value, GC will be forced trigger.
## It's recommended to set it to 3/4 of `region-split-size`.
# raft-log-gc-size-limit = "72MB"
+## Old Raft logs could be reserved if `raft-log-gc-threshold` is not reached.
+## GC them after `raft-log-reserve-max-ticks` ticks.
+# raft-log-reserve-max-ticks = 6
+
## Raft engine is a replaceable component. For some implementations, it's necessary to purge
## old log files to recycle disk space ASAP.
# raft-engine-purge-interval = "10s"
## How long the peer will be considered down and reported to PD when it hasn't been active for this
## time.
-# max-peer-down-duration = "5m"
+# max-peer-down-duration = "10m"
## Interval to check whether to start manual compaction for a Region.
# region-compact-check-interval = "5m"
@@ -325,14 +452,23 @@
## exceeds `region-compact-tombstones-percent`.
# region-compact-tombstones-percent = 30
+## The minimum number of duplicated MVCC keys to trigger manual compaction.
+# region-compact-min-redundant-rows = 50000
+
+## The minimum percentage of duplicated MVCC keys to trigger manual compaction.
+## It should be set between 1 and 100. Manual compaction is only triggered when the number of
+## duplicated MVCC keys exceeds `region-compact-min-redundant-rows` and the percentage of duplicated MVCC keys
+## exceeds `region-compact-redundant-rows-percent`.
+# region-compact-redundant-rows-percent = 20
+
## Interval to check whether to start a manual compaction for Lock Column Family.
## If written bytes reach `lock-cf-compact-bytes-threshold` for Lock Column Family, TiKV will
## trigger a manual compaction for Lock Column Family.
# lock-cf-compact-interval = "10m"
# lock-cf-compact-bytes-threshold = "256MB"
-## Interval (s) to check Region whether the data are consistent.
-# consistency-check-interval = 0
+## Interval to check whether the data of a Region is consistent.
+# consistency-check-interval = "0s"
## Interval to clean up import SST files.
# cleanup-import-sst-interval = "10m"
@@ -343,6 +479,16 @@
## Use how many threads to handle raft messages
# store-pool-size = 2
+## Use how many threads to handle raft io tasks
+## If it is 0, it means io tasks are handled in store threads.
+# store-io-pool-size = 1
+
+## When the size of raft db writebatch exceeds this value, write will be triggered.
+# raft-write-size-limit = "1MB"
+
+## threads to generate raft snapshots
+# snap-generator-pool-size = 2
+
[coprocessor]
## When it is set to `true`, TiKV will try to split a Region with table prefix if that Region
## crosses tables.
@@ -368,6 +514,12 @@
## Set to "mvcc" to do consistency check for MVCC data, or "raw" for raw data.
# consistency-check-method = "mvcc"
+[coprocessor-v2]
+## Path to the directory where compiled coprocessor plugins are located.
+## Plugins in this directory will be automatically loaded by TiKV.
+## If the config value is not set, the coprocessor plugin will be disabled.
+# coprocessor-plugin-directory = "./coprocessors"
+
[rocksdb]
## Maximum number of threads of RocksDB background jobs.
## The background tasks include compaction and flush. For detailed information why RocksDB needs to
@@ -392,8 +544,8 @@
## Value -1 means files opened are always kept open and RocksDB will prefetch index and filter
## blocks into block cache at startup. So if your database has a large working set, it will take
## several minutes to open the DB. You may need to increase this if your database has a large
-## working set. You can estimate the number of files based on `target-file-size-base` and
-## `target_file_size_multiplier` for level-based compaction.
+## working set. You can estimate the number of files based on `target-file-size-base` for
+## level-based compaction.
# max-open-files = 40960
## Max size of RocksDB's MANIFEST file.
@@ -404,20 +556,19 @@
# create-if-missing = true
## RocksDB Write-Ahead Logs (WAL) recovery mode.
-## 0 : TolerateCorruptedTailRecords, tolerate incomplete record in trailing data on all logs;
-## 1 : AbsoluteConsistency, We don't expect to find any corruption in the WAL;
-## 2 : PointInTimeRecovery, Recover to point-in-time consistency;
-## 3 : SkipAnyCorruptedRecords, Recovery after a disaster;
-# wal-recovery-mode = 2
+## "tolerate-corrupted-tail-records", tolerate incomplete record in trailing data on all logs;
+## "absolute-consistency", We don't expect to find any corruption in the WAL;
+## "point-in-time", Recover to point-in-time consistency;
+## "skip-any-corrupted-records", Recovery after a disaster;
+# wal-recovery-mode = "point-in-time"
-## RocksDB WAL directory.
+## KV RocksDB WAL directory.
## This config specifies the absolute directory path for WAL.
-## If it is not set, the log files will be in the same directory as data. When you set the path to
-## RocksDB directory in memory like in `/dev/shm`, you may want to set`wal-dir` to a directory on a
-## persistent storage. See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database .
+## If it is not set, the log files will be in the same directory as data.
## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can
## improve performance.
-# wal-dir = "/tmp/tikv/store"
+## Do not set this config the same as `raftdb.wal-dir`.
+# wal-dir = ""
## The following two fields affect how archived WAL will be deleted.
## 1. If both values are set to 0, logs will be deleted ASAP and will not get into the archive.
@@ -435,15 +586,13 @@
# wal-size-limit = 0
## Max RocksDB WAL size in total
+## When storage.engine is "raft-kv", default value is 4GB.
+## When storage.engine is "partitioned-raft-kv", default value is 1.
# max-total-wal-size = "4GB"
-## RocksDB Statistics provides cumulative stats over time.
-## Turning statistics on will introduce about 5%-10% overhead for RocksDB, but it can help you to
-## know the internal status of RocksDB.
-# enable-statistics = true
-
## Dump statistics periodically in information logs.
-## Same as RocksDB's default value (10 min).
+## When storage.engine is "raft-kv", default value is 10m.
+## When storage.engine is "partitioned-raft-kv", default value is 0.
# stats-dump-period = "10m"
## Refer to: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ
@@ -466,18 +615,19 @@
## 2. rate-limiter-refill-period controls how often IO tokens are refilled. Smaller value will flatten
## IO bursts while introducing more CPU overhead.
## 3. rate-limiter-mode indicates which types of operations count against the limit.
-## 1 : ReadOnly
-## 2 : WriteOnly
-## 3 : AllIo
+## "read-only"
+## "write-only"
+## "all-io"
## 4. rate-limiter-auto-tuned enables dynamic adjustment of rate limit within the range
## [10MB/s, rate_bytes_per_sec], according to the recent demand for background I/O.
# rate-bytes-per-sec = "10GB"
# rate-limiter-refill-period = "100ms"
-# rate-limiter-mode = 2
+# rate-limiter-mode = "write-only"
# rate-limiter-auto-tuned = true
-## Enable or disable the pipelined write.
-# enable-pipelined-write = true
+## Enable or disable the pipelined write. If set false, RocksDB will use a new write mode ported from cockroachdb/pebble.
+## See more details in https://github.com/tikv/rocksdb/pull/267 and https://github.com/tikv/tikv/issues/12059.
+# enable-pipelined-write = false
## Allows OS to incrementally sync files to disk while they are being written, asynchronously,
## in the background.
@@ -505,19 +655,29 @@
## absolute path will be used as the log file name's prefix.
# info-log-dir = ""
-# RocksDB log levels
+## RocksDB log levels
# info-log-level = "info"
+## Memory usage limit for Raft Engine. Undersized write buffers will be flushed to satisfy the
+## requirement.
+##
+## No limit when not specified.
+##
+## When storage.engine is "raft-kv", default is no limit.
+## When storage.engine is "partitioned-raft-kv", default value is 25% of available system memory or
+## 15GiB, whichever is smaller.
+# write-buffer-limit = "0B"
+
## Options for `Titan`.
[rocksdb.titan]
## Enables or disables `Titan`. Note that Titan is still an experimental feature. Once
## enabled, it can't fall back. Forced fallback may result in data loss.
-## default: false
-# enabled = false
+## Titan is on by default since v7.6.0. This won't affect deployments that existed before v7.6.0.
+# enabled = true
## Maximum number of threads of `Titan` background gc jobs.
-# default: 4
-# max-background-gc = 4
+## default: 1
+# max-background-gc = 1
## Options for "Default" Column Family, which stores actual user data.
[rocksdb.defaultcf]
@@ -556,7 +716,7 @@
## The data block size. RocksDB compresses data based on the unit of block.
## Similar to page in other databases, block is the smallest unit cached in block-cache. Note that
## the block size specified here corresponds to uncompressed data.
-# block-size = "64KB"
+# block-size = "32KB"
## If you're doing point lookups you definitely want to turn bloom filters on. We use bloom filters
## to avoid unnecessary disk reads. Default bits_per_key is 10, which yields ~1% false positive
@@ -568,6 +728,12 @@
## filter.
# block-based-bloom-filter = false
+## Use Ribbon filter for levels higher or equal to this value. Use non-block-based bloom filter for
+## lower levels. When this is set, `block-based-bloom-filter` will be ignored.
+## Only effective for `format-version` >= 5.
+## Disabled by default.
+## ribbon-filter-above-level = 0
+
# level0-file-num-compaction-trigger = 4
## Soft limit on number of level-0 files.
@@ -579,7 +745,7 @@
## Maximum number of level-0 files.
## When the number of SST files of level-0 reaches the limit of `level0-stop-writes-trigger`,
## RocksDB stalls the new write operation.
-# level0-stop-writes-trigger = 36
+# level0-stop-writes-trigger = 20
## Amount of data to build up in memory (backed by an unsorted log on disk) before converting to a
## sorted on-disk file. It is the RocksDB MemTable size.
@@ -625,11 +791,17 @@
# max-compaction-bytes = "2GB"
## There are four different compaction priorities.
-## 0 : ByCompensatedSize
-## 1 : OldestLargestSeqFirst
-## 2 : OldestSmallestSeqFirst
-## 3 : MinOverlappingRatio
-# compaction-pri = 3
+## "by-compensated-size"
+## "oldest-largest-seq-first"
+## "oldest-smallest-seq-first"
+## "min-overlapping-ratio"
+# compaction-pri = "min-overlapping-ratio"
+
+## Refer to storage.flow-control.soft-pending-compaction-bytes-limit.
+# soft-pending-compaction-bytes-limit = "192GB"
+
+## Refer to storage.flow-control.hard-pending-compaction-bytes-limit.
+# hard-pending-compaction-bytes-limit = "1000GB"
## Indicating if we'd put index/filter blocks to the block cache.
## If not specified, each "table reader" object will pre-load index/filter block during table
@@ -658,6 +830,10 @@
## while using `Raw` mode.
# optimize-filters-for-hits = true
+## Option to generate Bloom/Ribbon filters that minimize memory internal fragmentation.
+## Only effective for `format-version` >= 5.
+# optimize-filters-for-memory = false
+
## Enable compaction guard, which is an optimization to split SST files at TiKV region boundaries.
## The optimization can help reduce compaction IO, and allow us to use larger SST file size
## (thus less SST files overall) while making sure we can still efficiently cleanup stale data on
@@ -674,12 +850,86 @@
## for the same CF.
# compaction-guard-max-output-file-size = "128M"
+## Available versions:
+##
+## 0 -- This version can be read by all TiKV releases. Doesn't support changing
+## checksum type (default is CRC32).
+##
+## 1 -- Can be read by all TiKV releases. Supports non-default checksum, like
+## xxHash. It is written by RocksDB when BlockBasedTableOptions::checksum is
+## something other than kCRC32c. (version 0 is silently upconverted)
+##
+## 2 -- Can be read by all TiKV releases. Changes the way we encode compressed
+## blocks with LZ4, BZip2 and Zlib compression.
+##
+## 3 -- Can be read by TiKV's versions since 2.1. Changes the way we encode the
+## keys in index blocks.
+## This option only affects newly written tables. When reading existing tables,
+## the information about version is read from the footer.
+##
+## 4 -- Can be read by TiKV's versions since 3.0. Changes the way we encode the
+## values in index blocks.
+## This option only affects newly written tables. When reading existing tables,
+## the information about version is read from the footer.
+##
+## 5 -- Can be read by TiKV's versions since 6.3. Full and partitioned filters
+## use a generally faster and more accurate Bloom filter implementation, with a
+## different schema.
+##
+## When storage.engine is "raft-kv", default value is 2.
+## When storage.engine is "partitioned-raft-kv", default value is 5.
+##
+# format-version = 2
+
+## If enabled, prepopulate warm/hot blocks (data, uncompressed dict, index and
+## filter blocks) which are already in memory into block cache at the time of
+## flush. On a flush, the block that is in memory (in memtables) get flushed
+## to the device. If using Direct IO, additional IO is incurred to read this
+## data back into memory again, which is avoided by enabling this option. This
+## further helps if the workload exhibits high temporal locality, where most
+## of the reads go to recently written data. This also helps in case of
+## Distributed FileSystem.
+##
+## disabled: kDisabled
+## flush-only: kFlushOnly
+##
+# prepopulate-block-cache = "disabled"
+
+## Use the specified checksum type. Newly created table files will be
+## protected with this checksum type. Old table files will still be readable,
+## even though they have different checksum type.
+##
+## no: kNoChecksum
+## crc32c: kCRC32c
+## xxhash: kxxHash
+## xxhash64: kxxHash64
+## xxh3: kXXH3 (supported since TiKV 6.2)
+##
+# checksum = "crc32c"
+
+## The maximum number of concurrent compaction tasks. 0 stands for no limit.
+# max-compactions = 0
+
+## SST files containing updates older than TTL will go through the compaction
+## process. This usually happens in a cascading way so that those entries
+## will be compacted to bottommost level/file. Disabled as default.
+##
+## Default: 0s.
+# ttl = "0s"
+
+## SST files older than this value will be picked up for compaction, and
+## re-written to the same level as they were before. Disabled as default.
+##
+## Default: 0s.
+# periodic-compaction-seconds = "0s"
+
## Options for "Default" Column Family for `Titan`.
[rocksdb.defaultcf.titan]
## The smallest value to store in blob files. Value smaller than
## this threshold will be inlined in base DB.
-## default: 1KB
-# min-blob-size = "1KB"
+## The default value is 32KB since v7.6.0. But it won't affect deployments that existed
+## before v7.6.0, for which the default value is 1KB.
+# min-blob-size = "32KB"
## The compression algorithm used to compress data in blob files.
## Compression method.
@@ -690,26 +940,43 @@
## lz4: kLZ4Compression
## lz4hc: kLZ4HCCompression
## zstd: kZSTD
-# default: lz4
-# blob-file-compression = "lz4"
+## default: zstd
+# blob-file-compression = "zstd"
+
+## Set blob file zstd dictionary compression, default(0) will use zstd compression.
+## It is recommended to set the dictionary size to values such as 4k or 16k. Additionally,
+## the sample data used internally to train the dictionary is 100X the dictionary size.
+## It has no effect when `blob-file-compression` is not `zstd`.
+## default: 0
+# zstd-dict-size = 0
+
+## Whether to share blob cache with block cache. If set to true, Titan would use the shared block
+## cache configured in `storage.block-cache` and ignore the setting of `blob-cache-size`.
+## default: true
+# shared-blob-cache = true
## Specifies the cache size for blob records
-# default: 0
+## default: 0
# blob-cache-size = "0GB"
## If the ratio of discardable size of a blob file is larger than
## this threshold, the blob file will be GCed out.
-# default: 0.5
+## default: 0.5
# discardable-ratio = 0.5
-## The mode used to process blob files. In read-only mode Titan
-## stops writing value into blob log. In fallback mode Titan
-## converts blob index into real value on flush and compaction.
-## This option is especially useful for downgrading Titan.
+## The mode used to process blob files. In read-only mode Titan stops writing
+## value into blob log. In fallback mode Titan converts blob index into real
+## value on flush and compaction.
+##
+## This option can be used to disable Titan. More specifically, to disable
+## Titan, set this option to fallback and perform a full compaction using
+## tikv-ctl. Then, monitor the blob file size metrics. After the blob file size
+## decreases to 0, you can set rocksdb.titan.enabled to false and restart TiKV.
+##
## default: kNormal
## read-only: kReadOnly
## fallback: kFallback
-# default: normal
+## default: normal
# blob-run-mode = "normal"
## If set true, values in blob file will be merged to a new blob file while
@@ -722,17 +989,13 @@
##
## Requirement: level_compaction_dynamic_level_base = true
## default: false
-# level_merge = false
-
-## Use merge operator to rewrite GC blob index.
-## default: false
-# gc-merge-rewrite = false
+# level-merge = false
## Options for "Write" Column Family, which stores MVCC commit information
[rocksdb.writecf]
## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`.
# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
-# block-size = "64KB"
+# block-size = "32KB"
## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`.
# write-buffer-size = "128MB"
@@ -745,16 +1008,22 @@
# level0-file-num-compaction-trigger = 4
# level0-slowdown-writes-trigger = 20
-# level0-stop-writes-trigger = 36
+# level0-stop-writes-trigger = 20
# cache-index-and-filter-blocks = true
# pin-l0-filter-and-index-blocks = true
-# compaction-pri = 3
+# compaction-pri = "min-overlapping-ratio"
+# soft-pending-compaction-bytes-limit = "192GB"
+# hard-pending-compaction-bytes-limit = "1000GB"
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
# optimize-filters-for-hits = false
# enable-compaction-guard = true
# compaction-guard-min-output-file-size = "8M"
# compaction-guard-max-output-file-size = "128M"
+# format-version = 2
+# prepopulate-block-cache = "disabled"
+# checksum = "crc32c"
+# max-compactions = 0
[rocksdb.lockcf]
# compression-per-level = ["no", "no", "no", "no", "no", "no", "no"]
@@ -766,14 +1035,20 @@
# target-file-size-base = "8MB"
# level0-file-num-compaction-trigger = 1
# level0-slowdown-writes-trigger = 20
-# level0-stop-writes-trigger = 36
+# level0-stop-writes-trigger = 20
# cache-index-and-filter-blocks = true
# pin-l0-filter-and-index-blocks = true
-# compaction-pri = 0
+# compaction-pri = "by-compensated-size"
+# soft-pending-compaction-bytes-limit = "192GB"
+# hard-pending-compaction-bytes-limit = "1000GB"
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
# optimize-filters-for-hits = false
# enable-compaction-guard = false
+# format-version = 2
+# prepopulate-block-cache = "disabled"
+# checksum = "crc32c"
+# max-compactions = 0
[raftdb]
# max-background-jobs = 4
@@ -782,14 +1057,21 @@
# max-manifest-file-size = "20MB"
# create-if-missing = true
-# enable-statistics = true
# stats-dump-period = "10m"
+## Raft RocksDB WAL directory.
+## This config specifies the absolute directory path for WAL.
+## If it is not set, the log files will be in the same directory as data.
+## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can
+## improve performance.
+## Do not set this config the same as `rocksdb.wal-dir`.
+# wal-dir = ""
+
# compaction-readahead-size = 0
# writable-file-max-buffer-size = "1MB"
# use-direct-io-for-flush-and-compaction = false
# enable-pipelined-write = true
-# allow-concurrent-memtable-write = false
+# allow-concurrent-memtable-write = true
# bytes-per-sync = "1MB"
# wal-bytes-per-sync = "512KB"
@@ -798,7 +1080,6 @@
# info-log-keep-log-file-num = 10
# info-log-dir = ""
# info-log-level = "info"
-# optimize-filters-for-hits = true
[raftdb.defaultcf]
## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`.
@@ -816,25 +1097,98 @@
# level0-file-num-compaction-trigger = 4
# level0-slowdown-writes-trigger = 20
-# level0-stop-writes-trigger = 36
+# level0-stop-writes-trigger = 20
# cache-index-and-filter-blocks = true
# pin-l0-filter-and-index-blocks = true
-# compaction-pri = 0
+# compaction-pri = "by-compensated-size"
+# soft-pending-compaction-bytes-limit = "192GB"
+# hard-pending-compaction-bytes-limit = "1000GB"
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
# optimize-filters-for-hits = true
# enable-compaction-guard = false
+# format-version = 2
+# prepopulate-block-cache = "disabled"
+# checksum = "crc32c"
+# max-compactions = 0
[raft-engine]
-## Enable raft-engine will ignore all settings about `raftdb`.
-# enable = false
-## Recovery mode. Candidates are `tolerate-corrupted-tail-records` and `absolute-consistency`.
-# recovery_mode = "tolerate-corrupted-tail-records"
-# bytes-per-sync = "256KB"
+## Determines whether to use Raft Engine to store raft logs. When it is
+## enabled, configurations of `raftdb` are ignored.
+# enable = true
+
+## The directory at which raft log files are stored. If the directory does not
+## exist, it will be created when TiKV is started.
+##
+## When this configuration is not set, `{data-dir}/raft-engine` is used.
+##
+## If there are multiple disks on your machine, it is recommended to store the
+## data of Raft Engine on a different disk to improve TiKV performance.
+# dir = ""
+
+## Specifies the threshold size of a log batch. A log batch larger than this
+## configuration is compressed.
+##
+## If you set this configuration item to `0`, compression is disabled.
+# batch-compression-threshold = "8KB"
+
+## Specifies the maximum size of log files. When a log file is larger than this
+## value, it is rotated.
# target-file-size = "128MB"
+
+## Specifies the threshold size of the main log queue. When this configuration
+## value is exceeded, the main log queue is purged.
+##
+## This configuration can be used to adjust the disk space usage of Raft
+## Engine.
# purge-threshold = "10GB"
-## Raft engine has builtin entry cache. `cache-limit` limits the memory usage of the cache.
-# cache-limit = "1GB"
+
+## Determines how to deal with file corruption during recovery.
+##
+## Candidates:
+## absolute-consistency
+## tolerate-tail-corruption
+## tolerate-any-corruption
+# recovery-mode = "tolerate-tail-corruption"
+
+## The minimum I/O size for reading log files during recovery.
+##
+## Default: "16KB". Minimum: "512B".
+# recovery-read-block-size = "16KB"
+
+## The number of threads used to scan and recover log files.
+##
+## Default: 4. Minimum: 1.
+# recovery-threads = 4
+
+## Memory usage limit for Raft Engine.
+## When it's not set, 15% of available system memory will be used.
+# memory-limit = "1GB"
+
+## Version of the log file in Raft Engine.
+##
+## Candidates:
+## 1: Can be read by TiKV release 6.1 and above.
+## 2: Can be read by TiKV release 6.3 and above. Supports log recycling.
+##
+## Default: 2.
+# format-version = 2
+
+## Whether to recycle stale log files in Raft Engine.
+## If `true`, logically purged log files will be reserved for recycling.
+## Only available for `format-version` >= 2. This option is only
+## available when TiKV >= 6.3.x.
+##
+## Default: true.
+# enable-log-recycle = true
+
+## Whether to prepare log files for recycling on startup.
+## If `true`, a batch of empty log files will be prepared for recycling when
+## starting the engine.
+## Only available when `enable-log-recycle` is true.
+##
+## Default: false
+# prefill-for-recycle = false
[security]
## The path for TLS certificates. Empty string means disabling secure connections.
@@ -848,11 +1202,12 @@
## Default is false.
# redact-info-log = false
-# Configurations for encryption at rest. Experimental.
+## Configurations for encryption at rest. Experimental.
[security.encryption]
## Encryption method to use for data files.
-## Possible values are "plaintext", "aes128-ctr", "aes192-ctr" and "aes256-ctr". Value other than
-## "plaintext" means encryption is enabled, in which case master key must be specified.
+## Possible values are "plaintext", "aes128-ctr", "aes192-ctr", "aes256-ctr" and "sm4-ctr".
+## Value other than "plaintext" means encryption is enabled, in which case
+## master key must be specified.
# data-encryption-method = "plaintext"
## Specifies how often TiKV rotates data encryption key.
@@ -872,7 +1227,7 @@
## Plaintext as master key means no master key is given and only applicable when
## encryption is not enabled, i.e. data-encryption-method = "plaintext". This type doesn't
## have sub-config items. Example:
-##
+##
## [security.encryption.master-key]
## type = "plaintext"
##
@@ -896,7 +1251,7 @@
##
## Supply a custom encryption key stored in a file. It is recommended NOT to use in production,
## as it defeats the purpose of encryption at rest, unless the file is stored in tmpfs.
-## The file must contain a 256-bits (32 bytes, regardless of key length implied by
+## The file must contain a 256-bits (32 bytes, regardless of key length implied by
## data-encryption-method) key encoded as hex string and end with newline ("\n"). Example:
##
## [security.encryption.master-key]
@@ -921,17 +1276,39 @@
[backup]
## Number of threads to perform backup tasks.
-## The default value is set to min(CPU_NUM * 0.75, 32).
-# num-threads = 24
+## The default value is set to min(CPU_NUM * 0.5, 8).
+# num-threads = 8
## Number of ranges to backup in one batch.
-# batch = 8
+# batch-size = 8
## When the size of a backup region [a,e) exceeds `sst-max-size`, it will be backed up into several files [a,b),
## [b,c), [c,d), [d,e) and the size of [a,b), [b,c), [c,d) will be `sst-max-size` (or a
## little larger).
# sst-max-size = "144MB"
+## Automatically reduce the number of backup threads when the current workload is high,
+## in order to reduce the impact on the cluster's performance during backup.
+# enable-auto-tune = true
+
+[log-backup]
+## Number of threads to perform backup stream tasks.
+## The default value is CPU_NUM * 0.5, and limited to [2, 12].
+# num-threads = 8
+
+## Enable this feature. TiKV will start watching related tasks in PD and back up KV changes
+## to storage according to each task.
+## The default value is false.
+# enable = true
+
+[backup.hadoop]
+## Let TiKV know how to find the hdfs shell command.
+## Equivalent to the $HADOOP_HOME environment variable.
+# home = ""
+
+## TiKV will run the hdfs shell command under this linux user.
+## TiKV will use the current linux user if not provided.
+# linux-user = ""
+
[pessimistic-txn]
## The default and maximum delay before responding to TiDB when pessimistic
## transactions encounter locks
@@ -943,10 +1320,15 @@
## one more likely acquires the lock.
# wake-up-delay-duration = "20ms"
-## Enable pipelined pessimistic lock, only effect when processing perssimistic transactions
-## Enabled this will improve performance, but slightly increase the transcation failure rate
+## Enable pipelined pessimistic lock. It only takes effect when processing pessimistic transactions.
+## Enabling this will improve performance, but slightly increase the transaction failure rate.
# pipelined = true
+## Enable in-memory pessimistic lock. It only takes effect when processing pessimistic transactions.
+## Enabling this will improve performance, but slightly increase the transaction failure rate.
+## It only takes effect when `pessimistic-txn.pipelined` is also set to true.
+# in-memory = true
+
[gc]
## The number of keys to GC in one batch.
# batch-keys = 512
@@ -960,3 +1342,4 @@
## Garbage ratio threshold to trigger a GC.
# ratio-threshold = 1.1
+