1 files changed, 516 insertions, 133 deletions
diff --git a/tikv.toml b/tikv.toml
index 4d698ef5d303..57421a2cf865 100644
--- a/tikv.toml
+++ b/tikv.toml
@@ -1,21 +1,10 @@
 ## TiKV config template
 ##  Human-readable big numbers:
-##   File size(based on byte): KB, MB, GB, TB, PB
+##   File size(based on byte, binary units): KB, MB, GB, TB, PB
 ##    e.g.: 1_048_576 = "1MB"
 ##   Time(based on ms): ms, s, m, h
 ##    e.g.: 78_000 = "1.3m"
 
-## Log levels: trace, debug, info, warning, error, critical.
-## Note that `debug` and `trace` are only available in development builds.
-# log-level = "info"
-
-## File to store logs.
-## If it is not set, logs will be appended to stderr.
-# log-file = ""
-
-## Log format, one of json, text. Default to text.
-# log-format = "text"
-
 ## File to store slow logs.
 ## If "log-file" is set, but this is not set, the slow logs will be appeneded
 ## to "log-file". If both "log-file" and "slow-log-file" are not set, all logs
@@ -25,28 +14,95 @@
 ## The minimum operation cost to output relative logs.
 # slow-log-threshold = "1s"
 
-## Timespan between rotating the log files.
-## Once this timespan passes, log files will be rotated, i.e. existing log file will have a
-## timestamp appended to its name and a new file will be created.
-# log-rotation-timespan = "24h"
-
-## Size of log file that triggers the log rotation.
-## Once the size of log file exceeds the threshold value, the log file will be rotated
-## and place the old log file in a new file named by orginal file name subbfixed by a timestamp.
-# log-rotation-size = "300MB"
-
 ## Enable io snoop which utilize eBPF to get accurate disk io of TiKV
-## It won't take effect when compiling without BCC_IOSNOOP=1. 
+## It won't take effect when compiling without BCC_IOSNOOP=1.
 # enable-io-snoop = true
 
-# Configurations for the single thread pool serving read requests.
+## Use abort when TiKV panic. By default TiKV will use _exit() on panic, in that case
+## core dump file will not be generated, regardless of system settings.
+## If this config is enabled, core dump files need to be cleaned up to avoid disk space
+## being filled up.
+# abort-on-panic = false
+
+## Memory usage limit for the TiKV instance. Generally it's unnecessary to configure it
+## explicitly, in which case it will be set to 75% of total available system memory.
+## Considering the behavior of `block-cache.capacity`, it means 25% memory is reserved for
+## OS page cache.
+##
+## It's still unnecessary to configure it for deploying multiple TiKV nodes on a single
+## physical machine. It will be calculated as `5/3 * block-cache.capacity`.
+##
+## For different system memory capacity, the default memory quota will be:
+## * system=8G    block-cache=3.6G    memory-usage-limit=6G   page-cache=2G
+## * system=16G   block-cache=7.2G    memory-usage-limit=12G  page-cache=4G
+## * system=32G   block-cache=14.4G   memory-usage-limit=24G  page-cache=8G
+##
+## So how can `memory-usage-limit` influence TiKV? When a TiKV's memory usage almost reaches
+## this threshold, it can squeeze some internal components (e.g. evicting cached Raft entries)
+## to release memory.
+# memory-usage-limit = "0B"
+
+[quota]
+## Quota is used to add some limitation for the read write flow and then
+## gain predictable stable performance.
+## CPU quota that foreground requests can use. The default value is 0, which means unlimited.
+## The unit is millicpu but for now this config is approximate and soft limit.
+# foreground-cpu-time = 0
+## Write bandwidth limitation for this TiKV instance, default value is 0 which means unlimited.
+# foreground-write-bandwidth = "0B"
+## Read bandwidth limitation for this TiKV instance, default value is 0 which means unlimited.
+# foreground-read-bandwidth = "0B"
+## CPU quota that background requests can use. The default value is 0, which means unlimited.
+## The unit is millicpu but for now this config is approximate and soft limit.
+# background-cpu-time = 0
+## Write bandwidth limitation for background request for this TiKV instance, default value is 0 which means unlimited.
+# background-write-bandwidth = "0B"
+## Read bandwidth limitation for background request for this TiKV instance, default value is 0 which means unlimited.
+# background-read-bandwidth = "0B"
+## Limitation of max delay duration, default value is 500ms. Setting it to 0 means unlimited.
+# max-delay-duration = "500ms"
+## Whether to enable quota auto tune
+# enable-auto-tune = false
+
+[log]
+## Log levels: debug, info, warn, error, fatal.
+## Note that `debug` is only available in development builds.
+# level = "info"
+## log format, one of json, text. Default to text.
+# format = "text"
+## Enable automatic timestamps in log output, if not set, it will be defaulted to true.
+# enable-timestamp = true
+
+[log.file]
+## Usually it is set through command line.
+# filename = ""
+## max log file size in MB (upper limit to 4096MB)
+# max-size = 300
+## max log file keep days
+# max-days = 0
+## maximum number of old log files to retain
+# max-backups = 0
+
+[memory]
+## Whether to enable heap profiling, which may add a small performance overhead (about 2%) for the
+## default sample rate.
+# enable-heap-profiling = true
+
+## Average interval between allocation samples, as measured in bytes of allocation activity.
+## Increasing the sampling interval decreases profile fidelity, but also decreases the
+## computational overhead.
+## The default sample interval is 512 KB. It only accepts power of two, otherwise it will be
+## rounded up to the next power of two.
+# profiling-sample-per-bytes = "512KB"
+
+## Configurations for the single thread pool serving read requests.
 [readpool.unified]
 ## The minimal working thread count of the thread pool.
 # min-thread-count = 1
 
 ## The maximum working thread count of the thread pool.
 ## The default value is max(4, LOGICAL_CPU_NUM * 0.8).
-# max-thread-count = 8
+# max-thread-count = 4
 
 ## Size of the stack for each thread in the thread pool.
 # stack-size = "10MB"
@@ -56,7 +112,7 @@
 
 [readpool.storage]
 ## Whether to use the unified read pool to handle storage requests.
-# use-unified-pool = false
+# use-unified-pool = true
 
 ## The following configurations only take effect when `use-unified-pool` is false.
 
@@ -110,7 +166,7 @@
 # advertise-addr = ""
 
 ## Status address.
-## This is used for reporting the status of TiKV directly through 
+## This is used for reporting the status of TiKV directly through
 ## the HTTP address. Notice that there is a risk of leaking status
 ## information if this port is exposed to the public.
 ## Empty string means disabling it.
@@ -123,7 +179,7 @@
 # grpc-compression-type = "none"
 
 ## Size of the thread pool for the gRPC server.
-# grpc-concurrency = 4
+# grpc-concurrency = 5
 
 ## The number of max concurrent streams/requests on a client connection.
 # grpc-concurrent-stream = 1024
@@ -146,6 +202,9 @@
 ## Time to wait before closing the connection without receiving KeepAlive ping Ack.
 # grpc-keepalive-timeout = "3s"
 
+## Set maximum message length in bytes that gRPC can send. `-1` means unlimited.
+# max-grpc-send-msg-len = 10485760
+
 ## How many snapshots can be sent concurrently.
 # concurrent-send-snap-limit = 32
 
@@ -158,9 +217,11 @@
 ## Max time to handle Coprocessor requests before timeout.
 # end-point-request-max-handle-duration = "60s"
 
-## Max bytes that snapshot can be written to disk in one second.
-## It should be set based on your disk performance.
-# snap-max-write-bytes-per-sec = "100MB"
+## Max bytes that snapshot can interact with disk in one second. It should be
+## set based on your disk performance. Only write flow is considered, if
+## partitioned-raft-kv is used, read flow is also considered and it will be estimated
+## as read_size * 0.5 to get around errors from page cache.
+# snap-io-max-bytes-per-sec = "100MB"
 
 ## Whether to enable request batch.
 # enable-request-batch = true
@@ -179,12 +240,20 @@
 
 [storage]
 ## The path to RocksDB directory.
-# data-dir = "/tmp/tikv/store"
+# data-dir = "./"
+
+## Specifies the engine type. This configuration can only be specified when creating a new cluster
+## and cannot be modified once specified.
+##
+## Available types are:
+## "raft-kv": The default engine type in versions earlier than TiDB v6.6.0.
+## "partitioned-raft-kv": The new storage engine type introduced in TiDB v6.6.0.
+# engine = "raft-kv"
 
 ## The number of slots in Scheduler latches, which controls write concurrency.
 ## In most cases you can use the default value. When importing data, you can set it to a larger
 ## value.
-# scheduler-concurrency = 2048000
+# scheduler-concurrency = 524288
 
 ## Scheduler's worker pool size, i.e. the number of write threads.
 ## It should be less than total CPU cores. When there are frequent write operations, set it to a
@@ -206,35 +275,83 @@
 ## Set it to 0 will cause no space is reserved at all. It's generally used for tests.
 # reserve-space = "5GB"
 
-[storage.block-cache]
-## Whether to create a shared block cache for all RocksDB column families.
+## Reserve some space for raft disk if raft disk is deployed separately from kv disk.
+## `max(reserve-raft-space, raft disk capacity * 5%)` will be reserved exactly.
 ##
+## Set it to 0 will cause no space is reserved at all. It's generally used for tests.
+# reserve-raft-space = "1GB"
+
+## The maximum recovery time after rocksdb detects restorable background errors. When the data belonging
+## to the data range is damaged, it will be reported to PD through heartbeat, and PD will add `remove-peer`
+## operator to remove this damaged peer. When the damaged peer still exists in the current store, the
+## corruption SST files remain, and the KV storage engine can still put new content normally, but it
+## will return error when reading corrupt data range.
+##
+## If after this time, the peer where the corrupted data range is located has not been removed from the
+## current store, TiKV will panic.
+##
+## Set to 0 to disable this feature if you want to panic immediately when encountering such an error.
+# background-error-recovery-window = "1h"
+
 ## Block cache is used by RocksDB to cache uncompressed blocks. Big block cache can speed up read.
 ## It is recommended to turn on shared block cache. Since only the total cache size need to be
 ## set, it is easier to config. In most cases it should be able to auto-balance cache usage
 ## between column families with standard LRU algorithm.
-##
-## The rest of config in the storage.block-cache session is effective only when shared block cache
-## is on.
-# shared = true
+[storage.block-cache]
 
 ## Size of the shared block cache. Normally it should be tuned to 30%-50% of system's total memory.
-## When the config is not set, it is decided by the sum of the following fields or their default
-## value:
-##   * rocksdb.defaultcf.block-cache-size or 25% of system's total memory
-##   * rocksdb.writecf.block-cache-size   or 15% of system's total memory
-##   * rocksdb.lockcf.block-cache-size    or  2% of system's total memory
-##   * raftdb.defaultcf.block-cache-size  or  2% of system's total memory
 ##
 ## To deploy multiple TiKV nodes on a single physical machine, configure this parameter explicitly.
 ## Otherwise, the OOM problem might occur in TiKV.
-# capacity = "1GB"
+##
+## When storage.engine is "raft-kv", default value is 45% of available system memory.
+## When storage.engine is "partitioned-raft-kv", default value is 30% of available system memory.
+# capacity = "0B"
+
+[storage.flow-control]
+## Flow controller is used to throttle the write rate at scheduler level, aiming
+## to substitute the write stall mechanism of RocksDB. It features in two points:
+##   * throttle at scheduler, so raftstore and apply won't be blocked anymore
+##   * better control on the throttle rate to avoid QPS drop under heavy write
+##
+## Support change dynamically.
+## When enabled, it disables kvdb's write stall and raftdb's write stall(except memtable) and vice versa.
+# enable = true
+
+## When the number of immutable memtables of kvdb reaches the threshold, the flow controller begins to work
+# memtables-threshold = 5
+
+## When the number of SST files of level-0 of kvdb reaches the threshold, the flow controller begins to work
+# l0-files-threshold = 20
+
+## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to
+## reject some write requests with `ServerIsBusy` error.
+# soft-pending-compaction-bytes-limit = "192GB"
+
+## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to
+## reject all write requests with `ServerIsBusy` error.
+# hard-pending-compaction-bytes-limit = "1024GB"
+
+[storage.io-rate-limit]
+## Maximum I/O bytes that this server can write to or read from disk (determined by mode)
+## in one second. Internally it prefers throttling background operations over foreground
+## ones. This value should be set to the disk's optimal IO bandwidth, e.g. maximum IO
+## bandwidth specified by cloud disk vendors.
+##
+## When set to zero, disk IO operations are not limited.
+# max-bytes-per-sec = "0MB"
+
+## Determine which types of IO operations are counted and restrained below threshold.
+## Three different modes are: write-only, read-only, all-io.
+##
+## Only write-only mode is supported for now.
+# mode = "write-only"
 
 [pd]
 ## PD endpoints.
-# endpoints = []
+endpoints = ["127.0.0.1:2379"]
 
-## The interval at which to retry a PD connection initialization.
+## The interval at which to retry a PD connection.
 ## Default is 300ms.
 # retry-interval = "300ms"
 
@@ -256,7 +373,7 @@
 
 ## The path to RaftDB directory.
 ## If not set, it will be `{data-dir}/raft`.
-## If there are multiple disks on the machine, storing the data of Raft RocksDB on differen disks
+## If there are multiple disks on the machine, storing the data of Raft RocksDB on a different disk
 ## can improve TiKV performance.
 # raftdb-path = ""
 
@@ -277,6 +394,9 @@
 ## Store heartbeat tick interval for reporting to PD.
 # pd-store-heartbeat-tick-interval = "10s"
 
+## Store min resolved ts tick interval for reporting to PD.
+# pd-report-min-resolved-ts-interval = "1s"
+
 ## The threshold of triggering Region split check.
 ## When Region size change exceeds this config, TiKV will check whether the Region should be split
 ## or not. To reduce the cost of scanning data in the checking process, you can set the value to
@@ -289,26 +409,33 @@
 ## When the number of Raft entries exceeds the max size, TiKV rejects to propose the entry.
 # raft-entry-max-size = "8MB"
 
+## Interval to compact unnecessary Raft log.
+# raft-log-compact-sync-interval = "2s"
+
 ## Interval to GC unnecessary Raft log.
-# raft-log-gc-tick-interval = "10s"
+# raft-log-gc-tick-interval = "3s"
 
 ## Threshold to GC stale Raft log, must be >= 1.
 # raft-log-gc-threshold = 50
 
 ## When the entry count exceeds this value, GC will be forced to trigger.
-# raft-log-gc-count-limit = 72000
+# raft-log-gc-count-limit = 73728
 
 ## When the approximate size of Raft log entries exceeds this value, GC will be forced trigger.
 ## It's recommanded to set it to 3/4 of `region-split-size`.
 # raft-log-gc-size-limit = "72MB"
 
+## Old Raft logs could be reserved if `raft-log-gc-threshold` is not reached.
+## GC them after ticks `raft-log-reserve-max-ticks` times.
+# raft-log-reserve-max-ticks = 6
+
 ## Raft engine is a replaceable component. For some implementations, it's necessary to purge
 ## old log files to recycle disk space ASAP.
 # raft-engine-purge-interval = "10s"
 
 ## How long the peer will be considered down and reported to PD when it hasn't been active for this
 ## time.
-# max-peer-down-duration = "5m"
+# max-peer-down-duration = "10m"
 
 ## Interval to check whether to start manual compaction for a Region.
 # region-compact-check-interval = "5m"
@@ -325,14 +452,23 @@
 ## exceeds `region-compact-tombstones-percent`.
 # region-compact-tombstones-percent = 30
 
+## The minimum number of duplicated MVCC keys to trigger manual compaction.
+# region-compact-min-redundant-rows = 50000
+
+## The minimum percentage of duplicated MVCC keys to trigger manual compaction.
+## It should be set between 1 and 100. Manual compaction is only triggered when the number of
+## duplicated MVCC keys exceeds `region-compact-min-redundant-rows` and the percentage of duplicated MVCC keys
+## exceeds `region-compact-redundant-rows-percent`.
+# region-compact-redundant-rows-percent = 20
+
 ## Interval to check whether to start a manual compaction for Lock Column Family.
 ## If written bytes reach `lock-cf-compact-bytes-threshold` for Lock Column Family, TiKV will
 ## trigger a manual compaction for Lock Column Family.
 # lock-cf-compact-interval = "10m"
 # lock-cf-compact-bytes-threshold = "256MB"
 
-## Interval (s) to check Region whether the data are consistent.
-# consistency-check-interval = 0
+## Interval to check whether the data of a Region is consistent.
+# consistency-check-interval = "0s"
 
 ## Interval to clean up import SST files.
 # cleanup-import-sst-interval = "10m"
@@ -343,6 +479,16 @@
 ## Use how many threads to handle raft messages
 # store-pool-size = 2
 
+## Use how many threads to handle raft io tasks
+## If it is 0, it means io tasks are handled in store threads.
+# store-io-pool-size = 1
+
+## When the size of raft db writebatch exceeds this value, write will be triggered.
+# raft-write-size-limit = "1MB"
+
+## threads to generate raft snapshots
+# snap-generator-pool-size = 2
+
 [coprocessor]
 ## When it is set to `true`, TiKV will try to split a Region with table prefix if that Region
 ## crosses tables.
@@ -368,6 +514,12 @@
 ## Set to "mvcc" to do consistency check for MVCC data, or "raw" for raw data.
 # consistency-check-method = "mvcc"
 
+[coprocessor-v2]
+## Path to the directory where compiled coprocessor plugins are located.
+## Plugins in this directory will be automatically loaded by TiKV.
+## If the config value is not set, the coprocessor plugin will be disabled.
+# coprocessor-plugin-directory = "./coprocessors"
+
 [rocksdb]
 ## Maximum number of threads of RocksDB background jobs.
 ## The background tasks include compaction and flush. For detailed information why RocksDB needs to
@@ -392,8 +544,8 @@
 ## Value -1 means files opened are always kept open and RocksDB will prefetch index and filter
 ## blocks into block cache at startup. So if your database has a large working set, it will take
 ## several minutes to open the DB. You may need to increase this if your database has a large
-## working set. You can estimate the number of files based on `target-file-size-base` and
-## `target_file_size_multiplier` for level-based compaction.
+## working set. You can estimate the number of files based on `target-file-size-base` for
+## level-based compaction.
 # max-open-files = 40960
 
 ## Max size of RocksDB's MANIFEST file.
@@ -404,20 +556,19 @@
 # create-if-missing = true
 
 ## RocksDB Write-Ahead Logs (WAL) recovery mode.
-## 0 : TolerateCorruptedTailRecords, tolerate incomplete record in trailing data on all logs;
-## 1 : AbsoluteConsistency, We don't expect to find any corruption in the WAL;
-## 2 : PointInTimeRecovery, Recover to point-in-time consistency;
-## 3 : SkipAnyCorruptedRecords, Recovery after a disaster;
-# wal-recovery-mode = 2
+## "tolerate-corrupted-tail-records", tolerate incomplete record in trailing data on all logs;
+## "absolute-consistency", We don't expect to find any corruption in the WAL;
+## "point-in-time", Recover to point-in-time consistency;
+## "skip-any-corrupted-records", Recovery after a disaster;
+# wal-recovery-mode = "point-in-time"
 
-## RocksDB WAL directory.
+## KV RocksDB WAL directory.
 ## This config specifies the absolute directory path for WAL.
-## If it is not set, the log files will be in the same directory as data. When you set the path to
-## RocksDB directory in memory like in `/dev/shm`, you may want to set`wal-dir` to a directory on a
-## persistent storage. See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database .
+## If it is not set, the log files will be in the same directory as data.
 ## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can
 ## improve performance.
-# wal-dir = "/tmp/tikv/store"
+## Do not set this config the same as `raftdb.wal-dir`.
+# wal-dir = ""
 
 ## The following two fields affect how archived WAL will be deleted.
 ## 1. If both values are set to 0, logs will be deleted ASAP and will not get into the archive.
@@ -435,15 +586,13 @@
 # wal-size-limit = 0
 
 ## Max RocksDB WAL size in total
+## When storage.engine is "raft-kv", default value is 4GB.
+## When storage.engine is "partitioned-raft-kv", default value is 1.
 # max-total-wal-size = "4GB"
 
-## RocksDB Statistics provides cumulative stats over time.
-## Turning statistics on will introduce about 5%-10% overhead for RocksDB, but it can help you to
-## know the internal status of RocksDB.
-# enable-statistics = true
-
 ## Dump statistics periodically in information logs.
-## Same as RocksDB's default value (10 min).
+## When storage.engine is "raft-kv", default value is 10m.
+## When storage.engine is "partitioned-raft-kv", default value is 0.
 # stats-dump-period = "10m"
 
 ## Refer to: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ
@@ -466,18 +615,19 @@
 ## 2. rate-limiter-refill-period controls how often IO tokens are refilled. Smaller value will flatten
 ##    IO bursts while introducing more CPU overhead.
 ## 3. rate-limiter-mode indicates which types of operations count against the limit.
-##    1 : ReadOnly
-##    2 : WriteOnly
-##    3 : AllIo
+##    "read-only"
+##    "write-only"
+##    "all-io"
 ## 4. rate-limiter-auto_tuned enables dynamic adjustment of rate limit within the range
 ##    [10MB/s, rate_bytes_per_sec], according to the recent demand for background I/O.
 # rate-bytes-per-sec = "10GB"
 # rate-limiter-refill-period = "100ms"
-# rate-limiter-mode = 2
+# rate-limiter-mode = "write-only"
 # rate-limiter-auto-tuned = true
 
-## Enable or disable the pipelined write.
-# enable-pipelined-write = true
+## Enable or disable the pipelined write. If set false, RocksDB will use a new write mode ported from cockroachdb/pebble.
+## See more details in https://github.com/tikv/rocksdb/pull/267 and https://github.com/tikv/tikv/issues/12059.
+# enable-pipelined-write = false
 
 ## Allows OS to incrementally sync files to disk while they are being written, asynchronously,
 ## in the background.
@@ -505,19 +655,29 @@
 ## absolute path will be used as the log file name's prefix.
 # info-log-dir = ""
 
-# RocksDB log levels
+## RocksDB log levels
 # info-log-level = "info"
 
+## Memory usage limit for Raft Engine. Undersized write buffers will be flushed to satisfy the
+## requirement.
+##
+## No limit when not specified.
+##
+## When storage.engine is "raft-kv", default is no limit.
+## When storage.engine is "partitioned-raft-kv", default value is 25% of available system memory or
+## 15GiB, whichever is smaller.
+# write-buffer-limit = "0B"
+
 ## Options for `Titan`.
 [rocksdb.titan]
 ## Enables or disables `Titan`. Note that Titan is still an experimental feature. Once
 ## enabled, it can't fall back. Forced fallback may result in data loss.
-## default: false
-# enabled = false
+## Titan is on by default since v7.6.0. This won't affect deployments that existed before v7.6.0.
+# enabled = true
 
 ## Maximum number of threads of `Titan` background gc jobs.
-# default: 4
-# max-background-gc = 4
+## default: 1
+# max-background-gc = 1
 
 ## Options for "Default" Column Family, which stores actual user data.
 [rocksdb.defaultcf]
@@ -556,7 +716,7 @@
 ## The data block size. RocksDB compresses data based on the unit of block.
 ## Similar to page in other databases, block is the smallest unit cached in block-cache. Note that
 ## the block size specified here corresponds to uncompressed data.
-# block-size = "64KB"
+# block-size = "32KB"
 
 ## If you're doing point lookups you definitely want to turn bloom filters on. We use bloom filters
 ## to avoid unnecessary disk reads. Default bits_per_key is 10, which yields ~1% false positive
@@ -568,6 +728,12 @@
 ## filter.
 # block-based-bloom-filter = false
 
+## Use Ribbon filter for levels higher or equal to this value. Use non-block-based bloom filter for
+## lower levels. When this is set, `block-based-bloom-filter` will be ignored.
+## Only effective for `format-version` >= 5.
+## Disabled by default.
+# ribbon-filter-above-level = 0
+
 # level0-file-num-compaction-trigger = 4
 
 ## Soft limit on number of level-0 files.
@@ -579,7 +745,7 @@
 ## Maximum number of level-0 files.
 ## When the number of SST files of level-0 reaches the limit of `level0-stop-writes-trigger`,
 ## RocksDB stalls the new write operation.
-# level0-stop-writes-trigger = 36
+# level0-stop-writes-trigger = 20
 
 ## Amount of data to build up in memory (backed by an unsorted log on disk) before converting to a
 ## sorted on-disk file. It is the RocksDB MemTable size.
@@ -625,11 +791,17 @@
 # max-compaction-bytes = "2GB"
 
 ## There are four different compaction priorities.
-## 0 : ByCompensatedSize
-## 1 : OldestLargestSeqFirst
-## 2 : OldestSmallestSeqFirst
-## 3 : MinOverlappingRatio
-# compaction-pri = 3
+## "by-compensated-size"
+## "oldest-largest-seq-first"
+## "oldest-smallest-seq-first"
+## "min-overlapping-ratio"
+# compaction-pri = "min-overlapping-ratio"
+
+## Refer to storage.flow-control.soft-pending-compaction-bytes-limit.
+# soft-pending-compaction-bytes-limit = "192GB"
+
+## Refer to storage.flow-control.hard-pending-compaction-bytes-limit.
+# hard-pending-compaction-bytes-limit = "1000GB"
 
 ## Indicating if we'd put index/filter blocks to the block cache.
 ## If not specified, each "table reader" object will pre-load index/filter block during table
@@ -658,6 +830,10 @@
 ## while using `Raw` mode.
 # optimize-filters-for-hits = true
 
+## Option to generate Bloom/Ribbon filters that minimize memory internal fragmentation.
+## Only effective for `format-version` >= 5.
+# optimize-filters-for-memory = false
+
 ## Enable compaction guard, which is an optimization to split SST files at TiKV region boundaries.
 ## The optimization can help reduce compaction IO, and allow us to use larger SST file size
 ## (thus less SST files overall) while making sure we can still efficiently cleanup stale data on
@@ -674,12 +850,86 @@
 ## for the same CF.
 # compaction-guard-max-output-file-size = "128M"
 
+## Available versions:
+##
+## 0 -- This version can be read by all TiKV releases. Doesn't support changing
+## checksum type (default is CRC32).
+##
+## 1 -- Can be read by all TiKV releases. Supports non-default checksum, like
+## xxHash. It is written by RocksDB when BlockBasedTableOptions::checksum is
+## something other than kCRC32c. (version 0 is silently upconverted)
+##
+## 2 -- Can be read by all TiKV releases. Changes the way we encode compressed
+## blocks with LZ4, BZip2 and Zlib compression.
+##
+## 3 -- Can be read by TiKV's versions since 2.1. Changes the way we encode the
+## keys in index blocks.
+## This option only affects newly written tables. When reading existing tables,
+## the information about version is read from the footer.
+##
+## 4 -- Can be read by TiKV's versions since 3.0. Changes the way we encode the
+## values in index blocks.
+## This option only affects newly written tables. When reading existing tables,
+## the information about version is read from the footer.
+##
+## 5 -- Can be read by TiKV's versions since 6.3. Full and partitioned filters
+## use a generally faster and more accurate Bloom filter implementation, with a
+## different schema.
+##
+## When storage.engine is "raft-kv", default value is 2.
+## When storage.engine is "partitioned-raft-kv", default value is 5.
+##
+# format-version = 2
+
+## If enabled, prepopulate warm/hot blocks (data, uncompressed dict, index and
+## filter blocks) which are already in memory into block cache at the time of
+## flush. On a flush, the block that is in memory (in memtables) get flushed
+## to the device. If using Direct IO, additional IO is incurred to read this
+## data back into memory again, which is avoided by enabling this option. This
+## further helps if the workload exhibits high temporal locality, where most
+## of the reads go to recently written data. This also helps in case of
+## Distributed FileSystem.
+##
+##   disabled:   kDisabled
+##   flush-only: kFlushOnly
+##
+# prepopulate-block-cache = "disabled"
+
+## Use the specified checksum type. Newly created table files will be
+## protected with this checksum type. Old table files will still be readable,
+## even though they have different checksum type.
+##
+##   no:       kNoChecksum
+##   crc32c:   kCRC32c
+##   xxhash:   kxxHash
+##   xxhash64: kxxHash64
+##   xxh3:     kXXH3 (supported since TiKV 6.2)
+##
+# checksum = "crc32c"
+
+## The maximum number of concurrent compaction tasks. 0 stands for no limit.
+# max-compactions = 0
+
+## SST files containing updates older than TTL will go through the compaction
+## process. This usually happens in a cascading way so that those entries
+## will be compacted to bottommost level/file. Disabled as default.
+##
+## Default: 0s.
+# ttl = "0s"
+
+## SST files older than this value will be picked up for compaction, and
+## re-written to the same level as they were before. Disabled as default.
+##
+## Default: 0s.
+# periodic-compaction-seconds = "0s"
+
 ## Options for "Default" Column Family for `Titan`.
 [rocksdb.defaultcf.titan]
 ## The smallest value to store in blob files. Value smaller than
 ## this threshold will be inlined in base DB.
-## default: 1KB
-# min-blob-size = "1KB"
+## The default value is 32KB since v7.6.0. This won't affect deployments that existed
+## before v7.6.0, for which the default value remains 1KB.
+# min-blob-size = "32KB"
 
 ## The compression algorithm used to compress data in blob files.
 ## Compression method.
@@ -690,26 +940,43 @@
 ##   lz4:    kLZ4Compression
 ##   lz4hc:  kLZ4HCCompression
 ##   zstd:   kZSTD
-# default: lz4
-# blob-file-compression = "lz4"
+## default: zstd
+# blob-file-compression = "zstd"
+
+## Set blob file zstd dictionary compression, default(0) will use zstd compression.
+## It is recommended to set the dictionary size to values such as 4k or 16k. Additionally,
+## the sample data size used to train the dictionary is internally 100X the dictionary size.
+## It has no effect when `blob-file-compression` is not `zstd`.
+## default: 0
+# zstd-dict-size = 0
+
+## Whether to share blob cache with block cache. If set to true, Titan would use the shared block
+## cache configured in `storage.block-cache` and ignore the setting of `blob-cache-size`.
+## default: true
+# shared-blob-cache = true
 
 ## Specifics cache size for blob records
-# default: 0
+## default: 0
 # blob-cache-size = "0GB"
 
 ## If the ratio of discardable size of a blob file is larger than
 ## this threshold, the blob file will be GCed out.
-# default: 0.5
+## default: 0.5
 # discardable-ratio = 0.5
 
-## The mode used to process blob files. In read-only mode Titan
-## stops writing value into blob log. In fallback mode Titan
-## converts blob index into real value on flush and compaction.
-## This option is especially useful for downgrading Titan.
+## The mode used to process blob files. In read-only mode Titan stops writing
+## value into blob log. In fallback mode Titan converts blob index into real
+## value on flush and compaction.
+##
+## This option can be used to disable Titan. More specifically, to disable
+## Titan, set this option to fallback and perform a full compaction using
+## tikv-ctl. Then, monitor the blob file size metrics. After the blob file size
+## decreases to 0, you can set rocksdb.titan.enabled to false and restart TiKV.
+##
 ##   default:   kNormal
 ##   read-only: kReadOnly
 ##   fallback:  kFallback
-# default: normal
+## default: normal
 # blob-run-mode = "normal"
 
 ## If set true, values in blob file will be merged to a new blob file while
@@ -722,17 +989,13 @@
 ##
 ## Requirement: level_compaction_dynamic_level_base = true
 ## default: false
-# level_merge = false
-
-## Use merge operator to rewrite GC blob index.
-## default: false
-# gc-merge-rewrite = false
+# level-merge = false
 
 ## Options for "Write" Column Family, which stores MVCC commit information
 [rocksdb.writecf]
 ## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`.
 # compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
-# block-size = "64KB"
+# block-size = "32KB"
 
 ## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`.
 # write-buffer-size = "128MB"
@@ -745,16 +1008,22 @@
 
 # level0-file-num-compaction-trigger = 4
 # level0-slowdown-writes-trigger = 20
-# level0-stop-writes-trigger = 36
+# level0-stop-writes-trigger = 20
 # cache-index-and-filter-blocks = true
 # pin-l0-filter-and-index-blocks = true
-# compaction-pri = 3
+# compaction-pri = "min-overlapping-ratio"
+# soft-pending-compaction-bytes-limit = "192GB"
+# hard-pending-compaction-bytes-limit = "1000GB"
 # read-amp-bytes-per-bit = 0
 # dynamic-level-bytes = true
 # optimize-filters-for-hits = false
 # enable-compaction-guard = true
 # compaction-guard-min-output-file-size = "8M"
 # compaction-guard-max-output-file-size = "128M"
+# format-version = 2
+# prepopulate-block-cache = "disabled"
+# checksum = "crc32c"
+# max-compactions = 0
 
 [rocksdb.lockcf]
 # compression-per-level = ["no", "no", "no", "no", "no", "no", "no"]
@@ -766,14 +1035,20 @@
 # target-file-size-base = "8MB"
 # level0-file-num-compaction-trigger = 1
 # level0-slowdown-writes-trigger = 20
-# level0-stop-writes-trigger = 36
+# level0-stop-writes-trigger = 20
 # cache-index-and-filter-blocks = true
 # pin-l0-filter-and-index-blocks = true
-# compaction-pri = 0
+# compaction-pri = "by-compensated-size"
+# soft-pending-compaction-bytes-limit = "192GB"
+# hard-pending-compaction-bytes-limit = "1000GB"
 # read-amp-bytes-per-bit = 0
 # dynamic-level-bytes = true
 # optimize-filters-for-hits = false
 # enable-compaction-guard = false
+# format-version = 2
+# prepopulate-block-cache = "disabled"
+# checksum = "crc32c"
+# max-compactions = 0
 
 [raftdb]
 # max-background-jobs = 4
@@ -782,14 +1057,21 @@
 # max-manifest-file-size = "20MB"
 # create-if-missing = true
 
-# enable-statistics = true
 # stats-dump-period = "10m"
 
+## Raft RocksDB WAL directory.
+## This config specifies the absolute directory path for WAL.
+## If it is not set, the log files will be in the same directory as data.
+## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can
+## improve performance.
+## Do not set this config the same as `rocksdb.wal-dir`.
+# wal-dir = ""
+
 # compaction-readahead-size = 0
 # writable-file-max-buffer-size = "1MB"
 # use-direct-io-for-flush-and-compaction = false
 # enable-pipelined-write = true
-# allow-concurrent-memtable-write = false
+# allow-concurrent-memtable-write = true
 # bytes-per-sync = "1MB"
 # wal-bytes-per-sync = "512KB"
 
@@ -798,7 +1080,6 @@
 # info-log-keep-log-file-num = 10
 # info-log-dir = ""
 # info-log-level = "info"
-# optimize-filters-for-hits = true
 
 [raftdb.defaultcf]
 ## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`.
@@ -816,25 +1097,98 @@
 
 # level0-file-num-compaction-trigger = 4
 # level0-slowdown-writes-trigger = 20
-# level0-stop-writes-trigger = 36
+# level0-stop-writes-trigger = 20
 # cache-index-and-filter-blocks = true
 # pin-l0-filter-and-index-blocks = true
-# compaction-pri = 0
+# compaction-pri = "by-compensated-size"
+# soft-pending-compaction-bytes-limit = "192GB"
+# hard-pending-compaction-bytes-limit = "1000GB"
 # read-amp-bytes-per-bit = 0
 # dynamic-level-bytes = true
 # optimize-filters-for-hits = true
 # enable-compaction-guard = false
+# format-version = 2
+# prepopulate-block-cache = "disabled"
+# checksum = "crc32c"
+# max-compactions = 0
 
 [raft-engine]
-## Enable raft-engine will ignore all settings about `raftdb`.
-# enable = false
-## Recovery mode. Candidates are `tolerate-corrupted-tail-records` and `absolute-consistency`.
-# recovery_mode = "tolerate-corrupted-tail-records"
-# bytes-per-sync = "256KB"
+## Determines whether to use Raft Engine to store raft logs. When it is
+## enabled, configurations of `raftdb` are ignored.
+# enable = true
+
+## The directory at which raft log files are stored. If the directory does not
+## exist, it will be created when TiKV is started.
+##
+## When this configuration is not set, `{data-dir}/raft-engine` is used.
+##
+## If there are multiple disks on your machine, it is recommended to store the
+## data of Raft Engine on a different disk to improve TiKV performance.
+# dir = ""
+
+## Specifies the threshold size of a log batch. A log batch larger than this
+## configuration is compressed.
+##
+## If you set this configuration item to `0`, compression is disabled.
+# batch-compression-threshold = "8KB"
+
+## Specifies the maximum size of log files. When a log file is larger than this
+## value, it is rotated.
 # target-file-size = "128MB"
+
+## Specifies the threshold size of the main log queue. When this configuration
+## value is exceeded, the main log queue is purged.
+##
+## This configuration can be used to adjust the disk space usage of Raft
+## Engine.
 # purge-threshold = "10GB"
-## Raft engine has builtin entry cache. `cache-limit` limits the memory usage of the cache.
-# cache-limit = "1GB"
+
+## Determines how to deal with file corruption during recovery.
+##
+## Candidates:
+##   absolute-consistency
+##   tolerate-tail-corruption
+##   tolerate-any-corruption
+# recovery-mode = "tolerate-tail-corruption"
+
+## The minimum I/O size for reading log files during recovery.
+##
+## Default: "16KB". Minimum: "512B".
+# recovery-read-block-size = "16KB"
+
+## The number of threads used to scan and recover log files.
+##
+## Default: 4. Minimum: 1.
+# recovery-threads = 4
+
+## Memory usage limit for Raft Engine.
+## When it's not set, 15% of available system memory will be used.
+# memory-limit = "1GB"
+
+## Version of the log file in Raft Engine.
+##
+## Candidates:
+##   1: Can be read by TiKV release 6.1 and above.
+##   2: Can be read by TiKV release 6.3 and above. Supports log recycling.
+##
+## Default: 2.
+# format-version = 2
+
+## Whether to recycle stale log files in Raft Engine.
+## If `true`, logically purged log files will be reserved for recycling.
+## Only available for `format-version` >= 2. This option is only
+## available when TiKV >= 6.3.x.
+##
+## Default: true.
+# enable-log-recycle = true
+
+## Whether to prepare log files for recycling on startup.
+## If `true`, a batch of empty log files will be prepared for recycling when
+## the engine starts.
+## Only available when `enable-log-recycle` is true.
+##
+## Default: false
+# prefill-for-recycle = false
 
 [security]
 ## The path for TLS certificates. Empty string means disabling secure connections.
@@ -848,11 +1202,12 @@
 ## Default is false.
 # redact-info-log = false
 
-# Configurations for encryption at rest. Experimental.
+## Configurations for encryption at rest. Experimental.
 [security.encryption]
 ## Encryption method to use for data files.
-## Possible values are "plaintext", "aes128-ctr", "aes192-ctr" and "aes256-ctr". Value other than
-## "plaintext" means encryption is enabled, in which case master key must be specified.
+## Possible values are "plaintext", "aes128-ctr", "aes192-ctr", "aes256-ctr" and "sm4-ctr".
+## Value other than "plaintext" means encryption is enabled, in which case
+## master key must be specified.
 # data-encryption-method = "plaintext"
 
 ## Specifies how often TiKV rotates data encryption key.
@@ -872,7 +1227,7 @@
 ##     Plaintext as master key means no master key is given and only applicable when
 ##     encryption is not enabled, i.e. data-encryption-method = "plaintext". This type doesn't
 ##     have sub-config items. Example:
-##     
+##
 ##     [security.encryption.master-key]
 ##     type = "plaintext"
 ##
@@ -896,7 +1251,7 @@
 ##
 ##     Supply a custom encryption key stored in a file. It is recommended NOT to use in production,
 ##     as it breaks the purpose of encryption at rest, unless the file is stored in tempfs.
-##     The file must contain a 256-bits (32 bytes, regardless of key length implied by 
+##     The file must contain a 256-bits (32 bytes, regardless of key length implied by
 ##     data-encryption-method) key encoded as hex string and end with newline ("\n"). Example:
 ##
 ##     [security.encryption.master-key]
@@ -921,17 +1276,39 @@
 
 [backup]
 ## Number of threads to perform backup tasks.
-## The default value is set to min(CPU_NUM * 0.75, 32).
-# num-threads = 24
+## The default value is set to min(CPU_NUM * 0.5, 8).
+# num-threads = 8
 
 ## Number of ranges to backup in one batch.
-# batch = 8
+# batch-size = 8
 
 ## When Backup region [a,e) size exceeds `sst-max-size`, it will be backuped into several Files [a,b),
 ## [b,c), [c,d), [d,e) and the size of [a,b), [b,c), [c,d) will be `sst-max-size` (or a
 ## little larger).
 # sst-max-size = "144MB"
 
+## Automatically reduce the number of backup threads when the current workload is high,
+## in order to reduce the impact on the cluster's performance during backup.
+# enable-auto-tune = true
+
+[log-backup]
+## Number of threads to perform backup stream tasks.
+## The default value is CPU_NUM * 0.5, and limited to [2, 12].
+# num-threads = 8
+
+## Enable this feature. TiKV will start watching related tasks in PD and back up KV changes to storage according to the tasks.
+## The default value is false.
+# enable = true
+
+[backup.hadoop]
+## Let TiKV know how to find the hdfs shell command.
+## Equivalent to the $HADOOP_HOME environment variable.
+# home = ""
+
+## TiKV will run the hdfs shell command under this linux user.
+## TiKV will use the current linux user if not provided.
+# linux-user = ""
+
 [pessimistic-txn]
 ## The default and maximum delay before responding to TiDB when pessimistic
 ## transactions encounter locks
@@ -943,10 +1320,15 @@
 ## one more likely acquires the lock.
 # wake-up-delay-duration = "20ms"
 
-## Enable pipelined pessimistic lock, only effect when processing perssimistic transactions
-## Enabled this will improve performance, but slightly increase the transcation failure rate
+## Enable pipelined pessimistic lock. It only takes effect when processing pessimistic transactions.
+## Enabling this will improve performance, but slightly increase the transaction failure rate
 # pipelined = true
 
+## Enable in-memory pessimistic lock. It only takes effect when processing pessimistic transactions.
+## Enabling this will improve performance, but slightly increase the transaction failure rate.
+## It only takes effect when `pessimistic-txn.pipelined` is also set to true.
+# in-memory = true
+
 [gc]
 ## The number of keys to GC in one batch.
 # batch-keys = 512
@@ -960,3 +1342,4 @@
 
 ## Garbage ratio threshold to trigger a GC.
 # ratio-threshold = 1.1
+