-rw-r--r--   .SRCINFO      8
-rw-r--r--   PKGBUILD      6
-rw-r--r--   tikv.toml   649
3 files changed, 523 insertions, 140 deletions
@@ -1,6 +1,6 @@ pkgbase = tikv pkgdesc = Distributed transactional key-value database, originally created to complement TiDB - pkgver = 7.6.0 + pkgver = 8.0.0 pkgrel = 1 url = https://github.com/tikv/tikv arch = x86_64 @@ -17,15 +17,15 @@ pkgbase = tikv depends = gcc-libs provides = tikv-server backup = etc/tikv/tikv.toml - source = tikv-7.6.0.tar.gz::https://github.com/tikv/tikv/archive/v7.6.0.tar.gz + source = tikv-8.0.0.tar.gz::https://github.com/tikv/tikv/archive/v8.0.0.tar.gz source = tikv.service source = tikv-sysusers.conf source = tikv-tmpfiles.conf source = tikv.toml - sha256sums = 8d7802fd613ee932d2c4517d27dfcbb6765c0509ed6f837cccbbcc947c59ba6a + sha256sums = b0e9475d2b56b7cd40dba8c87f3b8c86f1973081aba18921b44c0d2635606cb0 sha256sums = 870b8eaf83bc0d22b05b0f3a7890660e483cf77bb1d84bc50ad04fb23068cd8c sha256sums = 744b252e29099b0099dc41e30bc3badd33b3d661c7126af8044faa4fc2df8927 sha256sums = 935291bac6a216c6f880df9bfaec8900266413bb202ac483e79f291e1f28e9f1 - sha256sums = be2f8c6830a48da6c356db943aa55ee2f3c9c30b2e9027e7b758cab875fc8520 + sha256sums = 1c933198cd9b5611bd7d25f4f3501bd1b580bb35352f8d65bc1cef8588400d24 pkgname = tikv @@ -1,7 +1,7 @@ # Maintainer: Xuanwo <xuanwo@archlinuxcn.org> # Maintainer: Allen Zhong <pdev@zhoal.pw> pkgname=tikv -pkgver=7.6.0 +pkgver=8.0.0 pkgrel=1 pkgdesc='Distributed transactional key-value database, originally created to complement TiDB' arch=('x86_64') @@ -25,11 +25,11 @@ source=(tikv-${pkgver}.tar.gz::https://github.com/tikv/tikv/archive/v${pkgver}.t tikv-sysusers.conf tikv-tmpfiles.conf tikv.toml) -sha256sums=('8d7802fd613ee932d2c4517d27dfcbb6765c0509ed6f837cccbbcc947c59ba6a' +sha256sums=('b0e9475d2b56b7cd40dba8c87f3b8c86f1973081aba18921b44c0d2635606cb0' '870b8eaf83bc0d22b05b0f3a7890660e483cf77bb1d84bc50ad04fb23068cd8c' '744b252e29099b0099dc41e30bc3badd33b3d661c7126af8044faa4fc2df8927' '935291bac6a216c6f880df9bfaec8900266413bb202ac483e79f291e1f28e9f1' - 'be2f8c6830a48da6c356db943aa55ee2f3c9c30b2e9027e7b758cab875fc8520') + '1c933198cd9b5611bd7d25f4f3501bd1b580bb35352f8d65bc1cef8588400d24') prepare() { cd tikv-${pkgver} diff --git a/tikv.toml b/tikv.toml index 4d698ef5d303..57421a2cf865 100644 --- a/tikv.toml +++ b/tikv.toml @@ -1,21 +1,10 @@ ## TiKV config template ## Human-readable big numbers: -## File size(based on byte): KB, MB, GB, TB, PB +## File size(based on byte, binary units): KB, MB, GB, TB, PB ## e.g.: 1_048_576 = "1MB" ## Time(based on ms): ms, s, m, h ## e.g.: 78_000 = "1.3m" -## Log levels: trace, debug, info, warning, error, critical. -## Note that `debug` and `trace` are only available in development builds. -# log-level = "info" - -## File to store logs. -## If it is not set, logs will be appended to stderr. -# log-file = "" - -## Log format, one of json, text. Default to text. -# log-format = "text" - ## File to store slow logs. ## If "log-file" is set, but this is not set, the slow logs will be appeneded ## to "log-file". If both "log-file" and "slow-log-file" are not set, all logs @@ -25,28 +14,95 @@ ## The minimum operation cost to output relative logs. # slow-log-threshold = "1s" -## Timespan between rotating the log files. -## Once this timespan passes, log files will be rotated, i.e. existing log file will have a -## timestamp appended to its name and a new file will be created. -# log-rotation-timespan = "24h" - -## Size of log file that triggers the log rotation. 
-## Once the size of log file exceeds the threshold value, the log file will be rotated -## and place the old log file in a new file named by orginal file name subbfixed by a timestamp. -# log-rotation-size = "300MB" - ## Enable io snoop which utilize eBPF to get accurate disk io of TiKV -## It won't take effect when compiling without BCC_IOSNOOP=1. +## It won't take effect when compiling without BCC_IOSNOOP=1. # enable-io-snoop = true -# Configurations for the single thread pool serving read requests. +## Use abort when TiKV panic. By default TiKV will use _exit() on panic, in that case +## core dump file will not be generated, regardless of system settings. +## If this config is enabled, core dump files needs to be cleanup to avoid disk space +## being filled up. +# abort-on-panic = false + +## Memory usage limit for the TiKV instance. Generally it's unnecessary to configure it +## explicitly, in which case it will be set to 75% of total available system memory. +## Considering the behavior of `block-cache.capacity`, it means 25% memory is reserved for +## OS page cache. +## +## It's still unnecessary to configure it for deploying multiple TiKV nodes on a single +## physical machine. It will be calculated as `5/3 * block-cache.capacity`. +## +## For different system memory capacity, the default memory quota will be: +## * system=8G block-cache=3.6G memory-usage-limit=6G page-cache=2G. +## * system=16G block-cache=7.2G memory-usage-limit=12G page-cache=4G +## * system=32G block-cache=14.4G memory-usage-limit=24G page-cache=8G +## +## So how can `memory-usage-limit` influence TiKV? When a TiKV's memory usage almost reaches +## this threshold, it can squeeze some internal components (e.g. evicting cached Raft entries) +## to release memory. +# memory-usage-limit = "0B" + +[quota] +## Quota is use to add some limitation for the read write flow and then +## gain predictable stable performance. +## CPU quota for these front requests can use, default value is 0, it means unlimited. +## The unit is millicpu but for now this config is approximate and soft limit. +# foreground-cpu-time = 0 +## Write bandwidth limitation for this TiKV instance, default value is 0 which means unlimited. +# foreground-write-bandwidth = "0B" +## Read bandwidth limitation for this TiKV instance, default value is 0 which means unlimited. +# foreground-read-bandwidth = "0B" +## CPU quota for these background requests can use, default value is 0, it means unlimited. +## The unit is millicpu but for now this config is approximate and soft limit. +# background-cpu-time = 0 +## Write bandwidth limitation for backgroud request for this TiKV instance, default value is 0 which means unlimited. +# background-write-bandwidth = "0B" +## Read bandwidth limitation for background request for this TiKV instance, default value is 0 which means unlimited. +# background-read-bandwidth = "0B" +## Limitation of max delay duration, default value is 0 which means unlimited. +# max-delay-duration = "500ms" +## Whether to enable quota auto tune +# enable-auto-tune = false + +[log] +## Log levels: debug, info, warn, error, fatal. +## Note that `debug` is only available in development builds. +# level = "info" +## log format, one of json, text. Default to text. +# format = "text" +## Enable automatic timestamps in log output, if not set, it will be defaulted to true. +# enable-timestamp = true + +[log.file] +## Usually it is set through command line. 
+# filename = "" +## max log file size in MB (upper limit to 4096MB) +# max-size = 300 +## max log file keep days +# max-days = 0 +## maximum number of old log files to retain +# max-backups = 0 + +[memory] +## Whether enable the heap profiling which may have a bit performance overhead about 2% for the +## default sample rate. +# enable-heap-profiling = true + +## Average interval between allocation samples, as measured in bytes of allocation activity. +## Increasing the sampling interval decreases profile fidelity, but also decreases the +## computational overhead. +## The default sample interval is 512 KB. It only accepts power of two, otherwise it will be +## rounded up to the next power of two. +# profiling-sample-per-bytes = "512KB" + +## Configurations for the single thread pool serving read requests. [readpool.unified] ## The minimal working thread count of the thread pool. # min-thread-count = 1 ## The maximum working thread count of the thread pool. ## The default value is max(4, LOGICAL_CPU_NUM * 0.8). -# max-thread-count = 8 +# max-thread-count = 4 ## Size of the stack for each thread in the thread pool. # stack-size = "10MB" @@ -56,7 +112,7 @@ [readpool.storage] ## Whether to use the unified read pool to handle storage requests. -# use-unified-pool = false +# use-unified-pool = true ## The following configurations only take effect when `use-unified-pool` is false. @@ -110,7 +166,7 @@ # advertise-addr = "" ## Status address. -## This is used for reporting the status of TiKV directly through +## This is used for reporting the status of TiKV directly through ## the HTTP address. Notice that there is a risk of leaking status ## information if this port is exposed to the public. ## Empty string means disabling it. @@ -123,7 +179,7 @@ # grpc-compression-type = "none" ## Size of the thread pool for the gRPC server. -# grpc-concurrency = 4 +# grpc-concurrency = 5 ## The number of max concurrent streams/requests on a client connection. # grpc-concurrent-stream = 1024 @@ -146,6 +202,9 @@ ## Time to wait before closing the connection without receiving KeepAlive ping Ack. # grpc-keepalive-timeout = "3s" +## Set maximum message length in bytes that gRPC can send. `-1` means unlimited. +# max-grpc-send-msg-len = 10485760 + ## How many snapshots can be sent concurrently. # concurrent-send-snap-limit = 32 @@ -158,9 +217,11 @@ ## Max time to handle Coprocessor requests before timeout. # end-point-request-max-handle-duration = "60s" -## Max bytes that snapshot can be written to disk in one second. -## It should be set based on your disk performance. -# snap-max-write-bytes-per-sec = "100MB" +## Max bytes that snapshot can interact with disk in one second. It should be +## set based on your disk performance. Only write flow is considered, if +## partiioned-raft-kv is used, read flow is also considered and it will be estimated +## as read_size * 0.5 to get around errors from page cache. +# snap-io-max-bytes-per-sec = "100MB" ## Whether to enable request batch. # enable-request-batch = true @@ -179,12 +240,20 @@ [storage] ## The path to RocksDB directory. -# data-dir = "/tmp/tikv/store" +# data-dir = "./" + +## Specifies the engine type. This configuration can only be specified when creating a new cluster +## and cannot be modifies once being specified. +## +## Available types are: +## "raft-kv": The default engine type in versions earlier than TiDB v6.6.0. +## "partitioned-raft-kv": The new storage engine type introduced in TiDB v6.6.0. 
+# engine = "raft-kv" ## The number of slots in Scheduler latches, which controls write concurrency. ## In most cases you can use the default value. When importing data, you can set it to a larger ## value. -# scheduler-concurrency = 2048000 +# scheduler-concurrency = 524288 ## Scheduler's worker pool size, i.e. the number of write threads. ## It should be less than total CPU cores. When there are frequent write operations, set it to a @@ -206,35 +275,83 @@ ## Set it to 0 will cause no space is reserved at all. It's generally used for tests. # reserve-space = "5GB" -[storage.block-cache] -## Whether to create a shared block cache for all RocksDB column families. +## Reserve some space for raft disk if raft disk is separated deployed with kv disk. +## `max(reserve-raft-space, raft disk capacity * 5%)` will be reserved exactly. ## +## Set it to 0 will cause no space is reserved at all. It's generally used for tests. +# reserve-raft-space = "1GB" + +## The maximum recovery time after rocksdb detects restorable background errors. When the data belonging +## to the data range is damaged, it will be reported to PD through heartbeat, and PD will add `remove-peer` +## operator to remove this damaged peer. When the damaged peer still exists in the current store, the +## corruption SST files remain, and the KV storage engine can still put new content normally, but it +## will return error when reading corrupt data range. +## +## If after this time, the peer where the corrupted data range located has not been removed from the +## current store, TiKV will panic. +## +## Set to 0 to disable this feature if you want to panic immediately when encountering such an error. +# background-error-recovery-window = "1h" + ## Block cache is used by RocksDB to cache uncompressed blocks. Big block cache can speed up read. ## It is recommended to turn on shared block cache. Since only the total cache size need to be ## set, it is easier to config. In most cases it should be able to auto-balance cache usage ## between column families with standard LRU algorithm. -## -## The rest of config in the storage.block-cache session is effective only when shared block cache -## is on. -# shared = true +[storage.block-cache] ## Size of the shared block cache. Normally it should be tuned to 30%-50% of system's total memory. -## When the config is not set, it is decided by the sum of the following fields or their default -## value: -## * rocksdb.defaultcf.block-cache-size or 25% of system's total memory -## * rocksdb.writecf.block-cache-size or 15% of system's total memory -## * rocksdb.lockcf.block-cache-size or 2% of system's total memory -## * raftdb.defaultcf.block-cache-size or 2% of system's total memory ## ## To deploy multiple TiKV nodes on a single physical machine, configure this parameter explicitly. ## Otherwise, the OOM problem might occur in TiKV. -# capacity = "1GB" +## +## When storage.engine is "raft-kv", default value is 45% of available system memory. +## When storage.engine is "partitioned-raft-kv", default value is 30% of available system memory. +# capacity = "0B" + +[storage.flow-control] +## Flow controller is used to throttle the write rate at scheduler level, aiming +## to substitute the write stall mechanism of RocksDB. It features in two points: +## * throttle at scheduler, so raftstore and apply won't be blocked anymore +## * better control on the throttle rate to avoid QPS drop under heavy write +## +## Support change dynamically. 
+## When enabled, it disables kvdb's write stall and raftdb's write stall(except memtable) and vice versa. +# enable = true + +## When the number of immutable memtables of kvdb reaches the threshold, the flow controller begins to work +# memtables-threshold = 5 + +## When the number of SST files of level-0 of kvdb reaches the threshold, the flow controller begins to work +# l0-files-threshold = 20 + +## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to +## reject some write requests with `ServerIsBusy` error. +# soft-pending-compaction-bytes-limit = "192GB" + +## When the number of pending compaction bytes of kvdb reaches the threshold, the flow controller begins to +## reject all write requests with `ServerIsBusy` error. +# hard-pending-compaction-bytes-limit = "1024GB" + +[storage.io-rate-limit] +## Maximum I/O bytes that this server can write to or read from disk (determined by mode) +## in one second. Internally it prefers throttling background operations over foreground +## ones. This value should be set to the disk's optimal IO bandwidth, e.g. maximum IO +## bandwidth specified by cloud disk vendors. +## +## When set to zero, disk IO operations are not limited. +# max-bytes-per-sec = "0MB" + +## Determine which types of IO operations are counted and restrained below threshold. +## Three different modes are: write-only, read-only, all-io. +## +## Only write-only mode is supported for now. +# mode = "write-only" [pd] ## PD endpoints. -# endpoints = [] +endpoints = ["127.0.0.1:2379"] -## The interval at which to retry a PD connection initialization. +## The interval at which to retry a PD connection. ## Default is 300ms. # retry-interval = "300ms" @@ -256,7 +373,7 @@ ## The path to RaftDB directory. ## If not set, it will be `{data-dir}/raft`. -## If there are multiple disks on the machine, storing the data of Raft RocksDB on differen disks +## If there are multiple disks on the machine, storing the data of Raft RocksDB on a different disk ## can improve TiKV performance. # raftdb-path = "" @@ -277,6 +394,9 @@ ## Store heartbeat tick interval for reporting to PD. # pd-store-heartbeat-tick-interval = "10s" +## Store min resolved ts tick interval for reporting to PD. +# pd-report-min-resolved-ts-interval = "1s" + ## The threshold of triggering Region split check. ## When Region size change exceeds this config, TiKV will check whether the Region should be split ## or not. To reduce the cost of scanning data in the checking process, you can set the value to @@ -289,26 +409,33 @@ ## When the number of Raft entries exceeds the max size, TiKV rejects to propose the entry. # raft-entry-max-size = "8MB" +## Interval to compact unnecessary Raft log. +# raft-log-compact-sync-interval = "2s" + ## Interval to GC unnecessary Raft log. -# raft-log-gc-tick-interval = "10s" +# raft-log-gc-tick-interval = "3s" ## Threshold to GC stale Raft log, must be >= 1. # raft-log-gc-threshold = 50 ## When the entry count exceeds this value, GC will be forced to trigger. -# raft-log-gc-count-limit = 72000 +# raft-log-gc-count-limit = 73728 ## When the approximate size of Raft log entries exceeds this value, GC will be forced trigger. ## It's recommanded to set it to 3/4 of `region-split-size`. # raft-log-gc-size-limit = "72MB" +## Old Raft logs could be reserved if `raft_log_gc_threshold` is not reached. +## GC them after ticks `raft_log_reserve_max_ticks` times. +# raft_log_reserve_max_ticks = 6 + ## Raft engine is a replaceable component. 
For some implementations, it's necessary to purge ## old log files to recycle disk space ASAP. # raft-engine-purge-interval = "10s" ## How long the peer will be considered down and reported to PD when it hasn't been active for this ## time. -# max-peer-down-duration = "5m" +# max-peer-down-duration = "10m" ## Interval to check whether to start manual compaction for a Region. # region-compact-check-interval = "5m" @@ -325,14 +452,23 @@ ## exceeds `region-compact-tombstones-percent`. # region-compact-tombstones-percent = 30 +## The minimum number of duplicated MVCC keys to trigger manual compaction. +# region-compact-min-redundant-rows = 50000 + +## The minimum percentage of duplicated MVCC keys to trigger manual compaction. +## It should be set between 1 and 100. Manual compaction is only triggered when the number of +## duplicated MVCC keys exceeds `region-compact-min-redundant-rows` and the percentage of duplicated MVCC keys +## exceeds `region-compact-redundant-rows-percent`. +# region-compact-redundant-rows-percent = 20 + ## Interval to check whether to start a manual compaction for Lock Column Family. ## If written bytes reach `lock-cf-compact-bytes-threshold` for Lock Column Family, TiKV will ## trigger a manual compaction for Lock Column Family. # lock-cf-compact-interval = "10m" # lock-cf-compact-bytes-threshold = "256MB" -## Interval (s) to check Region whether the data are consistent. -# consistency-check-interval = 0 +## Interval to check region whether the data is consistent. +# consistency-check-interval = "0s" ## Interval to clean up import SST files. # cleanup-import-sst-interval = "10m" @@ -343,6 +479,16 @@ ## Use how many threads to handle raft messages # store-pool-size = 2 +## Use how many threads to handle raft io tasks +## If it is 0, it means io tasks are handled in store threads. +# store-io-pool-size = 1 + +## When the size of raft db writebatch exceeds this value, write will be triggered. +# raft-write-size-limit = "1MB" + +## threads to generate raft snapshots +# snap-generator-pool-size = 2 + [coprocessor] ## When it is set to `true`, TiKV will try to split a Region with table prefix if that Region ## crosses tables. @@ -368,6 +514,12 @@ ## Set to "mvcc" to do consistency check for MVCC data, or "raw" for raw data. # consistency-check-method = "mvcc" +[coprocessor-v2] +## Path to the directory where compiled coprocessor plugins are located. +## Plugins in this directory will be automatically loaded by TiKV. +## If the config value is not set, the coprocessor plugin will be disabled. +# coprocessor-plugin-directory = "./coprocessors" + [rocksdb] ## Maximum number of threads of RocksDB background jobs. ## The background tasks include compaction and flush. For detailed information why RocksDB needs to @@ -392,8 +544,8 @@ ## Value -1 means files opened are always kept open and RocksDB will prefetch index and filter ## blocks into block cache at startup. So if your database has a large working set, it will take ## several minutes to open the DB. You may need to increase this if your database has a large -## working set. You can estimate the number of files based on `target-file-size-base` and -## `target_file_size_multiplier` for level-based compaction. +## working set. You can estimate the number of files based on `target-file-size-base` for +## level-based compaction. # max-open-files = 40960 ## Max size of RocksDB's MANIFEST file. @@ -404,20 +556,19 @@ # create-if-missing = true ## RocksDB Write-Ahead Logs (WAL) recovery mode. 
-## 0 : TolerateCorruptedTailRecords, tolerate incomplete record in trailing data on all logs; -## 1 : AbsoluteConsistency, We don't expect to find any corruption in the WAL; -## 2 : PointInTimeRecovery, Recover to point-in-time consistency; -## 3 : SkipAnyCorruptedRecords, Recovery after a disaster; -# wal-recovery-mode = 2 +## "tolerate-corrupted-tail-records", tolerate incomplete record in trailing data on all logs; +## "absolute-consistency", We don't expect to find any corruption in the WAL; +## "point-in-time", Recover to point-in-time consistency; +## "skip-any-corrupted-records", Recovery after a disaster; +# wal-recovery-mode = "point-in-time" -## RocksDB WAL directory. +## KV RocksDB WAL directory. ## This config specifies the absolute directory path for WAL. -## If it is not set, the log files will be in the same directory as data. When you set the path to -## RocksDB directory in memory like in `/dev/shm`, you may want to set`wal-dir` to a directory on a -## persistent storage. See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database . +## If it is not set, the log files will be in the same directory as data. ## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can ## improve performance. -# wal-dir = "/tmp/tikv/store" +## Do not set this config the same as `raftdb.wal-dir`. +# wal-dir = "" ## The following two fields affect how archived WAL will be deleted. ## 1. If both values are set to 0, logs will be deleted ASAP and will not get into the archive. @@ -435,15 +586,13 @@ # wal-size-limit = 0 ## Max RocksDB WAL size in total +## When storage.engine is "raft-kv", default value is 4GB. +## When storage.engine is "partitioned-raft-kv", default value is 1. # max-total-wal-size = "4GB" -## RocksDB Statistics provides cumulative stats over time. -## Turning statistics on will introduce about 5%-10% overhead for RocksDB, but it can help you to -## know the internal status of RocksDB. -# enable-statistics = true - ## Dump statistics periodically in information logs. -## Same as RocksDB's default value (10 min). +## When storage.engine is "raft-kv", default value is 10m. +## When storage.engine is "partitioned-raft-kv", default value is 0. # stats-dump-period = "10m" ## Refer to: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ @@ -466,18 +615,19 @@ ## 2. rate-limiter-refill-period controls how often IO tokens are refilled. Smaller value will flatten ## IO bursts while introducing more CPU overhead. ## 3. rate-limiter-mode indicates which types of operations count against the limit. -## 1 : ReadOnly -## 2 : WriteOnly -## 3 : AllIo +## "read-only" +## "write-only" +## "all-io" ## 4. rate-limiter-auto_tuned enables dynamic adjustment of rate limit within the range ## [10MB/s, rate_bytes_per_sec], according to the recent demand for background I/O. # rate-bytes-per-sec = "10GB" # rate-limiter-refill-period = "100ms" -# rate-limiter-mode = 2 +# rate-limiter-mode = "write-only" # rate-limiter-auto-tuned = true -## Enable or disable the pipelined write. -# enable-pipelined-write = true +## Enable or disable the pipelined write. If set false, RocksDB will use a new write mode port from cockroachdb/pebble. +## See more details in https://github.com/tikv/rocksdb/pull/267 and https://github.com/tikv/tikv/issues/12059. +# enable-pipelined-write = false ## Allows OS to incrementally sync files to disk while they are being written, asynchronously, ## in the background. 
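One practical consequence visible in the hunks above is that several RocksDB options that previously took integer codes now take named string values. A minimal sketch of the new syntax for a hand-maintained /etc/tikv/tikv.toml, using only the defaults quoted in this template (illustrative, not tuning advice):

[rocksdb]
wal-recovery-mode = "point-in-time"          # previously the integer code 2
rate-limiter-mode = "write-only"             # previously the integer code 2

[rocksdb.defaultcf]
compaction-pri = "min-overlapping-ratio"     # previously the integer code 3
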
@@ -505,19 +655,29 @@ ## absolute path will be used as the log file name's prefix. # info-log-dir = "" -# RocksDB log levels +## RocksDB log levels # info-log-level = "info" +## Memory usage limit for Raft Engine. Undersized write buffers will be flushed to satisfy the +## requirement. +## +## No limit when not specified. +## +## When storage.engine is "raft-kv", default is no limit. +## When storage.engine is "partitioned-raft-kv", default value is 25% of available system memory or +## 15GiB, whichever is smaller. +# write-buffer-limit = "0B" + ## Options for `Titan`. [rocksdb.titan] ## Enables or disables `Titan`. Note that Titan is still an experimental feature. Once ## enabled, it can't fall back. Forced fallback may result in data loss. -## default: false -# enabled = false +## Titan is default on since v7.6.0. This won't affect deployments existed before v7.6.0. +# enabled = true ## Maximum number of threads of `Titan` background gc jobs. -# default: 4 -# max-background-gc = 4 +## default: 1 +# max-background-gc = 1 ## Options for "Default" Column Family, which stores actual user data. [rocksdb.defaultcf] @@ -556,7 +716,7 @@ ## The data block size. RocksDB compresses data based on the unit of block. ## Similar to page in other databases, block is the smallest unit cached in block-cache. Note that ## the block size specified here corresponds to uncompressed data. -# block-size = "64KB" +# block-size = "32KB" ## If you're doing point lookups you definitely want to turn bloom filters on. We use bloom filters ## to avoid unnecessary disk reads. Default bits_per_key is 10, which yields ~1% false positive @@ -568,6 +728,12 @@ ## filter. # block-based-bloom-filter = false +## Use Ribbon filter for levels higher or equal to this value. Use non-block-based bloom filter for +## lower levels. When this is set, `block-based-bloom-filter` will be ignored. +## Only effective for `format-version` >= 5. +## Disabled by default. +## ribbon-filter-above-level = 0 + # level0-file-num-compaction-trigger = 4 ## Soft limit on number of level-0 files. @@ -579,7 +745,7 @@ ## Maximum number of level-0 files. ## When the number of SST files of level-0 reaches the limit of `level0-stop-writes-trigger`, ## RocksDB stalls the new write operation. -# level0-stop-writes-trigger = 36 +# level0-stop-writes-trigger = 20 ## Amount of data to build up in memory (backed by an unsorted log on disk) before converting to a ## sorted on-disk file. It is the RocksDB MemTable size. @@ -625,11 +791,17 @@ # max-compaction-bytes = "2GB" ## There are four different compaction priorities. -## 0 : ByCompensatedSize -## 1 : OldestLargestSeqFirst -## 2 : OldestSmallestSeqFirst -## 3 : MinOverlappingRatio -# compaction-pri = 3 +## "by-compensated-size" +## "oldest-largest-seq-first" +## "oldest-smallest-seq-first" +## "min-overlapping-ratio" +# compaction-pri = "min-overlapping-ratio" + +## Refer to storage.flow-control.soft-pending-compaction-bytes-limit. +# soft-pending-compaction-bytes-limit = "192GB" + +## Refer to storage.flow-control.hard-pending-compaction-bytes-limit. +# hard-pending-compaction-bytes-limit = "1000GB" ## Indicating if we'd put index/filter blocks to the block cache. ## If not specified, each "table reader" object will pre-load index/filter block during table @@ -658,6 +830,10 @@ ## while using `Raw` mode. # optimize-filters-for-hits = true +## Option to generate Bloom/Ribbon filters that minimize memory internal fragmentation. +## Only effective for `format-version` >= 5. 
+# optimize-filters-for-memory = false + ## Enable compaction guard, which is an optimization to split SST files at TiKV region boundaries. ## The optimization can help reduce compaction IO, and allow us to use larger SST file size ## (thus less SST files overall) while making sure we can still efficiently cleanup stale data on @@ -674,12 +850,86 @@ ## for the same CF. # compaction-guard-max-output-file-size = "128M" +## Available versions: +## +## 0 -- This version can be read by all TiKV releases. Doesn't support changing +## checksum type (default is CRC32). +## +## 1 -- Can be read by all TiKV releases. Supports non-default checksum, like +## xxHash. It is written by RocksDB when BlockBasedTableOptions::checksum is +## something other than kCRC32c. (version 0 is silently upconverted) +## +## 2 -- Can be read by all TiKV releases. Changes the way we encode compressed +## blocks with LZ4, BZip2 and Zlib compression. +## +## 3 -- Can be read by TiKV's versions since 2.1. Changes the way we encode the +## keys in index blocks. +## This option only affects newly written tables. When reading existing tables, +## the information about version is read from the footer. +## +## 4 -- Can be read by TiKV's versions since 3.0. Changes the way we encode the +## values in index blocks. +## This option only affects newly written tables. When reading existing tables, +## the information about version is read from the footer. +## +## 5 -- Can be read by TiKV's versions since 6.3. Full and partitioned filters +## use a generally faster and more accurate Bloom filter implementation, with a +## different schema. +## +## When storage.engine is "raft-kv", default value is 2. +## When storage.engine is "partitioned-raft-kv", default value is 5. +## +# format-version = 2 + +## If enabled, prepopulate warm/hot blocks (data, uncompressed dict, index and +## filter blocks) which are already in memory into block cache at the time of +## flush. On a flush, the block that is in memory (in memtables) get flushed +## to the device. If using Direct IO, additional IO is incurred to read this +## data back into memory again, which is avoided by enabling this option. This +## further helps if the workload exhibits high temporal locality, where most +## of the reads go to recently written data. This also helps in case of +## Distributed FileSystem. +## +## disabled: kDisabled +## flush-only: kFlushOnly +## +# prepopulate-block-cache = "disabled" + +## Use the specified checksum type. Newly created table files will be +## protected with this checksum type. Old table files will still be readable, +## even though they have different checksum type. +## +## no: kNoChecksum +## crc32c: kCRC32c +## xxhash: kxxHash +## xxhash64: kxxHash64 +## xxh3: kXXH3 (supported since TiKV 6.2) +## +# checksum = "crc32c" + +## The maximum number of concurrent compaction tasks. 0 stands for no limit. +# max-compactions = 0 + +## SST files containing updates older than TTL will go through the compaction +## process. This usually happens in a cascading way so that those entries +## will be compacted to bottommost level/file. Disabled as default. +## +## Default: 0s. +# ttl = "0s" + +## SST files older than this value will be picked up for compaction, and +## re-written to the same level as they were before. Disabled as default. +## +## Default: 0s. +# periodic-compaction-seconds = "0s" + ## Options for "Default" Column Family for `Titan`. [rocksdb.defaultcf.titan] ## The smallest value to store in blob files. 
Value smaller than ## this threshold will be inlined in base DB. -## default: 1KB -# min-blob-size = "1KB" +## The default value is 32KB since v7.6.0. But it won't affect deployments existed +## before v7.6.0 of which the default value is 1KB. +# min-blob-size = "32KB" ## The compression algorithm used to compress data in blob files. ## Compression method. @@ -690,26 +940,43 @@ ## lz4: kLZ4Compression ## lz4hc: kLZ4HCCompression ## zstd: kZSTD -# default: lz4 -# blob-file-compression = "lz4" +## default: zstd +# blob-file-compression = "zstd" + +## Set blob file zstd dictionary compression, default(0) will use zstd compression. +## It is recommended to set the dictionary size to values such as 4k or 16k. Additionally, +## the sample data size to train dictionary is of size 100X dictionary size innerly. +## It has no effect when `blob-file-compression` is not `zstd`. +## default: 0 +# zstd-dict-size = 0 + +## Whether to share blob cache with block cache. If set to true, Titan would use the shared block +## cache configured in `storage.block_cache` and ignore the setting of `blob-cache-size`. +## default: true +# shared-blob-cache = true ## Specifics cache size for blob records -# default: 0 +## default: 0 # blob-cache-size = "0GB" ## If the ratio of discardable size of a blob file is larger than ## this threshold, the blob file will be GCed out. -# default: 0.5 +## default: 0.5 # discardable-ratio = 0.5 -## The mode used to process blob files. In read-only mode Titan -## stops writing value into blob log. In fallback mode Titan -## converts blob index into real value on flush and compaction. -## This option is especially useful for downgrading Titan. +## The mode used to process blob files. In read-only mode Titan stops writing +## value into blob log. In fallback mode Titan converts blob index into real +## value on flush and compaction. +## +## This option can be used to disable Titan. More specifically, to disable +## Titan, set this option to fallback and perform a full compaction using +## tikv-ctl. Then, monitor the blob file size metrics. After the blob file size +## decreases to 0, you can set rocksdb.titan.enabled to false and restart TiKV. +## ## default: kNormal ## read-only: kReadOnly ## fallback: kFallback -# default: normal +## default: normal # blob-run-mode = "normal" ## If set true, values in blob file will be merged to a new blob file while @@ -722,17 +989,13 @@ ## ## Requirement: level_compaction_dynamic_level_base = true ## default: false -# level_merge = false - -## Use merge operator to rewrite GC blob index. -## default: false -# gc-merge-rewrite = false +# level-merge = false ## Options for "Write" Column Family, which stores MVCC commit information [rocksdb.writecf] ## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`. # compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] -# block-size = "64KB" +# block-size = "32KB" ## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`. 
# write-buffer-size = "128MB" @@ -745,16 +1008,22 @@ # level0-file-num-compaction-trigger = 4 # level0-slowdown-writes-trigger = 20 -# level0-stop-writes-trigger = 36 +# level0-stop-writes-trigger = 20 # cache-index-and-filter-blocks = true # pin-l0-filter-and-index-blocks = true -# compaction-pri = 3 +# compaction-pri = "min-overlapping-ratio" +# soft-pending-compaction-bytes-limit = "192GB" +# hard-pending-compaction-bytes-limit = "1000GB" # read-amp-bytes-per-bit = 0 # dynamic-level-bytes = true # optimize-filters-for-hits = false # enable-compaction-guard = true # compaction-guard-min-output-file-size = "8M" # compaction-guard-max-output-file-size = "128M" +# format-version = 2 +# prepopulate-block-cache = "disabled" +# checksum = "crc32c" +# max-compactions = 0 [rocksdb.lockcf] # compression-per-level = ["no", "no", "no", "no", "no", "no", "no"] @@ -766,14 +1035,20 @@ # target-file-size-base = "8MB" # level0-file-num-compaction-trigger = 1 # level0-slowdown-writes-trigger = 20 -# level0-stop-writes-trigger = 36 +# level0-stop-writes-trigger = 20 # cache-index-and-filter-blocks = true # pin-l0-filter-and-index-blocks = true -# compaction-pri = 0 +# compaction-pri = "by-compensated-size" +# soft-pending-compaction-bytes-limit = "192GB" +# hard-pending-compaction-bytes-limit = "1000GB" # read-amp-bytes-per-bit = 0 # dynamic-level-bytes = true # optimize-filters-for-hits = false # enable-compaction-guard = false +# format-version = 2 +# prepopulate-block-cache = "disabled" +# checksum = "crc32c" +# max-compactions = 0 [raftdb] # max-background-jobs = 4 @@ -782,14 +1057,21 @@ # max-manifest-file-size = "20MB" # create-if-missing = true -# enable-statistics = true # stats-dump-period = "10m" +## Raft RocksDB WAL directory. +## This config specifies the absolute directory path for WAL. +## If it is not set, the log files will be in the same directory as data. +## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can +## improve performance. +## Do not set this config the same as `rocksdb.wal-dir`. +# wal-dir = "" + # compaction-readahead-size = 0 # writable-file-max-buffer-size = "1MB" # use-direct-io-for-flush-and-compaction = false # enable-pipelined-write = true -# allow-concurrent-memtable-write = false +# allow-concurrent-memtable-write = true # bytes-per-sync = "1MB" # wal-bytes-per-sync = "512KB" @@ -798,7 +1080,6 @@ # info-log-keep-log-file-num = 10 # info-log-dir = "" # info-log-level = "info" -# optimize-filters-for-hits = true [raftdb.defaultcf] ## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`. @@ -816,25 +1097,98 @@ # level0-file-num-compaction-trigger = 4 # level0-slowdown-writes-trigger = 20 -# level0-stop-writes-trigger = 36 +# level0-stop-writes-trigger = 20 # cache-index-and-filter-blocks = true # pin-l0-filter-and-index-blocks = true -# compaction-pri = 0 +# compaction-pri = "by-compensated-size" +# soft-pending-compaction-bytes-limit = "192GB" +# hard-pending-compaction-bytes-limit = "1000GB" # read-amp-bytes-per-bit = 0 # dynamic-level-bytes = true # optimize-filters-for-hits = true # enable-compaction-guard = false +# format-version = 2 +# prepopulate-block-cache = "disabled" +# checksum = "crc32c" +# max-compactions = 0 [raft-engine] -## Enable raft-engine will ignore all settings about `raftdb`. -# enable = false -## Recovery mode. Candidates are `tolerate-corrupted-tail-records` and `absolute-consistency`. 
-# recovery_mode = "tolerate-corrupted-tail-records" -# bytes-per-sync = "256KB" +## Determines whether to use Raft Engine to store raft logs. When it is +## enabled, configurations of `raftdb` are ignored. +# enable = true + +## The directory at which raft log files are stored. If the directory does not +## exist, it will be created when TiKV is started. +## +## When this configuration is not set, `{data-dir}/raft-engine` is used. +## +## If there are multiple disks on your machine, it is recommended to store the +## data of Raft Engine on a different disk to improve TiKV performance. +# dir = "" + +## Specifies the threshold size of a log batch. A log batch larger than this +## configuration is compressed. +## +## If you set this configuration item to `0`, compression is disabled. +# batch-compression-threshold = "8KB" + +## Specifies the maximum size of log files. When a log file is larger than this +## value, it is rotated. # target-file-size = "128MB" + +## Specifies the threshold size of the main log queue. When this configuration +## value is exceeded, the main log queue is purged. +## +## This configuration can be used to adjust the disk space usage of Raft +## Engine. # purge-threshold = "10GB" -## Raft engine has builtin entry cache. `cache-limit` limits the memory usage of the cache. -# cache-limit = "1GB" + +## Determines how to deal with file corruption during recovery. +## +## Candidates: +## absolute-consistency +## tolerate-tail-corruption +## tolerate-any-corruption +# recovery-mode = "tolerate-tail-corruption" + +## The minimum I/O size for reading log files during recovery. +## +## Default: "16KB". Minimum: "512B". +# recovery-read-block-size = "16KB" + +## The number of threads used to scan and recover log files. +## +## Default: 4. Minimum: 1. +# recovery-threads = 4 + +## Memory usage limit for Raft Engine. +## When it's not set, 15% of available system memory will be used. +# memory-limit = "1GB" + +## Version of the log file in Raft Engine. +## +## Candidates: +## 1: Can be read by TiKV release 6.1 and above. +## 2: Can be read by TiKV release 6.3 and above. Supports log recycling. +## +## Default: 2. +# format-version = 2 + +## Whether to recycle stale log files in Raft Engine. +## If `true`, logically purged log files will be reserved for recycling. +## Only available for `format-version` >= 2. This option is only +## available when TiKV >= 6.3.x. +## +## Default: true. +# enable-log-recycle = true + +## Whether to prepare log files for recycling when start. +## If `true`, batch empty log files will be prepared for recycling when +## starting engine. +## Only available for `enable-log-reycle` is true. +## +## Default: false +# prefill-for-recycle = false [security] ## The path for TLS certificates. Empty string means disabling secure connections. @@ -848,11 +1202,12 @@ ## Default is false. # redact-info-log = false -# Configurations for encryption at rest. Experimental. +## Configurations for encryption at rest. Experimental. [security.encryption] ## Encryption method to use for data files. -## Possible values are "plaintext", "aes128-ctr", "aes192-ctr" and "aes256-ctr". Value other than -## "plaintext" means encryption is enabled, in which case master key must be specified. +## Possible values are "plaintext", "aes128-ctr", "aes192-ctr", "aes256-ctr" and "sm4-ctr". +## Value other than "plaintext" means encryption is enabled, in which case +## master key must be specified. 
# data-encryption-method = "plaintext" ## Specifies how often TiKV rotates data encryption key. @@ -872,7 +1227,7 @@ ## Plaintext as master key means no master key is given and only applicable when ## encryption is not enabled, i.e. data-encryption-method = "plaintext". This type doesn't ## have sub-config items. Example: -## +## ## [security.encryption.master-key] ## type = "plaintext" ## @@ -896,7 +1251,7 @@ ## ## Supply a custom encryption key stored in a file. It is recommended NOT to use in production, ## as it breaks the purpose of encryption at rest, unless the file is stored in tempfs. -## The file must contain a 256-bits (32 bytes, regardless of key length implied by +## The file must contain a 256-bits (32 bytes, regardless of key length implied by ## data-encryption-method) key encoded as hex string and end with newline ("\n"). Example: ## ## [security.encryption.master-key] @@ -921,17 +1276,39 @@ [backup] ## Number of threads to perform backup tasks. -## The default value is set to min(CPU_NUM * 0.75, 32). -# num-threads = 24 +## The default value is set to min(CPU_NUM * 0.5, 8). +# num-threads = 8 ## Number of ranges to backup in one batch. -# batch = 8 +# batch-size = 8 ## When Backup region [a,e) size exceeds `sst-max-size`, it will be backuped into several Files [a,b), ## [b,c), [c,d), [d,e) and the size of [a,b), [b,c), [c,d) will be `sst-max-size` (or a ## little larger). # sst-max-size = "144MB" +## Automatically reduce the number of backup threads when the current workload is high, +## in order to reduce impact on the cluster's performance during back up. +# enable-auto-tune = true + +[log-backup] +## Number of threads to perform backup stream tasks. +## The default value is CPU_NUM * 0.5, and limited to [2, 12]. +# num-threads = 8 + +## enable this feature. TiKV will starts watch related tasks in PD. and backup kv changes to storage accodring to task. +## The default value is false. +# enable = true + +[backup.hadoop] +## let TiKV know how to find the hdfs shell command. +## Equivalent to the $HADOOP_HOME enviroment variable. +# home = "" + +## TiKV will run the hdfs shell command under this linux user. +## TiKV will use the current linux user if not provided. +# linux-user = "" + [pessimistic-txn] ## The default and maximum delay before responding to TiDB when pessimistic ## transactions encounter locks @@ -943,10 +1320,15 @@ ## one more likely acquires the lock. # wake-up-delay-duration = "20ms" -## Enable pipelined pessimistic lock, only effect when processing perssimistic transactions -## Enabled this will improve performance, but slightly increase the transcation failure rate +## Enable pipelined pessimistic lock, only effect when processing perssimistic transactions. +## Enabling this will improve performance, but slightly increase the transaction failure rate # pipelined = true +## Enable in-memory pessimistic lock, only effect when processing perssimistic transactions. +## Enabling this will improve performance, but slightly increase the transaction failure rate. +## It only takes effect when `pessimistic-txn.pipelined` is also set to true. +# in-memory = true + [gc] ## The number of keys to GC in one batch. # batch-keys = 512 @@ -960,3 +1342,4 @@ ## Garbage ratio threshold to trigger a GC. # ratio-threshold = 1.1 + |
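For anyone carrying local changes to /etc/tikv/tikv.toml across this update, a minimal, illustrative override built only from settings discussed in the template above; the data path and cache size below are placeholders, not recommendations:

## Illustrative sketch only; adjust to your deployment.
[pd]
endpoints = ["127.0.0.1:2379"]   # the one setting the packaged template leaves uncommented

[storage]
data-dir = "/var/lib/tikv"       # placeholder; the template default is "./"

[storage.block-cache]
capacity = "4GB"                 # placeholder; the template suggests 30%-50% of system memory

[raft-engine]
enable = true                    # when Raft Engine is enabled, all [raftdb] settings are ignored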