author     Allen Zhong  2021-05-06 12:02:21 +0800
committer  Allen Zhong  2021-05-06 12:02:21 +0800
commit     d91bba9454d4ade0d6213eb15b6ea4dd8d929469 (patch)
tree       957090b0bffce28e54c6063667a6aa5fe6de782d
parent     bf2f2a4262f94ebe23639c8d6f5137246603c1ca (diff)
download   aur-d91bba9454d4ade0d6213eb15b6ea4dd8d929469.tar.gz
upgpkg: tidb-bin 5.0.1-1
-rw-r--r--  .SRCINFO    21
-rw-r--r--  PKGBUILD    14
-rw-r--r--  pd.toml    180
-rw-r--r--  tidb.toml  292
-rw-r--r--  tikv.toml  949
5 files changed, 1123 insertions, 333 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 998d382fcd6a..39343a7a24be 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,6 +1,6 @@
pkgbase = tidb-bin
pkgdesc = A distributed NewSQL database compatible with MySQL protocol
- pkgver = 5.0.0
+ pkgver = 5.0.1
pkgrel = 1
url = https://github.com/pingcap/tidb
arch = x86_64
@@ -12,7 +12,10 @@ pkgbase = tidb-bin
conflicts = tikv-pd
options = strip
options = debug
- source = https://download.pingcap.org/tidb-v5.0.0-linux-amd64.tar.gz
+ backup = etc/tidb/tidb.toml
+ backup = etc/tikv/tikv.toml
+ backup = etc/pd/pd.toml
+ source = https://download.pingcap.org/tidb-v5.0.1-linux-amd64.tar.gz
source = pd.service
source = tidb.service
source = tikv.service
@@ -25,7 +28,7 @@ pkgbase = tidb-bin
source = pd.toml
source = tidb.toml
source = tikv.toml
- sha256sums = 8c2b8d5e6dd9f51a5950f11401416b71ee32b2bff46ff2906ebecf6ffbe79f59
+ sha256sums = e7b6568c1ed153863383481fcfe3012ce261b224ff8e772fb04f2af992fbba32
sha256sums = b03d12f2f8d6eb2e9d654d6258ca39000225cdf1418840f7e35081631bc4d924
sha256sums = 22318c19bb89ff5a0852df5186cc1496214cd49f2264192413a326d1e8c93dc9
sha256sums = 870b8eaf83bc0d22b05b0f3a7890660e483cf77bb1d84bc50ad04fb23068cd8c
@@ -35,15 +38,15 @@ pkgbase = tidb-bin
sha256sums = 30ce83fbec8f102c30e438282bb5b18c026d08480f2386d68f1116c12481bf66
sha256sums = 744b252e29099b0099dc41e30bc3badd33b3d661c7126af8044faa4fc2df8927
sha256sums = 935291bac6a216c6f880df9bfaec8900266413bb202ac483e79f291e1f28e9f1
- sha256sums = 11bc441dfd0327c56218f214a9869da20ccdf7e5265c2f5ffca45089ba8094db
- sha256sums = a34a8ca1f13c965cc0e872fc671f377b64a80cc11225cd6359bf7415b4c86a06
- sha256sums = 248790d756d15322ed7af13f30525744c472190ac68a26b486c5eed24427abdf
+ sha256sums = 10d3fdb40e522c8731e87a7ba2acc6a7866d8f967a6b931247ae8ff97709065a
+ sha256sums = f32709894c0d2c105a4398dcaf027f1cbdee359a2a6747f43cac819e9df25517
+ sha256sums = be2f8c6830a48da6c356db943aa55ee2f3c9c30b2e9027e7b758cab875fc8520
pkgname = tidb-bin
install = tidb.install
- provides = tidb-server=5.0.0
- provides = tikv-server=5.0.0
- provides = pd-server=5.0.0
+ provides = tidb-server=5.0.1
+ provides = tikv-server=5.0.1
+ provides = pd-server=5.0.1
pkgname = tidb-bin-utils
optdepends = go-tools: provides goyacc
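Note that .SRCINFO is not edited by hand but generated from the PKGBUILD; a typical way to regenerate it after a version bump (a sketch, assuming makepkg from pacman is available in the build directory) is:

    # regenerate .SRCINFO from the updated PKGBUILD
    makepkg --printsrcinfo > .SRCINFO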
diff --git a/PKGBUILD b/PKGBUILD
index 3522007120e1..353683fcf82e 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -3,7 +3,7 @@
# Contributor: Jian Zeng <anonymousknight96@gmail.com>
# Contributor: Xuanwo <xuanwo@archlinuxcn.org>
pkgbase=tidb-bin
-_basever=5.0.0
+_basever=5.0.1
#_relver=-prega
#pkgver=$_basever.$_relver
pkgver=$_basever
@@ -16,6 +16,10 @@ license=('APACHE')
depends=('gcc-libs')
conflicts=('tidb-bin-nightly' 'tidb' 'tikv' 'tikv-pd')
options=('strip' 'debug')
+backup=(etc/tidb/tidb.toml
+ etc/tikv/tikv.toml
+ etc/pd/pd.toml
+)
source=("https://download.pingcap.org/tidb-v$_basever$_relver-linux-amd64.tar.gz"
pd.service
tidb.service
@@ -29,7 +33,7 @@ source=("https://download.pingcap.org/tidb-v$_basever$_relver-linux-amd64.tar.gz
pd.toml
tidb.toml
tikv.toml)
-sha256sums=('8c2b8d5e6dd9f51a5950f11401416b71ee32b2bff46ff2906ebecf6ffbe79f59'
+sha256sums=('e7b6568c1ed153863383481fcfe3012ce261b224ff8e772fb04f2af992fbba32'
'b03d12f2f8d6eb2e9d654d6258ca39000225cdf1418840f7e35081631bc4d924'
'22318c19bb89ff5a0852df5186cc1496214cd49f2264192413a326d1e8c93dc9'
'870b8eaf83bc0d22b05b0f3a7890660e483cf77bb1d84bc50ad04fb23068cd8c'
@@ -39,9 +43,9 @@ sha256sums=('8c2b8d5e6dd9f51a5950f11401416b71ee32b2bff46ff2906ebecf6ffbe79f59'
'30ce83fbec8f102c30e438282bb5b18c026d08480f2386d68f1116c12481bf66'
'744b252e29099b0099dc41e30bc3badd33b3d661c7126af8044faa4fc2df8927'
'935291bac6a216c6f880df9bfaec8900266413bb202ac483e79f291e1f28e9f1'
- '11bc441dfd0327c56218f214a9869da20ccdf7e5265c2f5ffca45089ba8094db'
- 'a34a8ca1f13c965cc0e872fc671f377b64a80cc11225cd6359bf7415b4c86a06'
- '248790d756d15322ed7af13f30525744c472190ac68a26b486c5eed24427abdf')
+ '10d3fdb40e522c8731e87a7ba2acc6a7866d8f967a6b931247ae8ff97709065a'
+ 'f32709894c0d2c105a4398dcaf027f1cbdee359a2a6747f43cac819e9df25517'
+ 'be2f8c6830a48da6c356db943aa55ee2f3c9c30b2e9027e7b758cab875fc8520')
_package() {
provides=("tidb-server=$_basever" "tikv-server=$_basever" "pd-server=$_basever")
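The replaced source checksum above must match the upstream 5.0.1 tarball. One way to refresh the sha256sums array after bumping _basever (a sketch, assuming pacman-contrib's updpkgsums or plain makepkg is available) is:

    # option 1: rewrite the sha256sums array in the PKGBUILD in place (pacman-contrib)
    updpkgsums
    # option 2: print fresh checksums for manual pasting
    makepkg -g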
diff --git a/pd.toml b/pd.toml
index 808f40ede708..bb261328836d 100644
--- a/pd.toml
+++ b/pd.toml
@@ -4,88 +4,192 @@ name = "pd"
data-dir = "/var/lib/pd"
client-urls = "http://127.0.0.1:2379"
-# if not set, use ${client-urls}
+## if not set, use ${client-urls}
advertise-client-urls = ""
peer-urls = "http://127.0.0.1:2380"
-# if not set, use ${peer-urls}
+## if not set, use ${peer-urls}
advertise-peer-urls = ""
initial-cluster = "pd=http://127.0.0.1:2380"
initial-cluster-state = "new"
+## set different tokens to prevent communication between PDs in different clusters.
+# initial-cluster-token = "pd-cluster"
+
lease = 3
tso-save-interval = "3s"
-namespace-classifier = "table"
+## Make sure you set the "zone" label for this PD server before enabling its Local TSO service.
+# enable-local-tso = true
enable-prevote = true
+[labels]
+## This means **this** PD server belongs to data center "dc-1". Setting it alone won't have any effect.
+## It should be used with enable-local-tso together if you want to make the Local TSO function work.
+# zone="dc-1"
+
[security]
-# Path of file that contains list of trusted SSL CAs. if set, following four settings shouldn't be empty
+## Path of file that contains a list of trusted SSL CAs. If set, the following four settings shouldn't be empty
cacert-path = ""
-# Path of file that contains X509 certificate in PEM format.
+## Path of file that contains X509 certificate in PEM format.
cert-path = ""
-# Path of file that contains X509 key in PEM format.
+## Path of file that contains X509 key in PEM format.
key-path = ""
+cert-allowed-cn = ["example.com"]
+
+[security.encryption]
+## Encryption method to use for PD data. One of "plaintext", "aes128-ctr", "aes192-ctr" and "aes256-ctr".
+## Defaults to "plaintext" if not set.
+# data-encryption-method = "plaintext"
+## Specifies how often PD rotates data encryption key. Default is 7 days.
+# data-key-rotation-period = "168h"
+
+## Specifies master key if encryption is enabled. There are three types of master key:
+##
+## * "plaintext":
+##
+## Plaintext as master key means no master key is given and only applicable when
+## encryption is not enabled, i.e. data-encryption-method = "plaintext". This type doesn't
+## have sub-config items. Example:
+##
+## [security.encryption.master-key]
+## type = "plaintext"
+##
+## * "kms":
+##
+## Use a KMS service to supply master key. Currently only AWS KMS is supported. This type of
+## master key is recommended for production use. Example:
+##
+## [security.encryption.master-key]
+## type = "kms"
+## ## KMS CMK key id. Must be a valid KMS CMK where the TiKV process has access to.
+## ##   In production it is recommended to grant TiKV access to the CMK using IAM.
+## key-id = "1234abcd-12ab-34cd-56ef-1234567890ab"
+## ## AWS region of the KMS CMK.
+## region = "us-west-2"
+## ## (Optional) AWS KMS service endpoint. Only required when non-default KMS endpoint is
+## ## desired.
+## endpoint = "https://kms.us-west-2.amazonaws.com"
+##
+## * "file":
+##
+## Supply a custom encryption key stored in a file. It is recommended NOT to use in production,
+## as it breaks the purpose of encryption at rest, unless the file is stored in tmpfs.
+## The file must contain a 256-bit (32 bytes, regardless of the key length implied by
+## data-encryption-method) key encoded as a hex string, ending with a newline ("\n"). Example:
+##
+## [security.encryption.master-key]
+## type = "file"
+## path = "/path/to/master/key/file"
+# [security.encryption.master-key]
+# type = "plaintext"
+
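For the "file" master key type described above, the key file must hold a single hex-encoded 256-bit key ending with a newline. A minimal sketch for generating one, assuming openssl is available and /etc/pd/master-key is a path of your choosing:

    # generate a 32-byte (256-bit) key, hex-encoded, with a trailing newline
    openssl rand -hex 32 > /etc/pd/master-key
    chmod 600 /etc/pd/master-key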
[log]
level = "info"
-# log format, one of json, text, console
-#format = "text"
+## log format, one of json, text, console
+# format = "text"
-# disable automatic timestamps in output
-#disable-timestamp = false
+## disable automatic timestamps in output
+# disable-timestamp = false
# file logging
[log.file]
-#filename = ""
-# max log file size in MB
-#max-size = 300
-# max log file keep days
-#max-days = 28
-# maximum number of old log files to retain
-#max-backups = 7
-# rotate log by day
-#log-rotate = true
+# filename = ""
+## max log file size in MB
+# max-size = 300
+## max log file keep days
+# max-days = 28
+## maximum number of old log files to retain
+# max-backups = 7
[metric]
-# prometheus client push interval, set "0s" to disable prometheus.
+## prometheus client push interval, set "0s" to disable prometheus.
interval = "15s"
-# prometheus pushgateway address, leaves it empty will disable prometheus.
+## Prometheus pushgateway address; leaving it empty disables prometheus push.
address = ""
+[pd-server]
+## The metric storage is the cluster metric storage, used for querying metric data.
+## Currently prometheus is used as the metric storage; PD/TiKV may be used as metric storage later.
+## For usability, it is recommended to temporarily set it to the prometheus address, e.g. http://127.0.0.1:9090
+metric-storage = ""
+
[schedule]
-max-merge-region-size = 0
-max-merge-region-keys = 0
+max-merge-region-size = 20
+max-merge-region-keys = 200000
split-merge-interval = "1h"
max-snapshot-count = 3
max-pending-peer-count = 16
max-store-down-time = "30m"
leader-schedule-limit = 4
-region-schedule-limit = 4
-replica-schedule-limit = 8
+region-schedule-limit = 2048
+replica-schedule-limit = 64
merge-schedule-limit = 8
-tolerant-size-ratio = 5.0
-
-# customized schedulers, the format is as below
-# if empty, it will use balance-leader, balance-region, hot-region as default
+hot-region-schedule-limit = 4
+## There are some policies supported: ["count", "size"], default: "count"
+# leader-schedule-policy = "count"
+## When the score difference of the leader or Region between the two stores is
+## less than the specified multiple of the Region size, it is considered in balance by PD.
+## If it equals 0.0, PD will automatically adjust it.
+# tolerant-size-ratio = 0.0
+
+## These parameters control the merge scheduler behavior.
+## If it is true, it means a region can only be merged into the next region of it.
+# enable-one-way-merge = false
+## If it is true, it means two regions within different tables can be merged.
+## This option only works when key type is "table".
+# enable-cross-table-merge = false
+
+## customized schedulers, the format is as below
+## if empty, it will use balance-leader, balance-region, hot-region as default
# [[schedule.schedulers]]
# type = "evict-leader"
# args = ["1"]
[replication]
-# The number of replicas for each region.
+## The number of replicas for each region.
max-replicas = 3
-# The label keys specified the location of a store.
-# The placement priorities is implied by the order of label keys.
-# For example, ["zone", "rack"] means that we should place replicas to
-# different zones first, then to different racks if we don't have enough zones.
+## The label keys specified the location of a store.
+## The placement priorities is implied by the order of label keys.
+## For example, ["zone", "rack"] means that we should place replicas to
+## different zones first, then to different racks if we don't have enough zones.
location-labels = []
+## Strictly checks if the label of TiKV is matched with location labels.
+# strictly-match-label = false
[label-property]
-# Do not assign region leaders to stores that have these tags.
-# [[label-property.reject-leader]]
-# key = "zone"
-# value = "cn1
+## Do not assign region leaders to stores that have these tags.
+# [[label-property.reject-leader]]
+# key = "zone"
+# value = "cn1"
+
+[dashboard]
+## Configurations below are for the TiDB Dashboard embedded in the PD.
+
+## The path of the CA certificate used to verify the TiDB server in TLS.
+# tidb-cacert-path = ""
+## The path of the certificate used to connect to TiDB server in TLS.
+# tidb-cert-path = ""
+## The path of the certificate private key.
+# tidb-key-path = ""
+
+## The public path prefix to serve Dashboard urls. It can be set when Dashboard
+## is running behind a reverse proxy. Do not configure it if you access
+## Dashboard directly.
+# public-path-prefix = "/dashboard"
+
+## When enabled, requests will be proxied to the instance running Dashboard
+## internally instead of resulting in a 307 redirection.
+# internal-proxy = false
+
+## When enabled, usage data will be sent to PingCAP for improving user experience.
+# enable-telemetry = true
+
+## When enabled, experimental TiDB Dashboard features will be available.
+## These features are incomplete or not well tested. Suggest not to enable in
+## production.
+# enable-experimental = false
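With the defaults above (client-urls on 127.0.0.1:2379), the PD HTTP API and the embedded TiDB Dashboard are reachable locally once the service is running. A quick sanity check might look like this (a sketch, assuming the default port):

    # list PD cluster members via the HTTP API
    curl http://127.0.0.1:2379/pd/api/v1/members
    # the embedded TiDB Dashboard is served at http://127.0.0.1:2379/dashboard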
diff --git a/tidb.toml b/tidb.toml
index 1e02e45c5451..3766784b1efb 100644
--- a/tidb.toml
+++ b/tidb.toml
@@ -3,10 +3,13 @@
# TiDB server host.
host = "0.0.0.0"
+# tidb server advertise IP.
+advertise-address = ""
+
# TiDB server port.
port = 4000
-# Registered store name, [tikv, mocktikv]
+# Registered store name, [tikv, mocktikv, unistore]
store = "tikv"
# TiDB storage path.
@@ -28,19 +31,107 @@ split-table = true
# The limit of concurrent executed sessions.
token-limit = 1000
-# Only print a log when out of memory quota.
-# Valid options: ["log", "cancel"]
-oom-action = "log"
+# The maximum memory available for a single SQL statement. Default: 1GB
+mem-quota-query = 1073741824
+
+# Controls whether to enable the temporary storage for some operators when a single SQL statement exceeds the memory quota specified by mem-quota-query.
+oom-use-tmp-storage = true
-# Set the memory quota for a query in bytes. Default: 32GB
-mem-quota-query = 34359738368
+# Specifies the temporary storage path for some operators when a single SQL statement exceeds the memory quota specified by mem-quota-query.
+# It defaults to a generated directory in `<TMPDIR>/<os/user.Current().Uid>_tidb/` if it is unset.
+# It only takes effect when `oom-use-tmp-storage` is `true`.
+# tmp-storage-path = "/tmp/<os/user.Current().Uid>_tidb/MC4wLjAuMDo0MDAwLzAuMC4wLjA6MTAwODA=/tmp-storage"
-# Enable coprocessor streaming.
-enable-streaming = false
+# Specifies the maximum use of temporary storage (bytes) for all active queries when `oom-use-tmp-storage` is enabled.
+# If the `tmp-storage-quota` exceeds the capacity of the temporary storage directory, tidb-server would return an error and exit.
+# The default value of tmp-storage-quota is less than 0, which means tidb-server doesn't check the capacity.
+tmp-storage-quota = -1
+
+# Specifies what operation TiDB performs when a single SQL statement exceeds the memory quota specified by mem-quota-query and cannot be spilled over to disk.
+# Valid options: ["log", "cancel"]
+oom-action = "cancel"
+
+# Enable batch commit for the DMLs.
+enable-batch-dml = false
# Set system variable 'lower_case_table_names'
lower-case-table-names = 2
+# Make "kill query" behavior compatible with MySQL. It's not recommended to
+# turn on this option when TiDB server is behind a proxy.
+compatible-kill-query = false
+
+# Make SIGTERM wait N seconds before starting the shutdown procedure. This is designed for when TiDB is behind a proxy/load balancer.
+# The health check will fail immediately but the server will not start shutting down until the time has elapsed.
+graceful-wait-before-shutdown = 0
+
+# check-mb4-value-in-utf8 is used to control whether to check mb4 characters when the charset is utf8.
+check-mb4-value-in-utf8 = true
+
+# treat-old-version-utf8-as-utf8mb4 is used for upgrade compatibility. Setting it to true will treat old-version table/column UTF8 charsets as UTF8MB4.
+treat-old-version-utf8-as-utf8mb4 = true
+
+# max-index-length is used to deal with compatibility issues from v3.0.7 and previous version upgrades. It can only be in [3072, 3072*4].
+max-index-length = 3072
+
+# index-limit is used to deal with compatibility issues. It can only be in [64, 64*8].
+index-limit = 64
+
+# enable-table-lock is used to control the table lock feature. The default is false, indicating the table lock feature is disabled.
+enable-table-lock = false
+
+# delay-clean-table-lock is used to control the delay (in milliseconds) before unlocking the table in abnormal situations.
+delay-clean-table-lock = 0
+
+# Maximum number of the splitting region, which is used by the split region statement.
+split-region-max-num = 1000
+
+# alter-primary-key is used to control whether the primary keys are clustered.
+# Note that this config is deprecated. Only valid when @@global.tidb_enable_clustered_index = 'int_only'.
+# Default is false, only the integer primary keys are clustered.
+# If it is true, all types of primary keys are nonclustered.
+alter-primary-key = false
+
+# server-version is used to change the version string of TiDB in the following scenarios:
+# 1. the server version returned by builtin-function `VERSION()`.
+# 2. the server version filled in handshake packets of MySQL Connection Protocol, see https://dev.mysql.com/doc/internals/en/connection-phase-packets.html#packet-Protocol::Handshake for more details.
+# if server-version = "", the default value(original TiDB version string) is used.
+server-version = ""
+
+# repair mode is used to repair the broken table meta in TiKV in extreme cases.
+repair-mode = false
+
+# Repair table list is used to list the tables in repair mode with the format like ["db.table",].
+# In repair mode, repairing a table that is not in the repair list results in a "wrong database" or "wrong table" error.
+repair-table-list = []
+
+# The maximum permitted number of simultaneous client connections. When the value is 0, the number of connections is unlimited.
+max-server-connections = 0
+
+# Whether new collations are enabled. As indicated by its name, this configuration entry takes effect ONLY when a TiDB cluster bootstraps for the first time.
+new_collations_enabled_on_first_bootstrap = false
+
+# Don't register information of this TiDB to etcd, so this instance of TiDB won't appear in the services like dashboard.
+# This option is useful when you want to embed TiDB into your service(i.e. use TiDB as a library).
+# *If you want to start a TiDB service, NEVER enable this.*
+skip-register-to-dashboard = false
+
+# When enabled, usage data (for example, instance versions) will be reported to PingCAP periodically for user experience analytics.
+# If this config is set to `false` on all TiDB servers, telemetry will be always disabled regardless of the value of the global variable `tidb_enable_telemetry`.
+# See PingCAP privacy policy for details: https://pingcap.com/en/privacy-policy/
+enable-telemetry = true
+
+# deprecate-integer-display-length is used to be compatible with MySQL 8.0 in which the integer declared with display length will be returned with
+# a warning like `Integer display width is deprecated and will be removed in a future release`.
+deprecate-integer-display-length = false
+
+# enable-enum-length-limit is used to deal with compatibility issues. When true, the enum/set element length is limited.
+# According to MySQL 8.0 Refman:
+# The maximum supported length of an individual SET element is M <= 255 and (M x w) <= 1020,
+# where M is the element literal length and w is the number of bytes required for the maximum-length character in the character set.
+# See https://dev.mysql.com/doc/refman/8.0/en/string-type-syntax.html for more details.
+enable-enum-length-limit = true
+
[log]
# Log level: debug, info, warn, error, fatal.
level = "info"
@@ -48,20 +139,30 @@ level = "info"
# Log format, one of json, text, console.
format = "text"
-# Disable automatic timestamp in output
-disable-timestamp = false
+# Enable automatic timestamps in log output; if not set, it defaults to true.
+# enable-timestamp = true
+
+# Enable annotating logs with the full stack error message; if not set, it defaults to false.
+# enable-error-stack = false
+
+# Whether to enable slow query log.
+enable-slow-log = true
# Stores slow query log into separated files.
-slow-query-file = ""
+slow-query-file = "tidb-slow.log"
# Queries with execution time greater than this value will be logged. (Milliseconds)
slow-threshold = 300
+# record-plan-in-slow-log is used to enable recording the query plan in the slow log.
+# 0 disables it; 1 enables it.
+record-plan-in-slow-log = 1
+
# Queries with internal result greater than this value will be logged.
expensive-threshold = 10000
# Maximum query length recorded in log.
-query-log-max-len = 2048
+query-log-max-len = 4096
# File logging.
[log.file]
@@ -77,9 +178,6 @@ max-days = 0
# Maximum number of old log files to retain. No clean up by default.
max-backups = 0
-# Rotate log by day
-log-rotate = true
-
[security]
# Path of file that contains list of trusted SSL CAs for connection with mysql client.
ssl-ca = ""
@@ -99,31 +197,54 @@ cluster-ssl-cert = ""
# Path of file that contains X509 key in PEM format for connection with cluster components.
cluster-ssl-key = ""
+# Configurations of the encryption method to use for encrypting the spilled data files.
+# Possible values are "plaintext", "aes128-ctr", if not set, it will be "plaintext" by default.
+# "plaintext" means encryption is disabled.
+spilled-file-encryption-method = "plaintext"
+
[status]
# If enable status report HTTP service.
report-status = true
+# TiDB status host.
+status-host = "0.0.0.0"
+
+## status-host is the HTTP address for reporting the internal status of a TiDB server, for example:
+## API for prometheus: http://${status-host}:${status_port}/metrics
+## API for pprof: http://${status-host}:${status_port}/debug/pprof
# TiDB status port.
status-port = 10080
-# Prometheus pushgateway address, leaves it empty will disable prometheus push.
+# Prometheus pushgateway address; leaving it empty disables push to pushgateway.
metrics-addr = ""
-# Prometheus client push interval in second, set \"0\" to disable prometheus push.
+# Prometheus client push interval in seconds, set \"0\" to disable push to pushgateway.
metrics-interval = 15
+# Record statements qps by database name if it is enabled.
+record-db-qps = false
+
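Once tidb-server is running, the status endpoints described above can be probed directly; for example, with the default status-host/status-port (a sketch assuming a local instance):

    # basic liveness/version info from the status port
    curl http://127.0.0.1:10080/status
    # Prometheus metrics, as noted in the status-host comment
    curl http://127.0.0.1:10080/metrics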
[performance]
# Max CPUs to use, 0 use number of CPUs in the machine.
max-procs = 0
+
+# Memory size quota for tidb server, 0 means unlimited
+server-memory-quota = 0
+
+# The alarm threshold for memory usage of the tidb-server. The valid value range is greater than or equal to 0
+# and less than or equal to 1. The default value is 0.8.
+# If this configuration is set to 0 or 1, it'll disable the alarm.
+# Otherwise, related information will be recorded in the directory `tmp-storage-path/record`.
+# Note: If the configuration `server-memory-quota` is set and larger than 0, the alarm threshold will be
+# `memory-usage-alarm-ratio * server-memory-quota`; otherwise, it'll be `memory-usage-alarm-ratio * system memory size`.
+memory-usage-alarm-ratio = 0.8
+
# StmtCountLimit limits the max count of statement inside a transaction.
stmt-count-limit = 5000
# Set keep alive option for tcp connection.
tcp-keep-alive = true
-# The maximum number of retries when commit a transaction.
-retry-limit = 10
-
# Whether support cartesian product.
cross-join = true
@@ -133,15 +254,50 @@ stats-lease = "3s"
# Run auto analyze worker on this tidb-server.
run-auto-analyze = true
-# Probability to use the query feedback to update stats, 0 or 1 for always false/true.
+# Probability to use the query feedback to update stats, 0.0 or 1.0 for always false/true.
feedback-probability = 0.0
# The max number of query feedback that cache in memory.
-query-feedback-limit = 1024
+query-feedback-limit = 512
# Pseudo stats will be used if the ratio between the modify count and
# row count in statistics of a table is greater than it.
-pseudo-estimate-ratio = 0.7
+pseudo-estimate-ratio = 0.8
+
+# Force the priority of all statements in a specified priority.
+# The value could be "NO_PRIORITY", "LOW_PRIORITY", "HIGH_PRIORITY" or "DELAYED".
+force-priority = "NO_PRIORITY"
+
+# Bind info lease duration, which influences the duration of loading bind info and handling invalid bind.
+bind-info-lease = "3s"
+
+# Whether to support pushing down aggregation with distinct to coprocessor tasks
+distinct-agg-push-down = false
+
+# The limitation of the size in byte for the entries in one transaction.
+# If using TiKV as the storage, the entry represents a key/value pair.
+# NOTE: If binlog is enabled with Kafka (e.g. arbiter cluster),
+# this value should be less than 1073741824(1G) because this is the maximum size that can be handled by Kafka.
+# If binlog is disabled or binlog is enabled without Kafka, this value should be less than 10737418240(10G).
+txn-total-size-limit = 104857600
+
+# The limitation of the size in byte for each entry in one transaction.
+# NOTE: Increasing this limit may cause performance problems.
+txn-entry-size-limit = 6291456
+
+# The max concurrency of two-phase commit requests for a single SQL statement.
+committer-concurrency = 128
+
+# max lifetime of transaction ttl manager.
+max-txn-ttl = 3600000
+
+# The interval between recording two memory profiles into the global tracker.
+mem-profile-interval = "1m"
+
+# The Go GC trigger factor; you can get more information about it at https://golang.org/pkg/runtime.
+# If you encounter OOM when executing a large query, you can decrease this value to trigger GC earlier.
+# If you find the CPU used by GC is too high or GC is too frequent and it impacts your workload, you can increase this value.
+gogc = 100
[proxy-protocol]
# PROXY protocol acceptable client networks.
@@ -151,14 +307,10 @@ networks = ""
# PROXY protocol header read timeout, unit is second
header-timeout = 5
-[plan-cache]
-enabled = false
-capacity = 2560
-shards = 256
-
[prepared-plan-cache]
enabled = false
capacity = 100
+memory-guard-ratio = 0.1
[opentracing]
# Enable opentracing.
@@ -211,9 +363,13 @@ log-spans = false
# LocalAgentHostPort instructs reporter to send spans to jaeger-agent at this address
local-agent-host-port = ""
+[pd-client]
+# Max time which PD client will wait for the PD server in seconds.
+pd-server-timeout = 3
+
[tikv-client]
# Max gRPC connections that will be established with each tikv-server.
-grpc-connection-count = 16
+grpc-connection-count = 4
# After a duration of this time in seconds if the client doesn't see any activity it pings
# the server to see if the transport is still alive.
@@ -223,19 +379,93 @@ grpc-keepalive-time = 10
# and if no activity is seen even after that the connection is closed.
grpc-keepalive-timeout = 3
-# max time for commit command, must be twice bigger than raft election timeout.
+# The compression type for gRPC channel: none or gzip.
+grpc-compression-type = "none"
+
+# Max time for the commit command; it must be at least twice the Raft election timeout.
commit-timeout = "41s"
+# Max batch size in gRPC.
+max-batch-size = 128
+# Overload threshold of TiKV.
+overload-threshold = 200
+# Max batch wait time in nanosecond to avoid waiting too long. 0 means disable this feature.
+max-batch-wait-time = 0
+# Batch wait size, to avoid waiting too long.
+batch-wait-size = 8
+
+# Enable chunk encoded data for coprocessor requests.
+enable-chunk-rpc = true
+
+# If a Region has not been accessed for more than the given duration (in seconds), it
+# will be reloaded from the PD.
+region-cache-ttl = 600
+
+# store-limit is used to restrain TiDB from sending requests to stores that have reached the limit.
+# If a store has reached the limit, successive requests to the same store will return an error.
+# The default 0 means the store limit is turned off.
+store-limit = 0
+
+# store-liveness-timeout is used to control timeout for store liveness after sending request failed.
+store-liveness-timeout = "1s"
+
+# ttl-refreshed-txn-size decides whether a transaction should update its lock TTL.
+# If the size (in bytes) of a transaction is larger than `ttl-refreshed-txn-size`, it updates the lock TTL during 2PC.
+ttl-refreshed-txn-size = 33554432
+
+[tikv-client.copr-cache]
+# The capacity in MB of the cache. Zero means disable coprocessor cache.
+capacity-mb = 1000.0
+
[binlog]
# enable to write binlog.
+# NOTE: If binlog is enabled with Kafka (e.g. arbiter cluster),
+# txn-total-size-limit should be less than 1073741824(1G) because this is the maximum size that can be handled by Kafka.
enable = false
# WriteTimeout specifies how long it will wait for writing binlog to pump.
write-timeout = "15s"
-# If IgnoreError is true, when writting binlog meets error, TiDB would stop writting binlog,
+# If IgnoreError is true, when writing binlog meets error, TiDB would stop writing binlog,
# but still provide service.
ignore-error = false
# use socket file to write binlog, for compatibility with the kafka version of tidb-binlog.
binlog-socket = ""
+
+# the strategy for sending binlog to pump, value can be "range" or "hash" now.
+strategy = "range"
+
+[pessimistic-txn]
+# max retry count for a statement in a pessimistic transaction.
+max-retry-count = 256
+
+[stmt-summary]
+# enable statement summary.
+enable = true
+
+# enable statement summary for TiDB internal query, default is false.
+enable-internal-query = false
+
+# max number of statements kept in memory.
+max-stmt-count = 200
+
+# max length of displayed normalized sql and sample sql.
+max-sql-length = 4096
+
+# the refresh interval of statement summary, it's counted in seconds.
+refresh-interval = 1800
+
+# the maximum history size of statement summary.
+history-size = 24
+
+# experimental section controls the features that are still experimental: their semantics,
+# interfaces are subject to change, using these features in the production environment is not recommended.
+[experimental]
+# enable creating expression index.
+allow-expression-index = false
+
+# server level isolation read by engines and labels
+[isolation-read]
+# engines controls which types of engines the tidb server is allowed to read data from. options: "tikv", "tiflash", "tidb".
+engines = ["tikv", "tiflash", "tidb"]
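The three config files updated in this commit are read by the packaged systemd units, with the configs installed under /etc/pd, /etc/tidb and /etc/tikv per the new backup array. A minimal sketch for bringing the stack up after installation, assuming the unit names match the packaged pd.service, tikv.service and tidb.service files and the usual start order (PD before TiKV before TiDB):

    # start PD first, then TiKV, then TiDB
    systemctl enable --now pd.service
    systemctl enable --now tikv.service
    systemctl enable --now tidb.service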
diff --git a/tikv.toml b/tikv.toml
index f685c4494d89..4d698ef5d303 100644
--- a/tikv.toml
+++ b/tikv.toml
@@ -1,41 +1,98 @@
-# TiKV config template
-# Human-readable big numbers:
-# File size(based on byte): KB, MB, GB, TB, PB
-# e.g.: 1_048_576 = "1MB"
-# Time(based on ms): ms, s, m, h
-# e.g.: 78_000 = "1.3m"
-
-# log level: trace, debug, info, warning, error, critical.
-# Note that `debug` and `trace` are only available in development builds.
+## TiKV config template
+## Human-readable big numbers:
+## File size(based on byte): KB, MB, GB, TB, PB
+## e.g.: 1_048_576 = "1MB"
+## Time(based on ms): ms, s, m, h
+## e.g.: 78_000 = "1.3m"
+
+## Log levels: trace, debug, info, warning, error, critical.
+## Note that `debug` and `trace` are only available in development builds.
# log-level = "info"
-# file to store log, write to stderr if it's empty.
+## File to store logs.
+## If it is not set, logs will be appended to stderr.
# log-file = ""
-# timespan between rotating the log files.
-# Once this timespan passes the existing log file will have a timestamp appended to its name,
-# and a new file will be created.
+## Log format, one of json, text. Default to text.
+# log-format = "text"
+
+## File to store slow logs.
+## If "log-file" is set, but this is not set, the slow logs will be appended
+## to "log-file". If both "log-file" and "slow-log-file" are not set, all logs
+## will be appended to stderr.
+# slow-log-file = ""
+
+## The minimum operation cost to output relative logs.
+# slow-log-threshold = "1s"
+
+## Timespan between rotating the log files.
+## Once this timespan passes, log files will be rotated, i.e. existing log file will have a
+## timestamp appended to its name and a new file will be created.
# log-rotation-timespan = "24h"
+## Size of log file that triggers the log rotation.
+## Once the size of log file exceeds the threshold value, the log file will be rotated
+## and the old log file will be placed in a new file named with the original file name suffixed by a timestamp.
+# log-rotation-size = "300MB"
+
+## Enable io snoop, which utilizes eBPF to get accurate disk io of TiKV
+## It won't take effect when compiling without BCC_IOSNOOP=1.
+# enable-io-snoop = true
+
+# Configurations for the single thread pool serving read requests.
+[readpool.unified]
+## The minimal working thread count of the thread pool.
+# min-thread-count = 1
+
+## The maximum working thread count of the thread pool.
+## The default value is max(4, LOGICAL_CPU_NUM * 0.8).
+# max-thread-count = 8
+
+## Size of the stack for each thread in the thread pool.
+# stack-size = "10MB"
+
+## Max running tasks of each worker, reject if exceeded.
+# max-tasks-per-worker = 2000
+
[readpool.storage]
-# size of thread pool for high-priority operations
+## Whether to use the unified read pool to handle storage requests.
+# use-unified-pool = false
+
+## The following configurations only take effect when `use-unified-pool` is false.
+
+## Size of the thread pool for high-priority operations.
# high-concurrency = 4
-# size of thread pool for normal-priority operations
+
+## Size of the thread pool for normal-priority operations.
# normal-concurrency = 4
-# size of thread pool for low-priority operations
+
+## Size of the thread pool for low-priority operations.
# low-concurrency = 4
-# max running high-priority operations of each worker, reject if exceed
+
+## Max running high-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-high = 2000
-# max running normal-priority operations of each worker, reject if exceed
+
+## Max running normal-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-normal = 2000
-# max running low-priority operations of each worker, reject if exceed
+
+## Max running low-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-low = 2000
-# size of stack size for each thread pool
+
+## Size of the stack for each thread in the thread pool.
# stack-size = "10MB"
[readpool.coprocessor]
-# Notice: if CPU_NUM > 8, default thread pool size for coprocessors
-# will be set to CPU_NUM * 0.8.
+## Whether to use the unified read pool to handle coprocessor requests.
+# use-unified-pool = true
+
+## The following configurations only take effect when `use-unified-pool` is false.
+
+## Most read requests from TiDB are sent to the coprocessor of TiKV. high/normal/low-concurrency is
+## used to set the number of threads of the coprocessor.
+## If there are many read requests, you can increase these config values (but keep it within the
+## number of system CPU cores). For example, for a 32-core machine deployed with TiKV, you can even
+## set these config to 30 in heavy read scenarios.
+## If CPU_NUM > 8, the default thread pool size for coprocessors is set to CPU_NUM * 0.8.
# high-concurrency = 8
# normal-concurrency = 8
@@ -43,394 +100,649 @@
# max-tasks-per-worker-high = 2000
# max-tasks-per-worker-normal = 2000
# max-tasks-per-worker-low = 2000
-# stack-size = "10MB"
[server]
-# set listening address.
+## Listening address.
# addr = "127.0.0.1:20160"
-# set advertise listening address for client communication, if not set, use addr instead.
+
+## Advertise listening address for client communication.
+## If not set, `addr` will be used.
# advertise-addr = ""
-# compression type for grpc channel, available values are none, deflate and gzip.
+## Status address.
+## This is used for reporting the status of TiKV directly through
+## the HTTP address. Notice that there is a risk of leaking status
+## information if this port is exposed to the public.
+## Empty string means disabling it.
+# status-addr = "127.0.0.1:20180"
+
+## Set the maximum number of worker threads for the status report HTTP service.
+# status-thread-pool-size = 1
+
+## Compression type for gRPC channel: none, deflate or gzip.
# grpc-compression-type = "none"
-# size of thread pool for grpc server.
+
+## Size of the thread pool for the gRPC server.
# grpc-concurrency = 4
-# The number of max concurrent streams/requests on a client connection.
+
+## The number of max concurrent streams/requests on a client connection.
# grpc-concurrent-stream = 1024
-# The number of connections with each tikv server to send raft messages.
-# grpc-raft-conn-num = 10
-# Amount to read ahead on individual grpc streams.
+
+## Limit the memory size that can be used by gRPC. Default is unlimited.
+## gRPC usually works well to reclaim memory by itself. Limit the memory in case OOM
+## is observed. Note that limiting the usage can lead to potential stalls.
+# grpc-memory-pool-quota = "32G"
+
+## The number of connections with each TiKV server to send Raft messages.
+# grpc-raft-conn-num = 1
+
+## Amount to read ahead on individual gRPC streams.
# grpc-stream-initial-window-size = "2MB"
-# Time to wait before sending out a ping to check if server is still alive.
-# This is only for communications between tikv instances.
+
+## Time to wait before sending out a ping to check if server is still alive.
+## This is only for communications between TiKV instances.
# grpc-keepalive-time = "10s"
-# Time to wait before closing the connection without receiving keepalive ping
-# ack.
+
+## Time to wait before closing the connection without receiving KeepAlive ping Ack.
# grpc-keepalive-timeout = "3s"
-# How many snapshots can be sent concurrently.
+## How many snapshots can be sent concurrently.
# concurrent-send-snap-limit = 32
-# How many snapshots can be recv concurrently.
+
+## How many snapshots can be received concurrently.
# concurrent-recv-snap-limit = 32
-# max recursion level allowed when decoding dag expression
+## Max allowed recursion level when decoding Coprocessor DAG expression.
# end-point-recursion-limit = 1000
-# max time to handle coprocessor request before timeout
+## Max time to handle Coprocessor requests before timeout.
# end-point-request-max-handle-duration = "60s"
-# the max bytes that snapshot can be written to disk in one second,
-# should be set based on your disk performance
+## Max bytes that snapshot can be written to disk in one second.
+## It should be set based on your disk performance.
# snap-max-write-bytes-per-sec = "100MB"
-# set attributes about this server, e.g. { zone = "us-west-1", disk = "ssd" }.
+## Whether to enable request batch.
+# enable-request-batch = true
+
+## Attributes about this server, e.g. `{ zone = "us-west-1", disk = "ssd" }`.
# labels = {}
-[storage]
-# set the path to rocksdb directory.
-data-dir = "/var/lib/tikv/store"
+## The working thread count of the background pool, which includes the endpoint, BR, split-check,
+## region threads and other threads for delay-insensitive tasks.
+## The default value is 2 if the number of CPU cores is less than 16, otherwise 3.
+# background-thread-count = 2
+
+## If the handle time is larger than the threshold, a slow log will be printed in the endpoint.
+## The default value is 1s.
+# end-point-slow-log-threshold = "1s"
-# notify capacity of scheduler's channel
-# scheduler-notify-capacity = 10240
+[storage]
+## The path to RocksDB directory.
+# data-dir = "/tmp/tikv/store"
-# the number of slots in scheduler latches, concurrency control for write.
+## The number of slots in Scheduler latches, which controls write concurrency.
+## In most cases you can use the default value. When importing data, you can set it to a larger
+## value.
# scheduler-concurrency = 2048000
-# scheduler's worker pool size, should increase it in heavy write cases,
-# also should less than total cpu cores.
+## Scheduler's worker pool size, i.e. the number of write threads.
+## It should be less than total CPU cores. When there are frequent write operations, set it to a
+## higher value. More specifically, you can run `top -H -p tikv-pid` to check whether the threads
+## named `sched-worker-pool` are busy.
# scheduler-worker-pool-size = 4
-# When the pending write bytes exceeds this threshold,
-# the "scheduler too busy" error is displayed.
+## When the pending write bytes exceeds this threshold, the "scheduler too busy" error is displayed.
# scheduler-pending-write-threshold = "100MB"
+## For async commit transactions, it's possible to respond to the client before applying prewrite
+## requests. Enabling this can help reduce latency when apply duration is significant, or reduce
+## latency jitter when apply duration is not stable.
+# enable-async-apply-prewrite = false
+
+## Reserve some space to ensure that recovering the store from `no space left` can succeed.
+## `max(reserve-space, capacity * 5%)` will be reserved exactly.
+##
+## Setting it to 0 causes no space to be reserved at all. It's generally used for tests.
+# reserve-space = "5GB"
+
+[storage.block-cache]
+## Whether to create a shared block cache for all RocksDB column families.
+##
+## Block cache is used by RocksDB to cache uncompressed blocks. A big block cache can speed up reads.
+## It is recommended to turn on the shared block cache. Since only the total cache size needs to be
+## set, it is easier to configure. In most cases it should be able to auto-balance cache usage
+## between column families with the standard LRU algorithm.
+##
+## The rest of the config in the storage.block-cache section takes effect only when the shared block
+## cache is on.
+# shared = true
+
+## Size of the shared block cache. Normally it should be tuned to 30%-50% of system's total memory.
+## When the config is not set, it is decided by the sum of the following fields or their default
+## value:
+## * rocksdb.defaultcf.block-cache-size or 25% of system's total memory
+## * rocksdb.writecf.block-cache-size or 15% of system's total memory
+## * rocksdb.lockcf.block-cache-size or 2% of system's total memory
+## * raftdb.defaultcf.block-cache-size or 2% of system's total memory
+##
+## To deploy multiple TiKV nodes on a single physical machine, configure this parameter explicitly.
+## Otherwise, the OOM problem might occur in TiKV.
+# capacity = "1GB"
+
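The capacity comment above suggests tuning the shared block cache to 30%-50% of system memory on a dedicated TiKV node. A rough way to compute such a value (a sketch only, assuming a Linux host and a 40% target) is:

    # print ~40% of total memory as a candidate for storage.block-cache.capacity
    awk '/MemTotal/ {printf "capacity = \"%dGB\"\n", $2*0.4/1048576}' /proc/meminfo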
[pd]
-# pd endpoints
-endpoints = ["127.0.0.1:2379"]
+## PD endpoints.
+# endpoints = []
-[metric]
-# the Prometheus client push interval. Setting the value to 0s stops Prometheus client from pushing.
-# interval = "15s"
-# the Prometheus pushgateway address. Leaving it empty stops Prometheus client from pushing.
-# address = ""
-# the Prometheus client push job name. Note: A node id will automatically append, e.g., "tikv_1".
-# job = "tikv"
+## The interval at which to retry a PD connection initialization.
+## Default is 300ms.
+# retry-interval = "300ms"
-[raftstore]
-# true (default value) for high reliability, this can prevent data loss when power failure.
-# sync-log = true
+## If the client observes an error, it can skip reporting it except every `n` times.
+## Set to 1 to disable this feature.
+## Default is 10.
+# retry-log-every = 10
+
+## The maximum number of times to retry a PD connection initialization.
+## Set to 0 to disable retry.
+## Default is -1, meaning isize::MAX times.
+# retry-max-count = -1
-# minimizes disruption when a partitioned node rejoins the cluster by using a two phase election.
+[raftstore]
+## Whether to enable Raft prevote.
+## Prevote minimizes disruption when a partitioned node rejoins the cluster by using a two phase
+## election.
# prevote = true
-# set the path to raftdb directory, default value is data-dir/raft
+## The path to RaftDB directory.
+## If not set, it will be `{data-dir}/raft`.
+## If there are multiple disks on the machine, storing the data of Raft RocksDB on different disks
+## can improve TiKV performance.
# raftdb-path = ""
-# set store capacity, if no set, use disk capacity.
+## Store capacity, i.e. max data size allowed.
+## If it is not set, disk capacity is used.
# capacity = 0
-# notify capacity, 40960 is suitable for about 7000 regions.
+## Internal notify capacity.
+## 40960 is suitable for about 7000 Regions. It is recommended to use the default value.
# notify-capacity = 40960
-# maximum number of messages can be processed in one tick.
+## Maximum number of internal messages to process in a tick.
# messages-per-tick = 4096
-# Region heartbeat tick interval for reporting to pd.
+## Region heartbeat tick interval for reporting to PD.
# pd-heartbeat-tick-interval = "60s"
-# Store heartbeat tick interval for reporting to pd.
+
+## Store heartbeat tick interval for reporting to PD.
# pd-store-heartbeat-tick-interval = "10s"
-# When region size changes exceeds region-split-check-diff, we should check
-# whether the region should be split or not.
+## The threshold of triggering Region split check.
+## When Region size change exceeds this config, TiKV will check whether the Region should be split
+## or not. To reduce the cost of scanning data in the checking process, you can set the value to
+## 32MB during checking and set it back to the default value in normal operations.
# region-split-check-diff = "6MB"
-# Interval to check region whether need to be split or not.
+## The interval of triggering Region split check.
# split-region-check-tick-interval = "10s"
-# When raft entry exceed the max size, reject to propose the entry.
+## When the number of Raft entries exceeds the max size, TiKV rejects to propose the entry.
# raft-entry-max-size = "8MB"
-# Interval to gc unnecessary raft log.
+## Interval to GC unnecessary Raft log.
# raft-log-gc-tick-interval = "10s"
-# A threshold to gc stale raft log, must >= 1.
+
+## Threshold to GC stale Raft log, must be >= 1.
# raft-log-gc-threshold = 50
-# When entry count exceed this value, gc will be forced trigger.
+
+## When the entry count exceeds this value, GC will be forced to trigger.
# raft-log-gc-count-limit = 72000
-# When the approximate size of raft log entries exceed this value, gc will be forced trigger.
-# It's recommanded to set it to 3/4 of region-split-size.
+
+## When the approximate size of Raft log entries exceeds this value, GC will be forced trigger.
+## It's recommended to set it to 3/4 of `region-split-size`.
# raft-log-gc-size-limit = "72MB"
-# When a peer hasn't been active for max-peer-down-duration,
-# we will consider this peer to be down and report it to pd.
+## Raft engine is a replaceable component. For some implementations, it's necessary to purge
+## old log files to recycle disk space ASAP.
+# raft-engine-purge-interval = "10s"
+
+## How long the peer will be considered down and reported to PD when it hasn't been active for this
+## time.
# max-peer-down-duration = "5m"
-# Interval to check whether start manual compaction for a region,
+## Interval to check whether to start manual compaction for a Region.
# region-compact-check-interval = "5m"
-# Number of regions for each time to check.
+## Number of Regions for each time to check.
# region-compact-check-step = 100
-# The minimum number of delete tombstones to trigger manual compaction.
+## The minimum number of delete tombstones to trigger manual compaction.
# region-compact-min-tombstones = 10000
-# The minimum percentage of delete tombstones to trigger manual compaction.
-# Should between 1 and 100. Manual compaction only triggered when the number
-# of delete tombstones exceeds region-compact-min-tombstones and the percentage
-# of delete tombstones exceeds region-compact-tombstones-percent.
+## The minimum percentage of delete tombstones to trigger manual compaction.
+## It should be set between 1 and 100. Manual compaction is only triggered when the number of
+## delete tombstones exceeds `region-compact-min-tombstones` and the percentage of delete tombstones
+## exceeds `region-compact-tombstones-percent`.
# region-compact-tombstones-percent = 30
-# Interval to check whether should start a manual compaction for lock column family,
-# if written bytes reach lock-cf-compact-threshold for lock column family, will fire
-# a manual compaction for lock column family.
+## Interval to check whether to start a manual compaction for Lock Column Family.
+## If written bytes reach `lock-cf-compact-bytes-threshold` for Lock Column Family, TiKV will
+## trigger a manual compaction for Lock Column Family.
# lock-cf-compact-interval = "10m"
-
# lock-cf-compact-bytes-threshold = "256MB"
-# Interval (s) to check region whether the data are consistent.
+## Interval (s) to check Region whether the data are consistent.
# consistency-check-interval = 0
-# Use delete range to drop a large number of continuous keys.
-# use-delete-range = false
+## Interval to clean up import SST files.
+# cleanup-import-sst-interval = "10m"
-# delay time before deleting a stale peer
-# clean-stale-peer-delay = "10m"
+## How many threads to use to handle log apply
+# apply-pool-size = 2
-# Interval to cleanup import sst files.
-# cleanup-import-sst-interval = "10m"
+## How many threads to use to handle raft messages
+# store-pool-size = 2
[coprocessor]
-# When it is true, it will try to split a region with table prefix if
-# that region crosses tables. It is recommended to turn off this option
-# if there will be a large number of tables created.
-# split-region-on-table = true
-# When the region's size exceeds region-max-size, we will split the region
-# into two which the left region's size will be region-split-size or a little
-# bit smaller.
+## When it is set to `true`, TiKV will try to split a Region with table prefix if that Region
+## crosses tables.
+## It is recommended to turn off this option if there will be a large number of tables created.
+# split-region-on-table = false
+
+## One split check produces several split keys in batch. This config limits the number of produced
+## split keys in one batch.
+# batch-split-limit = 10
+
+## When Region [a,e) size exceeds `region_max_size`, it will be split into several Regions [a,b),
+## [b,c), [c,d), [d,e) and the size of [a,b), [b,c), [c,d) will be `region_split_size` (or a
+## little larger).
# region-max-size = "144MB"
# region-split-size = "96MB"
-# When the region's keys exceeds region-max-keys, we will split the region
-# into two which the left region's keys will be region-split-keys or a little
-# bit smaller.
+
+## When the number of keys in Region [a,e) exceeds the `region_max_keys`, it will be split into
+## several Regions [a,b), [b,c), [c,d), [d,e) and the number of keys in [a,b), [b,c), [c,d) will be
+## `region_split_keys`.
# region-max-keys = 1440000
# region-split-keys = 960000
+## Set to "mvcc" to do consistency check for MVCC data, or "raw" for raw data.
+# consistency-check-method = "mvcc"
+
[rocksdb]
-# Maximum number of concurrent background jobs (compactions and flushes)
+## Maximum number of threads of RocksDB background jobs.
+## The background tasks include compaction and flush. For detailed information why RocksDB needs to
+## do compaction, see RocksDB-related materials.
+## When write traffic (like the importing data size) is big, it is recommended to enable more
+## threads. But set the number of the enabled threads smaller than that of CPU cores. For example,
+## when importing data, for a machine with a 32-core CPU, set the value to 28.
+## The default value is set to 8 or CPU_NUM - 1, whichever is smaller.
# max-background-jobs = 8
-# This value represents the maximum number of threads that will concurrently perform a
-# compaction job by breaking it into multiple, smaller ones that are run simultaneously.
-# Default: 1 (i.e. no subcompactions)
-# max-sub-compactions = 1
-
-# Number of open files that can be used by the DB. You may need to
-# increase this if your database has a large working set. Value -1 means
-# files opened are always kept open. You can estimate number of files based
-# on target_file_size_base and target_file_size_multiplier for level-based
-# compaction.
-# If max-open-files = -1, RocksDB will prefetch index and filter blocks into
-# block cache at startup, so if your database has a large working set, it will
-# take several minutes to open the db.
+## Maximum number of threads of RocksDB background memtable flush jobs.
+## The default value is set to 2 or max_background_jobs / 4, whichever is bigger.
+# max-background-flushes = 2
+
+## Represents the maximum number of threads that will concurrently perform a sub-compaction job by
+## breaking it into multiple, smaller ones running simultaneously.
+## The default value is set to 3 or the largest number to allow for two compactions, whichever is
+## smaller.
+# max-sub-compactions = 3
+
+## Number of open files that can be used by the DB.
+## Value -1 means files opened are always kept open and RocksDB will prefetch index and filter
+## blocks into block cache at startup. So if your database has a large working set, it will take
+## several minutes to open the DB. You may need to increase this if your database has a large
+## working set. You can estimate the number of files based on `target-file-size-base` and
+## `target_file_size_multiplier` for level-based compaction.
# max-open-files = 40960
-# Max size of rocksdb's MANIFEST file.
-# For detailed explanation please refer to https://github.com/facebook/rocksdb/wiki/MANIFEST
-# max-manifest-file-size = "20MB"
+## Max size of RocksDB's MANIFEST file.
+## For detailed explanation, please refer to https://github.com/facebook/rocksdb/wiki/MANIFEST
+# max-manifest-file-size = "128MB"
-# If true, the database will be created if it is missing.
+## If the value is `true`, the database will be created if it is missing.
# create-if-missing = true
-# rocksdb wal recovery mode
-# 0 : TolerateCorruptedTailRecords, tolerate incomplete record in trailing data on all logs;
-# 1 : AbsoluteConsistency, We don't expect to find any corruption in the WAL;
-# 2 : PointInTimeRecovery, Recover to point-in-time consistency;
-# 3 : SkipAnyCorruptedRecords, Recovery after a disaster;
+## RocksDB Write-Ahead Logs (WAL) recovery mode.
+## 0 : TolerateCorruptedTailRecords, tolerate incomplete record in trailing data on all logs;
+## 1 : AbsoluteConsistency, We don't expect to find any corruption in the WAL;
+## 2 : PointInTimeRecovery, Recover to point-in-time consistency;
+## 3 : SkipAnyCorruptedRecords, Recovery after a disaster;
# wal-recovery-mode = 2
-# rocksdb write-ahead logs dir path
-# This specifies the absolute dir path for write-ahead logs (WAL).
-# If it is empty, the log files will be in the same dir as data.
-# When you set the path to rocksdb directory in memory like in /dev/shm, you may want to set
-# wal-dir to a directory on a persistent storage.
-# See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database
+## RocksDB WAL directory.
+## This config specifies the absolute directory path for WAL.
+## If it is not set, the log files will be in the same directory as data. When you set the path to
+## RocksDB directory in memory like in `/dev/shm`, you may want to set `wal-dir` to a directory on a
+## persistent storage. See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database .
+## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can
+## improve performance.
# wal-dir = "/tmp/tikv/store"
-# The following two fields affect how archived write-ahead logs will be deleted.
-# 1. If both set to 0, logs will be deleted asap and will not get into the archive.
-# 2. If wal-ttl-seconds is 0 and wal-size-limit is not 0,
-# WAL files will be checked every 10 min and if total size is greater
-# then wal-size-limit, they will be deleted starting with the
-# earliest until size_limit is met. All empty files will be deleted.
-# 3. If wal-ttl-seconds is not 0 and wal-size-limit is 0, then
-# WAL files will be checked every wal-ttl-seconds / 2 and those that
-# are older than wal-ttl-seconds will be deleted.
-# 4. If both are not 0, WAL files will be checked every 10 min and both
-# checks will be performed with ttl being first.
-# When you set the path to rocksdb directory in memory like in /dev/shm, you may want to set
-# wal-ttl-seconds to a value greater than 0 (like 86400) and backup your db on a regular basis.
-# See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database
+## The following two fields affect how archived WAL will be deleted.
+## 1. If both values are set to 0, logs will be deleted ASAP and will not get into the archive.
+## 2. If `wal-ttl-seconds` is 0 and `wal-size-limit` is not 0, WAL files will be checked every 10
+## min and if total size is greater than `wal-size-limit`, they will be deleted starting with the
+## earliest until `wal-size-limit` is met. All empty files will be deleted.
+## 3. If `wal-ttl-seconds` is not 0 and `wal-size-limit` is 0, then WAL files will be checked every
+## `wal-ttl-seconds / 2` and those that are older than `wal-ttl-seconds` will be deleted.
+## 4. If both are not 0, WAL files will be checked every 10 min and both checks will be performed,
+## with the TTL check performed first.
+## When you set the path to RocksDB directory in memory like in `/dev/shm`, you may want to set
+## `wal-ttl-seconds` to a value greater than 0 (like 86400) and back up your DB on a regular basis.
+## See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database .
# wal-ttl-seconds = 0
# wal-size-limit = 0
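+##
+## For example, for an in-memory RocksDB in `/dev/shm` as described above, one possible setup is to
+## keep the WAL on persistent storage and enable a TTL. This is only an illustration; the
+## `/data/tikv/wal` path is a placeholder, not a default:
+# wal-dir = "/data/tikv/wal"
+# wal-ttl-seconds = 86400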
-# rocksdb max total wal size
+## Max RocksDB WAL size in total
# max-total-wal-size = "4GB"
-# Rocksdb Statistics provides cumulative stats over time.
-# Turn statistics on will introduce about 5%-10% overhead for RocksDB,
-# but it is worthy to know the internal status of RocksDB.
+## RocksDB Statistics provides cumulative stats over time.
+## Turning statistics on will introduce about 5%-10% overhead for RocksDB, but it can help you to
+## know the internal status of RocksDB.
# enable-statistics = true
-# Dump statistics periodically in information logs.
-# Same as rocksdb's default value (10 min).
+## Dump statistics periodically in information logs.
+## Same as RocksDB's default value (10 min).
# stats-dump-period = "10m"
-# Due to Rocksdb FAQ: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ,
-# If you want to use rocksdb on multi disks or spinning disks, you should set value at
-# least 2MB;
+## Refer to: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ
+## If you want to use RocksDB on multiple disks or spinning disks, you should set this value to at least 2MB.
# compaction-readahead-size = 0
-# This is the maximum buffer size that is used by WritableFileWrite
+## Max buffer size that is used by WritableFileWrite.
# writable-file-max-buffer-size = "1MB"
-# Use O_DIRECT for both reads and writes in background flush and compactions
+## Use O_DIRECT for both reads and writes in background flush and compactions.
# use-direct-io-for-flush-and-compaction = false
-# Limit the disk IO of compaction and flush. Compaction and flush can cause
-# terrible spikes if they exceed a certain threshold. Consider setting this to
-# 50% ~ 80% of the disk throughput for a more stable result. However, in heavy
-# write workload, limiting compaction and flush speed can cause write stalls too.
-# rate-bytes-per-sec = 0
-
-# Enable or disable the pipelined write
+## Limit the disk IO of compaction and flush.
+## Compaction and flush can cause terrible spikes if they exceed a certain threshold. Consider
+## setting this to 50% ~ 80% of the disk throughput for a more stable result. However, in heavy
+## write workload, limiting compaction and flush speed can cause write stalls too.
+## 1. rate-bytes-per-sec is the only parameter you want to set most of the time. It controls the
+## total write rate of compaction and flush in bytes per second. Currently, RocksDB does not
+## enforce a rate limit for anything other than flush and compaction, e.g. writes to the WAL.
+## 2. rate-limiter-refill-period controls how often IO tokens are refilled. A smaller value
+## flattens IO bursts while introducing more CPU overhead.
+## 3. rate-limiter-mode indicates which types of operations count against the limit.
+## 1 : ReadOnly
+## 2 : WriteOnly
+## 3 : AllIo
+## 4. rate-limiter-auto-tuned enables dynamic adjustment of the rate limit within the range
+## [10MB/s, rate-bytes-per-sec], according to the recent demand for background I/O.
+# rate-bytes-per-sec = "10GB"
+# rate-limiter-refill-period = "100ms"
+# rate-limiter-mode = 2
+# rate-limiter-auto-tuned = true
+
+## Enable or disable the pipelined write.
# enable-pipelined-write = true
-# Allows OS to incrementally sync files to disk while they are being
-# written, asynchronously, in the background.
+## Allows OS to incrementally sync files to disk while they are being written, asynchronously,
+## in the background.
# bytes-per-sync = "1MB"
-# Allows OS to incrementally sync WAL to disk while it is being written.
+## Allows OS to incrementally sync WAL to disk while it is being written.
# wal-bytes-per-sync = "512KB"
-# Specify the maximal size of the Rocksdb info log file. If the log file
-# is larger than `max_log_file_size`, a new info log file will be created.
-# If max_log_file_size == 0, all logs will be written to one log file.
+## Specify the maximal size of the RocksDB info log file.
+## If the log file is larger than this config, a new info log file will be created.
+## If it is set to 0, all logs will be written to one log file.
# info-log-max-size = "1GB"
-# Time for the Rocksdb info log file to roll (in seconds).
-# If specified with non-zero value, log file will be rolled
-# if it has been active longer than `log_file_time_to_roll`.
-# 0 means disabled.
-# info-log-roll-time = "0"
+## Time for the RocksDB info log file to roll (in seconds).
+## If the log file has been active longer than this config, it will be rolled.
+## If it is set to 0, rolling will be disabled.
+# info-log-roll-time = "0s"
-# Maximal Rocksdb info log files to be kept.
+## Maximum number of RocksDB info log files to be kept.
# info-log-keep-log-file-num = 10
-# This specifies the Rocksdb info LOG dir.
-# If it is empty, the log files will be in the same dir as data.
-# If it is non empty, the log files will be in the specified dir,
-# and the db data dir's absolute path will be used as the log file
-# name's prefix.
+## Specifies the RocksDB info log directory.
+## If it is empty, the log files will be in the same directory as data.
+## If it is not empty, the log files will be in the specified directory, and the DB data directory's
+## absolute path will be used as the log file name's prefix.
# info-log-dir = ""
-# Column Family default used to store actual data of the database.
+## RocksDB info log level.
+# info-log-level = "info"
+
+## Options for `Titan`.
+[rocksdb.titan]
+## Enables or disables `Titan`. Note that Titan is still an experimental feature. Once enabled,
+## it cannot simply fall back; a forced fallback may result in data loss.
+## default: false
+# enabled = false
+
+## Maximum number of threads for `Titan` background GC jobs.
+## default: 4
+# max-background-gc = 4
+
+## Options for "Default" Column Family, which stores actual user data.
[rocksdb.defaultcf]
-# compression method (if any) is used to compress a block.
-# no: kNoCompression
-# snappy: kSnappyCompression
-# zlib: kZlibCompression
-# bzip2: kBZip2Compression
-# lz4: kLZ4Compression
-# lz4hc: kLZ4HCCompression
-# zstd: kZSTD
-
-# per level compression
+## Compression method (if any) used to compress a block.
+## no: kNoCompression
+## snappy: kSnappyCompression
+## zlib: kZlibCompression
+## bzip2: kBZip2Compression
+## lz4: kLZ4Compression
+## lz4hc: kLZ4HCCompression
+## zstd: kZSTD
+## `lz4` is a compression algorithm with moderate speed and compression ratio. The compression
+## ratio of `zlib` is high; it saves storage space, but its compression speed is slow and it
+## consumes many CPU resources.
+
+## Per level compression.
+## This config should be chosen carefully according to CPU and I/O resources. For example, if you
+## use the compression mode of "no:no:lz4:lz4:lz4:zstd:zstd" and find heavy I/O pressure on the
+## system (run the `iostat` command and find that %util stays at 100%, or run the `top` command and
+## find many iowaits) when writing (importing) a lot of data while the CPU resources are adequate,
+## you can compress level-0 and level-1 and exchange CPU resources for I/O resources. If you use
+## the compression mode of "no:no:lz4:lz4:lz4:zstd:zstd" and find that the I/O pressure of the
+## system is not high when writing a lot of data, but CPU resources are inadequate, run the
+## `top -H` command. If you find many bg threads (namely the compression threads of RocksDB)
+## running, you can exchange I/O resources for CPU resources and change the compression mode to
+## "no:no:no:lz4:lz4:zstd:zstd". In short, the goal is to make full use of the existing resources
+## of the system and improve TiKV performance given the current resources.
# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
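+##
+## For example, if CPU is the bottleneck rather than I/O (as described above), you might trade
+## I/O for CPU by also leaving level-2 uncompressed. This is an illustrative alternative, not the
+## default:
+# compression-per-level = ["no", "no", "no", "lz4", "lz4", "zstd", "zstd"]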
-# Approximate size of user data packed per block. Note that the
-# block size specified here corresponds to uncompressed data.
+## Set zstd compression for the bottommost level.
+## This config overrides `compression-per-level`. It uses zstd for the bottommost level to ensure a
+## consistent compression ratio, regardless of overall data size. If explicitly setting the
+## compression type for the bottommost level is not desired, "disable" should be used.
+# bottommost-level-compression = "zstd"
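+##
+## For example, to keep the bottommost level governed by `compression-per-level` instead, the
+## override can be turned off (illustrative, not the default):
+# bottommost-level-compression = "disable"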
+
+## The data block size. RocksDB compresses data based on the unit of block.
+## Similar to a page in other databases, a block is the smallest unit cached in the block cache. Note that
+## the block size specified here corresponds to uncompressed data.
# block-size = "64KB"
-# If you're doing point lookups you definitely want to turn bloom filters on, We use
-# bloom filters to avoid unnecessary disk reads. Default bits_per_key is 10, which
-# yields ~1% false positive rate. Larger bits_per_key values will reduce false positive
-# rate, but increase memory usage and space amplification.
+## If you're doing point lookups you definitely want to turn bloom filters on. We use bloom filters
+## to avoid unnecessary disk reads. Default bits_per_key is 10, which yields ~1% false positive
+## rate. Larger `bloom-filter-bits-per-key` values will reduce false positive rate, but increase
+## memory usage and space amplification.
# bloom-filter-bits-per-key = 10
-# false means one sst file one bloom filter, true means evry block has a corresponding bloom filter
+## `false` means one bloom filter per SST file, `true` means every block has a corresponding bloom
+## filter.
# block-based-bloom-filter = false
# level0-file-num-compaction-trigger = 4
-# Soft limit on number of level-0 files. We start slowing down writes at this point.
+## Soft limit on number of level-0 files.
+## When the number of SST files of level-0 reaches the limit of `level0-slowdown-writes-trigger`,
+## RocksDB tries to slow down the write operation, because too many SST files of level-0 can cause
+## higher read pressure of RocksDB.
# level0-slowdown-writes-trigger = 20
-# Maximum number of level-0 files. We stop writes at this point.
+## Maximum number of level-0 files.
+## When the number of SST files of level-0 reaches the limit of `level0-stop-writes-trigger`,
+## RocksDB stalls the new write operation.
# level0-stop-writes-trigger = 36
-# Amount of data to build up in memory (backed by an unsorted log
-# on disk) before converting to a sorted on-disk file.
+## Amount of data to build up in memory (backed by an unsorted log on disk) before converting to a
+## sorted on-disk file. It is the RocksDB MemTable size.
# write-buffer-size = "128MB"
-# The maximum number of write buffers that are built up in memory.
+## The maximum number of MemTables. The data written into RocksDB is first recorded in the WAL
+## log and then inserted into MemTables. When a MemTable reaches the size limit of
+## `write-buffer-size`, it becomes read-only and a new MemTable is generated to receive new write
+## operations. The flush threads of RocksDB will flush the read-only MemTables to the disks to
+## become SST files of level-0. `max-background-flushes` controls the maximum number of flush
+## threads. When the flush threads are busy and the number of MemTables waiting to be flushed to
+## the disks reaches the limit of `max-write-buffer-number`, RocksDB stalls the new write
+## operation. "Stall" is a flow control mechanism of RocksDB. When importing data, you can set the
+## `max-write-buffer-number` value higher, like 10.
# max-write-buffer-number = 5
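+##
+## For example, while importing a large amount of data you might temporarily raise this limit, as
+## suggested above (illustrative, not the default):
+# max-write-buffer-number = 10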
-# The minimum number of write buffers that will be merged together
-# before writing to storage.
+## The minimum number of write buffers that will be merged together before writing to storage.
# min-write-buffer-number-to-merge = 1
-# Control maximum total data size for base level (level 1).
+## Control maximum total data size for base level (level 1).
+## When the level-1 data size reaches the limit value of `max-bytes-for-level-base`, the SST files
+## of level-1 and their overlapping SST files of level-2 will be compacted. The golden rule for
+## setting `max-bytes-for-level-base` is to keep it roughly equal to the data volume of level-0,
+## which reduces unnecessary compaction. For example, if the compression mode is
+## "no:no:lz4:lz4:lz4:lz4:lz4", the `max-bytes-for-level-base` value can be `write-buffer-size * 4`,
+## because there is no compression of level-0 and level-1 and the trigger condition of compaction
+## for level-0 is that the number of the SST files reaches 4 (the default value). When both level-0
+## and level-1 adopt compression, it is necessary to analyze RocksDB logs to know the size of an SST
+## file compressed from a MemTable. For example, if the file size is 32MB, the proposed value of
+## `max-bytes-for-level-base` is 32MB * 4 = 128MB.
# max-bytes-for-level-base = "512MB"
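+##
+## A worked example of the rule above, assuming the defaults shown in this file: with no
+## compression on level-0 and level-1, `write-buffer-size = "128MB"` and a level-0 compaction
+## trigger of 4 SST files give `max-bytes-for-level-base` = 128MB * 4 = 512MB, which matches the
+## default above.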
-# Target file size for compaction.
+## Target file size for compaction.
+## The SST file size of level-0 is influenced by `write-buffer-size` and the level-0 compaction
+## algorithm. `target-file-size-base` is used to control the size of a single SST file of level-1
+## to level-6.
# target-file-size-base = "8MB"
-# Max bytes for compaction.max_compaction_bytes
+## Max bytes for `compaction.max_compaction_bytes`.
+## If it's necessary to enlarge the value of this entry, it's better to also enlarge `reserve-space`
+## in `storage` to ensure that a restarted TiKV instance can perform compactions successfully.
# max-compaction-bytes = "2GB"
-# There are four different algorithms to pick files to compact.
-# 0 : ByCompensatedSize
-# 1 : OldestLargestSeqFirst
-# 2 : OldestSmallestSeqFirst
-# 3 : MinOverlappingRatio
+## There are four different compaction priorities.
+## 0 : ByCompensatedSize
+## 1 : OldestLargestSeqFirst
+## 2 : OldestSmallestSeqFirst
+## 3 : MinOverlappingRatio
# compaction-pri = 3
-# block-cache used to cache uncompressed blocks, big block-cache can speed up read.
-# in normal cases should tune to 30%-50% system's total memory.
-# block-cache-size = "1GB"
-
-# Indicating if we'd put index/filter blocks to the block cache.
-# If not specified, each "table reader" object will pre-load index/filter block
-# during table initialization.
+## Indicates whether to put index/filter blocks into the block cache.
+## If not specified, each "table reader" object will pre-load index/filter block during table
+## initialization.
# cache-index-and-filter-blocks = true
-# Pin level0 filter and index blocks in cache.
+## Pin level-0 filter and index blocks in cache.
# pin-l0-filter-and-index-blocks = true
-# Enable read amplication statistics.
-# value => memory usage (percentage of loaded blocks memory)
-# 1 => 12.50 %
-# 2 => 06.25 %
-# 4 => 03.12 %
-# 8 => 01.56 %
-# 16 => 00.78 %
+## Enable read amplification statistics.
+## value => memory usage (percentage of loaded blocks memory)
+## 1 => 12.50 %
+## 2 => 06.25 %
+## 4 => 03.12 %
+## 8 => 01.56 %
+## 16 => 00.78 %
# read-amp-bytes-per-bit = 0
-# Pick target size of each level dynamically.
+## Pick target size of each level dynamically.
# dynamic-level-bytes = true
-# Options for Column Family write
-# Column Family write used to store commit informations in MVCC model
+## Optimizes bloom filters. If true, RocksDB won't create bloom filters for the max level of
+## the LSM to reduce metadata that should fit in RAM.
+## This value is set to `true` for the `default` CF by default, because upper-layer logic can
+## determine whether its KV data really exists without relying on bloom filters. However, we
+## suggest setting it to `false` when using `Raw` mode.
+# optimize-filters-for-hits = true
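+##
+## For example, when TiKV is used in `Raw` mode (as suggested above), consider:
+# optimize-filters-for-hits = false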
+
+## Enable compaction guard, which is an optimization to split SST files at TiKV region boundaries.
+## The optimization can help reduce compaction IO and allows us to use a larger SST file size
+## (thus fewer SST files overall) while making sure we can still efficiently clean up stale data
+## on region migration.
+## This config is available to default CF and write CF.
+# enable-compaction-guard = true
+
+## The lower bound of SST file size when compaction guard is enabled. This config prevents SST
+## files from being too small when compaction guard is enabled.
+# compaction-guard-min-output-file-size = "8M"
+
+## The upper bound of SST file size when compaction guard is enabled. This config prevents SST
+## files from being too large when compaction guard is enabled. It overrides `target-file-size-base`
+## for the same CF.
+# compaction-guard-max-output-file-size = "128M"
+
+## Options for "Default" Column Family for `Titan`.
+[rocksdb.defaultcf.titan]
+## The smallest value to store in blob files. Values smaller than
+## this threshold will be inlined in the base DB.
+## default: 1KB
+# min-blob-size = "1KB"
+
+## The compression method used to compress data in blob files.
+## no: kNoCompression
+## snappy: kSnappyCompression
+## zlib: kZlibCompression
+## bzip2: kBZip2Compression
+## lz4: kLZ4Compression
+## lz4hc: kLZ4HCCompression
+## zstd: kZSTD
+## default: lz4
+# blob-file-compression = "lz4"
+
+## Specifies the cache size for blob records.
+## default: 0
+# blob-cache-size = "0GB"
+
+## If the ratio of discardable size of a blob file is larger than
+## this threshold, the blob file will be GCed out.
+## default: 0.5
+# discardable-ratio = 0.5
+
+## The mode used to process blob files. In read-only mode Titan
+## stops writing value into blob log. In fallback mode Titan
+## converts blob index into real value on flush and compaction.
+## This option is especially useful for downgrading Titan.
+## normal: kNormal
+## read-only: kReadOnly
+## fallback: kFallback
+## default: normal
+# blob-run-mode = "normal"
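+##
+## For example, before downgrading Titan you might switch to fallback mode, so that blob indexes
+## are converted back into real values during flush and compaction, as described above
+## (illustrative, not the default):
+# blob-run-mode = "fallback"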
+
+## If set to true, values in blob files will be merged into a new blob file while
+## their corresponding keys are compacted to the last two levels in the LSM tree.
+##
+## With this feature enabled, Titan can achieve better scan performance and better write
+## performance during GC, but will suffer around 1.1 space amplification and 3 more write
+## amplification if no GC is needed (e.g. uniformly distributed keys) under the default RocksDB
+## settings.
+##
+## Requirement: level_compaction_dynamic_level_base = true
+## default: false
+# level_merge = false
+
+## Use merge operator to rewrite GC blob index.
+## default: false
+# gc-merge-rewrite = false
+
+## Options for "Write" Column Family, which stores MVCC commit information
[rocksdb.writecf]
+## Recommended to set it to the same value as `rocksdb.defaultcf.compression-per-level`.
# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
# block-size = "64KB"
+
+## Recommended to set it to the same value as `rocksdb.defaultcf.write-buffer-size`.
# write-buffer-size = "128MB"
# max-write-buffer-number = 5
# min-write-buffer-number-to-merge = 1
+
+## Recommended to set it to the same value as `rocksdb.defaultcf.max-bytes-for-level-base`.
# max-bytes-for-level-base = "512MB"
# target-file-size-base = "8MB"
-# in normal cases should tune to 10%-30% system's total memory.
-# block-cache-size = "256MB"
# level0-file-num-compaction-trigger = 4
# level0-slowdown-writes-trigger = 20
# level0-stop-writes-trigger = 36
@@ -439,16 +751,19 @@ endpoints = ["127.0.0.1:2379"]
# compaction-pri = 3
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
+# optimize-filters-for-hits = false
+# enable-compaction-guard = true
+# compaction-guard-min-output-file-size = "8M"
+# compaction-guard-max-output-file-size = "128M"
[rocksdb.lockcf]
# compression-per-level = ["no", "no", "no", "no", "no", "no", "no"]
# block-size = "16KB"
-# write-buffer-size = "128MB"
+# write-buffer-size = "32MB"
# max-write-buffer-number = 5
# min-write-buffer-number-to-merge = 1
# max-bytes-for-level-base = "128MB"
# target-file-size-base = "8MB"
-# block-cache-size = "256MB"
# level0-file-num-compaction-trigger = 1
# level0-slowdown-writes-trigger = 20
# level0-stop-writes-trigger = 36
@@ -457,9 +772,12 @@ endpoints = ["127.0.0.1:2379"]
# compaction-pri = 0
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
+# optimize-filters-for-hits = false
+# enable-compaction-guard = false
[raftdb]
-# max-sub-compactions = 1
+# max-background-jobs = 4
+# max-sub-compactions = 2
# max-open-files = 40960
# max-manifest-file-size = "20MB"
# create-if-missing = true
@@ -476,21 +794,26 @@ endpoints = ["127.0.0.1:2379"]
# wal-bytes-per-sync = "512KB"
# info-log-max-size = "1GB"
-# info-log-roll-time = "0"
+# info-log-roll-time = "0s"
# info-log-keep-log-file-num = 10
# info-log-dir = ""
+# info-log-level = "info"
+# optimize-filters-for-hits = true
[raftdb.defaultcf]
+## Recommended to set it to the same value as `rocksdb.defaultcf.compression-per-level`.
# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
# block-size = "64KB"
+
+## Recommended to set it to the same value as `rocksdb.defaultcf.write-buffer-size`.
# write-buffer-size = "128MB"
# max-write-buffer-number = 5
# min-write-buffer-number-to-merge = 1
+
+## Recommended to set it to the same value as `rocksdb.defaultcf.max-bytes-for-level-base`.
# max-bytes-for-level-base = "512MB"
# target-file-size-base = "8MB"
-# should tune to 256MB~2GB.
-# block-cache-size = "256MB"
# level0-file-num-compaction-trigger = 4
# level0-slowdown-writes-trigger = 20
# level0-stop-writes-trigger = 36
@@ -499,15 +822,141 @@ endpoints = ["127.0.0.1:2379"]
# compaction-pri = 0
# read-amp-bytes-per-bit = 0
# dynamic-level-bytes = true
+# optimize-filters-for-hits = true
+# enable-compaction-guard = false
+
+[raft-engine]
+## Enabling raft-engine causes all settings under `raftdb` to be ignored.
+# enable = false
+## Recovery mode. Candidates are `tolerate-corrupted-tail-records` and `absolute-consistency`.
+# recovery_mode = "tolerate-corrupted-tail-records"
+# bytes-per-sync = "256KB"
+# target-file-size = "128MB"
+# purge-threshold = "10GB"
+## Raft engine has a builtin entry cache. `cache-limit` limits the memory usage of the cache.
+# cache-limit = "1GB"
[security]
-# set the path for certificates. Empty string means disabling secure connectoins.
+## The path for TLS certificates. Empty string means disabling secure connections.
# ca-path = ""
# cert-path = ""
# key-path = ""
+# cert-allowed-cn = []
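+##
+## For example, to enable TLS you would point these options at your certificates. The paths below
+## are placeholders for illustration, not defaults:
+# ca-path = "/etc/tikv/tls/ca.pem"
+# cert-path = "/etc/tikv/tls/tikv.pem"
+# key-path = "/etc/tikv/tls/tikv-key.pem"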
+#
+## Avoid outputting data (e.g. user keys) to the info log. It currently does not avoid printing
+## user data altogether, but greatly reduces such logs.
+## Default is false.
+# redact-info-log = false
+
+## Configurations for encryption at rest. Experimental.
+[security.encryption]
+## Encryption method to use for data files.
+## Possible values are "plaintext", "aes128-ctr", "aes192-ctr" and "aes256-ctr". Value other than
+## "plaintext" means encryption is enabled, in which case master key must be specified.
+# data-encryption-method = "plaintext"
+
+## Specifies how often TiKV rotates data encryption key.
+# data-key-rotation-period = "7d"
+
+## Enable an optimization to reduce IO and mutex contention for encryption metadata management.
+## Once the option is turned on (which is the default after 4.0.9), the data format is not
+## compatible with TiKV <= 4.0.8. In order to downgrade to TiKV <= 4.0.8, one can turn off this
+## option and restart TiKV, after which TiKV will convert the data format to be compatible with
+## previous versions.
+# enable-file-dictionary-log = true
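+##
+## For example, to prepare for a downgrade to TiKV <= 4.0.8 (as described above), turn the
+## optimization off and restart TiKV before downgrading:
+# enable-file-dictionary-log = false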
+
+## Specifies master key if encryption is enabled. There are three types of master key:
+##
+## * "plaintext":
+##
+## Plaintext as master key means no master key is given and is only applicable when
+## encryption is not enabled, i.e. data-encryption-method = "plaintext". This type doesn't
+## have sub-config items. Example:
+##
+## [security.encryption.master-key]
+## type = "plaintext"
+##
+## * "kms":
+##
+## Use a KMS service to supply master key. Currently only AWS KMS is supported. This type of
+## master key is recommended for production use. Example:
+##
+## [security.encryption.master-key]
+## type = "kms"
+## ## KMS CMK key id. Must be a valid KMS CMK that the TiKV process has access to.
+## ## In production, it is recommended to grant TiKV access to the CMK using IAM.
+## key-id = "1234abcd-12ab-34cd-56ef-1234567890ab"
+## ## AWS region of the KMS CMK.
+## region = "us-west-2"
+## ## (Optional) AWS KMS service endpoint. Only required when non-default KMS endpoint is
+## ## desired.
+## endpoint = "https://kms.us-west-2.amazonaws.com"
+##
+## * "file":
+##
+## Supply a custom encryption key stored in a file. It is recommended NOT to use this in
+## production, as it defeats the purpose of encryption at rest, unless the file is stored on
+## tmpfs. The file must contain a 256-bit (32 bytes, regardless of the key length implied by
+## data-encryption-method) key encoded as a hex string and end with a newline ("\n"). Example:
+##
+## [security.encryption.master-key]
+## type = "file"
+## path = "/path/to/master/key/file"
+##
+# [security.encryption.master-key]
+# type = "plaintext"
+
+## Specifies the old master key when rotating the master key. Same config format as master-key.
+## The key is only accessed once during TiKV startup; after that, TiKV does not need access to the
+## key. It is okay to leave the stale previous-master-key config in place after master key rotation.
+# [security.encryption.previous-master-key]
+# type = "plaintext"
[import]
-# number of threads to handle RPC requests.
+## Number of threads to handle RPC requests.
# num-threads = 8
-# stream channel window size, stream will be blocked on channel full.
+
+## Stream channel window size, stream will be blocked on channel full.
# stream-channel-window = 128
+
+[backup]
+## Number of threads to perform backup tasks.
+## The default value is set to min(CPU_NUM * 0.75, 32).
+# num-threads = 24
+
+## Number of ranges to backup in one batch.
+# batch = 8
+
+## When the backup region [a,e) size exceeds `sst-max-size`, it will be backed up into several
+## files [a,b), [b,c), [c,d), [d,e), and the size of [a,b), [b,c), [c,d) will be `sst-max-size`
+## (or a little larger).
+# sst-max-size = "144MB"
+
+[pessimistic-txn]
+## The default and maximum delay before responding to TiDB when pessimistic
+## transactions encounter locks
+# wait-for-lock-timeout = "1s"
+
+## If more than one transaction is waiting for the same lock, only the one with the smallest
+## start timestamp will be woken up immediately when the lock is released. Others will be woken
+## up after `wake-up-delay-duration` to reduce contention and make the oldest one more likely to
+## acquire the lock.
+# wake-up-delay-duration = "20ms"
+
+## Enable the pipelined pessimistic lock; it only takes effect when processing pessimistic
+## transactions. Enabling this will improve performance, but slightly increase the transaction
+## failure rate.
+# pipelined = true
+
+[gc]
+## The number of keys to GC in one batch.
+# batch-keys = 512
+
+## Max bytes that the GC worker can write to RocksDB in one second.
+## If it is set to 0, there is no limit.
+# max-write-bytes-per-sec = "0"
+
+## Enable GC by compaction filter or not.
+# enable-compaction-filter = true
+
+## Garbage ratio threshold to trigger a GC.
+# ratio-threshold = 1.1