DataKit Main Configuration


The DataKit main configuration is used to configure the operational behavior of DataKit itself.

The directory is usually located at:

  • Linux/Mac: /usr/local/datakit/conf.d/datakit.conf
  • Windows: C:\Program Files\datakit\conf.d\datakit.conf

When DataKit is installed as a DaemonSet, the configuration files in the corresponding directories exist but are not actually loaded; the effective configuration is generated by injecting environment variables into datakit.yaml. Every setting below has a corresponding environment variable documented in the Kubernetes deployment documentation.

Datakit Main Configuration Example

The following is an example of the Datakit main configuration. We can enable various features based on this example (current version 1.69.1):

datakit.conf
################################################
# Global configures
################################################
# Default enabled input list.
default_enabled_inputs = [
  "cpu",
  "disk",
  "diskio",
  "host_processes",
  "hostobject",
  "mem",
  "net",
  "swap",
  "system",
]

# enable_pprof: bool
# If pprof is enabled, we can profile the running datakit
enable_pprof = true
pprof_listen = "localhost:6060" # pprof listen

# protect_mode: bool, default false
# When protect_mode is enabled, Datakit rejects overly aggressive collection parameters
# that would make it collect data too frequently.
protect_mode = true

# The user name running datakit. Generally for audit purpose. Default is root.
datakit_user = "root"

################################################
# ulimit: set max open-files limit(Linux only)
################################################
ulimit = 64000

################################################
# point_pool: use point pool for better memory usage
################################################
[point_pool]
  enable = false
  reserved_capacity = 4096

################################################
# DCA configure
################################################
[dca]
  # Enable or disable DCA
  enable = false

  # DCA websocket server address
  websocket_server = "ws://localhost:8000/ws"

################################################
# Upgrader 
################################################
[dk_upgrader]
  # host address
  host = "0.0.0.0"

  # port number
  port = 9542 

################################################
# Pipeline
################################################
[pipeline]
  # IP database type, support iploc and geolite2
  ipdb_type = "iploc"

  # How often to sync remote pipeline
  remote_pull_interval = "1m"

  #
  # reftab configures
  #
  # Reftab remote HTTP URL(https/http)
  refer_table_url = ""

  # How often reftab sync the remote
  refer_table_pull_interval = "5m"

  # use sqlite to store reftab data to release memory usage
  use_sqlite = false
  # or use pure memory to cache the reftab data
  sqlite_mem_mode = false

  # append run info
  disable_append_run_info = false

  # default pipeline
  [pipeline.default_pipeline]
    # logging = "<your_script.p>"
    # metric  = "<your_script.p>"
    # tracing = "<your_script.p>"

  # Offload data processing tasks to post-level data processors.
  [pipeline.offload]
    receiver = "datakit-http"
    addresses = [
      # "http://<ip>:<port>"
    ]

################################################
# HTTP server(9529)
################################################
[http_api]

  # HTTP server address
  listen = "localhost:9529"

  # Disable 404 page to hide detailed Datakit info
  disable_404page = false

  # Only enable these APIs. If the list is empty, all APIs are enabled.
  public_apis = []

  # Datakit server-side timeout
  timeout = "30s"
  close_idle_connection = false

  # API rate limit(QPS)
  request_rate_limit = 20.0

  #
  # RUM related: we should port these configures to RUM inputs(TODO)
  #
  # When serving RUM(/v1/write/rum), extract the IP address from this HTTP header
  rum_origin_ip_header = "X-Forwarded-For"
  # When serving RUM(/v1/write/rum), only accept requests from these app-id.
  # If the list is empty, all apps' requests are accepted.
  rum_app_id_white_list = []

  # Only these domains are allowed for CORS. If the list is empty, all domains are allowed.
  allowed_cors_origins = []

  # Start Datakit web server with HTTPS
  [http_api.tls]
    # cert = "path/to/certificate/file"
    # privkey = "path/to/private_key/file"

################################################
# io configures
################################################
[io]
  # How often Datakit flushes data to dataway.
  # Datakit will upload data points once the cached(in-memory) points
  #  reach(>=) max_cache_count or the flush_interval fires.
  max_cache_count = 1000
  flush_workers   = 0 # default to (cpu_core * 2)
  flush_interval  = "10s"

  # Queue size of feed.
  feed_chan_size = 1

  # Set blocking if queue is full.
  # NOTE: Global blocking mode may consume more memory on large metric points.
  global_blocking = false

  # Data point filter configures.
  # NOTE: Most of the time, you should use web-side filter, it's a debug helper for developers.
  #[io.filters]
  #  logging = [
  #   "{ source = 'datakit' or f1 IN [ 1, 2, 3] }"
  #  ]
  #  metric = [
  #    "{ measurement IN ['datakit', 'disk'] }",
  #    "{ measurement CONTAIN ['host.*', 'swap'] }",
  #  ]
  #  object = [
  #    "{ class CONTAIN ['host_.*'] }",
  #  ]
  #  tracing = [
  #    "{ service = re("abc.*") AND some_tag CONTAIN ['def_.*'] }",
  #  ]

[recorder]
  enabled = false
  #path = "/path/to/point-data/dir"
  encoding = "v2"  # use protobuf-json format
  duration = "30m" # record for 30 minutes

  # only record these inputs, if empty, record all
  inputs = [
    #"cpu",
    #"mem",
  ]

  # only record these categories, if empty, record all
  category = [
    #"logging",
    #"object",
  ]

################################################
# Dataway configure
################################################
[dataway]
  # urls: Dataway URL list
  # NOTE: do not configure multiple URLs here, it's a deprecated feature.
  urls = [
    # "https://openway.guance.com?token=<YOUR-WORKSPACE-TOKEN>"
  ]

  # Dataway HTTP timeout
  timeout_v2 = "30s"

  # max_retry_count specifies how many times at most a request will be retried when the dataway API fails(not 4xx),
  # default value(and minimum) is 1 and maximum value is 10.
  #
  # The default is set to 1 to make the API fail ASAP and release memory.
  max_retry_count = 1

  # The interval between two retry operation, valid time units are "ns", "us", "ms", "s", "m", "h"
  retry_delay = "1s"

  # HTTP Proxy
  # Format: "http(s)://IP:Port"
  http_proxy = ""

  max_idle_conns   = 0       # limit idle TCP connections for HTTP requests to Dataway
  enable_httptrace = false   # enable trace HTTP metrics(connection/DNS/TLS and so on)
  idle_timeout     = "90s"   # not-set, default 90s

  # HTTP body content type, other candidates are(case insensitive):
  #  - v1: line-protocol
  #  - v2: protobuf
  content_encoding = "v2"

  # Enable gzip to upload point data.
  #
  # Do NOT disable gzip, or you will get large network payloads.
  gzip = true

  max_raw_body_size = 1048576 # max body size(before gzip) in bytes

  # Custom tag or field keys that will be extracted from existing points
  # to build the X-Global-Tags HTTP header value.
  global_customer_keys = []
  enable_sinker        = false # disable sinker

  # use dataway as NTP server
  [dataway.ntp]
    interval = "5m"  # sync dataway time each 5min

    # if datakit local time and dataway time's ABS value reach the diff,
    # datakit's soft time will update to the dataway time.
    # NOTE: diff MUST larger than "1s"
    diff     = "30s" 

  # WAL queue for uploading points
  [dataway.wal]
    max_capacity_gb = 2.0 # 2GB reserved disk space for each category(M/L/O/T/...)
    #workers = 4          # flush workers on WAL(default to CPU limited cores)
    #mem_cap = 4          # in-memory queue capacity(default to CPU limited cores)
    #fail_cache_clean_interval = "30s" # interval for cleaning data that failed to upload


################################################
# Datakit logging configure
################################################
[logging]

  # log path
  log = "/var/log/datakit/log"

  # HTTP access log
  gin_log = "/var/log/datakit/gin.log"

  # log level(info/debug)
  level = "info"

  # Disable log color
  disable_color = false

  # log rotate size(in MB)
  # DataKit will always keep at most n+1(n backup logs and 1 active log) split log files on disk.
  rotate = 32

  # Upper limit count of backup log
  rotate_backups = 5

################################################
# Global tags
################################################
# We will try to add these tags to every collected data point if these
# tags do not exist in the original data.
#
# NOTE: to get the real IP of the current node, just set
# "$datakit_ip" or "__datakit_ip" here. Same for the hostname.
[global_host_tags]
  ip   = "$datakit_ip"
  host = "$datakit_hostname"

[election]
  # Enable election
  enable = false

  # Election whitelist
  # NOTE: Empty to disable whitelist
  node_whitelist = []

  # Election namespace.
  # NOTE: for a single workspace, there can be multiple election namespaces.
  namespace = "default"

  # If enabled, every data point will add a tag with election_namespace = <your-election-namespace>
  enable_namespace_tag = false

  # Like global_host_tags, but only for data points that are remotely collected(such as MySQL/Nginx).
  [election.tags]
    #  project = "my-project"
    #  cluster = "my-cluster"

###################################################
# Tricky: we can rename the default hostname here
###################################################
[environments]
  ENV_HOSTNAME = ""

################################################
# resource limit configures
################################################
[resource_limit]

  # enable or disable resource limit
  enable = true

  # Linux only, cgroup path
  path = "/datakit"

  # Limit CPU cores
  cpu_cores = 2.0

  # set max memory usage(MB)
  mem_max_mb = 4096

################################################
# git_repos configures
################################################

# We can host all input configurations on a git server
[git_repos]
  # git pull interval
  pull_interval = "1m"

  # git repository settings
  [[git_repos.repo]]
    # enable the repository or not
    enable = false

    # the branch name to pull
    branch = "master"

    # git repository URL. There are 3 formats here:
    #   - HTTP(s): such as "https://github.datakit.com/path/to/datakit-conf.git"
    #   - Git: such as "git@github.com:path/to/datakit.git"
    #   - SSH: such as "ssh://git@github.com:9000/path/to/repository.git"
    url = ""

    # For the Git and SSH formats, we need extra configurations:
    ssh_private_key_path = ""
    ssh_private_key_password = ""

################################################
# crypto key or key filePath.
################################################
[crypto]
  aes_key = ""
  aes_Key_file = ""

[remote_job]
  enable=false
  envs = ["OSS_BUCKET_HOST=host","OSS_ACCESS_KEY_ID=key","OSS_ACCESS_KEY_SECRET=secret","OSS_BUCKET_NAME=bucket"]
  interval = "30s"
  java_home=""

HTTP Service Configuration

DataKit will start an HTTP service to receive external data or provide basic data services externally.

Modify HTTP Service Address

The default HTTP service address is localhost:9529. If port 9529 is occupied or you wish to access DataKit’s HTTP service from outside (for example, receiving RUM or Tracing data), you can modify it as follows:

[http_api]
   listen = "0.0.0.0:<other-port>"
   # or use IPV6 address
   # listen = "[::]:<other-port>"

Note: IPv6 support requires Datakit 1.5.7 or later.

Use Unix Domain Socket

Datakit supports UNIX domain socket access. To enable it, set the listen field to the full path of a file that does not yet exist. Here we use datakit.sock as an example; any filename will do.

[http_api]
   listen = "/tmp/datakit.sock"
After configuring it, you can test whether the setup works with curl:

sudo curl --no-buffer -XGET --unix-socket /tmp/datakit.sock http:/localhost/v1/ping

More information about curl testing commands can be found here.

HTTP Request Frequency Control

Version-1.62.0 has already enabled this feature by default.

Since DataKit needs to handle a large amount of external data writes, Datakit sets a default API rate limit of 20 QPS per client to avoid creating excessive overhead on the node:

[http_api]
  request_rate_limit = 20.0 # Limits the QPS of requests per client (IP + API route) per second

  # If there is indeed a large amount of data to write, the limit can be appropriately increased to avoid data loss (clients will receive an HTTP 429 error code after exceeding the limit).
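
To see the limiter in action, you can issue a quick burst of requests from the Datakit host. This is only a rough sketch: which endpoints are reachable depends on your public_apis and collector settings, and requests beyond the per-second quota should come back with HTTP 429.

# Fire 40 quick requests at the ping API; once the per-client quota
# (20 QPS by default) is exceeded, the remaining requests should return 429.
for i in $(seq 1 40); do
  curl -s -o /dev/null -w "%{http_code}\n" http://localhost:9529/v1/ping
done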

Other Settings

[http_api]
    close_idle_connection = true # Close idle connections
    timeout = "30s"              # Set server-side HTTP timeout

Refer to here

HTTP API Access Control

Version-1.64.0

For security reasons, Datakit restricts access to some of its own APIs by default (these APIs can only be accessed via localhost). If DataKit is deployed in a public network environment and you need to request these APIs from other machines or the public network, you can modify the following public_apis field configuration in datakit.conf:

[http_api]
  public_apis = [
    # Allow Datakit's own metrics exposure interface /metrics
    "/metrics",
    # ... # Other interfaces
  ]

By default, public_apis is empty. For convenience and compatibility, the Ping interface (/v1/ping) and the basic data upload interface (/v1/write/:category) are open by default, while all other interfaces refuse external access. Collector-related interfaces, such as those of the trace collectors, are opened automatically once the corresponding collector is enabled and can then be accessed externally.
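
As a quick sanity check after adding /metrics to public_apis (a hypothetical example: replace <datakit-ip> with the address Datakit actually listens on, and make sure listen is not bound to localhost only), request the metrics endpoint from another machine:

# Expect a Prometheus-style metrics payload on success; a refused or
# forbidden response means the API is still restricted to localhost.
curl http://<datakit-ip>:9529/metrics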

Refer to here for adding API whitelists in Kubernetes.

Attention

Once public_apis is no longer empty, the APIs that were open by default must be added back manually:

[http_api]
  public_apis = [
    "/v1/write/metric",
    "/v1/write/logging",
    # ...
  ]

Global Tag Modification

Version-1.4.6

Datakit allows configuring global tags for all the data it collects. Global tags are divided into two types:

  • Host-based global tags (GHT): Collected data is bound to the current host, such as CPU/memory metrics data
  • Election-based global tags (GET): Collected data comes from a public (remote) entity, like MySQL/Redis, etc. These collectors generally participate in elections, so the data won't carry tags related to the current host.
[global_host_tags] # These inside are referred to as "global host tags"
  ip   = "__datakit_ip"
  host = "__datakit_hostname"

[election]
  [election.tags] # These inside are referred to as "global election tags"
    project = "my-project"
    cluster = "my-cluster"

When adding global tags, several things need to be noted:

  1. These global tags’ values can use several wildcards supported by Datakit (both double underscores (__) prefix and $ are acceptable):

    1. __datakit_ip/$datakit_ip: The tag value will be set to the first primary NIC IP obtained by DataKit.
    2. __datakit_hostname/$datakit_hostname: The tag value will be set to DataKit's hostname.
  2. Due to DataKit's data transmission protocol limitations, field keys must not appear among the global tags; otherwise data processing will fail due to protocol violations. Refer to each collector's field list for details. Additionally, don't add too many tags, and both the key and value length of each tag are restricted.

  3. If the collected data already carries tags with the same name, DataKit will not append the configured global tags again.
  4. Even if GHT is not configured, DataKit still adds a host=__datakit_hostname tag, because hostname is the default field used for data correlation on the Guance platform; logs/CPU/memory collections therefore all carry the host tag.
  5. These two types of global tags (GHT/GET) can overlap, for example, both can set a project = "my-project" tag.
  6. When election is not enabled, GET inherits all tags from GHT (it will at least have one host tag).
  7. Election-based collectors default to appending GET, while non-election-based collectors default to appending GHT.
How to distinguish between election and non-election collectors?

At the top of each collector's documentation there are badges indicating the platform compatibility and collection characteristics of that collector. If the election badge is present, the collector is election-based.

Global Tag Settings During Remote Collection

Since DataKit will default to appending the tag host=<DataKit host name> to all collected data, this default addition might cause confusion in certain cases.

Take MySQL as an example: MySQL may not be on the machine where DataKit resides, yet you may want the host tag to represent the actual hostname of the collected MySQL instance (or another identifier of the cloud database), rather than the hostname of the machine running DataKit.

For such situations, there are two ways to bypass the global tag on DataKit:

  • Specific collectors generally provide a configuration section like the following where you can add tags. If you don't want DataKit to add the default host=xxx tag, override it there. Taking MySQL as an example:
[inputs.mysql.tags]
  host = "real-mysql-host-name"
  • When pushing data to DataKit via HTTP API, you can use the API parameter ignore_global_tags to block all global tags.
Tip

Starting from 1.4.20, DataKit defaults to using the IP/Host from the connection address of the collected service as the host tag value.

DataKit Operation Log Configuration

DataKit has two logs: one is its own operation log (/var/log/datakit/log), and the other is the HTTP Access log (/var/log/datakit/gin.log).

The default DataKit log level is info. Editing datakit.conf allows modification of the log level and shard size:

[logging]
  level = "debug" # Change info to debug
  rotate = 32     # Each log shard is 32MB
  • level: Setting it to debug enables more verbose logs (currently only debug/info are supported).
  • rotate: DataKit splits logs by default, with a default shard size of 32MB and 6 shards in total (1 shard currently being written plus 5 rotated shards; the shard count is not yet configurable). If the DataKit logs take up too much disk space (up to 32 x 6 = 192MB), reduce the rotate size (e.g., set it to 4, unit in MB). HTTP access logs are split automatically in the same way.
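
If you want to check how much disk space the current log shards occupy, here is a simple sketch using the default log paths from the [logging] section above:

# List Datakit's own log shards and the HTTP access log shards with sizes
ls -lh /var/log/datakit/log* /var/log/datakit/gin.log*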

Advanced Configuration

Below involves some advanced configurations. If you're not confident about the configurations, it's recommended to consult our technical experts.

Point Cache

Version-1.28.0

Point caching currently introduces additional performance overhead and is not recommended for use.

To optimize memory usage under high load conditions, enabling the Point Pool can help:

# datakit.conf
[point_pool]
    enable = true
    reserved_capacity = 4096

Simultaneously, in the Datakit configuration, you can enable content_encoding = "v2" transmission encoding ( Version-1.32.0 has already enabled v2 by default). Compared to v1, it consumes less memory and CPU.

Attention
  • Under low load (Datakit memory usage around 100MB), enabling the point pool increases Datakit's own memory consumption. High load generally refers to scenarios consuming 2GB+ of memory. Enabling this can also improve Datakit's CPU consumption.

IO Module Tuning

Version-1.4.8 · Experimental

In some cases, the volume of single-machine data collection by DataKit is very large. If network bandwidth is limited, it may lead to interruptions or losses in data collection. You can alleviate this problem by configuring some parameters of the io module:

[io]
  feed_chan_size  = 1     # Data processing queue length
  max_cache_count = 1000  # Threshold for bulk data sending point numbers, triggers sending once exceeded
  flush_interval  = "10s" # Interval threshold for data sending, sends at least once every 10 seconds
  flush_workers   = 0     # Number of data upload workers (default quota CPU core * 2)

For blocking mode, see the relevant instructions for Kubernetes.

Refer to here

Resource Limitation

Due to the unpredictable volume of data processed by DataKit, if physical limits are not placed on the resources consumed by DataKit, it could potentially consume a significant amount of resources on the node. Here, we can use Linux's cgroup and Windows' job object to impose limits, with the following configuration in datakit.conf:

[resource_limit]
  path = "/datakit" # Linux cgroup restriction directory, such as /sys/fs/cgroup/memory/datakit, /sys/fs/cgroup/cpu/datakit

  # Allowed CPU core count
  cpu_cores = 2.0

  # Default allows 4GB memory(memory + swap) usage
  # If set to 0 or negative number, memory limitation is not enabled
  mem_max_mb = 4096 
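
To confirm the limits actually applied, you can inspect the cgroup created for Datakit. This is only a sketch: the exact files depend on your cgroup version (see the Attention notes below) and on the configured path (/datakit by default).

# cgroup v1 layout (e.g. /sys/fs/cgroup/memory/datakit):
cat /sys/fs/cgroup/memory/datakit/memory.limit_in_bytes
# cgroup v2 layout (e.g. /sys/fs/cgroup/datakit):
cat /sys/fs/cgroup/datakit/memory.max
cat /sys/fs/cgroup/datakit/cpu.max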

If DataKit exceeds the memory limit, it will be forcibly killed by the operating system. You will see something like the following, and you then need to manually restart the service:

$ systemctl status datakit 
 datakit.service - Collects data and uploads it to DataFlux.
     Loaded: loaded (/etc/systemd/system/datakit.service; enabled; vendor preset: enabled)
     Active: activating (auto-restart) (Result: signal) since Fri 2022-02-30 16:39:25 CST; 1min 40s ago
    Process: 3474282 ExecStart=/usr/local/datakit/datakit (code=killed, signal=KILL)
   Main PID: 3474282 (code=killed, signal=KILL)
Attention
  • Resource limitation is enabled by default only during host installation.
  • Supports control over CPU usage rate and memory usage (memory + swap), and only supports Linux and Windows operating systems ( Version-1.15.0).
  • CPU usage rate control currently does not support these Windows operating systems: Windows 7, Windows Server 2008 R2, Windows Server 2008, Windows Vista, Windows Server 2003, and Windows XP.
  • Non-root users must reinstall the service to modify resource limitation configurations.
  • CPU core limitation affects the worker number configuration of some submodules of Datakit (generally integer multiples of the CPU core count). For instance, the number of data upload workers is CPU core count * 2. A single upload worker uses default 10MB of memory for data sending, so if more CPU cores are opened, it will affect the overall memory usage of Datakit.
  • Version-1.5.8 starts supporting cgroup v2. If uncertain about the cgroup version, confirm it via the command mount | grep cgroup.
  • Version-1.68.0 supports configuring the CPU core limit in datakit.conf and abandons the original percentage-based configuration. With percentage configuration, the same collection pressure resulted in different CPU quotas on hosts with different CPU core counts, possibly causing abnormal behavior. When upgrading an older version of Datakit, specify the DK_LIMIT_CPUCORES environment variable in the upgrade command; if it is not specified, the previous percentage configuration continues to be used. If Datakit is reinstalled, the CPU core limit method is adopted directly.

Election Configuration

Refer to here

DataWay Parameter Configuration

Only a few settings in the Dataway section should be adjusted; the others are not recommended to be modified:

  • timeout: Upload timeout to Guance, default 30s
  • max_retry_count: Set the retry count for Dataway sending (default 1 time, maximum 10 times) Version-1.17.0
  • retry_delay: Set the base step length for retry intervals, default 1s. The base step means the first retry is 1s, the second is 2s, the third is 4s, and so on (increasing exponentially as 2^n) Version-1.17.0
  • max_raw_body_size: Controls the maximum size of a single upload package (before compression), in bytes Version-1.17.1
  • content_encoding: Can choose v1 or v2 Version-1.17.1
    • v1 is the line protocol (the default before 1.32.0)
    • v2 is the Protobuf protocol, which performs better than v1 in all aspects and has been the default since Version-1.32.0

Refer to here for deployment-related configurations in Kubernetes.

WAL Queue Configuration

Version-1.60.0

In [dataway.wal], we can adjust the WAL queue configuration:

  [dataway.wal]
     max_capacity_gb = 2.0             # 2GB reserved disk space for each category(M/L/O/T/...)
     workers = 0                       # flush workers on WAL(default to CPU limited cores)
     mem_cap = 0                       # in-memory queue capacity(default to CPU limited cores)
     fail_cache_clean_interval = "30s" # duration for cleaning failed uploaded data

Disk files are located in the cache/dw-wal directory under the Datakit installation directory:

/usr/local/datakit/cache/dw-wal/
├── custom_object
│   └── data
├── dialtesting
│   └── data
├── dynamic_dw
│   └── data
├── fc
│   └── data
├── keyevent
│   └── data
├── logging
│   ├── data
│   └── data.00000000000000000000000000000000
├── metric
│   └── data
├── network
│   └── data
├── object
│   └── data
├── profiling
│   └── data
├── rum
│   └── data
├── security
│   └── data
└── tracing
    └── data

13 directories, 14 files

Except for fc, which is the failure-retransmission queue, each directory corresponds to a data category. When an upload fails, the data is cached under the fc directory and Datakit retries uploading it periodically.

If the current host's disk performance is insufficient, you can try using WAL under tmpfs.
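
Below is a minimal sketch of moving the WAL onto tmpfs, assuming the default installation path and that 1GB per host is enough for your WAL volume. Note that anything stored in tmpfs is lost on reboot, including cached data that failed to upload.

# Stop Datakit, mount a tmpfs over the WAL directory, then start it again
sudo systemctl stop datakit
sudo mount -t tmpfs -o size=1G tmpfs /usr/local/datakit/cache/dw-wal
sudo systemctl start datakit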

Sinker Configuration

Refer to here

Using Git to Manage DataKit Configurations

Refer to here

Local Default Pipeline Script Settings

Version-1.61.0

Supports setting default Pipeline scripts locally. If there is a conflict with the remote default script settings, the local settings take precedence.

It can be configured in two ways:

  • For host-based deployment, you can specify the default script for each category in the DataKit main configuration file, as follows:

    # default pipeline
    [pipeline.default_pipeline]
        # logging = "<your_script.p>"
        # metric  = "<your_script.p>"
        # tracing = "<your_script.p>"
    
  • For container-based deployment, you can use the environment variable ENV_PIPELINE_DEFAULT_PIPELINE, with a value like {"logging":"abc.p","metric":"xyz.p"}

Setting the Maximum Value for Open File Descriptors

In the Linux environment, you can configure the ulimit item in the Datakit main configuration file to set the maximum number of files that Datakit can open, as follows:

ulimit = 64000

The default ulimit configuration is 64000. In Kubernetes, this can be set via ENV_ULIMIT.
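
To verify the limit that the running Datakit process actually got, here is a quick check (assuming a single datakit process on a Linux host with pidof available):

# "Max open files" should reflect the configured ulimit (64000 by default)
cat /proc/$(pidof datakit)/limits | grep -i "open files"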

Explanation of CPU Usage Rate in Resource Restrictions

CPU usage rate is measured in percentages (maximum value 100.0). Taking an 8-core CPU as an example, if the limit cpu_max is 20.0 (i.e., 20%), then the maximum CPU consumption of DataKit will be shown as approximately 160% in the top command.

Collector Password Protection

Version-1.31.0

If you wish to avoid storing passwords in plain text within configuration files, you can use this feature.

When DataKit encounters ENC[] while loading collector configuration files, it replaces the text with the password obtained from a file, environment variable, or AES encryption before reloading it into memory.

ENC currently supports the following methods:

  • File form (recommended):

    Password format in the configuration file: ENC[file:///path/to/enc4dk], simply fill in the correct password in the corresponding file.

  • AES encryption method.

    You need to configure the key in the [crypto] section of the main configuration file datakit.conf (aes_key, or aes_Key_file to point to a key file), with a key length of 16 characters. The format to fill in the password is: ENC[aes://5w1UiRjWuVk53k96WfqEaGUYJ/Oje7zr8xmBeGa3ugI=]

Next, using mysql as an example, we explain how to configure and use both methods:

1 File Form

First, place the plaintext password in the file /usr/local/datakit/enc4mysql, then modify the configuration file mysql.conf:

# Partial configuration
[[inputs.mysql]]
  host = "localhost"
  user = "datakit"
  pass = "ENC[file:///usr/local/datakit/enc4mysql]"
  port = 3306
  # sock = "<SOCK>"
  # charset = "utf8"

DK will read the password from /usr/local/datakit/enc4mysql and replace it, resulting in pass = "Hello*******".
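
For reference, here is a minimal sketch of preparing that file (the path follows the example above; the placeholder password, the trailing-newline handling, and the permission tightening are illustrative choices, not documented requirements):

# Put the plaintext password into the file referenced by ENC[file://...]
printf '%s' '<your-mysql-password>' | sudo tee /usr/local/datakit/enc4mysql >/dev/null
sudo chmod 600 /usr/local/datakit/enc4mysql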

2 AES Encryption Method

First, configure the key in datakit.conf:

# crypto key or key file path.
[crypto]
  # Configure key
  aes_key = "0123456789abcdef"
  # Or, place the key in a file and configure the file location here.
  aes_Key_file = "/usr/local/datakit/mykey"

mysql.conf configuration file:

pass = "ENC[aes://5w1UiRjWuVk53k96WfqEaGUYJ/Oje7zr8xmBeGa3ugI=]"

Note that the full ciphertext produced by AES encryption must be filled in. Below are code examples (Go, followed by Java):

// AESEncrypt  encryption.
func AESEncrypt(key []byte, plaintext string) (string, error) {
    block, err := aes.NewCipher(key)
    if err != nil {
        return "", err
    }

    // PKCS7 padding
    padding := aes.BlockSize - len(plaintext)%aes.BlockSize
    padtext := bytes.Repeat([]byte{byte(padding)}, padding)
    plaintext += string(padtext)
    ciphertext := make([]byte, aes.BlockSize+len(plaintext))
    iv := ciphertext[:aes.BlockSize]
    if _, err := io.ReadFull(rand.Reader, iv); err != nil {
        return "", err
    }
    mode := cipher.NewCBCEncrypter(block, iv)
    mode.CryptBlocks(ciphertext[aes.BlockSize:], []byte(plaintext))

    return base64.StdEncoding.EncodeToString(ciphertext), nil
}

// AESDecrypt AES decryption.
func AESDecrypt(key []byte, cryptoText string) (string, error) {
    ciphertext, err := base64.StdEncoding.DecodeString(cryptoText)
    if err != nil {
        return "", err
    }

    block, err := aes.NewCipher(key)
    if err != nil {
        return "", err
    }

    if len(ciphertext) < aes.BlockSize {
        return "", fmt.Errorf("ciphertext too short")
    }

    iv := ciphertext[:aes.BlockSize]
    ciphertext = ciphertext[aes.BlockSize:]

    mode := cipher.NewCBCDecrypter(block, iv)
    mode.CryptBlocks(ciphertext, ciphertext)

    // Remove PKCS7 padding
    padding := int(ciphertext[len(ciphertext)-1])
    if padding > aes.BlockSize {
        return "", fmt.Errorf("invalid padding")
    }
    ciphertext = ciphertext[:len(ciphertext)-padding]

    return string(ciphertext), nil
}
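
The equivalent Java implementation, followed by a small main() that demonstrates encryption and decryption:
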
import javax.crypto.Cipher;
import javax.crypto.spec.IvParameterSpec;
import javax.crypto.spec.SecretKeySpec;
import java.security.SecureRandom;
import java.util.Base64;

public class AESUtils {
    public static String AESEncrypt(byte[] key, String plaintext) throws Exception {
        javax.crypto.Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
        SecretKeySpec secretKeySpec = new SecretKeySpec(key, "AES");

        SecureRandom random = new SecureRandom();
        byte[] iv = new byte[16];
        random.nextBytes(iv);
        IvParameterSpec ivParameterSpec = new IvParameterSpec(iv);
        cipher.init(Cipher.ENCRYPT_MODE, secretKeySpec, ivParameterSpec);
        byte[] encrypted = cipher.doFinal(plaintext.getBytes());
        byte[] ivAndEncrypted = new byte[iv.length + encrypted.length];
        System.arraycopy(iv, 0, ivAndEncrypted, 0, iv.length);
        System.arraycopy(encrypted, 0, ivAndEncrypted, iv.length, encrypted.length);

        return Base64.getEncoder().encodeToString(ivAndEncrypted);
    }

    public static String AESDecrypt(byte[] key, String cryptoText) throws Exception {
        byte[] ciphertext = Base64.getDecoder().decode(cryptoText);

        SecretKeySpec secretKeySpec = new SecretKeySpec(key, "AES");

        if (ciphertext.length < 16) {
            throw new Exception("ciphertext too short");
        }

        byte[] iv = new byte[16];
        System.arraycopy(ciphertext, 0, iv, 0, 16);
        byte[] encrypted = new byte[ciphertext.length - 16];
        System.arraycopy(ciphertext, 16, encrypted, 0, ciphertext.length - 16);

        Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
        IvParameterSpec ivParameterSpec = new IvParameterSpec(iv);
        cipher.init(Cipher.DECRYPT_MODE, secretKeySpec, ivParameterSpec);

        byte[] decrypted = cipher.doFinal(encrypted);

        return new String(decrypted);
    }
}
public static void main(String[] args) {
    try {
        String key = "0123456789abcdef"; // 16, 24, or 32 bytes AES key
        String plaintext = "HelloAES9*&.";
        byte[] keyBytes = key.getBytes("UTF-8");

        String encrypted = AESEncrypt(keyBytes, plaintext);
        System.out.println("Encrypted text: " + encrypted);

        String decrypt = AESDecrypt(keyBytes, encrypted);
        System.out.println("Decoded result: " + decrypt);
    } catch (Exception e) {
        System.out.println(e);
        e.printStackTrace();
    }
}

In K8S environments, private keys can be added via environment variables: ENV_CRYPTO_AES_KEY and ENV_CRYPTO_AES_KEY_FILEPATH. Refer to DaemonSet Installation - Others.

Remote Tasks


Version-1.63.0


DataKit receives tasks issued from the center and executes them. Currently, only the JVM dump function is supported.

This feature executes the jmap command, generates a dump file, and uploads it to OSS, an AWS S3 bucket, or Huawei Cloud OBS.

After installing DK, two files will be generated under the installation directory template/service-task: jvm_dump_host_script.py and jvm_dump_k8s_script.py. The former is the script for host mode, and the latter is for the k8s environment.

After DK starts, it will execute the script periodically. If the script is modified, the modifications will be overwritten when DK restarts.

In the host environment, python3 and a few packages are required. If they are not installed, install them:

# With python3 environment
pip install requests
# Or
pip3 install requests

# If uploading to HuaWei Cloud OBS, the library needs to be installed:
pip install esdk-obs-python --trusted-host pypi.org

# If uploading to AWS S3, boto3 needs to be installed:
pip install boto3

Environment variables can control uploading to multiple storage bucket types. Below are the configuration instructions, which apply similarly to the k8s environment:

# upload to OSS
[remote_job]
  enable = true
  envs = [
      "REMOTE=oss",
      "OSS_BUCKET_HOST=host","OSS_ACCESS_KEY_ID=key","OSS_ACCESS_KEY_SECRET=secret","OSS_BUCKET_NAME=bucket",
    ]
  interval = "30s"

# or upload to AWS:
[remote_job]
  enable = true
  envs = [
      "REMOTE=aws",
      "AWS_BUCKET_NAME=bucket","AWS_ACCESS_KEY_ID=AK","AWS_SECRET_ACCESS_KEY=SK","AWS_DEFAULT_REGION=us-west-2",
    ]
  interval = "30s"

# or upload to OBS:
[remote_job]
  enable = true
  envs = [
      "REMOTE=obs",
      "OBS_BUCKET_NAME=bucket","OBS_ACCESS_KEY_ID=AK","OBS_SECRET_ACCESS_KEY=SK","OBS_SERVER=https://xxx.myhuaweicloud.com"
    ]
  interval = "30s"    

The k8s environment requires calling the Kubernetes API, so Role-Based Access Control (RBAC) is needed.

Related configuration:

The directory is generally located at:

  • Linux/Mac: /usr/local/datakit/conf.d/datakit.conf
  • Windows: C:\Program Files\datakit\conf.d\datakit.conf

Modify the configuration, and if not present, add the following at the end:

[remote_job]
  enable=true
  envs=["REMOTE=oss","OSS_BUCKET_HOST=<bucket_host>","OSS_ACCESS_KEY_ID=<key>","OSS_ACCESS_KEY_SECRET=<secret key>","OSS_BUCKET_NAME=<name>"]
  interval="100s"
  java_home=""

Modify the DataKit yaml file and add the RBAC permissions:

---

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: datakit
rules:
- apiGroups: ["rbac.authorization.k8s.io"]
  resources: ["clusterroles"]
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources: ["nodes", "nodes/stats", "nodes/metrics", "namespaces", "pods", "pods/log", "events", "services", "endpoints", "persistentvolumes", "persistentvolumeclaims", "pods/exec"]
  verbs: ["get", "list", "watch", "create"]
- apiGroups: ["apps"]
  resources: ["deployments", "daemonsets", "statefulsets", "replicasets"]
  verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
  resources: ["jobs", "cronjobs"]
  verbs: [ "get", "list", "watch"]
- apiGroups: ["guance.com"]
  resources: ["datakits"]
  verbs: ["get","list"]
- apiGroups: ["monitoring.coreos.com"]
  resources: ["podmonitors", "servicemonitors"]
  verbs: ["get", "list"]
- apiGroups: ["metrics.k8s.io"]
  resources: ["pods", "nodes"]
  verbs: ["get", "list"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]

---

In the above configuration, "pods/exec" has been added; the rest remains consistent with the existing yaml file.

Add remote_job environment variables:

- name: ENV_REMOTE_JOB_ENABLE
  value: 'true'
- name: ENV_REMOTE_JOB_ENVS
  value: >-
    REMOTE=oss,OSS_BUCKET_HOST=<bucket host>,OSS_ACCESS_KEY_ID=<key>,OSS_ACCESS_KEY_SECRET=<secret key>,OSS_BUCKET_NAME=<name>
- name: ENV_REMOTE_JOB_JAVA_HOME
- name: ENV_REMOTE_JOB_INTERVAL
  value: 100s

Configuration explanation:

  1. enable / ENV_REMOTE_JOB_ENABLE: switch for the remote_job feature.
  2. envs / ENV_REMOTE_JOB_ENVS: contains the bucket host, access key, secret key, and bucket name used to upload the obtained JVM dump file to OSS; uploading to AWS and OBS works the same way with their respective environment variables.
  3. interval / ENV_REMOTE_JOB_INTERVAL: interval at which DataKit polls the interface for the latest task.
  4. java_home / ENV_REMOTE_JOB_JAVA_HOME: obtained automatically from the $JAVA_HOME environment variable in the host environment; usually no configuration is required.

Note: the dd-java-agent.jar agent version used should not be lower than v1.4.0-guance.
