2# commonLabels -- set of labels that will be applied to all the resources for the operator
4# commonAnnotations -- set of annotations that will be applied to all the resources for the operator
7 # see details in `kubectl explain deployment.spec.strategy`
11 # crdHook.enabled -- enable automatic CRD installation/update via pre-install/pre-upgrade hooks
12 # when disabled, CRDs must be installed manually using kubectl apply
15 # crdHook.image.repository -- image repository for CRD installation job
16 repository: cgr.dev/chainguard-private/kubectl
17 # crdHook.image.tag -- image tag for CRD installation job
18 tag: latest-dev@sha256:7542d78d96ca19bd41f188066f28e95454e75f26df80d45010853d4c0d299dbc
19 # crdHook.image.pullPolicy -- image pull policy for CRD installation job
20 pullPolicy: IfNotPresent
21 # crdHook.imagePullSecrets -- image pull secrets for CRD installation job
22 # possible value format `[{"name":"your-secret-name"}]`,
23 # check `kubectl explain pod.spec.imagePullSecrets` for details
25 # crdHook.resources -- resource limits and requests for CRD installation job
33 # crdHook.nodeSelector -- node selector for CRD installation job
35 # crdHook.tolerations -- tolerations for CRD installation job
37 # crdHook.affinity -- affinity for CRD installation job
39 # crdHook.annotations -- additional annotations for CRD installation job
41 # crdHook.containerSecurityContext -- container security context for CRD installation job
42 # check `kubectl explain pod.spec.containers.securityContext` for details
43 containerSecurityContext: {}
44 # allowPrivilegeEscalation: false
50 # type: RuntimeDefault
53 # operator.image.registry -- optional image registry prefix (e.g. 1234567890.dkr.ecr.us-east-1.amazonaws.com)
55 # operator.image.repository -- image repository
56 repository: cgr.dev/chainguard-private/clickhouse-operator
57 # operator.image.tag -- image tag (chart's appVersion value will be used if not set)
58 tag: latest@sha256:db19dd2832f4537abc9722bfa2acafdefd164b027e0893e0837a5be402c65363
59 # operator.image.pullPolicy -- image pull policy
60 pullPolicy: IfNotPresent
61 containerSecurityContext: {}
62 # operator.resources -- custom resource configuration, check `kubectl explain pod.spec.containers.resources` for details
71 # operator.priorityClassName -- priority class name for the clickhouse-operator deployment, check `kubectl explain pod.spec.priorityClassName` for details
74 # operator.env -- additional environment variables for the clickhouse-operator container in deployment
75 # possible format value `[{"name": "SAMPLE", "value": "text"}]`
77 # operator.livenessProbe -- optional liveness probe for the clickhouse-operator container
78 # check `kubectl explain pod.spec.containers.livenessProbe` for details
83 # initialDelaySeconds: 10
86 # operator.readinessProbe -- optional readiness probe for the clickhouse-operator container
87 # check `kubectl explain pod.spec.containers.readinessProbe` for details
92 # initialDelaySeconds: 5
98 # metrics.image.registry -- optional image registry prefix (e.g. 1234567890.dkr.ecr.us-east-1.amazonaws.com)
100 # metrics.image.repository -- image repository
101 repository: cgr.dev/chainguard-private/clickhouse-operator-metrics-exporter
102 # metrics.image.tag -- image tag (chart's appVersion value will be used if not set)
103 tag: latest@sha256:c45d6ebb34c759bcc35f147f4bff7fce348d23555f639c2098812b8f9d1191b1
104 # metrics.image.pullPolicy -- image pull policy
105 pullPolicy: IfNotPresent
106 containerSecurityContext: {}
107 # metrics.resources -- custom resource configuration
116 # metrics.env -- additional environment variables for the deployment of metrics-exporter containers
117 # possible format value `[{"name": "SAMPLE", "value": "text"}]`
119 # metrics.livenessProbe -- optional liveness probe for the metrics-exporter container
120 # check `kubectl explain pod.spec.containers.livenessProbe` for details
125 # initialDelaySeconds: 10
128 # metrics.readinessProbe -- optional readiness probe for the metrics-exporter container
129 # check `kubectl explain pod.spec.containers.readinessProbe` for details
134 # initialDelaySeconds: 5
137# imagePullSecrets -- image pull secret for private images in clickhouse-operator pod
138# possible value format `[{"name":"your-secret-name"}]`,
139# check `kubectl explain pod.spec.imagePullSecrets` for details
141# podLabels -- labels to add to the clickhouse-operator pod
143# podAnnotations -- annotations to add to the clickhouse-operator pod, check `kubectl explain pod.metadata.annotations` for details
144# @default -- check the `values.yaml` file
146 prometheus.io/port: '8888'
147 prometheus.io/scrape: 'true'
148 clickhouse-operator-metrics/port: '9999'
149 clickhouse-operator-metrics/scrape: 'true'
150# nameOverride -- override name of the chart
152# fullnameOverride -- full name of the chart.
155 # serviceAccount.create -- specifies whether a service account should be created
157 # serviceAccount.annotations -- annotations to add to the service account
159 # serviceAccount.name -- the name of the service account to use; if not set and create is true, a name is generated using the fullname template
162 # rbac.create -- specifies whether rbac resources should be created
164 # rbac.namespaceScoped -- specifies whether to create roles and rolebindings at the cluster level or namespace level
165 namespaceScoped: false
167 # secret.create -- create a secret with operator credentials
169 # secret.username -- operator credentials username
170 username: clickhouse_operator
171 # secret.password -- operator credentials password
172 password: clickhouse_operator_password
173# nodeSelector -- node for scheduler pod assignment, check `kubectl explain pod.spec.nodeSelector` for details
175# tolerations -- tolerations for scheduler pod assignment, check `kubectl explain pod.spec.tolerations` for details
177# affinity -- affinity for scheduler pod assignment, check `kubectl explain pod.spec.affinity` for details
179# podSecurityContext -- operator deployment SecurityContext, check `kubectl explain pod.spec.securityContext` for details
180podSecurityContext: {}
181# topologySpreadConstraints -- topologySpreadConstraints for scheduler pod assignment, check `kubectl explain pod.spec.topologySpreadConstraints` for details
182topologySpreadConstraints: []
184 # serviceMonitor.enabled -- ServiceMonitor Custom resource is created for a [prometheus-operator](https://github.com/prometheus-operator/prometheus-operator)
185 # The ServiceMonitor is created with two endpoints: ch-metrics on port 8888 and op-metrics on port 9999. You can specify interval, scrapeTimeout, relabelings, metricRelabelings for each endpoint below
187 # serviceMonitor.additionalLabels -- additional labels for service monitor
190 # serviceMonitor.interval for ch-metrics endpoint --
192 # serviceMonitor.scrapeTimeout for ch-metrics endpoint -- Prometheus ServiceMonitor scrapeTimeout. If empty, Prometheus uses the global scrape timeout unless it is less than the target's scrape interval value in which the latter is used.
194 # serviceMonitor.relabelings for ch-metrics endpoint -- Prometheus [RelabelConfigs] to apply to samples before scraping
196 # serviceMonitor.metricRelabelings for ch-metrics endpoint -- Prometheus [MetricRelabelConfigs] to apply to samples before ingestion
197 metricRelabelings: []
199 # serviceMonitor.interval for op-metrics endpoint --
201 # serviceMonitor.scrapeTimeout for op-metrics endpoint -- Prometheus ServiceMonitor scrapeTimeout. If empty, Prometheus uses the global scrape timeout unless it is less than the target's scrape interval value in which the latter is used.
203 # serviceMonitor.relabelings for op-metrics endpoint -- Prometheus [RelabelConfigs] to apply to samples before scraping
205 # serviceMonitor.metricRelabelings for op-metrics endpoint -- Prometheus [MetricRelabelConfigs] to apply to samples before ingestion
206 metricRelabelings: []
207# configs -- clickhouse operator configs
208# @default -- check the `values.yaml` file for the config content (auto-generated from latest operator release)
212 01-clickhouse-01-listen.xml: |
214 <!-- This file is auto-generated -->
215 <!-- Do not edit this file - all changes would be lost -->
216 <!-- Edit appropriate template in the following folder: -->
217 <!-- deploy/builder/templates-config -->
220 <!-- Listen wildcard address to allow accepting connections from other containers and host network. -->
221 <listen_host>::</listen_host>
222 <listen_host>0.0.0.0</listen_host>
223 <listen_try>1</listen_try>
225 01-clickhouse-02-logger.xml: |
227 <!-- This file is auto-generated -->
228 <!-- Do not edit this file - all changes would be lost -->
229 <!-- Edit appropriate template in the following folder: -->
230 <!-- deploy/builder/templates-config -->
234 <!-- Possible levels: https://github.com/pocoproject/poco/blob/devel/Foundation/include/Poco/Logger.h#L439 -->
236 <log>/var/log/clickhouse-server/clickhouse-server.log</log>
237 <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
240 <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
244 01-clickhouse-03-query_log.xml: |
246 <!-- This file is auto-generated -->
247 <!-- Do not edit this file - all changes would be lost -->
248 <!-- Edit appropriate template in the following folder: -->
249 <!-- deploy/builder/templates-config -->
252 <query_log replace="1">
253 <database>system</database>
254 <table>query_log</table>
255 <engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + interval 30 day</engine>
256 <flush_interval_milliseconds>7500</flush_interval_milliseconds>
258 <query_thread_log remove="1"/>
260 01-clickhouse-04-part_log.xml: |
262 <!-- This file is auto-generated -->
263 <!-- Do not edit this file - all changes would be lost -->
264 <!-- Edit appropriate template in the following folder: -->
265 <!-- deploy/builder/templates-config -->
268 <part_log replace="1">
269 <database>system</database>
270 <table>part_log</table>
271 <engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + interval 30 day</engine>
272 <flush_interval_milliseconds>7500</flush_interval_milliseconds>
275 01-clickhouse-05-trace_log.xml: |-
277 <!-- This file is auto-generated -->
278 <!-- Do not edit this file - all changes would be lost -->
279 <!-- Edit appropriate template in the following folder: -->
280 <!-- deploy/builder/templates-config -->
283 <trace_log replace="1">
284 <database>system</database>
285 <table>trace_log</table>
286 <engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + interval 30 day</engine>
287 <flush_interval_milliseconds>7500</flush_interval_milliseconds>
293 # This file is auto-generated
294 # Do not edit this file - all changes would be lost
295 # Edit appropriate template in the following folder:
296 # deploy/builder/templates-config
299 # Template parameters available:
303 # CH_CREDENTIALS_SECRET_NAMESPACE=
304 # CH_CREDENTIALS_SECRET_NAME=clickhouse-operator
307 ################################################
311 ################################################
313 # Namespaces where clickhouse-operator watches for events.
314 # Concurrently running operators should watch on different namespaces.
315 # `include` and `exclude` accept literal namespace names or regexp patterns.
316 # Empty `include` watches the operator's own namespace (or all namespaces when
317 # the operator runs in `kube-system`); use [".*"] to force watch-all elsewhere.
318 # Empty `exclude` matches none. `exclude` is applied after `include`.
322 # Behavior when ClickHouseOperatorConfiguration changes: none | restart
327 ################################################
329 ## Configuration files section
331 ################################################
333 # Each 'path' can be either absolute or relative.
334 # In case path is absolute - it is used as is
335 # In case path is relative - it is relative to the folder where configuration file you are reading right now is located.
337 # Path to the folder where ClickHouse configuration files common for all instances within a CHI are located.
339 # Path to the folder where ClickHouse configuration files unique for each instance (host) within a CHI are located.
341 # Path to the folder where ClickHouse configuration files with users' settings are located.
342 # Files are common for all instances within a CHI.
344 ################################################
346 ## Configuration users section
348 ################################################
350 # Default settings for user accounts, created by the operator.
351 # IMPORTANT. These are not access credentials or settings for 'default' user account,
352 # it is a template for filling out missing fields for all user accounts to be created by the operator,
353 # with the following EXCEPTIONS:
354 # 1. 'default' user account DOES NOT use provided password, but uses all the rest of the fields.
355 # Password for 'default' user account has to be provided explicitly, if to be used.
356 # 2. CHOP user account DOES NOT use:
357 # - profile setting. It uses predefined profile called 'clickhouse_operator'
358 # - quota setting. It uses empty quota name.
359 # - networks IP setting. Operator specifies 'networks/ip' user setting to match operators' pod IP only.
360 # - password setting. Password for CHOP account is used from 'clickhouse.access.*' section
362 # Default values for ClickHouse user account(s) created by the operator
363 # 1. user/profile - string
364 # 2. user/quota - string
365 # 3. user/networks/ip - multiple strings
366 # 4. user/password - string
367 # These values can be overwritten on per-user basis.
374 ################################################
376 ## Configuration network section
378 ################################################
380 # Default host_regexp to limit network connectivity from outside
381 hostRegexpTemplate: "(chi-{chi}-[^.]+\\d+-\\d+|clickhouse\\-{chi})\\.{namespace}\\.svc\\.cluster\\.local$"
382 ################################################
384 ## Configuration restart policy section
385 ## Configuration restart policy describes what configuration changes require ClickHouse restart
387 ################################################
388 configurationRestartPolicy:
391 # Special version of "*" - default version - has to satisfy all ClickHouse versions.
392 # Default version will also be used in case ClickHouse version is unknown.
393 # ClickHouse version may be unknown due to host being down - for example, because of incorrect "settings" section.
394 # ClickHouse is not willing to start in case incorrect/unknown settings are provided in config file.
397 # see https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-server-config-files/#server-config-configxml-sections-which-dont-require-restart
398 # to be replaced with "select * from system.server_settings where changeable_without_restart = 'No'"
401 - settings/access_control_path: "no"
402 - settings/dictionaries_config: "no"
403 - settings/max_server_memory_*: "no"
404 - settings/max_*_to_drop: "no"
405 - settings/max_concurrent_queries: "no"
406 - settings/models_config: "no"
407 - settings/user_defined_executable_functions_config: "no"
409 - settings/logger/*: "no"
410 - settings/macros/*: "no"
411 - settings/remote_servers/*: "no"
412 - settings/user_directories/*: "no"
413 # these settings should not lead to pod restarts
414 - settings/display_secrets_in_show_and_select: "no"
417 - files/config.d/*.xml: "yes"
418 - files/config.d/*dict*.xml: "no"
419 - files/config.d/*no_restart*: "no"
420 # exceptions in default profile
421 - profiles/default/background_*_pool_size: "yes"
422 - profiles/default/max_*_for_server: "yes"
425 - settings/logger: "yes"
426 #################################################
428 ## Access to ClickHouse instances
430 ################################################
432 # Possible values for 'scheme' are:
433 # 1. http - force http to be used to connect to ClickHouse instances
434 # 2. https - force https to be used to connect to ClickHouse instances
435 # 3. auto - either http or https is selected based on open ports
437 # ClickHouse credentials (username, password and port) to be used by the operator to connect to ClickHouse instances.
438 # These credentials are used for:
439 # 1. Metrics requests
440 # 2. Schema maintenance
441 # User with these credentials can be specified in additional ClickHouse .xml config files,
442 # located in 'clickhouse.configuration.file.path.user' folder
446 # Location of the k8s Secret with username and password to be used by the operator to connect to ClickHouse instances.
447 # Can be used instead of explicitly specified username and password available in sections:
448 # - clickhouse.access.username
449 # - clickhouse.access.password
450 # Secret should have two keys:
454 # Empty `namespace` means that k8s secret would be looked for in the same namespace where operator's pod is running.
456 # Empty `name` means no k8s Secret would be looked for
457 name: '{{ include "altinity-clickhouse-operator.fullname" . }}'
458 # Port to use when connecting to ClickHouse instances
460 # Timeouts used to limit connection and queries from the operator to ClickHouse instances
461 # Specified in seconds.
463 # Timeout to set up connection from the operator to ClickHouse instances. In seconds.
465 # Timeout to perform SQL query from the operator to ClickHouse instances. In seconds.
467 ################################################
469 ## Addons specifies additional configuration sections
470 ## Should it be called something like "templates"?
472 ################################################
487 ### users.d is global while description depends on CH version which may vary on per-host basis
488 ### In case of global-ness this may be better to implement via auto-templates
490 ### As a solution, this may be applied on the whole cluster based on any of its hosts
492 ### What to do when host is just created? CH version is not known prior to CH started and user config is required before CH started.
493 ### We do not have any info about the cluster on initial creation
496 "{clickhouseOperatorUser}/access_management": 1
497 "{clickhouseOperatorUser}/named_collection_control": 1
498 "{clickhouseOperatorUser}/show_named_collections": 1
499 "{clickhouseOperatorUser}/show_named_collections_secrets": 1
509 clickhouse_operator/format_display_secrets_in_show_and_select: 1
513 ## this may be added on per-host basis into host's conf.d folder
515 display_secrets_in_show_and_select: 1
517 #################################################
519 ## Metrics collection
521 ################################################
523 # Timeouts used to limit connection and queries from the metrics exporter to ClickHouse instances
524 # Specified in seconds.
526 # Timeout used to limit metrics collection request. In seconds.
527 # Upon reaching this timeout metrics collection is aborted and no more metrics are collected in this cycle.
528 # All collected metrics are returned.
530 # Regexp to match tables in system database to fetch metrics from.
531 # Multiple tables can be matched using regexp. Matched tables are merged using merge() table function.
532 # Default is "^(metrics|custom_metrics)$" which fetches from both system.metrics and system.custom_metrics.
533 tablesRegexp: "^(metrics|custom_metrics)$"
534 # List of regexps to match ClickHouse metrics to exclude from export.
535 # Regexps match internal metric names before Prometheus normalization and prefixing.
536 # Default is the per-CPU OS metrics filter shown below; set to [] to disable.
538 - "^metric\\.(OS.*CPU[0-9]+|CPUFrequencyMHz_[0-9]+)$"
541 ################################################
543 ## Configuration files section
545 ################################################
547 # Each 'path' can be either absolute or relative.
548 # In case path is absolute - it is used as is
549 # In case path is relative - it is relative to the folder where configuration file you are reading right now is located.
551 # Path to the folder where Keeper configuration files common for all instances within a CHK are located.
552 common: chk/keeper_config.d
553 # Path to the folder where Keeper configuration files unique for each instance (host) within a CHK are located.
555 # Path to the folder where Keeper configuration files with users' settings are located.
556 # Files are common for all instances within a CHK.
558 ################################################
560 ## Template(s) management section
562 ################################################
565 # CHI template updates handling policy
566 # Possible policy values:
567 # - ReadOnStart. Accept CHIT updates on the operator's start only.
568 # - ApplyOnNextReconcile. Accept CHIT updates at all times. Apply new CHITs on next regular reconcile of the CHI
569 policy: ApplyOnNextReconcile
570 # Path to the folder where ClickHouseInstallation templates .yaml manifests are located.
571 # Templates are added to the list of all templates and used when CHI is reconciled.
572 # Templates are applied in sorted alpha-numeric order.
573 path: chi/templates.d
575 # CHK template updates handling policy
576 # Possible policy values:
577 # - ReadOnStart. Accept CHIT updates on the operator's start only.
578 # - ApplyOnNextReconcile. Accept CHIT updates at all times. Apply new CHITs on next regular reconcile of the CHI
579 policy: ApplyOnNextReconcile
580 # Path to the folder where ClickHouseInstallation templates .yaml manifests are located.
581 # Templates are added to the list of all templates and used when CHI is reconciled.
582 # Templates are applied in sorted alpha-numeric order.
583 path: chk/templates.d
584 ################################################
588 ################################################
590 # Reconcile runtime settings
592 # Max number of concurrent CHI reconciles in progress
593 reconcileCHIsThreadsNumber: 10
594 # The operator reconciles shards concurrently in each CHI with the following limitations:
595 # 1. Number of shards being reconciled (and thus having hosts down) in each CHI concurrently
596 # can not be greater than 'reconcileShardsThreadsNumber'.
597 # 2. Percentage of shards being reconciled (and thus having hosts down) in each CHI concurrently
598 # can not be greater than 'reconcileShardsMaxConcurrencyPercent'.
599 # 3. The first shard is always reconciled alone. Concurrency starts from the second shard and onward.
600 # Thus limiting number of shards being reconciled (and thus having hosts down) in each CHI by both number and percentage
602 # Max number of concurrent shard reconciles within one cluster in progress
603 reconcileShardsThreadsNumber: 5
604 # Max percentage of concurrent shard reconciles within one cluster in progress
605 reconcileShardsMaxConcurrencyPercent: 50
606 # Reconcile StatefulSet scenario
608 # Create StatefulSet scenario
610 # What to do in case created StatefulSet is not in 'Ready' after `reconcile.statefulSet.update.timeout` seconds
612 # 1. abort - abort the process, do nothing with the problematic StatefulSet, leave it as it is,
613 # do not try to fix or delete or update it, just abort reconcile cycle.
614 # Do not proceed to the next StatefulSet(s) and wait for an admin to assist.
615 # 2. delete - delete newly created problematic StatefulSet and follow 'abort' path afterwards.
616 # 3. ignore - ignore an error, pretend nothing happened, continue reconcile and move on to the next StatefulSet.
618 # Update StatefulSet scenario
620 # How many seconds to wait for created/updated StatefulSet to be 'Ready'
622 # How many seconds to wait between checks/polls for created/updated StatefulSet status
624 # What to do in case updated StatefulSet is not in 'Ready' after `reconcile.statefulSet.update.timeout` seconds
626 # 1. abort - abort the process, do nothing with the problematic StatefulSet, leave it as it is,
627 # do not try to fix or delete or update it, just abort reconcile cycle.
628 # Do not proceed to the next StatefulSet(s) and wait for an admin to assist.
629 # 2. rollback - delete Pod and rollback StatefulSet to previous Generation.
630 # Pod would be recreated by StatefulSet based on rollback-ed StatefulSet configuration.
631 # Follow 'abort' path afterwards.
632 # 3. ignore - ignore an error, pretend nothing happened, continue reconcile and move on to the next StatefulSet.
634 # Recreate StatefulSet scenario
636 # What to do in case operator is in need to recreate StatefulSet?
638 # 1. abort - abort the process, do nothing with the problematic StatefulSet, leave it as it is,
639 # do not try to fix or delete or update it, just abort reconcile cycle.
640 # Do not proceed to the next StatefulSet(s) and wait for an admin to assist.
641 # 2. recreate - proceed and recreate StatefulSet.
643 # Triggered when PVC data loss or missing volumes are detected
645 # Triggered when StatefulSet update fails or StatefulSet is not ready
646 onUpdateFailure: recreate
647 # Reconcile Host scenario
649 # The operator during reconcile procedure should wait for a ClickHouse host to achieve the following conditions:
651 # Whether the operator during reconcile procedure should wait for a ClickHouse host:
652 # - to be excluded from a ClickHouse cluster
653 # - to complete all running queries
654 # - to be included into a ClickHouse cluster
655 # respectively before moving forward with host reconcile
659 # The operator during reconcile procedure should wait for replicas to catch-up
660 # replication delay a.k.a replication lag for the following replicas
662 # All replicas (new and known earlier) are explicitly requested to wait for replication to catch-up
664 # New replicas only are requested to wait for replication to catch-up
666 # Replication catch-up is considered to be completed as soon as replication delay
667 # a.k.a replication lag - calculated as "MAX(absolute_delay) FROM system.replicas"
668 # is within this specified delay (in seconds)
671 # Whether the operator during host launch procedure should wait for startup probe to succeed.
672 # In case probe is unspecified wait is assumed to be completed successfully.
673 # Default option value is to not wait.
675 # Whether the operator during host launch procedure should wait for readiness probe to succeed.
676 # In case probe is unspecified wait is assumed to be completed successfully.
677 # Default option value is to wait.
679 # The operator during reconcile procedure should drop the following entities:
682 # Whether the operator during reconcile procedure should drop replicas when replica is deleted
684 # Whether the operator during reconcile procedure should drop replicas when replica volume is lost
686 # Whether the operator during reconcile procedure should drop active replicas when replica is deleted or recreated
688 ################################################
690 ## Coordination with external systems during reconcile
692 ################################################
695 # How long the operator waits for a referenced ClickHouseKeeper to become ready
696 # before aborting CHI reconcile. In seconds.
698 # Reaction when a referenced CHK resource changes:
699 # none — do nothing (default, backward-compatible)
700 # reconcile — trigger CHI reconcile
701 # onKeeperResourceUpdate: none
702 ################################################
704 ## Auto-recovery from aborted reconcile
706 ################################################
708 # Recovery scopes keyed by CHI state being recovered from.
709 # Each scope contains on<Event>: <action> mappings that apply while the CHI
710 # is in that state. Multi-scope design anticipates future states beyond Aborted
711 # (e.g. Failed, Broken).
713 # Recovery from Status=Aborted
715 # Action when a pod belonging to an Aborted CHI transitions to Ready:
716 # retry (default) — re-enqueue the CHI for reconcile
717 # none — do nothing, CHI stays Aborted
719 # Future events (not yet implemented):
720 # onKeeperReady: retry — retry when a referenced CHK becomes ready
721 # onOperatorRestart: retry — sweep Aborted CHIs on operator startup
722 # Future scopes (not yet implemented):
727 # Future global policy knobs (not yet implemented) — flat peers of `from`,
728 # apply across all recovery scopes:
730 # Global kill-switch for auto-recovery:
733 # Cap on consecutive auto-recovery attempts before giving up:
736 # Minimum time between auto-recovery attempts for the same CHI:
739 # Exponential backoff for auto-recovery attempts:
744 ################################################
746 ## Annotations management section
748 ################################################
751 # 1. Propagating annotations from the CHI's `metadata.annotations` to child objects' `metadata.annotations`,
752 # 2. Propagating annotations from the CHI Template's `metadata.annotations` to CHI's `metadata.annotations`,
753 # Include annotations from the following list:
754 # Applied only when not empty. Empty list means "include all, no selection"
756 # Exclude annotations from the following list:
758 ################################################
760 ## Labels management section
762 ################################################
765 # 1. Propagating labels from the CHI's `metadata.labels` to child objects' `metadata.labels`,
766 # 2. Propagating labels from the CHI Template's `metadata.labels` to CHI's `metadata.labels`,
767 # Include labels from the following list:
768 # Applied only when not empty. Empty list means "include all, no selection"
770 # Exclude labels from the following list:
771 # Applied only when not empty. Empty list means "nothing to exclude, no selection"
773 # Whether to append *Scope* labels to StatefulSet and Pod.
774 # Full list of available *scope* labels check in 'labeler.go'
775 # LabelShardScopeIndex
776 # LabelReplicaScopeIndex
778 # LabelCHIScopeCycleSize
779 # LabelCHIScopeCycleIndex
780 # LabelCHIScopeCycleOffset
781 # LabelClusterScopeIndex
782 # LabelClusterScopeCycleSize
783 # LabelClusterScopeCycleIndex
784 # LabelClusterScopeCycleOffset
786 ################################################
788 ## Metrics management section
790 ################################################
794 ################################################
796 ## Status management section
798 ################################################
805 ################################################
807 ## StatefulSet management section
809 ################################################
811 revisionHistoryLimit: 0
812 ################################################
814 ## Pod management section
816 ################################################
818 # Grace period for Pod termination.
819 # How many seconds to wait between sending
820 # SIGTERM and SIGKILL during Pod termination process.
821 # Increase this number in case of slow shutdown.
822 terminationGracePeriod: 30
823 ################################################
825 ## Log parameters section
827 ################################################
830 alsologtostderr: "false"
836 001-templates.json.example: |
838 "apiVersion": "clickhouse.altinity.com/v1",
839 "kind": "ClickHouseInstallationTemplate",
841 "name": "01-default-volumeclaimtemplate"
845 "volumeClaimTemplates": [
847 "name": "chi-default-volume-claim-template",
862 "name": "chi-default-oneperhost-pod-template",
863 "distribution": "OnePerHost",
867 "name": "clickhouse",
868 "image": "clickhouse/clickhouse-server:23.8",
872 "containerPort": 8123
876 "containerPort": 9000
879 "name": "interserver",
880 "containerPort": 9009
891 default-pod-template.yaml.example: |
892 apiVersion: "clickhouse.altinity.com/v1"
893 kind: "ClickHouseInstallationTemplate"
895 name: "default-oneperhost-pod-template"
899 - name: default-oneperhost-pod-template
900 distribution: "OnePerHost"
901 default-storage-template.yaml.example: |
902 apiVersion: "clickhouse.altinity.com/v1"
903 kind: "ClickHouseInstallationTemplate"
905 name: "default-storage-template-2Gi"
908 volumeClaimTemplates:
909 - name: default-storage-template-2Gi
917 Templates in this folder are packaged with an operator and available via 'useTemplate'
919 01-clickhouse-operator-profile.xml: |
921 <!-- This file is auto-generated -->
922 <!-- Do not edit this file - all changes would be lost -->
923 <!-- Edit appropriate template in the following folder: -->
924 <!-- deploy/builder/templates-config -->
928 # Template parameters available:
932 <!-- clickhouse-operator user is generated by the operator based on config.yaml in runtime -->
934 <clickhouse_operator>
935 <log_queries>0</log_queries>
936 <skip_unavailable_shards>1</skip_unavailable_shards>
937 <http_connection_timeout>10</http_connection_timeout>
938 <max_concurrent_queries_for_all_users>0</max_concurrent_queries_for_all_users>
939 <os_thread_priority>0</os_thread_priority>
940 </clickhouse_operator>
943 02-clickhouse-default-profile.xml: |-
945 <!-- This file is auto-generated -->
946 <!-- Do not edit this file - all changes would be lost -->
947 <!-- Edit appropriate template in the following folder: -->
948 <!-- deploy/builder/templates-config -->
953 <os_thread_priority>2</os_thread_priority>
954 <log_queries>1</log_queries>
955 <connect_timeout_with_failover_ms>1000</connect_timeout_with_failover_ms>
956 <distributed_aggregation_memory_efficient>1</distributed_aggregation_memory_efficient>
957 <parallel_view_processing>1</parallel_view_processing>
958 <do_not_merge_across_partitions_select_final>1</do_not_merge_across_partitions_select_final>
959 <load_balancing>nearest_hostname</load_balancing>
960 <prefer_localhost_replica>0</prefer_localhost_replica>
961 <!-- materialize_ttl_recalculate_only>1</materialize_ttl_recalculate_only> 21.10 and above -->
965 keeperConfdFiles: null
967 01-keeper-01-default-config.xml: |
969 <!-- This file is auto-generated -->
970 <!-- Do not edit this file - all changes would be lost -->
971 <!-- Edit appropriate template in the following folder: -->
972 <!-- deploy/builder/templates-config -->
975 <asynchronous_metrics_keeper_metrics_only>1</asynchronous_metrics_keeper_metrics_only>
977 <coordination_settings>
978 <async_replication>1</async_replication>
979 <min_session_timeout_ms>10000</min_session_timeout_ms>
980 <operation_timeout_ms>10000</operation_timeout_ms>
981 <raft_logs_level>information</raft_logs_level>
982 <session_timeout_ms>100000</session_timeout_ms>
983 <use_xid_64>1</use_xid_64>
984 </coordination_settings>
985 <hostname_checks_enabled>true</hostname_checks_enabled>
986 <log_storage_path>/var/lib/clickhouse-keeper/coordination/logs</log_storage_path>
987 <snapshot_storage_path>/var/lib/clickhouse-keeper/coordination/snapshots</snapshot_storage_path>
988 <storage_path>/var/lib/clickhouse-keeper</storage_path>
989 <tcp_port>2181</tcp_port>
991 Four-letter-word command allowlist.
993 Set explicitly to the upstream-default list so the operator-rendered
994 liveness probe (which sends `ruok` over TCP and expects `imok`) keeps
995 working even if a user adds their own keeper_server settings.
997 Without this, a user override that restricts the allowlist
998 (e.g. `four_letter_word_white_list: "mntr,stat"` for security)
999 would silently disable `ruok` → liveness probe always fails → CrashLoopBackOff.
1001 The list mirrors ClickHouse Keeper's compiled-in default; users who want a
1002 stricter list can override this value, but they must keep `ruok` if they
1003 also use the default operator probes.
1005 <four_letter_word_white_list>conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro</four_letter_word_white_list>
1007 <listen_host>::</listen_host>
1008 <listen_host>0.0.0.0</listen_host>
1009 <listen_try>1</listen_try>
1011 <console>1</console>
1012 <level>information</level>
1014 <max_connections>4096</max_connections>
1016 01-keeper-02-readiness.xml: |
1018 <!-- This file is auto-generated -->
1019 <!-- Do not edit this file - all changes would be lost -->
1020 <!-- Edit appropriate template in the following folder: -->
1021 <!-- deploy/builder/templates-config -->
1028 <endpoint>/ready</endpoint>
1033 01-keeper-03-enable-reconfig.xml: |-
1035 <!-- This file is auto-generated -->
1036 <!-- Do not edit this file - all changes would be lost -->
1037 <!-- Edit appropriate template in the following folder: -->
1038 <!-- deploy/builder/templates-config -->
1042 <enable_reconfiguration>false</enable_reconfiguration>
1045 keeperTemplatesdFiles:
1047 Templates in this folder are packaged with an operator and available via 'useTemplate'
1048 keeperUsersdFiles: null
1049# additionalResources -- list of additional resources to create (processed via `tpl` function),
1050# useful for creating ClickHouse clusters together with clickhouse-operator.
1051# check `kubectl explain chi` for details
1052additionalResources: []
1057# name: {{ include "altinity-clickhouse-operator.fullname" . }}-cm
1058# namespace: {{ include "altinity-clickhouse-operator.namespace" . }}
1063# name: {{ include "altinity-clickhouse-operator.fullname" . }}-s
1064# namespace: {{ include "altinity-clickhouse-operator.namespace" . }}
1068# apiVersion: clickhouse.altinity.com/v1
1069# kind: ClickHouseInstallation
1071# name: {{ include "altinity-clickhouse-operator.fullname" . }}-chi
1072# namespace: {{ include "altinity-clickhouse-operator.namespace" . }}
1081 # dashboards.enabled -- provision grafana dashboards as configMaps (can be synced by grafana dashboards sidecar https://github.com/grafana/helm-charts/blob/grafana-8.3.4/charts/grafana/values.yaml#L778 )
1083 # dashboards.additionalLabels -- labels to add to a secret with dashboards
1085 # dashboards.additionalLabels.grafana_dashboard -- label watched by the official grafana helm chart sidecar when sidecar.dashboards.enabled=true
1086 grafana_dashboard: ""
1087 # dashboards.annotations -- annotations to add to a secret with dashboards
1089 # dashboards.annotations.grafana_folder -- folder where dashboards will be placed; requires setting sidecar.dashboards.folderAnnotation: grafana_folder in the official grafana helm chart values
1090 grafana_folder: clickhouse-operator