From 7ec79918023d31c726e83b0f434ac2766c114612 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Wed, 17 Dec 2025 17:58:19 +0100 Subject: [PATCH 01/29] allow kafka controllers running with zookeeper metadata manager --- deploy/helm/kafka-operator/crds/crds.yaml | 9 +++- rust/operator-binary/src/config/command.rs | 2 +- rust/operator-binary/src/crd/mod.rs | 52 ++++++++++++------- rust/operator-binary/src/kafka_controller.rs | 41 ++++++--------- .../operator-binary/src/resource/configmap.rs | 2 +- 5 files changed, 59 insertions(+), 47 deletions(-) diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index 323b1c1d..ff3e5feb 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -731,6 +731,7 @@ spec: authentication: [] authorization: opa: null + metadataManager: ZooKeeper tls: internalSecretClass: tls serverSecretClass: tls @@ -793,6 +794,12 @@ spec: - configMapName type: object type: object + metadataManager: + default: ZooKeeper + enum: + - ZooKeeper + - KRaft + type: string tls: default: internalSecretClass: tls @@ -836,7 +843,7 @@ spec: Provide the name of the ZooKeeper [discovery ConfigMap](https://docs.stackable.tech/home/nightly/concepts/service_discovery) here. When using the [Stackable operator for Apache ZooKeeper](https://docs.stackable.tech/home/nightly/zookeeper/) to deploy a ZooKeeper cluster, this will simply be the name of your ZookeeperCluster resource. - This can only be used up to Kafka version 3.9.x. Since Kafka 4.0.0, ZooKeeper suppport was dropped. + This can only be used up to Kafka version 3.9.x. Since Kafka 4.0.0, ZooKeeper support was dropped. Please use the 'controller' role instead. nullable: true type: string diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index b2f31e8a..0230318c 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -52,7 +52,7 @@ fn broker_start_command( controller_descriptors: Vec, product_version: &str, ) -> String { - if kafka.is_controller_configured() { + if kafka.is_kraft_mode() { formatdoc! {" POD_INDEX=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') export REPLICA_ID=$((POD_INDEX+NODE_ID_OFFSET)) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index e8ea4852..be2d5f17 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -23,6 +23,7 @@ use stackable_operator::{ utils::cluster_info::KubernetesClusterInfo, versioned::versioned, }; +use strum::{Display, EnumIter, EnumString}; use crate::{ config::node_id_hasher::node_id_hash32_offset, @@ -158,9 +159,12 @@ pub mod versioned { /// Provide the name of the ZooKeeper [discovery ConfigMap](DOCS_BASE_URL_PLACEHOLDER/concepts/service_discovery) /// here. When using the [Stackable operator for Apache ZooKeeper](DOCS_BASE_URL_PLACEHOLDER/zookeeper/) /// to deploy a ZooKeeper cluster, this will simply be the name of your ZookeeperCluster resource. - /// This can only be used up to Kafka version 3.9.x. Since Kafka 4.0.0, ZooKeeper suppport was dropped. + /// This can only be used up to Kafka version 3.9.x. Since Kafka 4.0.0, ZooKeeper support was dropped. /// Please use the 'controller' role instead. 
pub zookeeper_config_map_name: Option, + + #[serde(default = "default_metadata_manager")] + pub metadata_manager: MetadataManager, } } @@ -172,6 +176,7 @@ impl Default for v1alpha1::KafkaClusterConfig { tls: tls::default_kafka_tls(), vector_aggregator_config_map_name: None, zookeeper_config_map_name: None, + metadata_manager: default_metadata_manager(), } } } @@ -186,25 +191,8 @@ impl HasStatusCondition for v1alpha1::KafkaCluster { } impl v1alpha1::KafkaCluster { - /// Supporting Kraft alongside Zookeeper requires a couple of CRD checks - /// - If Kafka 4 and higher is used, no zookeeper config map ref has to be provided - /// - Configuring the controller role means no zookeeper config map ref has to be provided - pub fn check_kraft_vs_zookeeper(&self, product_version: &str) -> Result<(), Error> { - if product_version.starts_with("4.") && self.spec.controllers.is_none() { - return Err(Error::Kafka4RequiresKraft); - } - - if self.spec.controllers.is_some() - && self.spec.cluster_config.zookeeper_config_map_name.is_some() - { - return Err(Error::KraftAndZookeeperConfigured); - } - - Ok(()) - } - - pub fn is_controller_configured(&self) -> bool { - self.spec.controllers.is_some() + pub fn is_kraft_mode(&self) -> bool { + self.spec.cluster_config.metadata_manager == MetadataManager::KRaft } // The cluster-id for Kafka @@ -407,6 +395,30 @@ pub struct KafkaClusterStatus { pub conditions: Vec, } +#[derive( + Clone, + Debug, + Deserialize, + Display, + EnumIter, + Eq, + Hash, + JsonSchema, + PartialEq, + Serialize, + EnumString, +)] +pub enum MetadataManager { + #[strum(serialize = "zookeeper")] + ZooKeeper, + #[strum(serialize = "kraft")] + KRaft, +} + +fn default_metadata_manager() -> MetadataManager { + MetadataManager::ZooKeeper +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index aebe52c3..cc5999f1 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -277,11 +277,6 @@ pub async fn reconcile_kafka( .resolve(DOCKER_IMAGE_BASE_NAME, crate::built_info::PKG_VERSION) .context(ResolveProductImageSnafu)?; - // check Kraft vs ZooKeeper and fail if misconfigured - kafka - .check_kraft_vs_zookeeper(&resolved_product_image.product_version) - .context(MisconfiguredKafkaClusterSnafu)?; - let mut cluster_resources = ClusterResources::new( APP_NAME, OPERATOR_NAME, @@ -562,25 +557,23 @@ fn validated_product_config( ), ); - if kafka.is_controller_configured() { - roles.insert( - KafkaRole::Controller.to_string(), - ( - vec![ - PropertyNameKind::File(CONTROLLER_PROPERTIES_FILE.to_string()), - PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), - PropertyNameKind::Env, - ], - kafka - .controller_role() - .cloned() - .context(MissingKafkaRoleSnafu { - role: KafkaRole::Controller, - })? - .erase(), - ), - ); - } + roles.insert( + KafkaRole::Controller.to_string(), + ( + vec![ + PropertyNameKind::File(CONTROLLER_PROPERTIES_FILE.to_string()), + PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), + PropertyNameKind::Env, + ], + kafka + .controller_role() + .cloned() + .context(MissingKafkaRoleSnafu { + role: KafkaRole::Controller, + })? 
+ .erase(), + ), + ); let role_config = transform_all_roles_to_config(kafka, roles).context(GenerateProductConfigSnafu)?; diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index ded83c59..434422bf 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -94,7 +94,7 @@ pub fn build_rolegroup_config_map( let kafka_config_file_name = merged_config.config_file_name(); let mut kafka_config = server_properties_file( - kafka.is_controller_configured(), + kafka.is_kraft_mode(), &rolegroup.role, pod_descriptors, listener_config, From c87b2adec23f1b7c6fea58f050af58df03340611 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 18 Dec 2025 17:04:03 +0100 Subject: [PATCH 02/29] add zookeeper migration properties to controllers --- rust/operator-binary/src/resource/configmap.rs | 5 +++++ rust/operator-binary/src/resource/statefulset.rs | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index 434422bf..f0698f57 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -261,6 +261,11 @@ fn server_properties_file( result.extend([(KAFKA_CONTROLLER_QUORUM_VOTERS.to_string(), kraft_voters)]); } + result.extend([( + "zookeeper.connect".to_string(), + "${env:ZOOKEEPER}".to_string(), + )]); + Ok(result) } KafkaRole::Broker => { diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 89154dad..062ccaca 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -634,6 +634,21 @@ pub fn build_controller_rolegroup_statefulset( ..EnvVar::default() }); + if let Some(zookeeper_config_map_name) = &kafka.spec.cluster_config.zookeeper_config_map_name { + env.push(EnvVar { + name: "ZOOKEEPER".to_string(), + value_from: Some(EnvVarSource { + config_map_key_ref: Some(ConfigMapKeySelector { + name: zookeeper_config_map_name.to_string(), + key: "ZOOKEEPER".to_string(), + ..ConfigMapKeySelector::default() + }), + ..EnvVarSource::default() + }), + ..EnvVar::default() + }) + }; + cb_kafka .image_from_product_image(resolved_product_image) .command(vec![ From 7ab7872dcc4da885cdb890df2d332ac73873f5e4 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 18 Dec 2025 17:04:58 +0100 Subject: [PATCH 03/29] validate controller role only if controllers are defined --- rust/operator-binary/src/kafka_controller.rs | 37 +++++++++++--------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index cc5999f1..f5b871c2 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -557,23 +557,26 @@ fn validated_product_config( ), ); - roles.insert( - KafkaRole::Controller.to_string(), - ( - vec![ - PropertyNameKind::File(CONTROLLER_PROPERTIES_FILE.to_string()), - PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), - PropertyNameKind::Env, - ], - kafka - .controller_role() - .cloned() - .context(MissingKafkaRoleSnafu { - role: KafkaRole::Controller, - })? 
- .erase(), - ), - ); + // TODO: need this if because controller_role() raises an error + if kafka.spec.controllers.is_some() { + roles.insert( + KafkaRole::Controller.to_string(), + ( + vec![ + PropertyNameKind::File(CONTROLLER_PROPERTIES_FILE.to_string()), + PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()), + PropertyNameKind::Env, + ], + kafka + .controller_role() + .cloned() + .context(MissingKafkaRoleSnafu { + role: KafkaRole::Controller, + })? + .erase(), + ), + ); + } let role_config = transform_all_roles_to_config(kafka, roles).context(GenerateProductConfigSnafu)?; From 928a045eb2659aee532209e9c300253e2b843d77 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 18 Dec 2025 17:37:00 +0100 Subject: [PATCH 04/29] move cluster id to env var --- rust/operator-binary/src/config/command.rs | 9 +++------ rust/operator-binary/src/crd/mod.rs | 17 +++++++++++++++-- rust/operator-binary/src/crd/role/broker.rs | 8 ++++++-- rust/operator-binary/src/crd/role/controller.rs | 8 ++++++-- .../operator-binary/src/resource/statefulset.rs | 5 +---- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 0230318c..3418a9cf 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -19,7 +19,6 @@ use crate::{ /// Returns the commands to start the main Kafka container pub fn broker_kafka_container_commands( kafka: &v1alpha1::KafkaCluster, - cluster_id: &str, controller_descriptors: Vec, kafka_security: &KafkaTlsSecurity, product_version: &str, @@ -42,13 +41,12 @@ pub fn broker_kafka_container_commands( true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {STACKABLE_KERBEROS_KRB5_PATH})"), false => "".to_string(), }, - broker_start_command = broker_start_command(kafka, cluster_id, controller_descriptors, product_version), + broker_start_command = broker_start_command(kafka, controller_descriptors, product_version), } } fn broker_start_command( kafka: &v1alpha1::KafkaCluster, - cluster_id: &str, controller_descriptors: Vec, product_version: &str, ) -> String { @@ -63,7 +61,7 @@ fn broker_start_command( cp {config_dir}/jaas.properties /tmp/jaas.properties config-utils template /tmp/jaas.properties - bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} + bin/kafka-storage.sh format --cluster-id \"$KAFKA_CLUSTER_ID\" --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} bin/kafka-server-start.sh /tmp/{properties_file} & ", config_dir = STACKABLE_CONFIG_DIR, @@ -128,7 +126,6 @@ wait_for_termination() "#; pub fn controller_kafka_container_command( - cluster_id: &str, controller_descriptors: Vec, product_version: &str, ) -> String { @@ -145,7 +142,7 @@ pub fn controller_kafka_container_command( config-utils template /tmp/{properties_file} - bin/kafka-storage.sh format --cluster-id {cluster_id} --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} + bin/kafka-storage.sh format --cluster-id \"$KAFKA_CLUSTER_ID\" --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} bin/kafka-server-start.sh /tmp/{properties_file} & wait_for_termination $! 
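
Editor's note (not part of this patch series): the storage format step above now takes the cluster id from the KAFKA_CLUSTER_ID environment variable instead of a value interpolated by the operator. As a minimal sketch run inside any Kafka container: a fresh id of the expected shape comes from the stock tooling, whereas a migration must reuse the id of the existing ZooKeeper-backed cluster rather than generate a new one.

    # Only for brand-new KRaft clusters; migrations must keep the existing cluster id.
    bin/kafka-storage.sh random-uuid
    # prints a base64-encoded UUID, the same shape as the KAFKA_CLUSTER_ID values
    # used in the example manifests later in this series
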
diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index be2d5f17..f5e3d7f0 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -195,9 +195,22 @@ impl v1alpha1::KafkaCluster { self.spec.cluster_config.metadata_manager == MetadataManager::KRaft } - // The cluster-id for Kafka + /// The Kafka cluster id when running in Kraft mode. + /// + /// In ZooKeeper mode the cluster id is a UUID generated by Kafka its self and users typically + /// do not need to deal with it. + /// + /// When in Kraft mode, the cluster id is passed on an as the environment variable `KAFKA_CLUSTER_ID`. + /// + /// When migrating to Kraft mode, users *must* set this variable via `envOverrides` to the value + /// found in the `cluster/id` ZooKeeper node or in the `meta.properties` file. + /// + /// For freshly installed clusters, users do not need to deal with the cluster id. pub fn cluster_id(&self) -> Option<&str> { - self.metadata.name.as_deref() + match self.spec.cluster_config.metadata_manager { + MetadataManager::KRaft => self.metadata.name.as_deref(), + _ => None, + } } /// The name of the load-balanced Kubernetes Service providing the bootstrap address. Kafka clients will use this diff --git a/rust/operator-binary/src/crd/role/broker.rs b/rust/operator-binary/src/crd/role/broker.rs index 00d614b9..70ac85d0 100644 --- a/rust/operator-binary/src/crd/role/broker.rs +++ b/rust/operator-binary/src/crd/role/broker.rs @@ -107,11 +107,15 @@ impl Configuration for BrokerConfigFragment { fn compute_env( &self, - _resource: &Self::Configurable, + resource: &Self::Configurable, _role_name: &str, ) -> Result>, stackable_operator::product_config_utils::Error> { - Ok(BTreeMap::new()) + let mut result = BTreeMap::new(); + if let Some(cluster_id) = resource.cluster_id() { + result.insert("KAFKA_CLUSTER_ID".to_string(), Some(cluster_id.to_string())); + } + Ok(result) } fn compute_cli( diff --git a/rust/operator-binary/src/crd/role/controller.rs b/rust/operator-binary/src/crd/role/controller.rs index 5b9513a5..bf1468b6 100644 --- a/rust/operator-binary/src/crd/role/controller.rs +++ b/rust/operator-binary/src/crd/role/controller.rs @@ -97,11 +97,15 @@ impl Configuration for ControllerConfigFragment { fn compute_env( &self, - _resource: &Self::Configurable, + resource: &Self::Configurable, _role_name: &str, ) -> Result>, stackable_operator::product_config_utils::Error> { - Ok(BTreeMap::new()) + let mut result = BTreeMap::new(); + if let Some(cluster_id) = resource.cluster_id() { + result.insert("KAFKA_CLUSTER_ID".to_string(), Some(cluster_id.to_string())); + } + Ok(result) } fn compute_cli( diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 062ccaca..a7aec39c 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -283,8 +283,6 @@ pub fn build_broker_rolegroup_statefulset( ..EnvVar::default() }); - let cluster_id = kafka.cluster_id().context(ClusterIdMissingSnafu)?; - cb_kafka .image_from_product_image(resolved_product_image) .command(vec![ @@ -296,7 +294,6 @@ pub fn build_broker_rolegroup_statefulset( ]) .args(vec![broker_kafka_container_commands( kafka, - cluster_id, // we need controller pods kafka .pod_descriptors( @@ -634,6 +631,7 @@ pub fn build_controller_rolegroup_statefulset( ..EnvVar::default() }); + // Controllers need the ZooKeeper connection string for migration if let Some(zookeeper_config_map_name) = 
&kafka.spec.cluster_config.zookeeper_config_map_name { env.push(EnvVar { name: "ZOOKEEPER".to_string(), @@ -659,7 +657,6 @@ pub fn build_controller_rolegroup_statefulset( "-c".to_string(), ]) .args(vec![controller_kafka_container_command( - kafka.cluster_id().context(ClusterIdMissingSnafu)?, kafka .pod_descriptors(Some(kafka_role), cluster_info, kafka_security.client_port()) .context(BuildPodDescriptorsSnafu)?, From a2d30770373d5157cb5d0cfe45c20bdfad569ec9 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 19 Dec 2025 15:05:47 +0100 Subject: [PATCH 05/29] disable automatic broker.id generation --- rust/operator-binary/src/config/command.rs | 3 ++ rust/operator-binary/src/crd/role/mod.rs | 5 ++++ .../operator-binary/src/resource/configmap.rs | 30 +++++++++---------- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 3418a9cf..bfe150b1 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -70,6 +70,9 @@ fn broker_start_command( } } else { formatdoc! {" + POD_INDEX=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') + export REPLICA_ID=$((POD_INDEX+NODE_ID_OFFSET)) + cp {config_dir}/{properties_file} /tmp/{properties_file} config-utils template /tmp/{properties_file} diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 47210ea4..16f72083 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -33,6 +33,11 @@ use crate::{ /// Env var pub const KAFKA_NODE_ID_OFFSET: &str = "NODE_ID_OFFSET"; +/// Past versions of the operator didn't set this explicitly and allowed Kafka to generate random ids. +/// To support Kraft migration, this must be carried over to `KAFKA_NODE_ID` so the operator needs +/// to know it's value for each broker Pod. +pub const KAFKA_BROKER_ID: &str = "broker.id"; + // See: https://kafka.apache.org/documentation/#brokerconfigs /// The node ID associated with the roles this process is playing when process.roles is non-empty. /// This is required configuration when running in KRaft mode. 
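
Editor's note: given the broker.id comment above, it is worth checking which ids the existing ZooKeeper-mode brokers are actually running with before a migration. A minimal sketch, using the example cluster from the manifests added later in this series; the container name `kafka` and the data directory living somewhere under /stackable are assumptions, which is why the path is located with find:

    kubectl exec -n kraft-migration simple-kafka-broker-default-0 -c kafka -- \
      sh -c 'grep -E "^(broker|cluster)\.id" "$(find /stackable -name meta.properties | head -n 1)"'
    # meta.properties holds both broker.id and cluster.id while the broker runs in ZooKeeper mode
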
diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index f0698f57..11c8ae73 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -19,9 +19,10 @@ use crate::{ STACKABLE_LISTENER_BROKER_DIR, listener::{KafkaListenerConfig, KafkaListenerName, node_address_cmd}, role::{ - AnyConfig, KAFKA_ADVERTISED_LISTENERS, KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, - KAFKA_CONTROLLER_QUORUM_VOTERS, KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, KAFKA_LISTENERS, - KAFKA_LOG_DIRS, KAFKA_NODE_ID, KAFKA_PROCESS_ROLES, KafkaRole, + AnyConfig, KAFKA_ADVERTISED_LISTENERS, KAFKA_BROKER_ID, + KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, KAFKA_CONTROLLER_QUORUM_VOTERS, + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, KAFKA_LISTENERS, KAFKA_LOG_DIRS, KAFKA_NODE_ID, + KAFKA_PROCESS_ROLES, KafkaRole, }, security::KafkaTlsSecurity, v1alpha1, @@ -99,7 +100,6 @@ pub fn build_rolegroup_config_map( pod_descriptors, listener_config, opa_connect_string, - resolved_product_image.product_version.starts_with("3.7"), // needs_quorum_voters )?; match merged_config { @@ -213,7 +213,6 @@ fn server_properties_file( pod_descriptors: &[KafkaPodDescriptor], listener_config: &KafkaListenerConfig, opa_connect_string: Option<&str>, - needs_quorum_voters: bool, ) -> Result, Error> { let kraft_controllers = kraft_controllers(pod_descriptors); @@ -254,12 +253,10 @@ fn server_properties_file( .unwrap_or("".to_string())), ]); - if needs_quorum_voters { - let kraft_voters = - kraft_voters(pod_descriptors).context(NoKraftControllersFoundSnafu)?; + let kraft_voters = + kraft_voters(pod_descriptors).context(NoKraftControllersFoundSnafu)?; - result.extend([(KAFKA_CONTROLLER_QUORUM_VOTERS.to_string(), kraft_voters)]); - } + result.extend([(KAFKA_CONTROLLER_QUORUM_VOTERS.to_string(), kraft_voters)]); result.extend([( "zookeeper.connect".to_string(), @@ -283,6 +280,11 @@ fn server_properties_file( KAFKA_LISTENER_SECURITY_PROTOCOL_MAP.to_string(), listener_config.listener_security_protocol_map(), ), + ( + "broker.id.generation.enable".to_string(), + "false".to_string(), + ), + (KAFKA_BROKER_ID.to_string(), "${env:REPLICA_ID}".to_string()), ]); if kraft_mode { @@ -305,12 +307,10 @@ fn server_properties_file( ), ]); - if needs_quorum_voters { - let kraft_voters = - kraft_voters(pod_descriptors).context(NoKraftControllersFoundSnafu)?; + let kraft_voters = + kraft_voters(pod_descriptors).context(NoKraftControllersFoundSnafu)?; - result.extend([(KAFKA_CONTROLLER_QUORUM_VOTERS.to_string(), kraft_voters)]); - } + result.extend([(KAFKA_CONTROLLER_QUORUM_VOTERS.to_string(), kraft_voters)]); } else { // Running with ZooKeeper enabled result.extend([( From 88f319d9aaff40acfbb38ba662cb0fb270a3c015 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 19 Dec 2025 18:18:48 +0100 Subject: [PATCH 06/29] disable default tls --- deploy/helm/kafka-operator/crds/crds.yaml | 9 +-------- rust/operator-binary/src/crd/listener.rs | 13 +++++-------- rust/operator-binary/src/crd/mod.rs | 10 ++++------ rust/operator-binary/src/crd/security.rs | 15 +++++---------- rust/operator-binary/src/crd/tls.rs | 15 +++++---------- 5 files changed, 20 insertions(+), 42 deletions(-) diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index ff3e5feb..6bab087b 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -732,9 +732,6 @@ spec: authorization: 
opa: null metadataManager: ZooKeeper - tls: - internalSecretClass: tls - serverSecretClass: tls zookeeperConfigMapName: null description: |- Kafka settings that affect all roles and role groups. @@ -801,14 +798,10 @@ spec: - KRaft type: string tls: - default: - internalSecretClass: tls - serverSecretClass: tls description: TLS encryption settings for Kafka (server, internal). nullable: true properties: internalSecretClass: - default: tls description: |- The [SecretClass](https://docs.stackable.tech/home/nightly/secret-operator/secretclass.html) to use for internal broker communication. Use mutual verification between brokers (mandatory). @@ -817,9 +810,9 @@ spec: - Which ca.crt to use when validating the other brokers Defaults to `tls` + nullable: true type: string serverSecretClass: - default: tls description: |- The [SecretClass](https://docs.stackable.tech/home/nightly/secret-operator/secretclass.html) to use for client connections. This setting controls: diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index 97b15b85..abc257b5 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -401,7 +401,7 @@ mod tests { ), }, }]), - "internalTls".to_string(), + Some("internalTls".to_string()), Some("tls".to_string()), ); let cluster_info = default_cluster_info(); @@ -460,7 +460,7 @@ mod tests { let kafka_security = KafkaTlsSecurity::new( ResolvedAuthenticationClasses::new(vec![]), - "tls".to_string(), + Some("tls".to_string()), Some("tls".to_string()), ); let config = @@ -514,11 +514,8 @@ mod tests { ) ); - let kafka_security = KafkaTlsSecurity::new( - ResolvedAuthenticationClasses::new(vec![]), - "".to_string(), - None, - ); + let kafka_security = + KafkaTlsSecurity::new(ResolvedAuthenticationClasses::new(vec![]), None, None); let config = get_kafka_listener_config(&kafka, &kafka_security, &rolegroup_ref, &cluster_info) @@ -603,7 +600,7 @@ mod tests { ), }, }]), - "tls".to_string(), + Some("tls".to_string()), Some("tls".to_string()), ); let cluster_info = default_cluster_info(); diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index f5e3d7f0..b015a3d6 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -445,15 +445,13 @@ mod tests { .and_then(|tls| tls.server_secret_class.clone()) } - fn get_internal_secret_class(kafka: &v1alpha1::KafkaCluster) -> String { + fn get_internal_secret_class(kafka: &v1alpha1::KafkaCluster) -> Option { kafka .spec .cluster_config .tls .as_ref() - .unwrap() - .internal_secret_class - .clone() + .and_then(|tls| tls.internal_secret_class.clone()) } #[test] @@ -542,7 +540,7 @@ mod tests { assert_eq!(get_server_secret_class(&kafka), tls::server_tls_default()); assert_eq!( get_internal_secret_class(&kafka), - "simple-kafka-internal-tls".to_string() + Some("simple-kafka-internal-tls".to_string()) ); } @@ -585,7 +583,7 @@ mod tests { assert_eq!(get_server_secret_class(&kafka), tls::server_tls_default()); assert_eq!( get_internal_secret_class(&kafka), - "simple-kafka-internal-tls".to_string() + Some("simple-kafka-internal-tls".to_string()) ); let input = r#" diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index b729386a..c20f3e7f 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -29,7 +29,7 @@ use crate::crd::{ authentication::{self, ResolvedAuthenticationClasses}, listener::{self, KafkaListenerName, 
node_address_cmd_env, node_port_cmd_env}, role::KafkaRole, - tls, v1alpha1, + v1alpha1, }; #[derive(Snafu, Debug)] @@ -57,7 +57,7 @@ pub enum Error { /// Helper struct combining TLS settings for server and internal with the resolved AuthenticationClasses pub struct KafkaTlsSecurity { resolved_authentication_classes: ResolvedAuthenticationClasses, - internal_secret_class: String, + internal_secret_class: Option, server_secret_class: Option, } @@ -92,7 +92,7 @@ impl KafkaTlsSecurity { #[cfg(test)] pub fn new( resolved_authentication_classes: ResolvedAuthenticationClasses, - internal_secret_class: String, + internal_secret_class: Option, server_secret_class: Option, ) -> Self { Self { @@ -120,8 +120,7 @@ impl KafkaTlsSecurity { .cluster_config .tls .as_ref() - .map(|tls| tls.internal_secret_class.clone()) - .unwrap_or_else(tls::internal_tls_default), + .and_then(|tls| tls.internal_secret_class.clone()), server_secret_class: kafka .spec .cluster_config @@ -155,11 +154,7 @@ impl KafkaTlsSecurity { /// Retrieve the mandatory internal `SecretClass`. pub fn tls_internal_secret_class(&self) -> Option<&str> { - if !self.internal_secret_class.is_empty() { - Some(self.internal_secret_class.as_str()) - } else { - None - } + self.internal_secret_class.as_deref() } pub fn has_kerberos_enabled(&self) -> bool { diff --git a/rust/operator-binary/src/crd/tls.rs b/rust/operator-binary/src/crd/tls.rs index 94843601..08ebe3d3 100644 --- a/rust/operator-binary/src/crd/tls.rs +++ b/rust/operator-binary/src/crd/tls.rs @@ -1,8 +1,6 @@ use serde::{Deserialize, Serialize}; use stackable_operator::schemars::{self, JsonSchema}; -const TLS_DEFAULT_SECRET_CLASS: &str = "tls"; - #[derive(Clone, Deserialize, Debug, Eq, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] pub struct KafkaTls { @@ -14,7 +12,7 @@ pub struct KafkaTls { /// /// Defaults to `tls` #[serde(default = "internal_tls_default")] - pub internal_secret_class: String, + pub internal_secret_class: Option, /// The [SecretClass](DOCS_BASE_URL_PLACEHOLDER/secret-operator/secretclass.html) to use for /// client connections. This setting controls: /// - If TLS encryption is used at all @@ -31,18 +29,15 @@ pub struct KafkaTls { /// Default TLS settings. /// Internal and server communication default to `tls` secret class. 
pub fn default_kafka_tls() -> Option { - Some(KafkaTls { - internal_secret_class: internal_tls_default(), - server_secret_class: server_tls_default(), - }) + None } /// Helper methods to provide defaults in the CRDs and tests -pub fn internal_tls_default() -> String { - TLS_DEFAULT_SECRET_CLASS.into() +pub fn internal_tls_default() -> Option { + None } /// Helper methods to provide defaults in the CRDs and tests pub fn server_tls_default() -> Option { - Some(TLS_DEFAULT_SECRET_CLASS.into()) + None } From 7250bd96f34dd1d02166ebb6c05a236a3e9172bd Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 19 Dec 2025 18:19:19 +0100 Subject: [PATCH 07/29] add test manifests --- examples/kraft-migration/01-setup.yaml | 87 +++++++++++++++++++ .../kraft-migration/02-start-controllers.yaml | 33 +++++++ .../kraft-migration/03-migrate-metadata.yaml | 47 ++++++++++ 3 files changed, 167 insertions(+) create mode 100644 examples/kraft-migration/01-setup.yaml create mode 100644 examples/kraft-migration/02-start-controllers.yaml create mode 100644 examples/kraft-migration/03-migrate-metadata.yaml diff --git a/examples/kraft-migration/01-setup.yaml b/examples/kraft-migration/01-setup.yaml new file mode 100644 index 00000000..c290270e --- /dev/null +++ b/examples/kraft-migration/01-setup.yaml @@ -0,0 +1,87 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + labels: + stackable.tech/vendor: Stackable + name: kraft-migration +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperCluster +metadata: + name: simple-zk + namespace: kraft-migration +spec: + image: + productVersion: 3.8.3 + pullPolicy: IfNotPresent + servers: + roleGroups: + default: + replicas: 3 +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperZnode +metadata: + name: simple-kafka-znode + namespace: kraft-migration +spec: + clusterRef: + name: simple-zk +# --- +# apiVersion: secrets.stackable.tech/v1alpha1 +# kind: SecretClass +# metadata: +# name: kafka-internal-tls +# spec: +# backend: +# autoTls: +# ca: +# secret: +# name: secret-provisioner-kafka-internal-tls-ca +# namespace: kraft-migration +# autoGenerate: true +# --- +# apiVersion: authentication.stackable.tech/v1alpha1 +# kind: AuthenticationClass +# metadata: +# name: kafka-client-auth-tls +# spec: +# provider: +# tls: +# clientCertSecretClass: kafka-client-auth-secret +# --- +# apiVersion: secrets.stackable.tech/v1alpha1 +# kind: SecretClass +# metadata: +# name: kafka-client-auth-secret +# spec: +# backend: +# autoTls: +# ca: +# secret: +# name: secret-provisioner-tls-kafka-client-ca +# namespace: kraft-migration +# autoGenerate: true +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: simple-kafka + namespace: kraft-migration +spec: + image: + productVersion: 3.9.1 + pullPolicy: IfNotPresent + clusterConfig: + metadataManager: ZooKeeper + # authentication: + # - authenticationClass: kafka-client-auth-tls + # tls: + # internalSecretClass: kafka-internal-tls + # serverSecretClass: tls + zookeeperConfigMapName: simple-kafka-znode + brokers: + roleGroups: + default: + replicas: 3 diff --git a/examples/kraft-migration/02-start-controllers.yaml b/examples/kraft-migration/02-start-controllers.yaml new file mode 100644 index 00000000..6c32ff96 --- /dev/null +++ b/examples/kraft-migration/02-start-controllers.yaml @@ -0,0 +1,33 @@ +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: simple-kafka + namespace: kraft-migration +spec: + image: + 
productVersion: 3.9.1 + pullPolicy: IfNotPresent + clusterConfig: + metadataManager: ZooKeeper + # authentication: + # - authenticationClass: kafka-client-auth-tls + # tls: + # internalSecretClass: kafka-internal-tls + # serverSecretClass: tls + zookeeperConfigMapName: simple-kafka-znode + brokers: + envOverrides: + KAFKA_CLUSTER_ID: "lyeJYZ7TQ_SfT4HcU8W3iw" + roleGroups: + default: + replicas: 3 + controllers: + roleGroups: + default: + replicas: 3 + envOverrides: + KAFKA_CLUSTER_ID: "lyeJYZ7TQ_SfT4HcU8W3iw" + configOverrides: + controller.properties: + zookeeper.metadata.migration.enable: "true" # Enable migration mode so the controller can read metadata from ZooKeeper. diff --git a/examples/kraft-migration/03-migrate-metadata.yaml b/examples/kraft-migration/03-migrate-metadata.yaml new file mode 100644 index 00000000..e971a6e2 --- /dev/null +++ b/examples/kraft-migration/03-migrate-metadata.yaml @@ -0,0 +1,47 @@ +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: simple-kafka + namespace: kraft-migration +spec: + image: + productVersion: 3.9.1 + pullPolicy: IfNotPresent + clusterConfig: + metadataManager: ZooKeeper + # authentication: + # - authenticationClass: kafka-client-auth-tls + # tls: + # internalSecretClass: kafka-internal-tls + # serverSecretClass: tls + zookeeperConfigMapName: simple-kafka-znode + brokers: + envOverrides: + KAFKA_CLUSTER_ID: "lyeJYZ7TQ_SfT4HcU8W3iw" + roleGroups: + default: + replicas: 3 + configOverrides: + broker.properties: + inter.broker.protocol.version: "3.9" # - Latest value known to Kafka 3.9.1 + zookeeper.metadata.migration.enable: "true" # - Enable migration mode so the broker can participate in metadata migration. + controller.listener.names: "CONTROLLER" + controller.quorum.voters: "2110489703@simple-kafka-controller-default-0.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9092,2110489704@simple-kafka-controller-default-1.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9092,2110489705@simple-kafka-controller-default-2.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9092" + + # listener.security.protocol.map: CONTROLLER:SSL,... - Already defined by the operator + # zookeeper.connect= (should already be present) - The ZooKeeper connection string. This property should already be configured. + # controller.quorum.voters= (same as controllers) - Specify the same controller quorum voters string as configured in phase 2. + # controller.listener.names=CONTROLLER - Define the listener name for the controller. + # Add CONTROLLER to listener.security.protocol.map (for example, ...CONTROLLER:PLAINTEXT) - Add the CONTROLLER listener to the security protocol map with the appropriate security protocol. + # confluent.cluster.link.metadata.topic.enable=true - This property is used by Cluster Linking during the migration. + + controllers: + roleGroups: + default: + replicas: 3 + envOverrides: + KAFKA_CLUSTER_ID: "lyeJYZ7TQ_SfT4HcU8W3iw" + configOverrides: + controller.properties: + zookeeper.metadata.migration.enable: "true" # Enable migration mode so the controller can read metadata from ZooKeeper. From e4f801657f1374511d426fe72476170ada7a849a Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Sat, 20 Dec 2025 17:23:00 +0100 Subject: [PATCH 08/29] Revert "disable default tls" This reverts commit 88f319d9aaff40acfbb38ba662cb0fb270a3c015. 
--- deploy/helm/kafka-operator/crds/crds.yaml | 9 ++++++++- rust/operator-binary/src/crd/listener.rs | 13 ++++++++----- rust/operator-binary/src/crd/mod.rs | 10 ++++++---- rust/operator-binary/src/crd/security.rs | 15 ++++++++++----- rust/operator-binary/src/crd/tls.rs | 15 ++++++++++----- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index 6bab087b..ff3e5feb 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -732,6 +732,9 @@ spec: authorization: opa: null metadataManager: ZooKeeper + tls: + internalSecretClass: tls + serverSecretClass: tls zookeeperConfigMapName: null description: |- Kafka settings that affect all roles and role groups. @@ -798,10 +801,14 @@ spec: - KRaft type: string tls: + default: + internalSecretClass: tls + serverSecretClass: tls description: TLS encryption settings for Kafka (server, internal). nullable: true properties: internalSecretClass: + default: tls description: |- The [SecretClass](https://docs.stackable.tech/home/nightly/secret-operator/secretclass.html) to use for internal broker communication. Use mutual verification between brokers (mandatory). @@ -810,9 +817,9 @@ spec: - Which ca.crt to use when validating the other brokers Defaults to `tls` - nullable: true type: string serverSecretClass: + default: tls description: |- The [SecretClass](https://docs.stackable.tech/home/nightly/secret-operator/secretclass.html) to use for client connections. This setting controls: diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index abc257b5..97b15b85 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -401,7 +401,7 @@ mod tests { ), }, }]), - Some("internalTls".to_string()), + "internalTls".to_string(), Some("tls".to_string()), ); let cluster_info = default_cluster_info(); @@ -460,7 +460,7 @@ mod tests { let kafka_security = KafkaTlsSecurity::new( ResolvedAuthenticationClasses::new(vec![]), - Some("tls".to_string()), + "tls".to_string(), Some("tls".to_string()), ); let config = @@ -514,8 +514,11 @@ mod tests { ) ); - let kafka_security = - KafkaTlsSecurity::new(ResolvedAuthenticationClasses::new(vec![]), None, None); + let kafka_security = KafkaTlsSecurity::new( + ResolvedAuthenticationClasses::new(vec![]), + "".to_string(), + None, + ); let config = get_kafka_listener_config(&kafka, &kafka_security, &rolegroup_ref, &cluster_info) @@ -600,7 +603,7 @@ mod tests { ), }, }]), - Some("tls".to_string()), + "tls".to_string(), Some("tls".to_string()), ); let cluster_info = default_cluster_info(); diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index b015a3d6..f5e3d7f0 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -445,13 +445,15 @@ mod tests { .and_then(|tls| tls.server_secret_class.clone()) } - fn get_internal_secret_class(kafka: &v1alpha1::KafkaCluster) -> Option { + fn get_internal_secret_class(kafka: &v1alpha1::KafkaCluster) -> String { kafka .spec .cluster_config .tls .as_ref() - .and_then(|tls| tls.internal_secret_class.clone()) + .unwrap() + .internal_secret_class + .clone() } #[test] @@ -540,7 +542,7 @@ mod tests { assert_eq!(get_server_secret_class(&kafka), tls::server_tls_default()); assert_eq!( get_internal_secret_class(&kafka), - Some("simple-kafka-internal-tls".to_string()) + "simple-kafka-internal-tls".to_string() ); } 
@@ -583,7 +585,7 @@ mod tests { assert_eq!(get_server_secret_class(&kafka), tls::server_tls_default()); assert_eq!( get_internal_secret_class(&kafka), - Some("simple-kafka-internal-tls".to_string()) + "simple-kafka-internal-tls".to_string() ); let input = r#" diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index c20f3e7f..b729386a 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -29,7 +29,7 @@ use crate::crd::{ authentication::{self, ResolvedAuthenticationClasses}, listener::{self, KafkaListenerName, node_address_cmd_env, node_port_cmd_env}, role::KafkaRole, - v1alpha1, + tls, v1alpha1, }; #[derive(Snafu, Debug)] @@ -57,7 +57,7 @@ pub enum Error { /// Helper struct combining TLS settings for server and internal with the resolved AuthenticationClasses pub struct KafkaTlsSecurity { resolved_authentication_classes: ResolvedAuthenticationClasses, - internal_secret_class: Option, + internal_secret_class: String, server_secret_class: Option, } @@ -92,7 +92,7 @@ impl KafkaTlsSecurity { #[cfg(test)] pub fn new( resolved_authentication_classes: ResolvedAuthenticationClasses, - internal_secret_class: Option, + internal_secret_class: String, server_secret_class: Option, ) -> Self { Self { @@ -120,7 +120,8 @@ impl KafkaTlsSecurity { .cluster_config .tls .as_ref() - .and_then(|tls| tls.internal_secret_class.clone()), + .map(|tls| tls.internal_secret_class.clone()) + .unwrap_or_else(tls::internal_tls_default), server_secret_class: kafka .spec .cluster_config @@ -154,7 +155,11 @@ impl KafkaTlsSecurity { /// Retrieve the mandatory internal `SecretClass`. pub fn tls_internal_secret_class(&self) -> Option<&str> { - self.internal_secret_class.as_deref() + if !self.internal_secret_class.is_empty() { + Some(self.internal_secret_class.as_str()) + } else { + None + } } pub fn has_kerberos_enabled(&self) -> bool { diff --git a/rust/operator-binary/src/crd/tls.rs b/rust/operator-binary/src/crd/tls.rs index 08ebe3d3..94843601 100644 --- a/rust/operator-binary/src/crd/tls.rs +++ b/rust/operator-binary/src/crd/tls.rs @@ -1,6 +1,8 @@ use serde::{Deserialize, Serialize}; use stackable_operator::schemars::{self, JsonSchema}; +const TLS_DEFAULT_SECRET_CLASS: &str = "tls"; + #[derive(Clone, Deserialize, Debug, Eq, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] pub struct KafkaTls { @@ -12,7 +14,7 @@ pub struct KafkaTls { /// /// Defaults to `tls` #[serde(default = "internal_tls_default")] - pub internal_secret_class: Option, + pub internal_secret_class: String, /// The [SecretClass](DOCS_BASE_URL_PLACEHOLDER/secret-operator/secretclass.html) to use for /// client connections. This setting controls: /// - If TLS encryption is used at all @@ -29,15 +31,18 @@ pub struct KafkaTls { /// Default TLS settings. /// Internal and server communication default to `tls` secret class. 
pub fn default_kafka_tls() -> Option { - None + Some(KafkaTls { + internal_secret_class: internal_tls_default(), + server_secret_class: server_tls_default(), + }) } /// Helper methods to provide defaults in the CRDs and tests -pub fn internal_tls_default() -> Option { - None +pub fn internal_tls_default() -> String { + TLS_DEFAULT_SECRET_CLASS.into() } /// Helper methods to provide defaults in the CRDs and tests pub fn server_tls_default() -> Option { - None + Some(TLS_DEFAULT_SECRET_CLASS.into()) } From 4b55b35c51e42c72bdc8a083ae86eedebdc3eac0 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Sat, 20 Dec 2025 21:50:22 +0100 Subject: [PATCH 09/29] add internal listener to controller properties. --- rust/operator-binary/src/crd/listener.rs | 17 ++++++++------ .../operator-binary/src/resource/configmap.rs | 23 +++++++++++++------ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/rust/operator-binary/src/crd/listener.rs b/rust/operator-binary/src/crd/listener.rs index 97b15b85..5b02c66f 100644 --- a/rust/operator-binary/src/crd/listener.rs +++ b/rust/operator-binary/src/crd/listener.rs @@ -162,14 +162,17 @@ impl KafkaListenerConfig { .join(",") } - /// Returns the `listener.security.protocol.map` for the Kafka `broker.properties` config. - pub fn listener_security_protocol_map_for_listener( - &self, - listener_name: &KafkaListenerName, - ) -> Option { + /// Returns the `listener.security.protocol.map` for the Kraft controller. + /// This map must include the internal broker listener too. + pub fn listener_security_protocol_map_for_controller(&self) -> String { self.listener_security_protocol_map - .get(listener_name) - .map(|protocol| format!("{listener_name}:{protocol}")) + .iter() + .filter(|(name, _)| { + *name == &KafkaListenerName::Internal || *name == &KafkaListenerName::Controller + }) + .map(|(name, protocol)| format!("{name}:{protocol}")) + .collect::>() + .join(",") } } diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index 11c8ae73..1d2fed83 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -249,8 +249,7 @@ fn server_properties_file( ( KAFKA_LISTENER_SECURITY_PROTOCOL_MAP.to_string(), listener_config - .listener_security_protocol_map_for_listener(&KafkaListenerName::Controller) - .unwrap_or("".to_string())), + .listener_security_protocol_map_for_controller()), ]); let kraft_voters = @@ -258,11 +257,17 @@ fn server_properties_file( result.extend([(KAFKA_CONTROLLER_QUORUM_VOTERS.to_string(), kraft_voters)]); - result.extend([( - "zookeeper.connect".to_string(), - "${env:ZOOKEEPER}".to_string(), - )]); - + // Needed to migrate from ZooKeeper to KRaft mode. 
+ result.extend([ + ( + "zookeeper.connect".to_string(), + "${env:ZOOKEEPER}".to_string(), + ), + ( + "inter.broker.listener.name".to_string(), + KafkaListenerName::Internal.to_string(), + ), + ]); Ok(result) } KafkaRole::Broker => { @@ -285,6 +290,10 @@ fn server_properties_file( "false".to_string(), ), (KAFKA_BROKER_ID.to_string(), "${env:REPLICA_ID}".to_string()), + ( + "inter.broker.listener.name".to_string(), + KafkaListenerName::Internal.to_string(), + ), ]); if kraft_mode { From 00617d937353e8311bd5442a8c51c990821c9912 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Sat, 20 Dec 2025 21:51:12 +0100 Subject: [PATCH 10/29] update test manifests --- examples/kraft-migration/01-setup.yaml | 84 +++++++++---------- .../kraft-migration/02-start-controllers.yaml | 14 ++-- .../kraft-migration/03-migrate-metadata.yaml | 16 ++-- 3 files changed, 57 insertions(+), 57 deletions(-) diff --git a/examples/kraft-migration/01-setup.yaml b/examples/kraft-migration/01-setup.yaml index c290270e..a7ec8369 100644 --- a/examples/kraft-migration/01-setup.yaml +++ b/examples/kraft-migration/01-setup.yaml @@ -13,12 +13,12 @@ metadata: namespace: kraft-migration spec: image: - productVersion: 3.8.3 + productVersion: 3.9.4 pullPolicy: IfNotPresent servers: roleGroups: default: - replicas: 3 + replicas: 1 --- apiVersion: zookeeper.stackable.tech/v1alpha1 kind: ZookeeperZnode @@ -28,41 +28,41 @@ metadata: spec: clusterRef: name: simple-zk -# --- -# apiVersion: secrets.stackable.tech/v1alpha1 -# kind: SecretClass -# metadata: -# name: kafka-internal-tls -# spec: -# backend: -# autoTls: -# ca: -# secret: -# name: secret-provisioner-kafka-internal-tls-ca -# namespace: kraft-migration -# autoGenerate: true -# --- -# apiVersion: authentication.stackable.tech/v1alpha1 -# kind: AuthenticationClass -# metadata: -# name: kafka-client-auth-tls -# spec: -# provider: -# tls: -# clientCertSecretClass: kafka-client-auth-secret -# --- -# apiVersion: secrets.stackable.tech/v1alpha1 -# kind: SecretClass -# metadata: -# name: kafka-client-auth-secret -# spec: -# backend: -# autoTls: -# ca: -# secret: -# name: secret-provisioner-tls-kafka-client-ca -# namespace: kraft-migration -# autoGenerate: true +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: kafka-internal-tls +spec: + backend: + autoTls: + ca: + secret: + name: secret-provisioner-kafka-internal-tls-ca + namespace: kraft-migration + autoGenerate: true +--- +apiVersion: authentication.stackable.tech/v1alpha1 +kind: AuthenticationClass +metadata: + name: kafka-client-auth-tls +spec: + provider: + tls: + clientCertSecretClass: kafka-client-auth-secret +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: kafka-client-auth-secret +spec: + backend: + autoTls: + ca: + secret: + name: secret-provisioner-tls-kafka-client-ca + namespace: kraft-migration + autoGenerate: true --- apiVersion: kafka.stackable.tech/v1alpha1 kind: KafkaCluster @@ -75,11 +75,11 @@ spec: pullPolicy: IfNotPresent clusterConfig: metadataManager: ZooKeeper - # authentication: - # - authenticationClass: kafka-client-auth-tls - # tls: - # internalSecretClass: kafka-internal-tls - # serverSecretClass: tls + authentication: + - authenticationClass: kafka-client-auth-tls + tls: + internalSecretClass: kafka-internal-tls + serverSecretClass: tls zookeeperConfigMapName: simple-kafka-znode brokers: roleGroups: diff --git a/examples/kraft-migration/02-start-controllers.yaml 
b/examples/kraft-migration/02-start-controllers.yaml index 6c32ff96..2a961de1 100644 --- a/examples/kraft-migration/02-start-controllers.yaml +++ b/examples/kraft-migration/02-start-controllers.yaml @@ -10,15 +10,15 @@ spec: pullPolicy: IfNotPresent clusterConfig: metadataManager: ZooKeeper - # authentication: - # - authenticationClass: kafka-client-auth-tls - # tls: - # internalSecretClass: kafka-internal-tls - # serverSecretClass: tls + authentication: + - authenticationClass: kafka-client-auth-tls + tls: + internalSecretClass: kafka-internal-tls + serverSecretClass: tls zookeeperConfigMapName: simple-kafka-znode brokers: envOverrides: - KAFKA_CLUSTER_ID: "lyeJYZ7TQ_SfT4HcU8W3iw" + KAFKA_CLUSTER_ID: "saiZFmAuSX-QyMfMhwLk9g" roleGroups: default: replicas: 3 @@ -27,7 +27,7 @@ spec: default: replicas: 3 envOverrides: - KAFKA_CLUSTER_ID: "lyeJYZ7TQ_SfT4HcU8W3iw" + KAFKA_CLUSTER_ID: "saiZFmAuSX-QyMfMhwLk9g" configOverrides: controller.properties: zookeeper.metadata.migration.enable: "true" # Enable migration mode so the controller can read metadata from ZooKeeper. diff --git a/examples/kraft-migration/03-migrate-metadata.yaml b/examples/kraft-migration/03-migrate-metadata.yaml index e971a6e2..26e3976b 100644 --- a/examples/kraft-migration/03-migrate-metadata.yaml +++ b/examples/kraft-migration/03-migrate-metadata.yaml @@ -10,15 +10,15 @@ spec: pullPolicy: IfNotPresent clusterConfig: metadataManager: ZooKeeper - # authentication: - # - authenticationClass: kafka-client-auth-tls - # tls: - # internalSecretClass: kafka-internal-tls - # serverSecretClass: tls + authentication: + - authenticationClass: kafka-client-auth-tls + tls: + internalSecretClass: kafka-internal-tls + serverSecretClass: tls zookeeperConfigMapName: simple-kafka-znode brokers: envOverrides: - KAFKA_CLUSTER_ID: "lyeJYZ7TQ_SfT4HcU8W3iw" + KAFKA_CLUSTER_ID: "saiZFmAuSX-QyMfMhwLk9g" roleGroups: default: replicas: 3 @@ -27,7 +27,7 @@ spec: inter.broker.protocol.version: "3.9" # - Latest value known to Kafka 3.9.1 zookeeper.metadata.migration.enable: "true" # - Enable migration mode so the broker can participate in metadata migration. controller.listener.names: "CONTROLLER" - controller.quorum.voters: "2110489703@simple-kafka-controller-default-0.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9092,2110489704@simple-kafka-controller-default-1.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9092,2110489705@simple-kafka-controller-default-2.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9092" + controller.quorum.voters: "2110489703@simple-kafka-controller-default-0.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093,2110489704@simple-kafka-controller-default-1.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093,2110489705@simple-kafka-controller-default-2.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093" # listener.security.protocol.map: CONTROLLER:SSL,... - Already defined by the operator # zookeeper.connect= (should already be present) - The ZooKeeper connection string. This property should already be configured. @@ -41,7 +41,7 @@ spec: default: replicas: 3 envOverrides: - KAFKA_CLUSTER_ID: "lyeJYZ7TQ_SfT4HcU8W3iw" + KAFKA_CLUSTER_ID: "saiZFmAuSX-QyMfMhwLk9g" configOverrides: controller.properties: zookeeper.metadata.migration.enable: "true" # Enable migration mode so the controller can read metadata from ZooKeeper. 
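
Editor's note on the hard-coded KAFKA_CLUSTER_ID values in the manifests above: for a migration they must match the id already stored by the ZooKeeper-backed cluster, not a freshly generated one. A minimal sketch of looking it up, assuming the resources from 01-setup.yaml; the ZOOKEEPER key is the same discovery ConfigMap entry the controller StatefulSet reads elsewhere in this series, while the ZooKeeper pod name, the zkCli.sh invocation (path, default port, TLS client settings) and the znode chroot placeholder are assumptions about the ZooKeeper image and its security settings:

    # Connection string, including the znode chroot under which Kafka keeps its metadata
    kubectl get configmap -n kraft-migration simple-kafka-znode -o jsonpath='{.data.ZOOKEEPER}'
    # Read the id from <chroot>/cluster/id, replacing /znode-... with the chroot printed above
    kubectl exec -n kraft-migration simple-zk-server-default-0 -- \
      bin/zkCli.sh get /znode-.../cluster/id
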
From a2b700956290fcc597b3d2ff462b55746fb1ebba Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 22 Dec 2025 12:17:04 +0100 Subject: [PATCH 11/29] add internal broker listener TLS settings to controller.properties --- rust/operator-binary/src/crd/security.rs | 27 +++++++++++++++++++ .../operator-binary/src/resource/configmap.rs | 20 +++++++------- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index b729386a..2efd90b4 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -705,6 +705,33 @@ impl KafkaTlsSecurity { KafkaListenerName::Controller.listener_ssl_truststore_type(), "PKCS12".to_string(), ); + + // The TLS properties for the internal broker listener are needed by the Kraft controllers + // too during metadata migration from ZooKeeper to Kraft mode. + config.insert( + KafkaListenerName::Internal.listener_ssl_keystore_location(), + format!("{}/keystore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), + ); + config.insert( + KafkaListenerName::Internal.listener_ssl_keystore_password(), + Self::SSL_STORE_PASSWORD.to_string(), + ); + config.insert( + KafkaListenerName::Internal.listener_ssl_keystore_type(), + "PKCS12".to_string(), + ); + config.insert( + KafkaListenerName::Internal.listener_ssl_truststore_location(), + format!("{}/truststore.p12", Self::STACKABLE_TLS_KAFKA_INTERNAL_DIR), + ); + config.insert( + KafkaListenerName::Internal.listener_ssl_truststore_password(), + Self::SSL_STORE_PASSWORD.to_string(), + ); + config.insert( + KafkaListenerName::Internal.listener_ssl_truststore_type(), + "PKCS12".to_string(), + ); // We set either client tls with authentication or client tls without authentication // If authentication is explicitly required we do not want to have any other CAs to // be trusted. diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index 1d2fed83..73b81cf5 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -257,17 +257,19 @@ fn server_properties_file( result.extend([(KAFKA_CONTROLLER_QUORUM_VOTERS.to_string(), kraft_voters)]); - // Needed to migrate from ZooKeeper to KRaft mode. - result.extend([ - ( + result.insert( + "inter.broker.listener.name".to_string(), + KafkaListenerName::Internal.to_string(), + ); + + // The ZooKeeper connection is needed for migration from ZooKeeper to KRaft mode. + // It is not needed once the controller is fully running in KRaft mode. 
+ if !kraft_mode { + result.insert( "zookeeper.connect".to_string(), "${env:ZOOKEEPER}".to_string(), - ), - ( - "inter.broker.listener.name".to_string(), - KafkaListenerName::Internal.to_string(), - ), - ]); + ); + } Ok(result) } KafkaRole::Broker => { From 09f295b279c2df2b5bc68d8eaa99c3feedc162f0 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 22 Dec 2025 12:19:22 +0100 Subject: [PATCH 12/29] update migration manifests --- .../kraft-migration/02-start-controllers.yaml | 4 +- .../kraft-migration/03-migrate-metadata.yaml | 4 +- .../kraft-migration/04-migrate-brokers.yaml | 46 +++++++++++++++++++ examples/kraft-migration/05-kraft-mode.yaml | 32 +++++++++++++ 4 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 examples/kraft-migration/04-migrate-brokers.yaml create mode 100644 examples/kraft-migration/05-kraft-mode.yaml diff --git a/examples/kraft-migration/02-start-controllers.yaml b/examples/kraft-migration/02-start-controllers.yaml index 2a961de1..b055bf55 100644 --- a/examples/kraft-migration/02-start-controllers.yaml +++ b/examples/kraft-migration/02-start-controllers.yaml @@ -18,7 +18,7 @@ spec: zookeeperConfigMapName: simple-kafka-znode brokers: envOverrides: - KAFKA_CLUSTER_ID: "saiZFmAuSX-QyMfMhwLk9g" + KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" roleGroups: default: replicas: 3 @@ -27,7 +27,7 @@ spec: default: replicas: 3 envOverrides: - KAFKA_CLUSTER_ID: "saiZFmAuSX-QyMfMhwLk9g" + KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" configOverrides: controller.properties: zookeeper.metadata.migration.enable: "true" # Enable migration mode so the controller can read metadata from ZooKeeper. diff --git a/examples/kraft-migration/03-migrate-metadata.yaml b/examples/kraft-migration/03-migrate-metadata.yaml index 26e3976b..022a8d90 100644 --- a/examples/kraft-migration/03-migrate-metadata.yaml +++ b/examples/kraft-migration/03-migrate-metadata.yaml @@ -18,7 +18,7 @@ spec: zookeeperConfigMapName: simple-kafka-znode brokers: envOverrides: - KAFKA_CLUSTER_ID: "saiZFmAuSX-QyMfMhwLk9g" + KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" roleGroups: default: replicas: 3 @@ -41,7 +41,7 @@ spec: default: replicas: 3 envOverrides: - KAFKA_CLUSTER_ID: "saiZFmAuSX-QyMfMhwLk9g" + KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" configOverrides: controller.properties: zookeeper.metadata.migration.enable: "true" # Enable migration mode so the controller can read metadata from ZooKeeper. 
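
Editor's note: before applying the broker changes in the next manifest (04-migrate-brokers.yaml), the upstream migration procedure waits for the metadata migration to complete on the active KRaft controller. A minimal sketch of checking for that, assuming the example cluster from this series and a main container named `kafka`; the exact wording of the log message comes from Kafka itself and is not defined by these patches, so grep loosely:

    kubectl logs -n kraft-migration simple-kafka-controller-default-0 -c kafka \
      | grep -i "migration"
    # look for the active controller reporting that the ZooKeeper-to-KRaft metadata migration has completed
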
diff --git a/examples/kraft-migration/04-migrate-brokers.yaml b/examples/kraft-migration/04-migrate-brokers.yaml new file mode 100644 index 00000000..ab67d4ff --- /dev/null +++ b/examples/kraft-migration/04-migrate-brokers.yaml @@ -0,0 +1,46 @@ +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: simple-kafka + namespace: kraft-migration +spec: + image: + productVersion: 3.9.1 + pullPolicy: IfNotPresent + clusterConfig: + metadataManager: ZooKeeper + authentication: + - authenticationClass: kafka-client-auth-tls + tls: + internalSecretClass: kafka-internal-tls + serverSecretClass: tls + zookeeperConfigMapName: simple-kafka-znode + brokers: + envOverrides: + KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" + roleGroups: + default: + replicas: 3 + configOverrides: + broker.properties: + controller.listener.names: "CONTROLLER" + controller.quorum.voters: "2110489703@simple-kafka-controller-default-0.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093,2110489704@simple-kafka-controller-default-1.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093,2110489705@simple-kafka-controller-default-2.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093" + process.roles: "broker" + node.id: "${env:REPLICA_ID}" + + # Remove or comment out the following properties: + # broker.id - This property is replaced by node.id in KRaft mode. + # inter.broker.protocol.version + # zookeeper.metadata.migration.enable=true + # zookeeper.connect + + controllers: + roleGroups: + default: + replicas: 3 + envOverrides: + KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" + configOverrides: + controller.properties: + zookeeper.metadata.migration.enable: "true" # Enable migration mode so the controller can read metadata from ZooKeeper. 
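The `controller.quorum.voters` override added to the broker configuration has to match the voter list the operator generates for the controller role. Assuming the rendered controller configuration is available at `/tmp/controller.properties` inside the controller pods (as the migration guide later in this series describes), it can be copied from there, for example:

# Sketch: print the operator-generated quorum voter string from a controller pod.
kubectl exec -n kraft-migration simple-kafka-controller-default-0 -c kafka -- \
  grep controller.quorum.voters /tmp/controller.properties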
diff --git a/examples/kraft-migration/05-kraft-mode.yaml b/examples/kraft-migration/05-kraft-mode.yaml new file mode 100644 index 00000000..916195b3 --- /dev/null +++ b/examples/kraft-migration/05-kraft-mode.yaml @@ -0,0 +1,32 @@ +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: simple-kafka + namespace: kraft-migration +spec: + image: + productVersion: 3.9.1 + pullPolicy: IfNotPresent + clusterConfig: + metadataManager: KRaft + authentication: + - authenticationClass: kafka-client-auth-tls + tls: + internalSecretClass: kafka-internal-tls + serverSecretClass: tls + brokers: + envOverrides: + KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" + roleGroups: + default: + replicas: 3 + configOverrides: + broker.properties: + controller.listener.names: "CONTROLLER" + controllers: + roleGroups: + default: + replicas: 3 + envOverrides: + KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" From e76843f9b608cca3880286a8cb1ccd8999d421dc Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 22 Dec 2025 16:15:13 +0100 Subject: [PATCH 13/29] fix metadata manager casing --- deploy/helm/kafka-operator/crds/crds.yaml | 8 ++++---- examples/kraft-migration/01-setup.yaml | 2 +- examples/kraft-migration/02-start-controllers.yaml | 2 +- examples/kraft-migration/03-migrate-metadata.yaml | 2 +- examples/kraft-migration/04-migrate-brokers.yaml | 2 +- examples/kraft-migration/05-kraft-mode.yaml | 2 +- rust/operator-binary/src/crd/mod.rs | 3 +-- 7 files changed, 10 insertions(+), 11 deletions(-) diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index ff3e5feb..87d1b869 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -731,7 +731,7 @@ spec: authentication: [] authorization: opa: null - metadataManager: ZooKeeper + metadataManager: zookeeper tls: internalSecretClass: tls serverSecretClass: tls @@ -795,10 +795,10 @@ spec: type: object type: object metadataManager: - default: ZooKeeper + default: zookeeper enum: - - ZooKeeper - - KRaft + - zookeeper + - kraft type: string tls: default: diff --git a/examples/kraft-migration/01-setup.yaml b/examples/kraft-migration/01-setup.yaml index a7ec8369..f415f885 100644 --- a/examples/kraft-migration/01-setup.yaml +++ b/examples/kraft-migration/01-setup.yaml @@ -74,7 +74,7 @@ spec: productVersion: 3.9.1 pullPolicy: IfNotPresent clusterConfig: - metadataManager: ZooKeeper + metadataManager: zookeeper authentication: - authenticationClass: kafka-client-auth-tls tls: diff --git a/examples/kraft-migration/02-start-controllers.yaml b/examples/kraft-migration/02-start-controllers.yaml index b055bf55..382fa418 100644 --- a/examples/kraft-migration/02-start-controllers.yaml +++ b/examples/kraft-migration/02-start-controllers.yaml @@ -9,7 +9,7 @@ spec: productVersion: 3.9.1 pullPolicy: IfNotPresent clusterConfig: - metadataManager: ZooKeeper + metadataManager: zookeeper authentication: - authenticationClass: kafka-client-auth-tls tls: diff --git a/examples/kraft-migration/03-migrate-metadata.yaml b/examples/kraft-migration/03-migrate-metadata.yaml index 022a8d90..922e559b 100644 --- a/examples/kraft-migration/03-migrate-metadata.yaml +++ b/examples/kraft-migration/03-migrate-metadata.yaml @@ -9,7 +9,7 @@ spec: productVersion: 3.9.1 pullPolicy: IfNotPresent clusterConfig: - metadataManager: ZooKeeper + metadataManager: zookeeper authentication: - authenticationClass: kafka-client-auth-tls tls: diff --git 
a/examples/kraft-migration/04-migrate-brokers.yaml b/examples/kraft-migration/04-migrate-brokers.yaml index ab67d4ff..59d78814 100644 --- a/examples/kraft-migration/04-migrate-brokers.yaml +++ b/examples/kraft-migration/04-migrate-brokers.yaml @@ -9,7 +9,7 @@ spec: productVersion: 3.9.1 pullPolicy: IfNotPresent clusterConfig: - metadataManager: ZooKeeper + metadataManager: zookeeper authentication: - authenticationClass: kafka-client-auth-tls tls: diff --git a/examples/kraft-migration/05-kraft-mode.yaml b/examples/kraft-migration/05-kraft-mode.yaml index 916195b3..d08adb6d 100644 --- a/examples/kraft-migration/05-kraft-mode.yaml +++ b/examples/kraft-migration/05-kraft-mode.yaml @@ -9,7 +9,7 @@ spec: productVersion: 3.9.1 pullPolicy: IfNotPresent clusterConfig: - metadataManager: KRaft + metadataManager: kraft authentication: - authenticationClass: kafka-client-auth-tls tls: diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index f5e3d7f0..7bad7ca3 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -421,10 +421,9 @@ pub struct KafkaClusterStatus { Serialize, EnumString, )] +#[serde(rename_all = "lowercase")] pub enum MetadataManager { - #[strum(serialize = "zookeeper")] ZooKeeper, - #[strum(serialize = "kraft")] KRaft, } From 6ef71e6da3582529783b01b379e7d8b9c3e3b43e Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 22 Dec 2025 16:37:05 +0100 Subject: [PATCH 14/29] update kraft kuttl tests --- tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 | 1 + tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 | 1 + tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 | 1 + tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 | 1 + .../kuttl/operations-kraft/60-scale-controller-up.yaml.j2 | 1 + .../kuttl/operations-kraft/70-scale-controller-down.yaml.j2 | 1 + .../kuttl/operations-kraft/80-scale-broker-down.yaml.j2 | 1 + tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2 | 1 + 8 files changed, 8 insertions(+) diff --git a/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 index fd95c8ef..e415091c 100644 --- a/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 @@ -18,6 +18,7 @@ spec: pullPolicy: IfNotPresent {% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: + metadataManager: kraft vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 index 2be3f573..1ce5edcc 100644 --- a/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 @@ -18,6 +18,7 @@ spec: pullPolicy: IfNotPresent {% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: + metadataManager: kraft vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 index b11dd670..65ef99bc 100644 --- a/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 @@ -18,6 +18,7 @@ spec: pullPolicy: IfNotPresent {% if lookup('env', 
'VECTOR_AGGREGATOR') %} clusterConfig: + metadataManager: kraft vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} brokers: diff --git a/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 index 13a19572..cfc16a11 100644 --- a/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 @@ -17,6 +17,7 @@ spec: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: + metadataManager: kraft vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 b/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 index c1b64e7e..020ea67e 100644 --- a/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 @@ -18,6 +18,7 @@ spec: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: + metadataManager: kraft vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 b/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 index ba70a1b5..db4d1cfa 100644 --- a/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 @@ -18,6 +18,7 @@ spec: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: + metadataManager: kraft vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/operations-kraft/80-scale-broker-down.yaml.j2 b/tests/templates/kuttl/operations-kraft/80-scale-broker-down.yaml.j2 index d532d273..d788a9c9 100644 --- a/tests/templates/kuttl/operations-kraft/80-scale-broker-down.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/80-scale-broker-down.yaml.j2 @@ -24,6 +24,7 @@ spec: {% endif %} {% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: + metadataManager: kraft vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2 b/tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2 index 282686e9..95d85da6 100644 --- a/tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/smoke-kraft/30-install-kafka.yaml.j2 @@ -69,6 +69,7 @@ spec: {% endif %} pullPolicy: IfNotPresent clusterConfig: + metadataManager: kraft authentication: - authenticationClass: test-kafka-client-auth-tls tls: From 4179b64d8cef9bd7aa1b6e3a56459b882b0ae24d Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 2 Jan 2026 12:10:09 +0100 Subject: [PATCH 15/29] update changelog and start documenting the migration process --- CHANGELOG.md | 3 +++ deploy/helm/kafka-operator/crds/crds.yaml | 19 +++++++++++++++++++ .../pages/usage-guide/kraft-controller.adoc | 11 +++++++++++ rust/operator-binary/src/crd/mod.rs | 18 ++++++++++++++++++ 4 files changed, 51 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76cc7d18..549d318d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ All notable changes to this project will be documented in this file. 
### Changed - Refactor: move server configuration properties from the command line to configuration files. ([#911]). +- BREAKING: add support for ZooKeeper to KRaft migration ([#923]). + In order to support migration to Kraft, the operator must disable automatic Kafka broker id generation. + The broker ids generated by the operator are incompatible with those generated by Kafka. ### Removed diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index 87d1b869..1710775d 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -796,6 +796,25 @@ spec: type: object metadataManager: default: zookeeper + description: |- + Metadata manager to use for the Kafka cluster. + + Possible values are `zookeeper` and `kraft`. + For backwards compatibility, it defaults to `zookeeper` for Kafka versions below `4.0.0` and to `kraft` for Kafka versions `4.0.0` and higher. + Using `zookeeper` for Kafka versions `4.0.0` and higher is not supported. + + When set to `kraft`, the operator will perform the following actions: + + * Generate the Kafka cluster id. + * Assign broker roles and configure controller quorum voters in the `broker.properties` files. + * Format storage when before (re)starting Kafka brokers. + * Remove ZooKeeper related configuration options from the `broker.properties` files. + + These actions are **mandatory** when in Kraft mode and partially exclusive to the ZooKeeper mode. + This means they **cannot** be performed in ZooKeeper mode. + + This property is also useful when migrating from ZooKeeper to Kraft mode because it permits the operator + to reconcile controllers while still using ZooKeeper for brokers. enum: - zookeeper - kraft diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index ea5c4946..edfc678a 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -110,3 +110,14 @@ The Stackable Kafka operator currently does not support the migration. The https://developers.redhat.com/articles/2024/11/27/dynamic-kafka-controller-quorum[Dynamic scaling] is only supported from Kafka version 3.9.0. If you are using older versions, automatic scaling may not work properly (e.g. adding or removing controller replicas). + +== Migration from ZooKeeper to KRaft mode + +The operator version `26.3.0` adds support for migrating Kafka clusters from ZooKeeper to KRaft mode. + +Requirements: + +* Kafka clusters **must** be set up with the Stackable Kafka operator version `26.3.0` or higher. Kafka clusters set up with a previous operator version **cannot be upgraded** without migrating all broker instances first. This is because broker id management must be handed over from Kafka to the operator. The broker ids are not compatible between the two systems. +* Kafka version **must** be `3.7.2` or `3.9.1`. Starting with version `4.0.0` the Zookeeper support is removed completely. + +TODO: describe the migration steps in detail diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 7bad7ca3..2898fd93 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -163,6 +163,24 @@ pub mod versioned { /// Please use the 'controller' role instead. pub zookeeper_config_map_name: Option, + /// Metadata manager to use for the Kafka cluster. + /// + /// Possible values are `zookeeper` and `kraft`. 
+ /// For backwards compatibility, it defaults to `zookeeper` for Kafka versions below `4.0.0` and to `kraft` for Kafka versions `4.0.0` and higher. + /// Using `zookeeper` for Kafka versions `4.0.0` and higher is not supported. + /// + /// When set to `kraft`, the operator will perform the following actions: + /// + /// * Generate the Kafka cluster id. + /// * Assign broker roles and configure controller quorum voters in the `broker.properties` files. + /// * Format storage when before (re)starting Kafka brokers. + /// * Remove ZooKeeper related configuration options from the `broker.properties` files. + /// + /// These actions are **mandatory** when in Kraft mode and partially exclusive to the ZooKeeper mode. + /// This means they **cannot** be performed in ZooKeeper mode. + /// + /// This property is also useful when migrating from ZooKeeper to Kraft mode because it permits the operator + /// to reconcile controllers while still using ZooKeeper for brokers. #[serde(default = "default_metadata_manager")] pub metadata_manager: MetadataManager, } From 79a0f08645b1e7984b439ee9e21d35684c7469ec Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 2 Jan 2026 15:56:01 +0100 Subject: [PATCH 16/29] make meta manager optional and implement sanity checks --- deploy/helm/kafka-operator/crds/crds.yaml | 8 +-- rust/operator-binary/src/config/command.rs | 9 ++-- rust/operator-binary/src/crd/mod.rs | 51 +++++++++++++------ .../operator-binary/src/resource/configmap.rs | 13 +++-- .../src/resource/statefulset.rs | 13 +++-- 5 files changed, 64 insertions(+), 30 deletions(-) diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index 1710775d..379ace7d 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -731,7 +731,6 @@ spec: authentication: [] authorization: opa: null - metadataManager: zookeeper tls: internalSecretClass: tls serverSecretClass: tls @@ -795,19 +794,18 @@ spec: type: object type: object metadataManager: - default: zookeeper description: |- Metadata manager to use for the Kafka cluster. Possible values are `zookeeper` and `kraft`. - For backwards compatibility, it defaults to `zookeeper` for Kafka versions below `4.0.0` and to `kraft` for Kafka versions `4.0.0` and higher. + If not set, defaults to `zookeeper` for Kafka versions below `4.0.0` and to `kraft` for Kafka versions `4.0.0` and higher. Using `zookeeper` for Kafka versions `4.0.0` and higher is not supported. When set to `kraft`, the operator will perform the following actions: * Generate the Kafka cluster id. * Assign broker roles and configure controller quorum voters in the `broker.properties` files. - * Format storage when before (re)starting Kafka brokers. + * Format storage before (re)starting Kafka brokers. * Remove ZooKeeper related configuration options from the `broker.properties` files. These actions are **mandatory** when in Kraft mode and partially exclusive to the ZooKeeper mode. 
@@ -818,6 +816,8 @@ spec: enum: - zookeeper - kraft + - null + nullable: true type: string tls: default: diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index bfe150b1..7980219d 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -11,14 +11,13 @@ use crate::{ KafkaPodDescriptor, STACKABLE_CONFIG_DIR, STACKABLE_KERBEROS_KRB5_PATH, role::{broker::BROKER_PROPERTIES_FILE, controller::CONTROLLER_PROPERTIES_FILE}, security::KafkaTlsSecurity, - v1alpha1, }, product_logging::STACKABLE_LOG_DIR, }; /// Returns the commands to start the main Kafka container pub fn broker_kafka_container_commands( - kafka: &v1alpha1::KafkaCluster, + kraft_mode: bool, controller_descriptors: Vec, kafka_security: &KafkaTlsSecurity, product_version: &str, @@ -41,16 +40,16 @@ pub fn broker_kafka_container_commands( true => format!("export KERBEROS_REALM=$(grep -oP 'default_realm = \\K.*' {STACKABLE_KERBEROS_KRB5_PATH})"), false => "".to_string(), }, - broker_start_command = broker_start_command(kafka, controller_descriptors, product_version), + broker_start_command = broker_start_command(kraft_mode, controller_descriptors, product_version), } } fn broker_start_command( - kafka: &v1alpha1::KafkaCluster, + kraft_mode: bool, controller_descriptors: Vec, product_version: &str, ) -> String { - if kafka.is_kraft_mode() { + if kraft_mode { formatdoc! {" POD_INDEX=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') export REPLICA_ID=$((POD_INDEX+NODE_ID_OFFSET)) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 2898fd93..ca10e289 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -59,6 +59,11 @@ pub const STACKABLE_KERBEROS_KRB5_PATH: &str = "/stackable/kerberos/krb5.conf"; #[derive(Snafu, Debug)] pub enum Error { + #[snafu(display( + "The ZooKeeper metadata manager is not supported for Kafka version 4 and higher" + ))] + Kafka4RequiresKraftMetadataManager, + #[snafu(display("The Kafka role [{role}] is missing from spec"))] MissingRole { role: String }, @@ -166,14 +171,14 @@ pub mod versioned { /// Metadata manager to use for the Kafka cluster. /// /// Possible values are `zookeeper` and `kraft`. - /// For backwards compatibility, it defaults to `zookeeper` for Kafka versions below `4.0.0` and to `kraft` for Kafka versions `4.0.0` and higher. + /// If not set, defaults to `zookeeper` for Kafka versions below `4.0.0` and to `kraft` for Kafka versions `4.0.0` and higher. /// Using `zookeeper` for Kafka versions `4.0.0` and higher is not supported. /// /// When set to `kraft`, the operator will perform the following actions: /// /// * Generate the Kafka cluster id. /// * Assign broker roles and configure controller quorum voters in the `broker.properties` files. - /// * Format storage when before (re)starting Kafka brokers. + /// * Format storage before (re)starting Kafka brokers. /// * Remove ZooKeeper related configuration options from the `broker.properties` files. /// /// These actions are **mandatory** when in Kraft mode and partially exclusive to the ZooKeeper mode. @@ -181,8 +186,8 @@ pub mod versioned { /// /// This property is also useful when migrating from ZooKeeper to Kraft mode because it permits the operator /// to reconcile controllers while still using ZooKeeper for brokers. 
- #[serde(default = "default_metadata_manager")] - pub metadata_manager: MetadataManager, + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata_manager: Option, } } @@ -194,7 +199,7 @@ impl Default for v1alpha1::KafkaClusterConfig { tls: tls::default_kafka_tls(), vector_aggregator_config_map_name: None, zookeeper_config_map_name: None, - metadata_manager: default_metadata_manager(), + metadata_manager: None, } } } @@ -209,8 +214,26 @@ impl HasStatusCondition for v1alpha1::KafkaCluster { } impl v1alpha1::KafkaCluster { - pub fn is_kraft_mode(&self) -> bool { - self.spec.cluster_config.metadata_manager == MetadataManager::KRaft + pub fn effective_metadata_manager(&self) -> Result { + match &self.spec.cluster_config.metadata_manager { + Some(manager) => match manager.clone() { + MetadataManager::ZooKeeper => { + if self.spec.image.product_version().starts_with("4\\.") { + Err(Error::Kafka4RequiresKraftMetadataManager) + } else { + Ok(MetadataManager::ZooKeeper) + } + } + _ => Ok(MetadataManager::KRaft), + }, + None => { + if self.spec.image.product_version().starts_with("4\\.") { + Ok(MetadataManager::KRaft) + } else { + Ok(MetadataManager::ZooKeeper) + } + } + } } /// The Kafka cluster id when running in Kraft mode. @@ -225,10 +248,12 @@ impl v1alpha1::KafkaCluster { /// /// For freshly installed clusters, users do not need to deal with the cluster id. pub fn cluster_id(&self) -> Option<&str> { - match self.spec.cluster_config.metadata_manager { - MetadataManager::KRaft => self.metadata.name.as_deref(), - _ => None, - } + self.effective_metadata_manager() + .ok() + .and_then(|manager| match manager { + MetadataManager::KRaft => self.metadata.name.as_deref(), + _ => None, + }) } /// The name of the load-balanced Kubernetes Service providing the bootstrap address. 
Kafka clients will use this @@ -445,10 +470,6 @@ pub enum MetadataManager { KRaft, } -fn default_metadata_manager() -> MetadataManager { - MetadataManager::ZooKeeper -} - #[cfg(test)] mod tests { use super::*; diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index 73b81cf5..b8e78df6 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -15,8 +15,8 @@ use stackable_operator::{ use crate::{ crd::{ - JVM_SECURITY_PROPERTIES_FILE, KafkaPodDescriptor, STACKABLE_LISTENER_BOOTSTRAP_DIR, - STACKABLE_LISTENER_BROKER_DIR, + JVM_SECURITY_PROPERTIES_FILE, KafkaPodDescriptor, MetadataManager, + STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, listener::{KafkaListenerConfig, KafkaListenerName, node_address_cmd}, role::{ AnyConfig, KAFKA_ADVERTISED_LISTENERS, KAFKA_BROKER_ID, @@ -35,6 +35,9 @@ use crate::{ #[derive(Snafu, Debug)] pub enum Error { + #[snafu(display("invalid metadata manager"))] + InvalidMetadataManager { source: crate::crd::Error }, + #[snafu(display("failed to build ConfigMap for {}", rolegroup))] BuildRoleGroupConfig { source: stackable_operator::builder::configmap::Error, @@ -94,8 +97,12 @@ pub fn build_rolegroup_config_map( ) -> Result { let kafka_config_file_name = merged_config.config_file_name(); + let metadata_manager = kafka + .effective_metadata_manager() + .context(InvalidMetadataManagerSnafu)?; + let mut kafka_config = server_properties_file( - kafka.is_kraft_mode(), + metadata_manager == MetadataManager::KRaft, &rolegroup.role, pod_descriptors, listener_config, diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index a7aec39c..25f7612a 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -50,8 +50,8 @@ use crate::{ crd::{ self, APP_NAME, KAFKA_HEAP_OPTS, LISTENER_BOOTSTRAP_VOLUME_NAME, LISTENER_BROKER_VOLUME_NAME, LOG_DIRS_VOLUME_NAME, METRICS_PORT, METRICS_PORT_NAME, - STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LISTENER_BOOTSTRAP_DIR, - STACKABLE_LISTENER_BROKER_DIR, + MetadataManager, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, + STACKABLE_LISTENER_BOOTSTRAP_DIR, STACKABLE_LISTENER_BROKER_DIR, role::{ AnyConfig, KAFKA_NODE_ID_OFFSET, KafkaRole, broker::BrokerContainer, controller::ControllerContainer, @@ -71,6 +71,9 @@ use crate::{ #[derive(Snafu, Debug)] pub enum Error { + #[snafu(display("invalid metadata manager"))] + InvalidMetadataManager { source: crate::crd::Error }, + #[snafu(display("failed to add kerberos config"))] AddKerberosConfig { source: crate::kerberos::Error }, @@ -283,6 +286,10 @@ pub fn build_broker_rolegroup_statefulset( ..EnvVar::default() }); + let metadata_manager = kafka + .effective_metadata_manager() + .context(InvalidMetadataManagerSnafu)?; + cb_kafka .image_from_product_image(resolved_product_image) .command(vec![ @@ -293,7 +300,7 @@ pub fn build_broker_rolegroup_statefulset( "-c".to_string(), ]) .args(vec![broker_kafka_container_commands( - kafka, + metadata_manager == MetadataManager::KRaft, // we need controller pods kafka .pod_descriptors( From 74f5744677ee0981a4264cfdbfeb0d254eb4daad Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 5 Jan 2026 10:46:54 +0100 Subject: [PATCH 17/29] add deprecation notice --- deploy/helm/kafka-operator/crds/crds.yaml | 9 ++++++++- rust/operator-binary/src/crd/mod.rs | 9 ++++++++- 2 files 
changed, 16 insertions(+), 2 deletions(-) diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index 379ace7d..be7b515a 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -797,8 +797,15 @@ spec: description: |- Metadata manager to use for the Kafka cluster. + IMPORTANT: This property will be removed as soon as Kafka 3.x support is dropped. + Possible values are `zookeeper` and `kraft`. - If not set, defaults to `zookeeper` for Kafka versions below `4.0.0` and to `kraft` for Kafka versions `4.0.0` and higher. + + If not set, defaults to: + + - `zookeeper` for Kafka versions below `4.0.0`. + - `kraft` for Kafka versions `4.0.0` and higher. + Using `zookeeper` for Kafka versions `4.0.0` and higher is not supported. When set to `kraft`, the operator will perform the following actions: diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index ca10e289..0864392f 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -170,8 +170,15 @@ pub mod versioned { /// Metadata manager to use for the Kafka cluster. /// + /// IMPORTANT: This property will be removed as soon as Kafka 3.x support is dropped. + /// /// Possible values are `zookeeper` and `kraft`. - /// If not set, defaults to `zookeeper` for Kafka versions below `4.0.0` and to `kraft` for Kafka versions `4.0.0` and higher. + /// + /// If not set, defaults to: + /// + /// - `zookeeper` for Kafka versions below `4.0.0`. + /// - `kraft` for Kafka versions `4.0.0` and higher. + /// /// Using `zookeeper` for Kafka versions `4.0.0` and higher is not supported. /// /// When set to `kraft`, the operator will perform the following actions: From 30ad3b82e4bc5b0157d4b169f2773cbd88239842 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 5 Jan 2026 14:13:27 +0100 Subject: [PATCH 18/29] kraft migration guide --- .../examples/kraft_migration}/01-setup.yaml | 0 .../02-start-controllers.yaml | 0 .../kraft_migration}/03-migrate-metadata.yaml | 8 - .../kraft_migration}/04-migrate-brokers.yaml | 7 - .../kraft_migration}/05-kraft-mode.yaml | 0 .../pages/usage-guide/kraft-controller.adoc | 137 +++++++++++++++++- 6 files changed, 134 insertions(+), 18 deletions(-) rename {examples/kraft-migration => docs/modules/kafka/examples/kraft_migration}/01-setup.yaml (100%) rename {examples/kraft-migration => docs/modules/kafka/examples/kraft_migration}/02-start-controllers.yaml (100%) rename {examples/kraft-migration => docs/modules/kafka/examples/kraft_migration}/03-migrate-metadata.yaml (67%) rename {examples/kraft-migration => docs/modules/kafka/examples/kraft_migration}/04-migrate-brokers.yaml (85%) rename {examples/kraft-migration => docs/modules/kafka/examples/kraft_migration}/05-kraft-mode.yaml (100%) diff --git a/examples/kraft-migration/01-setup.yaml b/docs/modules/kafka/examples/kraft_migration/01-setup.yaml similarity index 100% rename from examples/kraft-migration/01-setup.yaml rename to docs/modules/kafka/examples/kraft_migration/01-setup.yaml diff --git a/examples/kraft-migration/02-start-controllers.yaml b/docs/modules/kafka/examples/kraft_migration/02-start-controllers.yaml similarity index 100% rename from examples/kraft-migration/02-start-controllers.yaml rename to docs/modules/kafka/examples/kraft_migration/02-start-controllers.yaml diff --git a/examples/kraft-migration/03-migrate-metadata.yaml 
b/docs/modules/kafka/examples/kraft_migration/03-migrate-metadata.yaml similarity index 67% rename from examples/kraft-migration/03-migrate-metadata.yaml rename to docs/modules/kafka/examples/kraft_migration/03-migrate-metadata.yaml index 922e559b..b42636c0 100644 --- a/examples/kraft-migration/03-migrate-metadata.yaml +++ b/docs/modules/kafka/examples/kraft_migration/03-migrate-metadata.yaml @@ -28,14 +28,6 @@ spec: zookeeper.metadata.migration.enable: "true" # - Enable migration mode so the broker can participate in metadata migration. controller.listener.names: "CONTROLLER" controller.quorum.voters: "2110489703@simple-kafka-controller-default-0.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093,2110489704@simple-kafka-controller-default-1.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093,2110489705@simple-kafka-controller-default-2.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093" - - # listener.security.protocol.map: CONTROLLER:SSL,... - Already defined by the operator - # zookeeper.connect= (should already be present) - The ZooKeeper connection string. This property should already be configured. - # controller.quorum.voters= (same as controllers) - Specify the same controller quorum voters string as configured in phase 2. - # controller.listener.names=CONTROLLER - Define the listener name for the controller. - # Add CONTROLLER to listener.security.protocol.map (for example, ...CONTROLLER:PLAINTEXT) - Add the CONTROLLER listener to the security protocol map with the appropriate security protocol. - # confluent.cluster.link.metadata.topic.enable=true - This property is used by Cluster Linking during the migration. - controllers: roleGroups: default: diff --git a/examples/kraft-migration/04-migrate-brokers.yaml b/docs/modules/kafka/examples/kraft_migration/04-migrate-brokers.yaml similarity index 85% rename from examples/kraft-migration/04-migrate-brokers.yaml rename to docs/modules/kafka/examples/kraft_migration/04-migrate-brokers.yaml index 59d78814..1211ebfb 100644 --- a/examples/kraft-migration/04-migrate-brokers.yaml +++ b/docs/modules/kafka/examples/kraft_migration/04-migrate-brokers.yaml @@ -28,13 +28,6 @@ spec: controller.quorum.voters: "2110489703@simple-kafka-controller-default-0.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093,2110489704@simple-kafka-controller-default-1.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093,2110489705@simple-kafka-controller-default-2.simple-kafka-controller-default-headless.kraft-migration.svc.cluster.local:9093" process.roles: "broker" node.id: "${env:REPLICA_ID}" - - # Remove or comment out the following properties: - # broker.id - This property is replaced by node.id in KRaft mode. 
- # inter.broker.protocol.version - # zookeeper.metadata.migration.enable=true - # zookeeper.connect - controllers: roleGroups: default: diff --git a/examples/kraft-migration/05-kraft-mode.yaml b/docs/modules/kafka/examples/kraft_migration/05-kraft-mode.yaml similarity index 100% rename from examples/kraft-migration/05-kraft-mode.yaml rename to docs/modules/kafka/examples/kraft_migration/05-kraft-mode.yaml diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index edfc678a..b1754dd4 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -111,13 +111,144 @@ The Stackable Kafka operator currently does not support the migration. The https://developers.redhat.com/articles/2024/11/27/dynamic-kafka-controller-quorum[Dynamic scaling] is only supported from Kafka version 3.9.0. If you are using older versions, automatic scaling may not work properly (e.g. adding or removing controller replicas). -== Migration from ZooKeeper to KRaft mode +== Kraft migration guide The operator version `26.3.0` adds support for migrating Kafka clusters from ZooKeeper to KRaft mode. -Requirements: +This guide describes the steps required to migrate an existing Kafka cluster managed by the Stackable Kafka operator from ZooKeeper to KRaft mode. + +NOTE: Before starting the migration we recommend to reduce producer/consumer operations to a minimum or even pause them completely if possible to reduce the risk of data loss during the migration. + +To make the migration step as clear as possible, we'll use a complete working example throughout this guide. +The example cluster will be kept minimal without any additional configuration. +We start by creating a dedicated namespace to work on and deploy the Kafka cluster including ZooKeeper and credentials. + +[source,yaml] +---- +include::example$kraft_migration/01-setup.yaml[] +---- + +=== Requirements * Kafka clusters **must** be set up with the Stackable Kafka operator version `26.3.0` or higher. Kafka clusters set up with a previous operator version **cannot be upgraded** without migrating all broker instances first. This is because broker id management must be handed over from Kafka to the operator. The broker ids are not compatible between the two systems. * Kafka version **must** be `3.7.2` or `3.9.1`. Starting with version `4.0.0` the Zookeeper support is removed completely. -TODO: describe the migration steps in detail +=== 1. Start Kraft controllers + +In this step we will perform the following actions: + +1. Retrieve the current `cluster.id` as generated by Kafka. +2. Update the `KafkaCluster` resource to add Controller role group. +3. Configure the Controllers to run in migration mode. +4. Apply the changes and wait for all cluster Pods to become ready. + +We can obtain the current `cluster.id` either by inspecting the ZooKeeper data or from `meta.properties` file on one of the brokers. +In this example, the identifier is `cPh4Fb3pRvyqiiVjaBDaEw`. +We add this value to the `KAFKA_CLUSTER_ID` environment variable for both Brokers and Controllers. + +The complete example `KafkaCluster` resource after applying the required changes looks as follows: + +[source,yaml] +---- +include::example$kraft_migration/02-start-controllers.yaml[] +---- + +We `kubectl apply` the updated resource and wait for Brokers and Controllers to become ready. + +=== 2. 
Migrate metadata + +In this step we will perform the following actions: + +1. Obtain the controller quorum configuration. +2. Enable metadata migration mode on the Brokers. +3. Configure the controller quorum on the Brokers. +4. Apply the changes and restart the Broker pods. + +The exact value of the quorum must be obtained from the `/tmp/controller.properties` file on one of the Controller pods. +To start the metadata migration, we need to add the `zookeeper.metadata.migration.enable: "true"` and controller quorum configuration to the Broker configuration. + +For this step, the complete example `KafkaCluster` resource looks as follows: + +[source,yaml] +---- +include::example$kraft_migration/03-migrate-metadata.yaml[] +---- + +After we apply the changes, we then restart the brokers and wait for them to become ready again. + +[source,bash] +---- +kubectl rollout restart statefulset simple-kafka-broker-default -n kraft-migration +---- + +Finally we check that metadata migration was successful: + +[source,bash] +---- +kubectl logs -n kraft-migration simple-kafka-controller-default-2 | grep -i completed +... +[2025-12-22 09:23:53,372] INFO [KRaftMigrationDriver id=2110489705] Completed migration of metadata from ZooKeeper to KRaft. 0 records were generated in 102 ms across 0 batches. The average time spent waiting on a batch was -1.00 ms. The record types were {}. The current metadata offset is now 280 with an epoch of 3. Saw 0 brokers in the migrated metadata []. (org.apache.kafka.metadata.migration.KRaftMigrationDriver) +---- + +=== 3. Migrate brokers + + +NOTE: This is the last step before fully switching to KRaft mode. In case of unforeseen issues, it is the last step where we can roll back to ZooKeeper mode. + +In this step we will perform the following actions: + +1. Remove the migration properties from the previous step on the Brokers. +2. Assign Kraft role properties to Brokers. +3. Apply the changes and restart the Broker pods. + +We need to preserve the quorum configuration added in the previous step. + +For this step, the complete example `KafkaCluster` resource looks as follows: + + +[source,yaml] +---- +include::example$kraft_migration/04-migrate-brokers.yaml[] +---- + +=== 4. Enable Kraft mode + +After this step, the cluster will be fully running in KRaft mode and it cannot be rolled back to ZooKeeper mode anymore. + +In this step we will perform the following actions: + +1. Put the cluster in Kraft mode by updating the `spec.clusterConfig.metadataManager` property. +2. Remove Kraft quorum configuration from the Broker pods. +3. Remove the ZooKeeper migration flag from the Controllers. +4. Apply the changes and restart all Pods. + +We need to preserve the `KAFKA_CLUSTER_ID` environment variable for the rest of the lifetime of this cluster. + +The complete example `KafkaCluster` resource after applying the required changes looks as follows: + +[source,yaml] +---- +include::example$kraft_migration/05-kraft-mode.yaml[] +---- + +Verify that the cluster is healthy and consumer/producer operations work as expected. + +=== 5. Cleanup + +Before proceeding with this step please ensure that the Kafka cluster is fully operational in KRaft mode. + +In this step we remove the now unused ZooKeeper cluster and related resources. + +If the ZooKeeper cluster is also serving other usecases than Kafka you can skip this step. 
+ +In our example we can remove the ZooKeeper cluster and the Znode resource as follows: + +[source,bash] +---- +kubectl delete -n kraft-migration zookeeperznodes simple-kafka-znode +kubectl delete -n kraft-migration zookeeperclusters simple-zk +---- + +=== 6. Next steps + +After successfully migrating to Kraft mode, consider updating the Kafka version to `4.0.0` or higher to benefit from the latest features and improvements in KRaft mode. From 49e77df9cfe25b9d694e21b321e6808b4826e0db Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 5 Jan 2026 14:46:05 +0100 Subject: [PATCH 19/29] spelling and casing --- .../pages/usage-guide/kraft-controller.adoc | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc index b1754dd4..3f1b97d5 100644 --- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc +++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc @@ -83,7 +83,7 @@ KRaft mode requires major configuration changes compared to ZooKeeper: * `cluster-id`: This is set to the `metadata.name` of the KafkaCluster resource during initial formatting * `node.id`: This is a calculated integer, hashed from the `role` and `rolegroup` and added `replica` id. * `process.roles`: Will always only be `broker` or `controller`. Mixed `broker,controller` servers are not supported. -* The operator configures a static voter list containing the controller pods. Controllers are not dynamicaly managed. +* The operator configures a static voter list containing the controller pods. Controllers are not dynamically managed. == Known Issues @@ -121,7 +121,8 @@ NOTE: Before starting the migration we recommend to reduce producer/consumer ope To make the migration step as clear as possible, we'll use a complete working example throughout this guide. The example cluster will be kept minimal without any additional configuration. -We start by creating a dedicated namespace to work on and deploy the Kafka cluster including ZooKeeper and credentials. + +We start by creating a dedicated namespace to work in and deploy the Kafka cluster including ZooKeeper and credentials. [source,yaml] ---- @@ -138,13 +139,13 @@ include::example$kraft_migration/01-setup.yaml[] In this step we will perform the following actions: 1. Retrieve the current `cluster.id` as generated by Kafka. -2. Update the `KafkaCluster` resource to add Controller role group. -3. Configure the Controllers to run in migration mode. -4. Apply the changes and wait for all cluster Pods to become ready. +2. Update the `KafkaCluster` resource to add `spec.controllers` property. +3. Configure the controllers to run in migration mode. +4. Apply the changes and wait for all cluster pods to become ready. We can obtain the current `cluster.id` either by inspecting the ZooKeeper data or from `meta.properties` file on one of the brokers. In this example, the identifier is `cPh4Fb3pRvyqiiVjaBDaEw`. -We add this value to the `KAFKA_CLUSTER_ID` environment variable for both Brokers and Controllers. +We add this value to the `KAFKA_CLUSTER_ID` environment variable for both brokers and controllers. 
The complete example `KafkaCluster` resource after applying the required changes looks as follows: @@ -153,19 +154,19 @@ The complete example `KafkaCluster` resource after applying the required changes include::example$kraft_migration/02-start-controllers.yaml[] ---- -We `kubectl apply` the updated resource and wait for Brokers and Controllers to become ready. +We `kubectl apply` the updated resource and wait for brokers and controllers to become ready. === 2. Migrate metadata In this step we will perform the following actions: 1. Obtain the controller quorum configuration. -2. Enable metadata migration mode on the Brokers. -3. Configure the controller quorum on the Brokers. -4. Apply the changes and restart the Broker pods. +2. Enable metadata migration mode on the brokers. +3. Configure the controller quorum on the brokers. +4. Apply the changes and restart the broker pods. -The exact value of the quorum must be obtained from the `/tmp/controller.properties` file on one of the Controller pods. -To start the metadata migration, we need to add the `zookeeper.metadata.migration.enable: "true"` and controller quorum configuration to the Broker configuration. +The exact value of the quorum must be obtained from the `/tmp/controller.properties` file on one of the controller pods. +To start the metadata migration, we need to add the `zookeeper.metadata.migration.enable: "true"` and controller quorum configuration to the broker configuration. For this step, the complete example `KafkaCluster` resource looks as follows: @@ -197,9 +198,9 @@ NOTE: This is the last step before fully switching to KRaft mode. In case of unf In this step we will perform the following actions: -1. Remove the migration properties from the previous step on the Brokers. -2. Assign Kraft role properties to Brokers. -3. Apply the changes and restart the Broker pods. +1. Remove the migration properties from the previous step on the brokers. +2. Assign Kraft role properties to brokers. +3. Apply the changes and restart the broker pods. We need to preserve the quorum configuration added in the previous step. @@ -218,9 +219,9 @@ After this step, the cluster will be fully running in KRaft mode and it cannot b In this step we will perform the following actions: 1. Put the cluster in Kraft mode by updating the `spec.clusterConfig.metadataManager` property. -2. Remove Kraft quorum configuration from the Broker pods. -3. Remove the ZooKeeper migration flag from the Controllers. -4. Apply the changes and restart all Pods. +2. Remove Kraft quorum configuration from the broker pods. +3. Remove the ZooKeeper migration flag from the controllers. +4. Apply the changes and restart all pods. We need to preserve the `KAFKA_CLUSTER_ID` environment variable for the rest of the lifetime of this cluster. @@ -239,7 +240,7 @@ Before proceeding with this step please ensure that the Kafka cluster is fully o In this step we remove the now unused ZooKeeper cluster and related resources. -If the ZooKeeper cluster is also serving other usecases than Kafka you can skip this step. +If the ZooKeeper cluster is also serving other use cases than Kafka you can skip this step. 
In our example we can remove the ZooKeeper cluster and the Znode resource as follows: From e58aaae91b6693f3443573796eca3c3689a8ea98 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 6 Jan 2026 09:36:17 +0100 Subject: [PATCH 20/29] typo --- rust/operator-binary/src/resource/configmap.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index b8e78df6..e9034bb7 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -48,7 +48,7 @@ pub enum Error { "failed to serialize [{JVM_SECURITY_PROPERTIES_FILE}] for {}", rolegroup ))] - JvmSecurityPoperties { + JvmSecurityProperties { source: product_config::writer::PropertiesWriterError, rolegroup: String, }, @@ -170,7 +170,7 @@ pub fn build_rolegroup_config_map( .add_data( JVM_SECURITY_PROPERTIES_FILE, to_java_properties_string(jvm_sec_props.iter()).with_context(|_| { - JvmSecurityPopertiesSnafu { + JvmSecurityPropertiesSnafu { rolegroup: rolegroup.role_group.clone(), } })?, @@ -183,7 +183,7 @@ pub fn build_rolegroup_config_map( .iter() .map(|(k, v)| (k, v)), ) - .with_context(|_| JvmSecurityPopertiesSnafu { + .with_context(|_| JvmSecurityPropertiesSnafu { rolegroup: rolegroup.role_group.clone(), })?, ) From 938b28e5b07ed54575d56c7a54e0da65b69ba4f1 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 6 Jan 2026 18:19:31 +0100 Subject: [PATCH 21/29] zk -> kraft mm2 example --- .../mirror_maker/01-setup-source.yaml | 90 +++++++++++++++++++ .../mirror_maker/02-setup-target.yaml | 63 +++++++++++++ .../kafka/examples/mirror_maker/README.md | 46 ++++++++++ .../kafka/examples/mirror_maker/mm.properties | 38 ++++++++ 4 files changed, 237 insertions(+) create mode 100644 docs/modules/kafka/examples/mirror_maker/01-setup-source.yaml create mode 100644 docs/modules/kafka/examples/mirror_maker/02-setup-target.yaml create mode 100644 docs/modules/kafka/examples/mirror_maker/README.md create mode 100644 docs/modules/kafka/examples/mirror_maker/mm.properties diff --git a/docs/modules/kafka/examples/mirror_maker/01-setup-source.yaml b/docs/modules/kafka/examples/mirror_maker/01-setup-source.yaml new file mode 100644 index 00000000..4d95005a --- /dev/null +++ b/docs/modules/kafka/examples/mirror_maker/01-setup-source.yaml @@ -0,0 +1,90 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + labels: + stackable.tech/vendor: Stackable + name: mm-migration +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperCluster +metadata: + name: zookeeper + namespace: mm-migration +spec: + image: + productVersion: 3.9.4 + pullPolicy: IfNotPresent + servers: + roleGroups: + default: + replicas: 1 +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperZnode +metadata: + name: source-znode + namespace: mm-migration +spec: + clusterRef: + name: zookeeper +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: source-internal-tls +spec: + backend: + autoTls: + ca: + secret: + name: secret-provisioner-source-internal-tls-ca + namespace: mm-migration + autoGenerate: true +--- +apiVersion: authentication.stackable.tech/v1alpha1 +kind: AuthenticationClass +metadata: + name: source-client-auth +spec: + provider: + tls: + clientCertSecretClass: source-client-auth-secret +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: 
source-client-auth-secret +spec: + backend: + autoTls: + ca: + secret: + name: secret-provisioner-tls-source-client-ca + namespace: mm-migration + autoGenerate: true +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: source + namespace: mm-migration +spec: + image: + productVersion: 3.9.1 + pullPolicy: IfNotPresent + clusterConfig: + metadataManager: zookeeper + authentication: + - authenticationClass: source-client-auth + tls: + internalSecretClass: source-internal-tls + serverSecretClass: tls + zookeeperConfigMapName: source-znode + brokers: + roleGroups: + default: + replicas: 1 + configOverrides: + broker.properties: + offsets.topic.replication.factor: "1" # https://github.com/stackabletech/kafka-operator/issues/587 diff --git a/docs/modules/kafka/examples/mirror_maker/02-setup-target.yaml b/docs/modules/kafka/examples/mirror_maker/02-setup-target.yaml new file mode 100644 index 00000000..a69421fa --- /dev/null +++ b/docs/modules/kafka/examples/mirror_maker/02-setup-target.yaml @@ -0,0 +1,63 @@ +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: target-internal-tls +spec: + backend: + autoTls: + ca: + secret: + name: secret-provisioner-target-internal-tls-ca + namespace: mm-migration + autoGenerate: true +--- +apiVersion: authentication.stackable.tech/v1alpha1 +kind: AuthenticationClass +metadata: + name: target-client-auth +spec: + provider: + tls: + clientCertSecretClass: target-client-auth-secret +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: target-client-auth-secret +spec: + backend: + autoTls: + ca: + secret: + name: secret-provisioner-tls-target-client-ca + namespace: mm-migration + autoGenerate: true +--- +apiVersion: kafka.stackable.tech/v1alpha1 +kind: KafkaCluster +metadata: + name: target + namespace: mm-migration +spec: + image: + productVersion: 3.9.1 + pullPolicy: IfNotPresent + clusterConfig: + metadataManager: kraft + authentication: + - authenticationClass: target-client-auth + tls: + internalSecretClass: target-internal-tls + serverSecretClass: tls + brokers: + roleGroups: + default: + replicas: 1 + configOverrides: + broker.properties: + offsets.topic.replication.factor: "1" # https://github.com/stackabletech/kafka-operator/issues/587 + controllers: + roleGroups: + default: + replicas: 1 diff --git a/docs/modules/kafka/examples/mirror_maker/README.md b/docs/modules/kafka/examples/mirror_maker/README.md new file mode 100644 index 00000000..df069e6d --- /dev/null +++ b/docs/modules/kafka/examples/mirror_maker/README.md @@ -0,0 +1,46 @@ + +### Setup + +k create --save-config -f docs/modules/kafka/examples/mirror_maker/01-setup-source.yaml +k create --save-config -f docs/modules/kafka/examples/mirror_maker/02-setup-target.yaml + +k cp -n mm-migration -c kafka target-broker-default-0:/stackable/tls-kafka-server/keystore.p12 docs/modules/kafka/examples/mirror_maker/keystore.p12 +k cp -n mm-migration -c kafka target-broker-default-0:/stackable/tls-kafka-server/truststore.p12 docs/modules/kafka/examples/mirror_maker/truststore.p12 + +k cp -n mm-migration -c kafka docs/modules/kafka/examples/mirror_maker/truststore.p12 source-broker-default-0:/stackable/truststore.p12 +k cp -n mm-migration -c kafka docs/modules/kafka/examples/mirror_maker/keystore.p12 source-broker-default-0:/stackable/keystore.p12 + +k cp -n mm-migration -c kafka docs/modules/kafka/examples/mirror_maker/mm.properties source-broker-default-0:/stackable/mm.properties + +### Create a topic and 
publish some data + +/stackable/kafka/bin/kafka-topics.sh --create --topic test --partitions 1 --bootstrap-server source-broker-default-bootstrap.mm-migration.svc.cluster.local:9093 --command-config /stackable/config/client.properties + +/stackable/kafka/bin/kafka-producer-perf-test.sh --producer-props bootstrap.servers=source-broker-default-bootstrap.mm-migration.svc.cluster.local:9093 --payload-monotonic --throughput 1 --num-records 100 --producer.config /stackable/config/client.properties --topic test + +/stackable/kafka/bin/kafka-console-consumer.sh --bootstrap-server source-broker-default-bootstrap.mm-migration.svc.cluster.local:9093 --consumer.config /stackable/config/client.properties --topic test --offset earliest --partition 0 --timeout-ms 10000 + +### Run MirrorMaker + +EXTRA_ARGS="" /stackable/kafka/bin/connect-mirror-maker.sh /stackable/mm.properties + +### Verify the topic is mirrored + +/stackable/kafka/bin/kafka-topics.sh --list --bootstrap-server target-broker-default-bootstrap.mm-migration.svc.cluster.local:9093 --command-config /stackable/config/client.properties + +/stackable/kafka/bin/kafka-console-consumer.sh --bootstrap-server target-broker-default-bootstrap.mm-migration.svc.cluster.local:9093 --consumer.config /stackable/config/client.properties --topic source.test --offset earliest --partition 0 --timeout-ms 10000 + +### Cleanup + +k delete -n mm-migration kafkaclusters source +k delete -n mm-migration kafkaclusters target +k delete -n mm-migration zookeeperznodes source-znode +k delete -n mm-migration zookeeperclusters zookeeper +k delete -n mm-migration secretclasses source-internal-tls +k delete -n mm-migration secretclasses source-client-auth-secret +k delete -n mm-migration secretclasses target-internal-tls +k delete -n mm-migration secretclasses target-client-auth-secret +k delete -n mm-migration authenticationclasses target-client-auth +k delete -n mm-migration authenticationclasses source-client-auth +k delete -n mm-migration persistentvolumeclaims --all +k delete ns mm-migration diff --git a/docs/modules/kafka/examples/mirror_maker/mm.properties b/docs/modules/kafka/examples/mirror_maker/mm.properties new file mode 100644 index 00000000..b8360527 --- /dev/null +++ b/docs/modules/kafka/examples/mirror_maker/mm.properties @@ -0,0 +1,38 @@ +# specify any number of cluster aliases +clusters = source, target + +# connection information for each cluster +# This is a comma separated host:port pairs for each cluster +# for example. "A_host1:9092, A_host2:9092, A_host3:9092" and you can see the exact host name on Ambari > Hosts +source.bootstrap.servers = source-broker-default-bootstrap.mm-migration.svc.cluster.local:9093 +target.bootstrap.servers = target-broker-default-bootstrap.mm-migration.svc.cluster.local:9093 + +# enable and configure individual replication flows +source->target.enabled = true + +# regex which defines which topics gets replicated. 
For eg "foo-.*" +source->target.topics = test + +# Needed for mm2 internal topics if there is only one broker running per cluster +offset.storage.replication.factor=1 +config.storage.replication.factor=1 +status.storage.replication.factor=1 + +# SSL configuration +target.security.protocol=SSL +target.ssl.truststore.password= +target.ssl.truststore.location=/stackable/truststore.p12 +target.ssl.truststore.type=PKCS12 +#keystore location in case client.auth is set to required +target.ssl.keystore.password= +target.ssl.keystore.location=/stackable/keystore.p12 +target.ssl.keystore.type=PKCS12 + +source.security.protocol=SSL +source.ssl.truststore.password= +source.ssl.truststore.location=/stackable/tls-kafka-server/truststore.p12 +source.ssl.truststore.type=PKCS12 +#keystore location in case client.auth is set to required +source.ssl.keystore.password= +source.ssl.keystore.location=/stackable/tls-kafka-server/keystore.p12 +source.ssl.keystore.type=PKCS12 From 5631178463f58e3be3f24dc4b0914fc8b78480bb Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 13 Jan 2026 11:55:54 +0100 Subject: [PATCH 22/29] add field to support manual broker id assignments --- deploy/helm/kafka-operator/crds/crds.yaml | 18 ++++++++++ .../examples/kraft_migration/01-setup.yaml | 12 +++++++ .../kraft_migration/02-start-controllers.yaml | 5 +-- .../kraft_migration/03-migrate-metadata.yaml | 5 +-- .../kraft_migration/04-migrate-brokers.yaml | 5 +-- .../kraft_migration/05-kraft-mode.yaml | 5 +-- .../pages/usage-guide/kraft-controller.adoc | 19 ++++++++--- rust/operator-binary/src/config/command.rs | 33 +++++++++++-------- rust/operator-binary/src/crd/mod.rs | 19 +++++++++++ rust/operator-binary/src/product_logging.rs | 1 + .../src/resource/statefulset.rs | 20 +++++++++-- 11 files changed, 114 insertions(+), 28 deletions(-) diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index be7b515a..6d8698b0 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -793,6 +793,24 @@ spec: - configMapName type: object type: object + brokerIdPodConfigMapName: + description: |- + Enable users to manually assign Kafka broker ids. + + Name of a ConfigMap containing a mapping of broker IDs to pod names. + The ConfigMap must contain a key `map.csv` with content in the format: + `,` + + Example: + ``` + 2000,simple-kafka-broker-default-0 + 2001,simple-kafka-broker-default-1 + 2002,simple-kafka-broker-default-2 + ``` + This is necessary when migrating from ZooKeeper to Kraft mode to retain existing broker IDs + because previously broker ids were generated by Kafka and not the operator. + nullable: true + type: string metadataManager: description: |- Metadata manager to use for the Kafka cluster. 
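Note: as a rough illustration of how the `map.csv` payload described in this commit could be consumed, the sketch below parses the `<broker_id>,<pod_name>` lines into a pod-name-to-id lookup. It is illustrative only; `parse_broker_id_map` is a made-up helper and not part of the operator code.

```rust
// Sketch only: turn the `map.csv` content (`<broker_id>,<pod_name>` per line)
// into a pod-name -> broker-id map. Not operator code.
use std::collections::HashMap;

fn parse_broker_id_map(csv: &str) -> HashMap<String, u32> {
    csv.lines()
        .filter_map(|line| {
            // Skip lines that do not match the expected "id,pod-name" shape.
            let (id, pod) = line.trim().split_once(',')?;
            Some((pod.trim().to_string(), id.trim().parse().ok()?))
        })
        .collect()
}

fn main() {
    let csv = "2000,simple-kafka-broker-default-0\n2001,simple-kafka-broker-default-1\n2002,simple-kafka-broker-default-2\n";
    let map = parse_broker_id_map(csv);
    assert_eq!(map.get("simple-kafka-broker-default-0"), Some(&2000));
    println!("{} brokers mapped", map.len());
}
```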
diff --git a/docs/modules/kafka/examples/kraft_migration/01-setup.yaml b/docs/modules/kafka/examples/kraft_migration/01-setup.yaml index f415f885..9275dd7d 100644 --- a/docs/modules/kafka/examples/kraft_migration/01-setup.yaml +++ b/docs/modules/kafka/examples/kraft_migration/01-setup.yaml @@ -64,6 +64,17 @@ spec: namespace: kraft-migration autoGenerate: true --- +apiVersion: v1 +kind: ConfigMap +metadata: + name: broker-ids + namespace: kraft-migration +data: + map.csv: | + 2000,simple-kafka-broker-default-0 + 2001,simple-kafka-broker-default-1 + 2002,simple-kafka-broker-default-2 +--- apiVersion: kafka.stackable.tech/v1alpha1 kind: KafkaCluster metadata: @@ -75,6 +86,7 @@ spec: pullPolicy: IfNotPresent clusterConfig: metadataManager: zookeeper + brokerIdPodConfigMapName: broker-ids authentication: - authenticationClass: kafka-client-auth-tls tls: diff --git a/docs/modules/kafka/examples/kraft_migration/02-start-controllers.yaml b/docs/modules/kafka/examples/kraft_migration/02-start-controllers.yaml index 382fa418..8b1cb1e8 100644 --- a/docs/modules/kafka/examples/kraft_migration/02-start-controllers.yaml +++ b/docs/modules/kafka/examples/kraft_migration/02-start-controllers.yaml @@ -16,9 +16,10 @@ spec: internalSecretClass: kafka-internal-tls serverSecretClass: tls zookeeperConfigMapName: simple-kafka-znode + brokerIdPodConfigMapName: broker-ids brokers: envOverrides: - KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" + KAFKA_CLUSTER_ID: "aC1zl524Svm_uIjcvUGWSw" roleGroups: default: replicas: 3 @@ -27,7 +28,7 @@ spec: default: replicas: 3 envOverrides: - KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" + KAFKA_CLUSTER_ID: "aC1zl524Svm_uIjcvUGWSw" configOverrides: controller.properties: zookeeper.metadata.migration.enable: "true" # Enable migration mode so the controller can read metadata from ZooKeeper. diff --git a/docs/modules/kafka/examples/kraft_migration/03-migrate-metadata.yaml b/docs/modules/kafka/examples/kraft_migration/03-migrate-metadata.yaml index b42636c0..ee065951 100644 --- a/docs/modules/kafka/examples/kraft_migration/03-migrate-metadata.yaml +++ b/docs/modules/kafka/examples/kraft_migration/03-migrate-metadata.yaml @@ -16,9 +16,10 @@ spec: internalSecretClass: kafka-internal-tls serverSecretClass: tls zookeeperConfigMapName: simple-kafka-znode + brokerIdPodConfigMapName: broker-ids brokers: envOverrides: - KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" + KAFKA_CLUSTER_ID: "aC1zl524Svm_uIjcvUGWSw" roleGroups: default: replicas: 3 @@ -33,7 +34,7 @@ spec: default: replicas: 3 envOverrides: - KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw" + KAFKA_CLUSTER_ID: "aC1zl524Svm_uIjcvUGWSw" configOverrides: controller.properties: zookeeper.metadata.migration.enable: "true" # Enable migration mode so the controller can read metadata from ZooKeeper. 
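Note: the migration examples above pin `KAFKA_CLUSTER_ID` and the broker ids to the values Kafka has already generated, both of which can be read from a broker's `meta.properties` file. The sketch below shows one way to pull them out; it assumes the plain `key=value` layout Kafka writes and is not operator code.

```rust
// Sketch only: extract cluster.id and broker.id from a meta.properties file
// so they can be copied into the KAFKA_CLUSTER_ID envOverrides and the
// broker-ids ConfigMap shown above.
use std::collections::HashMap;

fn parse_meta_properties(contents: &str) -> HashMap<String, String> {
    contents
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .filter_map(|line| line.split_once('='))
        .map(|(key, value)| (key.trim().to_string(), value.trim().to_string()))
        .collect()
}

fn main() {
    // Example shape of a ZooKeeper-mode broker's meta.properties (values are samples).
    let meta = "version=0\nbroker.id=2000\ncluster.id=cPh4Fb3pRvyqiiVjaBDaEw\n";
    let props = parse_meta_properties(meta);
    assert_eq!(props.get("cluster.id").map(String::as_str), Some("cPh4Fb3pRvyqiiVjaBDaEw"));
    assert_eq!(props.get("broker.id").map(String::as_str), Some("2000"));
}
```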
diff --git a/docs/modules/kafka/examples/kraft_migration/04-migrate-brokers.yaml b/docs/modules/kafka/examples/kraft_migration/04-migrate-brokers.yaml
index 1211ebfb..9224b37d 100644
--- a/docs/modules/kafka/examples/kraft_migration/04-migrate-brokers.yaml
+++ b/docs/modules/kafka/examples/kraft_migration/04-migrate-brokers.yaml
@@ -16,9 +16,10 @@ spec:
       internalSecretClass: kafka-internal-tls
       serverSecretClass: tls
     zookeeperConfigMapName: simple-kafka-znode
+    brokerIdPodConfigMapName: broker-ids
   brokers:
     envOverrides:
-      KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw"
+      KAFKA_CLUSTER_ID: "aC1zl524Svm_uIjcvUGWSw"
     roleGroups:
       default:
         replicas: 3
@@ -33,7 +34,7 @@ spec:
       default:
         replicas: 3
     envOverrides:
-      KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw"
+      KAFKA_CLUSTER_ID: "aC1zl524Svm_uIjcvUGWSw"
     configOverrides:
       controller.properties:
         zookeeper.metadata.migration.enable: "true" # Enable migration mode so the controller can read metadata from ZooKeeper.
diff --git a/docs/modules/kafka/examples/kraft_migration/05-kraft-mode.yaml b/docs/modules/kafka/examples/kraft_migration/05-kraft-mode.yaml
index d08adb6d..a4fbd1fe 100644
--- a/docs/modules/kafka/examples/kraft_migration/05-kraft-mode.yaml
+++ b/docs/modules/kafka/examples/kraft_migration/05-kraft-mode.yaml
@@ -15,9 +15,10 @@ spec:
     tls:
       internalSecretClass: kafka-internal-tls
       serverSecretClass: tls
+    brokerIdPodConfigMapName: broker-ids
   brokers:
     envOverrides:
-      KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw"
+      KAFKA_CLUSTER_ID: "aC1zl524Svm_uIjcvUGWSw"
     roleGroups:
       default:
         replicas: 3
@@ -29,4 +30,4 @@ spec:
       default:
         replicas: 3
     envOverrides:
-      KAFKA_CLUSTER_ID: "cPh4Fb3pRvyqiiVjaBDaEw"
+      KAFKA_CLUSTER_ID: "aC1zl524Svm_uIjcvUGWSw"
diff --git a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc
index 3f1b97d5..cfeffba9 100644
--- a/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc
+++ b/docs/modules/kafka/pages/usage-guide/kraft-controller.adoc
@@ -139,14 +139,22 @@ include::example$kraft_migration/01-setup.yaml[]
 
 In this step we will perform the following actions:
 
 1. Retrieve the current `cluster.id` as generated by Kafka.
-2. Update the `KafkaCluster` resource to add `spec.controllers` property.
-3. Configure the controllers to run in migration mode.
-4. Apply the changes and wait for all cluster pods to become ready.
+2. Retrieve and store the current broker ids.
+3. Update the `KafkaCluster` resource to add `spec.controllers` property.
+4. Configure the controllers to run in migration mode.
+5. Apply the changes and wait for all cluster pods to become ready.
 
 We can obtain the current `cluster.id` either by inspecting the ZooKeeper data or from `meta.properties` file on one of the brokers.
 In this example, the identifier is `cPh4Fb3pRvyqiiVjaBDaEw`.
 We add this value to the `KAFKA_CLUSTER_ID` environment variable for both brokers and controllers.
 
+To be able to migrate the existing brokers, we need to preserve their broker ids.
+As with the cluster id, we can obtain the broker ids from the `meta.properties` file on each broker pod.
+We then need to inform the operator to use these ids instead of generating new ones.
+This is done by creating a ConfigMap containing the id mapping and pointing the `spec.clusterConfig.brokerIdPodConfigMapName` property of the `KafkaCluster` resource to it.
+
+These two properties must be preserved for the rest of the migration process and for the lifetime of the cluster.
+ The complete example `KafkaCluster` resource after applying the required changes looks as follows: [source,yaml] @@ -186,7 +194,10 @@ Finally we check that metadata migration was successful: [source,bash] ---- -kubectl logs -n kraft-migration simple-kafka-controller-default-2 | grep -i completed +kubectl logs -n kraft-migration simple-kafka-controller-default-0 | grep -i 'completed migration' \ +|| kubectl logs -n kraft-migration simple-kafka-controller-default-1 | grep -i 'completed migration' \ +|| kubectl logs -n kraft-migration simple-kafka-controller-default-2 | grep -i 'completed migration' + ... [2025-12-22 09:23:53,372] INFO [KRaftMigrationDriver id=2110489705] Completed migration of metadata from ZooKeeper to KRaft. 0 records were generated in 102 ms across 0 batches. The average time spent waiting on a batch was -1.00 ms. The record types were {}. The current metadata offset is now 280 with an epoch of 3. Saw 0 brokers in the migrated metadata []. (org.apache.kafka.metadata.migration.KRaftMigrationDriver) ---- diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 7980219d..f5610695 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -12,7 +12,7 @@ use crate::{ role::{broker::BROKER_PROPERTIES_FILE, controller::CONTROLLER_PROPERTIES_FILE}, security::KafkaTlsSecurity, }, - product_logging::STACKABLE_LOG_DIR, + product_logging::{BROKER_ID_POD_MAP_DIR, STACKABLE_LOG_DIR}, }; /// Returns the commands to start the main Kafka container @@ -49,37 +49,42 @@ fn broker_start_command( controller_descriptors: Vec, product_version: &str, ) -> String { - if kraft_mode { - formatdoc! {" - POD_INDEX=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') + let common_command = formatdoc! {" + set -x + export POD_INDEX=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') export REPLICA_ID=$((POD_INDEX+NODE_ID_OFFSET)) + if [ -f \"{broker_id_pod_map_dir}/map.csv\" ]; then + echo \"Using broker ID mapping file to determine REPLICA_ID\" + REPLICA_ID=$(grep \"$POD_NAME\" {broker_id_pod_map_dir}/map.csv | cut -d',' -f1) + fi + cp {config_dir}/{properties_file} /tmp/{properties_file} config-utils template /tmp/{properties_file} cp {config_dir}/jaas.properties /tmp/jaas.properties config-utils template /tmp/jaas.properties + ", + broker_id_pod_map_dir = BROKER_ID_POD_MAP_DIR, + config_dir = STACKABLE_CONFIG_DIR, + properties_file = BROKER_PROPERTIES_FILE, + }; + + if kraft_mode { + formatdoc! {" + {common_command} bin/kafka-storage.sh format --cluster-id \"$KAFKA_CLUSTER_ID\" --config /tmp/{properties_file} --ignore-formatted {initial_controller_command} bin/kafka-server-start.sh /tmp/{properties_file} & ", - config_dir = STACKABLE_CONFIG_DIR, properties_file = BROKER_PROPERTIES_FILE, initial_controller_command = initial_controllers_command(&controller_descriptors, product_version), } } else { formatdoc! 
{" - POD_INDEX=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') - export REPLICA_ID=$((POD_INDEX+NODE_ID_OFFSET)) - - cp {config_dir}/{properties_file} /tmp/{properties_file} - config-utils template /tmp/{properties_file} - - cp {config_dir}/jaas.properties /tmp/jaas.properties - config-utils template /tmp/jaas.properties + {common_command} bin/kafka-server-start.sh /tmp/{properties_file} &", - config_dir = STACKABLE_CONFIG_DIR, properties_file = BROKER_PROPERTIES_FILE, } } diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 0864392f..1d933290 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -195,6 +195,24 @@ pub mod versioned { /// to reconcile controllers while still using ZooKeeper for brokers. #[serde(skip_serializing_if = "Option::is_none")] pub metadata_manager: Option, + + /// Enable users to manually assign Kafka broker ids. + /// + /// Name of a ConfigMap containing a mapping of broker IDs to pod names. + /// The ConfigMap must contain a key `map.csv` with content in the format: + /// `,` + /// + /// Example: + /// ``` + /// 2000,simple-kafka-broker-default-0 + /// 2001,simple-kafka-broker-default-1 + /// 2002,simple-kafka-broker-default-2 + /// ``` + /// This is necessary when migrating from ZooKeeper to Kraft mode to retain existing broker IDs + /// because previously broker ids were generated by Kafka and not the operator. + /// + #[serde(skip_serializing_if = "Option::is_none")] + pub broker_id_pod_config_map_name: Option, } } @@ -207,6 +225,7 @@ impl Default for v1alpha1::KafkaClusterConfig { vector_aggregator_config_map_name: None, zookeeper_config_map_name: None, metadata_manager: None, + broker_id_pod_config_map_name: None, } } } diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index b7990be6..8336f5f7 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -15,6 +15,7 @@ use crate::crd::{ v1alpha1, }; +pub const BROKER_ID_POD_MAP_DIR: &str = "/stackable/broker-id-pod-map"; pub const STACKABLE_LOG_CONFIG_DIR: &str = "/stackable/log_config"; pub const STACKABLE_LOG_DIR: &str = "/stackable/log"; // log4j diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 25f7612a..29034a41 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -63,8 +63,8 @@ use crate::{ kerberos::add_kerberos_pod_config, operations::graceful_shutdown::add_graceful_shutdown_config, product_logging::{ - MAX_KAFKA_LOG_FILES_SIZE, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, kafka_log_opts, - kafka_log_opts_env_var, + BROKER_ID_POD_MAP_DIR, MAX_KAFKA_LOG_FILES_SIZE, STACKABLE_LOG_CONFIG_DIR, + STACKABLE_LOG_DIR, kafka_log_opts, kafka_log_opts_env_var, }, utils::build_recommended_labels, }; @@ -441,6 +441,22 @@ pub fn build_broker_rolegroup_statefulset( ) .context(AddListenerVolumeSnafu)?; } + + if let Some(broker_id_config_map_name) = + &kafka.spec.cluster_config.broker_id_pod_config_map_name + { + pod_builder + .add_volume( + VolumeBuilder::new("broker-id-pod-map-dir") + .with_config_map(broker_id_config_map_name) + .build(), + ) + .context(AddVolumeSnafu)?; + cb_kafka + .add_volume_mount("broker-id-pod-map-dir", BROKER_ID_POD_MAP_DIR) + .context(AddVolumeMountSnafu)?; + } + pod_builder .metadata(metadata) .image_pull_secrets_from_product_image(resolved_product_image) From 
176fe7fa74dc002a9710753dc5537034face98c4 Mon Sep 17 00:00:00 2001
From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com>
Date: Wed, 14 Jan 2026 18:15:37 +0100
Subject: [PATCH 23/29] update docs to match decision

---
 deploy/helm/kafka-operator/crds/crds.yaml | 16 +++++++++++-----
 rust/operator-binary/src/crd/mod.rs       | 16 +++++++++++-----
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml
index 4209fe8a..ad8a6677 100644
--- a/deploy/helm/kafka-operator/crds/crds.yaml
+++ b/deploy/helm/kafka-operator/crds/crds.yaml
@@ -792,14 +792,20 @@ spec:
                         Enable users to manually assign Kafka broker ids.
 
                         Name of a ConfigMap containing a mapping of broker IDs to pod names.
-                        The ConfigMap must contain a key `map.csv` with content in the format:
-                        `<broker_id>,<pod_name>`
+                        The ConfigMap must contain a key for every broker pod in the cluster with the following format:
+                        `<pod_name>: <broker_id>`
 
                         Example:
                         ```
-                        2000,simple-kafka-broker-default-0
-                        2001,simple-kafka-broker-default-1
-                        2002,simple-kafka-broker-default-2
+                        ---
+                        apiVersion: v1
+                        kind: ConfigMap
+                        metadata:
+                          name: brokeridmapping
+                        data:
+                          simple-kafka-broker-default-0: "2001"
+                          simple-kafka-broker-default-1: "2001"
+                          simple-kafka-broker-default-2: "2002"
                         ```
                         This is necessary when migrating from ZooKeeper to Kraft mode to retain existing broker IDs
                         because previously broker ids were generated by Kafka and not the operator.
diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs
index 07ed70d8..eec59737 100644
--- a/rust/operator-binary/src/crd/mod.rs
+++ b/rust/operator-binary/src/crd/mod.rs
@@ -204,14 +204,20 @@ pub mod versioned {
         /// Enable users to manually assign Kafka broker ids.
         ///
         /// Name of a ConfigMap containing a mapping of broker IDs to pod names.
-        /// The ConfigMap must contain a key `map.csv` with content in the format:
-        /// `<broker_id>,<pod_name>`
+        /// The ConfigMap must contain a key for every broker pod in the cluster with the following format:
+        /// `<pod_name>: <broker_id>`
         ///
        /// Example:
         /// ```
-        /// 2000,simple-kafka-broker-default-0
-        /// 2001,simple-kafka-broker-default-1
-        /// 2002,simple-kafka-broker-default-2
+        /// ---
+        /// apiVersion: v1
+        /// kind: ConfigMap
+        /// metadata:
+        ///   name: brokeridmapping
+        /// data:
+        ///   simple-kafka-broker-default-0: "2001"
+        ///   simple-kafka-broker-default-1: "2001"
+        ///   simple-kafka-broker-default-2: "2002"
         /// ```
         /// This is necessary when migrating from ZooKeeper to Kraft mode to retain existing broker IDs
         /// because previously broker ids were generated by Kafka and not the operator.
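Note: with the per-pod-key format introduced in this commit, mounting the ConfigMap as a volume produces one file per broker pod under the mount directory (`/stackable/broker-id-pod-map` in this patch series), so a broker can look up its id by reading the file named after its own pod. The Rust sketch below mirrors the shell lookup the start command switches to in the next commit; the helper name is an assumption for the example and not operator code.

```rust
// Sketch only: read the broker id from the file named after the pod in the
// mounted broker-id ConfigMap directory. Returns None if the file is missing
// or does not contain a number.
use std::{fs, path::Path};

fn broker_id_from_mounted_map(mount_dir: &Path, pod_name: &str) -> Option<u32> {
    let raw = fs::read_to_string(mount_dir.join(pod_name)).ok()?;
    raw.trim().parse().ok()
}

fn main() {
    // Returns None unless such a ConfigMap is actually mounted at this path.
    let id = broker_id_from_mounted_map(
        Path::new("/stackable/broker-id-pod-map"),
        "simple-kafka-broker-default-0",
    );
    println!("broker id: {id:?}");
}
```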
From e6850aa826df01ff10a94449f6e7264c58719f7a Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Thu, 15 Jan 2026 10:35:39 +0100 Subject: [PATCH 24/29] update start command to new broker map decision --- deploy/helm/kafka-operator/crds/crds.yaml | 2 +- docs/modules/kafka/examples/kraft_migration/01-setup.yaml | 7 +++---- rust/operator-binary/src/config/command.rs | 5 ++--- rust/operator-binary/src/crd/mod.rs | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index ad8a6677..d98f1ccb 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -803,7 +803,7 @@ spec: metadata: name: brokeridmapping data: - simple-kafka-broker-default-0: "2001" + simple-kafka-broker-default-0: "2000" simple-kafka-broker-default-1: "2001" simple-kafka-broker-default-2: "2002" ``` diff --git a/docs/modules/kafka/examples/kraft_migration/01-setup.yaml b/docs/modules/kafka/examples/kraft_migration/01-setup.yaml index 9275dd7d..bb765307 100644 --- a/docs/modules/kafka/examples/kraft_migration/01-setup.yaml +++ b/docs/modules/kafka/examples/kraft_migration/01-setup.yaml @@ -70,10 +70,9 @@ metadata: name: broker-ids namespace: kraft-migration data: - map.csv: | - 2000,simple-kafka-broker-default-0 - 2001,simple-kafka-broker-default-1 - 2002,simple-kafka-broker-default-2 + simple-kafka-broker-default-0: "2000" + simple-kafka-broker-default-1: "2001" + simple-kafka-broker-default-2: "2002" --- apiVersion: kafka.stackable.tech/v1alpha1 kind: KafkaCluster diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index f5610695..9bdd5dd3 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -54,9 +54,8 @@ fn broker_start_command( export POD_INDEX=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') export REPLICA_ID=$((POD_INDEX+NODE_ID_OFFSET)) - if [ -f \"{broker_id_pod_map_dir}/map.csv\" ]; then - echo \"Using broker ID mapping file to determine REPLICA_ID\" - REPLICA_ID=$(grep \"$POD_NAME\" {broker_id_pod_map_dir}/map.csv | cut -d',' -f1) + if [ -f \"{broker_id_pod_map_dir}/$POD_NAME\" ]; then + REPLICA_ID=$(cat \"{broker_id_pod_map_dir}/$POD_NAME\") fi cp {config_dir}/{properties_file} /tmp/{properties_file} diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index eec59737..fb556cbd 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -215,7 +215,7 @@ pub mod versioned { /// metadata: /// name: brokeridmapping /// data: - /// simple-kafka-broker-default-0: "2001" + /// simple-kafka-broker-default-0: "2000" /// simple-kafka-broker-default-1: "2001" /// simple-kafka-broker-default-2: "2002" /// ``` From 60a274d8e3ae625c1d33b572dfbb20f9db50fdeb Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 19 Jan 2026 16:24:03 +0100 Subject: [PATCH 25/29] backwards compatibility: disable broker id generation only if necessary --- .../operator-binary/src/resource/configmap.rs | 25 +++++++++++++++---- tests/test-definition.yaml | 2 +- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index e9034bb7..da44b05f 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ 
-107,6 +107,11 @@ pub fn build_rolegroup_config_map( pod_descriptors, listener_config, opa_connect_string, + kafka + .spec + .cluster_config + .broker_id_pod_config_map_name + .is_some(), )?; match merged_config { @@ -220,6 +225,7 @@ fn server_properties_file( pod_descriptors: &[KafkaPodDescriptor], listener_config: &KafkaListenerConfig, opa_connect_string: Option<&str>, + disable_broker_id_generation: bool, ) -> Result, Error> { let kraft_controllers = kraft_controllers(pod_descriptors); @@ -294,11 +300,6 @@ fn server_properties_file( KAFKA_LISTENER_SECURITY_PROTOCOL_MAP.to_string(), listener_config.listener_security_protocol_map(), ), - ( - "broker.id.generation.enable".to_string(), - "false".to_string(), - ), - (KAFKA_BROKER_ID.to_string(), "${env:REPLICA_ID}".to_string()), ( "inter.broker.listener.name".to_string(), KafkaListenerName::Internal.to_string(), @@ -310,6 +311,10 @@ fn server_properties_file( // Running in KRaft mode result.extend([ + ( + "broker.id.generation.enable".to_string(), + "false".to_string(), + ), (KAFKA_NODE_ID.to_string(), "${env:REPLICA_ID}".to_string()), ( KAFKA_PROCESS_ROLES.to_string(), @@ -335,6 +340,16 @@ fn server_properties_file( "zookeeper.connect".to_string(), "${env:ZOOKEEPER}".to_string(), )]); + // We are in zookeeper mode and the user has defined a broker id mapping + if disable_broker_id_generation { + result.extend([ + ( + "broker.id.generation.enable".to_string(), + "false".to_string(), + ), + (KAFKA_BROKER_ID.to_string(), "${env:REPLICA_ID}".to_string()), + ]); + } } // Enable OPA authorization diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index cfe2282d..9ab5790c 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -55,7 +55,7 @@ dimensions: - mit # Requires manual setup, see create-kerberos-secretclass.yaml # This will *not* respect the kerberos-realm test attribute, but instead use a hard-coded realm - # - activeDirectory + # - activeDirectory( - name: broker-listener-class values: - "cluster-internal" From a7d19b9666a2cfc7023b3c21114f3266f19a85c6 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 19 Jan 2026 17:25:38 +0100 Subject: [PATCH 26/29] update changelog --- CHANGELOG.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5dcdd103..6b6b02ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,9 +14,7 @@ All notable changes to this project will be documented in this file. ### Changed - Refactor: move server configuration properties from the command line to configuration files. ([#911]). -- BREAKING: add support for ZooKeeper to KRaft migration ([#923]). - In order to support migration to Kraft, the operator must disable automatic Kafka broker id generation. - The broker ids generated by the operator are incompatible with those generated by Kafka. +- Add support for ZooKeeper to KRaft migration ([#923]). - Bump testing-tools to `0.3.0-stackable0.0.0-dev` ([#925]). ### Removed From 1360d29acd4543a3bc33ffc2d10decbcb89c6903 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 20 Jan 2026 09:20:34 +0100 Subject: [PATCH 27/29] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b6b02ae..9d0a405f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ All notable changes to this project will be documented in this file. 
[#911]: https://github.com/stackabletech/kafka-operator/pull/911 [#914]: https://github.com/stackabletech/kafka-operator/pull/914 [#915]: https://github.com/stackabletech/kafka-operator/pull/915 +[#923]: https://github.com/stackabletech/kafka-operator/pull/923 [#925]: https://github.com/stackabletech/kafka-operator/pull/925 [#927]: https://github.com/stackabletech/kafka-operator/pull/927 [#929]: https://github.com/stackabletech/kafka-operator/pull/929 From 2c5a836936c253eabba8cf16438fa482e0f038de Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 20 Jan 2026 09:48:37 +0100 Subject: [PATCH 28/29] minor cleanups --- rust/operator-binary/src/config/command.rs | 1 - rust/operator-binary/src/resource/configmap.rs | 3 +++ tests/test-definition.yaml | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index cd93583f..a4540001 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -53,7 +53,6 @@ fn broker_start_command( product_version: &str, ) -> String { let common_command = formatdoc! {" - set -x export POD_INDEX=$(echo \"$POD_NAME\" | grep -oE '[0-9]+$') export REPLICA_ID=$((POD_INDEX+NODE_ID_OFFSET)) diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index da44b05f..37aca610 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -341,6 +341,9 @@ fn server_properties_file( "${env:ZOOKEEPER}".to_string(), )]); // We are in zookeeper mode and the user has defined a broker id mapping + // so we disable automatic id generation. + // This check ensures that existing clusters running in ZooKeeper mode do not + // suddenly break after the introduction of this change. 
if disable_broker_id_generation { result.extend([ ( diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 35796fa7..37857ac9 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -62,7 +62,7 @@ dimensions: - mit # Requires manual setup, see create-kerberos-secretclass.yaml # This will *not* respect the kerberos-realm test attribute, but instead use a hard-coded realm - # - activeDirectory( + # - activeDirectory - name: broker-listener-class values: - "cluster-internal" From 7cc5fa97c7662bcca87b8e1e93ddc42da16332bb Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 20 Jan 2026 18:15:02 +0100 Subject: [PATCH 29/29] update tests and remove static quorum property --- rust/operator-binary/src/crd/role/mod.rs | 4 --- .../operator-binary/src/resource/configmap.rs | 30 ++----------------- .../operations-kraft/20-install-kafka.yaml.j2 | 2 +- .../operations-kraft/25-pause-kafka.yaml.j2 | 2 +- .../operations-kraft/30-stop-kafka.yaml.j2 | 2 +- .../operations-kraft/50-restart-kafka.yaml.j2 | 2 +- .../60-scale-controller-up.yaml.j2 | 2 +- .../70-scale-controller-down.yaml.j2 | 2 +- .../kuttl/upgrade/02-install-kafka.yaml.j2 | 3 ++ 9 files changed, 11 insertions(+), 38 deletions(-) diff --git a/rust/operator-binary/src/crd/role/mod.rs b/rust/operator-binary/src/crd/role/mod.rs index 16f72083..bc3f4df2 100644 --- a/rust/operator-binary/src/crd/role/mod.rs +++ b/rust/operator-binary/src/crd/role/mod.rs @@ -71,10 +71,6 @@ pub const KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: &str = "listener.security.protoc /// For example: localhost:9092,localhost:9093,localhost:9094. pub const KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS: &str = "controller.quorum.bootstrap.servers"; -/// Map of id/endpoint information for the set of voters in a comma-separated list of {id}@{host}:{port} entries. 
-/// For example: 1@localhost:9092,2@localhost:9093,3@localhost:9094 -pub const KAFKA_CONTROLLER_QUORUM_VOTERS: &str = "controller.quorum.voters"; - #[derive(Snafu, Debug)] pub enum Error { #[snafu(display("fragment validation failure"))] diff --git a/rust/operator-binary/src/resource/configmap.rs b/rust/operator-binary/src/resource/configmap.rs index 37aca610..b423727c 100644 --- a/rust/operator-binary/src/resource/configmap.rs +++ b/rust/operator-binary/src/resource/configmap.rs @@ -20,9 +20,8 @@ use crate::{ listener::{KafkaListenerConfig, KafkaListenerName, node_address_cmd}, role::{ AnyConfig, KAFKA_ADVERTISED_LISTENERS, KAFKA_BROKER_ID, - KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, KAFKA_CONTROLLER_QUORUM_VOTERS, - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, KAFKA_LISTENERS, KAFKA_LOG_DIRS, KAFKA_NODE_ID, - KAFKA_PROCESS_ROLES, KafkaRole, + KAFKA_CONTROLLER_QUORUM_BOOTSTRAP_SERVERS, KAFKA_LISTENER_SECURITY_PROTOCOL_MAP, + KAFKA_LISTENERS, KAFKA_LOG_DIRS, KAFKA_NODE_ID, KAFKA_PROCESS_ROLES, KafkaRole, }, security::KafkaTlsSecurity, v1alpha1, @@ -265,11 +264,6 @@ fn server_properties_file( .listener_security_protocol_map_for_controller()), ]); - let kraft_voters = - kraft_voters(pod_descriptors).context(NoKraftControllersFoundSnafu)?; - - result.extend([(KAFKA_CONTROLLER_QUORUM_VOTERS.to_string(), kraft_voters)]); - result.insert( "inter.broker.listener.name".to_string(), KafkaListenerName::Internal.to_string(), @@ -329,11 +323,6 @@ fn server_properties_file( kraft_controllers.clone(), ), ]); - - let kraft_voters = - kraft_voters(pod_descriptors).context(NoKraftControllersFoundSnafu)?; - - result.extend([(KAFKA_CONTROLLER_QUORUM_VOTERS.to_string(), kraft_voters)]); } else { // Running with ZooKeeper enabled result.extend([( @@ -399,21 +388,6 @@ fn kraft_controllers(pod_descriptors: &[KafkaPodDescriptor]) -> Option { } } -fn kraft_voters(pod_descriptors: &[KafkaPodDescriptor]) -> Option { - let result = pod_descriptors - .iter() - .filter(|pd| pd.role == KafkaRole::Controller.to_string()) - .map(|desc| desc.as_quorum_voter()) - .collect::>() - .join(","); - - if result.is_empty() { - None - } else { - Some(result) - } -} - // Generate JAAS configuration file for Kerberos authentication // or an empty string if Kerberos is not enabled. 
// See https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/LoginConfigFile.html diff --git a/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 index e415091c..704cacaa 100644 --- a/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/20-install-kafka.yaml.j2 @@ -16,9 +16,9 @@ spec: productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: metadataManager: kraft +{% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 index 1ce5edcc..563e72a5 100644 --- a/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/25-pause-kafka.yaml.j2 @@ -16,9 +16,9 @@ spec: productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: metadataManager: kraft +{% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 index 65ef99bc..cafaf9ba 100644 --- a/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/30-stop-kafka.yaml.j2 @@ -16,9 +16,9 @@ spec: productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" {% endif %} pullPolicy: IfNotPresent -{% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: metadataManager: kraft +{% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} brokers: diff --git a/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 b/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 index cfc16a11..a6ad4ec2 100644 --- a/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/50-restart-kafka.yaml.j2 @@ -15,9 +15,9 @@ spec: {% else %} productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" {% endif %} -{% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: metadataManager: kraft +{% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 b/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 index 020ea67e..3fdc5c4d 100644 --- a/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/60-scale-controller-up.yaml.j2 @@ -16,9 +16,9 @@ spec: {% else %} productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" {% endif %} -{% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: metadataManager: kraft +{% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 b/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 index db4d1cfa..a077213b 100644 --- 
a/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 +++ b/tests/templates/kuttl/operations-kraft/70-scale-controller-down.yaml.j2 @@ -16,9 +16,9 @@ spec: {% else %} productVersion: "{{ test_scenario['values']['kafka-kraft'] }}" {% endif %} -{% if lookup('env', 'VECTOR_AGGREGATOR') %} clusterConfig: metadataManager: kraft +{% if lookup('env', 'VECTOR_AGGREGATOR') %} vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} controllers: diff --git a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 index e88820f0..93e1d415 100644 --- a/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 +++ b/tests/templates/kuttl/upgrade/02-install-kafka.yaml.j2 @@ -32,6 +32,9 @@ spec: productVersion: "{{ test_scenario['values']['upgrade_old'] }}" pullPolicy: IfNotPresent clusterConfig: + # Need to set this explicitly because the default would be zookeeper for 3.9.1 + # but we don't want to test zookeeper -> kraft migration here + metadataManager: kraft {% if test_scenario['values']['use-client-auth-tls'] == 'true' %} authentication: - authenticationClass: test-kafka-client-auth-tls
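Note: the final commit drops the static `controller.quorum.voters` list (`{id}@{host}:{port}` entries) and relies solely on `controller.quorum.bootstrap.servers`, which only needs `host:port` pairs for the controller pods. The sketch below illustrates how such a value can be assembled from a list of pod endpoints; the `PodEndpoint` type, hostnames, and port number are placeholders rather than the operator's actual types.

```rust
// Sketch only: build a comma-separated controller.quorum.bootstrap.servers
// value from the pods that run the controller role.
struct PodEndpoint {
    role: &'static str,
    fqdn: &'static str,
    port: u16,
}

fn quorum_bootstrap_servers(pods: &[PodEndpoint]) -> Option<String> {
    let endpoints: Vec<String> = pods
        .iter()
        .filter(|pod| pod.role == "controller")
        .map(|pod| format!("{}:{}", pod.fqdn, pod.port))
        .collect();
    if endpoints.is_empty() {
        None
    } else {
        Some(endpoints.join(","))
    }
}

fn main() {
    let pods = [
        PodEndpoint { role: "controller", fqdn: "simple-kafka-controller-default-0", port: 9093 },
        PodEndpoint { role: "broker", fqdn: "simple-kafka-broker-default-0", port: 9093 },
    ];
    assert_eq!(
        quorum_bootstrap_servers(&pods).as_deref(),
        Some("simple-kafka-controller-default-0:9093")
    );
}
```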