From 46600a68f2fff24925de345fe81ecbc5d39edcab Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 29 Jan 2026 12:42:27 +0100 Subject: [PATCH 01/11] wip - test format-namenodes container logging improvements --- rust/operator-binary/src/container.rs | 80 +++++++++++++------ .../hdfs-vector-aggregator-values.yaml.j2 | 38 ++++++++- 2 files changed, 91 insertions(+), 27 deletions(-) diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index b09be984..dd303d70 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -11,7 +11,7 @@ //! use std::{collections::BTreeMap, str::FromStr}; -use indoc::formatdoc; +use indoc::{formatdoc, indoc}; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ builder::{ @@ -41,11 +41,12 @@ use stackable_operator::{ product_logging::{ self, framework::{ - LoggingError, create_vector_shutdown_file_command, remove_vector_shutdown_file_command, + LoggingError, capture_shell_output, create_vector_shutdown_file_command, + remove_vector_shutdown_file_command, }, spec::{ - ConfigMapLogConfig, ContainerLogConfig, ContainerLogConfigChoice, - CustomContainerLogConfig, + AutomaticContainerLogConfig, ConfigMapLogConfig, ContainerLogConfig, + ContainerLogConfigChoice, CustomContainerLogConfig, }, }, role_utils::RoleGroupRef, @@ -627,22 +628,22 @@ impl ContainerConfig { &merged_config.hdfs_logging(), )); - args.push_str(&format!( + args.push_str(&formatdoc!( r#"\ -{COMMON_BASH_TRAP_FUNCTIONS} -{remove_vector_shutdown_file_command} -prepare_signal_handlers -containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & -if [[ -d {LISTENER_VOLUME_DIR} ]]; then - export POD_ADDRESS=$(cat {LISTENER_VOLUME_DIR}/default-address/address) - for i in {LISTENER_VOLUME_DIR}/default-address/ports/*; do - export $(basename $i | tr a-z- A-Z_)_PORT="$(cat $i)" - done -fi -{hadoop_home}/bin/hdfs {role} {upgrade_args} & -wait_for_termination $! -{create_vector_shutdown_file_command} -"#, + {COMMON_BASH_TRAP_FUNCTIONS} + {remove_vector_shutdown_file_command} + prepare_signal_handlers + containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & + if [[ -d {LISTENER_VOLUME_DIR} ]]; then + export POD_ADDRESS=$(cat {LISTENER_VOLUME_DIR}/default-address/address) + for i in {LISTENER_VOLUME_DIR}/default-address/ports/*; do + export $(basename $i | tr a-z- A-Z_)_PORT="$(cat $i)" + done + fi + {hadoop_home}/bin/hdfs {role} {upgrade_args} & + wait_for_termination $! + {create_vector_shutdown_file_command} + "#, hadoop_home = Self::HADOOP_HOME, remove_vector_shutdown_file_command = remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), @@ -664,7 +665,9 @@ wait_for_termination $! hadoop_home = Self::HADOOP_HOME )); } - ContainerConfig::FormatNameNodes { .. } => { + ContainerConfig::FormatNameNodes { container_name, .. } => { + args.push_str(&add_capture_shell_output(container_name)); + if let Some(container_config) = merged_config.as_namenode().map(|node| { node.logging .for_container(&NameNodeContainer::FormatNameNodes) @@ -705,9 +708,17 @@ wait_for_termination $! if [ -z ${{ACTIVE_NAMENODE+x}} ] then echo "Create pod $POD_NAME as active namenode." + # Restore original stdout/stderr from FD 3 and 4 + exec 1>&3 2>&4 + # Clean up (close FD 3 and 4) + exec 3>&- 4>&- {hadoop_home}/bin/hdfs namenode -format -noninteractive else echo "Create pod $POD_NAME as standby namenode." + # Restore original stdout/stderr from FD 3 and 4 + exec 1>&3 2>&4 + # Clean up (close FD 3 and 4) + exec 3>&- 4>&- {hadoop_home}/bin/hdfs namenode -bootstrapStandby -nonInteractive fi else @@ -724,7 +735,9 @@ wait_for_termination $! .join(" "), )); } - ContainerConfig::FormatZooKeeper { .. } => { + ContainerConfig::FormatZooKeeper { container_name, .. } => { + args.push_str(&add_capture_shell_output(container_name)); + if let Some(container_config) = merged_config.as_namenode().map(|node| { node.logging .for_container(&NameNodeContainer::FormatZooKeeper) @@ -739,6 +752,10 @@ wait_for_termination $! echo "Attempt to format ZooKeeper..." if [[ "0" -eq "$(echo $POD_NAME | sed -e 's/.*-//')" ]] ; then set +e + # Restore original stdout/stderr from FD 3 and 4 + exec 1>&3 2>&4 + # Clean up (close FD 3 and 4) + exec 3>&- 4>&- {hadoop_home}/bin/hdfs zkfc -formatZK -nonInteractive EXITCODE=$? set -e @@ -755,10 +772,12 @@ wait_for_termination $! echo "ZooKeeper already formatted!" fi "###, - hadoop_home = Self::HADOOP_HOME + hadoop_home = Self::HADOOP_HOME, )); } - ContainerConfig::WaitForNameNodes { .. } => { + ContainerConfig::WaitForNameNodes { container_name, .. } => { + args.push_str(&add_capture_shell_output(container_name)); + if let Some(container_config) = merged_config.as_datanode().map(|node| { node.logging .for_container(&DataNodeContainer::WaitForNameNodes) @@ -1565,3 +1584,18 @@ impl TryFrom<&str> for ContainerVolumeDirs { }) } } + +fn add_capture_shell_output(container_name: &str) -> String { + let capture_shell_output = product_logging::framework::capture_shell_output( + STACKABLE_LOG_DIR, + container_name, + // we do not access any of the crd config options for this and just log it to file + &AutomaticContainerLogConfig::default(), + ); + formatdoc! {r###" + # Save original stdout/stderr to FD 3 and 4 + exec 3>&1 4>&2 + {capture_shell_output} + "### + } +} diff --git a/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 b/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 index 82267273..4cb67af6 100644 --- a/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 +++ b/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 @@ -60,18 +60,48 @@ customConfig: condition: >- .pod == "test-hdfs-automatic-log-namenode-default-1" && .container == "vector" - filteredAutomaticLogConfigNameNode0FormatNameNode: + filteredAutomaticLogConfigNameNode0FormatNameNodeLog4j: type: filter inputs: [validEvents] condition: >- .pod == "test-hdfs-automatic-log-namenode-default-0" && - .container == "format-namenodes" - filteredAutomaticLogConfigNameNode1FormatNameNode: + .container == "format-namenodes" && + .file == "format-namenodes.log4j.xml" + filteredAutomaticLogConfigNameNode0FormatNameNodeStdout: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-hdfs-automatic-log-namenode-default-0" && + .container == "format-namenodes" && + .file == "container.stdout.log" + filteredAutomaticLogConfigNameNode0FormatNameNodeStderr: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-hdfs-automatic-log-namenode-default-0" && + .container == "format-namenodes" && + .file == "container.stderr.log" + filteredAutomaticLogConfigNameNode1FormatNameNodeLog4j: type: filter inputs: [validEvents] condition: >- .pod == "test-hdfs-automatic-log-namenode-default-1" && - .container == "format-namenodes" + .container == "format-namenodes" && + .file == "format-namenodes.log4j.xml" + filteredAutomaticLogConfigNameNode1FormatNameNodeStdout: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-hdfs-automatic-log-namenode-default-1" && + .container == "format-namenodes" && + .file == "container.stdout.log" + filteredAutomaticLogConfigNameNode1FormatNameNodeStderr: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-hdfs-automatic-log-namenode-default-1" && + .container == "format-namenodes" && + .file == "container.stderr.log" filteredAutomaticLogConfigNameNode0FormatZookeeper: type: filter inputs: [validEvents] From a2d8d8a3e7f735fb9f3b66b1ef2059b547fd8732 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 29 Jan 2026 15:04:57 +0100 Subject: [PATCH 02/11] fix bash helpers --- rust/operator-binary/src/container.rs | 78 +++++++++++++++++---------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index dd303d70..ee6968fe 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -11,7 +11,7 @@ //! use std::{collections::BTreeMap, str::FromStr}; -use indoc::{formatdoc, indoc}; +use indoc::formatdoc; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ builder::{ @@ -41,8 +41,7 @@ use stackable_operator::{ product_logging::{ self, framework::{ - LoggingError, capture_shell_output, create_vector_shutdown_file_command, - remove_vector_shutdown_file_command, + LoggingError, create_vector_shutdown_file_command, remove_vector_shutdown_file_command, }, spec::{ AutomaticContainerLogConfig, ConfigMapLogConfig, ContainerLogConfig, @@ -666,7 +665,8 @@ impl ContainerConfig { )); } ContainerConfig::FormatNameNodes { container_name, .. } => { - args.push_str(&add_capture_shell_output(container_name)); + args.push_str(&bash_capture_shell_helper(container_name)); + args.push_str("start_capture\n"); if let Some(container_config) = merged_config.as_namenode().map(|node| { node.logging @@ -693,14 +693,18 @@ impl ContainerConfig { for namenode_id in {pod_names} do echo -n "Checking pod $namenode_id... " - {get_service_state_command} + # We only redirect 2 (stderr) to 4 (console). + # We leave 1 (stdout) alone so the $(...) can catch it. + SERVICE_STATE=$({hadoop_home}/bin/hdfs haadmin -getServiceState "$namenode_id" 2>&4 | tail -n1 || true) + if [ "$SERVICE_STATE" == "active" ] then - ACTIVE_NAMENODE=$namenode_id + ACTIVE_NAMENODE="$namenode_id" echo "active" break + else + echo "unknown" fi - echo "" done if [ ! -f "{NAMENODE_ROOT_DATA_DIR}/current/VERSION" ] @@ -708,25 +712,16 @@ impl ContainerConfig { if [ -z ${{ACTIVE_NAMENODE+x}} ] then echo "Create pod $POD_NAME as active namenode." - # Restore original stdout/stderr from FD 3 and 4 - exec 1>&3 2>&4 - # Clean up (close FD 3 and 4) - exec 3>&- 4>&- - {hadoop_home}/bin/hdfs namenode -format -noninteractive + exclude_from_capture {hadoop_home}/bin/hdfs namenode -format -noninteractive else echo "Create pod $POD_NAME as standby namenode." - # Restore original stdout/stderr from FD 3 and 4 - exec 1>&3 2>&4 - # Clean up (close FD 3 and 4) - exec 3>&- 4>&- - {hadoop_home}/bin/hdfs namenode -bootstrapStandby -nonInteractive + exclude_from_capture {hadoop_home}/bin/hdfs namenode -bootstrapStandby -nonInteractive fi else cat "{NAMENODE_ROOT_DATA_DIR}/current/VERSION" echo "Pod $POD_NAME already formatted. Skipping..." fi "###, - get_service_state_command = Self::get_namenode_service_state_command(), hadoop_home = Self::HADOOP_HOME, pod_names = namenode_podrefs .iter() @@ -736,8 +731,8 @@ impl ContainerConfig { )); } ContainerConfig::FormatZooKeeper { container_name, .. } => { - args.push_str(&add_capture_shell_output(container_name)); - + args.push_str(&bash_capture_shell_helper(container_name)); + args.push_str("start_capture\n"); if let Some(container_config) = merged_config.as_namenode().map(|node| { node.logging .for_container(&NameNodeContainer::FormatZooKeeper) @@ -775,9 +770,7 @@ impl ContainerConfig { hadoop_home = Self::HADOOP_HOME, )); } - ContainerConfig::WaitForNameNodes { container_name, .. } => { - args.push_str(&add_capture_shell_output(container_name)); - + ContainerConfig::WaitForNameNodes { .. } => { if let Some(container_config) = merged_config.as_datanode().map(|node| { node.logging .for_container(&DataNodeContainer::WaitForNameNodes) @@ -1585,17 +1578,44 @@ impl TryFrom<&str> for ContainerVolumeDirs { } } -fn add_capture_shell_output(container_name: &str) -> String { +fn bash_capture_shell_helper(container_name: &str) -> String { let capture_shell_output = product_logging::framework::capture_shell_output( STACKABLE_LOG_DIR, container_name, // we do not access any of the crd config options for this and just log it to file &AutomaticContainerLogConfig::default(), ); - formatdoc! {r###" - # Save original stdout/stderr to FD 3 and 4 - exec 3>&1 4>&2 - {capture_shell_output} - "### + + formatdoc! { + r###" + # Store the original stdout/stderr globally so we can always find our way back + # 3 and 4 are usually safe, but we'll be explicit. + exec 3>&1 + exec 4>&2 + + start_capture() {{ + # We redirect 1 and 2 to the background tee processes + {capture_shell_output} + }} + + stop_capture() {{ + # Restore stdout and stderr from our saved descriptors + exec 1>&3 2>&4 + }} + + exclude_from_capture() {{ + # Temporarily restore original FDs just for the duration of this command + # We use 'local' for the exit code to keep things clean + set +e + "$@" 1>&3 2>&4 + local exit_code=$? + set -e + + # If the command failed, we manually trigger the exit since we set +e + if [ $exit_code -ne 0 ]; then + exit $exit_code + fi + }} + "### } } From eae44cc8be75c1de07b83f946eded62d6ed607de Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 29 Jan 2026 15:12:53 +0100 Subject: [PATCH 03/11] extend shell capture for wait-for-namenodes and format-zooekeeper --- rust/operator-binary/src/container.rs | 35 ++++++++++++--------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index ee6968fe..7f4c3326 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -693,6 +693,7 @@ impl ContainerConfig { for namenode_id in {pod_names} do echo -n "Checking pod $namenode_id... " + # We only redirect 2 (stderr) to 4 (console). # We leave 1 (stdout) alone so the $(...) can catch it. SERVICE_STATE=$({hadoop_home}/bin/hdfs haadmin -getServiceState "$namenode_id" 2>&4 | tail -n1 || true) @@ -703,7 +704,7 @@ impl ContainerConfig { echo "active" break else - echo "unknown" + echo "unknown / unreachable" fi done @@ -711,10 +712,10 @@ impl ContainerConfig { then if [ -z ${{ACTIVE_NAMENODE+x}} ] then - echo "Create pod $POD_NAME as active namenode." + echo "No active namenode found. Formatting $POD_NAME as active." exclude_from_capture {hadoop_home}/bin/hdfs namenode -format -noninteractive else - echo "Create pod $POD_NAME as standby namenode." + echo "Active namenode is $ACTIVE_NAMENODE. Bootstrapping standby." exclude_from_capture {hadoop_home}/bin/hdfs namenode -bootstrapStandby -nonInteractive fi else @@ -733,6 +734,7 @@ impl ContainerConfig { ContainerConfig::FormatZooKeeper { container_name, .. } => { args.push_str(&bash_capture_shell_helper(container_name)); args.push_str("start_capture\n"); + if let Some(container_config) = merged_config.as_namenode().map(|node| { node.logging .for_container(&NameNodeContainer::FormatZooKeeper) @@ -747,11 +749,7 @@ impl ContainerConfig { echo "Attempt to format ZooKeeper..." if [[ "0" -eq "$(echo $POD_NAME | sed -e 's/.*-//')" ]] ; then set +e - # Restore original stdout/stderr from FD 3 and 4 - exec 1>&3 2>&4 - # Clean up (close FD 3 and 4) - exec 3>&- 4>&- - {hadoop_home}/bin/hdfs zkfc -formatZK -nonInteractive + exclude_from_capture {hadoop_home}/bin/hdfs zkfc -formatZK -nonInteractive EXITCODE=$? set -e if [[ $EXITCODE -eq 0 ]]; then @@ -770,7 +768,10 @@ impl ContainerConfig { hadoop_home = Self::HADOOP_HOME, )); } - ContainerConfig::WaitForNameNodes { .. } => { + ContainerConfig::WaitForNameNodes { container_name, .. } => { + args.push_str(&bash_capture_shell_helper(container_name)); + args.push_str("start_capture\n"); + if let Some(container_config) = merged_config.as_datanode().map(|node| { node.logging .for_container(&DataNodeContainer::WaitForNameNodes) @@ -793,7 +794,11 @@ impl ContainerConfig { for namenode_id in {pod_names} do echo -n "Checking pod $namenode_id... " - {get_service_state_command} + + # We only redirect 2 (stderr) to 4 (console). + # We leave 1 (stdout) alone so the $(...) can catch it. + SERVICE_STATE=$({hadoop_home}/bin/hdfs haadmin -getServiceState "$namenode_id" 2>&4 | tail -n1 || true) + if [ "$SERVICE_STATE" = "active" ] || [ "$SERVICE_STATE" = "standby" ] then echo "$SERVICE_STATE" @@ -812,7 +817,7 @@ impl ContainerConfig { sleep 5 done "###, - get_service_state_command = Self::get_namenode_service_state_command(), + hadoop_home = Self::HADOOP_HOME, pod_names = namenode_podrefs .iter() .map(|pod_ref| pod_ref.pod_name.as_ref()) @@ -854,14 +859,6 @@ impl ContainerConfig { )) } - fn get_namenode_service_state_command() -> String { - formatdoc!( - r###" - SERVICE_STATE=$({hadoop_home}/bin/hdfs haadmin -getServiceState $namenode_id | tail -n1 || true)"###, - hadoop_home = Self::HADOOP_HOME, - ) - } - /// Returns the container env variables. fn env( &self, From b73e4e0a44fc7cdf5966e4680c358a569c5745f2 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 29 Jan 2026 15:59:51 +0100 Subject: [PATCH 04/11] cleanup --- rust/operator-binary/src/container.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index 7f4c3326..46056908 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -666,7 +666,6 @@ impl ContainerConfig { } ContainerConfig::FormatNameNodes { container_name, .. } => { args.push_str(&bash_capture_shell_helper(container_name)); - args.push_str("start_capture\n"); if let Some(container_config) = merged_config.as_namenode().map(|node| { node.logging @@ -733,7 +732,6 @@ impl ContainerConfig { } ContainerConfig::FormatZooKeeper { container_name, .. } => { args.push_str(&bash_capture_shell_helper(container_name)); - args.push_str("start_capture\n"); if let Some(container_config) = merged_config.as_namenode().map(|node| { node.logging @@ -770,7 +768,6 @@ impl ContainerConfig { } ContainerConfig::WaitForNameNodes { container_name, .. } => { args.push_str(&bash_capture_shell_helper(container_name)); - args.push_str("start_capture\n"); if let Some(container_config) = merged_config.as_datanode().map(|node| { node.logging @@ -794,7 +791,7 @@ impl ContainerConfig { for namenode_id in {pod_names} do echo -n "Checking pod $namenode_id... " - + # We only redirect 2 (stderr) to 4 (console). # We leave 1 (stdout) alone so the $(...) can catch it. SERVICE_STATE=$({hadoop_home}/bin/hdfs haadmin -getServiceState "$namenode_id" 2>&4 | tail -n1 || true) @@ -1613,6 +1610,8 @@ fn bash_capture_shell_helper(container_name: &str) -> String { exit $exit_code fi }} + + start_capture "### } } From 1c796ff7e1e8497cfae87acc31ec9cc88fc55d03 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 29 Jan 2026 16:03:34 +0100 Subject: [PATCH 05/11] adapted changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9608259..cda16407 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,8 +10,13 @@ All notable changes to this project will be documented in this file. See [objectOverrides concepts page](https://docs.stackable.tech/home/nightly/concepts/overrides/#object-overrides) for details ([#741]). - Enable the [restart-controller](https://docs.stackable.tech/home/nightly/commons-operator/restarter/), so that the Pods are automatically restarted on config changes ([#743]). +### Fixed + +- Previosly, some shell output of init-containers was not logged properly and therefore not aggregated, which is fixed now ([#746]). + [#741]: https://github.com/stackabletech/hdfs-operator/pull/741 [#743]: https://github.com/stackabletech/hdfs-operator/pull/743 +[#746]: https://github.com/stackabletech/hdfs-operator/pull/746 ## [25.11.0] - 2025-11-07 From e719b2067ff30137074cc9b22d75a734b1ea0b93 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 29 Jan 2026 16:04:50 +0100 Subject: [PATCH 06/11] precommit --- rust/operator-binary/src/container.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index 46056908..735111d5 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -693,7 +693,7 @@ impl ContainerConfig { do echo -n "Checking pod $namenode_id... " - # We only redirect 2 (stderr) to 4 (console). + # We only redirect 2 (stderr) to 4 (console). # We leave 1 (stdout) alone so the $(...) can catch it. SERVICE_STATE=$({hadoop_home}/bin/hdfs haadmin -getServiceState "$namenode_id" 2>&4 | tail -n1 || true) @@ -703,7 +703,7 @@ impl ContainerConfig { echo "active" break else - echo "unknown / unreachable" + echo "unknown / unreachable" fi done @@ -792,7 +792,7 @@ impl ContainerConfig { do echo -n "Checking pod $namenode_id... " - # We only redirect 2 (stderr) to 4 (console). + # We only redirect 2 (stderr) to 4 (console). # We leave 1 (stdout) alone so the $(...) can catch it. SERVICE_STATE=$({hadoop_home}/bin/hdfs haadmin -getServiceState "$namenode_id" 2>&4 | tail -n1 || true) @@ -1604,7 +1604,7 @@ fn bash_capture_shell_helper(container_name: &str) -> String { "$@" 1>&3 2>&4 local exit_code=$? set -e - + # If the command failed, we manually trigger the exit since we set +e if [ $exit_code -ne 0 ]; then exit $exit_code From 1b1d2f122be4913a535483ed508f0105ad9c9b6f Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 29 Jan 2026 16:23:16 +0100 Subject: [PATCH 07/11] fix pre-commit --- rust/operator-binary/src/container.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index 735111d5..9ec6de7a 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -696,7 +696,7 @@ impl ContainerConfig { # We only redirect 2 (stderr) to 4 (console). # We leave 1 (stdout) alone so the $(...) can catch it. SERVICE_STATE=$({hadoop_home}/bin/hdfs haadmin -getServiceState "$namenode_id" 2>&4 | tail -n1 || true) - + if [ "$SERVICE_STATE" == "active" ] then ACTIVE_NAMENODE="$namenode_id" From 0b7ef22cb20f9505ccfd77196a6a505ec33c33f6 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 2 Feb 2026 08:33:27 +0100 Subject: [PATCH 08/11] fix typo --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cda16407..fb9c70dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ All notable changes to this project will be documented in this file. ### Fixed -- Previosly, some shell output of init-containers was not logged properly and therefore not aggregated, which is fixed now ([#746]). +- Previously, some shell output of init-containers was not logged properly and therefore not aggregated, which is fixed now ([#746]). [#741]: https://github.com/stackabletech/hdfs-operator/pull/741 [#743]: https://github.com/stackabletech/hdfs-operator/pull/743 From 61c79fb72a56584926b76aa580b532c83509a74d Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 2 Feb 2026 15:28:25 +0100 Subject: [PATCH 09/11] remove set +x and add format-zookeeper test --- rust/operator-binary/src/container.rs | 2 -- .../hdfs-vector-aggregator-values.yaml.j2 | 19 +++++++++++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index 9ec6de7a..722d7b64 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -746,10 +746,8 @@ impl ContainerConfig { r###" echo "Attempt to format ZooKeeper..." if [[ "0" -eq "$(echo $POD_NAME | sed -e 's/.*-//')" ]] ; then - set +e exclude_from_capture {hadoop_home}/bin/hdfs zkfc -formatZK -nonInteractive EXITCODE=$? - set -e if [[ $EXITCODE -eq 0 ]]; then echo "Successfully formatted" elif [[ $EXITCODE -eq 2 ]]; then diff --git a/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 b/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 index 4cb67af6..d3ade7c6 100644 --- a/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 +++ b/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 @@ -102,12 +102,27 @@ customConfig: .pod == "test-hdfs-automatic-log-namenode-default-1" && .container == "format-namenodes" && .file == "container.stderr.log" - filteredAutomaticLogConfigNameNode0FormatZookeeper: + filteredAutomaticLogConfigNameNode0FormatZookeeperLog4j: type: filter inputs: [validEvents] condition: >- .pod == "test-hdfs-automatic-log-namenode-default-0" && - .container == "format-zookeeper" + .container == "format-zookeeper" && + .file == "format-zookeeper.log4j.xml" + filteredAutomaticLogConfigNameNode0FormatZookeeperStdout: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-hdfs-automatic-log-namenode-default-0" && + .container == "format-zookeeper" && + .file == "container.stdout.log" + filteredAutomaticLogConfigNameNode0FormatZookeeperStderr: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-hdfs-automatic-log-namenode-default-0" && + .container == "format-zookeeper" && + .file == "container.stderr.log" filteredAutomaticLogConfigDataNode0: type: filter inputs: [validEvents] From ac8d721d0314915ffff7ce3496a4976cdaee394a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 2 Feb 2026 17:35:53 +0100 Subject: [PATCH 10/11] fix capture_shell script --- rust/operator-binary/src/container.rs | 21 ++++-------- .../hdfs-vector-aggregator-values.yaml.j2 | 33 +++++++++++++++++-- 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index 722d7b64..e6e7e490 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -744,21 +744,19 @@ impl ContainerConfig { } args.push_str(&formatdoc!( r###" - echo "Attempt to format ZooKeeper..." + echo "Attempt to format ZooKeeper ZNode for $POD_NAME ..." if [[ "0" -eq "$(echo $POD_NAME | sed -e 's/.*-//')" ]] ; then - exclude_from_capture {hadoop_home}/bin/hdfs zkfc -formatZK -nonInteractive - EXITCODE=$? + EXITCODE=$(exclude_from_capture {hadoop_home}/bin/hdfs zkfc -formatZK -nonInteractive) if [[ $EXITCODE -eq 0 ]]; then - echo "Successfully formatted" + echo "Successfully formatted ZooKeeper ZNode." elif [[ $EXITCODE -eq 2 ]]; then - echo "ZNode already existed, did nothing" + echo "ZNode already exists, nothing to do." else - echo "Zookeeper format failed with exit code $EXITCODE" + echo "ZooKeeper format ZNode failed with exit code $EXITCODE". exit $EXITCODE fi - else - echo "ZooKeeper already formatted!" + echo "ZooKeeper ZNode already formatted!" fi "###, hadoop_home = Self::HADOOP_HOME, @@ -1598,15 +1596,10 @@ fn bash_capture_shell_helper(container_name: &str) -> String { exclude_from_capture() {{ # Temporarily restore original FDs just for the duration of this command # We use 'local' for the exit code to keep things clean - set +e "$@" 1>&3 2>&4 local exit_code=$? - set -e - # If the command failed, we manually trigger the exit since we set +e - if [ $exit_code -ne 0 ]; then - exit $exit_code - fi + echo $exit_code }} start_capture diff --git a/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 b/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 index d3ade7c6..612ab98c 100644 --- a/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 +++ b/tests/templates/kuttl/logging/hdfs-vector-aggregator-values.yaml.j2 @@ -123,6 +123,20 @@ customConfig: .pod == "test-hdfs-automatic-log-namenode-default-0" && .container == "format-zookeeper" && .file == "container.stderr.log" + filteredAutomaticLogConfigNameNode1FormatZookeeperStdout: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-hdfs-automatic-log-namenode-default-1" && + .container == "format-zookeeper" && + .file == "container.stdout.log" + filteredAutomaticLogConfigNameNode1FormatZookeeperStderr: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-hdfs-automatic-log-namenode-default-1" && + .container == "format-zookeeper" && + .file == "container.stderr.log" filteredAutomaticLogConfigDataNode0: type: filter inputs: [validEvents] @@ -135,12 +149,27 @@ customConfig: condition: >- .pod == "test-hdfs-automatic-log-datanode-default-0" && .container == "vector" - filteredAutomaticLogConfigDataNode0WaitForNameNodes: + filteredAutomaticLogConfigDataNode0WaitForNameNodesLog4j: type: filter inputs: [validEvents] condition: >- .pod == "test-hdfs-automatic-log-datanode-default-0" && - .container == "wait-for-namenodes" + .container == "wait-for-namenodes" && + .file == "wait-for-namenodes.log4j.xml" + filteredAutomaticLogConfigDataNode0WaitForNameNodesStdout: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-hdfs-automatic-log-datanode-default-0" && + .container == "wait-for-namenodes" && + .file == "container.stdout.log" + filteredAutomaticLogConfigDataNode0WaitForNameNodesStderr: + type: filter + inputs: [validEvents] + condition: >- + .pod == "test-hdfs-automatic-log-datanode-default-0" && + .container == "wait-for-namenodes" && + .file == "container.stderr.log" filteredAutomaticLogConfigJournalNode0: type: filter inputs: [validEvents] From 6e9cff6b497e430f367704e8eaca13ebcfc71cc1 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 2 Feb 2026 17:49:45 +0100 Subject: [PATCH 11/11] cleanup --- rust/operator-binary/src/container.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rust/operator-binary/src/container.rs b/rust/operator-binary/src/container.rs index e6e7e490..5503422b 100644 --- a/rust/operator-binary/src/container.rs +++ b/rust/operator-binary/src/container.rs @@ -1597,9 +1597,7 @@ fn bash_capture_shell_helper(container_name: &str) -> String { # Temporarily restore original FDs just for the duration of this command # We use 'local' for the exit code to keep things clean "$@" 1>&3 2>&4 - local exit_code=$? - - echo $exit_code + echo $? }} start_capture