From cea112dce2b5912047c6180a084cac203122fbcd Mon Sep 17 00:00:00 2001
From: Jeroen Veen <jveen1@gmail.com>
Date: Sun, 13 Apr 2025 13:14:45 +0200
Subject: [PATCH 01/10] Added basic  Voice Activity Detection (VAD)
 functionality - Implement energy-based VAD node - Add VoiceActivity message
 type - Create launch file for audio capture with VAD - Update documentation
 with VAD usage instructions and parameters

---
 CMakeLists.txt                          |  31 +-
 ROS2_Audio_Tools_project_knowledge.md   | 812 ++++++++++++++++++++++++
 docs/about/implementation.md            |  55 ++
 docs/usage/overview.md                  |  12 +
 docs/usage/voice_activity_detection.md  | 123 ++++
 launch/audio_vad.launch.py              |  31 +
 msg/VoiceActivity.msg                   |   8 +
 src/vad_node.cpp                        | 205 ++++++
 src/vad_node.hpp                        | 122 ++++
 9 files changed, 1396 insertions(+), 3 deletions(-)
 create mode 100644 ROS2_Audio_Tools_project_knowledge.md
 create mode 100644 docs/usage/voice_activity_detection.md 
 create mode 100644 launch/audio_vad.launch.py
 create mode 100644 msg/VoiceActivity.msg
 create mode 100644 src/vad_node.cpp
 create mode 100644 src/vad_node.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 410e587..5be7790 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,12 +12,14 @@ rosidl_generate_interfaces(${PROJECT_NAME}
   msg/AudioData.msg
   msg/AudioDataStamped.msg
   msg/AudioInfo.msg
+  msg/VoiceActivity.msg  
   DEPENDENCIES std_msgs
 )
 
 ament_export_dependencies(rosidl_default_runtime)
 rosidl_get_typesupport_target(cpp_typesupport_target ${PROJECT_NAME} "rosidl_typesupport_cpp")
 
+# Audio Capture Node
 add_executable(audio_capture_node src/audio_capture_node.cpp)
 
 ament_target_dependencies(audio_capture_node
@@ -36,7 +38,7 @@ install(TARGETS
 audio_capture_node
 DESTINATION lib/${PROJECT_NAME}) 
 
-
+# Audio Playback Node
 add_executable(audio_playback_node src/audio_playback_node.cpp)
 target_link_libraries(audio_playback_node "${cpp_typesupport_target}" ${SDL2_LIBRARIES}) 
 target_include_directories(audio_playback_node PUBLIC ${SDL2_INCLUDE_DIRS})
@@ -50,8 +52,31 @@ ament_target_dependencies(audio_playback_node
     std_msgs
 )
 
+# Voice Activity Detection Node
+add_executable(vad_node src/vad_node.cpp)
+target_link_libraries(vad_node "${cpp_typesupport_target}")
+
+if(UNIX AND NOT APPLE)
+  target_compile_options(vad_node PRIVATE -Wall)
+endif()
 
+ament_target_dependencies(vad_node
+    rclcpp
+    std_msgs
+)
+
+# Install targets
 install(TARGETS
-audio_playback_node
-DESTINATION lib/${PROJECT_NAME}) 
+  audio_capture_node
+  audio_playback_node
+  vad_node  # Added VAD node
+  DESTINATION lib/${PROJECT_NAME}
+)
+
+# Install launch files
+install(DIRECTORY
+  launch/
+  DESTINATION share/${PROJECT_NAME}/launch
+)
+
 ament_package()
\ No newline at end of file
diff --git a/ROS2_Audio_Tools_project_knowledge.md b/ROS2_Audio_Tools_project_knowledge.md
new file mode 100644
index 0000000..080d213
--- /dev/null
+++ b/ROS2_Audio_Tools_project_knowledge.md
@@ -0,0 +1,812 @@
+# EdgeVox Project Documentation
+
+# Project Structure
+```
+ROS2_Audio_Tools/
+├── conda/
+│   ├── lin/
+│   │   ├── recipe/
+│   │   │   ├── activate.sh
+│   │   │   ├── build_ament_cmake.sh
+│   │   │   ├── build_ament_python.sh
+│   │   │   ├── build_catkin.sh
+│   │   │   ├── deactivate.sh
+│   │   │   ├── recipe.yaml
+│   │   ├── pixi.toml
+│   ├── win/
+│   │   ├── recipe/
+│   │   │   ├── activate.bat
+│   │   │   ├── bld_ament_cmake.bat
+│   │   │   ├── bld_ament_python.bat
+│   │   │   ├── bld_catkin.bat
+│   │   │   ├── bld_catkin_merge.bat
+│   │   │   ├── bld_colcon_merge.bat
+│   │   │   ├── deactivate.bat
+│   │   │   ├── recipe.yaml
+│   │   │   ├── rosdistro_snapshot.yaml
+│   │   ├── pixi.toml
+├── docs/
+│   ├── about/
+│   │   ├── implementation.md
+│   ├── assets/
+│   │   ├── stylesheets/
+│   │   │   ├── logo.css
+│   │   ├── favicon.png
+│   │   ├── logo.png
+│   ├── contributing/
+│   │   ├── license.md
+│   │   ├── rules.md
+│   ├── usage/
+│   │   ├── overview.md
+│   │   ├── ros2_pixi_build_linux_windows.md
+│   │   ├── usage_with_native_linux.md
+│   ├── index.md
+├── msg/
+│   ├── AudioData.msg
+│   ├── AudioDataStamped.msg
+│   ├── AudioInfo.msg
+├── src/
+│   ├── audio_capture_node.cpp
+│   ├── audio_capture_node.hpp
+│   ├── audio_playback_node.cpp
+│   ├── audio_playback_node.hpp
+├── CMakeLists.txt
+├── LICENSE
+├── README.md
+├── colcon.pkg
+├── mkdocs.yml
+├── package.xml
+├── pixi.toml
+```
+
+# src/audio_capture_node.cpp
+```cpp
+/**
+ * @file audio_capture_node.cpp
+ * @brief Audio Capture Node for ROS 2 using SDL2
+ *
+ * Copyright 2025 <Hoog-V / CLFML>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "audio_capture_node.hpp"
+#include <stdio.h>
+
+AudioCaptureNode::AudioCaptureNode()
+    : Node("audio_capture_node"), _desired_rate(-1.0) {
+
+  // Declare and get parameters
+  this->declare_parameter<std::string>("sample_format", "S16LE");
+  this->get_parameter("sample_format", _sample_format);
+
+  this->declare_parameter<int>("channels", 1);
+  this->declare_parameter<int>("sample_rate", 16000);
+  this->get_parameter("channels", _channels);
+  this->get_parameter("sample_rate", _sample_rate);
+
+  if (_channels < 1 || _channels > 8) {
+    RCLCPP_ERROR_STREAM(this->get_logger(),
+                        "Invalid channel count: " << _channels);
+    throw std::runtime_error("Unsupported media type.");
+  }
+
+  this->declare_parameter<int>("device", -1);
+  this->get_parameter("device", _device);
+
+  this->declare_parameter<int>("chunk_size", 1024);
+  int chunk_size;
+  this->get_parameter("chunk_size", chunk_size);
+
+  if (chunk_size < 512 || chunk_size > 4096) {
+    RCLCPP_ERROR_STREAM(this->get_logger(),
+                        "Invalid chunk size: " << chunk_size);
+    throw std::runtime_error("Unsupported chunk size!");
+  }
+
+  if (!ConfigureSDLStream(_device, _sample_format, chunk_size, _channels)) {
+    throw std::runtime_error("Failed to configure SDL audio capture.");
+  }
+
+  // Publishers
+  _pub = this->create_publisher<audio_tools::msg::AudioData>("audio", 10);
+  auto info_qos = rclcpp::QoS(rclcpp::KeepLast(1)).transient_local();
+  _pub_info = this->create_publisher<audio_tools::msg::AudioInfo>("audio_info",
+                                                                  info_qos);
+
+  _pub_stamped = this->create_publisher<audio_tools::msg::AudioDataStamped>(
+      "audio_stamped", 10);
+
+  _timer_info = rclcpp::create_timer(this, get_clock(), std::chrono::seconds(5),
+                                     [this] { publishInfo(); });
+  publishInfo();
+}
+
+void AudioCaptureNode::callback(uint8_t *stream, int len) {
+  audio_tools::msg::AudioData msg;
+  audio_tools::msg::AudioDataStamped stamped_msg;
+
+  // Generate frame ID
+  stamped_msg.header.frame_id = std::to_string(_audio_pos++);
+  // Compute timestamp based on buffer size
+  size_t buffer_size_total_nsec =
+      (static_cast<size_t>(len) * 1e9) / (_sample_rate * _nbytes);
+
+  _nano_sec += buffer_size_total_nsec;
+  while (_nano_sec >= RCL_S_TO_NS(1)) {
+    _sec++;
+    _nano_sec -= RCL_S_TO_NS(1);
+  }
+
+  stamped_msg.header.stamp.sec = _sec;
+  stamped_msg.header.stamp.nanosec = _nano_sec;
+
+  // Copy audio data
+  msg.data.assign(stream, stream + len);
+
+  stamped_msg.audio = msg;
+  stamped_msg.info = _info_msg;
+  // Publish messages
+  _pub->publish(msg);
+  _pub_stamped->publish(stamped_msg);
+}
+
+void AudioCaptureNode::publishInfo() {
+  _info_msg.channels = _channels;
+  _info_msg.sample_rate = _sample_rate;
+  _info_msg.sample_format = _sample_format;
+
+  _pub_info->publish(_info_msg);
+}
+
+AudioCaptureNode::~AudioCaptureNode() {
+  SDL_PauseAudioDevice(_dev_id_in, 1);
+  SDL_CloseAudioDevice(_dev_id_in);
+}
+
+bool AudioCaptureNode::ConfigureSDLStream(int device, const std::string &format,
+                                          int chunk_size, int channels) {
+  SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, SDL_LOG_PRIORITY_INFO);
+
+  if (SDL_Init(SDL_INIT_AUDIO) < 0) {
+    RCLCPP_ERROR(this->get_logger(), "SDL initialization failed: %s",
+                 SDL_GetError());
+    return false;
+  }
+
+  SDL_SetHintWithPriority(SDL_HINT_AUDIO_RESAMPLING_MODE, "medium",
+                          SDL_HINT_OVERRIDE);
+
+  int nDevices = SDL_GetNumAudioDevices(SDL_TRUE);
+  RCLCPP_INFO(this->get_logger(), "%s: Found %d capture devices:\n", __func__,
+              nDevices);
+
+  for (int i = 0; i < nDevices; i++) {
+    RCLCPP_INFO(this->get_logger(), "  - Capture device #%d: '%s'\n", i,
+                SDL_GetAudioDeviceName(i, SDL_TRUE));
+  }
+
+  SDL_zero(_capture_spec_requested);
+  SDL_zero(_capture_spec_obtained);
+
+  auto format_it = _format_map.find(format);
+  if (format_it == _format_map.end()) {
+    RCLCPP_ERROR(this->get_logger(), "Unknown audio format: %s",
+                 format.c_str());
+    return false;
+  }
+  _nbytes = format_it->second.second;
+
+  _capture_spec_requested.freq = _sample_rate;
+  _capture_spec_requested.format = format_it->second.first;
+  _capture_spec_requested.channels = channels;
+  _capture_spec_requested.samples = chunk_size;
+  _capture_spec_requested.callback = [](void *userdata, uint8_t *stream,
+                                        int len) {
+    static_cast<AudioCaptureNode *>(userdata)->callback(stream, len);
+  };
+  _capture_spec_requested.userdata = this;
+
+  if (device >= 0 && device < nDevices) {
+    RCLCPP_INFO(this->get_logger(),
+                "%s: Attempting to open capture device #%d: '%s'...\n",
+                __func__, device, SDL_GetAudioDeviceName(device, SDL_TRUE));
+
+    _dev_id_in = SDL_OpenAudioDevice(SDL_GetAudioDeviceName(device, SDL_TRUE),
+                                     SDL_TRUE, &_capture_spec_requested,
+                                     &_capture_spec_obtained, 0);
+  } else {
+    RCLCPP_INFO(this->get_logger(),
+                "%s: Attempting to open default capture device...\n", __func__);
+    _dev_id_in =
+        SDL_OpenAudioDevice(nullptr, SDL_TRUE, &_capture_spec_requested,
+                            &_capture_spec_obtained, 0);
+  }
+
+  if (!_dev_id_in) {
+    RCLCPP_INFO(this->get_logger(), "%s: Failed to open audio device: %s\n",
+                __func__, SDL_GetError());
+    return false;
+  }
+
+  RCLCPP_INFO(this->get_logger(), "%s: Capture device opened (SDL Id = %d):\n",
+              __func__, _dev_id_in);
+  RCLCPP_INFO(this->get_logger(), "  - Sample rate: %d\n",
+              _capture_spec_obtained.freq);
+  RCLCPP_INFO(this->get_logger(), "  - Format: %d (Requested: %d)\n",
+              _capture_spec_obtained.format, _capture_spec_requested.format);
+  RCLCPP_INFO(this->get_logger(), "  - Channels: %d (Requested: %d)\n",
+              _capture_spec_obtained.channels,
+              _capture_spec_requested.channels);
+  RCLCPP_INFO(this->get_logger(), "  - Samples per frame: %d\n",
+              _capture_spec_obtained.samples);
+
+  _sample_rate = _capture_spec_obtained.freq;
+  SDL_PauseAudioDevice(_dev_id_in, 0);
+  return true;
+}
+
+int main(int argc, char *argv[]) {
+  rclcpp::init(argc, argv);
+  rclcpp::spin(std::make_shared<AudioCaptureNode>());
+  rclcpp::shutdown();
+  return 0;
+}
+```
+
+
+# src/audio_capture_node.hpp
+```cpp
+/**
+ * @file audio_capture_node.hpp
+ * @brief Audio Capture Node for ROS 2 using SDL2
+ *
+ * Copyright 2025 <Hoog-V / CLFML>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AUDIO_CAPTURE_NODE_HPP
+#define AUDIO_CAPTURE_NODE_HPP
+
+#include <SDL.h>
+#include <SDL_audio.h>
+#include <rclcpp/rclcpp.hpp>
+
+#include "audio_tools/msg/audio_data.hpp"
+#include "audio_tools/msg/audio_data_stamped.hpp"
+#include "audio_tools/msg/audio_info.hpp"
+
+/**
+ * @class AudioCaptureNode
+ * @brief A ROS 2 node for capturing and publishing audio data using SDL2.
+ */
+class AudioCaptureNode : public rclcpp::Node {
+public:
+  /**
+   * @brief Constructor for the AudioCaptureNode.
+   * Initializes audio capture, publishers, and diagnostics.
+   */
+  AudioCaptureNode();
+
+  /**
+   * @brief Exits the program with the given exit code.
+   * @param code The exit code.
+   */
+  void exitOnMainThread(int code);
+
+  /**
+   * @brief SDL2 audio callback function.
+   * Processes captured audio and publishes it as a ROS message.
+   * @param stream Pointer to the audio stream buffer.
+   * @param len Length of the audio buffer in bytes.
+   */
+  void callback(uint8_t *stream, int len);
+
+  /**
+   * @brief Publishes audio information (sample rate, channels, format).
+   */
+  void publishInfo();
+
+  /**
+   * @brief Destructor for AudioCaptureNode.
+   * Cleans up SDL audio resources.
+   */
+  ~AudioCaptureNode();
+
+private:
+  /**
+   * @brief ROS publisher for raw audio data.
+   */
+  rclcpp::Publisher<audio_tools::msg::AudioData>::SharedPtr _pub;
+
+  /**
+   * @brief ROS publisher for audio metadata (e.g., sample rate, format).
+   */
+  rclcpp::Publisher<audio_tools::msg::AudioInfo>::SharedPtr _pub_info;
+
+  /**
+   * @brief Timer for periodically publishing audio metadata.
+   */
+  rclcpp::TimerBase::SharedPtr _timer_info;
+
+  /**
+   * @brief Audio information metadata
+   */
+  audio_tools::msg::AudioInfo _info_msg;
+
+  /**
+   * @brief SDL audio specifications for requested and obtained formats.
+   */
+  SDL_AudioSpec _capture_spec_requested;
+  SDL_AudioSpec _capture_spec_obtained;
+
+  /**
+   * @brief Audio parameters: bitrate, channels, depth, sample rate, and device
+   * index.
+   */
+  int _bitrate, _channels, _sample_rate, _device;
+
+  /**
+   * @brief Number of bytes processed.
+   */
+  size_t _nbytes;
+
+  /**
+   * @brief Position counter for audio frames.
+   */
+  size_t _audio_pos = 0;
+
+  /**
+   * @brief Nanosecond and second timestamps for synchronization.
+   */
+  size_t _nano_sec = 0;
+  size_t _sec = 0;
+
+  /**
+   * @brief Desired publishing rate for diagnostics.
+   */
+  double _desired_rate;
+
+  /**
+   * @brief Audio sample format (e.g., "S16LE", "F32").
+   */
+  std::string _sample_format;
+
+  /**
+   * @brief SDL audio device ID.
+   */
+  SDL_AudioDeviceID _dev_id_in = 0;
+
+  /**
+   * @brief ROS publisher for timestamped audio data with diagnostics.
+   */
+  rclcpp::Publisher<audio_tools::msg::AudioDataStamped>::SharedPtr _pub_stamped;
+
+  /**
+   * @brief Configures the SDL2 audio capture stream.
+   * @param device The SDL2 device index (-1 for default).
+   * @param format The requested audio format (e.g., "S16LE").
+   * @param chunk_size The buffer size for each audio chunk (e.g., 1024
+   * samples).
+   * @param channels The number of audio channels (e.g., 1 for mono, 2 for
+   * stereo).
+   * @return True if configuration is successful, false otherwise.
+   */
+  bool ConfigureSDLStream(int device, const std::string &format, int chunk_size,
+                          int channels);
+
+  /**
+   * @brief Maps audio format strings to SDL2 audio formats and corresponding
+   * byte sizes.
+   */
+  const std::unordered_map<std::string, std::pair<SDL_AudioFormat, int>>
+      _format_map = {
+          {"U8", {AUDIO_U8, 1}}, ///< Unsigned 8-bit (1 byte)
+          {"S8", {AUDIO_S8, 1}}, ///< Signed 8-bit (1 byte)
+          {"U16LE",
+           {AUDIO_U16LSB, 2}}, ///< Unsigned 16-bit little-endian (2 bytes)
+          {"U16BE",
+           {AUDIO_U16MSB, 2}}, ///< Unsigned 16-bit big-endian (2 bytes)
+          {"S16LE",
+           {AUDIO_S16LSB, 2}}, ///< Signed 16-bit little-endian (2 bytes)
+          {"S16BE", {AUDIO_S16MSB, 2}}, ///< Signed 16-bit big-endian (2 bytes)
+          {"S32LE",
+           {AUDIO_S32LSB, 4}}, ///< Signed 32-bit little-endian (4 bytes)
+          {"S32BE", {AUDIO_S32MSB, 4}}, ///< Signed 32-bit big-endian (4 bytes)
+          {"F32LE",
+           {AUDIO_F32LSB, 4}}, ///< 32-bit float little-endian (4 bytes)
+          {"F32BE", {AUDIO_F32MSB, 4}}, ///< 32-bit float big-endian (4 bytes)
+          {"F32", {AUDIO_F32, 4}}       ///< 32-bit float (generic) (4 bytes)
+      };
+};
+
+#endif /* AUDIO_CAPTURE_NODE_HPP */
+
+```
+
+
+# src/audio_playback_node.cpp
+```cpp
+/**
+ * @file audio_playback_node.cpp
+ * @brief Audio Capture Node for ROS 2 using SDL2
+ *
+ * Copyright 2025 <Hoog-V / CLFML>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "audio_playback_node.hpp"
+#include <stdio.h>
+
+AudioPlaybackNode::AudioPlaybackNode()
+    : Node("audio_playback_node"), _audio_device(0), _initialized(false) {
+  // Declare and get topic name parameter
+  std::string topic_name =
+      this->declare_parameter<std::string>("audio_topic", "/audio_playback");
+  int device_index =
+      this->declare_parameter<int>("device_index", -1); // -1 = default
+
+  _subscription = this->create_subscription<audio_tools::msg::AudioDataStamped>(
+      topic_name, 10,
+      std::bind(&AudioPlaybackNode::audio_cb, this, std::placeholders::_1));
+
+  if (SDL_Init(SDL_INIT_AUDIO) < 0) {
+    RCLCPP_FATAL(get_logger(), "SDL_Init failed: %s", SDL_GetError());
+    rclcpp::shutdown();
+  }
+  int num_devices = SDL_GetNumAudioDevices(0);
+  RCLCPP_INFO(get_logger(), "Available audio output devices:");
+  for (int i = 0; i < num_devices; ++i) {
+    const char *name = SDL_GetAudioDeviceName(i, 0);
+    RCLCPP_INFO(get_logger(), "  [%d]: %s%s", i, name,
+                (i == device_index ? " (selected)" : ""));
+  }
+
+  if (device_index >= 0 && device_index < num_devices) {
+    _device_name = SDL_GetAudioDeviceName(device_index, 0);
+    RCLCPP_INFO(get_logger(), "Using audio device: %s", _device_name.c_str());
+  } else if (device_index == -1) {
+    _device_name = ""; // SDL default
+    RCLCPP_INFO(get_logger(), "Using default audio device");
+  } else {
+    RCLCPP_FATAL(get_logger(), "Invalid device_index: %d (available: 0 to %d)",
+                 device_index, num_devices - 1);
+    rclcpp::shutdown();
+  }
+
+  RCLCPP_INFO(get_logger(), "Subscribed to topic: %s", topic_name.c_str());
+}
+
+AudioPlaybackNode::~AudioPlaybackNode() {
+  if (_audio_device != 0) {
+    SDL_CloseAudioDevice(_audio_device);
+  }
+  SDL_Quit();
+}
+
+bool AudioPlaybackNode::init_audio(const audio_tools::msg::AudioInfo &info) {
+  SDL_AudioSpec desired_spec;
+  SDL_zero(desired_spec);
+
+  desired_spec.freq = static_cast<int>(info.sample_rate);
+  desired_spec.channels = info.channels;
+  desired_spec.samples = 4096;
+  desired_spec.callback = nullptr;
+
+  if (info.sample_format == "S16LE") {
+    desired_spec.format = AUDIO_S16LSB;
+  } else if (info.sample_format == "S16BE") {
+    desired_spec.format = AUDIO_S16MSB;
+  } else if (info.sample_format == "U8") {
+    desired_spec.format = AUDIO_U8;
+  } else if (info.sample_format == "S8") {
+    desired_spec.format = AUDIO_S8;
+  } else if (info.sample_format == "F32LE") {
+    desired_spec.format = AUDIO_F32LSB;
+  } else if (info.sample_format == "F32BE") {
+    desired_spec.format = AUDIO_F32MSB;
+  } else {
+    RCLCPP_ERROR(get_logger(), "Unsupported format: %s",
+                 info.sample_format.c_str());
+    return false;
+  }
+
+  _audio_device =
+      SDL_OpenAudioDevice(_device_name.empty() ? nullptr : _device_name.c_str(),
+                          0, // 0 = output
+                          &desired_spec, &_obtained_spec, 0);
+  if (_audio_device == 0) {
+    RCLCPP_ERROR(get_logger(), "SDL_OpenAudioDevice error: %s", SDL_GetError());
+    return false;
+  }
+
+  SDL_PauseAudioDevice(_audio_device, 0);
+  RCLCPP_INFO(get_logger(), "Audio device initialized: %d Hz, %d channels",
+              _obtained_spec.freq, _obtained_spec.channels);
+
+  _initialized = true;
+  return true;
+}
+
+void AudioPlaybackNode::audio_cb(
+    const audio_tools::msg::AudioDataStamped::SharedPtr msg) {
+  const auto &info = msg->info;
+  const auto &audio = msg->audio;
+
+  if (!_initialized) {
+    if (!init_audio(info)) {
+      RCLCPP_ERROR(get_logger(), "Failed to initialize audio output.");
+      return;
+    }
+  }
+
+  if (audio.data.empty())
+    return;
+
+  if (SDL_QueueAudio(_audio_device, audio.data.data(), audio.data.size()) < 0) {
+    RCLCPP_ERROR(get_logger(), "SDL_QueueAudio error: %s", SDL_GetError());
+  }
+}
+
+int main(int argc, char *argv[]) {
+  rclcpp::init(argc, argv);
+  rclcpp::spin(std::make_shared<AudioPlaybackNode>());
+  rclcpp::shutdown();
+  return 0;
+}
+```
+
+
+# src/audio_playback_node.hpp
+```cpp
+/**
+ * @file audio_playback_node.hpp
+ * @brief Audio Playback Node for ROS 2 using SDL2
+ *
+ * Copyright 2025 <Hoog-V / CLFML>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AUDIO_PLAYBACK_NODE_HPP
+#define AUDIO_PLAYBACK_NODE_HPP
+#include <SDL.h>
+#include <SDL_audio.h>
+#include <rclcpp/rclcpp.hpp>
+
+#include "audio_tools/msg/audio_data.hpp"
+#include "audio_tools/msg/audio_data_stamped.hpp"
+#include "audio_tools/msg/audio_info.hpp"
+
+/**
+ * @class AudioPlaybackNode
+ * @brief A ROS 2 node for capturing and publishing audio data using SDL2.
+ */
+class AudioPlaybackNode : public rclcpp::Node {
+public:
+  /**
+   * @brief Constructor for the AudioPlaybackNode.
+   * Initializes audio capture, publishers, and diagnostics.
+   */
+  AudioPlaybackNode();
+
+  /**
+   * @brief Destructor for AudioPlaybackNode.
+   * Cleans up SDL audio resources.
+   */
+  ~AudioPlaybackNode();
+
+private:
+  /**
+   * @brief Callback function for received audio_msg on subscribed topic
+   * @param msg The audio message which got received
+   */
+  void audio_cb(const audio_tools::msg::AudioDataStamped::SharedPtr msg);
+  /**
+   * @brief Function which automatically configures the audio_interface based on
+   * the received audio information
+   * @param info Custom message which contains the channels, sample_rate and
+   * encoding format
+   * @return false if failed e.g. invalid format, true if ok
+   */
+  bool init_audio(const audio_tools::msg::AudioInfo &info);
+
+  /**
+   * @brief Subscription object/handle for the subscribed audio topic
+   */
+  rclcpp::Subscription<audio_tools::msg::AudioDataStamped>::SharedPtr
+      _subscription;
+
+  /**
+   * @brief SDL audio device handle.
+   */
+  SDL_AudioDeviceID _audio_device;
+
+  /**
+   * @brief SDL audio specifications for obtained format.
+   */
+  SDL_AudioSpec _obtained_spec;
+
+  /**
+   * @brief Full name of audio_device
+   */
+  std::string _device_name;
+
+  /**
+   * @brief Internal state to prevent initializing audio interface twice
+   */
+  bool _initialized;
+};
+#endif /* AUDIO_PLAYBACK_NODE */
+```
+
+
+# CMakeLists.txt
+```cmake
+cmake_minimum_required(VERSION 3.1...3.14)
+project(audio_tools C CXX)
+
+find_package(ament_cmake REQUIRED)
+find_package(rclcpp REQUIRED)
+find_package(builtin_interfaces REQUIRED)
+find_package(rosidl_default_generators REQUIRED)
+find_package(std_msgs REQUIRED)
+find_package(SDL2 REQUIRED)
+
+rosidl_generate_interfaces(${PROJECT_NAME}
+  msg/AudioData.msg
+  msg/AudioDataStamped.msg
+  msg/AudioInfo.msg
+  DEPENDENCIES std_msgs
+)
+
+ament_export_dependencies(rosidl_default_runtime)
+rosidl_get_typesupport_target(cpp_typesupport_target ${PROJECT_NAME} "rosidl_typesupport_cpp")
+
+add_executable(audio_capture_node src/audio_capture_node.cpp)
+
+ament_target_dependencies(audio_capture_node
+    rclcpp
+    std_msgs
+)
+
+target_link_libraries(audio_capture_node "${cpp_typesupport_target}" ${SDL2_LIBRARIES}) 
+target_include_directories(audio_capture_node PUBLIC ${SDL2_INCLUDE_DIRS})
+
+if(UNIX AND NOT APPLE)  # Linux only
+  target_compile_options(audio_capture_node PRIVATE -Wall)
+endif()
+
+install(TARGETS
+audio_capture_node
+DESTINATION lib/${PROJECT_NAME}) 
+
+
+add_executable(audio_playback_node src/audio_playback_node.cpp)
+target_link_libraries(audio_playback_node "${cpp_typesupport_target}" ${SDL2_LIBRARIES}) 
+target_include_directories(audio_playback_node PUBLIC ${SDL2_INCLUDE_DIRS})
+
+if(UNIX AND NOT APPLE)
+  target_compile_options(audio_playback_node PRIVATE -Wall)
+endif()
+
+ament_target_dependencies(audio_playback_node
+    rclcpp
+    std_msgs
+)
+
+
+install(TARGETS
+audio_playback_node
+DESTINATION lib/${PROJECT_NAME}) 
+ament_package()
+```
+
+
+# mkdocs.yml
+```text
+site_name: ROS2_Audio_Tools
+extra_css:
+  - assets/stylesheets/logo.css
+theme:
+  name: material
+  # custom_dir: docs/overrides
+  features:
+    - announce.dismiss
+    - content.action.edit
+    - content.action.view
+    - content.code.annotate
+    - content.code.copy
+    # - content.code.select
+    # - content.footnote.tooltips
+    # - content.tabs.link
+    - content.tooltips
+    # - header.autohide
+    # - navigation.expand
+    - navigation.footer
+    - navigation.indexes
+    # - navigation.instant
+    # - navigation.instant.prefetch
+    # - navigation.instant.progress
+    # - navigation.prune
+    - navigation.sections
+    - navigation.tabs
+    # - navigation.tabs.sticky
+    - navigation.top
+    - navigation.tracking
+    - search.highlight
+    - search.share
+    - search.suggest
+    - toc.follow
+    # - toc.integrate
+  palette:
+      scheme: default
+      primary: green
+      toggle:
+        icon: material/toggle-switch
+        name: Switch to dark mode
+  font:
+    text: Avenir Next Condensed
+    code: Avenir Next Condensed
+  favicon: assets/favicon.png
+  logo: assets/logo.png
+
+markdown_extensions:
+  - attr_list
+  - md_in_html
+  - admonition
+  - pymdownx.details
+  - pymdownx.superfences
+
+nav:
+  - index.md
+  - Getting Started:
+    - usage/overview.md
+    - Build Environment:
+      - usage/ros2_pixi_build_linux_windows.md
+      - usage/usage_with_native_linux.md
+  - Implementation:
+    - about/implementation.md
+  - Contributing:
+    - contributing/rules.md
+    - contributing/license.md
+```
+
diff --git a/docs/about/implementation.md b/docs/about/implementation.md
index 292fabc..61a96a2 100644
--- a/docs/about/implementation.md
+++ b/docs/about/implementation.md
@@ -140,3 +140,58 @@ ros2 run audio_tools audio_playback_node \
 - `SDL2`
 - Custom messages in `audio_tools`
 
+
+## `vad_node`
+
+**Voice Activity Detection Node for ROS 2**
+
+Analyzes audio streams to detect the presence of speech or voice activity and publishes the detection state. Useful for conversation systems, wake word triggers, and audio recording automation.
+
+---
+### ✅ Topics
+| Topic | Type | Description |
+|------------------|------------------------------------------|-----------------------------------|
+| `/voice_activity` | `audio_tools/msg/VoiceActivity` | Voice activity detection state |
+| Subscribes to: | | |
+| `/audio_stamped` | `audio_tools/msg/AudioDataStamped` | Audio input for analysis |
+---
+
+### ⚙️ Parameters
+| Name | Type | Default | Description |
+|----------------|----------|---------|---------------------------------------------|
+| `energy_threshold`| `float` | `0.01` | Detection sensitivity threshold |
+| `hold_time` | `float` | `0.5` | Time in seconds to maintain detection after voice stops |
+| `min_samples` | `int` | `160` | Minimum samples needed for detection decision |
+---
+
+### 📦 Message Notes
+- `VoiceActivity`:
+  - `std_msgs/Header header` with `stamp` and `frame_id`
+  - `bool active` - Detection state (true/false)
+  - `float32 energy_level` - Current audio energy level
+  - `float32 threshold` - Detection threshold in use
+  - `float32 hold_time` - Time to hold detection state after voice stops
+---
+
+### 🧩 Implementation Notes
+- Uses an **energy-based algorithm** for voice detection.
+- Converts all audio formats to normalized float [-1.0, 1.0] for processing.
+- Energy calculation: `energy = sum(sample * sample) / num_samples`
+- Hold timer prevents rapid switching between states during pauses.
+- Format conversion supports U8, S8, S16LE, S16BE, F32, etc.
+---
+
+### 🏁 Launch Example
+```bash
+ros2 run audio_tools vad_node \
+ --ros-args \
+-p energy_threshold:=0.015 \
+-p hold_time:=0.7 \
+-p min_samples:=160
+```
+---
+
+### 🧪 Dependencies
+- `rclcpp`
+- `audio_tools/msg/AudioDataStamped`
+- `audio_tools/msg/VoiceActivity`
\ No newline at end of file
diff --git a/docs/usage/overview.md b/docs/usage/overview.md
index caa16c0..567b3d1 100644
--- a/docs/usage/overview.md
+++ b/docs/usage/overview.md
@@ -57,3 +57,15 @@ ros2 run audio_tools audio_capture_node
 ros2 run audio_tools audio_playback_node \
   --ros-args -p audio_topic:=/audio_stamped
 ```
+
+### 🗣️ 5. Voice Activity Detection
+
+Capture microphone input and publish to `/audio_stamped`. Simple energy-based Voice Activity Detection (VAD) node on `/audio_stamped` and publishes detection results to `/voice_activity`. Note that VAD parameters are set in launch file.
+
+```bash
+# Terminal 1
+ros2 run audio_tools audio_vad.launch.py
+
+# Terminal 2
+ros2 topic echo /voice_activity
+```
diff --git a/docs/usage/voice_activity_detection.md  b/docs/usage/voice_activity_detection.md 
new file mode 100644
index 0000000..c9a7547
--- /dev/null
+++ b/docs/usage/voice_activity_detection.md 	
@@ -0,0 +1,123 @@
+# Voice Activity Detection
+
+The Voice Activity Detection (VAD) node provides real-time detection of voice or speech in audio streams. The node uses a simple energy-based algorithm to determine if voice is present in the audio signal.
+
+## How it works
+
+The VAD node subscribes to audio data and performs the following steps:
+
+1. Converts the incoming audio to floating-point samples
+2. Calculates the energy level of the audio signal
+3. Compares this energy to a threshold
+4. Applies a hold time to prevent rapid on/off switching
+5. Publishes the detection state
+
+## Node Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `energy_threshold` | float | 0.01 | Energy threshold for detection |
+| `hold_time` | float | 0.5 | Time in seconds to maintain detection after voice stops |
+| `min_samples` | int | 160 | Minimum number of samples required for detection |
+
+## Topics
+
+### Subscribed Topics
+
+- `audio_stamped` ([audio_tools/AudioDataStamped](../msg/AudioDataStamped.md)): Audio data with timestamp and format information
+
+### Published Topics
+
+- `voice_activity` ([audio_tools/VoiceActivity](../msg/VoiceActivity.md)): Voice activity detection results
+
+## Message Format
+
+The `VoiceActivity` message includes:
+
+```
+std_msgs/Header header      # Timestamp and frame information
+bool active                 # True when voice activity is detected
+float32 energy_level        # Current audio energy level
+float32 threshold           # Current threshold being used
+float32 hold_time           # Hold time in seconds for the detection
+```
+
+## Usage Example
+
+To run the VAD node separately:
+
+```bash
+ros2 run audio_tools vad_node --ros-args -p energy_threshold:=0.015 -p hold_time:=0.3
+```
+
+To run with the audio capture node:
+
+```bash
+ros2 launch audio_tools audio_vad.launch.py
+```
+
+## Tuning the VAD
+
+The energy threshold is the most important parameter to tune:
+
+- **Too low**: Will trigger on background noise
+- **Too high**: May miss quiet speech
+
+### Step-by-Step Tuning Process
+
+1. **Monitor the energy level of your audio**:
+   ```bash
+   ros2 topic echo /voice_activity
+   ```
+
+2. **Determine your noise floor**:
+   Remain silent and observe the `energy_level` values. Note the maximum level during silence.
+
+3. **Determine speech energy level**:
+   Speak at a normal volume and note the typical `energy_level` values.
+
+4. **Set the threshold**:
+   Set the threshold slightly above your noise floor:
+   ```bash
+   ros2 param set /vad energy_threshold 0.015  # Adjust based on your observations
+   ```
+
+5. **Adjust hold time**:
+   If detection is fragmented during normal speech, increase the hold time:
+   ```bash
+   ros2 param set /vad hold_time 0.7  # Adjust as needed
+   ```
+
+### Visual Tuning
+
+For visual tuning, use RQT to plot the energy levels:
+
+```bash
+rqt_plot /voice_activity/energy_level /voice_activity/threshold /voice_activity/active
+```
+
+This will display:
+- Your audio energy over time
+- The threshold line
+- The detection state (0 or 1)
+
+### Environment-Specific Settings
+
+Here are suggested starting points for different environments:
+
+| Environment | Suggested energy_threshold | Suggested hold_time |
+|-------------|----------------------------|---------------------|
+| Quiet room, close mic | 0.005-0.01 | 0.3-0.5s |
+| Office, close mic | 0.01-0.02 | 0.5-0.7s |
+| Noisy environment | 0.02-0.05 | 0.7-1.0s |
+| Far-field mic | 0.01-0.03 | 0.7-1.0s |
+
+## Limitations
+
+This implementation uses a simple energy-based detector which works well in low-noise environments but may not perform optimally in:
+
+- Noisy environments
+- Situations with varying background noise
+- When capturing audio from distant microphones
+
+For more advanced detection in challenging environments, consider integrating a machine learning-based VAD algorithm.
\ No newline at end of file
diff --git a/launch/audio_vad.launch.py b/launch/audio_vad.launch.py
new file mode 100644
index 0000000..3299074
--- /dev/null
+++ b/launch/audio_vad.launch.py
@@ -0,0 +1,31 @@
+from launch import LaunchDescription
+from launch_ros.actions import Node
+
+def generate_launch_description():
+    return LaunchDescription([
+        Node(
+            package='audio_tools',
+            executable='audio_capture_node',
+            name='audio_capture',
+            parameters=[{
+                'sample_format': 'S16LE',
+                'channels': 1,
+                'sample_rate': 16000,
+                'device': -1,  # Use default device
+                'chunk_size': 1024,
+            }],
+            output='screen',
+        ),
+        
+        Node(
+            package='audio_tools',
+            executable='vad_node',
+            name='vad',
+            parameters=[{
+                'energy_threshold': 0.01,
+                'hold_time': 0.5,
+                'min_samples': 160,
+            }], # Office, close mic
+            output='screen',
+        ),
+    ])
\ No newline at end of file
diff --git a/msg/VoiceActivity.msg b/msg/VoiceActivity.msg
new file mode 100644
index 0000000..8a31c90
--- /dev/null
+++ b/msg/VoiceActivity.msg
@@ -0,0 +1,8 @@
+std_msgs/Header header
+bool active
+# Current audio energy level
+float32 energy_level
+# Current threshold being used
+float32 threshold
+# Hold time in seconds for the detection
+float32 hold_time
diff --git a/src/vad_node.cpp b/src/vad_node.cpp
new file mode 100644
index 0000000..f4b614a
--- /dev/null
+++ b/src/vad_node.cpp
@@ -0,0 +1,205 @@
+/**
+ * @file vad_node.cpp
+ * @brief Voice Activity Detection Node for ROS 2
+ *
+ * Copyright 2025 <ducroq / CLFML>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "vad_node.hpp"
+#include <cstring>
+#include <algorithm>
+#include <cmath>
+
+VADNode::VADNode() : Node("vad_node"), _current_vad_state(false) {
+  // Declare and get parameters
+  this->declare_parameter<float>("energy_threshold", 0.01);
+  this->declare_parameter<float>("hold_time", 0.5);
+  this->declare_parameter<int>("min_samples", 160);
+  
+  this->get_parameter("energy_threshold", _energy_threshold);
+  this->get_parameter("hold_time", _hold_time);
+  this->get_parameter("min_samples", _min_samples);
+
+  RCLCPP_INFO(this->get_logger(), "VAD initialized with parameters:");
+  RCLCPP_INFO(this->get_logger(), "  - Energy threshold: %.4f", _energy_threshold);
+  RCLCPP_INFO(this->get_logger(), "  - Hold time: %.2f seconds", _hold_time);
+  RCLCPP_INFO(this->get_logger(), "  - Min samples: %d", _min_samples);
+
+  // Initialize the last voice time to current time
+  _last_voice_time = this->now();
+
+  // Create publisher for voice activity detection
+  _vad_pub = this->create_publisher<audio_tools::msg::VoiceActivity>("voice_activity", 10);
+  
+  // Subscribe to audio data stamped topic
+  _audio_sub = this->create_subscription<audio_tools::msg::AudioDataStamped>(
+      "audio_stamped", 10,
+      std::bind(&VADNode::audioCallback, this, std::placeholders::_1));
+  
+  RCLCPP_INFO(this->get_logger(), "VAD node is ready");
+}
+
+void VADNode::audioCallback(const audio_tools::msg::AudioDataStamped::SharedPtr msg) {
+  const auto &audio_data = msg->audio.data;
+  const auto &audio_info = msg->info;
+  
+  // Skip if we have no data
+  if (audio_data.empty()) {
+    return;
+  }
+
+  // Convert audio data to float samples for processing
+  std::vector<float> samples;
+  if (!convertToFloatSamples(audio_data, audio_info.sample_format, samples)) {
+    RCLCPP_WARN(this->get_logger(), "Failed to convert audio format %s", 
+                audio_info.sample_format.c_str());
+    return;
+  }
+  
+  // Calculate energy level
+  float energy_level = calculateEnergyLevel(samples);
+  
+  // Detect voice activity
+  bool voice_active = (energy_level > _energy_threshold) && 
+                      (samples.size() >= static_cast<size_t>(_min_samples));
+  
+  // Handle hold time logic
+  if (voice_active) {
+    _last_voice_time = this->now();
+    
+    if (!_current_vad_state) {
+      _current_vad_state = true;
+      RCLCPP_DEBUG(this->get_logger(), "Voice activity started");
+    }
+  } else {
+    // Check if we're within the hold time
+    rclcpp::Duration time_since_voice = this->now() - _last_voice_time;
+    if (_current_vad_state && time_since_voice.seconds() > _hold_time) {
+      _current_vad_state = false;
+      RCLCPP_DEBUG(this->get_logger(), "Voice activity ended");
+    }
+  }
+  
+  // Publish the current VAD state
+  publishVoiceActivity(_current_vad_state, energy_level, msg->header.stamp);
+}
+
+bool VADNode::detectVoiceActivity(const std::vector<uint8_t> &audio_data, 
+                                 const std::string &sample_format) {
+  // Convert to float samples
+  std::vector<float> samples;
+  if (!convertToFloatSamples(audio_data, sample_format, samples)) {
+    return false;
+  }
+  
+  // Check if we have enough samples
+  if (samples.size() < static_cast<size_t>(_min_samples)) {
+    return false;
+  }
+  
+  // Calculate energy level
+  float energy_level = calculateEnergyLevel(samples);
+  
+  // Compare with threshold
+  return energy_level > _energy_threshold;
+}
+
+float VADNode::calculateEnergyLevel(const std::vector<float> &samples) {
+  float energy = 0.0f;
+  
+  for (const auto &sample : samples) {
+    energy += sample * sample;
+  }
+  
+  // Normalize by number of samples
+  if (!samples.empty()) {
+    energy /= samples.size();
+  }
+  
+  return energy;
+}
+
+bool VADNode::convertToFloatSamples(const std::vector<uint8_t> &audio_data,
+                                   const std::string &sample_format,
+                                   std::vector<float> &out_samples) {
+  // Find the byte size for this format
+  auto it = _format_byte_size.find(sample_format);
+  if (it == _format_byte_size.end()) {
+    RCLCPP_ERROR(this->get_logger(), "Unknown audio format: %s", sample_format.c_str());
+    return false;
+  }
+  
+  int bytes_per_sample = it->second;
+  size_t num_samples = audio_data.size() / bytes_per_sample;
+  out_samples.resize(num_samples);
+  
+  // Convert based on format
+  if (sample_format == "F32" || sample_format == "F32LE") {
+    // Direct copy for float format (assuming little endian)
+    std::memcpy(out_samples.data(), audio_data.data(), audio_data.size());
+  } 
+  else if (sample_format == "S16LE") {
+    // Convert signed 16-bit LE to float
+    const int16_t *int_samples = reinterpret_cast<const int16_t*>(audio_data.data());
+    for (size_t i = 0; i < num_samples; i++) {
+      out_samples[i] = int_samples[i] / 32768.0f;
+    }
+  } 
+  else if (sample_format == "S8") {
+    // Convert signed 8-bit to float
+    const int8_t *int_samples = reinterpret_cast<const int8_t*>(audio_data.data());
+    for (size_t i = 0; i < num_samples; i++) {
+      out_samples[i] = int_samples[i] / 128.0f;
+    }
+  } 
+  else if (sample_format == "U8") {
+    // Convert unsigned 8-bit to float
+    for (size_t i = 0; i < num_samples; i++) {
+      out_samples[i] = (audio_data[i] - 128) / 128.0f;
+    }
+  } 
+  else {
+    RCLCPP_WARN(this->get_logger(), 
+                "Format conversion not implemented for %s, using raw values", 
+                sample_format.c_str());
+    
+    // Just copy raw bytes as a fallback
+    for (size_t i = 0; i < num_samples; i++) {
+      out_samples[i] = static_cast<float>(audio_data[i * bytes_per_sample]) / 255.0f;
+    }
+  }
+  
+  return true;
+}
+
+void VADNode::publishVoiceActivity(bool active, float energy_level, 
+                                  const rclcpp::Time &timestamp) {
+  auto msg = std::make_unique<audio_tools::msg::VoiceActivity>();
+  
+  msg->header.stamp = timestamp;
+  msg->header.frame_id = "audio_frame";
+  msg->active = active;
+  msg->energy_level = energy_level;
+  msg->threshold = _energy_threshold;
+  msg->hold_time = _hold_time;
+  
+  _vad_pub->publish(std::move(msg));
+}
+
+int main(int argc, char *argv[]) {
+  rclcpp::init(argc, argv);
+  rclcpp::spin(std::make_shared<VADNode>());
+  rclcpp::shutdown();
+  return 0;
+}
\ No newline at end of file
diff --git a/src/vad_node.hpp b/src/vad_node.hpp
new file mode 100644
index 0000000..16c5011
--- /dev/null
+++ b/src/vad_node.hpp
@@ -0,0 +1,122 @@
+/**
+ * @file vad_node.hpp
+ * @brief Voice Activity Detection Node for ROS 2
+ *
+ * Copyright 2025 <ducroq / CLFML>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef VAD_NODE_HPP
+#define VAD_NODE_HPP
+
+#include <rclcpp/rclcpp.hpp>
+
+#include "audio_tools/msg/audio_data.hpp"
+#include "audio_tools/msg/audio_data_stamped.hpp"
+#include "audio_tools/msg/audio_info.hpp"
+#include "audio_tools/msg/voice_activity.hpp"
+
+/**
+ * @class VADNode
+ * @brief A ROS 2 node for Voice Activity Detection (VAD) using audio streams.
+ */
+class VADNode : public rclcpp::Node {
+public:
+  /**
+   * @brief Constructor for the VADNode.
+   * Initializes subscribers, publishers, and parameters.
+   */
+  VADNode();
+
+  /**
+   * @brief Destructor for VADNode.
+   */
+  ~VADNode() = default;
+
+private:
+  /**
+   * @brief Callback for audio data messages.
+   * @param msg The audio message received from the audio capture node.
+   */
+  void audioCallback(const audio_tools::msg::AudioDataStamped::SharedPtr msg);
+
+  /**
+   * @brief Processes audio data to detect voice activity.
+   * @param audio_data The raw audio data samples.
+   * @param sample_format The format of the audio samples.
+   * @return True if voice activity is detected, false otherwise.
+   */
+  bool detectVoiceActivity(const std::vector<uint8_t> &audio_data, 
+                           const std::string &sample_format);
+  
+  /**
+   * @brief Converts audio data to float samples for processing.
+   * @param audio_data The raw audio data.
+   * @param sample_format The format of the audio samples.
+   * @param out_samples Output vector of float samples.
+   * @return True if conversion was successful, false otherwise.
+   */
+  bool convertToFloatSamples(const std::vector<uint8_t> &audio_data,
+                             const std::string &sample_format,
+                             std::vector<float> &out_samples);
+
+  /**
+   * @brief Calculates the energy level of the audio samples.
+   * @param samples The audio samples in float format.
+   * @return The calculated energy level.
+   */
+  float calculateEnergyLevel(const std::vector<float> &samples);
+
+  /**
+   * @brief Publishes the current voice activity state.
+   * @param active Whether voice activity is detected.
+   * @param energy_level The current audio energy level.
+   * @param timestamp The timestamp for the message.
+   */
+  void publishVoiceActivity(bool active, float energy_level, 
+                            const rclcpp::Time &timestamp);
+
+  /**
+   * @brief ROS publisher for voice activity detection results.
+   */
+  rclcpp::Publisher<audio_tools::msg::VoiceActivity>::SharedPtr _vad_pub;
+
+  /**
+   * @brief ROS subscriber for audio data.
+   */
+  rclcpp::Subscription<audio_tools::msg::AudioDataStamped>::SharedPtr _audio_sub;
+
+  /**
+   * @brief Parameters for VAD.
+   */
+  float _energy_threshold;  // Threshold for energy-based detection
+  float _hold_time;         // Time in seconds to hold detection state
+  int _min_samples;         // Minimum number of samples for detection
+
+  /**
+   * @brief State variables for VAD.
+   */
+  bool _current_vad_state;  // Current voice activity state
+  rclcpp::Time _last_voice_time;  // Time when voice was last detected
+  
+  /**
+   * @brief Conversion map for audio formats to byte sizes.
+   */
+  const std::unordered_map<std::string, int> _format_byte_size = {
+      {"U8", 1},   {"S8", 1},     {"U16LE", 2}, {"U16BE", 2},
+      {"S16LE", 2}, {"S16BE", 2}, {"S32LE", 4}, {"S32BE", 4},
+      {"F32LE", 4}, {"F32BE", 4}, {"F32", 4}
+  };
+};
+
+#endif /* VAD_NODE_HPP */
\ No newline at end of file

From 65ae51a7ccd6d930eea7650691e0ff12a80b248b Mon Sep 17 00:00:00 2001
From: Jeroen Veen <jveen1@gmail.com>
Date: Sun, 13 Apr 2025 13:17:05 +0200
Subject: [PATCH 02/10] Updated mkdocs.yml navigation

---
 docs/{usage => about}/voice_activity_detection.md  | 0
 mkdocs.yml                                         | 1 +
 2 files changed, 1 insertion(+)
 rename docs/{usage => about}/voice_activity_detection.md  (100%)

diff --git a/docs/usage/voice_activity_detection.md  b/docs/about/voice_activity_detection.md 
similarity index 100%
rename from docs/usage/voice_activity_detection.md 
rename to docs/about/voice_activity_detection.md 
diff --git a/mkdocs.yml b/mkdocs.yml
index f9321c3..ae06784 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -60,6 +60,7 @@ nav:
       - usage/usage_with_native_linux.md
   - Implementation:
     - about/implementation.md
+    - about/voice_activity_detection.md
   - Contributing:
     - contributing/rules.md
     - contributing/license.md
\ No newline at end of file

From 8b98a8665d6903e34e939d6d1c3b9890fcde1399 Mon Sep 17 00:00:00 2001
From: Jeroen Veen <jveen1@gmail.com>
Date: Sun, 13 Apr 2025 19:45:50 +0200
Subject: [PATCH 03/10] removed notes

---
 ROS2_Audio_Tools_project_knowledge.md | 812 --------------------------
 1 file changed, 812 deletions(-)
 delete mode 100644 ROS2_Audio_Tools_project_knowledge.md

diff --git a/ROS2_Audio_Tools_project_knowledge.md b/ROS2_Audio_Tools_project_knowledge.md
deleted file mode 100644
index 080d213..0000000
--- a/ROS2_Audio_Tools_project_knowledge.md
+++ /dev/null
@@ -1,812 +0,0 @@
-# EdgeVox Project Documentation
-
-# Project Structure
-```
-ROS2_Audio_Tools/
-├── conda/
-│   ├── lin/
-│   │   ├── recipe/
-│   │   │   ├── activate.sh
-│   │   │   ├── build_ament_cmake.sh
-│   │   │   ├── build_ament_python.sh
-│   │   │   ├── build_catkin.sh
-│   │   │   ├── deactivate.sh
-│   │   │   ├── recipe.yaml
-│   │   ├── pixi.toml
-│   ├── win/
-│   │   ├── recipe/
-│   │   │   ├── activate.bat
-│   │   │   ├── bld_ament_cmake.bat
-│   │   │   ├── bld_ament_python.bat
-│   │   │   ├── bld_catkin.bat
-│   │   │   ├── bld_catkin_merge.bat
-│   │   │   ├── bld_colcon_merge.bat
-│   │   │   ├── deactivate.bat
-│   │   │   ├── recipe.yaml
-│   │   │   ├── rosdistro_snapshot.yaml
-│   │   ├── pixi.toml
-├── docs/
-│   ├── about/
-│   │   ├── implementation.md
-│   ├── assets/
-│   │   ├── stylesheets/
-│   │   │   ├── logo.css
-│   │   ├── favicon.png
-│   │   ├── logo.png
-│   ├── contributing/
-│   │   ├── license.md
-│   │   ├── rules.md
-│   ├── usage/
-│   │   ├── overview.md
-│   │   ├── ros2_pixi_build_linux_windows.md
-│   │   ├── usage_with_native_linux.md
-│   ├── index.md
-├── msg/
-│   ├── AudioData.msg
-│   ├── AudioDataStamped.msg
-│   ├── AudioInfo.msg
-├── src/
-│   ├── audio_capture_node.cpp
-│   ├── audio_capture_node.hpp
-│   ├── audio_playback_node.cpp
-│   ├── audio_playback_node.hpp
-├── CMakeLists.txt
-├── LICENSE
-├── README.md
-├── colcon.pkg
-├── mkdocs.yml
-├── package.xml
-├── pixi.toml
-```
-
-# src/audio_capture_node.cpp
-```cpp
-/**
- * @file audio_capture_node.cpp
- * @brief Audio Capture Node for ROS 2 using SDL2
- *
- * Copyright 2025 <Hoog-V / CLFML>
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "audio_capture_node.hpp"
-#include <stdio.h>
-
-AudioCaptureNode::AudioCaptureNode()
-    : Node("audio_capture_node"), _desired_rate(-1.0) {
-
-  // Declare and get parameters
-  this->declare_parameter<std::string>("sample_format", "S16LE");
-  this->get_parameter("sample_format", _sample_format);
-
-  this->declare_parameter<int>("channels", 1);
-  this->declare_parameter<int>("sample_rate", 16000);
-  this->get_parameter("channels", _channels);
-  this->get_parameter("sample_rate", _sample_rate);
-
-  if (_channels < 1 || _channels > 8) {
-    RCLCPP_ERROR_STREAM(this->get_logger(),
-                        "Invalid channel count: " << _channels);
-    throw std::runtime_error("Unsupported media type.");
-  }
-
-  this->declare_parameter<int>("device", -1);
-  this->get_parameter("device", _device);
-
-  this->declare_parameter<int>("chunk_size", 1024);
-  int chunk_size;
-  this->get_parameter("chunk_size", chunk_size);
-
-  if (chunk_size < 512 || chunk_size > 4096) {
-    RCLCPP_ERROR_STREAM(this->get_logger(),
-                        "Invalid chunk size: " << chunk_size);
-    throw std::runtime_error("Unsupported chunk size!");
-  }
-
-  if (!ConfigureSDLStream(_device, _sample_format, chunk_size, _channels)) {
-    throw std::runtime_error("Failed to configure SDL audio capture.");
-  }
-
-  // Publishers
-  _pub = this->create_publisher<audio_tools::msg::AudioData>("audio", 10);
-  auto info_qos = rclcpp::QoS(rclcpp::KeepLast(1)).transient_local();
-  _pub_info = this->create_publisher<audio_tools::msg::AudioInfo>("audio_info",
-                                                                  info_qos);
-
-  _pub_stamped = this->create_publisher<audio_tools::msg::AudioDataStamped>(
-      "audio_stamped", 10);
-
-  _timer_info = rclcpp::create_timer(this, get_clock(), std::chrono::seconds(5),
-                                     [this] { publishInfo(); });
-  publishInfo();
-}
-
-void AudioCaptureNode::callback(uint8_t *stream, int len) {
-  audio_tools::msg::AudioData msg;
-  audio_tools::msg::AudioDataStamped stamped_msg;
-
-  // Generate frame ID
-  stamped_msg.header.frame_id = std::to_string(_audio_pos++);
-  // Compute timestamp based on buffer size
-  size_t buffer_size_total_nsec =
-      (static_cast<size_t>(len) * 1e9) / (_sample_rate * _nbytes);
-
-  _nano_sec += buffer_size_total_nsec;
-  while (_nano_sec >= RCL_S_TO_NS(1)) {
-    _sec++;
-    _nano_sec -= RCL_S_TO_NS(1);
-  }
-
-  stamped_msg.header.stamp.sec = _sec;
-  stamped_msg.header.stamp.nanosec = _nano_sec;
-
-  // Copy audio data
-  msg.data.assign(stream, stream + len);
-
-  stamped_msg.audio = msg;
-  stamped_msg.info = _info_msg;
-  // Publish messages
-  _pub->publish(msg);
-  _pub_stamped->publish(stamped_msg);
-}
-
-void AudioCaptureNode::publishInfo() {
-  _info_msg.channels = _channels;
-  _info_msg.sample_rate = _sample_rate;
-  _info_msg.sample_format = _sample_format;
-
-  _pub_info->publish(_info_msg);
-}
-
-AudioCaptureNode::~AudioCaptureNode() {
-  SDL_PauseAudioDevice(_dev_id_in, 1);
-  SDL_CloseAudioDevice(_dev_id_in);
-}
-
-bool AudioCaptureNode::ConfigureSDLStream(int device, const std::string &format,
-                                          int chunk_size, int channels) {
-  SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, SDL_LOG_PRIORITY_INFO);
-
-  if (SDL_Init(SDL_INIT_AUDIO) < 0) {
-    RCLCPP_ERROR(this->get_logger(), "SDL initialization failed: %s",
-                 SDL_GetError());
-    return false;
-  }
-
-  SDL_SetHintWithPriority(SDL_HINT_AUDIO_RESAMPLING_MODE, "medium",
-                          SDL_HINT_OVERRIDE);
-
-  int nDevices = SDL_GetNumAudioDevices(SDL_TRUE);
-  RCLCPP_INFO(this->get_logger(), "%s: Found %d capture devices:\n", __func__,
-              nDevices);
-
-  for (int i = 0; i < nDevices; i++) {
-    RCLCPP_INFO(this->get_logger(), "  - Capture device #%d: '%s'\n", i,
-                SDL_GetAudioDeviceName(i, SDL_TRUE));
-  }
-
-  SDL_zero(_capture_spec_requested);
-  SDL_zero(_capture_spec_obtained);
-
-  auto format_it = _format_map.find(format);
-  if (format_it == _format_map.end()) {
-    RCLCPP_ERROR(this->get_logger(), "Unknown audio format: %s",
-                 format.c_str());
-    return false;
-  }
-  _nbytes = format_it->second.second;
-
-  _capture_spec_requested.freq = _sample_rate;
-  _capture_spec_requested.format = format_it->second.first;
-  _capture_spec_requested.channels = channels;
-  _capture_spec_requested.samples = chunk_size;
-  _capture_spec_requested.callback = [](void *userdata, uint8_t *stream,
-                                        int len) {
-    static_cast<AudioCaptureNode *>(userdata)->callback(stream, len);
-  };
-  _capture_spec_requested.userdata = this;
-
-  if (device >= 0 && device < nDevices) {
-    RCLCPP_INFO(this->get_logger(),
-                "%s: Attempting to open capture device #%d: '%s'...\n",
-                __func__, device, SDL_GetAudioDeviceName(device, SDL_TRUE));
-
-    _dev_id_in = SDL_OpenAudioDevice(SDL_GetAudioDeviceName(device, SDL_TRUE),
-                                     SDL_TRUE, &_capture_spec_requested,
-                                     &_capture_spec_obtained, 0);
-  } else {
-    RCLCPP_INFO(this->get_logger(),
-                "%s: Attempting to open default capture device...\n", __func__);
-    _dev_id_in =
-        SDL_OpenAudioDevice(nullptr, SDL_TRUE, &_capture_spec_requested,
-                            &_capture_spec_obtained, 0);
-  }
-
-  if (!_dev_id_in) {
-    RCLCPP_INFO(this->get_logger(), "%s: Failed to open audio device: %s\n",
-                __func__, SDL_GetError());
-    return false;
-  }
-
-  RCLCPP_INFO(this->get_logger(), "%s: Capture device opened (SDL Id = %d):\n",
-              __func__, _dev_id_in);
-  RCLCPP_INFO(this->get_logger(), "  - Sample rate: %d\n",
-              _capture_spec_obtained.freq);
-  RCLCPP_INFO(this->get_logger(), "  - Format: %d (Requested: %d)\n",
-              _capture_spec_obtained.format, _capture_spec_requested.format);
-  RCLCPP_INFO(this->get_logger(), "  - Channels: %d (Requested: %d)\n",
-              _capture_spec_obtained.channels,
-              _capture_spec_requested.channels);
-  RCLCPP_INFO(this->get_logger(), "  - Samples per frame: %d\n",
-              _capture_spec_obtained.samples);
-
-  _sample_rate = _capture_spec_obtained.freq;
-  SDL_PauseAudioDevice(_dev_id_in, 0);
-  return true;
-}
-
-int main(int argc, char *argv[]) {
-  rclcpp::init(argc, argv);
-  rclcpp::spin(std::make_shared<AudioCaptureNode>());
-  rclcpp::shutdown();
-  return 0;
-}
-```
-
-
-# src/audio_capture_node.hpp
-```cpp
-/**
- * @file audio_capture_node.hpp
- * @brief Audio Capture Node for ROS 2 using SDL2
- *
- * Copyright 2025 <Hoog-V / CLFML>
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef AUDIO_CAPTURE_NODE_HPP
-#define AUDIO_CAPTURE_NODE_HPP
-
-#include <SDL.h>
-#include <SDL_audio.h>
-#include <rclcpp/rclcpp.hpp>
-
-#include "audio_tools/msg/audio_data.hpp"
-#include "audio_tools/msg/audio_data_stamped.hpp"
-#include "audio_tools/msg/audio_info.hpp"
-
-/**
- * @class AudioCaptureNode
- * @brief A ROS 2 node for capturing and publishing audio data using SDL2.
- */
-class AudioCaptureNode : public rclcpp::Node {
-public:
-  /**
-   * @brief Constructor for the AudioCaptureNode.
-   * Initializes audio capture, publishers, and diagnostics.
-   */
-  AudioCaptureNode();
-
-  /**
-   * @brief Exits the program with the given exit code.
-   * @param code The exit code.
-   */
-  void exitOnMainThread(int code);
-
-  /**
-   * @brief SDL2 audio callback function.
-   * Processes captured audio and publishes it as a ROS message.
-   * @param stream Pointer to the audio stream buffer.
-   * @param len Length of the audio buffer in bytes.
-   */
-  void callback(uint8_t *stream, int len);
-
-  /**
-   * @brief Publishes audio information (sample rate, channels, format).
-   */
-  void publishInfo();
-
-  /**
-   * @brief Destructor for AudioCaptureNode.
-   * Cleans up SDL audio resources.
-   */
-  ~AudioCaptureNode();
-
-private:
-  /**
-   * @brief ROS publisher for raw audio data.
-   */
-  rclcpp::Publisher<audio_tools::msg::AudioData>::SharedPtr _pub;
-
-  /**
-   * @brief ROS publisher for audio metadata (e.g., sample rate, format).
-   */
-  rclcpp::Publisher<audio_tools::msg::AudioInfo>::SharedPtr _pub_info;
-
-  /**
-   * @brief Timer for periodically publishing audio metadata.
-   */
-  rclcpp::TimerBase::SharedPtr _timer_info;
-
-  /**
-   * @brief Audio information metadata
-   */
-  audio_tools::msg::AudioInfo _info_msg;
-
-  /**
-   * @brief SDL audio specifications for requested and obtained formats.
-   */
-  SDL_AudioSpec _capture_spec_requested;
-  SDL_AudioSpec _capture_spec_obtained;
-
-  /**
-   * @brief Audio parameters: bitrate, channels, depth, sample rate, and device
-   * index.
-   */
-  int _bitrate, _channels, _sample_rate, _device;
-
-  /**
-   * @brief Number of bytes processed.
-   */
-  size_t _nbytes;
-
-  /**
-   * @brief Position counter for audio frames.
-   */
-  size_t _audio_pos = 0;
-
-  /**
-   * @brief Nanosecond and second timestamps for synchronization.
-   */
-  size_t _nano_sec = 0;
-  size_t _sec = 0;
-
-  /**
-   * @brief Desired publishing rate for diagnostics.
-   */
-  double _desired_rate;
-
-  /**
-   * @brief Audio sample format (e.g., "S16LE", "F32").
-   */
-  std::string _sample_format;
-
-  /**
-   * @brief SDL audio device ID.
-   */
-  SDL_AudioDeviceID _dev_id_in = 0;
-
-  /**
-   * @brief ROS publisher for timestamped audio data with diagnostics.
-   */
-  rclcpp::Publisher<audio_tools::msg::AudioDataStamped>::SharedPtr _pub_stamped;
-
-  /**
-   * @brief Configures the SDL2 audio capture stream.
-   * @param device The SDL2 device index (-1 for default).
-   * @param format The requested audio format (e.g., "S16LE").
-   * @param chunk_size The buffer size for each audio chunk (e.g., 1024
-   * samples).
-   * @param channels The number of audio channels (e.g., 1 for mono, 2 for
-   * stereo).
-   * @return True if configuration is successful, false otherwise.
-   */
-  bool ConfigureSDLStream(int device, const std::string &format, int chunk_size,
-                          int channels);
-
-  /**
-   * @brief Maps audio format strings to SDL2 audio formats and corresponding
-   * byte sizes.
-   */
-  const std::unordered_map<std::string, std::pair<SDL_AudioFormat, int>>
-      _format_map = {
-          {"U8", {AUDIO_U8, 1}}, ///< Unsigned 8-bit (1 byte)
-          {"S8", {AUDIO_S8, 1}}, ///< Signed 8-bit (1 byte)
-          {"U16LE",
-           {AUDIO_U16LSB, 2}}, ///< Unsigned 16-bit little-endian (2 bytes)
-          {"U16BE",
-           {AUDIO_U16MSB, 2}}, ///< Unsigned 16-bit big-endian (2 bytes)
-          {"S16LE",
-           {AUDIO_S16LSB, 2}}, ///< Signed 16-bit little-endian (2 bytes)
-          {"S16BE", {AUDIO_S16MSB, 2}}, ///< Signed 16-bit big-endian (2 bytes)
-          {"S32LE",
-           {AUDIO_S32LSB, 4}}, ///< Signed 32-bit little-endian (4 bytes)
-          {"S32BE", {AUDIO_S32MSB, 4}}, ///< Signed 32-bit big-endian (4 bytes)
-          {"F32LE",
-           {AUDIO_F32LSB, 4}}, ///< 32-bit float little-endian (4 bytes)
-          {"F32BE", {AUDIO_F32MSB, 4}}, ///< 32-bit float big-endian (4 bytes)
-          {"F32", {AUDIO_F32, 4}}       ///< 32-bit float (generic) (4 bytes)
-      };
-};
-
-#endif /* AUDIO_CAPTURE_NODE_HPP */
-
-```
-
-
-# src/audio_playback_node.cpp
-```cpp
-/**
- * @file audio_playback_node.cpp
- * @brief Audio Capture Node for ROS 2 using SDL2
- *
- * Copyright 2025 <Hoog-V / CLFML>
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "audio_playback_node.hpp"
-#include <stdio.h>
-
-AudioPlaybackNode::AudioPlaybackNode()
-    : Node("audio_playback_node"), _audio_device(0), _initialized(false) {
-  // Declare and get topic name parameter
-  std::string topic_name =
-      this->declare_parameter<std::string>("audio_topic", "/audio_playback");
-  int device_index =
-      this->declare_parameter<int>("device_index", -1); // -1 = default
-
-  _subscription = this->create_subscription<audio_tools::msg::AudioDataStamped>(
-      topic_name, 10,
-      std::bind(&AudioPlaybackNode::audio_cb, this, std::placeholders::_1));
-
-  if (SDL_Init(SDL_INIT_AUDIO) < 0) {
-    RCLCPP_FATAL(get_logger(), "SDL_Init failed: %s", SDL_GetError());
-    rclcpp::shutdown();
-  }
-  int num_devices = SDL_GetNumAudioDevices(0);
-  RCLCPP_INFO(get_logger(), "Available audio output devices:");
-  for (int i = 0; i < num_devices; ++i) {
-    const char *name = SDL_GetAudioDeviceName(i, 0);
-    RCLCPP_INFO(get_logger(), "  [%d]: %s%s", i, name,
-                (i == device_index ? " (selected)" : ""));
-  }
-
-  if (device_index >= 0 && device_index < num_devices) {
-    _device_name = SDL_GetAudioDeviceName(device_index, 0);
-    RCLCPP_INFO(get_logger(), "Using audio device: %s", _device_name.c_str());
-  } else if (device_index == -1) {
-    _device_name = ""; // SDL default
-    RCLCPP_INFO(get_logger(), "Using default audio device");
-  } else {
-    RCLCPP_FATAL(get_logger(), "Invalid device_index: %d (available: 0 to %d)",
-                 device_index, num_devices - 1);
-    rclcpp::shutdown();
-  }
-
-  RCLCPP_INFO(get_logger(), "Subscribed to topic: %s", topic_name.c_str());
-}
-
-AudioPlaybackNode::~AudioPlaybackNode() {
-  if (_audio_device != 0) {
-    SDL_CloseAudioDevice(_audio_device);
-  }
-  SDL_Quit();
-}
-
-bool AudioPlaybackNode::init_audio(const audio_tools::msg::AudioInfo &info) {
-  SDL_AudioSpec desired_spec;
-  SDL_zero(desired_spec);
-
-  desired_spec.freq = static_cast<int>(info.sample_rate);
-  desired_spec.channels = info.channels;
-  desired_spec.samples = 4096;
-  desired_spec.callback = nullptr;
-
-  if (info.sample_format == "S16LE") {
-    desired_spec.format = AUDIO_S16LSB;
-  } else if (info.sample_format == "S16BE") {
-    desired_spec.format = AUDIO_S16MSB;
-  } else if (info.sample_format == "U8") {
-    desired_spec.format = AUDIO_U8;
-  } else if (info.sample_format == "S8") {
-    desired_spec.format = AUDIO_S8;
-  } else if (info.sample_format == "F32LE") {
-    desired_spec.format = AUDIO_F32LSB;
-  } else if (info.sample_format == "F32BE") {
-    desired_spec.format = AUDIO_F32MSB;
-  } else {
-    RCLCPP_ERROR(get_logger(), "Unsupported format: %s",
-                 info.sample_format.c_str());
-    return false;
-  }
-
-  _audio_device =
-      SDL_OpenAudioDevice(_device_name.empty() ? nullptr : _device_name.c_str(),
-                          0, // 0 = output
-                          &desired_spec, &_obtained_spec, 0);
-  if (_audio_device == 0) {
-    RCLCPP_ERROR(get_logger(), "SDL_OpenAudioDevice error: %s", SDL_GetError());
-    return false;
-  }
-
-  SDL_PauseAudioDevice(_audio_device, 0);
-  RCLCPP_INFO(get_logger(), "Audio device initialized: %d Hz, %d channels",
-              _obtained_spec.freq, _obtained_spec.channels);
-
-  _initialized = true;
-  return true;
-}
-
-void AudioPlaybackNode::audio_cb(
-    const audio_tools::msg::AudioDataStamped::SharedPtr msg) {
-  const auto &info = msg->info;
-  const auto &audio = msg->audio;
-
-  if (!_initialized) {
-    if (!init_audio(info)) {
-      RCLCPP_ERROR(get_logger(), "Failed to initialize audio output.");
-      return;
-    }
-  }
-
-  if (audio.data.empty())
-    return;
-
-  if (SDL_QueueAudio(_audio_device, audio.data.data(), audio.data.size()) < 0) {
-    RCLCPP_ERROR(get_logger(), "SDL_QueueAudio error: %s", SDL_GetError());
-  }
-}
-
-int main(int argc, char *argv[]) {
-  rclcpp::init(argc, argv);
-  rclcpp::spin(std::make_shared<AudioPlaybackNode>());
-  rclcpp::shutdown();
-  return 0;
-}
-```
-
-
-# src/audio_playback_node.hpp
-```cpp
-/**
- * @file audio_playback_node.hpp
- * @brief Audio Playback Node for ROS 2 using SDL2
- *
- * Copyright 2025 <Hoog-V / CLFML>
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef AUDIO_PLAYBACK_NODE_HPP
-#define AUDIO_PLAYBACK_NODE_HPP
-#include <SDL.h>
-#include <SDL_audio.h>
-#include <rclcpp/rclcpp.hpp>
-
-#include "audio_tools/msg/audio_data.hpp"
-#include "audio_tools/msg/audio_data_stamped.hpp"
-#include "audio_tools/msg/audio_info.hpp"
-
-/**
- * @class AudioPlaybackNode
- * @brief A ROS 2 node for capturing and publishing audio data using SDL2.
- */
-class AudioPlaybackNode : public rclcpp::Node {
-public:
-  /**
-   * @brief Constructor for the AudioPlaybackNode.
-   * Initializes audio capture, publishers, and diagnostics.
-   */
-  AudioPlaybackNode();
-
-  /**
-   * @brief Destructor for AudioPlaybackNode.
-   * Cleans up SDL audio resources.
-   */
-  ~AudioPlaybackNode();
-
-private:
-  /**
-   * @brief Callback function for received audio_msg on subscribed topic
-   * @param msg The audio message which got received
-   */
-  void audio_cb(const audio_tools::msg::AudioDataStamped::SharedPtr msg);
-  /**
-   * @brief Function which automatically configures the audio_interface based on
-   * the received audio information
-   * @param info Custom message which contains the channels, sample_rate and
-   * encoding format
-   * @return false if failed e.g. invalid format, true if ok
-   */
-  bool init_audio(const audio_tools::msg::AudioInfo &info);
-
-  /**
-   * @brief Subscription object/handle for the subscribed audio topic
-   */
-  rclcpp::Subscription<audio_tools::msg::AudioDataStamped>::SharedPtr
-      _subscription;
-
-  /**
-   * @brief SDL audio device handle.
-   */
-  SDL_AudioDeviceID _audio_device;
-
-  /**
-   * @brief SDL audio specifications for obtained format.
-   */
-  SDL_AudioSpec _obtained_spec;
-
-  /**
-   * @brief Full name of audio_device
-   */
-  std::string _device_name;
-
-  /**
-   * @brief Internal state to prevent initializing audio interface twice
-   */
-  bool _initialized;
-};
-#endif /* AUDIO_PLAYBACK_NODE */
-```
-
-
-# CMakeLists.txt
-```cmake
-cmake_minimum_required(VERSION 3.1...3.14)
-project(audio_tools C CXX)
-
-find_package(ament_cmake REQUIRED)
-find_package(rclcpp REQUIRED)
-find_package(builtin_interfaces REQUIRED)
-find_package(rosidl_default_generators REQUIRED)
-find_package(std_msgs REQUIRED)
-find_package(SDL2 REQUIRED)
-
-rosidl_generate_interfaces(${PROJECT_NAME}
-  msg/AudioData.msg
-  msg/AudioDataStamped.msg
-  msg/AudioInfo.msg
-  DEPENDENCIES std_msgs
-)
-
-ament_export_dependencies(rosidl_default_runtime)
-rosidl_get_typesupport_target(cpp_typesupport_target ${PROJECT_NAME} "rosidl_typesupport_cpp")
-
-add_executable(audio_capture_node src/audio_capture_node.cpp)
-
-ament_target_dependencies(audio_capture_node
-    rclcpp
-    std_msgs
-)
-
-target_link_libraries(audio_capture_node "${cpp_typesupport_target}" ${SDL2_LIBRARIES}) 
-target_include_directories(audio_capture_node PUBLIC ${SDL2_INCLUDE_DIRS})
-
-if(UNIX AND NOT APPLE)  # Linux only
-  target_compile_options(audio_capture_node PRIVATE -Wall)
-endif()
-
-install(TARGETS
-audio_capture_node
-DESTINATION lib/${PROJECT_NAME}) 
-
-
-add_executable(audio_playback_node src/audio_playback_node.cpp)
-target_link_libraries(audio_playback_node "${cpp_typesupport_target}" ${SDL2_LIBRARIES}) 
-target_include_directories(audio_playback_node PUBLIC ${SDL2_INCLUDE_DIRS})
-
-if(UNIX AND NOT APPLE)
-  target_compile_options(audio_playback_node PRIVATE -Wall)
-endif()
-
-ament_target_dependencies(audio_playback_node
-    rclcpp
-    std_msgs
-)
-
-
-install(TARGETS
-audio_playback_node
-DESTINATION lib/${PROJECT_NAME}) 
-ament_package()
-```
-
-
-# mkdocs.yml
-```text
-site_name: ROS2_Audio_Tools
-extra_css:
-  - assets/stylesheets/logo.css
-theme:
-  name: material
-  # custom_dir: docs/overrides
-  features:
-    - announce.dismiss
-    - content.action.edit
-    - content.action.view
-    - content.code.annotate
-    - content.code.copy
-    # - content.code.select
-    # - content.footnote.tooltips
-    # - content.tabs.link
-    - content.tooltips
-    # - header.autohide
-    # - navigation.expand
-    - navigation.footer
-    - navigation.indexes
-    # - navigation.instant
-    # - navigation.instant.prefetch
-    # - navigation.instant.progress
-    # - navigation.prune
-    - navigation.sections
-    - navigation.tabs
-    # - navigation.tabs.sticky
-    - navigation.top
-    - navigation.tracking
-    - search.highlight
-    - search.share
-    - search.suggest
-    - toc.follow
-    # - toc.integrate
-  palette:
-      scheme: default
-      primary: green
-      toggle:
-        icon: material/toggle-switch
-        name: Switch to dark mode
-  font:
-    text: Avenir Next Condensed
-    code: Avenir Next Condensed
-  favicon: assets/favicon.png
-  logo: assets/logo.png
-
-markdown_extensions:
-  - attr_list
-  - md_in_html
-  - admonition
-  - pymdownx.details
-  - pymdownx.superfences
-
-nav:
-  - index.md
-  - Getting Started:
-    - usage/overview.md
-    - Build Environment:
-      - usage/ros2_pixi_build_linux_windows.md
-      - usage/usage_with_native_linux.md
-  - Implementation:
-    - about/implementation.md
-  - Contributing:
-    - contributing/rules.md
-    - contributing/license.md
-```
-

From 058025c58750f78f72100661447610e3faa737cc Mon Sep 17 00:00:00 2001
From: Jeroen Veen <jveen1@gmail.com>
Date: Sun, 13 Apr 2025 19:50:21 +0200
Subject: [PATCH 04/10] removed duplicate docs

---
 docs/about/voice_activity_detection.md  | 123 ------------------------
 1 file changed, 123 deletions(-)
 delete mode 100644 docs/about/voice_activity_detection.md 

diff --git a/docs/about/voice_activity_detection.md  b/docs/about/voice_activity_detection.md 
deleted file mode 100644
index c9a7547..0000000
--- a/docs/about/voice_activity_detection.md 	
+++ /dev/null
@@ -1,123 +0,0 @@
-# Voice Activity Detection
-
-The Voice Activity Detection (VAD) node provides real-time detection of voice or speech in audio streams. The node uses a simple energy-based algorithm to determine if voice is present in the audio signal.
-
-## How it works
-
-The VAD node subscribes to audio data and performs the following steps:
-
-1. Converts the incoming audio to floating-point samples
-2. Calculates the energy level of the audio signal
-3. Compares this energy to a threshold
-4. Applies a hold time to prevent rapid on/off switching
-5. Publishes the detection state
-
-## Node Parameters
-
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `energy_threshold` | float | 0.01 | Energy threshold for detection |
-| `hold_time` | float | 0.5 | Time in seconds to maintain detection after voice stops |
-| `min_samples` | int | 160 | Minimum number of samples required for detection |
-
-## Topics
-
-### Subscribed Topics
-
-- `audio_stamped` ([audio_tools/AudioDataStamped](../msg/AudioDataStamped.md)): Audio data with timestamp and format information
-
-### Published Topics
-
-- `voice_activity` ([audio_tools/VoiceActivity](../msg/VoiceActivity.md)): Voice activity detection results
-
-## Message Format
-
-The `VoiceActivity` message includes:
-
-```
-std_msgs/Header header      # Timestamp and frame information
-bool active                 # True when voice activity is detected
-float32 energy_level        # Current audio energy level
-float32 threshold           # Current threshold being used
-float32 hold_time           # Hold time in seconds for the detection
-```
-
-## Usage Example
-
-To run the VAD node separately:
-
-```bash
-ros2 run audio_tools vad_node --ros-args -p energy_threshold:=0.015 -p hold_time:=0.3
-```
-
-To run with the audio capture node:
-
-```bash
-ros2 launch audio_tools audio_vad.launch.py
-```
-
-## Tuning the VAD
-
-The energy threshold is the most important parameter to tune:
-
-- **Too low**: Will trigger on background noise
-- **Too high**: May miss quiet speech
-
-### Step-by-Step Tuning Process
-
-1. **Monitor the energy level of your audio**:
-   ```bash
-   ros2 topic echo /voice_activity
-   ```
-
-2. **Determine your noise floor**:
-   Remain silent and observe the `energy_level` values. Note the maximum level during silence.
-
-3. **Determine speech energy level**:
-   Speak at a normal volume and note the typical `energy_level` values.
-
-4. **Set the threshold**:
-   Set the threshold slightly above your noise floor:
-   ```bash
-   ros2 param set /vad energy_threshold 0.015  # Adjust based on your observations
-   ```
-
-5. **Adjust hold time**:
-   If detection is fragmented during normal speech, increase the hold time:
-   ```bash
-   ros2 param set /vad hold_time 0.7  # Adjust as needed
-   ```
-
-### Visual Tuning
-
-For visual tuning, use RQT to plot the energy levels:
-
-```bash
-rqt_plot /voice_activity/energy_level /voice_activity/threshold /voice_activity/active
-```
-
-This will display:
-- Your audio energy over time
-- The threshold line
-- The detection state (0 or 1)
-
-### Environment-Specific Settings
-
-Here are suggested starting points for different environments:
-
-| Environment | Suggested energy_threshold | Suggested hold_time |
-|-------------|----------------------------|---------------------|
-| Quiet room, close mic | 0.005-0.01 | 0.3-0.5s |
-| Office, close mic | 0.01-0.02 | 0.5-0.7s |
-| Noisy environment | 0.02-0.05 | 0.7-1.0s |
-| Far-field mic | 0.01-0.03 | 0.7-1.0s |
-
-## Limitations
-
-This implementation uses a simple energy-based detector which works well in low-noise environments but may not perform optimally in:
-
-- Noisy environments
-- Situations with varying background noise
-- When capturing audio from distant microphones
-
-For more advanced detection in challenging environments, consider integrating a machine learning-based VAD algorithm.
\ No newline at end of file

From ee109a6df80d344c3609109e7f0f11ff52a7e45b Mon Sep 17 00:00:00 2001
From: Jeroen Veen <jveen1@gmail.com>
Date: Sun, 13 Apr 2025 19:52:04 +0200
Subject: [PATCH 05/10] removed double install

---
 CMakeLists.txt | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5be7790..69c1197 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -34,10 +34,6 @@ if(UNIX AND NOT APPLE)  # Linux only
   target_compile_options(audio_capture_node PRIVATE -Wall)
 endif()
 
-install(TARGETS
-audio_capture_node
-DESTINATION lib/${PROJECT_NAME}) 
-
 # Audio Playback Node
 add_executable(audio_playback_node src/audio_playback_node.cpp)
 target_link_libraries(audio_playback_node "${cpp_typesupport_target}" ${SDL2_LIBRARIES}) 
@@ -69,7 +65,7 @@ ament_target_dependencies(vad_node
 install(TARGETS
   audio_capture_node
   audio_playback_node
-  vad_node  # Added VAD node
+  vad_node
   DESTINATION lib/${PROJECT_NAME}
 )
 

From e9e53b4df4f22e76ac1e1a8c25272843d041e803 Mon Sep 17 00:00:00 2001
From: Jeroen Veen <jveen1@gmail.com>
Date: Sun, 13 Apr 2025 19:53:27 +0200
Subject: [PATCH 06/10] removed vad md file

---
 mkdocs.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mkdocs.yml b/mkdocs.yml
index ae06784..f9321c3 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -60,7 +60,6 @@ nav:
       - usage/usage_with_native_linux.md
   - Implementation:
     - about/implementation.md
-    - about/voice_activity_detection.md
   - Contributing:
     - contributing/rules.md
     - contributing/license.md
\ No newline at end of file

From 803e58d355c582763d1577c605ac329a50a405ba Mon Sep 17 00:00:00 2001
From: Jeroen Veen <jveen1@gmail.com>
Date: Sun, 13 Apr 2025 19:56:09 +0200
Subject: [PATCH 07/10] linted

---
 src/vad_node.cpp | 97 +++++++++++++++++++++++++-----------------------
 src/vad_node.hpp | 26 ++++++-------
 2 files changed, 63 insertions(+), 60 deletions(-)

diff --git a/src/vad_node.cpp b/src/vad_node.cpp
index f4b614a..96457b6 100644
--- a/src/vad_node.cpp
+++ b/src/vad_node.cpp
@@ -17,22 +17,23 @@
  * limitations under the License.
  */
 #include "vad_node.hpp"
-#include <cstring>
 #include <algorithm>
 #include <cmath>
+#include <cstring>
 
 VADNode::VADNode() : Node("vad_node"), _current_vad_state(false) {
   // Declare and get parameters
   this->declare_parameter<float>("energy_threshold", 0.01);
   this->declare_parameter<float>("hold_time", 0.5);
   this->declare_parameter<int>("min_samples", 160);
-  
+
   this->get_parameter("energy_threshold", _energy_threshold);
   this->get_parameter("hold_time", _hold_time);
   this->get_parameter("min_samples", _min_samples);
 
   RCLCPP_INFO(this->get_logger(), "VAD initialized with parameters:");
-  RCLCPP_INFO(this->get_logger(), "  - Energy threshold: %.4f", _energy_threshold);
+  RCLCPP_INFO(this->get_logger(), "  - Energy threshold: %.4f",
+              _energy_threshold);
   RCLCPP_INFO(this->get_logger(), "  - Hold time: %.2f seconds", _hold_time);
   RCLCPP_INFO(this->get_logger(), "  - Min samples: %d", _min_samples);
 
@@ -40,20 +41,22 @@ VADNode::VADNode() : Node("vad_node"), _current_vad_state(false) {
   _last_voice_time = this->now();
 
   // Create publisher for voice activity detection
-  _vad_pub = this->create_publisher<audio_tools::msg::VoiceActivity>("voice_activity", 10);
-  
+  _vad_pub = this->create_publisher<audio_tools::msg::VoiceActivity>(
+      "voice_activity", 10);
+
   // Subscribe to audio data stamped topic
   _audio_sub = this->create_subscription<audio_tools::msg::AudioDataStamped>(
       "audio_stamped", 10,
       std::bind(&VADNode::audioCallback, this, std::placeholders::_1));
-  
+
   RCLCPP_INFO(this->get_logger(), "VAD node is ready");
 }
 
-void VADNode::audioCallback(const audio_tools::msg::AudioDataStamped::SharedPtr msg) {
+void VADNode::audioCallback(
+    const audio_tools::msg::AudioDataStamped::SharedPtr msg) {
   const auto &audio_data = msg->audio.data;
   const auto &audio_info = msg->info;
-  
+
   // Skip if we have no data
   if (audio_data.empty()) {
     return;
@@ -62,22 +65,22 @@ void VADNode::audioCallback(const audio_tools::msg::AudioDataStamped::SharedPtr
   // Convert audio data to float samples for processing
   std::vector<float> samples;
   if (!convertToFloatSamples(audio_data, audio_info.sample_format, samples)) {
-    RCLCPP_WARN(this->get_logger(), "Failed to convert audio format %s", 
+    RCLCPP_WARN(this->get_logger(), "Failed to convert audio format %s",
                 audio_info.sample_format.c_str());
     return;
   }
-  
+
   // Calculate energy level
   float energy_level = calculateEnergyLevel(samples);
-  
+
   // Detect voice activity
-  bool voice_active = (energy_level > _energy_threshold) && 
+  bool voice_active = (energy_level > _energy_threshold) &&
                       (samples.size() >= static_cast<size_t>(_min_samples));
-  
+
   // Handle hold time logic
   if (voice_active) {
     _last_voice_time = this->now();
-    
+
     if (!_current_vad_state) {
       _current_vad_state = true;
       RCLCPP_DEBUG(this->get_logger(), "Voice activity started");
@@ -90,110 +93,110 @@ void VADNode::audioCallback(const audio_tools::msg::AudioDataStamped::SharedPtr
       RCLCPP_DEBUG(this->get_logger(), "Voice activity ended");
     }
   }
-  
+
   // Publish the current VAD state
   publishVoiceActivity(_current_vad_state, energy_level, msg->header.stamp);
 }
 
-bool VADNode::detectVoiceActivity(const std::vector<uint8_t> &audio_data, 
-                                 const std::string &sample_format) {
+bool VADNode::detectVoiceActivity(const std::vector<uint8_t> &audio_data,
+                                  const std::string &sample_format) {
   // Convert to float samples
   std::vector<float> samples;
   if (!convertToFloatSamples(audio_data, sample_format, samples)) {
     return false;
   }
-  
+
   // Check if we have enough samples
   if (samples.size() < static_cast<size_t>(_min_samples)) {
     return false;
   }
-  
+
   // Calculate energy level
   float energy_level = calculateEnergyLevel(samples);
-  
+
   // Compare with threshold
   return energy_level > _energy_threshold;
 }
 
 float VADNode::calculateEnergyLevel(const std::vector<float> &samples) {
   float energy = 0.0f;
-  
+
   for (const auto &sample : samples) {
     energy += sample * sample;
   }
-  
+
   // Normalize by number of samples
   if (!samples.empty()) {
     energy /= samples.size();
   }
-  
+
   return energy;
 }
 
 bool VADNode::convertToFloatSamples(const std::vector<uint8_t> &audio_data,
-                                   const std::string &sample_format,
-                                   std::vector<float> &out_samples) {
+                                    const std::string &sample_format,
+                                    std::vector<float> &out_samples) {
   // Find the byte size for this format
   auto it = _format_byte_size.find(sample_format);
   if (it == _format_byte_size.end()) {
-    RCLCPP_ERROR(this->get_logger(), "Unknown audio format: %s", sample_format.c_str());
+    RCLCPP_ERROR(this->get_logger(), "Unknown audio format: %s",
+                 sample_format.c_str());
     return false;
   }
-  
+
   int bytes_per_sample = it->second;
   size_t num_samples = audio_data.size() / bytes_per_sample;
   out_samples.resize(num_samples);
-  
+
   // Convert based on format
   if (sample_format == "F32" || sample_format == "F32LE") {
     // Direct copy for float format (assuming little endian)
     std::memcpy(out_samples.data(), audio_data.data(), audio_data.size());
-  } 
-  else if (sample_format == "S16LE") {
+  } else if (sample_format == "S16LE") {
     // Convert signed 16-bit LE to float
-    const int16_t *int_samples = reinterpret_cast<const int16_t*>(audio_data.data());
+    const int16_t *int_samples =
+        reinterpret_cast<const int16_t *>(audio_data.data());
     for (size_t i = 0; i < num_samples; i++) {
       out_samples[i] = int_samples[i] / 32768.0f;
     }
-  } 
-  else if (sample_format == "S8") {
+  } else if (sample_format == "S8") {
     // Convert signed 8-bit to float
-    const int8_t *int_samples = reinterpret_cast<const int8_t*>(audio_data.data());
+    const int8_t *int_samples =
+        reinterpret_cast<const int8_t *>(audio_data.data());
     for (size_t i = 0; i < num_samples; i++) {
       out_samples[i] = int_samples[i] / 128.0f;
     }
-  } 
-  else if (sample_format == "U8") {
+  } else if (sample_format == "U8") {
     // Convert unsigned 8-bit to float
     for (size_t i = 0; i < num_samples; i++) {
       out_samples[i] = (audio_data[i] - 128) / 128.0f;
     }
-  } 
-  else {
-    RCLCPP_WARN(this->get_logger(), 
-                "Format conversion not implemented for %s, using raw values", 
+  } else {
+    RCLCPP_WARN(this->get_logger(),
+                "Format conversion not implemented for %s, using raw values",
                 sample_format.c_str());
-    
+
     // Just copy raw bytes as a fallback
     for (size_t i = 0; i < num_samples; i++) {
-      out_samples[i] = static_cast<float>(audio_data[i * bytes_per_sample]) / 255.0f;
+      out_samples[i] =
+          static_cast<float>(audio_data[i * bytes_per_sample]) / 255.0f;
     }
   }
-  
+
   return true;
 }
 
-void VADNode::publishVoiceActivity(bool active, float energy_level, 
-                                  const rclcpp::Time &timestamp) {
+void VADNode::publishVoiceActivity(bool active, float energy_level,
+                                   const rclcpp::Time &timestamp) {
   auto msg = std::make_unique<audio_tools::msg::VoiceActivity>();
-  
+
   msg->header.stamp = timestamp;
   msg->header.frame_id = "audio_frame";
   msg->active = active;
   msg->energy_level = energy_level;
   msg->threshold = _energy_threshold;
   msg->hold_time = _hold_time;
-  
+
   _vad_pub->publish(std::move(msg));
 }
 
diff --git a/src/vad_node.hpp b/src/vad_node.hpp
index 16c5011..0458ddf 100644
--- a/src/vad_node.hpp
+++ b/src/vad_node.hpp
@@ -56,9 +56,9 @@ class VADNode : public rclcpp::Node {
    * @param sample_format The format of the audio samples.
    * @return True if voice activity is detected, false otherwise.
    */
-  bool detectVoiceActivity(const std::vector<uint8_t> &audio_data, 
+  bool detectVoiceActivity(const std::vector<uint8_t> &audio_data,
                            const std::string &sample_format);
-  
+
   /**
    * @brief Converts audio data to float samples for processing.
    * @param audio_data The raw audio data.
@@ -83,7 +83,7 @@ class VADNode : public rclcpp::Node {
    * @param energy_level The current audio energy level.
    * @param timestamp The timestamp for the message.
    */
-  void publishVoiceActivity(bool active, float energy_level, 
+  void publishVoiceActivity(bool active, float energy_level,
                             const rclcpp::Time &timestamp);
 
   /**
@@ -94,29 +94,29 @@ class VADNode : public rclcpp::Node {
   /**
    * @brief ROS subscriber for audio data.
    */
-  rclcpp::Subscription<audio_tools::msg::AudioDataStamped>::SharedPtr _audio_sub;
+  rclcpp::Subscription<audio_tools::msg::AudioDataStamped>::SharedPtr
+      _audio_sub;
 
   /**
    * @brief Parameters for VAD.
    */
-  float _energy_threshold;  // Threshold for energy-based detection
-  float _hold_time;         // Time in seconds to hold detection state
-  int _min_samples;         // Minimum number of samples for detection
+  float _energy_threshold; // Threshold for energy-based detection
+  float _hold_time;        // Time in seconds to hold detection state
+  int _min_samples;        // Minimum number of samples for detection
 
   /**
    * @brief State variables for VAD.
    */
-  bool _current_vad_state;  // Current voice activity state
-  rclcpp::Time _last_voice_time;  // Time when voice was last detected
-  
+  bool _current_vad_state;       // Current voice activity state
+  rclcpp::Time _last_voice_time; // Time when voice was last detected
+
   /**
    * @brief Conversion map for audio formats to byte sizes.
    */
   const std::unordered_map<std::string, int> _format_byte_size = {
-      {"U8", 1},   {"S8", 1},     {"U16LE", 2}, {"U16BE", 2},
+      {"U8", 1},    {"S8", 1},    {"U16LE", 2}, {"U16BE", 2},
       {"S16LE", 2}, {"S16BE", 2}, {"S32LE", 4}, {"S32BE", 4},
-      {"F32LE", 4}, {"F32BE", 4}, {"F32", 4}
-  };
+      {"F32LE", 4}, {"F32BE", 4}, {"F32", 4}};
 };
 
 #endif /* VAD_NODE_HPP */
\ No newline at end of file

From 2441d8f8055b361e506c938ed4f5e5f20ae23e1d Mon Sep 17 00:00:00 2001
From: Jeroen Veen <jveen1@gmail.com>
Date: Sun, 13 Apr 2025 20:09:51 +0200
Subject: [PATCH 08/10] Now output topic names are retrieved from parameters.

---
 src/vad_node.cpp | 26 +++++++++++++++-----------
 src/vad_node.hpp | 14 ++++++++++++--
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/src/vad_node.cpp b/src/vad_node.cpp
index 96457b6..81ab573 100644
--- a/src/vad_node.cpp
+++ b/src/vad_node.cpp
@@ -26,27 +26,36 @@ VADNode::VADNode() : Node("vad_node"), _current_vad_state(false) {
   this->declare_parameter<float>("energy_threshold", 0.01);
   this->declare_parameter<float>("hold_time", 0.5);
   this->declare_parameter<int>("min_samples", 160);
+  this->declare_parameter<std::string>("voice_activity_topic",
+                                       "voice_activity");
+  this->declare_parameter<std::string>("audio_data_topic", "audio_stamped");
 
   this->get_parameter("energy_threshold", _energy_threshold);
   this->get_parameter("hold_time", _hold_time);
   this->get_parameter("min_samples", _min_samples);
+  this->get_parameter("voice_activity_topic", _voice_activity_topic);
+  this->get_parameter("audio_data_topic", _audio_data_topic);
 
   RCLCPP_INFO(this->get_logger(), "VAD initialized with parameters:");
   RCLCPP_INFO(this->get_logger(), "  - Energy threshold: %.4f",
               _energy_threshold);
   RCLCPP_INFO(this->get_logger(), "  - Hold time: %.2f seconds", _hold_time);
   RCLCPP_INFO(this->get_logger(), "  - Min samples: %d", _min_samples);
+  RCLCPP_INFO(this->get_logger(), "  - Voice activity topic: %s",
+              _voice_activity_topic.c_str());
+  RCLCPP_INFO(this->get_logger(), "  - Audio data topic: %s",
+              _audio_data_topic.c_str());
 
   // Initialize the last voice time to current time
   _last_voice_time = this->now();
 
   // Create publisher for voice activity detection
   _vad_pub = this->create_publisher<audio_tools::msg::VoiceActivity>(
-      "voice_activity", 10);
+      _voice_activity_topic, 10);
 
   // Subscribe to audio data stamped topic
   _audio_sub = this->create_subscription<audio_tools::msg::AudioDataStamped>(
-      "audio_stamped", 10,
+      _audio_data_topic, 10,
       std::bind(&VADNode::audioCallback, this, std::placeholders::_1));
 
   RCLCPP_INFO(this->get_logger(), "VAD node is ready");
@@ -172,15 +181,10 @@ bool VADNode::convertToFloatSamples(const std::vector<uint8_t> &audio_data,
       out_samples[i] = (audio_data[i] - 128) / 128.0f;
     }
   } else {
-    RCLCPP_WARN(this->get_logger(),
-                "Format conversion not implemented for %s, using raw values",
-                sample_format.c_str());
-
-    // Just copy raw bytes as a fallback
-    for (size_t i = 0; i < num_samples; i++) {
-      out_samples[i] =
-          static_cast<float>(audio_data[i * bytes_per_sample]) / 255.0f;
-    }
+    RCLCPP_ERROR(this->get_logger(), "Format conversion not implemented for %s",
+                 sample_format.c_str());
+    throw std::runtime_error("Format conversion not implemented for " +
+                             sample_format);
   }
 
   return true;
diff --git a/src/vad_node.hpp b/src/vad_node.hpp
index 0458ddf..00da1b9 100644
--- a/src/vad_node.hpp
+++ b/src/vad_node.hpp
@@ -8,10 +8,10 @@
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
+ * distributed under the License is an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
@@ -117,6 +117,16 @@ class VADNode : public rclcpp::Node {
       {"U8", 1},    {"S8", 1},    {"U16LE", 2}, {"U16BE", 2},
       {"S16LE", 2}, {"S16BE", 2}, {"S32LE", 4}, {"S32BE", 4},
       {"F32LE", 4}, {"F32BE", 4}, {"F32", 4}};
+
+  /**
+   * @brief Parameter for the voice activity topic name.
+   */
+  std::string _voice_activity_topic;
+
+  /**
+   * @brief Parameter for the audio data topic name.
+   */
+  std::string _audio_data_topic;
 };
 
 #endif /* VAD_NODE_HPP */
\ No newline at end of file

From a7c77ac7240638af9cfde982c2eb29c239aee6b0 Mon Sep 17 00:00:00 2001
From: Jeroen Veen <jveen1@gmail.com>
Date: Sun, 13 Apr 2025 20:15:45 +0200
Subject: [PATCH 09/10] VAD node output  parameter definitions set

---
 launch/audio_vad.launch.py | 62 +++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/launch/audio_vad.launch.py b/launch/audio_vad.launch.py
index 3299074..c56b88f 100644
--- a/launch/audio_vad.launch.py
+++ b/launch/audio_vad.launch.py
@@ -1,31 +1,39 @@
 from launch import LaunchDescription
 from launch_ros.actions import Node
 
+
 def generate_launch_description():
-    return LaunchDescription([
-        Node(
-            package='audio_tools',
-            executable='audio_capture_node',
-            name='audio_capture',
-            parameters=[{
-                'sample_format': 'S16LE',
-                'channels': 1,
-                'sample_rate': 16000,
-                'device': -1,  # Use default device
-                'chunk_size': 1024,
-            }],
-            output='screen',
-        ),
-        
-        Node(
-            package='audio_tools',
-            executable='vad_node',
-            name='vad',
-            parameters=[{
-                'energy_threshold': 0.01,
-                'hold_time': 0.5,
-                'min_samples': 160,
-            }], # Office, close mic
-            output='screen',
-        ),
-    ])
\ No newline at end of file
+    return LaunchDescription(
+        [
+            Node(
+                package="audio_tools",
+                executable="audio_capture_node",
+                name="audio_capture",
+                parameters=[
+                    {
+                        "sample_format": "S16LE",
+                        "channels": 1,
+                        "sample_rate": 16000,
+                        "device": -1,  # Use default device
+                        "chunk_size": 1024,
+                    }
+                ],
+                output="screen",
+            ),
+            Node(
+                package="audio_tools",
+                executable="vad_node",
+                name="vad",
+                parameters=[
+                    {
+                        "energy_threshold": 0.01,
+                        "hold_time": 0.5,
+                        "min_samples": 160,
+                        "voice_activity_topic": "voice_activity",
+                        "audio_data_topic": "audio_stamped",
+                    }
+                ],  # Office, close mic
+                output="screen",
+            ),
+        ]
+    )

From 41f304f423711d0b17ec1c51cb29c72115fb123c Mon Sep 17 00:00:00 2001
From: Jeroen Veen <jveen1@gmail.com>
Date: Sun, 13 Apr 2025 20:22:49 +0200
Subject: [PATCH 10/10] fixed typo

---
 docs/usage/overview.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/usage/overview.md b/docs/usage/overview.md
index 567b3d1..9d8a144 100644
--- a/docs/usage/overview.md
+++ b/docs/usage/overview.md
@@ -64,7 +64,7 @@ Capture microphone input and publish to `/audio_stamped`. Simple energy-based Vo
 
 ```bash
 # Terminal 1
-ros2 run audio_tools audio_vad.launch.py
+ros2 launch audio_tools audio_vad.launch.py
 
 # Terminal 2
 ros2 topic echo /voice_activity