diff --git a/homeassistant/components/esphome/manager.py b/homeassistant/components/esphome/manager.py
index ee0d2371a56..f9f24128e2a 100644
--- a/homeassistant/components/esphome/manager.py
+++ b/homeassistant/components/esphome/manager.py
@@ -16,6 +16,7 @@ from aioesphomeapi import (
     RequiresEncryptionAPIError,
     UserService,
     UserServiceArgType,
+    VoiceAssistantAudioSettings,
     VoiceAssistantEventType,
 )
 from awesomeversion import AwesomeVersion
@@ -319,7 +320,10 @@ class ESPHomeManager:
             self.voice_assistant_udp_server = None
 
     async def _handle_pipeline_start(
-        self, conversation_id: str, flags: int
+        self,
+        conversation_id: str,
+        flags: int,
+        audio_settings: VoiceAssistantAudioSettings,
     ) -> int | None:
         """Start a voice assistant pipeline."""
         if self.voice_assistant_udp_server is not None:
@@ -340,6 +344,7 @@ class ESPHomeManager:
                 device_id=self.device_id,
                 conversation_id=conversation_id or None,
                 flags=flags,
+                audio_settings=audio_settings,
             ),
             "esphome.voice_assistant_udp_server.run_pipeline",
         )
diff --git a/homeassistant/components/esphome/manifest.json b/homeassistant/components/esphome/manifest.json
index 01e11071b69..d6fdd971fa6 100644
--- a/homeassistant/components/esphome/manifest.json
+++ b/homeassistant/components/esphome/manifest.json
@@ -16,7 +16,7 @@
   "loggers": ["aioesphomeapi", "noiseprotocol"],
   "requirements": [
     "async-interrupt==1.1.1",
-    "aioesphomeapi==16.0.6",
+    "aioesphomeapi==17.0.0",
     "bluetooth-data-tools==1.12.0",
     "esphome-dashboard-api==1.2.3"
   ],
diff --git a/homeassistant/components/esphome/voice_assistant.py b/homeassistant/components/esphome/voice_assistant.py
index c501d756e54..58f9ce5abf4 100644
--- a/homeassistant/components/esphome/voice_assistant.py
+++ b/homeassistant/components/esphome/voice_assistant.py
@@ -7,14 +7,20 @@ import logging
 import socket
 from typing import cast
 
-from aioesphomeapi import VoiceAssistantCommandFlag, VoiceAssistantEventType
+from aioesphomeapi import (
+    VoiceAssistantAudioSettings,
+    VoiceAssistantCommandFlag,
+    VoiceAssistantEventType,
+)
 
 from homeassistant.components import stt, tts
 from homeassistant.components.assist_pipeline import (
+    AudioSettings,
     PipelineEvent,
     PipelineEventType,
     PipelineNotFound,
     PipelineStage,
+    WakeWordSettings,
     async_pipeline_from_audio_stream,
     select as pipeline_select,
 )
@@ -64,7 +70,6 @@ class VoiceAssistantUDPServer(asyncio.DatagramProtocol):
         entry_data: RuntimeEntryData,
         handle_event: Callable[[VoiceAssistantEventType, dict[str, str] | None], None],
         handle_finished: Callable[[], None],
-        audio_timeout: float = 2.0,
     ) -> None:
         """Initialize UDP receiver."""
         self.context = Context()
@@ -78,7 +83,6 @@ class VoiceAssistantUDPServer(asyncio.DatagramProtocol):
         self.handle_event = handle_event
         self.handle_finished = handle_finished
         self._tts_done = asyncio.Event()
-        self.audio_timeout = audio_timeout
 
     async def start_server(self) -> int:
         """Start accepting connections."""
@@ -212,9 +216,11 @@ class VoiceAssistantUDPServer(asyncio.DatagramProtocol):
         device_id: str,
         conversation_id: str | None,
         flags: int = 0,
-        pipeline_timeout: float = 30.0,
+        audio_settings: VoiceAssistantAudioSettings | None = None,
     ) -> None:
         """Run the Voice Assistant pipeline."""
+        if audio_settings is None:
+            audio_settings = VoiceAssistantAudioSettings()
 
         tts_audio_output = (
             "raw" if self.device_info.voice_assistant_version >= 2 else "mp3"
@@ -226,31 +232,36 @@ class VoiceAssistantUDPServer(asyncio.DatagramProtocol):
         else:
             start_stage = PipelineStage.STT
         try:
-            async with asyncio.timeout(pipeline_timeout):
-                await async_pipeline_from_audio_stream(
-                    self.hass,
-                    context=self.context,
-                    event_callback=self._event_callback,
-                    stt_metadata=stt.SpeechMetadata(
-                        language="",  # set in async_pipeline_from_audio_stream
-                        format=stt.AudioFormats.WAV,
-                        codec=stt.AudioCodecs.PCM,
-                        bit_rate=stt.AudioBitRates.BITRATE_16,
-                        sample_rate=stt.AudioSampleRates.SAMPLERATE_16000,
-                        channel=stt.AudioChannels.CHANNEL_MONO,
-                    ),
-                    stt_stream=self._iterate_packets(),
-                    pipeline_id=pipeline_select.get_chosen_pipeline(
-                        self.hass, DOMAIN, self.device_info.mac_address
-                    ),
-                    conversation_id=conversation_id,
-                    device_id=device_id,
-                    tts_audio_output=tts_audio_output,
-                    start_stage=start_stage,
-                )
+            await async_pipeline_from_audio_stream(
+                self.hass,
+                context=self.context,
+                event_callback=self._event_callback,
+                stt_metadata=stt.SpeechMetadata(
+                    language="",  # set in async_pipeline_from_audio_stream
+                    format=stt.AudioFormats.WAV,
+                    codec=stt.AudioCodecs.PCM,
+                    bit_rate=stt.AudioBitRates.BITRATE_16,
+                    sample_rate=stt.AudioSampleRates.SAMPLERATE_16000,
+                    channel=stt.AudioChannels.CHANNEL_MONO,
+                ),
+                stt_stream=self._iterate_packets(),
+                pipeline_id=pipeline_select.get_chosen_pipeline(
+                    self.hass, DOMAIN, self.device_info.mac_address
+                ),
+                conversation_id=conversation_id,
+                device_id=device_id,
+                tts_audio_output=tts_audio_output,
+                start_stage=start_stage,
+                wake_word_settings=WakeWordSettings(timeout=5),
+                audio_settings=AudioSettings(
+                    noise_suppression_level=audio_settings.noise_suppression_level,
+                    auto_gain_dbfs=audio_settings.auto_gain,
+                    volume_multiplier=audio_settings.volume_multiplier,
+                ),
+            )
 
-                # Block until TTS is done sending
-                await self._tts_done.wait()
+            # Block until TTS is done sending
+            await self._tts_done.wait()
 
             _LOGGER.debug("Pipeline finished")
         except PipelineNotFound:
@@ -271,18 +282,6 @@ class VoiceAssistantUDPServer(asyncio.DatagramProtocol):
                 },
             )
             _LOGGER.warning("No Wake word provider found")
-        except asyncio.TimeoutError:
-            if self.stopped:
-                # The pipeline was stopped gracefully
-                return
-            self.handle_event(
-                VoiceAssistantEventType.VOICE_ASSISTANT_ERROR,
-                {
-                    "code": "pipeline-timeout",
-                    "message": "Pipeline timeout",
-                },
-            )
-            _LOGGER.warning("Pipeline timeout")
         finally:
             self.handle_finished()
 
diff --git a/requirements_all.txt b/requirements_all.txt
index 9ecfb48fba0..db2a0f8cb05 100644
--- a/requirements_all.txt
+++ b/requirements_all.txt
@@ -231,7 +231,7 @@ aioecowitt==2023.5.0
 aioemonitor==1.0.5
 
 # homeassistant.components.esphome
-aioesphomeapi==16.0.6
+aioesphomeapi==17.0.0
 
 # homeassistant.components.flo
 aioflo==2021.11.0
diff --git a/requirements_test_all.txt b/requirements_test_all.txt
index 4a6181d4d4d..41304510b2b 100644
--- a/requirements_test_all.txt
+++ b/requirements_test_all.txt
@@ -212,7 +212,7 @@ aioecowitt==2023.5.0
 aioemonitor==1.0.5
 
 # homeassistant.components.esphome
-aioesphomeapi==16.0.6
+aioesphomeapi==17.0.0
 
 # homeassistant.components.flo
 aioflo==2021.11.0
diff --git a/tests/components/esphome/test_voice_assistant.py b/tests/components/esphome/test_voice_assistant.py
index b7ce5670441..6c54c5f62f3 100644
--- a/tests/components/esphome/test_voice_assistant.py
+++ b/tests/components/esphome/test_voice_assistant.py
@@ -10,7 +10,6 @@ import pytest
 from homeassistant.components.assist_pipeline import (
     PipelineEvent,
     PipelineEventType,
-    PipelineNotFound,
     PipelineStage,
 )
 from homeassistant.components.assist_pipeline.error import WakeWordDetectionError
@@ -370,6 +369,8 @@ async def test_wake_word(
     with patch(
         "homeassistant.components.esphome.voice_assistant.async_pipeline_from_audio_stream",
         new=async_pipeline_from_audio_stream,
+    ), patch(
+        "asyncio.Event.wait"  # TTS wait event
     ):
         voice_assistant_udp_server_v2.transport = Mock()
 
@@ -377,7 +378,6 @@ async def test_wake_word(
             device_id="mock-device-id",
             conversation_id=None,
             flags=2,
-            pipeline_timeout=1,
         )
 
 
@@ -410,38 +410,4 @@ async def test_wake_word_exception(
             device_id="mock-device-id",
             conversation_id=None,
             flags=2,
-            pipeline_timeout=1,
-        )
-
-
-async def test_pipeline_timeout(
-    hass: HomeAssistant,
-    voice_assistant_udp_server_v2: VoiceAssistantUDPServer,
-) -> None:
-    """Test that the pipeline is set to start with Wake word."""
-
-    async def async_pipeline_from_audio_stream(*args, **kwargs):
-        raise PipelineNotFound("not-found", "Pipeline not found")
-
-    with patch(
-        "homeassistant.components.esphome.voice_assistant.async_pipeline_from_audio_stream",
-        new=async_pipeline_from_audio_stream,
-    ):
-        voice_assistant_udp_server_v2.transport = Mock()
-
-        def handle_event(
-            event_type: VoiceAssistantEventType, data: dict[str, str] | None
-        ) -> None:
-            if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_ERROR:
-                assert data is not None
-                assert data["code"] == "pipeline not found"
-                assert data["message"] == "Selected pipeline not found"
-
-        voice_assistant_udp_server_v2.handle_event = handle_event
-
-        await voice_assistant_udp_server_v2.run_pipeline(
-            device_id="mock-device-id",
-            conversation_id=None,
-            flags=2,
-            pipeline_timeout=1,
         )