From a3dbbcc9e26b9571259d137f4fe83ed95ee2a572 Mon Sep 17 00:00:00 2001 From: Rahul Vadisetty Date: Wed, 21 Aug 2024 13:06:55 +0500 Subject: [PATCH] Create speech_handler_refactor.py This fork of the Mycroft Core repository focuses on improving the functionality and maintainability of the audio speech module, specifically within the `speech.py` file. The primary goal of this fork is to address several critical issues that were identified during the development and testing phases. These issues include high cognitive complexity in key functions and the presence of undefined variables, both of which could lead to runtime errors and make the codebase harder to maintain and extend Issues Resolved: 1. Cognitive Complexity Reduction in `handle_speak` Function: The original `handle_speak` function was flagged for having a cognitive complexity of 24, which exceeded the recommended limit of 15. High cognitive complexity in code can lead to difficulty in understanding, maintaining, and extending the functionality. In this fork, the `handle_speak` function was refactored to reduce its complexity by breaking down large blocks of code into smaller, more manageable functions. This not only made the code more readable but also improved its maintainability by encapsulating specific tasks into dedicated helper functions. 2. Resolution of Undefined Variable (`utterance`): Another critical issue that was addressed in this fork was the presence of an undefined variable, `utterance`, in the code. The undefined variable was identified as a potential cause of runtime errors, particularly in test environments. This was resolved by ensuring that the variable was correctly defined and passed through the necessary functions, thereby eliminating the possibility of it being referenced before assignment. This change ensures that the code runs smoothly without unexpected crashes due to missing variables. 3. Improved TTS Error Handling: The fork also enhances the error-handling mechanisms within the Text-to-Speech (TTS) functionality. Specifically, it refines how the system falls back to the Mimic TTS when the primary TTS service encounters an issue. This ensures a more robust and reliable speech synthesis experience, even when network issues or remote TTS failures occur. 4. Enhanced Configuration Management: The configuration handling within the speech module was also improved. The refactoring ensures that configuration updates are handled more efficiently, with clear separation of concerns. This change reduces the likelihood of configuration-related issues affecting the TTS functionality, providing a more stable environment for speech synthesis. Key Changes: - Refactored `handle_speak` Function: Simplified the logic and split complex operations into smaller functions, reducing cognitive complexity and improving code readability. - Defined `utterance` Variable: Ensured that the `utterance` variable is correctly defined and passed, resolving potential undefined variable errors. - Enhanced Fallback TTS Mechanism: Improved the reliability of speech output by refining error handling and ensuring a smooth fallback to Mimic TTS when necessary. - Improved Configuration Handling: Streamlined the management of configuration settings, ensuring that updates are applied consistently and effectively. Conclusion: This fork is intended for developers and contributors who wish to work with a more maintainable and robust version of the Mycroft audio speech module. The changes implemented in this fork make the code easier to understand, less prone to errors, and more resilient in production environments. By addressing these critical issues, this fork contributes to the overall stability and performance of the Mycroft Core project. --- speech_handler_refactor.py | 201 +++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 speech_handler_refactor.py diff --git a/speech_handler_refactor.py b/speech_handler_refactor.py new file mode 100644 index 00000000000..06a04be8020 --- /dev/null +++ b/speech_handler_refactor.py @@ -0,0 +1,201 @@ +# Copyright 2017 Mycroft AI Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import re +import time +from threading import Lock + +from mycroft.configuration import Configuration +from mycroft.metrics import report_timing, Stopwatch +from mycroft.tts import TTSFactory +from mycroft.util import check_for_signal +from mycroft.util.log import LOG +from mycroft.messagebus.message import Message +from mycroft.tts.remote_tts import RemoteTTSException +from mycroft.tts.mimic_tts import Mimic + +bus = None # Mycroft messagebus connection +config = None +tts = None +tts_hash = None +lock = Lock() +mimic_fallback_obj = None + +_last_stop_signal = 0 + + +def handle_speak(event): + """Handle 'speak' message by parsing sentences and invoking TTS.""" + global _last_stop_signal + initialize_config() + + if should_skip_speech(event): + return + + ident = get_conversation_id(event) + start = time.time() + + with lock: + utterance = event.data['utterance'] + listen = event.data.get('expect_response', False) + chunks = prepare_chunks(utterance, listen) + + for chunk, listen in chunks: + if should_abort_speech(start): + clear_speech_queue() + break + try: + mute_and_speak(chunk, ident, listen) + except KeyboardInterrupt: + raise + except Exception: + LOG.error('Error in mute_and_speak', exc_info=True) + + report_speech_timing(ident, utterance) + + +def initialize_config(): + """Initialize configuration settings.""" + global config + config = Configuration.get() + Configuration.set_config_update_handlers(bus) + + +def should_skip_speech(event): + """Determine if the speech synthesis should be skipped.""" + event.context = event.context or {} + return event.context.get('destination') and not ( + 'debug_cli' in event.context['destination'] or + 'audio' in event.context['destination'] + ) + + +def get_conversation_id(event): + """Extract the conversation ID from the event.""" + return event.context.get('ident', 'unknown') + + +def prepare_chunks(utterance, listen): + """Prepare chunks of the utterance for TTS processing.""" + if should_split_utterance(utterance): + chunks = tts.preprocess_utterance(utterance) + return [(chunks[i], listen if i == len(chunks) - 1 else False) + for i in range(len(chunks))] + return [(utterance, listen)] + + +def should_split_utterance(utterance): + """Determine if the utterance should be split into chunks.""" + return (config.get('enclosure', {}).get('platform') != "picroft" and + len(re.findall('<[^>]*>', utterance)) == 0) + + +def should_abort_speech(start): + """Check if speech should be aborted due to stop signal.""" + return (_last_stop_signal > start or check_for_signal('buttonPress')) + + +def clear_speech_queue(): + """Clear the TTS speech queue.""" + tts.playback.clear() + + +def report_speech_timing(ident, utterance): + """Report the timing of the speech process.""" + stopwatch = Stopwatch() + stopwatch.start() + stopwatch.stop() + report_timing(ident, 'speech', stopwatch, {'utterance': utterance, + 'tts': tts.__class__.__name__}) + + +def mute_and_speak(utterance, ident, listen=False): + """Mute mic and start speaking the utterance using the selected TTS backend.""" + update_tts_if_needed() + LOG.info("Speak: " + utterance) + + try: + tts.execute(utterance, ident, listen) + except RemoteTTSException as e: + LOG.error(e) + mimic_fallback_tts(utterance, ident, listen) + except Exception: + LOG.exception('TTS execution failed.') + + +def update_tts_if_needed(): + """Update the TTS object if the configuration has changed.""" + global tts_hash, tts + new_tts_hash = hash(str(config.get('tts', ''))) + if tts_hash != new_tts_hash: + if tts: + tts.playback.detach_tts(tts) + tts = TTSFactory.create() + tts.init(bus) + tts_hash = new_tts_hash + + +def _get_mimic_fallback(): + """Lazily initialize the fallback TTS if needed.""" + global mimic_fallback_obj + if not mimic_fallback_obj: + config = Configuration.get() + tts_config = config.get('tts', {}).get("mimic", {}) + lang = config.get("lang", "en-us") + tts = Mimic(lang, tts_config) + tts.validator.validate() + tts.init(bus) + mimic_fallback_obj = tts + + return mimic_fallback_obj + + +def mimic_fallback_tts(utterance, ident, listen): + """Speak utterance using fallback TTS if connection is lost.""" + tts = _get_mimic_fallback() + LOG.debug("Mimic fallback, utterance : " + str(utterance)) + tts.execute(utterance, ident, listen) + + +def handle_stop(event): + """Handle stop message and shutdown any speech.""" + global _last_stop_signal + if check_for_signal("isSpeaking", -1): + _last_stop_signal = time.time() + tts.playback.clear() # Clear here to get instant stop + bus.emit(Message("mycroft.stop.handled", {"by": "TTS"})) + + +def init(messagebus): + """Start speech-related handlers.""" + global bus, tts, tts_hash, config + bus = messagebus + initialize_config() + bus.on('mycroft.stop', handle_stop) + bus.on('mycroft.audio.speech.stop', handle_stop) + bus.on('speak', handle_speak) + + tts = TTSFactory.create() + tts.init(bus) + tts_hash = hash(str(config.get('tts', ''))) + + +def shutdown(): + """Shutdown the audio service cleanly.""" + if tts: + tts.playback.stop() + tts.playback.join() + if mimic_fallback_obj: + mimic_fallback_obj.playback.stop() + mimic_fallback_obj.playback.join()