diff --git a/CMakeLists.txt b/CMakeLists.txt
index 926710a..9213ea0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -141,16 +141,21 @@ endif()
 ### Manual ###
 set(WVP_MANUAL_DIR ${CMAKE_CURRENT_BINARY_DIR}/Manual)
 file(MAKE_DIRECTORY ${WVP_MANUAL_DIR})
+
 file(READ ${CMAKE_CURRENT_SOURCE_DIR}/docs/whisper-manual.md MANUAL_CONTENT)
-string(REPLACE "APPVERSION" "${WVP_BUILD_TAG} (${GIT_COMMIT_ID})" MANUAL_CONTENT ${MANUAL_CONTENT})
-string(REPLACE "src=\"../resource/" "src=\"./" MANUAL_CONTENT ${MANUAL_CONTENT})
-file(WRITE ${WVP_MANUAL_DIR}/whisper-manual.md ${MANUAL_CONTENT})
+# MANUAL_CONTENT is always expanded inside quotes: unquoted, every ';' in the
+# Markdown would split the text into several arguments that get joined back without the ';'.
+string(REPLACE "APPVERSION" "${WVP_BUILD_TAG} (${GIT_COMMIT_ID})" MANUAL_CONTENT "${MANUAL_CONTENT}")
+# Point image sources at the source tree's resource directory instead of copying the images.
+string(REPLACE "src=\"../resource/" "src=\"${CMAKE_CURRENT_SOURCE_DIR}/resource/" MANUAL_CONTENT "${MANUAL_CONTENT}")
+# The processed Markdown is written outside WVP_MANUAL_DIR; the wvp_manual target below reads it.
+file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/whisper-manual.md "${MANUAL_CONTENT}")
+
 file(COPY ${CMAKE_CURRENT_BINARY_DIR}/_deps/whisper_cpp-src/models/download-ggml-model.cmd DESTINATION ${WVP_MANUAL_DIR})
 file(COPY ${CMAKE_CURRENT_BINARY_DIR}/_deps/whisper_cpp-src/models/download-ggml-model.sh DESTINATION ${WVP_MANUAL_DIR})
-file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/resource/Screenshot.png DESTINATION ${WVP_MANUAL_DIR})
 find_program(MDPDF_EXE "mdpdf")
 if(MDPDF_EXE)
-    add_custom_target(wvp_manual COMMAND ${MDPDF_EXE} ${WVP_MANUAL_DIR}/whisper-manual.md --debug VERBATIM)
+    add_custom_target(wvp_manual COMMAND ${MDPDF_EXE} ${CMAKE_CURRENT_BINARY_DIR}/whisper-manual.md ${WVP_MANUAL_DIR}/whisper-manual.pdf VERBATIM)
 else()
     message(STATUS "Manual target cannot be generated because mdpdf is not found")
 endif()
diff --git a/README.md b/README.md
index 56c9e6e..1f82e9b 100644
--- a/README.md
+++ b/README.md
@@ -4,14 +4,22 @@ Workflows

-The Whisper plug-in is an implementation of the [Whisper](https://github.com/openai/whisper) speech recognition model developed by [OpenAI](https://openai.com/) as a [Vamp plug-in](https://www.vamp-plugins.org/). The Whisper plug-in analyses the text in the audio stream and generates markers corresponding to the tokens (words and/or syllables) found. +The Whisper plugin is an implementation of the [Whisper](https://github.com/openai/whisper) speech recognition model developed by [OpenAI](https://openai.com/) as a [Vamp plugin](https://www.vamp-plugins.org/). The Whisper plugin analyses the text in the audio stream and generates markers corresponding to the tokens (words and/or syllables) found. The lightweight ggml-tiny model is embedded in the plugin (so you don’t have to download anything to start experimenting), but it is possible to download and use other models that may be more appropriate to your needs. -The Whisper Vamp Plugin has been designed for use in the [Partiels](https://forum.ircam.fr/projects/detail/partiels/) application and requires the [Ircam Vamp Extension](https://github.com/Ircam-Partiels/ircam-vamp-extension). +The Whisper Vamp Plugin has been designed for use in the free audio analysis application [Partiels](https://forum.ircam.fr/projects/detail/partiels/).

Screenshot

+## Installation + +Download the Whisper Vamp plugin installation package for your operating system from the [Releases](https://github.com/Ircam-Partiels/whisper-vamp-plugin/releases) section and run the installer. + +## Use + +Launch the Partiels application. In a new or existing document, create a new analysis track with the Whisper plugin. Modify the model or the analysis parameters via the property window. Please refer to the manual available in the [Releases](https://github.com/Ircam-Partiels/whisper-vamp-plugin/releases) section for further information. + ## Compilation The compilation system is based on [CMake](https://cmake.org/), for example: @@ -23,7 +31,7 @@ ctest -C Debug -VV --test-dir build ## Credits -- **[Whisper Vamp plug-in](https://www.ircam.fr/)** by Pierre Guillot at IRCAM IMR Department +- **[Whisper Vamp plugin](https://www.ircam.fr/)** by Pierre Guillot at IRCAM IMR Department - **[Whisper.cpp](https://github.com/ggerganov/whisper.cpp)** by Georgi Gerganov - **[Whisper](https://github.com/openai/whisper)** model by OpenAI - **[Vamp SDK](https://github.com/vamp-plugins/vamp-plugin-sdk)** by Chris Cannam, copyright (c) 2005-2024 Chris Cannam and Centre for Digital Music, Queen Mary, University of London. diff --git a/docs/whisper-manual.md b/docs/whisper-manual.md index 8742985..5707127 100644 --- a/docs/whisper-manual.md +++ b/docs/whisper-manual.md @@ -21,7 +21,9 @@ ## Introduction -The Whisper plug-in is an implementation of the [Whisper](https://github.com/openai/whisper) speech recognition model developed by [OpenAI](https://openai.com/) as a [Vamp plug-in](https://www.vamp-plugins.org/). The Whisper plug-in analyses the text in the audio stream and generates markers corresponding to the tokens (words and/or syllables) found. +The Whisper plugin is an implementation of the [Whisper](https://github.com/openai/whisper) speech recognition model developed by [OpenAI](https://openai.com/) as a [Vamp plugin](https://www.vamp-plugins.org/). 
The Whisper plugin analyses the text in the audio stream and generates markers corresponding to the tokens (words and/or syllables) found. The lightweight ggml-tiny model is embedded in the plugin (so you don’t have to download anything to start experimenting), but it is possible to download and use other models that may be more appropriate to your needs. + +The Whisper Vamp Plugin has been designed for use in the free audio analysis application [Partiels](https://forum.ircam.fr/projects/detail/partiels/). ## Requirements @@ -31,7 +33,7 @@ The Whisper plug-in is an implementation of the [Whisper](https://github.com/ope ## Installation -Use the installer for your operating system. The plugin dynamic library (*whisper.dylib* for MacOS, *whisper.dll* for Windows and *whisper.so* for Linux) and the category file (*whisper.cat*) will be installed in your operating system's Vamp plug-in installation directory: +Use the installer for your operating system. The plugin dynamic library (*whisper.dylib* for MacOS, *whisper.dll* for Windows and *whisper.so* for Linux) and the category file (*whisper.cat*) will be installed in your operating system's Vamp plugin installation directory: - Linux: `~/vamp` - MacOS: `/Library/Audio/Plug-Ins/Vamp` - Windows: `C:\Program Files\Vamp` @@ -40,7 +42,7 @@ Use the installer for your operating system. The plugin dynamic library (*whispe By default, the plugin embeds the ggml-tiny model, which requires little space and offers fairly fast calculation but relatively unreliable results. -It is possible to download other models that are potentially more robust. To do this, you can use the `download-ggml-model.sh/cmd` scripts from Georgi Gerganov's [Whisper.cpp](https://github.com/ggerganov/whisper.cpp) project and provided with the plug-in package. Once downloaded, these models must be installed in the repository: +It is possible to download other models that are potentially more robust. 
To do this, you can use the `download-ggml-model.sh/cmd` scripts from Georgi Gerganov's [Whisper.cpp](https://github.com/ggerganov/whisper.cpp) project, which are provided with the plugin package. Once downloaded, these models must be installed in one of the following directories: - Linux: `~/.config/Ircam/whispermodels` or `/opt/Ircam/whispermodels` - MacOS: `~/Library/Application Support/Ircam/whispermodels` or `/Library/Application Support/Ircam/whispermodels` - Windows: `\Documents and Settings\username\Application Data\Ircam\whispermodels` or `\Documents and Settings\All Users\Application Data\Ircam\whispermodels` @@ -49,17 +51,17 @@ It is possible to use another directory by setting the `WHISPERMODELSPATH` envir Once installed in one of the directories, you can select the models in the plugin properties window. -> ⚠️ Please note that if you delete, modify or add models in these directories, the models will no longer be indexed in the same way, and the plug-in may no longer be able to find the selected model. After modification, make sure that the template name corresponds to the one you want. +> ⚠️ Please note that if you delete, modify or add models in these directories, the models will no longer be indexed in the same way, and the plugin may no longer be able to find the selected model. After modification, make sure that the model name corresponds to the one you want. [Further information](https://github.com/ggerganov/whisper.cpp/blob/master/models/README.md#available-models) on downloading and generating models can be found on Georgi Gerganov's Whisper.cpp project page. ## Inputs -The plug-in lets you define an input marker track to segment the analysis. This feature can be useful in avoiding the biases of certain models, such as the generation or repetition of words not present in the audio stream. +The plugin lets you define an input marker track to segment the analysis. 
This feature can be useful in avoiding the biases of certain models, such as the generation or repetition of words not present in the audio stream. ## Credits -- **[Whisper Vamp plug-in](https://www.ircam.fr/)** by Pierre Guillot at IRCAM +- **[Whisper Vamp plugin](https://www.ircam.fr/)** by Pierre Guillot at IRCAM - **[Whisper.cpp](https://github.com/ggerganov/whisper.cpp)** by Georgi Gerganov - **[Whisper](https://github.com/openai/whisper)** model by OpenAI - **[Vamp](www.vamp-plugins.org)** by the Centre for Digital Music, Queen Mary, University of London