From 8d3bfc5ae4afe04438366c821e0069d0b3f73e4e Mon Sep 17 00:00:00 2001 From: sevenc-nanashi Date: Sat, 16 Sep 2023 22:30:01 +0900 Subject: [PATCH] =?UTF-8?q?WIP:=200.15=E3=81=AB=E5=AF=BE=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 25 +- .gitignore | 1 + .rubocop.yml | 3 + CHANGELOG.md | 8 +- Gemfile | 2 + Rakefile | 22 + Steepfile | 4 +- lib/voicevox.rb | 7 +- lib/voicevox/core.rb | 707 ++++++++++++---------------- lib/voicevox/error.rb | 84 ++-- lib/voicevox/version.rb | 4 +- lib/voicevox/wrapper/audio_query.rb | 151 +++--- lib/voicevox/wrapper/info.rb | 125 +---- lib/voicevox/wrapper/manager.rb | 137 ------ lib/voicevox/wrapper/open_jtalk.rb | 28 ++ lib/voicevox/wrapper/synthesizer.rb | 319 +++++++++++++ lib/voicevox/wrapper/user_dict.rb | 187 ++++++++ lib/voicevox/wrapper/utils.rb | 50 +- lib/voicevox/wrapper/voice_model.rb | 92 ++++ rbs_collection.lock.yaml | 78 ++- sig/ffi.rbs | 13 +- sig/manifest.yaml | 2 + sig/voicevox.rbs | 5 +- sig/voicevox/core.rbs | 190 ++++---- sig/voicevox/error.rbs | 53 --- sig/voicevox/wrapper/info.rbs | 26 - sig/voicevox/wrapper/manager.rbs | 31 -- sig/voicevox/wrapper/utils.rbs | 9 - spec/audio_query_spec.rb | 15 - spec/core_spec.rb | 8 - spec/helper.rb | 21 - spec/info_spec.rb | 13 + spec/spec_helper.rb | 46 ++ spec/synthesizer_spec.rb | 54 +++ spec/user_dict_spec.rb | 61 +++ spec/voice_model_spec.rb | 19 + spec/wrapper_spec.rb | 19 - 37 files changed, 1516 insertions(+), 1103 deletions(-) delete mode 100644 lib/voicevox/wrapper/manager.rb create mode 100644 lib/voicevox/wrapper/open_jtalk.rb create mode 100644 lib/voicevox/wrapper/synthesizer.rb create mode 100644 lib/voicevox/wrapper/user_dict.rb create mode 100644 lib/voicevox/wrapper/voice_model.rb create mode 100644 sig/manifest.yaml delete mode 100644 sig/voicevox/error.rbs delete mode 100644 sig/voicevox/wrapper/info.rbs delete mode 100644 sig/voicevox/wrapper/manager.rbs delete mode 
100644 sig/voicevox/wrapper/utils.rbs delete mode 100644 spec/audio_query_spec.rb delete mode 100644 spec/core_spec.rb delete mode 100644 spec/helper.rb create mode 100644 spec/info_spec.rb create mode 100644 spec/spec_helper.rb create mode 100644 spec/synthesizer_spec.rb create mode 100644 spec/user_dict_spec.rb create mode 100644 spec/voice_model_spec.rb delete mode 100644 spec/wrapper_spec.rb diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6af67ed..93fcfef 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,6 @@ jobs: matrix: ruby_version: - "3.0" - - "3.1" - "3.2" platform: - "ubuntu-latest" @@ -20,18 +19,13 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - - name: Download voicevox core (Windows) - if: matrix.platform == 'windows-latest' - run: | - curl https://github.com/VOICEVOX/voicevox_core/releases/download/0.14.0/download-windows-x64.exe -Lo download.exe - ./download.exe - - - name: Download voicevox core (Ubuntu) - if: matrix.platform == 'ubuntu-latest' - run: | - curl https://github.com/VOICEVOX/voicevox_core/releases/download/0.14.0/download-linux-x64 -Lo download - chmod +x download - ./download + - name: Download voicevox core + id: download-voicevox-core + uses: sevenc-nanashi/setup-voicevox@v0.1 + with: + download-item: core + path: voicevox_core + version: 0.15.0-preview.8 - name: Setup Ruby uses: ruby/setup-ruby@v1 @@ -43,6 +37,5 @@ jobs: run: | bundle exec rake spec env: - LD_LIBRARY_PATH: voicevox_core - RUBY_DLL_PATH: voicevox_core - VOICEVOX_OPEN_JTALK_DICT: voicevox_core/open_jtalk_dic_utf_8-1.11 + VOICEVOX_DLL_PATH: ${{ steps.download-voicevox-core.outputs.entrypoint }} + VOICEVOX_OPEN_JTALK_DIC_DIR: ${{ steps.download-voicevox-core.outputs.open_jtalk_dic_dir }} diff --git a/.gitignore b/.gitignore index f0d6706..34902fc 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ Gemfile.lock voicevox_core download.ps1 download.sh +download-* diff --git 
a/.rubocop.yml b/.rubocop.yml index e2927b4..b8749dc 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -19,3 +19,6 @@ Style/GuardClause: Naming/MethodParameterName: Enabled: false + +Lint/MissingSuper: + Enabled: false diff --git a/CHANGELOG.md b/CHANGELOG.md index fcbdddb..f7f59c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ -## [Unreleased] - ## [0.1.0] - 2022-08-13 -- Initial release +- 初期リリース + +## [0.2.0] - 2023-08-13 + + diff --git a/Gemfile b/Gemfile index 4a3878c..5b6ab2c 100644 --- a/Gemfile +++ b/Gemfile @@ -17,3 +17,5 @@ gem "syntax_tree", "~> 5.2" gem "syntax_tree-rbs", "~> 0.5.1" gem "rspec", "~> 3.11" + +gem "typeprof", "~> 0.21.7" diff --git a/Rakefile b/Rakefile index 9cf4757..9d2aa66 100644 --- a/Rakefile +++ b/Rakefile @@ -2,6 +2,28 @@ require "bundler/gem_tasks" require "rspec/core/rake_task" +require "orthoses" +require "orthoses-yard" task default: [:spec] RSpec::Core::RakeTask.new(:spec) + +namespace :rbs do + desc "build RBS to sig/orthoses" + task :build do + Orthoses::Builder + .new do + use Orthoses::LoadRBS, paths: Dir.glob("sig/**/*.rbs") + use Orthoses::CreateFileByName, depth: 2, to: "sig" + use Orthoses::Filter do |name| + name.start_with?("Voicevox") + end + use Orthoses::Mixin + use Orthoses::Constant + use Orthoses::Walk, root: "Voicevox" + use Orthoses::YARD, parse: ["lib/**/*.rb"] + run -> { require_relative "lib/voicevox" } + end + .call + end +end diff --git a/Steepfile b/Steepfile index e94d0b2..bb2f631 100644 --- a/Steepfile +++ b/Steepfile @@ -5,9 +5,9 @@ D = Steep::Diagnostic target :lib do signature "sig" - check "lib" # Directory name + # check "lib" # Directory name # check "Gemfile" # File name - # check "app/models/**/*.rb" # Glob + check "lib/**/*.rb" # ignore "lib/templates/*.rb" # library "pathname", "set" # Standard libraries diff --git a/lib/voicevox.rb b/lib/voicevox.rb index ec65abb..395fd79 100644 --- a/lib/voicevox.rb +++ b/lib/voicevox.rb @@ -5,11 +5,14 @@ require_relative "voicevox/error" 
require_relative "voicevox/wrapper/utils" require_relative "voicevox/wrapper/info" -require_relative "voicevox/wrapper/manager" require_relative "voicevox/wrapper/audio_query" +require_relative "voicevox/wrapper/voice_model" +require_relative "voicevox/wrapper/open_jtalk" +require_relative "voicevox/wrapper/user_dict" +require_relative "voicevox/wrapper/synthesizer" # # voicevox_coreのラッパー。 # -class Voicevox # rubocop:disable Lint/EmptyClass +module Voicevox end diff --git a/lib/voicevox/core.rb b/lib/voicevox/core.rb index 829c230..512edb8 100644 --- a/lib/voicevox/core.rb +++ b/lib/voicevox/core.rb @@ -1,448 +1,367 @@ # frozen_string_literal: true require "ffi" +require "fiddle" require "English" -class Voicevox +Fiddle.dlopen(ENV["ORT_DLL_PATH"]) if ENV["ORT_DLL_PATH"] + +# rubocop:disable Naming/ConstantName + +module Voicevox # # voicevox_coreの薄いラッパー。 # module Core extend FFI::Library - enum :voicevox_result_code, - { - voicevox_result_succeed: 0, - voicevox_result_not_loaded_openjtalk_dict: 1, - voicevox_result_failed_load_model: 2, - voicevox_result_failed_get_supported_devices: 3, - voicevox_result_cant_gpu_support: 4, - voicevox_result_failed_load_metas: 5, - voicevox_result_uninitialized_status: 6, - voicevox_result_invalid_speaker_id: 7, - voicevox_result_invalid_model_index: 8, - voicevox_result_inference_failed: 9, - voicevox_result_failed_extract_full_context_label: 10, - voicevox_result_invalid_utf8_input: 11, - voicevox_result_failed_parse_kana: 12, - voicevox_result_invalid_audio_query: 13 - }.to_a.flatten + ffi_lib %w[voicevox_core.dll libvoicevox_core.dylib libvoicevox_core.so] - enum :voicevox_acceleration_mode, + Uintptr = + if Fiddle::SIZEOF_VOIDP == 8 + :uint64 + else + :uint32 + end + + enum FFI::NativeType::INT32, + :voicevox_acceleration_mode, { voicevox_acceleration_mode_auto: 0, voicevox_acceleration_mode_cpu: 1, voicevox_acceleration_mode_gpu: 2 }.to_a.flatten + VoicevoxAccelerationMode = :voicevox_acceleration_mode + + enum 
:voicevox_result_code, + { + voicevox_result_ok: 0, + voicevox_result_not_loaded_openjtalk_dict_error: 1, + voicevox_result_load_model_error: 2, + voicevox_result_get_supported_devices_error: 3, + voicevox_result_gpu_support_error: 4, + voicevox_result_load_metas_error: 5, + voicevox_result_invalid_style_id_error: 6, + voicevox_result_invalid_model_id_error: 7, + voicevox_result_inference_error: 8, + voicevox_result_extract_full_context_label_error: 11, + voicevox_result_invalid_utf8_input_error: 12, + voicevox_result_parse_kana_error: 13, + voicevox_result_invalid_audio_query_error: 14, + voicevox_result_invalid_accent_phrase_error: 15, + voicevox_result_open_file_error: 16, + voicevox_result_vvm_model_read_error: 17, + voicevox_result_already_loaded_model_error: 18, + voicevox_result_unloaded_model_error: 19, + voicevox_result_load_user_dict_error: 20, + voicevox_result_save_user_dict_error: 21, + voicevox_result_unknown_user_dict_word_error: 22, + voicevox_result_use_user_dict_error: 23, + voicevox_result_invalid_user_dict_word_error: 24, + voicevox_result_invalid_uuid_error: 25 + }.to_a.flatten + VoicevoxResultCode = :voicevox_result_code + + enum :voicevox_user_dict_word_type, + { + voicevox_user_dict_word_type_proper_noun: 0, + voicevox_user_dict_word_type_common_noun: 1, + voicevox_user_dict_word_type_verb: 2, + voicevox_user_dict_word_type_adjective: 3, + voicevox_user_dict_word_type_suffix: 4 + }.to_a.flatten + VoicevoxUserDictWordType = :voicevox_user_dict_word_type + + # class VoicevoxOpenJtalkRc < FFI::Struct + # end + VoicevoxOpenJtalkRc = :pointer + + # class VoicevoxSynthesizer < FFI::Struct + # end + VoicevoxSynthesizer = :pointer + + # class VoicevoxUserDict < FFI::Struct + # end + VoicevoxUserDict = :pointer + + # class VoicevoxVoiceModel < FFI::Struct + # end + VoicevoxVoiceModel = :pointer + + VoicevoxVoiceModelId = :string class VoicevoxInitializeOptions < FFI::Struct layout( - *{ - acceleration_mode: :voicevox_acceleration_mode, - 
cpu_num_threads: :int16, - load_all_models: :bool, - openjtalk_dict_path: :pointer - }.to_a.flatten + { + acceleration_mode: VoicevoxAccelerationMode, + cpu_num_threads: :uint16, + load_all_models: :bool + } ) end + VoicevoxStyleId = :uint32 + class VoicevoxAudioQueryOptions < FFI::Struct - layout :kana, :bool + layout({ kana: :bool }) + end + + class VoicevoxAccentPhraseOptions < FFI::Struct + layout({ kana: :bool }) end class VoicevoxSynthesisOptions < FFI::Struct - layout :enable_interrogative_upspeak, :bool + layout({ enable_interrogative_upspeak: :bool }) end class VoicevoxTtsOptions < FFI::Struct - layout :kana, :bool, :enable_interrogative_upspeak, :bool + layout({ kana: :bool, enable_interrogative_upspeak: :bool }) + end + + class VoicevoxUserDictWord < FFI::Struct + layout( + { + surface: :string, + pronunciation: :string, + accent_type: Uintptr, + word_type: VoicevoxUserDictWordType, + priority: :uint32 + } + ) end - ffi_lib %w[voicevox_core.dll libvoicevox_core.dylib libvoicevox_core.so] attach_function :voicevox_make_default_initialize_options, - [], + {}, VoicevoxInitializeOptions.by_value - attach_function :voicevox_initialize, - [VoicevoxInitializeOptions.by_value], - :voicevox_result_code - - attach_function :voicevox_load_model, [:int64], :voicevox_result_code - - attach_function :voicevox_is_gpu_mode, [], :bool - - attach_function :voicevox_is_model_loaded, [:int64], :bool - - attach_function :voicevox_finalize, [], :void - - attach_function :voicevox_get_metas_json, [], :string - - attach_function :voicevox_get_supported_devices_json, [], :string - - attach_function :voicevox_get_version, [], :string - - attach_function :voicevox_predict_duration, - %i[int64 pointer int32 pointer], - :voicevox_result_code - - attach_function :voicevox_predict_intonation, - %i[ - int64 - pointer - pointer - pointer - pointer - pointer - pointer - int32 - pointer - ], - :voicevox_result_code - - attach_function :voicevox_decode, - %i[int64 int64 pointer pointer 
int32 pointer], - :voicevox_result_code + attach_function :voicevox_get_version, {}, :string attach_function :voicevox_make_default_audio_query_options, - [], + {}, VoicevoxAudioQueryOptions.by_value - attach_function :voicevox_audio_query, - [ - :string, - :int32, - VoicevoxAudioQueryOptions.by_value, - :pointer - ], - :voicevox_result_code - - # attach_function :voicevox_make_default_synthesis_options, - # [], - # VoicevoxSynthesisOptions.by_value - - attach_function :voicevox_synthesis, - [ - :string, - :int32, - VoicevoxSynthesisOptions.by_value, - :pointer, - :pointer - ], - :voicevox_result_code - - attach_function :voicevox_make_default_tts_options, - [], - VoicevoxTtsOptions.by_value - - attach_function :voicevox_tts, - [ - :string, - :int64, - VoicevoxTtsOptions.by_value, - :pointer, - :pointer - ], - :voicevox_result_code - - attach_function :voicevox_audio_query_json_free, [:pointer], :void - attach_function :voicevox_wav_free, [:pointer], :void + attach_function :voicevox_make_default_synthesis_options, + {}, + VoicevoxSynthesisOptions.by_value - attach_function :voicevox_error_result_to_message, - [:voicevox_result_code], - :string - rescue LoadError - module Old - extend FFI::Library - ffi_lib %w[core.dll libcore.dylib libcore.so] - - enum :voicevox_result_code, - [ - :voicevox_result_succeed, - 0, - :voicevox_result_not_loaded_openjtalk_dict, - 1, - :voicevox_result_failed_load_model, - 2, - :voicevox_result_failed_get_supported_devices, - 3, - :voicevox_result_cant_gpu_support, - 4, - :voicevox_result_failed_load_metas, - 5, - :voicevox_result_uninitialized_status, - 6, - :voicevox_result_invalid_speaker_id, - 7, - :voicevox_result_invalid_model_index, - 8, - :voicevox_result_inference_failed, - 9, - :voicevox_result_failed_extract_full_context_label, - 10, - :voicevox_result_invalid_utf8_input, - 11, - :voicevox_result_failed_parse_kana, - 12 - ] - - attach_function :initialize, %i[bool int bool], :bool - - attach_function :load_model, [:int64], 
:bool - - attach_function :is_model_loaded, [:int64], :bool - - attach_function :finalize, [], :void - - attach_function :metas, [], :string - - attach_function :last_error_message, [], :string - - attach_function :supported_devices, [], :string - - attach_function :yukarin_s_forward, - %i[int64 pointer pointer pointer], - :bool - - attach_function :yukarin_sa_forward, - %i[ - int64 - pointer - pointer - pointer - pointer - pointer - pointer - pointer - pointer - ], - :bool - - attach_function :decode_forward, - %i[int64 int64 pointer pointer pointer pointer], - :bool - - attach_function :voicevox_load_openjtalk_dict, - [:string], - :voicevox_result_code - - attach_function :voicevox_tts, - %i[string int64 pointer pointer], - :voicevox_result_code - - attach_function :voicevox_tts_from_kana, - %i[string int64 pointer pointer], - :voicevox_result_code - - attach_function :voicevox_wav_free, [:pointer], :void - - attach_function :voicevox_error_result_to_message, - [:voicevox_result_code], - :string - rescue LoadError - raise( - LoadError, - "Failed to load voicevox_core! " \ - "(voicevox_core.dll, libvoicevox_core.so, libvoicevox_core.dylib, " \ - "core.dll, libcore.so, libcore.dylib)\n" \ - "Make sure you have installed voicevox_core and its dependencies " \ - "(such as onnxruntime), and that the voicevox_core shared library " \ - "can be found in your library path." 
- ) - end - - module_function - - # @return [Voicevox::Core::VoicevoxInitializeOptions] - def voicevox_make_default_initialize_options - options = VoicevoxInitializeOptions.new - options[:acceleration_mode] = :voicevox_acceleration_mode_auto - options[:cpu_num_threads] = 0 - options[:load_all_models] = false - options[:openjtalk_dict_path] = nil - options - end + attach_function :voicevox_make_default_accent_phrases_options, + {}, + VoicevoxAccentPhraseOptions.by_value - # @param [Voicevox::Core::VoicevoxInitializeOptions] - # @return [Symbol] - def voicevox_initialize(options) - gpu = - case options[:acceleration_mode] - when :voicevox_acceleration_mode_auto - supported_devices = JSON.parse(Old.supported_devices) - supported_devices["cuda"] || supported_devices["dml"] - when :voicevox_acceleration_mode_gpu - true - when :voicevox_acceleration_mode_cpu - false - end - @is_gpu_mode = gpu - if Old.initialize( - gpu, - options[:cpu_num_threads], - options[:load_all_models] - ) - Old.voicevox_load_openjtalk_dict( - options[:openjtalk_dict_path].read_string - ) - else - raise(Old.last_error_message) - end - end + attach_function :voicevox_make_default_synthesis_options, + {}, + VoicevoxSynthesisOptions.by_value - # @param [Integer] speaker_id - # @return [Symbol] - def voicevox_load_model(speaker_id) - if Old.load_model(speaker_id) - :voicevox_result_succeed - else - raise(Old.last_error_message) - end - end + attach_function :voicevox_make_default_tts_options, {}, VoicevoxTtsOptions.by_value - # @param [Integer] speaker_id - # @return [Boolean] - def voicevox_is_model_loaded(speaker_id) - Old.is_model_loaded(speaker_id) - end + attach_function :voicevox_open_jtalk_rc_new, + { + open_jtalk_dic_dir: :string, + out_open_jtalk: :pointer + }.values, + VoicevoxResultCode - # @return [Boolean] - def voicevox_is_gpu_mode - @is_gpu_mode - end + attach_function :voicevox_open_jtalk_rc_use_user_dict, + { + open_jtalk_rc: VoicevoxOpenJtalkRc, + user_dict: VoicevoxUserDict + 
}.values, + VoicevoxResultCode - # @return [void] - def voicevox_finalize - Old.finalize - end + attach_function :voicevox_open_jtalk_rc_delete, + { open_jtalk_rc: VoicevoxOpenJtalkRc }.values, + :void - # @return [String] - def voicevox_get_metas_json - Old.metas - end + attach_function :voicevox_voice_model_new_from_path, + { path: :string, out_model: :pointer }.values, + VoicevoxResultCode - # @return [String] - def voicevox_get_supported_devices_json - Old.supported_devices - end + attach_function :voicevox_voice_model_id, + { model: VoicevoxVoiceModel }.values, + VoicevoxVoiceModelId - # @param [Ingeger] length - # @param [FFI::Pointer] phoneme_list - # @param [Integer] speaker_id - # @param [FFI::Pointer] output - # @return [Symbol] - def voicevox_predict_duration(length, phoneme_list, speaker_id, output) - speaker_id_ptr = FFI::MemoryPointer.new(:int64) - speaker_id_ptr.put(:int64, 0, speaker_id) - if Old.yukarin_s_forward(length, phoneme_list, speaker_id_ptr, output) - :voicevox_result_succeed - else - raise(Old.last_error_message) - end - end - - # @param [Ingeger] length - # @param [FFI::Pointer] phoneme_list - # @param [FFI::Pointer] vowel_phoneme_list - # @param [FFI::Pointer] consonant_phoneme_list - # @param [FFI::Pointer] start_accent_list - # @param [FFI::Pointer] end_accent_list - # @param [FFI::Pointer] start_accent_phrase_list - # @param [FFI::Pointer] end_accent_phrase_list - # @param [Integer] speaker_id - # @param [FFI::Pointer] output - # @return [Symbol] - def voicevox_predict_intonation( - length, - vowel_phoneme_list, - consonant_phoneme_list, - start_accent_list, - end_accent_list, - start_accent_phrase_list, - end_accent_phrase_list, - speaker_id, - output - ) - speaker_id_ptr = FFI::MemoryPointer.new(:int64) - speaker_id_ptr.put(:int64, 0, speaker_id) - if Old.yukarin_sa_forward( - length, - vowel_phoneme_list, - consonant_phoneme_list, - start_accent_list, - end_accent_list, - start_accent_phrase_list, - end_accent_phrase_list, - 
speaker_id_ptr, - output - ) - :voicevox_result_succeed - else - raise(Old.last_error_message) - end - end - - # @param [Ingeger] length - # @param [Integer] phoneme_size - # @param [FFI::Pointer] f0 - # @param [FFI::Pointer] phoneme - # @param [Integer] speaker_id - # @param [FFI::Pointer] output - # @return [Symbol] - def voicevox_decode(length, phoneme_size, f0, phoneme, speaker_id, output) - speaker_id_ptr = FFI::MemoryPointer.new(:int64) - speaker_id_ptr.put(:int32, 0, speaker_id) - if Old.decode_forward( - length, - phoneme_size, - f0, - phoneme, - speaker_id_ptr, - output - ) - :voicevox_result_succeed - else - raise(Old.last_error_message) - end - end + attach_function :voicevox_voice_model_get_metas_json, + { model: VoicevoxVoiceModel }.values, + :string - # @param [FFI::Pointer] text - # @param [Integer] speaker_id - # @param [Voicevox::Core::VoicevoxTtsOptions] options - # @param [FFI::Pointer] output_binary_size - # @param [FFI::Pointer] output_wav - # @return [Symbol] - def voicevox_tts(text, speaker_id, options, output_binary_size, output_wav) - if options[:kana] - Old.voicevox_tts_from_kana( - text, - speaker_id, - output_binary_size, - output_wav - ) - else - Old.voicevox_tts(text, speaker_id, output_binary_size, output_wav) - end - end + attach_function :voicevox_voice_model_delete, + { model: VoicevoxVoiceModel }.values, + :void + + attach_function :voicevox_synthesizer_new_with_initialize, + { + open_jtalk: VoicevoxOpenJtalkRc, + options: VoicevoxInitializeOptions.by_value, + out_synthesizer: :pointer + }.values, + VoicevoxResultCode + + attach_function :voicevox_synthesizer_delete, + { synthesizer: VoicevoxSynthesizer }.values, + :void + + attach_function :voicevox_synthesizer_load_voice_model, + { + synthesizer: VoicevoxSynthesizer, + model: VoicevoxVoiceModel + }.values, + VoicevoxResultCode + + attach_function :voicevox_synthesizer_unload_voice_model, + { + synthesizer: VoicevoxSynthesizer, + model_id: VoicevoxVoiceModelId + }.values, + 
VoicevoxResultCode + + attach_function :voicevox_synthesizer_is_gpu_mode, + { synthesizer: VoicevoxSynthesizer }.values, + :bool + + attach_function :voicevox_synthesizer_is_loaded_voice_model, + { + synthesizer: VoicevoxSynthesizer, + model_id: VoicevoxVoiceModelId + }.values, + :bool + + attach_function :voicevox_synthesizer_create_metas_json, + { synthesizer: VoicevoxSynthesizer }.values, + :string - # @param [FFI::Pointer] wav - def voicevox_wav_free(wav) - Old.voicevox_wav_free(wav) - end + attach_function :voicevox_create_supported_devices_json, + { output_supported_devices_json: :pointer }.values, + VoicevoxResultCode + + attach_function :voicevox_synthesizer_create_audio_query, + { + synthesizer: VoicevoxSynthesizer, + text: :string, + style_id: VoicevoxStyleId, + options: VoicevoxAudioQueryOptions.by_value, + out_audio_query_json: :pointer + }.values, + VoicevoxResultCode + + attach_function :voicevox_synthesizer_create_accent_phrases, + { + synthesizer: VoicevoxSynthesizer, + text: :string, + style_id: VoicevoxStyleId, + options: VoicevoxAccentPhraseOptions.by_value, + out_accent_phrases_json: :pointer + }.values, + VoicevoxResultCode + + attach_function :voicevox_synthesizer_replace_mora_data, + { + synthesizer: VoicevoxSynthesizer, + accent_phrases_json: :string, + style_id: VoicevoxStyleId, + out_accent_phrases_json: :pointer + }.values, + VoicevoxResultCode + + attach_function :voicevox_synthesizer_replace_phoneme_length, + { + synthesizer: VoicevoxSynthesizer, + accent_phrases_json: :string, + style_id: VoicevoxStyleId, + out_accent_phrases_json: :pointer + }.values, + VoicevoxResultCode + + attach_function :voicevox_synthesizer_replace_mora_pitch, + { + synthesizer: VoicevoxSynthesizer, + accent_phrases_json: :string, + style_id: VoicevoxStyleId, + out_accent_phrases_json: :pointer + }.values, + VoicevoxResultCode + + attach_function :voicevox_synthesizer_synthesis, + { + synthesizer: VoicevoxSynthesizer, + audio_query_json: :string, + style_id: 
VoicevoxStyleId, + options: VoicevoxSynthesisOptions.by_value, + output_wav_length: :pointer, + output_wav: :pointer + }.values, + VoicevoxResultCode + + attach_function :voicevox_synthesizer_tts, + { + synthesizer: VoicevoxSynthesizer, + text: :string, + style_id: VoicevoxStyleId, + options: VoicevoxTtsOptions.by_value, + output_wav_length: :pointer, + output_wav: :pointer + }.values, + VoicevoxResultCode + + attach_function :voicevox_json_free, { json: :pointer }.values, :void + + attach_function :voicevox_wav_free, { json: :pointer }.values, :void - # @param [Symbol] type - # @param [String] text - def voicevox_error_result_to_message(type) - Old.voicevox_error_result_to_message(type) - end + attach_function :voicevox_error_result_to_message, + { result: VoicevoxResultCode }.values, + :string - def voicevox_make_default_tts_options - options = Voicevox::Core::VoicevoxTtsOptions.new - options[:kana] = false - options - end - warn( - "Failed to load new core (voicevox_core.dll, libvoicevox_core.so, libvoicevox_core.dylib), " \ - "using old core (core.dll, libcore.so, libcore.dylib)." 
- ) + attach_function :voicevox_user_dict_word_make, + { surface: :string, pronunciation: :string }.values, + VoicevoxUserDictWord.by_value + + attach_function :voicevox_user_dict_new, {}, VoicevoxUserDict + + attach_function :voicevox_user_dict_load, + { user_dict: VoicevoxUserDict, path: :string }.values, + VoicevoxResultCode + + attach_function :voicevox_user_dict_add_word, + { + user_dict: VoicevoxUserDict, + word: VoicevoxUserDictWord.by_ref, + output_word_uuid: :pointer + }.values, + VoicevoxResultCode + + attach_function :voicevox_user_dict_update_word, + { + user_dict: VoicevoxUserDict, + word_uuid: :pointer, + word: VoicevoxUserDictWord.by_ref + }.values, + VoicevoxResultCode + + attach_function :voicevox_user_dict_remove_word, + { user_dict: VoicevoxUserDict, word_uuid: :pointer }.values, + VoicevoxResultCode + + attach_function :voicevox_user_dict_to_json, + { + user_dict: VoicevoxUserDict, + output_json: :pointer + }.values, + VoicevoxResultCode + + attach_function :voicevox_user_dict_import, + { + user_dict: VoicevoxUserDict, + other_dict: VoicevoxUserDict + }.values, + VoicevoxResultCode + + attach_function :voicevox_user_dict_save, + { user_dict: VoicevoxUserDict, path: :string }.values, + VoicevoxResultCode + + attach_function :voicevox_user_dict_delete, + { user_dict: VoicevoxUserDict }.values, + :void end end + +# rubocop:enable Naming/ConstantName diff --git a/lib/voicevox/error.rb b/lib/voicevox/error.rb index 20fc131..f40abd8 100644 --- a/lib/voicevox/error.rb +++ b/lib/voicevox/error.rb @@ -2,7 +2,7 @@ require "objspace" -class Voicevox +module Voicevox # # Voicevox関連のエラー。 # @@ -36,44 +36,74 @@ def from_code(code) end end - class NotLoadedOpenjtalkDict < Voicevox::CoreError - @code = :voicevox_result_not_loaded_openjtalk_dict + class NotLoadedOpenjtalkDict < CoreError + @code = :voicevox_result_not_loaded_openjtalk_dict_error end - class FailedLoadModel < Voicevox::CoreError - @code = :voicevox_result_failed_load_model + class LoadModel < 
CoreError + @code = :voicevox_result_load_model_error end - class FailedGetSupportedDevices < Voicevox::CoreError - @code = :voicevox_result_failed_get_supported_devices + class GetSupportedDevices < CoreError + @code = :voicevox_result_get_supported_devices_error end - class CantGpuSupport < Voicevox::CoreError - @code = :voicevox_result_cant_gpu_support + class GpuSupport < CoreError + @code = :voicevox_result_gpu_support_error end - class FailedLoadMetas < Voicevox::CoreError - @code = :voicevox_result_failed_load_metas + class LoadMetas < CoreError + @code = :voicevox_result_load_metas_error end - class UninitializedStatus < Voicevox::CoreError - @code = :voicevox_result_uninitialized_status + class InvalidStyleId < CoreError + @code = :voicevox_result_invalid_style_id_error end - class InvalidSpeakerId < Voicevox::CoreError - @code = :voicevox_result_invalid_speaker_id + class InvalidModelId < CoreError + @code = :voicevox_result_invalid_model_id_error end - class InvalidModelIndex < Voicevox::CoreError - @code = :voicevox_result_invalid_model_index + class Inference < CoreError + @code = :voicevox_result_inference_error end - class InferenceFailed < Voicevox::CoreError - @code = :voicevox_result_inference_failed + class ExtractFullContextLabel < CoreError + @code = :voicevox_result_extract_full_context_label_error end - class FailedExtractFullContextLabel < Voicevox::CoreError - @code = :voicevox_result_failed_extract_full_context_label + class InvalidUtf8Input < CoreError + @code = :voicevox_result_invalid_utf8_input_error end - class InvalidUtf8Input < Voicevox::CoreError - @code = :voicevox_result_invalid_utf8_input + class ParseKana < CoreError + @code = :voicevox_result_parse_kana_error end - class FailedParseKana < Voicevox::CoreError - @code = :voicevox_result_failed_parse_kana + class InvalidAudioQuery < CoreError + @code = :voicevox_result_invalid_audio_query_error end - class InvalidAudioQuery < Voicevox::CoreError - @code = 
:voicevox_result_invalid_audio_query + class InvalidAccentPhrase < CoreError + @code = :voicevox_result_invalid_accent_phrase_error + end + class OpenFile < CoreError + @code = :voicevox_result_open_file_error + end + class VvmModelRead < CoreError + @code = :voicevox_result_vvm_model_read_error + end + class AlreadyLoadedModel < CoreError + @code = :voicevox_result_already_loaded_model_error + end + class UnloadedModel < CoreError + @code = :voicevox_result_unloaded_model_error + end + class LoadUserDict < CoreError + @code = :voicevox_result_load_user_dict_error + end + class SaveUserDict < CoreError + @code = :voicevox_result_save_user_dict_error + end + class UnknownUserDictWord < CoreError + @code = :voicevox_result_unknown_user_dict_word_error + end + class UseUserDict < CoreError + @code = :voicevox_result_use_user_dict_error + end + class InvalidUserDictWord < CoreError + @code = :voicevox_result_invalid_user_dict_word_error + end + class InvalidUuid < CoreError + @code = :voicevox_result_invalid_uuid_error end end end diff --git a/lib/voicevox/version.rb b/lib/voicevox/version.rb index 104f286..9286c54 100644 --- a/lib/voicevox/version.rb +++ b/lib/voicevox/version.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -class Voicevox +module Voicevox # @return [String] voicevox.rbのバージョン。 - VERSION = "0.1.0" + VERSION = "0.2.0" end diff --git a/lib/voicevox/wrapper/audio_query.rb b/lib/voicevox/wrapper/audio_query.rb index 01f2ff7..393a051 100644 --- a/lib/voicevox/wrapper/audio_query.rb +++ b/lib/voicevox/wrapper/audio_query.rb @@ -1,69 +1,7 @@ # frozen_string_literal: true require "json" -class Voicevox - # - # テキストからAudioQueryを生成します。 - # - # @param [String] text 生成するAudioQueryのテキスト。 - # @param [Voicevox::CharacterInfo, Voicevox::StyleInfo, Integer] speaker 話者、または話者のID。 - # @param [Boolean] kana textをAquesTalkライクな記法として解釈するかどうか。デフォルトはfalse。 - # - # @return [Voicevox::AudioQuery] 生成されたAudioQuery。 - # - # @see Voicevox#synthesis - # - def audio_query(text, 
speaker, kana: false) - options = Voicevox::Core.voicevox_make_default_audio_query_options - options[:kana] = kana - speaker_id = speaker.is_a?(Integer) ? speaker : speaker.id - load_model speaker_id - return_ptr = FFI::MemoryPointer.new(:pointer) - Voicevox.process_result Voicevox::Core.voicevox_audio_query( - text, - speaker_id, - options, - return_ptr - ) - return_str_ptr = return_ptr.read_pointer - json = return_str_ptr.read_string - Voicevox::Core.voicevox_audio_query_json_free return_str_ptr - - AudioQuery.new JSON.parse(json, symbolize_names: true) - end - - # - # AudioQueryから音声を生成します。 - # - # @param [AudioQuery] query AudioQuery。 - # @param [Voicevox::CharacterInfo, Voicevox::StyleInfo, Integer] speaker 話者、または話者のID。 - # @param [Boolran] enable_interrogative_upspeak 疑問文の調整を有効にするかどうか。デフォルトはtrue。 - # - # @return [String] 生成された音声のwavデータ。 - # - def synthesis(query, speaker, enable_interrogative_upspeak: true) - size_ptr = FFI::MemoryPointer.new(:int) - return_ptr = FFI::MemoryPointer.new(:pointer) - id = speaker.is_a?(Integer) ? 
speaker : speaker.id - load_model id - options = Voicevox::Core::VoicevoxSynthesisOptions.new - options[:enable_interrogative_upspeak] = enable_interrogative_upspeak - Voicevox.process_result( - Voicevox::Core.voicevox_synthesis( - query.to_json, - id, - options, - size_ptr, - return_ptr - ) - ) - data_ptr = return_ptr.read_pointer - size_ptr.free - data = data_ptr.read_string(size_ptr.read_int) - Voicevox::Core.voicevox_wav_free(data_ptr) - data - end - +module Voicevox # # 音声合成用のクエリ。 # @@ -89,6 +27,7 @@ class AudioQuery # @return [String] AquesTalkライクな読み仮名。 attr_reader :kana + # @private def initialize(query) @accent_phrases = query[:accent_phrases].map { |ap| AccentPhrase.new ap } @speed_scale = query[:speed_scale] @@ -102,6 +41,15 @@ def initialize(query) @kana = query[:kana] end + # + # JSONからAudioQueryを生成します。 + # @param [String] json AudioQueryのJSON。 + # @return [AudioQuery] + # + def self.from_json(json) + new JSON.parse(json, symbolize_names: true) + end + # # AudioQueryをHashにします。 # @@ -122,6 +70,8 @@ def to_hash } end + alias to_h to_hash + # # AudioQueryをjsonにします。 # @@ -130,6 +80,19 @@ def to_hash def to_json(...) to_hash.to_json(...) end + + # @private + def eql?(other) + other.respond_to?(:to_hash) && to_hash.eql?(other.to_hash) + end + + # @private + def hash + to_hash.hash + end + + # @private + alias == eql? end # @@ -146,6 +109,7 @@ class AccentPhrase attr_reader :is_interrogative alias interrogative? is_interrogative + # @private def initialize(query) @moras = query[:moras].map { |ap| Mora.new ap } @accent = query[:accent] @@ -153,6 +117,15 @@ def initialize(query) @is_interrogative = query[:is_interrogative] end + # + # JSONからAccentPhraseを生成します。 + # @param [String] json AccentPhraseの情報を持つJSON。 + # @return [AccentPhrase] + # + def self.from_json(json) + new JSON.parse(json, symbolize_names: true) + end + # # AccentPhraseをHashにします。 # @@ -167,6 +140,30 @@ def to_hash } end + alias to_h to_hash + + # + # AccentPhraseをJSONにします。 + # + # @return [String] + # + def to_json(...) 
+ to_hash.to_json(...) + end + + # @private + def eql?(other) + other.is_a?(self.class) && other.to_hash == to_hash + end + + # @private + def hash + to_hash.hash + end + + # @private + alias == eql? + # # モーラ(子音+母音)ごとの情報。 # @@ -208,6 +205,34 @@ def to_hash pitch: @pitch } end + + alias to_h to_hash + + # + # MoraをJSONにします。 + # @return [String] + # + def to_json(...) + to_hash.to_json(...) + end + + # @private + def eql?(other) + @text == other.text && + @consonant == other.consonant && + @consonant_length == other.consonant_length && + @vowel == other.vowel && + @vowel_length == other.vowel_length && + @pitch == other.pitch + end + + # @private + def hash + to_hash.hash + end + + # @private + alias == eql? end end end diff --git a/lib/voicevox/wrapper/info.rb b/lib/voicevox/wrapper/info.rb index 06a4e6a..fd5fc1d 100644 --- a/lib/voicevox/wrapper/info.rb +++ b/lib/voicevox/wrapper/info.rb @@ -2,112 +2,29 @@ require "json" -class Voicevox - # サポートされているデバイスを表すStruct。 - SupportedDevices = Struct.new(:cpu, :cuda, :dml, keyword_init: true) +module Voicevox + module_function - # キャラクターの情報を表すStruct。 - CharacterInfo = - Struct.new(:name, :styles, :speaker_uuid, :version, keyword_init: true) do - # - # キャラクターの最初のスタイルのIDを返します。 - # @note ほとんどの場合はノーマルになります。 - # - # @return [Integer] スタイルのID。 - # - def id - styles[0].id - end - - # - # キャラクターのスタイルが全てロードされているかを返します。 - # - # @return [Boolean] 全てロードされている場合はtrue、そうでない場合はfalse。 - # - def loaded? - styles.map(&:loaded?).all? - end - - # - # キャラクターのスタイルを全てロードします。 - # - # @return [void] - # - def load - Voicevox.initialize_required - styles.map(&:load) - end - end - StyleInfo = - Struct.new(:name, :id, keyword_init: true) do - # - # スタイルがロードされているかを返します。 - # - # @return [Boolean] ロードされている場合はtrue、そうでない場合はfalse。 - # - def loaded? 
- Voicevox::Core.is_model_loaded(id) - end - - # - # スタイルをロードします。 - # - # @return [void] - # - def load - Voicevox.initialize_required - Voicevox.process_result Voicevox::Core.voicevox_load_model(id) - end - end - - class << self - # - # サポートしているデバイスを取得します。 - # - # @return [Voicevox::SupportedDevices] サポートしているデバイス。 - # - def supported_devices - SupportedDevices.new( - **JSON.parse(Voicevox::Core.voicevox_get_supported_devices_json) - ) - end - - # - # キャラクターの一覧を取得します。 - # - # @return [Array] キャラクターの一覧。 - # - def characters - JSON - .parse(Voicevox::Core.voicevox_get_metas_json) - .map do |meta| - CharacterInfo.new( - **{ - **meta, - "styles" => meta["styles"].map { |style| StyleInfo.new(**style) } - } - ) - end - end - - # - # GPUをサポートしているかを返します。 - # - # @note CUDA、またはDirectMLが使える場合にtrueを返します。 - # - # @return [Boolean] GPUをサポートしているかどうか。 - # - def gpu_supported? - Voicevox.supported_devices.cuda || Voicevox.supported_devices.dml - end + def core_version + @core_version ||= Voicevox::Core.voicevox_get_version + end - # - # コアのバージョンを取得します。 - # - # @return [String] コアのバージョン。 - # - def core_version - Voicevox::Core.voicevox_get_version + SupportedDevices = Struct.new(:cpu, :cuda, :dml) + + def supported_devices + if @supported_devices.nil? 
+ pointer = FFI::MemoryPointer.new(:pointer) + Voicevox::Core.voicevox_create_supported_devices_json(pointer) + supported_devices = + JSON.parse(pointer.read_pointer.read_string, symbolize_names: true) + + @supported_devices = + SupportedDevices.new( + supported_devices[:cpu], + supported_devices[:cuda], + supported_devices[:dml] + ) end + @supported_devices end end diff --git a/lib/voicevox/wrapper/manager.rb b/lib/voicevox/wrapper/manager.rb deleted file mode 100644 index b28dcbe..0000000 --- a/lib/voicevox/wrapper/manager.rb +++ /dev/null @@ -1,137 +0,0 @@ -# frozen_string_literal: true - -require "etc" -require "objspace" - -class Voicevox - @initialized = false - # @return [:cpu, :gpu] ハードウェアアクセラレーションモード。 - attr_reader :acceleration_mode - # @return [Integer] スレッド数。 - attr_reader :cpu_num_threads - # @return [Boolean] 起動時に全てのモデルを読み込むかどうか。 - attr_reader :load_all_models - - # - # GPUモードで動作しているかどうか。 - # - # @return [Boolean] GPUモードで動作している場合はtrue、そうでない場合はfalse。 - # - def gpu? - @acceleration_mode == :gpu - end - - # - # CPUモードで動作しているかどうか。 - # - # @return [Boolean] CPUモードで動作している場合はtrue、そうでない場合はfalse。 - # - def cpu? 
- @acceleration_mode == :cpu - end - - # - # Voicevoxのコアを初期化します。 - # - # @param [String] openjtalk_dict_path OpenJTalkの辞書へのパス。 - # @param [:cpu, :gpu, :auto] acceleration_mode ハードウェアアクセラレーションモード。:autoを指定するとコア側で自動的に決定されます。 - # @param [Integer] cpu_num_threads スレッド数。省略する、または0を渡すとコア側で自動的に決定されます。 - # @param [Boolean] load_all_models 全てのモデルを読み込むかどうか。省略するとfalseになります。 - # - def initialize( - openjtalk_dict_path, - acceleration_mode: :auto, - cpu_num_threads: nil, - load_all_models: false - ) - acceleration_mode_enum = - { - auto: :voicevox_acceleration_mode_auto, - gpu: :voicevox_acceleration_mode_gpu, - cpu: :voicevox_acceleration_mode_cpu - }.fetch(acceleration_mode) do - raise ArgumentError, "無効なacceleration_mode: #{acceleration_mode}" - end - @cpu_num_threads = cpu_num_threads || 0 - @load_all_models = load_all_models - @openjtalk_dict_path = openjtalk_dict_path - options = Voicevox::Core.voicevox_make_default_initialize_options - options[:acceleration_mode] = acceleration_mode_enum - options[:cpu_num_threads] = @cpu_num_threads - options[:load_all_models] = @load_all_models - options[:openjtalk_dict_path] = FFI::MemoryPointer.from_string( - openjtalk_dict_path - ) - - Voicevox.process_result Voicevox::Core.voicevox_initialize(options) - @acceleration_mode = Voicevox::Core.voicevox_is_gpu_mode ? :gpu : :cpu - at_exit { Voicevox::Core.voicevox_finalize } unless self.class.initialized - self.class.initialized = true - end - - # - # Voicevoxのコアをファイナライズします。 - # - def finalize - Voicevox::Core.voicevox_finalize - self.class.initialized = false - end - - # - # 話者のモデルを読み込みます。 - # - # @param [Voicevox::CharacterInfo, Voicevox::StyleInfo, Integer] speaker 話者、または話者のID。 - # - def load_model(speaker) - id = speaker.is_a?(Integer) ? 
speaker : speaker.id - - Voicevox.process_result Voicevox::Core.voicevox_load_model(id) - end - - # - # モデルが読み込まれているかどうかを返します。 - # - # @param [Voicevox::CharacterInfo, Voicevox::StyleInfo, Integer] speaker 話者、または話者のID。 - # - # @return [Boolean] 読み込まれているかどうか。 - # - def model_loaded?(speaker) - id = speaker.is_a?(Integer) ? speaker : speaker.id - - Voicevox::Core.voicevox_is_model_loaded(id) - end - - # - # voicevox_ttsを使って音声を生成します。 - # - # @param [String] text 生成する音声のテキスト。 - # @param [Voicevox::CharacterInfo, Voicevox::StyleInfo, Integer] speaker 話者、または話者のID。 - # @param [Boolean] kana textをAquesTalkライクな記法として解釈するかどうか。デフォルトはfalse。 - # @param [Boolran] enable_interrogative_upspeak 疑問文の調整を有効にするかどうか。デフォルトはtrue。 - # - # @return [String] 生成された音声のwavデータ。 - # - def tts(text, speaker, kana: false, enable_interrogative_upspeak: true) - size_ptr = FFI::MemoryPointer.new(:int) - return_ptr = FFI::MemoryPointer.new(:pointer) - id = speaker.is_a?(Integer) ? speaker : speaker.id - load_model id - options = Voicevox::Core.voicevox_make_default_tts_options - options[:kana] = kana - options[:enable_interrogative_upspeak] = enable_interrogative_upspeak - Voicevox.process_result( - Voicevox::Core.voicevox_tts(text, id, options, size_ptr, return_ptr) - ) - data_ptr = return_ptr.read_pointer - data = data_ptr.read_string(size_ptr.read_int) - size_ptr.free - Voicevox::Core.voicevox_wav_free(data_ptr) - data - end - - class << self - attr_accessor :initialized - - alias initialized? 
initialized - end -end diff --git a/lib/voicevox/wrapper/open_jtalk.rb b/lib/voicevox/wrapper/open_jtalk.rb new file mode 100644 index 0000000..ccb1fcb --- /dev/null +++ b/lib/voicevox/wrapper/open_jtalk.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Voicevox + class OpenJtalk + # @private + attr_reader :pointer + def initialize(open_jtalk_dic_dir) + pointer = FFI::MemoryPointer.new(Core::Uintptr) + Voicevox.process_result Core.voicevox_open_jtalk_rc_new( + open_jtalk_dic_dir, + pointer + ) + + @pointer = + FFI::AutoPointer.new( + pointer.read_pointer, + Core.method(:voicevox_open_jtalk_rc_delete) + ) + end + + def user_dict=(user_dict) + Voicevox.process_result Core.voicevox_open_jtalk_rc_use_user_dict( + @pointer, + user_dict.pointer + ) + end + end +end diff --git a/lib/voicevox/wrapper/synthesizer.rb b/lib/voicevox/wrapper/synthesizer.rb new file mode 100644 index 0000000..4abe254 --- /dev/null +++ b/lib/voicevox/wrapper/synthesizer.rb @@ -0,0 +1,319 @@ +# frozen_string_literal: true + +module Voicevox + # + # 音声シンセサイザ。 + # + class Synthesizer + # + # 音声シンセサイザを構築する。 + # + # @param [OpenJtalk] open_jtalk {OpenJtalk}のインスタンス。 + # @param [:auto, :cpu, :gpu] acceleration_mode ハードウェアアクセラレーションモード。 + # @param [Integer] cpu_num_threads CPU利用数。 + # nilを指定すると環境に合わせたCPUが利用される。 + # @param [Boolean] load_all_models 全てのモデルを読み込むかどうか。 + # + def initialize( + open_jtalk, + acceleration_mode: :cpu, + cpu_num_threads: nil, + load_all_models: false + ) + options = Core.voicevox_make_default_initialize_options + options[:acceleration_mode] = case acceleration_mode + when :auto + :voicevox_acceleration_mode_auto + when :cpu + :voicevox_acceleration_mode_cpu + when :gpu + :voicevox_acceleration_mode_gpu + else + raise ArgumentError, "invalid acceleration_mode: #{acceleration_mode}" + end + options[:cpu_num_threads] = cpu_num_threads || 0 + options[:load_all_models] = load_all_models + + pointer = FFI::MemoryPointer.new(Core::Uintptr) + + Voicevox.process_result 
Core.voicevox_synthesizer_new_with_initialize( + open_jtalk.pointer, + options, + pointer + ) + + @pointer = + FFI::AutoPointer.new( + pointer.read_pointer, + Core.method(:voicevox_synthesizer_delete) + ) + end + + # + # 音声モデルを読み込む。 + # + # @param [VoiceModel] model 音声モデル。 + # + def load_voice_model(model) + Voicevox.process_result Core.voicevox_synthesizer_load_voice_model( + @pointer, + model.pointer + ) + end + + # + # 音声モデルの読み込みを解除する。 + # + # @param [String] id 音声モデルID。 + # + def unload_voice_model(id) + Voicevox.process_result Core.voicevox_synthesizer_unload_voice_model( + @pointer, + id + ) + end + + # + # 指定したIDの音声モデルが読み込まれているか判定する。 + # + # @param [String] id 音声モデルID。 + # @return [Boolean] 音声モデルが読み込まれているかどうか。 + # + def loaded_voice_model?(id) + Core.voicevox_synthesizer_is_loaded(@pointer, id) + end + + # + # ハードウェアアクセラレーションがGPUモードか判定する。 + # + # @return [Boolean] GPUモードかどうか。 + # + def gpu_mode? + Core.voicevox_synthesizer_is_gpu_mode(@pointer) + end + + # + # 今読み込んでいる音声モデルのメタ情報を取得する。 + # + # @return [Array] メタ情報。 + # + def metas + Core + .voicevox_synthesizer_get_metas_json(@pointer) + .then do |json| + parsed = JSON.parse(json, symbolize_names: true) + parsed.map do |meta| + Meta.new( + meta[:name], + meta[:speaker_uuid], + meta[:styles].map { |style| Style.new(style[:name], style[:id]) }, + meta[:version] + ) + end + end + end + + # + # {AudioQuery}を生成する。 + # + # @param [String] text テキスト。 + # @param [String] style_id スタイルID。 + # @param [Boolean] kana AquesTalk風記法としてテキストを解釈するかどうか。 + # + # @return [AudioQuery] 生成した{AudioQuery}。 + # + def create_audio_query(text, style_id, kana: false) + pointer = FFI::MemoryPointer.new(Core::Uintptr) + + options = Core.voicevox_make_default_audio_query_options + + options[:kana] = kana + + Voicevox.process_result Core.voicevox_synthesizer_create_audio_query( + @pointer, + text, + style_id, + options, + pointer + ) + json = JSON.parse(pointer.read_pointer.read_string, symbolize_names: true) + + 
Core.voicevox_json_free(pointer.read_pointer) + + AudioQuery.new(json) + end + + # + # {AccentPhrase AccentPhrase(アクセント句)}の配列を生成する。 + # + # @param [String] text テキスト。 + # @param [String] style_id スタイルID。 + # @param [Boolean] kana AquesTalk風記法としてテキストを解釈するかどうか。 + # + # @return [Array] 生成したAccentPhraseの配列。 + # + def create_accent_phrases(text, style_id, kana: false) + pointer = FFI::MemoryPointer.new(Core::Uintptr) + + options = Core.voicevox_make_default_audio_query_options + + options[:kana] = kana + + Voicevox.process_result Core.voicevox_synthesizer_create_accent_phrases( + @pointer, + text, + style_id, + options, + pointer + ) + json = JSON.parse(pointer.read_pointer.read_string, symbolize_names: true) + + Core.voicevox_json_free(pointer.read_pointer) + + json.map { |accent_phrase| AccentPhrase.new(accent_phrase) } + end + + # + # AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。 + # + # @param [Array] accent_phrases AccentPhraseの配列。 + # @param [String] style_id スタイルID。 + # + # @return [Array] 生成したAccentPhraseの配列。 + # + def replace_mora_data(accent_phrases, style_id) + pointer = FFI::MemoryPointer.new(Core::Uintptr) + + Voicevox.process_result Core.voicevox_synthesizer_replace_mora_data( + @pointer, + accent_phrases.to_json, + style_id, + pointer + ) + json = JSON.parse(pointer.read_pointer.read_string, symbolize_names: true) + + Core.voicevox_json_free(pointer.read_pointer) + + json.map { |accent_phrase| AccentPhrase.new(accent_phrase) } + end + + # + # AccentPhraseの配列の音素長を、特定の声で生成しなおす。 + # + # @param [Array] accent_phrases AccentPhraseの配列。 + # @param [String] style_id スタイルID。 + # + # @return [Array] 生成したAccentPhraseの配列。 + # + def replace_phoneme_length(accent_phrases, style_id) + pointer = FFI::MemoryPointer.new(Core::Uintptr) + + Voicevox.process_result Core.voicevox_synthesizer_replace_phoneme_length( + @pointer, + accent_phrases.to_json, + style_id, + pointer + ) + json = JSON.parse(pointer.read_pointer.read_string, symbolize_names: true) + + 
Core.voicevox_json_free(pointer.read_pointer) + + json.map { |accent_phrase| AccentPhrase.new(accent_phrase) } + end + + # + # AccentPhraseの配列の音高を、特定の声で生成しなおす。 + # + # @param [Array] accent_phrases AccentPhraseの配列。 + # @param [String] style_id スタイルID。 + # + # @return [Array] 生成したAccentPhraseの配列。 + # + def replace_mora_pitch(accent_phrases, style_id) + pointer = FFI::MemoryPointer.new(Core::Uintptr) + + Voicevox.process_result Core.voicevox_synthesizer_replace_mora_pitch( + @pointer, + accent_phrases.to_json, + style_id, + pointer + ) + + json = JSON.parse(pointer.read_pointer.read_string, symbolize_names: true) + + Core.voicevox_json_free(pointer.read_pointer) + + json.map { |accent_phrase| AccentPhrase.new(accent_phrase) } + end + + # + # {AudioQuery}から音声合成を行う。 + # + # @param [AudioQuery] audio_query {AudioQuery}。 + # @param [String] style_id スタイルID。 + # @param [Boolean] interrogative_upspeak 疑問文の調整を有効にするかどうか。 + # + # @return [String] WAVデータ。 + # + def synthesis(audio_query, style_id, interrogative_upspeak: false) + length_pointer = FFI::MemoryPointer.new(Core::Uintptr) + data_pointer = FFI::MemoryPointer.new(Core::Uintptr) + + options = Core.voicevox_make_default_synthesis_options + + options[:enable_interrogative_upspeak] = interrogative_upspeak + + Voicevox.process_result Core.voicevox_synthesizer_synthesis( + @pointer, + audio_query.to_json, + style_id, + options, + length_pointer, + data_pointer + ) + + wav = + data_pointer.read_pointer.read_bytes(length_pointer.read(Core::Uintptr)) + + Core.voicevox_wav_free(data_pointer.read_pointer) + + wav + end + + # + # テキスト音声合成を行う。 + # + # @param [String] text テキスト。 + # @param [String] style_id スタイルID。 + # @param [Boolean] kana AquesTalk風記法としてテキストを解釈するかどうか。 + # @param [Boolean] interrogative_upspeak 疑問文の調整を有効にするかどうか。 + # + # @return [String] WAVデータ。 + # + def tts(text, style_id, kana: false, interrogative_upspeak: false) + length_pointer = FFI::MemoryPointer.new(Core::Uintptr) + data_pointer = 
FFI::MemoryPointer.new(Core::Uintptr) + + options = Core.voicevox_make_default_tts_options + + options[:kana] = kana + options[:enable_interrogative_upspeak] = interrogative_upspeak + + Voicevox.process_result Core.voicevox_synthesizer_tts( + @pointer, + text, + style_id, + options, + length_pointer, + data_pointer + ) + + wav = + data_pointer.read_pointer.read_bytes(length_pointer.read(Core::Uintptr)) + + Core.voicevox_wav_free(data_pointer.read_pointer) + + wav + end + end +end diff --git a/lib/voicevox/wrapper/user_dict.rb b/lib/voicevox/wrapper/user_dict.rb new file mode 100644 index 0000000..106f0e7 --- /dev/null +++ b/lib/voicevox/wrapper/user_dict.rb @@ -0,0 +1,187 @@ +# frozen_string_literal: true + +require "json" +require "delegate" + +module Voicevox + # + # ユーザー辞書。 + # + class UserDict < Delegator + # @private + attr_reader :pointer + + # @private + def __getobj__ + json_ptr = FFI::MemoryPointer.new(:pointer) + Voicevox.process_result Core.voicevox_user_dict_to_json( + @pointer, + json_ptr + ) + json = json_ptr.read_pointer.read_string + Core.voicevox_json_free(json_ptr.read_pointer) + json_ptr.free + ret = + JSON + .parse(json, symbolize_names: true) + .to_h { |uuid, word| [uuid.to_s, Word.from_hash(word)] } + ret.freeze + ret + end + + # + # ユーザー辞書を構築する。 + # + def initialize + @pointer = + FFI::AutoPointer.new( + Core.voicevox_user_dict_new, + Core.method(:voicevox_user_dict_delete) + ) + end + + # + # ユーザー辞書にファイルを読み込ませる。 + # + # @param [String] path ファイルのパス。 + # @return [void] + # + def load(path) + Voicevox.process_result Core.voicevox_user_dict_load(@pointer, path) + end + + # + # ユーザー辞書をファイルに保存する。 + # + # @param [String] path ファイルのパス。 + # @return [void] + # + def save(path) + Voicevox.process_result Core.voicevox_user_dict_save(@pointer, path) + end + + # + # ユーザー辞書に単語を追加する。 + # + # @param [Word] word 追加する単語。 + # @return [String] 追加した単語のUUID。 + # + def add_word(word) + uuid_ptr = FFI::MemoryPointer.new(:uint8, 16) + Voicevox.process_result 
Core.voicevox_user_dict_add_word( + @pointer, + word.to_struct, + uuid_ptr + ) + uuid = + uuid_ptr + .read_array_of_type(:uint8, :read_uint8, 16) + .map { |x| x.to_s(16).rjust(2, "0") } + .join + uuid = + "#{uuid[0..7]}-#{uuid[8..11]}-#{uuid[12..15]}-#{uuid[16..19]}-#{uuid[20..]}" + uuid_ptr.free + uuid + end + + # + # ユーザー辞書の単語を更新する。 + # + # @param [String] uuid 更新する単語のUUID。 + # @param [Word] word 更新する単語。 + # @return [void] + # + def update_word(uuid, word) + uuid = uuid.gsub("-", "") + uuid_ptr = FFI::MemoryPointer.new(:uint8, 16) + uuid_ptr.write_array_of_type( + :uint8, + :put_uint8, + uuid.scan(/../).map(&:hex) + ) + Voicevox.process_result Core.voicevox_user_dict_update_word( + @pointer, + uuid_ptr, + word.to_struct + ) + uuid_ptr.free + end + + # + # ユーザー辞書から単語を削除する。 + # + # @param [String] uuid 削除する単語のUUID。 + # @return [void] + # + def remove_word(uuid) + uuid = uuid.gsub("-", "") + uuid_ptr = FFI::MemoryPointer.new(:uint8, 16) + uuid_ptr.write_array_of_type( + :uint8, + :put_uint8, + uuid.scan(/../).map(&:hex) + ) + Voicevox.process_result Core.voicevox_user_dict_remove_word( + @pointer, + uuid_ptr + ) + uuid_ptr.free + end + + # + # ユーザー辞書の単語。 + # + class Word + # @return [String] 表記。 + attr_accessor :surface + # @return [String] 読み。 + # 発音として有効なカタカナである必要がある。 + attr_accessor :pronunciation + # @return [Integer] アクセント型。 + # 0からモーラ数。 + attr_accessor :accent_type + # @return [:proper_noun, :common_noun, :verb, :adjective, :suffix] 単語の種類。 + attr_accessor :word_type + # @return [Integer] 優先度。 + # 0から10。 + attr_accessor :priority + + # + # ユーザー辞書の単語を構築する。 + # + def initialize( + surface, + pronunciation, + accent_type: 0, + word_type: :proper_noun, + priority: 5 + ) + @surface = surface + @pronunciation = pronunciation + @accent_type = accent_type + @word_type = word_type + @priority = priority + end + + # @private + def to_struct + word = Core.voicevox_user_dict_word_make(@surface, @pronunciation) + word[:accent_type] = @accent_type + word[:word_type] = 
:"voicevox_user_dict_word_type_#{@word_type}" + word[:priority] = @priority + word + end + + # @private + def self.from_hash(hash) + new( + hash[:surface], + hash[:pronunciation], + accent_type: hash[:accent_type], + word_type: hash[:word_type].downcase.to_sym, + priority: hash[:priority] + ) + end + end + end +end diff --git a/lib/voicevox/wrapper/utils.rb b/lib/voicevox/wrapper/utils.rb index 9a803f0..33446f0 100644 --- a/lib/voicevox/wrapper/utils.rb +++ b/lib/voicevox/wrapper/utils.rb @@ -2,44 +2,18 @@ require "etc" -class Voicevox - class << self - # - # Voicevoxが初期化されていなかったらエラーを出す。 - # - def initialize_required - raise Voicevox::Error, "Voicevoxが初期化されていません" unless Voicevox.initialized? - end +module Voicevox + module_function + # + # @private + # voicevox_result_codeに対応するエラーをraiseする。 + # + # @param [Symbol] result voicevox_result_code。 + # + def process_result(result) + return if result == :voicevox_result_ok + raise "Assert: result.is_a?(Symbol), got: #{result.class}" unless result.is_a?(Symbol) - # - # voicevox_result_codeに対応するエラーをraiseします。 - # - # @param [Symbol] result voicevox_result_code。 - # - def process_result(result) - return if result == :voicevox_result_succeed - raise "#{result}はSymbolではありません" unless result.is_a?(Symbol) - - raise Voicevox::CoreError.from_code(result) - end - - # - # 製品版Voicevoxのパスを返します。 - # - # @return [String] Voicevoxへの絶対パス。 - # @return [nil] Voicevoxが見付からなかった場合。zip版やLinux版ではnilを返します。 - # - def voicevox_path - paths = - if Gem.win_platform? 
- [File.join(ENV.fetch("LOCALAPPDATA", ""), "Programs", "VOICEVOX")] - else - [ - "/Applications/VOICEVOX", - "/Users/#{Etc.getlogin}/Library/Application Support/VOICEVOX" - ] - end - paths.find { |path| Dir.exist?(path) } - end + raise Voicevox::CoreError.from_code(result) end end diff --git a/lib/voicevox/wrapper/voice_model.rb b/lib/voicevox/wrapper/voice_model.rb new file mode 100644 index 0000000..b5e7877 --- /dev/null +++ b/lib/voicevox/wrapper/voice_model.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +module Voicevox + # 音声モデル。 + # + # VVMファイルと対応する。 + class VoiceModel + # @private + attr_reader :pointer + + # + # VVMファイルから{VoiceModel}を構築する。 + # + # @param [String] path VVMファイルのパス。 + # + def initialize(path) + pointer = FFI::MemoryPointer.new(Core::Uintptr) + + Voicevox.process_result Core.voicevox_voice_model_new_from_path( + path, + pointer + ) + + @pointer = + FFI::AutoPointer.new( + pointer.read_pointer, + Core.method(:voicevox_voice_model_delete) + ) + end + + # + # IDを取得する。 + # + # @return [String] ID。 + # + def id + @id ||= Core.voicevox_voice_model_id(@pointer) + end + + # + # メタ情報を取得する。 + # + # @return [Metas] メタ情報。 + # + def metas + @metas ||= + Core + .voicevox_voice_model_get_metas_json(@pointer) + .then do |json| + parsed = JSON.parse(json, symbolize_names: true) + parsed.map do |meta| + Meta.new( + meta[:name], + meta[:speaker_uuid], + meta[:styles].map do |style| + Style.new(style[:name], style[:id]) + end, + meta[:version] + ) + end + end + end + + def to_s + @path + end + + # + # 音声モデルのメタ情報。 + # + # @!attribute [r] name + # @return [String] 名前。 + # @!attribute [r] speaker_uuid + # @return [String] スピーカーのUUID。 + # @!attribute [r] styles + # @return [Array