diff --git a/CHANGELOG.md b/CHANGELOG.md index 1118085b1..c5b6eca79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +## [0.63.0] + +### Added +- Added support for Google's Gemini API via OpenAI compatibility mode (`GoogleOpenAISchema`). Use model aliases `gem15p` (Gemini 1.5 Pro), `gem15f` (Gemini 1.5 Flash), and `gem15f8` (Gemini 1.5 Flash 8b). Set your ENV api key `GOOGLE_API_KEY` to use it. +- Thanks to @sixzero, added support for Google Flash via OpenRouter and Qwen 72b models. + ## [0.62.1] ### Fixed diff --git a/Project.toml b/Project.toml index c2d232ab7..d5966c0b4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PromptingTools" uuid = "670122d1-24a8-4d70-bfce-740807c42192" authors = ["J S @svilupp and contributors"] -version = "0.62.1" +version = "0.63.0" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/src/llm_interface.jl b/src/llm_interface.jl index 7e034b07d..245c20c8e 100644 --- a/src/llm_interface.jl +++ b/src/llm_interface.jl @@ -274,6 +274,25 @@ Requires one environment variable to be set: """ struct XAIOpenAISchema <: AbstractOpenAISchema end +""" + GoogleOpenAISchema + +Schema to call Google's Gemini API using OpenAI compatibility mode. [API Reference](https://ai.google.dev/gemini-api/docs/openai#rest) + +Links: +- [Get your API key](https://aistudio.google.com/apikey) +- [API Reference](https://ai.google.dev/gemini-api/docs/openai#rest) +- [Available models](https://ai.google.dev/models/gemini) + +Requires one environment variable to be set: +- `GOOGLE_API_KEY`: Your API key + +The base URL for the API is "https://generativelanguage.googleapis.com/v1beta" + +Warning: Token counting and cost counting have not yet been implemented by Google, so no such metrics will be available. If you need them, use the native GoogleSchema with the GoogleGenAI.jl library. 
+""" +struct GoogleOpenAISchema <: AbstractOpenAISchema end + abstract type AbstractOllamaSchema <: AbstractPromptSchema end """ @@ -517,4 +536,4 @@ end abstract type AbstractExtractedData end Base.show(io::IO, x::AbstractExtractedData) = dump(io, x; maxdepth = 1) "Check if the object is an instance of `AbstractExtractedData`" -isextracted(x) = x isa AbstractExtractedData \ No newline at end of file +isextracted(x) = x isa AbstractExtractedData diff --git a/src/llm_openai_schema_defs.jl b/src/llm_openai_schema_defs.jl index 5fbae52c3..e5cc14e7d 100644 --- a/src/llm_openai_schema_defs.jl +++ b/src/llm_openai_schema_defs.jl @@ -214,6 +214,34 @@ function OpenAI.create_chat(schema::XAIOpenAISchema, api_key = isempty(XAI_API_KEY) ? api_key : XAI_API_KEY OpenAI.create_chat(CustomOpenAISchema(), api_key, model, conversation; url, kwargs...) end + +# Add GoogleProvider implementation +Base.@kwdef struct GoogleProvider <: AbstractCustomProvider + api_key::String = "" + base_url::String = "https://generativelanguage.googleapis.com/v1beta" + api_version::String = "" +end + +function OpenAI.auth_header(provider::GoogleProvider, api_key::AbstractString) + OpenAI.auth_header(OpenAI.OpenAIProvider(provider.api_key, provider.base_url, provider.api_version), api_key) +end + +function OpenAI.create_chat(schema::GoogleOpenAISchema, + api_key::AbstractString, + model::AbstractString, + conversation; + url::String = "https://generativelanguage.googleapis.com/v1beta", + kwargs...) + api_key = isempty(GOOGLE_API_KEY) ? api_key : GOOGLE_API_KEY + # Use GoogleProvider instead of CustomProvider + provider = GoogleProvider(; api_key, base_url = url) + OpenAI.openai_request("chat/completions", + provider; + method = "POST", + messages = conversation, + model = model, + kwargs...) 
+end function OpenAI.create_chat(schema::DatabricksOpenAISchema, api_key::AbstractString, model::AbstractString, @@ -384,6 +412,21 @@ function OpenAI.create_embeddings(schema::XAIOpenAISchema, base_url = url) OpenAI.create_embeddings(provider, docs, model; kwargs...) end +function OpenAI.create_embeddings(schema::GoogleOpenAISchema, + api_key::AbstractString, + docs, + model::AbstractString; + url::String = "https://generativelanguage.googleapis.com/v1beta", + kwargs...) + api_key = isempty(GOOGLE_API_KEY) ? api_key : GOOGLE_API_KEY + provider = GoogleProvider(; api_key, base_url = url) + OpenAI.openai_request("embeddings", + provider; + method = "POST", + input = docs, + model = model, + kwargs...) +end function OpenAI.create_embeddings(schema::AzureOpenAISchema, api_key::AbstractString, docs, @@ -441,4 +484,4 @@ function OpenAI.create_images(schema::TestEchoOpenAISchema, schema.model_id = get(kwargs, :model, "") schema.inputs = prompt return schema -end \ No newline at end of file +end diff --git a/src/user_preferences.jl b/src/user_preferences.jl index 367766eaf..8aa18f50e 100644 --- a/src/user_preferences.jl +++ b/src/user_preferences.jl @@ -474,7 +474,11 @@ aliases = merge( "oro1" => "openai/o1-preview", "oro1m" => "openai/o1-mini", "orcop" => "cohere/command-r-plus-08-2024", - "orco" => "cohere/command-r-08-2024" + "orco" => "cohere/command-r-08-2024", + ## Gemini 1.5 Models + "gem15p" => "gemini-1.5-pro-latest", + "gem15f8" => "gemini-1.5-flash-8b-latest", + "gem15f" => "gemini-1.5-flash-latest" ), ## Load aliases from preferences as well @load_preference("MODEL_ALIASES", default=Dict{String, String}())) @@ -509,12 +513,12 @@ registry = Dict{String, ModelSpec}( OpenAISchema(), 1e-5, 3e-5, - "GPT-4 Turbo is an updated version of GPT4 that is much faster and the cheaper to use. 0125 refers to the release date of January 25, 2024."), + "GPT-4 Turbo is an updated version of GPT4 that is much faster and cheaper to use. 
0125 refers to the release date of January 25, 2024."), "gpt-4-turbo" => ModelSpec("gpt-4-turbo", OpenAISchema(), 1e-5, 3e-5, - "GPT-4 Turbo is an updated version of GPT4 that is much faster and the cheaper to use. This is the general name for whatever is the latest GPT4 Turbo preview release. In April-24, it points to version 2024-04-09."), + "GPT-4 Turbo is an updated version of GPT4 that is much faster and cheaper to use. This is the general name for whatever is the latest GPT4 Turbo preview release."), "gpt-4-turbo-2024-04-09" => ModelSpec("gpt-4-turbo-2024-04-09", OpenAISchema(), 1e-5, @@ -1103,7 +1107,23 @@ registry = Dict{String, ModelSpec}( XAIOpenAISchema(), 5e-6, 15e-6, - "XAI's Grok 2 beta model. Max 128K context.") + "XAI's Grok 2 beta model. Max 128K context."), + ## Gemini 1.5 Models + "gemini-1.5-pro-latest" => ModelSpec("gemini-1.5-pro-latest", + GoogleOpenAISchema(), + 1e-6, + 5e-6, + "Gemini 1.5 Pro is Google's latest large language model with enhanced capabilities across reasoning, math, coding, and multilingual tasks. 128K context window."), + "gemini-1.5-flash-8b-latest" => ModelSpec("gemini-1.5-flash-8b-latest", + GoogleOpenAISchema(), + 3.75e-8, + 1.5e-7, + "Gemini 1.5 Flash 8B is a smaller, faster version of Gemini 1.5 optimized for quick responses while maintaining good performance. 128K context window."), + "gemini-1.5-flash-latest" => ModelSpec("gemini-1.5-flash-latest", + GoogleOpenAISchema(), + 7.5e-8, + 3.0e-7, + "Gemini 1.5 Flash is a high-performance model optimized for speed while maintaining strong capabilities across various tasks. 
128K context window.") ) """ diff --git a/test/llm_openai_schema_def.jl b/test/llm_openai_schema_def.jl new file mode 100644 index 000000000..75d009094 --- /dev/null +++ b/test/llm_openai_schema_def.jl @@ -0,0 +1,123 @@ +using Test +using PromptingTools: GoogleOpenAISchema, AIMessage, aigenerate, aiembed + +@testset "GoogleOpenAISchema" begin + # Save original API key + original_api_key = PromptingTools.GOOGLE_API_KEY + + + # Test with empty GOOGLE_API_KEY + PromptingTools.GOOGLE_API_KEY = "" + PORT = rand(10000:20000) + echo_server = HTTP.serve!(PORT, verbose = -1) do req + auth_header = HTTP.header(req, "Authorization") + @test HTTP.header(req, "Authorization") == "Bearer test_key" + + content = JSON3.read(req.body) + + response = Dict( + :choices => [ + Dict(:message => Dict(:content => "Test response"), + :finish_reason => "stop") + ], + :usage => Dict(:total_tokens => 5, + :prompt_tokens => 5, + :completion_tokens => 0)) + return HTTP.Response(200, JSON3.write(response)) + end + + msg = aigenerate(GoogleOpenAISchema(), + "Test prompt"; + api_key = "test_key", + model = "gemini-1.5-pro-latest", + api_kwargs = (; url = "http://localhost:$(PORT)")) + + @test msg.content == "Test response" + @test msg.finish_reason == "stop" + close(echo_server) + + # Test with non-empty GOOGLE_API_KEY + PromptingTools.GOOGLE_API_KEY = "env_key" + PORT = rand(10000:20000) + echo_server = HTTP.serve!(PORT, verbose = -1) do req + auth_header = HTTP.header(req, "Authorization") + @test HTTP.header(req, "Authorization") == "Bearer env_key" + + content = JSON3.read(req.body) + + response = Dict( + :choices => [ + Dict(:message => Dict(:content => "Test response"), + :finish_reason => "stop") + ], + :usage => Dict(:total_tokens => 5, + :prompt_tokens => 5, + :completion_tokens => 0)) + return HTTP.Response(200, JSON3.write(response)) + end + + msg = aigenerate(GoogleOpenAISchema(), + "Test prompt"; + api_key = "test_key", # This should be ignored since GOOGLE_API_KEY is set + model = 
"gemini-1.5-pro-latest", + api_kwargs = (; url = "http://localhost:$(PORT)")) + + @test msg.content == "Test response" + @test msg.finish_reason == "stop" + close(echo_server) + + # Test embeddings with empty GOOGLE_API_KEY + PromptingTools.GOOGLE_API_KEY = "" + PORT = rand(10000:20000) + echo_server = HTTP.serve!(PORT, verbose = -1) do req + auth_header = HTTP.header(req, "Authorization") + @test HTTP.header(req, "Authorization") == "Bearer test_key" + + content = JSON3.read(req.body) + + response = Dict(:data => [Dict(:embedding => ones(128))], + :usage => Dict(:total_tokens => 5, + :prompt_tokens => 5, + :completion_tokens => 0)) + return HTTP.Response(200, JSON3.write(response)) + end + + msg = aiembed(GoogleOpenAISchema(), + "Test prompt"; + api_key = "test_key", + model = "gemini-1.5-pro-latest", + api_kwargs = (; url = "http://localhost:$(PORT)")) + + @test msg.content == ones(128) + @test msg.tokens == (5, 0) + close(echo_server) + + # Test embeddings with non-empty GOOGLE_API_KEY + PromptingTools.GOOGLE_API_KEY = "env_key" + PORT = rand(10000:20000) + echo_server = HTTP.serve!(PORT, verbose = -1) do req + auth_header = HTTP.header(req, "Authorization") + @test HTTP.header(req, "Authorization") == "Bearer env_key" + + content = JSON3.read(req.body) + + response = Dict(:data => [Dict(:embedding => ones(128))], + :usage => Dict(:total_tokens => 5, + :prompt_tokens => 5, + :completion_tokens => 0)) + return HTTP.Response(200, JSON3.write(response)) + end + + msg = aiembed(GoogleOpenAISchema(), + "Test prompt"; + api_key = "test_key", # This should be ignored since GOOGLE_API_KEY is set + model = "gemini-1.5-pro-latest", + api_kwargs = (; url = "http://localhost:$(PORT)")) + + @test msg.content == ones(128) + @test msg.tokens == (5, 0) + close(echo_server) + + # Restore original API key + PromptingTools.GOOGLE_API_KEY = original_api_key +end diff --git a/test/runtests.jl b/test/runtests.jl index dd42f6b68..930a83861 100644 --- a/test/runtests.jl +++ 
b/test/runtests.jl @@ -28,6 +28,7 @@ end include("llm_ollama_managed.jl") include("llm_ollama.jl") include("llm_google.jl") + include("llm_openai_schema_def.jl") include("llm_anthropic.jl") include("llm_sharegpt.jl") include("llm_tracer.jl") diff --git a/test_analysis.txt b/test_analysis.txt new file mode 100644 index 000000000..c13bf3058 --- /dev/null +++ b/test_analysis.txt @@ -0,0 +1,54 @@ +Test Coverage Analysis for GoogleOpenAISchema + +Current Issues: +- 6 uncovered lines in src/llm_openai_schema_defs.jl +- Patch coverage at 14.29% + +Implementation Analysis: +1. GoogleProvider struct (lines 219-223) + - Basic struct definition, likely covered + - auth_header method (lines 225-227) might be uncovered + +2. create_chat method (lines 229-244) + - Key lines that might be uncovered: + * Line 235: api_key override logic + * Line 237: GoogleProvider instantiation + * Lines 238-243: OpenAI.openai_request call + +3. create_embeddings method (lines 415-429) + - Similar pattern to create_chat + - Potential uncovered lines: + * Line 421: api_key override logic + * Line 422: GoogleProvider instantiation + * Lines 423-428: OpenAI.openai_request call + +Hypotheses for Coverage Issues: +1. Streaming callback paths not tested + - We're using openai_request directly, which might have different behavior + - Solution: Add tests for streaming scenarios + +2. Error handling paths not tested + - No tests for API errors or invalid responses + - Solution: Add tests with mock error responses + +3. Provider instantiation edge cases + - GoogleProvider creation with different URL combinations not tested + - Solution: Add tests with various URL configurations + +4. API key override logic not fully tested + - Need to test all combinations of empty/non-empty GOOGLE_API_KEY + - Solution: Expand current tests to cover more scenarios + +5. 
Request parameter handling not fully tested + - Different combinations of optional parameters not tested + - Solution: Add tests with various kwargs combinations + +Most Likely Issue: +Hypothesis #4 seems most likely - our tests don't fully exercise the API key override logic in both create_chat and create_embeddings methods. The current tests only check basic scenarios, but we need to test edge cases and different combinations of API keys. + +Action Plan: +1. Add tests for API key override edge cases +2. Add tests for different URL configurations +3. Add tests for error scenarios +4. Add tests for streaming callbacks +5. Add tests for various kwargs combinations