Skip to content

Commit

Permalink
847 add ability to send images to the assistant when using ollama (#870)
Browse files Browse the repository at this point in the history
* remove warning

* pass image url instance var

* default to llama3.2

* example

* back to 3.1

* pass base64 encoded image

* spec

* use llava

* doc

* changelog entry

* fix linter

---------

Co-authored-by: Andrei Bondarev <[email protected]>
  • Loading branch information
sergiobayona and andreibondarev authored Nov 12, 2024
1 parent 2406166 commit e8a1fc6
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
- [OPTIM] [https://github.com/patterns-ai-core/langchainrb/pull/867] Refactor `GoogleGeminiMessage#to_hash` and `OpenAIMessage#to_hash` methods.
- [OPTIM] [https://github.com/patterns-ai-core/langchainrb/pull/849] Simplify Langchain::LLM::AwsBedrock class
- [BUGFIX] [https://github.com/patterns-ai-core/langchainrb/pull/869] AnthropicMessage now correctly handles tool calls with content.
- [OPTIM] [https://github.com/patterns-ai-core/langchainrb/pull/870] Assistant, when using Ollama (e.g. with the llava model), now also accepts `image_url` in the message.

## [0.19.0] - 2024-10-23
- [BREAKING] [https://github.com/patterns-ai-core/langchainrb/pull/840] Rename `chat_completion_model_name` parameter to `chat_model` in Langchain::LLM parameters.
Expand Down
13 changes: 13 additions & 0 deletions examples/ollama_inquire_about_image.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
require_relative "../lib/langchain"
require "faraday"

# Example: send an image URL together with a text prompt to an Assistant
# backed by Ollama, using the "llava" chat model.
ollama = Langchain::LLM::Ollama.new(default_options: {chat_model: "llava"})
assistant = Langchain::Assistant.new(llm: ollama)

picture_url = "https://gist.githubusercontent.com/andreibondarev/b6f444194d0ee7ab7302a4d83184e53e/raw/099e10af2d84638211e25866f71afa7308226365/sf-cable-car.jpg"

# Add the message (prompt + image) and run the assistant in one call.
result = assistant.add_message_and_run(
  content: "Please describe this image",
  image_url: picture_url
)

# Print whatever the run returned, in inspect form.
puts result.inspect
4 changes: 1 addition & 3 deletions lib/langchain/assistant/llm/adapters/ollama.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@ def build_chat_params(
# @param tool_call_id [String] The tool call ID
# @return [Messages::OllamaMessage] The Ollama message
def build_message(role:, content: nil, image_url: nil, tool_calls: [], tool_call_id: nil)
  # image_url is forwarded to the message instead of being dropped with a
  # "not supported" warning; exactly one message object is built per call.
  Messages::OllamaMessage.new(
    role: role,
    content: content,
    image_url: image_url,
    tool_calls: tool_calls,
    tool_call_id: tool_call_id
  )
end

# Extract the tool call information from the OpenAI tool call hash
Expand Down
6 changes: 5 additions & 1 deletion lib/langchain/assistant/messages/ollama_message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,18 @@ class OllamaMessage < Base
#
# @param role [String] The role of the message
# @param content [String] The content of the message
# @param image_url [String] The URL of the image to include in the message
# @param tool_calls [Array<Hash>] The tool calls made in the message
# @param tool_call_id [String] The ID of the tool call
# @raise [ArgumentError] if role is not one of ROLES, if tool_calls is not an array of hashes,
#   or if image_url is given but is not, in its entirety, an absolute URI
def initialize(role:, content: nil, image_url: nil, tool_calls: [], tool_call_id: nil)
  raise ArgumentError, "Role must be one of #{ROLES.join(", ")}" unless ROLES.include?(role)
  raise ArgumentError, "Tool calls must be an array of hashes" unless tool_calls.is_a?(Array) && tool_calls.all? { |tool_call| tool_call.is_a?(Hash) }

  # make_regexp is unanchored (it is meant for extracting URIs out of text), so
  # on its own it accepts any string that merely contains a URI. Anchor it so
  # the whole value has to be a URI.
  if image_url && !/\A#{URI::DEFAULT_PARSER.make_regexp}\z/.match?(image_url)
    raise ArgumentError, "image_url must be a valid url"
  end

  @role = role
  # Some Tools return content as a JSON hence `.to_s`
  @content = content.to_s
  @image_url = image_url
  @tool_calls = tool_calls
  @tool_call_id = tool_call_id
end
Expand All @@ -38,6 +41,7 @@ def to_hash
{}.tap do |h|
h[:role] = role
h[:content] = content if content # Content is nil for tool calls
h[:images] = [image.base64] if image
h[:tool_calls] = tool_calls if tool_calls.any?
h[:tool_call_id] = tool_call_id if tool_call_id
end
Expand Down
1 change: 1 addition & 0 deletions lib/langchain/utils/image_wrapper.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# frozen_string_literal: true

require "open-uri"
require "base64"

module Langchain
module Utils
Expand Down
12 changes: 12 additions & 0 deletions spec/langchain/assistant/llm/adapters/ollama_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,16 @@
expect(subject.tool_role).to eq("tool")
end
end

# The adapter must hand back an OllamaMessage when given an image_url
# alongside the role and content.
describe "#build_message" do
  it "returns an Ollama message" do
    built = subject.build_message(role: "user", content: "Hello", image_url: "https://example.com/image.jpg")

    expect(built).to be_a(Langchain::Assistant::Messages::OllamaMessage)
  end
end
end
21 changes: 20 additions & 1 deletion spec/langchain/assistant/messages/ollama_message_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
let(:valid_roles) { ["system", "assistant", "user", "tool"] }
let(:role) { "assistant" }
let(:content) { "This is a message" }
let(:image_url) { "https://example.com/image.jpg" }
let(:raw_response) { JSON.parse(File.read("spec/fixtures/llm/ollama/chat_with_tool_calls.json")) }
let(:response) { Langchain::LLM::OllamaResponse.new(raw_response) }
let(:tool_calls) { response.tool_calls }
Expand All @@ -14,7 +15,7 @@
describe "#initialize" do
context "with valid arguments" do
it "creates an instance of OllamaMessage" do
message = described_class.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
message = described_class.new(role: role, content: content, image_url: image_url, tool_calls: tool_calls, tool_call_id: tool_call_id)
expect(message).to be_an_instance_of(described_class)
end
end
Expand All @@ -34,6 +35,14 @@
expect { described_class.new(role: role, tool_calls: tool_calls) }.to raise_error(ArgumentError, "Tool calls must be an array of hashes")
end
end

# A string that is not a URL must be rejected at construction time.
context "with invalid image_url" do
  let(:image_url) { "invalid_image_url" }

  it "raises an ArgumentError" do
    expect {
      described_class.new(role: role, image_url: image_url)
    }.to raise_error(ArgumentError, "image_url must be a valid url")
  end
end
end

describe "#to_hash" do
Expand Down Expand Up @@ -72,6 +81,16 @@
expect(message.to_hash).to eq({role: "assistant", content: "", tool_calls: [tool_call]})
end
end

# When the message carries an image, #to_hash must expose its base64 data
# under the :images key.
context "with an image" do
  let(:message) do
    described_class.new(
      role: "user",
      content: "Describe this image",
      image_url: "https://example.com/image.jpg"
    )
  end

  it "returns a hash with the images key" do
    # Stub #image so the base64 payload is a known value.
    stubbed_image = double(base64: "base64_data", mime_type: "image/jpeg")
    allow(message).to receive(:image).and_return(stubbed_image)

    expected_hash = {role: "user", content: "Describe this image", images: ["base64_data"]}
    expect(message.to_hash).to eq(expected_hash)
  end
end
end

describe "#llm?" do
Expand Down

0 comments on commit e8a1fc6

Please sign in to comment.