Merge branch 'main' of https://github.com/Tonic-AI/llama-tonic
Showing 2 changed files with 218 additions and 0 deletions.
llama-index-integrations/llms/llama-index-llms-githubllm/README.md (90 additions, 0 deletions)
# LlamaIndex LLMs Integration: GithubLLM

GithubLLM is a custom LLM (large language model) interface for interacting with AI models hosted on GitHub's inference endpoint, with automatic fallback to Azure when rate limits are reached.

## Features

- Seamless integration with GitHub-hosted AI models
- Automatic fallback to Azure when GitHub rate limits are reached
- Support for both completion and chat-based interactions
- Streaming support for both completion and chat responses
- Easy integration with the LlamaIndex ecosystem

## Installation

```bash
pip install llama-index-llms-githubllm
```
## Usage

```python
from llama_index.core.llms import ChatMessage
from llama_index.llms.githubllm import GithubLLM

# Initialize the LLM
llm = GithubLLM(
    model="gpt-4o",
    system_prompt="You are a helpful assistant.",
    use_azure_fallback=True,
)

# Completion
response = llm.complete("What is the capital of France?")
print(response.text)

# Chat
messages = [
    ChatMessage(role="user", content="Tell me about the French Revolution."),
    ChatMessage(
        role="assistant",
        content="The French Revolution was a period of major social and political upheaval in France...",
    ),
    ChatMessage(role="user", content="What were the main causes?"),
]
response = llm.chat(messages)
print(response.message.content)

# Streaming chat
for chunk in llm.stream_chat(
    [ChatMessage(role="user", content="Can you elaborate on the Reign of Terror?")]
):
    print(chunk.message.content, end="", flush=True)
```
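The feature list also covers streaming completions. Judging from the test suite, `stream_complete` yields `CompletionResponse` chunks whose content is read via `.text`, so the analogous call would look like this (the prompt is just an illustration):

```python
# Streaming completion; chunk.text carries each streamed fragment
for chunk in llm.stream_complete("Summarize the French Revolution in one sentence."):
    print(chunk.text, end="", flush=True)
```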
## Configuration

- Set the `GITHUB_TOKEN` environment variable for GitHub API access
- Set the `AZURE_API_KEY` environment variable for the Azure fallback (see the example below)
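In a local shell, exporting the variables might look like this (placeholder values, not real credentials):

```bash
export GITHUB_TOKEN="ghp_xxxxxxxxxxxx"  # GitHub personal access token (placeholder)
export AZURE_API_KEY="your-azure-key"   # Azure fallback key (placeholder)
```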
## Rate Limits

GithubLLM respects the following rate limits:

| Model Type | Requests/min | Requests/day | Tokens/request (in/out) | Concurrent Requests |
| ---------- | ------------ | ------------ | ----------------------- | ------------------- |
| Low        | 15           | 150          | 8000/4000               | 5                   |
| High       | 10           | 50           | 8000/4000               | 2                   |
| Embedding  | 15           | 150          | 64000                   | 5                   |

Note: Rate limits may vary based on your GitHub account type (Free, Copilot Individual, Copilot Business, Copilot Enterprise).
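Conceptually, the automatic fallback behaves like a single retry against Azure once GitHub signals a rate limit. The sketch below is illustrative only: the endpoint URLs, header names, and the `post_with_fallback` helper are assumptions about one plausible implementation, not the library's actual internals.

```python
# Illustrative sketch of rate-limit fallback (NOT the library's real internals).
# Assumed: the GitHub inference endpoint returns HTTP 429 when a limit is hit;
# both URLs below are placeholders.
import os

import requests

GITHUB_URL = "https://models.example/chat/completions"  # placeholder endpoint
AZURE_URL = "https://your-resource.openai.azure.com/chat/completions"  # placeholder


def post_with_fallback(payload: dict) -> dict:
    resp = requests.post(
        GITHUB_URL,
        headers={"Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}"},
        json=payload,
    )
    if resp.status_code == 429:  # rate limit reached; fall back to Azure
        resp = requests.post(
            AZURE_URL,
            headers={"api-key": os.environ["AZURE_API_KEY"]},
            json=payload,
        )
    resp.raise_for_status()
    return resp.json()
```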
## Going to Production

For production use, replace the GitHub token with a paid Azure account token. No other code changes are required.

## License

This project is licensed under the MIT License.

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

## Disclaimer

This library is for prototyping and experimentation. Ensure compliance with GitHub's and Azure's terms of service when using it in production.
tests/test_github_llm.py (128 additions, 0 deletions)
# ./tests/test_github_llm.py

import pytest
from unittest.mock import patch, MagicMock

from llama_index.core.base.llms.types import (
    ChatMessage,
    ChatResponse,
    CompletionResponse,
)
from llama_index.llms.githubllm import GithubLLM


@pytest.fixture()
def github_llm():
    return GithubLLM(model="gpt-4o", system_prompt="You are a helpful assistant.")


@pytest.fixture()
def mock_response():
    # Mimics the JSON body returned by the chat completions endpoint.
    mock = MagicMock()
    mock.json.return_value = {
        "choices": [{"message": {"content": "This is a test response."}}]
    }
    return mock


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_complete(mock_post, github_llm, mock_response):
    mock_post.return_value = mock_response

    response = github_llm.complete("What is the capital of France?")

    assert isinstance(response, CompletionResponse)
    assert response.text == "This is a test response."
    mock_post.assert_called_once()


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_chat(mock_post, github_llm, mock_response):
    mock_post.return_value = mock_response

    messages = [ChatMessage(role="user", content="Tell me about Python.")]
    response = github_llm.chat(messages)

    assert isinstance(response, ChatResponse)
    assert response.message.content == "This is a test response."
    assert response.message.role == "assistant"
    mock_post.assert_called_once()


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_stream_complete(mock_post, github_llm):
    # Each line yielded by the mocked HTTP response becomes one streamed chunk.
    mock_response = MagicMock()
    mock_response.iter_lines.return_value = [b"chunk1", b"chunk2"]
    mock_post.return_value = mock_response

    generator = github_llm.stream_complete("What is the capital of France?")
    responses = list(generator)

    assert len(responses) == 2
    assert all(isinstance(r, CompletionResponse) for r in responses)
    assert [r.text for r in responses] == ["chunk1", "chunk2"]
    mock_post.assert_called_once()


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_stream_chat(mock_post, github_llm):
    mock_response = MagicMock()
    mock_response.iter_lines.return_value = [b"chunk1", b"chunk2"]
    mock_post.return_value = mock_response

    messages = [ChatMessage(role="user", content="Tell me about Python.")]
    generator = github_llm.stream_chat(messages)
    responses = list(generator)

    assert len(responses) == 2
    assert all(isinstance(r, ChatResponse) for r in responses)
    assert [r.message.content for r in responses] == ["chunk1", "chunk2"]
    assert all(r.message.role == "assistant" for r in responses)
    mock_post.assert_called_once()


def test_metadata(github_llm):
    metadata = github_llm.metadata

    assert metadata.model_name == "gpt-4o"
    assert metadata.num_output == 256  # Default value
    assert metadata.context_window == 4096  # Default value


def test_unsupported_model():
    with pytest.raises(ValueError):
        GithubLLM(model="unsupported-model")


@patch.dict(
    # Azure key included so the fallback path has credentials available.
    "os.environ",
    {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"},
)
@patch("requests.post")
def test_azure_fallback(mock_post, github_llm, mock_response):
    # Simulate a GitHub API failure followed by a successful Azure response.
    mock_post.side_effect = [Exception("GitHub API Error"), mock_response]

    response = github_llm.complete("What is the capital of France?")

    assert isinstance(response, CompletionResponse)
    assert response.text == "This is a test response."
    assert mock_post.call_count == 2  # Called twice: once for GitHub, once for Azure


@patch.dict("os.environ", {}, clear=True)  # clear=True actually removes the tokens
def test_missing_env_variables(github_llm):
    with pytest.raises(ValueError):
        github_llm.complete("What is the capital of France?")


def test_class_name(github_llm):
    assert github_llm.class_name() == "GithubLLM"
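Assuming `pytest` is installed, the suite can be run from the package root with:

```bash
pytest tests/test_github_llm.py
```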