`llama-index-integrations/llms/llama-index-llms-githubllm/README.md`
# LlamaIndex LLMs Integration: GithubLLM

GithubLLM is a custom LLM integration that lets you interact with AI models hosted on GitHub's inference endpoint, with automatic fallback to Azure when GitHub rate limits are reached.

## Features

- Seamless integration with GitHub-hosted AI models
- Automatic fallback to Azure when GitHub rate limits are reached
- Support for both completion and chat-based interactions
- Streaming support for both completion and chat responses
- Easy integration with the LlamaIndex ecosystem

## Installation

```bash
pip install llama-index-llms-githubllm
```

## Usage

```python
from llama_index.core.llms import ChatMessage
from llama_index.llms.githubllm import GithubLLM

# Initialize the LLM
llm = GithubLLM(
    model="gpt-4o",
    system_prompt="You are a helpful assistant.",
    use_azure_fallback=True,
)

# Completion
response = llm.complete("What is the capital of France?")
print(response.text)

# Chat
messages = [
    ChatMessage(role="user", content="Tell me about the French Revolution."),
    ChatMessage(
        role="assistant",
        content="The French Revolution was a period of major social and political upheaval in France...",
    ),
    ChatMessage(role="user", content="What were the main causes?"),
]
response = llm.chat(messages)
print(response.message.content)

# Streaming
for chunk in llm.stream_chat(
    [ChatMessage(role="user", content="Can you elaborate on the Reign of Terror?")]
):
    print(chunk.message.content, end="", flush=True)
```

## Configuration

- Set `GITHUB_TOKEN` environment variable for GitHub API access
- Set `AZURE_API_KEY` environment variable for Azure fallback
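
For example, both variables can be set programmatically before the client is constructed; the values below are placeholders, not working credentials:

```python
import os

# Placeholder values — substitute your own credentials.
os.environ["GITHUB_TOKEN"] = "<your-github-token>"  # GitHub inference endpoint access
os.environ["AZURE_API_KEY"] = "<your-azure-key>"  # used only when falling back to Azure
```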

## Rate Limits

GithubLLM is subject to the following GitHub rate limits:

| Model Type | Requests/min | Requests/day | Tokens/request (in/out) | Concurrent Requests |
| ---------- | ------------ | ------------ | ----------------------- | ------------------- |
| Low | 15 | 150 | 8000/4000 | 5 |
| High | 10 | 50 | 8000/4000 | 2 |
| Embedding | 15 | 150 | 64000 | 5 |

Note: Rate limits may vary based on your GitHub account type (Free, Copilot Individual, Copilot Business, Copilot Enterprise).
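
If you drive the client from a loop, simple client-side pacing can help you stay under the per-minute budget. The sketch below is illustrative only (the `REQUESTS_PER_MINUTE` value and prompts are assumptions, not part of this library); the automatic Azure fallback still covers any limits you do hit:

```python
import time

from llama_index.llms.githubllm import GithubLLM

REQUESTS_PER_MINUTE = 10  # high-tier budget from the table; adjust for your model's tier
MIN_INTERVAL = 60.0 / REQUESTS_PER_MINUTE  # minimum seconds between requests

llm = GithubLLM(model="gpt-4o", use_azure_fallback=True)

last_call = 0.0
for prompt in ["What is the capital of France?", "What is the capital of Italy?"]:
    # Wait just long enough to stay under the per-minute request budget.
    elapsed = time.monotonic() - last_call
    if elapsed < MIN_INTERVAL:
        time.sleep(MIN_INTERVAL - elapsed)
    last_call = time.monotonic()
    print(llm.complete(prompt).text)
```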

## Going to Production

For production use, replace the GitHub token with a paid Azure account token. No other code changes are required.
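
A minimal sketch of that swap, assuming credentials continue to flow through the environment variables described above (the key is a placeholder):

```python
import os

# Production: supply the paid Azure credential in place of the GitHub token.
os.environ["AZURE_API_KEY"] = "<your-paid-azure-key>"
os.environ.pop("GITHUB_TOKEN", None)  # drop the development token, if present
```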

## License

This project is licensed under the MIT License.

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

## Disclaimer

This library is intended for prototyping and experimentation. Ensure compliance with GitHub's and Azure's terms of service when using it in production.
# ./tests/test_github_llm.py

import pytest
from unittest.mock import patch, MagicMock
from llama_index.core.base.llms.types import (
    ChatMessage,
    ChatResponse,
    CompletionResponse,
)
from llama_index.llms.githubllm import GithubLLM


@pytest.fixture()
def github_llm():
    return GithubLLM(model="gpt-4o", system_prompt="You are a helpful assistant.")


@pytest.fixture()
def mock_response():
    mock = MagicMock()
    mock.json.return_value = {
        "choices": [{"message": {"content": "This is a test response."}}]
    }
    return mock


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_complete(mock_post, github_llm, mock_response):
    mock_post.return_value = mock_response

    response = github_llm.complete("What is the capital of France?")

    assert isinstance(response, CompletionResponse)
    assert response.text == "This is a test response."
    mock_post.assert_called_once()


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_chat(mock_post, github_llm, mock_response):
    mock_post.return_value = mock_response

    messages = [ChatMessage(role="user", content="Tell me about Python.")]
    response = github_llm.chat(messages)

    assert isinstance(response, ChatResponse)
    assert response.message.content == "This is a test response."
    assert response.message.role == "assistant"
    mock_post.assert_called_once()


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_stream_complete(mock_post, github_llm):
    mock_response = MagicMock()
    mock_response.iter_lines.return_value = [b"chunk1", b"chunk2"]
    mock_post.return_value = mock_response

    generator = github_llm.stream_complete("What is the capital of France?")
    responses = list(generator)

    assert len(responses) == 2
    assert all(isinstance(r, CompletionResponse) for r in responses)
    assert [r.text for r in responses] == ["chunk1", "chunk2"]
    mock_post.assert_called_once()


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_stream_chat(mock_post, github_llm):
    mock_response = MagicMock()
    mock_response.iter_lines.return_value = [b"chunk1", b"chunk2"]
    mock_post.return_value = mock_response

    messages = [ChatMessage(role="user", content="Tell me about Python.")]
    generator = github_llm.stream_chat(messages)
    responses = list(generator)

    assert len(responses) == 2
    assert all(isinstance(r, ChatResponse) for r in responses)
    assert [r.message.content for r in responses] == ["chunk1", "chunk2"]
    assert all(r.message.role == "assistant" for r in responses)
    mock_post.assert_called_once()


def test_metadata(github_llm):
    metadata = github_llm.metadata

    assert metadata.model_name == "gpt-4o"
    assert metadata.num_output == 256  # Default value
    assert metadata.context_window == 4096  # Default value


def test_unsupported_model():
    with pytest.raises(ValueError):
        GithubLLM(model="unsupported-model")


@patch.dict("os.environ", {"GITHUB_TOKEN": "fake_token"})
@patch("requests.post")
def test_azure_fallback(mock_post, github_llm, mock_response):
# Simulate GitHub API failure
mock_post.side_effect = [Exception("GitHub API Error"), mock_response]

response = github_llm.complete("What is the capital of France?")

assert isinstance(response, CompletionResponse)
assert response.text == "This is a test response."
assert mock_post.call_count == 2 # Called twice: once for GitHub, once for Azure


@patch.dict("os.environ", {})
def test_missing_env_variables(github_llm):
with pytest.raises(ValueError):
github_llm.complete("What is the capital of France?")


def test_class_name(github_llm):
    assert github_llm.class_name() == "GithubLLM"
