`llama-index-integrations/llms/llama-index-llms-githubllm/README.md`
# LlamaIndex LLMs Integration: GithubLLM

GithubLLM is a custom LLM integration that lets you interact with AI models hosted on GitHub's inference endpoint, with automatic fallback to Azure when GitHub rate limits are reached.

## Features

- Seamless integration with GitHub-hosted AI models
- Automatic fallback to Azure when GitHub rate limits are reached
- Support for both completion and chat-based interactions
- Streaming support for both completion and chat responses
- Easy integration with the LlamaIndex ecosystem

## Installation

```bash
pip install llama-index-llms-githubllm
```

## Usage

```python
from llama_index.core.llms import ChatMessage
from llama_index.llms.githubllm import GithubLLM

# Initialize the LLM
llm = GithubLLM(
    model="gpt-4o",
    system_prompt="You are a helpful assistant.",
    use_azure_fallback=True,
)

# Completion
response = llm.complete("What is the capital of France?")
print(response.text)

# Chat
messages = [
    ChatMessage(role="user", content="Tell me about the French Revolution."),
    ChatMessage(
        role="assistant",
        content="The French Revolution was a period of major social and political upheaval in France...",
    ),
    ChatMessage(role="user", content="What were the main causes?"),
]
response = llm.chat(messages)
print(response.message.content)

# Streaming
for chunk in llm.stream_chat(
    [ChatMessage(role="user", content="Can you elaborate on the Reign of Terror?")]
):
    print(chunk.message.content, end="", flush=True)
```

## Configuration

- Set `GITHUB_TOKEN` environment variable for GitHub API access
- Set `AZURE_API_KEY` environment variable for Azure fallback
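
For example, both variables can be set programmatically before the client is constructed; the values below are placeholders, not working credentials:

```python
import os

# Placeholder values — substitute your own credentials.
os.environ["GITHUB_TOKEN"] = "<your-github-token>"  # GitHub inference endpoint access
os.environ["AZURE_API_KEY"] = "<your-azure-key>"  # used only when falling back to Azure
```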

## Rate Limits

GithubLLM is subject to the following GitHub rate limits:

| Model Type | Requests/min | Requests/day | Tokens/request (in/out) | Concurrent Requests |
| ---------- | ------------ | ------------ | ----------------------- | ------------------- |
| Low | 15 | 150 | 8000/4000 | 5 |
| High | 10 | 50 | 8000/4000 | 2 |
| Embedding | 15 | 150 | 64000 | 5 |

Note: Rate limits may vary based on your GitHub account type (Free, Copilot Individual, Copilot Business, Copilot Enterprise).
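
If you drive the client from a loop, simple client-side pacing can help you stay under the per-minute budget. The sketch below is illustrative only (the `REQUESTS_PER_MINUTE` value and prompts are assumptions, not part of this library); the automatic Azure fallback still covers any limits you do hit:

```python
import time

from llama_index.llms.githubllm import GithubLLM

REQUESTS_PER_MINUTE = 10  # high-tier budget from the table; adjust for your model's tier
MIN_INTERVAL = 60.0 / REQUESTS_PER_MINUTE  # minimum seconds between requests

llm = GithubLLM(model="gpt-4o", use_azure_fallback=True)

last_call = 0.0
for prompt in ["What is the capital of France?", "What is the capital of Italy?"]:
    # Wait just long enough to stay under the per-minute request budget.
    elapsed = time.monotonic() - last_call
    if elapsed < MIN_INTERVAL:
        time.sleep(MIN_INTERVAL - elapsed)
    last_call = time.monotonic()
    print(llm.complete(prompt).text)
```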

## Going to Production

For production use, replace the GitHub token with a paid Azure account token. No other code changes are required.
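
A minimal sketch of that swap, assuming credentials continue to flow through the environment variables described above (the key is a placeholder):

```python
import os

# Production: supply the paid Azure credential in place of the GitHub token.
os.environ["AZURE_API_KEY"] = "<your-paid-azure-key>"
os.environ.pop("GITHUB_TOKEN", None)  # drop the development token, if present
```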

## License

This project is licensed under the MIT License.

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

## Disclaimer

This library is intended for prototyping and experimentation. Ensure compliance with GitHub's and Azure's terms of service when using it in production.
# ./tests/test_github_llm.py

import pytest
from unittest.mock import patch, MagicMock
from llama_index.core.base.llms.types import (
    ChatMessage,
    ChatResponse,
    CompletionResponse,
)
from llama_index.llms.githubllm import GithubLLM


@pytest.fixture()
def github_llm():
    return GithubLLM(model="gpt-4o", system_prompt="You are a helpful assistant.")


@pytest.fixture()
def mock_response():
    mock = MagicMock()
    mock.json.return_value = {
        "choices": [{"message": {"content": "This is a test response."}}]
    }
    return mock


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_complete(mock_post, github_llm, mock_response):
    mock_post.return_value = mock_response

    response = github_llm.complete("What is the capital of France?")

    assert isinstance(response, CompletionResponse)
    assert response.text == "This is a test response."
    mock_post.assert_called_once()


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_chat(mock_post, github_llm, mock_response):
    mock_post.return_value = mock_response

    messages = [ChatMessage(role="user", content="Tell me about Python.")]
    response = github_llm.chat(messages)

    assert isinstance(response, ChatResponse)
    assert response.message.content == "This is a test response."
    assert response.message.role == "assistant"
    mock_post.assert_called_once()


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_stream_complete(mock_post, github_llm):
    mock_response = MagicMock()
    mock_response.iter_lines.return_value = [b"chunk1", b"chunk2"]
    mock_post.return_value = mock_response

    generator = github_llm.stream_complete("What is the capital of France?")
    responses = list(generator)

    assert len(responses) == 2
    assert all(isinstance(r, CompletionResponse) for r in responses)
    assert [r.text for r in responses] == ["chunk1", "chunk2"]
    mock_post.assert_called_once()


@patch.dict(
    "os.environ", {"GITHUB_TOKEN": "fake_token", "AZURE_API_KEY": "fake_azure_key"}
)
@patch("requests.post")
def test_stream_chat(mock_post, github_llm):
    mock_response = MagicMock()
    mock_response.iter_lines.return_value = [b"chunk1", b"chunk2"]
    mock_post.return_value = mock_response

    messages = [ChatMessage(role="user", content="Tell me about Python.")]
    generator = github_llm.stream_chat(messages)
    responses = list(generator)

    assert len(responses) == 2
    assert all(isinstance(r, ChatResponse) for r in responses)
    assert [r.message.content for r in responses] == ["chunk1", "chunk2"]
    assert all(r.message.role == "assistant" for r in responses)
    mock_post.assert_called_once()


def test_metadata(github_llm):
    metadata = github_llm.metadata

    assert metadata.model_name == "gpt-4o"
    assert metadata.num_output == 256  # Default value
    assert metadata.context_window == 4096  # Default value


def test_unsupported_model():
    with pytest.raises(ValueError):
        GithubLLM(model="unsupported-model")


@patch.dict("os.environ", {"GITHUB_TOKEN": "fake_token"})
@patch("requests.post")
def test_azure_fallback(mock_post, github_llm, mock_response):
# Simulate GitHub API failure
mock_post.side_effect = [Exception("GitHub API Error"), mock_response]

response = github_llm.complete("What is the capital of France?")

assert isinstance(response, CompletionResponse)
assert response.text == "This is a test response."
assert mock_post.call_count == 2 # Called twice: once for GitHub, once for Azure


@patch.dict("os.environ", {})
def test_missing_env_variables(github_llm):
with pytest.raises(ValueError):
github_llm.complete("What is the capital of France?")


def test_class_name(github_llm):
    assert github_llm.class_name() == "GithubLLM"
