Add LLaMA C++ models and Fix PyType warning/errors raised in VS Code #31

Merged 4 commits on Oct 9, 2023
Changes from all commits
2 changes: 2 additions & 0 deletions .flake8
@@ -0,0 +1,2 @@
[flake8]
ignore = E111 # ignore the error of "indentation is not a multiple of 4"
3 changes: 3 additions & 0 deletions langfun/core/llms/__init__.py
@@ -46,6 +46,9 @@
from langfun.core.llms.openai import Gpt3Babbage
from langfun.core.llms.openai import Gpt3Ada

# LLaMA C++ models.
from langfun.core.llms.llama_cpp import LlamaCppRemote

# Placeholder for Google-internal imports.

# Include cache as sub-module.
80 changes: 80 additions & 0 deletions langfun/core/llms/llama_cpp.py
@@ -0,0 +1,80 @@
# Copyright 2023 The Langfun Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Language models from llama.cpp."""

from typing import Annotated

import requests

import langfun.core as lf

@lf.use_init_args(['url'])
class LlamaCppRemote(lf.LanguageModel):
"""The remote LLaMA C++ model.

The remote LLaMA C++ server can be launched via
https://github.com/ggerganov/llama.cpp/tree/master/examples/server
"""

url: Annotated[
str | None,
'The URL of the remote LLaMA C++ server.',
] = None

name: Annotated[
str,
'The abbreviation for the LLaMA CPP-based model name.',
] = ''

def _on_bound(self):
super()._on_bound()
if not self.url:
raise ValueError('Please specify `url`')

@property
def model_id(self) -> str:
"""Returns a string to identify the model."""
return f'LLaMAC++({self.name})'

def _sample(self, prompts: list[lf.Message]) -> list[lf.LMSamplingResult]:
def _complete_fn(cur_prompts):
results = []
for prompt in cur_prompts:
result = lf.LMSamplingResult()
for _ in range(self.sampling_options.n or 1):
data = {
"prompt": prompt.text,
"n_predict": self.sampling_options.max_tokens,
"temperature": self.sampling_options.temperature,
"top_k": self.sampling_options.top_k or 50,
"top_p": self.sampling_options.top_p or 0.95,
}
response = requests.post(
f"{self.url}/completion",
json=data,
headers={"Content-Type": "application/json"},
timeout=self.timeout,
)
decoded_response = response.json()
response = decoded_response["content"]
result.samples.append(lf.LMSample(response, score=0.))
results.append(result)
return results
return lf.with_retry(
_complete_fn,
retry_on_errors=(),
max_attempts=self.max_attempts,
retry_interval=(1, 60),
exponential_backoff=True,
)(prompts)
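
For context, a minimal usage sketch of the new class. It assumes a llama.cpp server is already running locally (e.g. started from examples/server in the llama.cpp repository); the address, prompt, and sampling values below are illustrative, not part of this change.

import langfun.core as lf
from langfun.core.llms import LlamaCppRemote

# Point the model at a running llama.cpp server (illustrative address).
lm = LlamaCppRemote('http://127.0.0.1:8080')

# Calling the model posts the prompt to the server's /completion endpoint;
# the sampling options map onto n_predict, temperature, top_k and top_p above.
response = lm(
    'Write one sentence about llamas.',
    sampling_options=lf.LMSamplingOptions(temperature=0.7, max_tokens=64),
)
print(response.text)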
45 changes: 45 additions & 0 deletions langfun/core/llms/llama_cpp_test.py
@@ -0,0 +1,45 @@
# Copyright 2023 The Langfun Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for llama cpp models."""

import unittest
from unittest import mock
import typing
import langfun.core as lf
from langfun.core.llms import llama_cpp


def mock_requests_post(url: str, json: typing.Dict[str, typing.Any], **kwargs):
del kwargs
class TEMP:
def json(self):
return {"content": json["prompt"] + "\n" + url}
return TEMP()


class LlamaCppRemoteTest(unittest.TestCase):
"""Tests for the LlamaCppRemote model."""

def test_call_completion(self):
with mock.patch('requests.post') as mock_request:
mock_request.side_effect = mock_requests_post
lm = llama_cpp.LlamaCppRemote(url="http://127.0.0.1:8080")
response = lm('hello', sampling_options=lf.LMSamplingOptions(n=1))
self.assertEqual(
response.text,
'hello\nhttp://127.0.0.1:8080/completion',
)

if __name__ == '__main__':
unittest.main()
26 changes: 16 additions & 10 deletions langfun/core/llms/openai.py
@@ -15,10 +15,14 @@

import collections
import os
from typing import Annotated, Any, Literal
import langfun.core as lf
from typing import Annotated, Any, Literal, cast

import openai
import pyglove as pg
from openai import openai_object
from openai import error as openai_error

import langfun.core as lf


class Usage(pg.Object):
Expand Down Expand Up @@ -128,7 +132,8 @@ def _get_request_args(
def _sample(self, prompts: list[lf.Message]) -> list[LMSamplingResult]:
if self.is_chat_model:
return self._chat_complete_batch(prompts)
return self._complete_batch(prompts)
else:
return self._complete_batch(prompts)

def _complete_batch(
self, prompts: list[lf.Message]) -> list[LMSamplingResult]:
@@ -138,7 +143,7 @@ def _open_ai_completion(prompts):
prompt=[p.text for p in prompts],
**self._get_request_args(self.sampling_options),
)

response = cast(openai_object.OpenAIObject, response)
# Parse response.
samples_by_index = collections.defaultdict(list)
for choice in response.choices:
@@ -161,23 +166,24 @@ def _open_ai_completion(prompts):
return lf.with_retry(
_open_ai_completion,
retry_on_errors=(
openai.error.ServiceUnavailableError,
openai.error.RateLimitError,
openai_error.ServiceUnavailableError,
openai_error.RateLimitError,
),
max_attempts=self.max_attempts,
retry_interval=(1, 60),
exponential_backoff=True,
)(prompts)

def _chat_complete_batch(
self, prompts: list[lf.Message]) -> list[lf.LMSamplingResult]:
self, prompts: list[lf.Message]) -> list[LMSamplingResult]:

def _open_ai_chat_completion(prompt):
response = openai.ChatCompletion.create(
# TODO(daiyip): support conversation history.
# TODO(daiyip): support conversation history and system prompt.
messages=[{'role': 'user', 'content': prompt.text}],
**self._get_request_args(self.sampling_options),
)
response = cast(openai_object.OpenAIObject, response)
return LMSamplingResult(
[
lf.LMSample(choice.message.content, score=0.0)
Expand All @@ -195,8 +201,8 @@ def _open_ai_chat_completion(prompt):
prompts,
max_workers=8,
retry_on_errors=(
openai.error.ServiceUnavailableError,
openai.error.RateLimitError,
openai_error.ServiceUnavailableError,
openai_error.RateLimitError,
),
max_attempts=self.max_attempts,
retry_interval=(1, 60),
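
The cast(openai_object.OpenAIObject, response) calls added above exist purely for the static checkers named in the PR title; they change nothing at runtime. A standalone sketch of the pattern (the class and function here are illustrative stand-ins, not the OpenAI API):

from typing import Any, cast


class FakeResponse:
  """Illustrative stand-in for openai_object.OpenAIObject."""

  choices = ()


def create() -> Any:
  # Dynamically typed, like openai.Completion.create().
  return FakeResponse()


# cast() performs no runtime conversion or validation; it only tells
# PyType/Pylance which attributes (e.g. .choices) to expect on the result.
response = cast(FakeResponse, create())
print(response.choices)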
2 changes: 1 addition & 1 deletion langfun/core/message.py
@@ -102,7 +102,7 @@ def __init__(
sender: str | pg.object_utils.MissingValue = pg.MISSING_VALUE,
metadata: dict[str, Any] | None = None,
tags: list[str] | None = None,
source: 'Message' = None,
source: Optional['Message'] = None,
# The rest are `pg.Object.__init__` arguments.
allow_partial: bool = False,
sealed: bool = False,
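
The one-line change above fixes an implicit-Optional warning: a parameter that defaults to None must include None in its annotated type. A self-contained illustration (the class and function names are made up for the example):

from typing import Optional


class Message:
  pass


# Flagged by PyType/Pylance: the default None is not a valid 'Message'.
def before(source: 'Message' = None):
  return source


# Accepted: Optional['Message'] explicitly allows None.
def after(source: Optional['Message'] = None):
  return source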