Add basic logging capabilities (#18)
* wip add some basic logging capabilities

* Use rich for logging

* Do more to prevent logs from being dropped when a Live() display is active

* Throw error instead of warn if tool parameters aren't valid JSON

* Update readme
cpsievert authored Dec 10, 2024
1 parent 3b7ccea commit 4e8084a
Showing 12 changed files with 138 additions and 48 deletions.
7 changes: 3 additions & 4 deletions README.md
@@ -250,13 +250,12 @@ If the problem isn't self-evident, you can also reach into the `.last_turn()`, w
 </div>
 
 
-
-Also, since `chatlas` builds on top of packages like `anthropic` and `openai`, you can also enable their debug logging to get even more detailed information about what's going on under the hood:
+For monitoring issues in a production (or otherwise non-interactive) environment, you may want to enable logging. Also, since `chatlas` builds on top of packages like `anthropic` and `openai`, you can enable their debug logging to get lower-level information, like HTTP requests and response codes.
 
 ```shell
-$ export ANTHROPIC_LOG=info
+$ export CHATLAS_LOG=info
 $ export OPENAI_LOG=info
+$ export ANTHROPIC_LOG=info
 ```
 
 ### Next steps
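For readers wiring this up from Python rather than the shell, a minimal sketch (not part of the commit) of the equivalent setup is below. Note that the new `chatlas/_logging.py` (further down) reads `CHATLAS_LOG` at import time, so the variable must be set before `chatlas` is imported:

```python
import os

# Must happen before `import chatlas`, since chatlas/_logging.py checks
# CHATLAS_LOG when the module is first imported.
os.environ["CHATLAS_LOG"] = "info"    # chatlas' own rich-formatted logs
os.environ["OPENAI_LOG"] = "info"     # openai SDK debug logging
os.environ["ANTHROPIC_LOG"] = "info"  # anthropic SDK debug logging

import chatlas  # noqa: E402  (import intentionally placed after the env setup)
```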
2 changes: 1 addition & 1 deletion chatlas/_anthropic.py
@@ -16,11 +16,11 @@
     ContentToolRequest,
     ContentToolResult,
 )
+from ._logging import log_model_default
 from ._provider import Provider
 from ._tokens import tokens_log
 from ._tools import Tool, basemodel_to_param_schema
 from ._turn import Turn, normalize_turns
-from ._utils import log_model_default
 
 if TYPE_CHECKING:
     from anthropic.types import (
3 changes: 3 additions & 0 deletions chatlas/_chat.py
@@ -34,6 +34,7 @@
     MarkdownDisplay,
     MockMarkdownDisplay,
 )
+from ._logging import log_tool_error
 from ._provider import Provider
 from ._tools import Tool
 from ._turn import Turn, user_turn
@@ -1035,6 +1036,7 @@ def _invoke_tool(
 
             return ContentToolResult(id_, result, None)
         except Exception as e:
+            log_tool_error(func.__name__, str(arguments), e)
             return ContentToolResult(id_, None, str(e))
 
     @staticmethod
@@ -1054,6 +1056,7 @@ async def _invoke_tool_async(
 
             return ContentToolResult(id_, result, None)
         except Exception as e:
+            log_tool_error(func.__name__, str(arguments), e)
             return ContentToolResult(id_, None, str(e))
 
     def _markdown_display(
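Both hunks add the same call on the tool-invocation error path: the exception is logged, then handed back to the model as the tool result rather than raised, so the conversation can continue. A self-contained sketch of that pattern (simplified; the tuple is a stand-in for chatlas' `ContentToolResult`, not its actual API):

```python
import logging

logger = logging.getLogger("chatlas")


def log_tool_error(name: str, arguments: str, e: Exception):
    logger.info(
        f"Error invoking tool function '{name}' with arguments: {arguments}. "
        f"The error message is: '{e}'"
    )


def invoke_tool(func, arguments: dict, id_: str):
    # Log the failure, then return the error text to the model as the
    # tool result instead of raising.
    try:
        return (id_, func(**arguments), None)
    except Exception as e:
        log_tool_error(func.__name__, str(arguments), e)
        return (id_, None, str(e))


print(invoke_tool(lambda x: 1 / x, {"x": 0}, "call_1"))
# -> ('call_1', None, 'division by zero')
```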
12 changes: 12 additions & 0 deletions chatlas/_display.py
@@ -1,9 +1,12 @@
+import logging
 from abc import ABC, abstractmethod
 from typing import Any
 from uuid import uuid4
 
 from rich.live import Live
+from rich.logging import RichHandler
 
+from ._logging import logger
 from ._typing_extensions import TypedDict
 
 
@@ -64,6 +67,15 @@ def update(self, content: str):
 
     def __enter__(self):
         self.live.__enter__()
+        # Live() isn't smart enough to automatically display logs that get
+        # handled while the Live() display is active. However, if a logging
+        # handler is a RichHandler, it can be told about the live console so
+        # that it adds logs above the Live display.
+        handlers = [*logging.getLogger().handlers, *logger.handlers]
+        for h in handlers:
+            if isinstance(h, RichHandler):
+                h.console = self.live.console
+
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
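The reason for the `h.console = self.live.console` reassignment: rich's `Live` takes over the terminal while active, so log records written to a different console can be clobbered by the live region. Pointing a `RichHandler` at the live console makes records render above it instead. A standalone sketch of the same trick (not from the commit):

```python
import logging
import time

from rich.live import Live
from rich.logging import RichHandler

handler = RichHandler()
logging.basicConfig(level=logging.INFO, format="%(message)s", handlers=[handler])
log = logging.getLogger("demo")

with Live("streaming response...", refresh_per_second=4) as live:
    handler.console = live.console  # same reassignment as __enter__ above
    for i in range(3):
        log.info("log record %d renders above the live region", i)
        time.sleep(0.5)
```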
3 changes: 2 additions & 1 deletion chatlas/_github.py
@@ -4,9 +4,10 @@
 from typing import TYPE_CHECKING, Optional
 
 from ._chat import Chat
+from ._logging import log_model_default
 from ._openai import ChatOpenAI
 from ._turn import Turn
-from ._utils import MISSING, MISSING_TYPE, log_model_default
+from ._utils import MISSING, MISSING_TYPE
 
 if TYPE_CHECKING:
     from ._openai import ChatCompletion
2 changes: 1 addition & 1 deletion chatlas/_google.py
@@ -15,10 +15,10 @@
     ContentToolRequest,
     ContentToolResult,
 )
+from ._logging import log_model_default
 from ._provider import Provider
 from ._tools import Tool, basemodel_to_param_schema
 from ._turn import Turn, normalize_turns
-from ._utils import log_model_default
 
 if TYPE_CHECKING:
     from google.generativeai.types.content_types import (
3 changes: 2 additions & 1 deletion chatlas/_groq.py
@@ -4,9 +4,10 @@
 from typing import TYPE_CHECKING, Optional
 
 from ._chat import Chat
+from ._logging import log_model_default
 from ._openai import ChatOpenAI
 from ._turn import Turn
-from ._utils import MISSING, MISSING_TYPE, log_model_default
+from ._utils import MISSING, MISSING_TYPE
 
 if TYPE_CHECKING:
     from ._openai import ChatCompletion
61 changes: 61 additions & 0 deletions chatlas/_logging.py
@@ -0,0 +1,61 @@
+import logging
+import os
+import warnings
+
+from rich.logging import RichHandler
+
+
+def _rich_handler() -> RichHandler:
+    formatter = logging.Formatter("%(name)s - %(message)s")
+    handler = RichHandler()
+    handler.setFormatter(formatter)
+    return handler
+
+
+logger = logging.getLogger("chatlas")
+
+if os.environ.get("CHATLAS_LOG") == "info":
+    # By adding a RichHandler to chatlas' logger, we can guarantee that its
+    # logs never get dropped, even if the root logger's handlers are not
+    # RichHandlers.
+    logger.setLevel(logging.INFO)
+    logger.addHandler(_rich_handler())
+    logger.propagate = False
+
+    # Add a RichHandler to the root logger if there are no handlers. Note that
+    # if chatlas is imported before other libraries that set up logging (like
+    # openai, anthropic, or httpx), this will ensure that logs from those
+    # libraries are also displayed in the rich console.
+    root = logging.getLogger()
+    if not root.handlers:
+        root.addHandler(_rich_handler())
+
+    # Warn if there are non-RichHandler handlers on the root logger.
+    # TODO: we could consider something a bit more aggressive here, like
+    # removing non-RichHandler handlers from the root logger, but that could
+    # be surprising to users.
+    bad_handlers = [
+        h.get_name() for h in root.handlers if not isinstance(h, RichHandler)
+    ]
+    if len(bad_handlers) > 0:
+        warnings.warn(
+            "When setting up logging handlers for CHATLAS_LOG, chatlas detected "
+            f"non-rich handler(s) on the root logger named {bad_handlers}. "
+            "As a result, logs handled by those handlers may be dropped when the "
+            "`echo` argument of `.chat()`, `.stream()`, etc., is something "
+            "other than 'none'. This problem can likely be fixed by importing "
+            "`chatlas` before other libraries that set up logging, or by adding "
+            "a RichHandler to the root logger before loading other libraries.",
+        )
+
+
+def log_model_default(model: str) -> str:
+    logger.info(f"Defaulting to `model = '{model}'`.")
+    return model
+
+
+def log_tool_error(name: str, arguments: str, e: Exception):
+    logger.info(
+        f"Error invoking tool function '{name}' with arguments: {arguments}. "
+        f"The error message is: '{e}'",
+    )
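One consequence of this import-time setup worth illustrating: the warning branch only fires when `CHATLAS_LOG` is set and the root logger already carries non-rich handlers. A hypothetical repro, assuming `chatlas/_logging.py` runs as a side effect of importing the package:

```python
import logging
import os

os.environ["CHATLAS_LOG"] = "info"

# A plain StreamHandler on the root logger *before* chatlas is imported...
logging.basicConfig(level=logging.INFO)

# ...means chatlas can't guarantee rich-friendly handling, so the
# UserWarning above is emitted here. Importing chatlas first (or passing a
# RichHandler to basicConfig) avoids it.
import chatlas  # noqa: E402
```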
13 changes: 7 additions & 6 deletions chatlas/_openai.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import json
-import warnings
 from typing import TYPE_CHECKING, Any, Literal, Optional, cast, overload
 
 from pydantic import BaseModel
@@ -16,12 +15,13 @@
     ContentToolRequest,
     ContentToolResult,
 )
+from ._logging import log_model_default
 from ._merge import merge_dicts
 from ._provider import Provider
 from ._tokens import tokens_log
 from ._tools import Tool, basemodel_to_param_schema
 from ._turn import Turn, normalize_turns
-from ._utils import MISSING, MISSING_TYPE, is_testing, log_model_default
+from ._utils import MISSING, MISSING_TYPE, is_testing
 
 if TYPE_CHECKING:
     from openai.types.chat import (
@@ -473,11 +473,12 @@ def _as_turn(
         try:
             args = json.loads(func.arguments) if func.arguments else {}
         except json.JSONDecodeError:
-            warnings.warn(
+            raise ValueError(
                 f"The model's completion included a tool request ({func.name}) "
-                "with invalid JSON for input arguments: '{func.arguments}'",
-                InvalidJSONParameterWarning,
-                stacklevel=2,
+                f"with invalid JSON for input arguments: '{func.arguments}'. "
+                "This can happen if the model hallucinates parameters not defined by "
+                "your function schema. Try revising your tool description and system "
+                "prompt to be more specific about the expected input arguments to this function."
             )
 
         contents.append(
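The behavior change here: malformed tool-call arguments used to produce a warning; now they raise. A minimal sketch of the failure mode being guarded against (the argument string is a made-up example of truncated model output, not taken from the commit):

```python
import json

func_arguments = '{"latitude": 46.7867, "longitude": '  # truncated mid-object

try:
    args = json.loads(func_arguments) if func_arguments else {}
except json.JSONDecodeError as e:
    # After this commit, chatlas raises ValueError at this point instead
    # of warning and continuing with no arguments.
    raise ValueError(f"Invalid JSON for input arguments: {func_arguments!r}") from e
```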
3 changes: 2 additions & 1 deletion chatlas/_perplexity.py
@@ -4,9 +4,10 @@
 from typing import TYPE_CHECKING, Optional
 
 from ._chat import Chat
+from ._logging import log_model_default
 from ._openai import ChatOpenAI
 from ._turn import Turn
-from ._utils import MISSING, MISSING_TYPE, log_model_default
+from ._utils import MISSING, MISSING_TYPE
 
 if TYPE_CHECKING:
     from ._openai import ChatCompletion
8 changes: 0 additions & 8 deletions chatlas/_utils.py
@@ -2,15 +2,12 @@
 
 import functools
 import inspect
-import logging
 import os
 import re
 from typing import Awaitable, Callable, TypeVar, cast
 
 from ._typing_extensions import ParamSpec, TypeGuard
 
-logger = logging.getLogger("chatlas")
-
 # --------------------------------------------------------------------
 # wrap_async() and is_async_callable() was copied from shiny/_utils.py
 # --------------------------------------------------------------------
@@ -80,11 +77,6 @@ class MISSING_TYPE:
 MISSING = MISSING_TYPE()
 
 
-def log_model_default(model: str) -> str:
-    logger.info(f"Defaulting to `model = '{model}'`.")
-    return model
-
-
 # --------------------------------------------------------------------
 # html_escape was copied from htmltools/_utils.py
 # --------------------------------------------------------------------
69 changes: 44 additions & 25 deletions docs/tool-calling.qmd
@@ -20,57 +20,58 @@ from chatlas import ChatOpenAI
 
 ### Motivating example
 
-Let's take a look at an example where we really need an external tool. Chat models generally do not know the current time, which makes questions like these impossible.
+Let's take a look at an example where we really need an external tool. Chat models generally do not have access to "real-time" information, such as current events, weather, etc. Let's see what happens when we ask the chat model about the weather in a specific location:
 
 ```{python}
-chat = ChatOpenAI(model="gpt-4o")
-_ = chat.chat("How long ago exactly was the moment Neil Armstrong touched down on the moon?")
+chat = ChatOpenAI(model="gpt-4o-mini")
+_ = chat.chat("What's the weather like today in Duluth, MN?")
 ```
 
-Unfortunately, the LLM hallucinates the current date. Let's give the chat model the ability to determine the current time and try again.
+Fortunately, the model is smart enough to know that it doesn't have access to real-time information, and it doesn't try to make up an answer. However, we can help it out by providing a tool that can fetch the weather for a given location.
 
 ### Defining a tool function
 
-The first thing we'll do is define a Python function that returns the current time. This will be our tool.
+As it turns out, LLMs are pretty good at figuring out 'structure' like latitude and longitude from 'unstructured' things like a location name. So we can write a tool function that takes a latitude and longitude and returns the current temperature at that location. Here's an example of how you might write such a function using the [Open-Meteo API](https://open-meteo.com/):
 
 ```{python}
-def get_current_time(tz: str = "UTC") -> str:
+import requests
+
+def get_current_temperature(latitude: float, longitude: float):
     """
-    Gets the current time in the given time zone.
+    Get the current weather given a latitude and longitude.
 
     Parameters
     ----------
-    tz
-        The time zone to get the current time in. Defaults to "UTC".
-
-    Returns
-    -------
-    str
-        The current time in the given time zone.
+    latitude
+        The latitude of the location.
+    longitude
+        The longitude of the location.
     """
-    from datetime import datetime
-    from zoneinfo import ZoneInfo
-    return datetime.now(ZoneInfo(tz)).strftime("%Y-%m-%d %H:%M:%S %Z")
+    lat_lng = f"latitude={latitude}&longitude={longitude}"
+    url = f"https://api.open-meteo.com/v1/forecast?{lat_lng}&current=temperature_2m,wind_speed_10m&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m"
+    response = requests.get(url)
+    json = response.json()
+    return json["current"]
 ```
 
 Note that we've gone through the trouble of adding the following to our function:
 
-- Type hints for arguments and the return value
-- A docstring that explains what the function does and what arguments it expects
+- Type hints for function arguments
+- A docstring that explains what the function does and what arguments it expects (as well as descriptions for the arguments themselves)
 
-**Providing these hints and context is very important**, as it helps the chat model understand how to use your tool correctly!
+**Providing these hints and documentation is very important**, as it helps the chat model understand how to use your tool correctly!
 
 Let's test it:
 
 ```{python}
-get_current_time()
+get_current_temperature(46.7867, -92.1005)
 ```
 
 ### Using the tool
 
-In order for the LLM to make use of our tool, we need to register it with the chat object. This is done by calling the `register_tool` method on the chat object.
+In order for the LLM to make use of our tool, we need to register it with the `chat` object. This is done by calling the `register_tool` method on the chat object.
 
 ```{python}
 chat.register_tool(get_current_time)
@@ -79,12 +80,30 @@ chat.register_tool(get_current_time)
 Now let's retry our original question:
 
 ```{python}
-_ = chat.chat("How long ago exactly was the moment Neil Armstrong touched down on the moon?")
+_ = chat.chat("What's the weather like today in Duluth, MN?")
 ```
 
 That's correct! Without any further guidance, the chat model decided to call our tool function and successfully used its result in formulating its response.
 
-This tool example was extremely simple, but you can imagine doing much more interesting things from tool functions: calling APIs, reading from or writing to a database, kicking off a complex simulation, or even calling a complementary GenAI model (like an image generator). Or if you are using chatlas in a Shiny app, you could use tools to set reactive values, setting off a chain of reactive updates.
+This tool example was extremely simple, but you can imagine doing much more interesting things from tool functions: calling APIs, reading from or writing to a database, kicking off a complex simulation, or even calling a complementary GenAI model (like an image generator). Or if you are using chatlas in a Shiny app, you could use tools to set reactive values, setting off a chain of reactive updates. This is precisely what the [sidebot dashboard](https://github.com/jcheng5/py-sidebot) does to allow for an AI-assisted "drill-down" into the data.
+
+### Trouble-shooting
+
+When the execution of a tool function fails, chatlas sends the exception message back to the chat model. This can be useful for gracefully handling errors in the chat model. However, it can also lead to confusion as to why a response did not come back as expected. If you encounter such a situation, you can set `echo="all"` in the `chat.chat()` method to see the full conversation, including tool calls and their results.
+
+```{python}
+def get_current_temperature(latitude: float, longitude: float):
+    "Get the current weather given a latitude and longitude."
+    raise ValueError("Failed to get current temperature")
+
+chat.tools = [get_current_temperature]
+
+_ = chat.chat("What's the weather like today in Duluth, MN?")
+```
 
 ### Tool limitations
