From 003092c5381fcebfa3e34d785423610b74dd4c99 Mon Sep 17 00:00:00 2001
From: Daiyi Peng <daiyip@google.com>
Date: Wed, 18 Dec 2024 15:14:25 -0800
Subject: [PATCH] Add "Logs" panel for evaluation v2.

Evaluation classes can call `self.debug`/`self.info`/`self.warning`/`self.error`/`self.fatal` to write logs to this panel.

PiperOrigin-RevId: 707683046
---
 langfun/core/eval/v2/evaluation.py      | 79 +++++++++++++++++++++++--
 langfun/core/eval/v2/evaluation_test.py |  6 ++
 langfun/core/logging.py                 | 19 ++++++
 langfun/core/logging_test.py            | 19 ++++++
 4 files changed, 117 insertions(+), 6 deletions(-)

diff --git a/langfun/core/eval/v2/evaluation.py b/langfun/core/eval/v2/evaluation.py
index 3a3fb96..9a1887d 100644
--- a/langfun/core/eval/v2/evaluation.py
+++ b/langfun/core/eval/v2/evaluation.py
@@ -14,7 +14,9 @@
 """Base class for Langfun evaluation tasks."""
 
 import abc
+import datetime
 import functools
+import threading
 import time
 
 from typing import Annotated, Any, Callable, Iterable
@@ -63,6 +65,8 @@ def _on_bound(self):
     self.__dict__.pop('is_leaf', None)
     self.__dict__.pop('children', None)
     super()._on_bound()
+    self._log_entries = []
+    self._log_lock = threading.Lock()
 
   #
   # Handling evaluation hierarchy (materialized vs. hyper evaluations).
@@ -277,6 +281,41 @@ def _reset(self) -> None:
       for metric in self.metrics:
         metric.reset()
 
+  #
+  # Evaluation-level logging.
+  #
+
+  def _log(self, level: lf.logging.LogLevel, message: str, **kwargs):
+    with self._log_lock:
+      self._log_entries.append(
+          lf.logging.LogEntry(
+              level=level,
+              time=datetime.datetime.now(),
+              message=message,
+              metadata=kwargs,
+          )
+      )
+
+  def debug(self, message: str, **kwargs):
+    """Logs a debug message to the evaluation."""
+    self._log('debug', message, **kwargs)
+
+  def info(self, message: str, **kwargs):
+    """Logs an info message to the evaluation."""
+    self._log('info', message, **kwargs)
+
+  def warning(self, message: str, **kwargs):
+    """Logs a warning message to the evaluation."""
+    self._log('warning', message, **kwargs)
+
+  def error(self, message: str, **kwargs):
+    """Logs an error message to the evaluation."""
+    self._log('error', message, **kwargs)
+
+  def fatal(self, message: str, **kwargs):
+    """Logs a fatal message to the evaluation."""
+    self._log('fatal', message, **kwargs)
+
   #
   # HTML views.
   #
@@ -465,6 +504,25 @@ def _metric_value_tab(
           )
       )
 
+    def _logs_tab() -> pg.views.html.controls.Tab:
+      """Renders a tab for the logs of the evaluation."""
+      with self._log_lock:
+        log_history = '\n'.join(str(l) for l in self._log_entries)
+      return pg.views.html.controls.Tab(
+          label='Logs',
+          content=pg.Html.element(
+              'div',
+              [
+                  pg.Html.element(
+                      'textarea',
+                      [pg.Html.escape(log_history)],
+                      readonly=True,
+                      css_classes=['logs-textarea'],
+                  )
+              ]
+          )
+      )
+
     def _main_tabs() -> pg.Html:
       return pg.Html.element(
           'div',
@@ -474,6 +532,8 @@ def _main_tabs() -> pg.Html:
                       _definition_tab(),
                   ] + [
                       _metric_tab(m) for m in self.metrics
+                  ] + [
+                      _logs_tab()
                   ],
                   selected=1,
               )
@@ -593,6 +653,14 @@ def _html_tree_view_css_styles(self) -> list[str]:
           width:100%;
           height:100%;
         }
+        .logs-textarea {
+          width: 100%;
+          height: 500px;
+          padding: 5px;
+          border: 1px solid #DDD;
+          background-color: #EEE;
+          resize: vertical;
+        }
         """
     ]
 
@@ -615,6 +683,11 @@ def load(
         assert isinstance(example, example_lib.Example), example
         self._evaluated_examples[example.id] = example
 
+  @property
+  def evaluated_examples(self) -> dict[int, example_lib.Example]:
+    """Returns the examples in the state."""
+    return self._evaluated_examples
+
   def get(self, example_id: int) -> example_lib.Example | None:
     """Returns the example with the given ID."""
     return self._evaluated_examples.get(example_id)
@@ -622,9 +695,3 @@ def get(self, example_id: int) -> example_lib.Example | None:
   def update(self, example: example_lib.Example) -> None:
     """Updates the state with the given example."""
     self._evaluated_examples[example.id] = example
-
-  @property
-  def evaluated_examples(self) -> dict[int, example_lib.Example]:
-    """Returns the examples in the state."""
-    return self._evaluated_examples
-
diff --git a/langfun/core/eval/v2/evaluation_test.py b/langfun/core/eval/v2/evaluation_test.py
index 49b8b0d..92331f9 100644
--- a/langfun/core/eval/v2/evaluation_test.py
+++ b/langfun/core/eval/v2/evaluation_test.py
@@ -133,6 +133,12 @@ def test_evaluate_with_state(self):
 
   def test_html_view(self):
     exp = test_helper.TestEvaluation()
+    exp.debug('debug message')
+    exp.info('info message')
+    exp.warning('warning message', x=1)
+    exp.error('error message', x=1)
+    exp.fatal('fatal message')
+
     self.assertIn(
         exp.id,
         exp.to_html(extra_flags=dict(card_view=True, current_run=None)).content
diff --git a/langfun/core/logging.py b/langfun/core/logging.py
index 7666528..44ba047 100644
--- a/langfun/core/logging.py
+++ b/langfun/core/logging.py
@@ -54,6 +54,25 @@ class LogEntry(pg.Object, pg.views.HtmlTreeView.Extension):
   def should_output(self, min_log_level: LogLevel) -> bool:
     return _LOG_LEVELS.index(self.level) >= _LOG_LEVELS.index(min_log_level)
 
+  def format(self,
+             compact: bool = False,
+             verbose: bool = True,
+             root_indent: int = 0,
+             *,
+             text_format: bool = True,
+             **kwargs):
+    if text_format:
+      s = f"""{self.time.strftime('%H:%M:%S')} {self.level.upper()} - {self.message}"""
+      if self.metadata:
+        s += f' (metadata: {self.metadata!r})'
+      return s
+    return super().format(
+        compact=compact,
+        verbose=verbose,
+        root_indent=root_indent,
+        **kwargs
+    )
+
   def _html_tree_view_summary(
       self,
       view: pg.views.HtmlTreeView,
diff --git a/langfun/core/logging_test.py b/langfun/core/logging_test.py
index de56ace..fbaf86b 100644
--- a/langfun/core/logging_test.py
+++ b/langfun/core/logging_test.py
@@ -61,6 +61,25 @@ def assert_html_content(self, html, expected):
       print(actual)
     self.assertEqual(actual, expected)
 
+  def test_format(self):
+    time = datetime.datetime(2024, 10, 10, 12, 30, 45)
+    self.assertEqual(
+        str(
+            logging.LogEntry(
+                level='info', message='hello\nworld',
+                time=time, metadata=dict(x=1),
+            )
+        ),
+        '12:30:45 INFO - hello\nworld (metadata: {x=1})',
+    )
+    self.assertIn(
+        'LogEntry(',
+        logging.LogEntry(
+            level='info', message='hello\nworld',
+            time=time, metadata=dict(x=1),
+        ).format(text_format=False),
+    )
+
   def test_html(self):
     time = datetime.datetime(2024, 10, 10, 12, 30, 45)
     self.assert_html_content(