From 003092c5381fcebfa3e34d785423610b74dd4c99 Mon Sep 17 00:00:00 2001 From: Daiyi Peng Date: Wed, 18 Dec 2024 15:14:25 -0800 Subject: [PATCH] Add "Logs" panel for evaluation v2. Evaluation classes could use `self.debug`/`self.info`/`self.warning`/`self.error`/`self.fatal` to write logs to this panel. PiperOrigin-RevId: 707683046 --- langfun/core/eval/v2/evaluation.py | 79 +++++++++++++++++++++++-- langfun/core/eval/v2/evaluation_test.py | 6 ++ langfun/core/logging.py | 19 ++++++ langfun/core/logging_test.py | 19 ++++++ 4 files changed, 117 insertions(+), 6 deletions(-) diff --git a/langfun/core/eval/v2/evaluation.py b/langfun/core/eval/v2/evaluation.py index 3a3fb96..9a1887d 100644 --- a/langfun/core/eval/v2/evaluation.py +++ b/langfun/core/eval/v2/evaluation.py @@ -14,7 +14,9 @@ """Base class for Langfun evaluation tasks.""" import abc +import datetime import functools +import threading import time from typing import Annotated, Any, Callable, Iterable @@ -63,6 +65,8 @@ def _on_bound(self): self.__dict__.pop('is_leaf', None) self.__dict__.pop('children', None) super()._on_bound() + self._log_entries = [] + self._log_lock = threading.Lock() # # Handling evaluation hierarchy (materialized vs. hyper evaluations). @@ -277,6 +281,41 @@ def _reset(self) -> None: for metric in self.metrics: metric.reset() + # + # Evaluation-level logging. + # + + def _log(self, level: lf.logging.LogLevel, message: str, **kwargs): + with self._log_lock: + self._log_entries.append( + lf.logging.LogEntry( + level=level, + time=datetime.datetime.now(), + message=message, + metadata=kwargs, + ) + ) + + def debug(self, message: str, **kwargs): + """Logs a debug message to the session.""" + self._log('debug', message, **kwargs) + + def info(self, message: str, **kwargs): + """Logs an info message to the session.""" + self._log('info', message, **kwargs) + + def warning(self, message: str, **kwargs): + """Logs a warning message to the session.""" + self._log('warning', message, **kwargs) + + def error(self, message: str, **kwargs): + """Logs an error message to the session.""" + self._log('error', message, **kwargs) + + def fatal(self, message: str, **kwargs): + """Logs a fatal message to the session.""" + self._log('fatal', message, **kwargs) + # # HTML views. # @@ -465,6 +504,25 @@ def _metric_value_tab( ) ) + def _logs_tab() -> pg.views.html.controls.Tab: + """Renders a tab for the logs of the evaluation.""" + with self._log_lock: + log_history = '\n'.join(str(l) for l in self._log_entries) + return pg.views.html.controls.Tab( + label='Logs', + content=pg.Html.element( + 'div', + [ + pg.Html.element( + 'textarea', + [pg.Html.escape(log_history)], + readonly=True, + css_classes=['logs-textarea'], + ) + ] + ) + ) + def _main_tabs() -> pg.Html: return pg.Html.element( 'div', @@ -474,6 +532,8 @@ def _main_tabs() -> pg.Html: _definition_tab(), ] + [ _metric_tab(m) for m in self.metrics + ] + [ + _logs_tab() ], selected=1, ) @@ -593,6 +653,14 @@ def _html_tree_view_css_styles(self) -> list[str]: width:100%; height:100%; } + .logs-textarea { + width: 100%; + height: 500px; + padding: 5px; + border: 1px solid #DDD; + background-color: #EEE; + resize: vertical; + } """ ] @@ -615,6 +683,11 @@ def load( assert isinstance(example, example_lib.Example), example self._evaluated_examples[example.id] = example + @property + def evaluated_examples(self) -> dict[int, example_lib.Example]: + """Returns the examples in the state.""" + return self._evaluated_examples + def get(self, example_id: int) -> example_lib.Example | None: """Returns the example with the given ID.""" return self._evaluated_examples.get(example_id) @@ -622,9 +695,3 @@ def get(self, example_id: int) -> example_lib.Example | None: def update(self, example: example_lib.Example) -> None: """Updates the state with the given example.""" self._evaluated_examples[example.id] = example - - @property - def evaluated_examples(self) -> dict[int, example_lib.Example]: - """Returns the examples in the state.""" - return self._evaluated_examples - diff --git a/langfun/core/eval/v2/evaluation_test.py b/langfun/core/eval/v2/evaluation_test.py index 49b8b0d..92331f9 100644 --- a/langfun/core/eval/v2/evaluation_test.py +++ b/langfun/core/eval/v2/evaluation_test.py @@ -133,6 +133,12 @@ def test_evaluate_with_state(self): def test_html_view(self): exp = test_helper.TestEvaluation() + exp.debug('debug message') + exp.info('info message') + exp.warning('warning message', x=1) + exp.error('error message', x=1) + exp.fatal('fatal message') + self.assertIn( exp.id, exp.to_html(extra_flags=dict(card_view=True, current_run=None)).content diff --git a/langfun/core/logging.py b/langfun/core/logging.py index 7666528..44ba047 100644 --- a/langfun/core/logging.py +++ b/langfun/core/logging.py @@ -54,6 +54,25 @@ class LogEntry(pg.Object, pg.views.HtmlTreeView.Extension): def should_output(self, min_log_level: LogLevel) -> bool: return _LOG_LEVELS.index(self.level) >= _LOG_LEVELS.index(min_log_level) + def format(self, + compact: bool = False, + verbose: bool = True, + root_indent: int = 0, + *, + text_format: bool = True, + **kwargs): + if text_format: + s = f"""{self.time.strftime('%H:%M:%S')} {self.level.upper()} - {self.message}""" + if self.metadata: + s += f' (metadata: {self.metadata!r})' + return s + return super().format( + compact=compact, + verbose=verbose, + root_indent=root_indent, + **kwargs + ) + def _html_tree_view_summary( self, view: pg.views.HtmlTreeView, diff --git a/langfun/core/logging_test.py b/langfun/core/logging_test.py index de56ace..fbaf86b 100644 --- a/langfun/core/logging_test.py +++ b/langfun/core/logging_test.py @@ -61,6 +61,25 @@ def assert_html_content(self, html, expected): print(actual) self.assertEqual(actual, expected) + def test_format(self): + time = datetime.datetime(2024, 10, 10, 12, 30, 45) + self.assertEqual( + str( + logging.LogEntry( + level='info', message='hello\nworld', + time=time, metadata=dict(x=1), + ) + ), + '12:30:45 INFO - hello\nworld (metadata: {x=1})', + ) + self.assertIn( + 'LogEntry(', + logging.LogEntry( + level='info', message='hello\nworld', + time=time, metadata=dict(x=1), + ).format(text_format=False), + ) + def test_html(self): time = datetime.datetime(2024, 10, 10, 12, 30, 45) self.assert_html_content(