Skip to content

Commit

Permalink
feat: add open metrics
Browse files Browse the repository at this point in the history
Signed-off-by: Keming <[email protected]>
  • Loading branch information
kemingy committed Apr 4, 2024
1 parent e96b239 commit c8940db
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 3 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ End-to-end service to query the text.
- [x] generate vector and sparse vector if not provided
- [x] reranking
- [x] semantic highlight
- [x] TUI
- [x] OpenAPI
- [x] OpenMetrics
- [ ] filtering

## How to use
Expand Down Expand Up @@ -45,6 +48,7 @@ Check the [OpenAPI documentation](http://127.0.0.1:8000/openapi/redoc) for more
We provide a simple terminal UI powered by [Textual](https://github.com/textualize/textual) for you to interact with the service.

```bash
pip install textual
python tui.py $QTEXT_PORT
```

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies = [
"defspec~=0.1.1",
"httpx~=0.27",
"cohere~=4.45",
"prometheus-client~=0.20",
]
[project.optional-dependencies]
dev = [
Expand Down
3 changes: 3 additions & 0 deletions qtext/emb_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import openai

from qtext.log import logger
from qtext.metrics import embedding_histogram, sparse_histogram
from qtext.spec import SparseEmbedding
from qtext.utils import time_it

Expand All @@ -19,6 +20,7 @@ def __init__(self, model_name: str, api_key: str, endpoint: str, timeout: int):
)

@time_it
@embedding_histogram.time()
def embedding(self, text: str | list[str]) -> list[float]:
response = self.client.embeddings.create(
model=self.model_name,
Expand All @@ -36,6 +38,7 @@ def __init__(self, endpoint: str, dim: int, timeout: int) -> None:
self.decoder = msgspec.json.Decoder(type=list[SparseEmbedding])

@time_it
@sparse_histogram.time()
def sparse_embedding(
self, text: str | list[str]
) -> list[SparseEmbedding] | SparseEmbedding:
Expand Down
2 changes: 2 additions & 0 deletions qtext/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from qtext.config import Config
from qtext.emb_client import EmbeddingClient, SparseEmbeddingClient
from qtext.highlight_client import ENGLISH_STOPWORDS, HighlightClient
from qtext.metrics import rerank_histogram
from qtext.pg_client import PgVectorsClient
from qtext.schema import DefaultTable, Querier
from qtext.spec import (
Expand Down Expand Up @@ -58,6 +59,7 @@ def add_doc(self, req) -> None:
self.pg_client.add_doc(req)

@time_it
@rerank_histogram.time()
def rank(
self,
req: QueryDocRequest,
Expand Down
2 changes: 2 additions & 0 deletions qtext/highlight_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import msgspec

from qtext.log import logger
from qtext.metrics import highlight_histogram
from qtext.spec import HighlightScore


class HighlightClient:
def __init__(self, addr: str) -> None:
self.client = httpx.Client(base_url=addr)

@highlight_histogram.time()
def highlight_score(
self, query: str, docs: list[str]
) -> list[list[HighlightScore]]:
Expand Down
24 changes: 24 additions & 0 deletions qtext/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from prometheus_client import Counter, Histogram

labels = ("namespace",)

highlight_histogram = Histogram("highlight_latency_seconds", "Highlight cost time")
rerank_histogram = Histogram("rerank_latency_seconds", "ReRank cost time")
doc_counter = Counter("add_doc", "Added documents", labelnames=labels)
embedding_histogram = Histogram("embedding_latency_seconds", "Embedding cost time")
sparse_histogram = Histogram("sparse_latency_seconds", "Sparse embedding cost time")
text_search_histogram = Histogram(
"full_text_search_latency_seconds",
"Full text search cost time",
labelnames=labels,
)
vector_search_histogram = Histogram(
"vector_search_latency_seconds",
"Vector search cost time",
labelnames=labels,
)
sparse_search_histogram = Histogram(
"sparse_vector_search_latency_seconds",
"Sparse vector search cost time",
labelnames=labels,
)
30 changes: 27 additions & 3 deletions qtext/pg_client.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
from __future__ import annotations

from time import perf_counter

import numpy as np
import psycopg
from psycopg.adapt import Dumper, Loader
from psycopg.rows import dict_row
from psycopg.types import TypeInfo

from qtext.log import logger
from qtext.metrics import (
doc_counter,
sparse_search_histogram,
text_search_histogram,
vector_search_histogram,
)
from qtext.schema import DefaultTable, Querier
from qtext.spec import AddNamespaceRequest, QueryDocRequest, SparseEmbedding
from qtext.utils import time_it
Expand Down Expand Up @@ -130,6 +138,7 @@ def add_doc(self, req):
placeholders,
)
self.conn.commit()
doc_counter.labels(req.namespace).inc()
except psycopg.errors.Error as err:
logger.info("pg client add doc error", exc_info=err)
self.conn.rollback()
Expand All @@ -141,15 +150,20 @@ def query_text(self, req: QueryDocRequest) -> list[DefaultTable]:
logger.debug("skip text query since there is no text index")
return []
try:
start_time = perf_counter()
cursor = self.conn.execute(
self.querier.text_query(req.namespace),
(" | ".join(req.query.strip().split(" ")), req.limit),
)
results = cursor.fetchall()
text_search_histogram.labels(req.namespace).observe(
perf_counter() - start_time
)
except psycopg.errors.Error as err:
logger.info("pg client query text error", exc_info=err)
self.conn.rollback()
raise RuntimeError("query text error") from err
return [self.resp_cls(**res) for res in cursor.fetchall()]
return [self.resp_cls(**res) for res in results]

@time_it
def query_vector(self, req: QueryDocRequest) -> list[DefaultTable]:
Expand All @@ -158,28 +172,38 @@ def query_vector(self, req: QueryDocRequest) -> list[DefaultTable]:
return []
try:
# TODO: filter
start_time = perf_counter()
cursor = self.conn.execute(
self.querier.vector_query(req.namespace),
(req.vector, req.limit),
)
results = cursor.fetchall()
vector_search_histogram.labels(req.namespace).observe(
perf_counter() - start_time
)
except psycopg.errors.Error as err:
logger.info("pg client query vector error", exc_info=err)
self.conn.rollback()
raise RuntimeError("query vector error") from err
return [self.resp_cls(**res) for res in cursor.fetchall()]
return [self.resp_cls(**res) for res in results]

@time_it
def query_sparse_vector(self, req: QueryDocRequest) -> list[DefaultTable]:
if not self.querier.has_sparse_index():
logger.debug("skip sparse vector query since there is no sparse index")
return []
try:
start_time = perf_counter()
cursor = self.conn.execute(
self.querier.sparse_query(req.namespace),
(req.sparse_vector, req.limit),
)
results = cursor.fetchall()
sparse_search_histogram.labels(req.namespace).observe(
perf_counter() - start_time
)
except psycopg.errors.Error as err:
logger.info("pg client query sparse vector error", exc_info=err)
self.conn.rollback()
raise RuntimeError("query sparse vector error") from err
return [self.resp_cls(**res) for res in cursor.fetchall()]
return [self.resp_cls(**res) for res in results]
9 changes: 9 additions & 0 deletions qtext/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import msgspec
from defspec import OpenAPI, RenderTemplate
from falcon import App, Request, Response
from prometheus_client import REGISTRY
from prometheus_client.openmetrics import exposition as openmetrics

from qtext.engine import RetrievalEngine
from qtext.log import logger
Expand Down Expand Up @@ -163,9 +165,16 @@ def on_get(self, req: Request, resp: Response):
resp.text = self.template


class OpenMetrics:
def on_get(self, req: Request, resp: Response):
resp.content_type = openmetrics.CONTENT_TYPE_LATEST
resp.text = openmetrics.generate_latest(REGISTRY)


def create_app(engine: RetrievalEngine) -> App:
app = App()
app.add_route("/", HealthCheck())
app.add_route("/metrics", OpenMetrics())
app.add_route("/api/namespace", NamespaceResource(engine))
app.add_route("/api/doc", DocResource(engine))
app.add_route("/api/query", QueryResource(engine))
Expand Down

0 comments on commit c8940db

Please sign in to comment.