Skip to content

Commit

Permalink
Метрики (#91)
Browse files Browse the repository at this point in the history
* metrics

* Fix code style issues with Black

* reload linters

* fix lint

* add forced logout and client handler errors metric

* Fix code style issues with Black

* lint

* Fix code style issues with Black

---------

Co-authored-by: Lint Action <[email protected]>
  • Loading branch information
vzalygin and lint-action authored Oct 1, 2024
1 parent 13a557a commit c9e7599
Show file tree
Hide file tree
Showing 15 changed files with 215 additions and 42 deletions.
1 change: 1 addition & 0 deletions .github/workflows/deploy-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ jobs:
echo TELEGRAM_TOKEN=${{ secrets.DEV_TG_TOKEN }} >> .env;
echo ENCRYPTION='${{ secrets.DEV_ENCRYPTION }}' >> .env;
echo VERSION=$(git rev-parse --short HEAD) >> .env;
echo ENABLE_PROMETHEUS_METRICS_SERVER= >> .env;
- name: Enable DEBUG logs
if: ${{ inputs.debug-logs }}
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/deploy-prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ jobs:
echo TELEGRAM_TOKEN=${{ secrets.DEV_TG_TOKEN }} >> .env;
echo ENCRYPTION='${{ secrets.DEV_ENCRYPTION }}' >> .env;
echo VERSION=$(git rev-parse --short HEAD) >> .env;
echo ENABLE_PROMETHEUS_METRICS_SERVER= >> .env;
- name: Enable DEBUG logs
if: ${{ inputs.dev-debug-logs }}
Expand Down Expand Up @@ -139,6 +140,7 @@ jobs:
echo TELEGRAM_TOKEN=${{ secrets.PROD_TG_TOKEN }} >> .env;
echo ENCRYPTION='${{ secrets.PROD_ENCRYPTION }}' >> .env;
echo VERSION=$(git rev-parse --short HEAD) >> .env;
echo ENABLE_PROMETHEUS_METRICS_SERVER= >> .env;
- name: Enable DEBUG logs
if: ${{ inputs.prod-debug-logs }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ jobs:
auto_fix: true
black: true
black_auto_fix: true
flake8_args: "--ignore=E501,E203,W503" # too long lines ignore, whitespace before :, newline before operator
flake8_args: "--ignore=E501,E203,E722,W503" # too long lines ignore, whitespace before :, bare except, newline before operator
flake8: true
flake8_auto_fix: false
7 changes: 6 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,9 @@ COPY requirements.txt .
RUN pip install -r requirements.txt

COPY ./migrations ./migrations
COPY ./src .
COPY ./src .

# port for the Prometheus metrics server
EXPOSE 53000

ENTRYPOINT [ "python3", "samowarium.py" ]
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ ENV= # название окружения, в котором ра
VERSION= # название версии программы (none, если не задано)
ENCRYPTION= # ключ шифрования для базы данных (генерируется при запуске, если не задано)
DEBUG= # выставляет уровень логирования DEBUG (INFO, если не задано)
ENABLE_PROMETHEUS_METRICS_SERVER= # запускает сервер для получения метрик (не запускает, если не задано)
PROMETHEUS_METRICS_SERVER_PORT= # указывает порт для сервера метрик (53000, если не задано)
```

- Использовать python3.12 и выше.
Expand Down Expand Up @@ -40,19 +42,19 @@ yoyo new -m "migration name"
- Собрать образ:

```bash
DOCKER_TAG=latest docker compose build
docker compose build
```

- Или получить из реестра:

```bash
DOCKER_TAG=latest docker compose pull
docker compose pull
```

- Запустить сервис (не забыть создать `.env` файл с переменными):

```bash
DOCKER_TAG=latest docker compose up -d
docker compose up -d
```

- Остановить сервис:
Expand Down
4 changes: 3 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
services:
samowarium:
build: .
image: vzalygin/samowarium:${DOCKER_TAG}
image: vzalygin/samowarium:${DOCKER_TAG:-latest}
working_dir: /samowarium
env_file:
- .env
Expand All @@ -13,6 +13,8 @@ services:
command:
- samowarium.py
restart: unless-stopped
ports:
- 53000:53000

volumes:
db:
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ python-dotenv==1.0.1
python-dateutil==2.9.0
yoyo-migrations==8.2.0
pycryptodome==3.20.0
setuptools==74.1.2
setuptools==74.1.2
prometheus-client==0.16.0
prometheus-summary==0.1.2
33 changes: 28 additions & 5 deletions src/client_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
UnauthorizedError,
)
from util import MessageSender
from metrics import (
login_metric,
logout_metric,
forced_logout_metric,
relogin_metric,
revalidation_metric,
client_handler_errors_metric,
)

REVALIDATE_INTERVAL = timedelta(hours=5)
SESSION_TOKEN_PATTERN = re.compile("^[0-9]{6}-[a-zA-Z0-9]{20}$")
Expand Down Expand Up @@ -63,6 +71,7 @@ async def make_new(
message_sender, db, Context(telegram_id, samoware_login)
)
is_successful_login = await handler.login(samoware_password)
login_metric.labels(is_successful=is_successful_login).inc()
if not is_successful_login:
await message_sender(telegram_id, WRONG_CREDS_PROMPT, MARKDOWN_FORMAT)
return None
Expand All @@ -86,6 +95,7 @@ def get_polling_task(self) -> asyncio.Task:
# Stop this handler's polling task: the task is cancelled unless it is already
# cancelled or done, `logout_metric` is incremented, and the task is then
# awaited through `asyncio.wait`.
# NOTE(review): confirm whether the logout counter is meant to tick on every
# call or only when the task is actually cancelled here.
async def stop_handling(self) -> None:
if not (self.polling_task.cancelled() or self.polling_task.done()):
self.polling_task.cancel()
logout_metric.inc()
await asyncio.wait([self.polling_task])

async def polling(self) -> None:
Expand Down Expand Up @@ -121,38 +131,48 @@ async def polling(self) -> None:
timezone.utc,
) < datetime.now(timezone.utc):
is_successful_revalidation = await self.revalidate()
revalidation_metric.labels(
is_successful=is_successful_revalidation
).inc()
if not is_successful_revalidation:
await self.can_not_revalidate()
self.db.remove_client(self.context.telegram_id)
forced_logout_metric.inc()
return
retry_count = 0
except asyncio.CancelledError:
return
except UnauthorizedError:
except UnauthorizedError as error:
client_handler_errors_metric.labels(type=type(error).__name__).inc()
log.info(f"session for {self.context.samoware_login} expired")
samoware_password = self.db.get_password(self.context.telegram_id)
if samoware_password is None:
await self.session_has_expired()
self.db.remove_client(self.context.telegram_id)
forced_logout_metric.inc()
return
is_successful_relogin = await self.login(samoware_password)
relogin_metric.labels(is_successful=is_successful_relogin).inc()
if not is_successful_relogin:
await self.can_not_relogin()
self.db.remove_client(self.context.telegram_id)
forced_logout_metric.inc()
return
except (
aiohttp.ClientOSError
) as error: # unknown source error https://github.com/aio-libs/aiohttp/issues/6912
log.warning(
f"retry_count={retry_count}. ClientOSError. Probably Broken pipe. Retrying in {HTTP_RETRY_DELAY_SEC} seconds. {str(error)}"
)
client_handler_errors_metric.labels(type=type(error).__name__).inc()
retry_count += 1
await asyncio.sleep(HTTP_RETRY_DELAY_SEC)
except Exception:
except Exception as error:
log.exception("exception in client_handler")
log.warning(
f"retry_count={retry_count}. Retrying longpolling for {self.context.samoware_login} in {HTTP_RETRY_DELAY_SEC} seconds..."
)
client_handler_errors_metric.labels(type=type(error).__name__).inc()
retry_count += 1
await asyncio.sleep(HTTP_RETRY_DELAY_SEC)
finally:
Expand All @@ -173,16 +193,18 @@ async def login(self, samoware_password: str) -> bool:
self.db.set_handler_context(self.context)
log.info(f"successful login for user {self.context.samoware_login}")
return True
except UnauthorizedError:
except UnauthorizedError as error:
log.info(f"unsuccessful login for user {self.context.samoware_login}")
client_handler_errors_metric.labels(type=type(error).__name__).inc()
return False
except asyncio.CancelledError:
log.info("login cancelled")
return False
except Exception:
except Exception as error:
log.exception(
f"retry_count={retry_count}. exception on login. retrying in {HTTP_RETRY_DELAY_SEC}..."
)
client_handler_errors_metric.labels(type=type(error).__name__).inc()
retry_count += 1
await asyncio.sleep(HTTP_RETRY_DELAY_SEC)

Expand All @@ -204,8 +226,9 @@ async def revalidate(self) -> bool:
self.db.set_handler_context(self.context)
log.info(f"successful revalidation for user {self.context.samoware_login}")
return True
except UnauthorizedError:
except UnauthorizedError as error:
log.exception("UnauthorizedError on revalidation")
client_handler_errors_metric.labels(type=type(error).__name__).inc()
return False

async def can_not_revalidate(self):
Expand Down
35 changes: 34 additions & 1 deletion src/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ def __enter__(self) -> Self:
# Context-manager exit hook: delegates to `self.close()`. The exception
# details passed in by the `with` statement are accepted but not inspected,
# and nothing is returned, so an in-flight exception is not suppressed.
def __exit__(self, *args) -> None:
self.close()

def is_open(self) -> bool:
    """Report whether the database connection can currently be used.

    The connection is probed by requesting a cursor, which is closed again
    immediately.

    Returns:
        True if a cursor could be obtained; False if the probe raised, for
        instance because the connection is closed or was never created.
    """
    try:
        # Close the probe cursor right away so it does not linger.
        self.connection.cursor().close()
        return True
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must keep propagating instead of being reported as
        # "connection is not open".
        return False

def initialize(self) -> None:
log.debug("initializing db...")
self.connection = connect(self.path, check_same_thread=False)
Expand Down Expand Up @@ -130,7 +137,7 @@ def is_client_active(self, telegram_id: int) -> bool:
log.debug(f"client {telegram_id} is active: {is_active}")
return is_active

def get_all_clients(self) -> list[tuple[int, Context, str | None]]:
def get_all_clients(self) -> list[tuple[int, Context]]:
def map_client_from_tuple(client):
(telegram_id, context) = client
return (
Expand All @@ -154,6 +161,32 @@ def map_client_from_tuple(client):
)
return clients

def get_all_clients_stat(self) -> list[tuple[int, Context, bool, bool]]:
    """Load every client row in the shape used for gathering statistics.

    Returns:
        One tuple per row of the ``clients`` table: the telegram id, the
        context built by ``map_context_from_dict`` from the ``loads``-decoded
        ``samoware_context`` column, whether the ``password`` column is set,
        and the ``autoread`` column coerced to ``bool``.
    """
    rows = self.connection.execute(
        "SELECT telegram_id, samoware_context, password, autoread FROM clients"
    ).fetchall()
    clients = [
        (
            telegram_id,
            map_context_from_dict(loads(raw_context), telegram_id),
            password is not None,
            bool(autoread),
        )
        for telegram_id, raw_context, password, autoread in rows
    ]
    log.debug(
        f"fetching all clients from database for gathering statistics, an amount of the clients {len(clients)}"
    )
    return clients

def remove_client(self, telegram_id: int) -> None:
self.connection.execute(
"DELETE FROM clients WHERE telegram_id=?", (telegram_id,)
Expand Down
2 changes: 1 addition & 1 deletion src/encryption.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self) -> None:
self.encryption_key = SHA256.new(
self.encryption_key.encode("utf-8")
).digest()
log.info("encrypter initialized")
log.info("encrypter has initialized")

def encrypt(self, data: str) -> bytes:
raw = str.encode(pad(data), encoding="utf-8")
Expand Down
10 changes: 10 additions & 0 deletions src/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
DEBUG_VAR_NAME = "DEBUG"
ENCRYPTION_KEY_VAR_NAME = "ENCRYPTION"
IP_CHECK_VAR_NAME = "IP_CHECK"
ENABLE_PROMETHEUS_METRICS_SERVER_VAR_NAME = "ENABLE_PROMETHEUS_METRICS_SERVER"
PROMETHEUS_METRICS_SERVER_PORT_VAR_NAME = "PROMETHEUS_METRICS_SERVER_PORT"

DEV_PROFILE_NAME = "DEV"
PROD_PROFILE_NAME = "PROD"
Expand All @@ -32,6 +34,10 @@ def get_encryption_key() -> str | None:
return os.environ.get(ENCRYPTION_KEY_VAR_NAME, default=None)


def get_prometheus_metrics_server_port() -> int:
    """Return the TCP port for the Prometheus metrics server.

    Environment values are always strings, so the configured value is
    converted to ``int`` to honour the annotated return type.

    Returns:
        The port read from the environment, or 53000 when the variable is
        unset or blank.

    Raises:
        ValueError: If the variable holds something that is not an integer.
    """
    raw_port = os.environ.get(PROMETHEUS_METRICS_SERVER_PORT_VAR_NAME)
    # A blank entry such as ``PROMETHEUS_METRICS_SERVER_PORT=`` in ``.env``
    # is treated the same as an unset variable.
    if raw_port is None or not raw_port.strip():
        return 53000
    return int(raw_port)


def is_ip_check_enabled() -> bool:
    """Tell whether the IP check is switched on.

    The flag is presence-based: the variable only has to exist in the
    environment, so even an empty value enables it.
    """
    return IP_CHECK_VAR_NAME in os.environ

Expand All @@ -46,3 +52,7 @@ def is_prod_profile() -> bool:

def is_debug() -> bool:
    """Tell whether debug mode is requested.

    The flag is presence-based: the variable only has to exist in the
    environment, so even an empty value counts as enabled.
    """
    return DEBUG_VAR_NAME in os.environ


def is_prometheus_metrics_server_enabled() -> bool:
    """Tell whether the Prometheus metrics server is switched on.

    The flag is presence-based: the variable only has to exist in the
    environment, so even an empty value enables it.
    """
    return ENABLE_PROMETHEUS_METRICS_SERVER_VAR_NAME in os.environ
35 changes: 35 additions & 0 deletions src/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from prometheus_client import Gauge, Counter

# Delay, in seconds, associated with metric gathering.
# NOTE(review): presumably the pause between two gathering runs; confirm at
# the usage site.
GATHER_METRIC_DELAY_SEC = 3 * 60  # 3 min

# Gauge of clients, labelled by "pswd" and "autoread".
clients_amount_metric = Gauge(
    "clients_amount", "Users", labelnames=["pswd", "autoread"]
)

# Logging
# Counter of log records, labelled by log level.
log_metric = Counter("log_info", "Logs metric", labelnames=["level"])

# Telegram
# Counter of incoming bot commands, labelled by command name.
incoming_commands_metric = Counter(
    "incoming_command", "Incoming commands metric", labelnames=["command_name"]
)
# Counter of sent messages (no labels).
sent_message_metric = Counter("sent_message", "Sent messages metric")

# Samoware
# Counter of Samoware responses, labelled by status code ("sc").
samoware_response_status_code_metric = Counter(
    "samoware_response_sc", "Samoware responses status code metric", labelnames=["sc"]
)

# Domain
# The three counters below are labelled by whether the attempt succeeded.
login_metric = Counter("login", "Login events metric", labelnames=["is_successful"])
relogin_metric = Counter(
    "relogin", "Relogin events metric", labelnames=["is_successful"]
)
revalidation_metric = Counter(
    "revalidation", "Revalidation events metric", labelnames=["is_successful"]
)
# Logout counters (no labels).
logout_metric = Counter("logout", "Logout events metric")
forced_logout_metric = Counter("forced_logout", "Forced logout events metric")
# Counter of client handler errors, labelled by error type.
client_handler_errors_metric = Counter(
    "client_handler_error", "Client handler error events metric", labelnames=["type"]
)
Loading

0 comments on commit c9e7599

Please sign in to comment.