Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
chouinar committed Sep 22, 2023
1 parent bcaf0eb commit 2127815
Show file tree
Hide file tree
Showing 21 changed files with 1,088 additions and 175 deletions.
791 changes: 740 additions & 51 deletions app/poetry.lock

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions app/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ python = "^3.10"
SQLAlchemy = {extras = ["mypy"], version = "2.0"}
alembic = "^1.8.1"
psycopg2-binary = "^2.9.3"
python-dotenv = "^0.20.0"
pydantic = "^1.10.0"
python-dotenv = "0.21.0"
pydantic = "2.0.3"
botocore = "^1.27.67"
boto3 = "~1.24.67"
smart-open = "^6.1.0"
Expand All @@ -20,6 +20,7 @@ APIFlask = "^1.1.3"
marshmallow-dataclass = {extras = ["enum", "union"], version = "^8.5.8"}
marshmallow = "^3.18.0"
gunicorn = "^21.2.0"
fastapi = {extras = ["all"], version = "^0.103.1"}

[tool.poetry.group.dev.dependencies]
black = "^22.6.0"
Expand Down
8 changes: 6 additions & 2 deletions app/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

import src.adapters.db as db
import src.adapters.db.flask_db as flask_db
import src.logging
import src.logging.flask_logger as flask_logger
import src.logger
import src.logger.flask_logger as flask_logger
from src.api.healthcheck import healthcheck_blueprint
from src.api.schemas import response_schema
from src.api.users import user_blueprint
Expand All @@ -21,7 +21,11 @@
def create_app() -> APIFlask:
app = APIFlask(__name__)

<<<<<<< Updated upstream
src.logging.init(__package__)
=======
root_logger = src.logging.init(__package__)
>>>>>>> Stashed changes
flask_logger.init_app(logging.root, app)

db_client = db.PostgresDBClient()
Expand Down
12 changes: 11 additions & 1 deletion app/src/db/migrations/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sqlalchemy

import src.adapters.db as db
import src.logging
import src.logger
from src.db.models import metadata

# this is the Alembic Config object, which provides
Expand All @@ -16,6 +16,16 @@

# Initialize logging
with src.logging.init("migrations"):
<<<<<<< Updated upstream
=======

if not config.get_main_option("sqlalchemy.url"):
uri = make_connection_uri(get_db_config())

# Escape percentage signs in the URI.
# https://alembic.sqlalchemy.org/en/latest/api/config.html#alembic.config.Config.set_main_option
config.set_main_option("sqlalchemy.url", uri.replace("%", "%%"))
>>>>>>> Stashed changes

# add your model's MetaData object here
# for 'autogenerate' support
Expand Down
38 changes: 38 additions & 0 deletions app/src/fastapi_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import logging

import fastapi
from pydantic import BaseModel, Field

import src.logger as our_logging

logger = logging.getLogger(__name__)

def create_app() -> fastapi.FastAPI:
    """Initialize application logging and return a new FastAPI instance."""
    # NOTE(review): `our_logging` is bound by `import logger as our_logging`
    # above, which lacks the `src.` prefix used everywhere else in this
    # package (e.g. src.logger in app.py) — confirm it resolves at runtime.
    our_logging.init(__package__)

    logger.info("Creating app")
    return fastapi.FastAPI()

app = create_app()

@app.get("/")
def root():
    """Liveness endpoint: log a marker line and return a static greeting."""
    logger.info("hello - in the root")
    payload = {"message": "hello"}
    return payload

class NestedA(BaseModel):
    # Innermost test payload; `examples` feeds the generated OpenAPI schema.
    x: str = Field(examples=["hello there again"])


class NestedB(BaseModel):
    # Middle layer of the nested-model test: a list of NestedA values.
    y: list[NestedA]

class Item(BaseModel):
    # Request/response body for POST /items/. Mirrors the FastAPI tutorial
    # item, plus `z` to exercise deeply nested schema generation.
    name: str
    description: str | None = None
    price: float
    tax: float | None = None
    z: list[NestedB]

@app.post("/items/")
def create_item(item: Item) -> Item:
    """Echo endpoint: log the validated Item and return it unchanged."""
    logger.info(item)
    return item
33 changes: 33 additions & 0 deletions app/src/logger/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Module for initializing logging configuration for the application.
There are two formatters for the log messages: human-readable and JSON.
The formatter that is used is determined by the environment variable
LOG_FORMAT. If the environment variable is not set, the JSON formatter
is used by default. See src.logger.formatters for more information.
The logger also adds a PII mask filter to the root logger. See
src.logger.pii for more information.
Usage:
import src.logger
with src.logger.init("program name"):
...
Once the module has been initialized, the standard logging module can be
used to log messages:
Example:
import logging
logger = logging.getLogger(__name__)
logger.info("message")
"""


import src.logger.config as config


def init(program_name: str) -> config.LoggingContext:
    """Configure application logging; returns a context manager that tears it down."""
    return config.LoggingContext(program_name)
File renamed without changes.
154 changes: 154 additions & 0 deletions app/src/logger/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import logging
import os
import platform
import pwd
import sys
from typing import Any, ContextManager, cast

import src.logger.audit
import src.logger.formatters as formatters
import src.logger.pii as pii
from src.util.env_config import PydanticBaseEnvConfig

logger = logging.getLogger(__name__)

_original_argv = tuple(sys.argv)


class HumanReadableFormatterConfig(PydanticBaseEnvConfig):
    # Column width the human-readable formatter pads the message field to.
    # Presumably overridable via LOG_HUMAN_READABLE_FORMATTER__MESSAGE_WIDTH
    # given LoggingConfig's env_prefix/env_nested_delimiter — TODO confirm.
    message_width: int = formatters.HUMAN_READABLE_FORMATTER_DEFAULT_MESSAGE_WIDTH


class LoggingConfig(PydanticBaseEnvConfig):
    """Top-level logging settings, read from LOG_-prefixed environment variables."""

    format: str = "json"  # "json" or "human-readable" (see get_formatter below)
    level: str = "INFO"
    enable_audit: bool = False
    # Annotate with the concrete subclass, not the PydanticBaseEnvConfig base:
    # a base-class annotation lets pydantic validate/narrow the field to the
    # base model, hiding `message_width` which get_human_readable_formatter()
    # reads from this field.
    human_readable_formatter: HumanReadableFormatterConfig = HumanReadableFormatterConfig()

    class Config:
        env_prefix = "log_"
        env_nested_delimiter = "__"


class LoggingContext(ContextManager[None]):
    """
    A context manager for handling setting up the logging stream.
    To help facilitate being able to test logging, we need to be able
    to easily create temporary output streams and then tear them down.
    When this context manager is torn down, the stream handler created
    with it will be removed.
    For example:
    ```py
    import logging
    logger = logging.getLogger(__name__)
    with LoggingContext("example_program_name"):
        # This log message will go to stdout
        logger.info("example log message")
    # This log message won't go to stdout as the
    # handler will have been removed
    logger.info("example log message")
    ```
    Note that any other handlers added to the root logger won't be affected
    and calling this multiple times before exit would result in duplicate logs.
    """

    def __init__(self, program_name: str) -> None:
        self._configure_logging()
        log_program_info(program_name)

    def __enter__(self) -> None:
        pass

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        # Remove the console handler to stop logs from being sent to stdout
        # This is useful in the test suite, since multiple tests may initialize
        # separate duplicate handlers. This allows for easier cleanup for each
        # of those tests.
        logging.root.removeHandler(self.console_handler)

    def _configure_logging(self) -> None:
        """Configure logging for the application.
        Configures the root module logger to log to stdout.
        Adds a PII mask filter to the root logger.
        Also configures log levels third party packages.
        """
        config = LoggingConfig()

        # Loggers can be configured using config functions defined
        # in logging.config or by directly making calls to the main API
        # of the logging module (see https://docs.python.org/3/library/logging.config.html)
        # We opt to use the main API using functions like `addHandler` which is
        # non-destructive, i.e. it does not overwrite any existing handlers.
        # In contrast, logging.config.dictConfig() would overwrite any existing loggers.
        # This is important during testing, since fixtures like `caplog` add handlers that would
        # get overwritten if we call logging.config.dictConfig() during the scope of the test.
        self.console_handler = logging.StreamHandler(sys.stdout)
        formatter = get_formatter(config)
        self.console_handler.setFormatter(formatter)
        self.console_handler.addFilter(pii.mask_pii)
        logging.root.addHandler(self.console_handler)
        logging.root.setLevel(config.level)

        if config.enable_audit:
            # Fixed: this file imports src.logger.audit (module renamed from
            # src.logging); the previous `src.logging.audit.init()` raised
            # NameError whenever audit logging was enabled.
            src.logger.audit.init()

        # Configure loggers for third party packages
        logging.getLogger("alembic").setLevel(logging.INFO)
        logging.getLogger("werkzeug").setLevel(logging.WARN)
        logging.getLogger("sqlalchemy.pool").setLevel(logging.INFO)
        logging.getLogger("sqlalchemy.dialects.postgresql").setLevel(logging.INFO)


def get_formatter(config: LoggingConfig) -> logging.Formatter:
    """Pick the root logger's formatter from config.

    Driven by the LOG_FORMAT environment variable; anything other than
    "human-readable" (including unset) falls back to the JSON formatter.
    """
    if config.format != "human-readable":
        return formatters.JsonFormatter()
    return get_human_readable_formatter(config.human_readable_formatter)


def log_program_info(program_name: str) -> None:
    """Emit startup records: interpreter/platform/host/pid/user, then the argv line."""
    logger.info(
        "start %s: %s %s %s, hostname %s, pid %i, user %i(%s)",
        program_name,
        platform.python_implementation(),
        platform.python_version(),
        platform.system(),
        platform.node(),
        os.getpid(),
        os.getuid(),
        pwd.getpwuid(os.getuid()).pw_name,
        extra={
            "hostname": platform.node(),
            "cpu_count": os.cpu_count(),
            # If mypy is run on a mac, it will throw a module has no attribute error, even though
            # we never actually access it with the conditional.
            #
            # However, we can't just silence this error, because on linux (e.g. CI/CD) that will
            # throw an unused “type: ignore” comment error. Casting to Any instead ensures this
            # passes regardless of where mypy is being run
            "cpu_usable": (
                len(cast(Any, os).sched_getaffinity(0))
                if "sched_getaffinity" in dir(os)
                else "unknown"
            ),
        },
    )
    # argv is captured at import time (_original_argv) so later mutations of
    # sys.argv don't change what gets logged here.
    logger.info("invoked as: %s", " ".join(_original_argv))


def get_human_readable_formatter(
    config: HumanReadableFormatterConfig,
) -> formatters.HumanReadableFormatter:
    """Build the human readable formatter used by the root logger."""
    formatter = formatters.HumanReadableFormatter(message_width=config.message_width)
    return formatter
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from typing import Any, Callable, Type, TypeVar
from uuid import UUID

import src.logging.decodelog as decodelog
import src.logger.decodelog as decodelog

T = TypeVar("T")

Expand Down
File renamed without changes.
60 changes: 57 additions & 3 deletions app/src/logging/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,64 @@
logger = logging.getLogger(__name__)
logger.info("message")
"""

from contextlib import contextmanager
import logging
import os
import platform
import pwd
import sys
from typing import Any, ContextManager, cast

import src.logging.config as config

logger = logging.getLogger(__name__)
_original_argv = tuple(sys.argv)


class Log:
    # WIP: context manager that configures logging on construction and removes
    # the installed stream handler on exit.
    # NOTE(review): assumes config.configure_logging() returns a
    # (root_logger, stream_handler) pair — confirm against src.logging.config;
    # the @contextmanager-based `init` below unpacks a single value from the
    # same call, so at most one of the two can be right.
    def __init__(self, program_name: str) -> None:
        self.program_name = program_name
        self.root_logger, self.stream_handler = config.configure_logging()
        log_program_info(self.program_name)

    def __enter__(self) -> logging.Logger:
        return self.root_logger

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        self.root_logger.removeHandler(self.stream_handler)

def init(program_name: str) -> config.LoggingContext:
    """Initialize logging and return a context manager that tears it down on exit.

    NOTE(review): a @contextmanager-based `init` previously preceded this
    definition and was silently shadowed by it (Python binds the last `def`
    of a given name), so it was unreachable dead code; it has been removed.
    It also disagreed with the Log class above about configure_logging()'s
    return shape, so only this runtime-effective variant is kept.
    """
    return config.LoggingContext(program_name)
def log_program_info(program_name: str) -> None:
    """Emit startup records: interpreter/platform/host/pid/user, then the argv line."""
    logger.info(
        "start %s: %s %s %s, hostname %s, pid %i, user %i(%s)",
        program_name,
        platform.python_implementation(),
        platform.python_version(),
        platform.system(),
        platform.node(),
        os.getpid(),
        os.getuid(),
        pwd.getpwuid(os.getuid()).pw_name,
        extra={
            "hostname": platform.node(),
            "cpu_count": os.cpu_count(),
            # If mypy is run on a mac, it will throw a module has no attribute error, even though
            # we never actually access it with the conditional.
            #
            # However, we can't just silence this error, because on linux (e.g. CI/CD) that will
            # throw an unused “type: ignore” comment error. Casting to Any instead ensures this
            # passes regardless of where mypy is being run
            "cpu_usable": (
                len(cast(Any, os).sched_getaffinity(0))
                if "sched_getaffinity" in dir(os)
                else "unknown"
            ),
        },
    )
    # argv is captured at import time (_original_argv) so later mutations of
    # sys.argv don't change what gets logged here.
    logger.info("invoked as: %s", " ".join(_original_argv))
Loading

0 comments on commit 2127815

Please sign in to comment.