Skip to content

Commit

Permalink
implemented article_cache endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
mojomonger committed May 16, 2024
1 parent a780e85 commit 23f3310
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 3 deletions.
8 changes: 7 additions & 1 deletion src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/flaskr/__init__.py
"""
import logging
import os

from flask import Flask # type: ignore
from flask_restful import Api, Resource # type: ignore
Expand All @@ -28,6 +29,8 @@
from src.views.statistics.references import References
from src.views.statistics.xhtml import Xhtml

# new stuff apr 2024
from src.views.v2.article_cache_view_v2 import ArticleCacheV2
# new stuff jan 2024
from src.views.v2.article_view_v2 import ArticleV2
from src.views.version import Version
Expand All @@ -37,7 +40,6 @@

app = Flask(__name__)


def add_cors_headers(response):
# Replace "*" with the specific origin(s) you want to allow
response.headers["Access-Control-Allow-Origin"] = "*"
Expand All @@ -46,6 +48,9 @@ def add_cors_headers(response):
return response


# Try to distinguish which server we are running on: read the
# FLASK_SERVER_NAME environment variable, falling back to
# 'Unknown Server' when it is not set.
server_name = os.getenv('FLASK_SERVER_NAME', 'Unknown Server')

# Register add_cors_headers as an after_request handler
app.after_request(add_cors_headers)

Expand All @@ -56,6 +61,7 @@ def add_cors_headers(response):
# Here we link together the API views and endpoint urls
# api.add_resource(LookupByWikidataQid, "/wikidata-qid/<string:qid>")
api.add_resource(ArticleV2, "/article")
api.add_resource(ArticleCacheV2, "/article_cache")

api.add_resource(Version, "/version")
api.add_resource(CheckUrls, "/check-urls")
Expand Down
24 changes: 24 additions & 0 deletions src/models/v2/file_io/article_cache_file_io_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Any, Dict, Optional

from src.models.exceptions import MissingInformationError
from src.models.file_io import FileIo
from src.models.v2.job.article_cache_job_v2 import ArticleCacheJobV2


class ArticleCacheFileIoV2(FileIo):
    """
    FileIo variant for cached article json files.

    Files are kept in the "articles/" subfolder and are named after the
    iari_id carried by the attached job.
    """

    data: Optional[Dict[str, Any]] = None

    job: Optional[ArticleCacheJobV2]

    subfolder = "articles/"

    # overrides the FileIo filename property with one specific to cached articles
    @property
    def filename(self) -> str:
        """
        Name of the cache file for the current job: "<iari_id>.json".

        Raises MissingInformationError when no job is attached.
        """
        if self.job:
            return f"{self.job.iari_id}.json"

        raise MissingInformationError("self.job undefined")
8 changes: 8 additions & 0 deletions src/models/v2/job/article_cache_job_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from src.models.v2.job import JobV2


class ArticleCacheJobV2(JobV2):
    """
    Job parameters for an article cache lookup.

    Instances are built from validated request arguments by
    ArticleCacheSchemaV2's post_load hook.
    """

    # the iari id of the article as previously retrieved.
    # The default is an empty string (not the int 0) so that it agrees with
    # the declared str type and with callers that test `iari_id == ""`.
    iari_id: str = ""
    article_version: int = 0  # should be 1 or 2
27 changes: 27 additions & 0 deletions src/models/v2/schema/article_cache_schema_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging

from marshmallow import fields, post_load

from src.models.v2.job.article_cache_job_v2 import ArticleCacheJobV2
from src.models.v2.schema import BaseSchemaV2

logger = logging.getLogger(__name__)


class ArticleCacheSchemaV2(BaseSchemaV2):
    """
    Request schema for the article cache endpoint.

    Fields:
        iari_id: required; identifies the cached article.
        article_version: optional; version 1 is original, 2 is V2.
            Defaults to 1 when omitted from the request.
    """

    # marshmallow style declarations
    iari_id = fields.Str(required=True)
    # `default` only supplies a value when dumping (serializing); `missing`
    # is what supplies the value when the field is absent on load. Without it
    # an omitted article_version never becomes 1 and the job falls back to
    # its own default instead.
    # NOTE(review): marshmallow >= 3.13 names these dump_default/load_default;
    # the older spellings are kept to match the existing `default=` usage -
    # switch both together when the dependency allows.
    article_version = fields.Int(default=1, missing=1)

    # noinspection PyUnusedLocal
    @post_load
    # runs after request args are loaded
    # **kwargs is needed here despite what the validator claims
    def return_object(self, data, **kwargs) -> ArticleCacheJobV2:  # type: ignore # dead: disable
        """
        Build the job object from the loaded request arguments.

        `data` holds the deserialized fields; it is expanded into the
        ArticleCacheJobV2 constructor as keyword arguments.
        """
        # imported here rather than at module level: src/__init__.py imports
        # the view that uses this schema, so a top-level import would be circular
        from src import app

        app.logger.debug("ArticleCacheSchemaV2::@post_load:return_object")

        return ArticleCacheJobV2(**data)
86 changes: 86 additions & 0 deletions src/views/v2/article_cache_view_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# from flask_restful import Resource, abort # type: ignore
# from marshmallow import Schema
from datetime import datetime
from typing import Any, Optional, Tuple
import traceback

from src.models.exceptions import MissingInformationError, WikipediaApiFetchError

from src.models.v2.file_io.article_cache_file_io_v2 import ArticleCacheFileIoV2
from src.models.v2.job.article_cache_job_v2 import ArticleCacheJobV2
from src.models.v2.schema.article_cache_schema_v2 import ArticleCacheSchemaV2

from src.models.v2.wikimedia.wikipedia.analyzer_v2 import WikipediaAnalyzerV2
from src.models.wikimedia.enums import AnalyzerReturnValues, WikimediaDomain
from src.views.v2.statistics import StatisticsViewV2

from src.helpers.get_version import get_poetry_version


class ArticleCacheV2(StatisticsViewV2):
    """
    returns cached data associated with the article specified by the schema

    The article is identified by the iari_id request argument. This view only
    reads from the cache: on a cache miss it reports an error rather than
    fetching or analyzing anything.
    """

    schema = ArticleCacheSchemaV2()  # overrides StatisticsViewV2's schema property
    job: ArticleCacheJobV2  # overrides StatisticsViewV2's job property

    def __setup_io__(self):
        """
        implementation of the __setup_io__ hook that StatisticsViewV2
        requires its derived classes to provide
        """
        self.io = ArticleCacheFileIoV2(job=self.job)

    def __return_article_data__(self) -> Tuple[Any, int]:
        """
        Return the cached article data for the current job.

        Returns:
            (cached data, 200) when the cache holds data for the job's iari_id,
            ({"error": ...}, 404) when it does not.
        """
        # imported here rather than at module level: src/__init__.py imports
        # this view, so a top-level import would be circular
        from src import app

        app.logger.debug("ArticleCacheV2::__return_article_data__")

        self.__setup_io__()

        # inherited; per the original author's note this fills io.data if successful
        self.__read_from_cache__()

        if self.io.data:
            # cached data has been successfully retrieved - return it
            app.logger.info(
                f"Returning cached articleV2 json data, date: {self.time_of_analysis}"
            )
            return self.io.data, 200

        # no cache entry: report it explicitly instead of falling off the end
        # of the function (which would hand None back to flask-restful)
        app.logger.info(
            f"ArticleCacheV2: no cached data found for iari_id {self.job.iari_id}"
        )
        return {
            "error": f"No cached data found for iari_id {self.job.iari_id}"
        }, 404

    def get(self) -> Tuple[Any, int]:
        """
        main entrypoint for flask
        must return a tuple (Any,response_code)

        Returns the cached article data (200), a "missing iari_id" error (400),
        a "no cached data" error (404), or an error payload (500) when an
        exception is raised while handling the request.
        """
        from src import app

        app.logger.debug("ArticleCacheV2::get")

        try:
            self.__validate_and_get_job__()
            # inherited from StatisticsViewV2
            # sets up job parameters, possibly with some massaging from the @post_load function
            # (@post_load is courtesy of the marshmallow module addition)

            if self.job.iari_id:
                return self.__return_article_data__()

            return self.__return_article_error__()

        except WikipediaApiFetchError as e:
            return {"error": f"API Error: {str(e)}"}, 500

        # NOTE(review): if __validate_and_get_job__ signals bad input via
        # flask's abort(), that HTTPException is caught here as well and is
        # reported as a 500 - confirm this is intended.
        except Exception as e:
            traceback.print_exc()
            return {"error": f"General Error: {str(e)}"}, 500

    def __return_article_error__(self) -> Tuple[Any, int]:
        """
        Return the error response for a request without a usable iari_id.

        Called by get() only when job.iari_id is falsy, so the 400 is returned
        unconditionally; checking for exactly "" would let other empty
        values fall through and return None.
        """
        from src import app

        app.logger.error("ArticleCacheV2: ERROR: iari_id is missing")
        return "iari_id is missing", 400
3 changes: 2 additions & 1 deletion src/views/v2/article_view_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def get(self):
app.logger.debug("ArticleV2::get")

try:
self.__validate_and_get_job__() # inherited from StatisticsWriteView -> StatisticsView
self.__validate_and_get_job__()
# inherited from StatisticsWriteView -> StatisticsViewV2
# sets up job parameters, possibly with some massaging from the @post_load function
# (@postload is courtesy of the marshmallow module addition)

Expand Down
3 changes: 2 additions & 1 deletion src/views/v2/statistics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datetime import datetime
from typing import Optional
import json

from flask import request
from flask_restful import Resource, abort # type: ignore
Expand Down Expand Up @@ -39,7 +40,7 @@ class StatisticsViewV2(Resource):

# derived ("child") class must implement __setup_io__ from this base ("parent") class
def __setup_io__(self):
raise NotImplementedError()
raise NotImplementedError() # must be defined in child class

def __setup_and_read_from_cache__(self):
self.__setup_io__() # sets up "io" property as FileIo instance
Expand Down

0 comments on commit 23f3310

Please sign in to comment.