From 75bf316282f5e6aafe0876e5b0b9f4d5a6971928 Mon Sep 17 00:00:00 2001 From: James O'Claire Date: Tue, 26 Nov 2024 11:45:59 +0800 Subject: [PATCH] Simplified queries per company top apps --- backend/api_app/controllers/apps.py | 49 ++++++++++----- backend/api_app/controllers/companies.py | 61 +++++++++++++------ backend/dbcon/queries.py | 29 ++++++--- .../sql/query_company_category_top_apps.sql | 23 +++++++ backend/dbcon/sql/query_company_top_apps.sql | 26 ++++---- frontend/src/hooks.ts | 17 +++++- 6 files changed, 148 insertions(+), 57 deletions(-) create mode 100644 backend/dbcon/sql/query_company_category_top_apps.sql diff --git a/backend/api_app/controllers/apps.py b/backend/api_app/controllers/apps.py index e490c63..2a68bb3 100644 --- a/backend/api_app/controllers/apps.py +++ b/backend/api_app/controllers/apps.py @@ -5,6 +5,7 @@ """ import datetime +import time import urllib.parse from typing import Self @@ -229,10 +230,11 @@ async def get_overview(self: Self) -> dict: A dictionary representation of the total counts """ - logger.info(f"{self.path} start") + start = time.perf_counter() overview_df = get_total_counts() overview_dict = overview_df.to_dict(orient="records")[0] - logger.info(f"{self.path} return") + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") return overview_dict @get(path="/collections/{collection:str}", cache=3600) @@ -248,10 +250,11 @@ async def get_apps_overview(self: Self, collection: str) -> Collection: A dictionary representation of the list of apps for homepage """ - logger.info(f"{self.path} start {collection=}") + start = time.perf_counter() home_dict = get_app_overview_dict(collection=collection) - logger.info(f"{self.path} return") + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") return home_dict @get(path="/{store_id:str}", cache=3600) @@ -265,7 +268,7 @@ async def get_app_detail(self: Self, store_id: str) -> AppDetail: json """ -
logger.info(f"{self.path} start") + start = time.perf_counter() app_df = get_single_app(store_id) if app_df.empty: msg = f"Store ID not found: {store_id!r}" @@ -274,6 +277,8 @@ async def get_app_detail(self: Self, store_id: str) -> AppDetail: status_code=404, ) app_dict = app_df.to_dict(orient="records")[0] + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") return app_dict @get(path="/{store_id:str}/history", cache=3600) @@ -287,7 +292,7 @@ async def get_app_history_details(self: Self, store_id: str) -> AppHistory: json """ - logger.info(f"{self.path} start") + start = time.perf_counter() app_df = get_single_app(store_id) if app_df.empty: msg = f"Store ID not found: {store_id!r}" @@ -300,6 +305,8 @@ async def get_app_history_details(self: Self, store_id: str) -> AppHistory: app_name = app_dict["name"] hist_dict = app_history(store_app=store_app, app_name=app_name) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") return hist_dict @get(path="/{store_id:str}/packageinfo", cache=3600) @@ -313,7 +320,7 @@ async def get_package_info(self: Self, store_id: str) -> PackageDetails: json """ - logger.info(f"{self.path} start") + start = time.perf_counter() df = get_app_package_details(store_id) @@ -362,6 +369,8 @@ async def get_package_info(self: Self, store_id: str) -> PackageDetails: .apply(list) .to_dict(), ) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") return trackers_dict @get(path="/{store_id:str}/ranks", cache=3600) @@ -377,7 +386,7 @@ async def app_ranks(self: Self, store_id: str) -> AppRank: json """ - logger.info(f"{self.path} start") + start = time.perf_counter() df = get_ranks_for_app(store_id=store_id) if df.empty: msg = f"Ranks not found for {store_id!r}" @@ -402,6 +411,8 @@ async def app_ranks(self: Self, store_id: str) -> AppRank: .to_dict(orient="records") ) rank_dict = AppRank(latest=latest_dict,
history=hist_dict) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") return rank_dict @get(path="/developers/{developer_id:str}", cache=3600) @@ -417,7 +428,7 @@ async def get_developer_apps(self: Self, developer_id: str) -> DeveloperApps: json """ - logger.info(f"{self.path} start") + start = time.perf_counter() apps_df = get_single_developer(developer_id) if apps_df.empty: @@ -434,6 +445,8 @@ async def get_developer_apps(self: Self, developer_id: str) -> DeveloperApps: title=developer_name, apps=apps_dict, ) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") return developer_apps @get(path="/{store_id:str}/adstxt", cache=3600) @@ -452,7 +465,7 @@ async def get_developer_adstxt(self: Self, store_id: str) -> AdsTxtEntries: json """ - logger.info(f"{self.path} start") + start = time.perf_counter() adstxt_df = get_single_apps_adstxt(store_id) if adstxt_df.empty: @@ -472,6 +485,8 @@ async def get_developer_adstxt(self: Self, store_id: str) -> AdsTxtEntries: direct_entries=direct_adstxt_dict, reseller_entries=reseller_adstxt_dict, ) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") return txts @get(path="/search/{search_term:str}", cache=3600) @@ -484,9 +499,10 @@ async def search(self: Self, search_term: str) -> AppGroup: Can search packages, developers and app names. """ - logger.info(f"{self.path} term={search_term}") - + start = time.perf_counter() apps_dict = get_search_results(search_term) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") return Response( apps_dict, background=BackgroundTask(search_both_stores, search_term), @@ -502,10 +518,11 @@ async def search_playstore(self: Self, search_term: str) -> AppGroup: Can search packages, developers and app names.
""" - logger.info(f"{self.path} term={search_term} for playstore") - + start = time.perf_counter() results = google.search_play_store(search_term) app_group = AppGroup(title="Google Playstore Results", apps=results) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") if len(results) > 0: return Response( app_group, @@ -525,6 +542,8 @@ async def search_applestore(self: Self, search_term: str) -> AppGroup: """ logger.info(f"{self.path} term={search_term} for apple store") + start = time.perf_counter() + ids = apple.search_app_store_for_ids(search_term) full_results = [{"store_id": store_id, "store": 2} for store_id in ids] results = apple.app_details_for_ids(ids[:10]) @@ -537,6 +556,8 @@ async def search_applestore(self: Self, search_term: str) -> AppGroup: results_dict = df.to_dict(orient="records") app_group = AppGroup(title="Apple App Store Results", apps=results_dict) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") if len(results) > 0: return Response( app_group, diff --git a/backend/api_app/controllers/companies.py b/backend/api_app/controllers/companies.py index e5b1fb1..e89c41f 100644 --- a/backend/api_app/controllers/companies.py +++ b/backend/api_app/controllers/companies.py @@ -4,6 +4,7 @@ """ +import time import urllib from typing import Self @@ -48,7 +49,7 @@ def get_search_results(search_term: str) -> pd.DataFrame: - """Parse search term and return resulting APpGroup.""" + """Parse search term and return resulting AppGroup.""" decoded_input = urllib.parse.unquote(search_term) df = search_companies(search_input=decoded_input, limit=20) logger.info(f"{decoded_input=} returned rows: {df.shape[0]}") @@ -563,10 +564,13 @@ async def companies(self: Self) -> CompaniesOverview: An overview of companies across different platforms and sources.
""" - logger.info("GET /api/companies start") + start = time.perf_counter() overview = get_overviews() + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"GET /api/companies took {duration}ms") + return overview @get(path="/companies/categories/{category:str}", cache=3600) @@ -606,12 +610,13 @@ async def company_overview( An overview of companies, filtered for the specified company and category. """ - logger.info(f"GET /api/companies/{company_domain}/ start") - + start = time.perf_counter() df = get_company_overview(company_domain=company_domain) overview = make_company_category_sums(df=df) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"GET /api/companies/{company_domain} took {duration}ms") return overview @get( @@ -638,11 +643,13 @@ async def company_apps( An overview of companies, filtered for the specified company and category. """ - logger.info(f"GET /api/companies/{company_domain}/topapps {category=} start") - + start = time.perf_counter() results = get_company_apps_new(company_domain=company_domain, category=category) - logger.info(f"GET /api/companies/{company_domain}/topapps {category=} end") + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info( + f"GET /api/companies/{company_domain}/topapps {category=} took {duration}ms" + ) return results @get( @@ -666,7 +673,7 @@ async def company_parent_categories( A dictionary of parent categories for the specified company.
""" - logger.info(f"GET /api/companies/{company_domain}/parentcategories start") + start = time.perf_counter() df = get_company_parent_categories(company_domain=company_domain) @@ -694,6 +701,10 @@ async def company_parent_categories( df = df.rename(columns={"name": "group", "app_count": "value"}) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info( + f"GET /api/companies/{company_domain}/parentcategories took {duration}ms" + ) return df.to_dict(orient="records") @get( @@ -717,7 +728,7 @@ async def company_tree( An overview of companies, filtered for the specified company and category. """ - logger.info(f"GET /api/companies/{queried_domain}/tree start") + start = time.perf_counter() df = get_company_tree(company_domain=queried_domain) @@ -762,6 +773,9 @@ async def company_tree( children_companies=children_companies, ) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"GET /api/companies/{queried_domain}/tree took {duration}ms") + return tree @get( @@ -785,7 +799,7 @@ async def company_sdks( An overview of companies, filtered for the specified company and category.
""" - logger.info(f"GET /api/companies/{company_domain}/sdks start") + start = time.perf_counter() df = get_company_sdks(company_domain=company_domain) @@ -799,6 +813,9 @@ async def company_sdks( }, ) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"GET /api/companies/{company_domain}/sdks took {duration}ms") + return mydict @get(path="/companies/types/", cache=True) @@ -811,12 +828,14 @@ async def all_adtech_types(self: Self) -> CompanyTypes: each with an id, name, type and total of apps """ - logger.info(f"{self.path} start") + start = time.perf_counter() company_types_df = get_adtech_categories() - logger.info(f"{self.path} return") company_types = CompanyTypes(types=company_types_df.to_dict(orient="records")) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") + return company_types @get(path="/companies/types/{type_slug:str}", cache=True) @@ -833,9 +852,11 @@ async def adtech_type( each with an id, name, type and total of apps """ - logger.info(f"/companies/types/{type_slug}?{category=} start") + start = time.perf_counter() overview = get_overviews(category=category, type_slug=type_slug) - logger.info(f"/companies/types/{type_slug}?{category=} return") + + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"/companies/types/{type_slug}?{category=} took {duration}ms") return overview @@ -849,7 +870,7 @@ async def get_companies_shortlist_top(self: Self) -> TopCompaniesOverviewShort: each with an id, name, type and total of apps """ - logger.info(f"{self.path} start") + start = time.perf_counter() adnetworks = get_companies_top( type_slug="ad-networks", app_category=None, limit=5 ) @@ -860,7 +881,6 @@ async def get_companies_shortlist_top(self: Self) -> TopCompaniesOverviewShort: top_ad_networks = make_top_companies(adnetworks) top_mmps = make_top_companies(mmps) top_analytics = make_top_companies(analytics) - logger.info(f"{self.path} return") top_companies =
TopCompaniesOverviewShort( adnetworks=top_ad_networks, @@ -868,6 +888,9 @@ async def get_companies_shortlist_top(self: Self) -> TopCompaniesOverviewShort: analytics=top_analytics, ) + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path} took {duration}ms") + return top_companies @get(path="/companies/search/{search_term:str}", cache=True) @@ -879,7 +902,7 @@ async def get_companies_search(self: Self, search_term: str) -> list[CompanyDeta A list of CompanyDetail objects """ - logger.info(f"{self.path}/{search_term} start") + start = time.perf_counter() results = get_search_results(search_term=search_term) results["app_category"] = "all" @@ -889,6 +912,8 @@ async def get_companies_search(self: Self, search_term: str) -> list[CompanyDeta overview_df, _category_overview = prep_companies_overview_df( results, category_totals_df ) - logger.info(f"{self.path}/{search_term} return") + + duration = round((time.perf_counter() - start) * 1000, 2) + logger.info(f"{self.path}/{search_term} took {duration}ms") return overview_df.to_dict(orient="records") diff --git a/backend/dbcon/queries.py b/backend/dbcon/queries.py index 24b9140..0497b90 100644 --- a/backend/dbcon/queries.py +++ b/backend/dbcon/queries.py @@ -53,6 +53,7 @@ def load_sql_file(file_name: str) -> str: QUERY_TOP_COMPANIES_MONTH = load_sql_file("query_top_companies_month.sql") QUERY_TOP_PARENT_COMPANIES_MONTH = load_sql_file("query_top_companies_month_parent.sql") QUERY_COMPANY_TOP_APPS = load_sql_file("query_company_top_apps.sql") +QUERY_COMPANY_CATEGORY_TOP_APPS = load_sql_file("query_company_category_top_apps.sql") QUERY_COMPANIES_PARENT_OVERVIEW = load_sql_file("query_companies_parent_overview.sql") QUERY_COMPANIES_PARENT_OVERVIEW_CATEGORY = load_sql_file( "query_companies_parent_overview_category.sql" @@ -471,15 +472,25 @@ def new_get_top_apps_for_company( if mapped_category == "games": mapped_category = "game%" - df = pd.read_sql( - QUERY_COMPANY_TOP_APPS, - con=DBCON.engine, - params={ -
"company_domain": company_domain, - "mapped_category": mapped_category, - "mylimit": mylimit, - }, - ) + if mapped_category: + df = pd.read_sql( + QUERY_COMPANY_CATEGORY_TOP_APPS, + con=DBCON.engine, + params={ + "company_domain": company_domain, + "mapped_category": mapped_category, + "mylimit": mylimit, + }, + ) + else: + df = pd.read_sql( + QUERY_COMPANY_TOP_APPS, + con=DBCON.engine, + params={ + "company_domain": company_domain, + "mylimit": mylimit, + }, + ) if not df.empty: df["review_count"] = 0 df["rating"] = 5 diff --git a/backend/dbcon/sql/query_company_category_top_apps.sql b/backend/dbcon/sql/query_company_category_top_apps.sql new file mode 100644 index 0000000..810cf60 --- /dev/null +++ b/backend/dbcon/sql/query_company_category_top_apps.sql @@ -0,0 +1,23 @@ +WITH ranked_apps AS ( + SELECT * + FROM + adtech.company_top_apps + WHERE + company_domain = :company_domain + AND category = :mapped_category + AND app_company_category_rank <= :mylimit +) + +SELECT + ranked_apps.company_domain, + ranked_apps.store, + ranked_apps.tag_source, + ranked_apps.name, + ranked_apps.store_id, + ranked_apps.app_company_category_rank AS rank, + ranked_apps.rating_count, + ranked_apps.installs +FROM ranked_apps +ORDER BY + ranked_apps.store, ranked_apps.tag_source, + ranked_apps.app_company_category_rank; diff --git a/backend/dbcon/sql/query_company_top_apps.sql b/backend/dbcon/sql/query_company_top_apps.sql index 100f0a1..f48a7e9 100644 --- a/backend/dbcon/sql/query_company_top_apps.sql +++ b/backend/dbcon/sql/query_company_top_apps.sql @@ -1,21 +1,21 @@ WITH ranked_apps AS ( - SELECT - *, - ROW_NUMBER() OVER ( - PARTITION BY store, tag_source - ORDER BY - GREATEST( - COALESCE(rating_count, 0), COALESCE(installs, 0) - ) DESC - ) AS rank + SELECT * FROM adtech.company_top_apps WHERE company_domain = :company_domain - AND (:mapped_category IS NULL OR category = :mapped_category) + AND app_company_rank <= :mylimit ) -SELECT * +SELECT + ranked_apps.company_domain, +
ranked_apps.store, + ranked_apps.tag_source, + ranked_apps.name, + ranked_apps.store_id, + ranked_apps.app_company_rank AS rank, + ranked_apps.rating_count, + ranked_apps.installs FROM ranked_apps -WHERE rank <= :mylimit -ORDER BY store, tag_source, rank; +ORDER BY + ranked_apps.store, ranked_apps.tag_source, ranked_apps.app_company_rank; diff --git a/frontend/src/hooks.ts b/frontend/src/hooks.ts index 4211bcb..6231c7d 100644 --- a/frontend/src/hooks.ts +++ b/frontend/src/hooks.ts @@ -1,22 +1,33 @@ import type { Handle } from '@sveltejs/kit'; export const handle: Handle = async ({ event, resolve }) => { - if (event.url.pathname.startsWith('/networks')) { + const route = event.url.pathname; + + if (route.startsWith('/networks')) { return new Response(undefined, { status: 301, headers: { Location: '/companies/types/ad-networks' } }); } - if (event.url.pathname.startsWith('/trackers')) { + if (route.startsWith('/trackers')) { return new Response(undefined, { status: 301, headers: { Location: '/companies/types/ad-attribution' } }); } - if (event.url.pathname.startsWith('/adtech')) { + if (route.startsWith('/adtech')) { return new Response(undefined, { status: 301, headers: { Location: '/companies' } }); } + + const start = performance.now(); + // For all other paths, proceed with the request as usual const response = await resolve(event); + const end = performance.now(); + + const elapsed = end - start; + const duration = elapsed.toFixed(2); + + console.log(`${route} took ${duration}ms`); return response; };