Skip to content

Commit

Permalink
Start split of direct and reseller app-ads.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
ddxv committed Oct 15, 2024
1 parent dc20346 commit a13a164
Show file tree
Hide file tree
Showing 9 changed files with 191 additions and 89 deletions.
189 changes: 137 additions & 52 deletions backend/api_app/controllers/companies.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,20 @@ def get_company_apps_new(
mapped_category=category,
)

android_adstxt = df[
(df["tag_source"] == "app_ads") & (df["store"].str.startswith("Google"))
android_adstxt_reseller = df[
(df["tag_source"] == "app_ads_reseller")
& (df["store"].str.startswith("Google"))
]
ios_adstxt = df[
(df["tag_source"] == "app_ads") & (~df["store"].str.startswith("Google"))
ios_adstxt_reseller = df[
(df["tag_source"] == "app_ads_reseller")
& (~df["store"].str.startswith("Google"))
]

android_adstxt_direct = df[
(df["tag_source"] == "app_ads_direct") & (df["store"].str.startswith("Google"))
]
ios_adstxt_direct = df[
(df["tag_source"] == "app_ads_direct") & (~df["store"].str.startswith("Google"))
]

android_sdk = df[
Expand All @@ -64,12 +73,23 @@ def get_company_apps_new(
ios_sdk = df[(df["tag_source"] == "sdk") & (~df["store"].str.startswith("Google"))]

results = CompanyAppsOverview(
adstxt=CompanyPlatformOverview(
adstxt_reseller=CompanyPlatformOverview(
android=AppGroup(
apps=android_adstxt.to_dict(orient="records"),
apps=android_adstxt_reseller.to_dict(orient="records"),
title=company_name,
),
ios=AppGroup(apps=ios_adstxt.to_dict(orient="records"), title=company_name),
ios=AppGroup(
apps=ios_adstxt_reseller.to_dict(orient="records"), title=company_name,
),
),
adstxt_direct=CompanyPlatformOverview(
android=AppGroup(
apps=android_adstxt_direct.to_dict(orient="records"),
title=company_name,
),
ios=AppGroup(
apps=ios_adstxt_direct.to_dict(orient="records"), title=company_name,
),
),
sdk=CompanyPlatformOverview(
android=AppGroup(
Expand All @@ -88,23 +108,32 @@ def get_overviews(category: str | None = None) -> CompaniesOverview:

top_df = get_companies_top(app_category=category, limit=5)
top_sdk_df = top_df[top_df["tag_source"] == "sdk"].copy()
top_adstxt_df = top_df[top_df["tag_source"] == "app_ads"].copy()
top_adstxt_direct_df = top_df[top_df["tag_source"] == "app_ads_direct"].copy()
top_adstxt_reseller_df = top_df[top_df["tag_source"] == "app_ads_reseller"].copy()

top_sdk_df["company_title"] = np.where(
top_sdk_df["company_name"].isna(),
top_sdk_df["company_domain"],
top_sdk_df["company_name"],
)
top_adstxt_df["company_title"] = np.where(
top_adstxt_df["company_name"].isna(),
top_adstxt_df["company_domain"],
top_adstxt_df["company_name"],
top_adstxt_direct_df["company_title"] = np.where(
top_adstxt_direct_df["company_name"].isna(),
top_adstxt_direct_df["company_domain"],
top_adstxt_direct_df["company_name"],
)
top_adstxt_reseller_df["company_title"] = np.where(
top_adstxt_reseller_df["company_name"].isna(),
top_adstxt_reseller_df["company_domain"],
top_adstxt_reseller_df["company_name"],
)

top_sdk_df = top_sdk_df.rename(
columns={"company_title": "group", "app_count": "value"},
).sort_values(by=["value"], ascending=True)
top_adstxt_df = top_adstxt_df.rename(
top_adstxt_direct_df = top_adstxt_direct_df.rename(
columns={"company_title": "group", "app_count": "value"},
).sort_values(by=["value"], ascending=True)
top_adstxt_reseller_df = top_adstxt_reseller_df.rename(
columns={"company_title": "group", "app_count": "value"},
).sort_values(by=["value"], ascending=True)

Expand All @@ -124,19 +153,29 @@ def get_overviews(category: str | None = None) -> CompaniesOverview:
& (overview_df["tag_source"] == "sdk")
]

ios_adstxt = overview_df[
ios_adstxt_direct = overview_df[
(~overview_df["store"].str.contains("google", case=False))
& (overview_df["tag_source"] == "app_ads_direct")
]

ios_adstxt_reseller = overview_df[
(~overview_df["store"].str.contains("google", case=False))
& (overview_df["tag_source"] == "app_ads")
& (overview_df["tag_source"] == "app_ads_reseller")
]

android_sdk = overview_df[
(overview_df["store"].str.contains("google", case=False))
& (overview_df["tag_source"] == "sdk")
]

android_adstxt = overview_df[
android_adstxt_direct = overview_df[
(overview_df["store"].str.contains("google", case=False))
& (overview_df["tag_source"] == "app_ads")
& (overview_df["tag_source"] == "app_ads_direct")
]

android_adstxt_reseller = overview_df[
(overview_df["store"].str.contains("google", case=False))
& (overview_df["tag_source"] == "app_ads_reseller")
]

results = CompaniesOverview(
Expand All @@ -145,10 +184,15 @@ def get_overviews(category: str | None = None) -> CompaniesOverview:
ios=ios_sdk.to_dict(orient="records"),
top=top_sdk_df.to_dict(orient="records"),
),
adstxt=PlatformCompanies(
android=android_adstxt.to_dict(orient="records"),
ios=ios_adstxt.to_dict(orient="records"),
top=top_adstxt_df.to_dict(orient="records"),
adstxt_direct=PlatformCompanies(
android=android_adstxt_direct.to_dict(orient="records"),
ios=ios_adstxt_direct.to_dict(orient="records"),
top=top_adstxt_direct_df.to_dict(orient="records"),
),
adstxt_reseller=PlatformCompanies(
android=android_adstxt_reseller.to_dict(orient="records"),
ios=ios_adstxt_reseller.to_dict(orient="records"),
top=top_adstxt_reseller_df.to_dict(orient="records"),
),
categories=category_overview,
)
Expand Down Expand Up @@ -269,7 +313,8 @@ def make_category_uniques(df: pd.DataFrame) -> CategoryOverview:
is_apple = df["store"].str.contains("Apple")
is_google = df["store"].str.contains("Google")
is_sdk = df["tag_source"] == "sdk"
is_app_ads = df["tag_source"] == "app_ads"
is_app_ads_reseller = df["tag_source"] == "app_ads_reseller"
is_app_ads_direct = df["tag_source"] == "app_ads_direct"

# Function to calculate unique counts
def get_unique_counts(mask: pd.Series) -> int:
Expand All @@ -280,8 +325,16 @@ def get_unique_counts(mask: pd.Series) -> int:
"total_apps": df["company_domain"].nunique(),
"sdk_ios_total_apps": get_unique_counts(is_apple & is_sdk),
"sdk_android_total_apps": get_unique_counts(is_google & is_sdk),
"adstxt_ios_total_apps": get_unique_counts(is_apple & is_app_ads),
"adstxt_android_total_apps": get_unique_counts(is_google & is_app_ads),
"adstxt_direct_ios_total_apps": get_unique_counts(is_apple & is_app_ads_direct),
"adstxt_direct_android_total_apps": get_unique_counts(
is_google & is_app_ads_direct,
),
"adstxt_reseller_ios_total_apps": get_unique_counts(
is_apple & is_app_ads_reseller,
),
"adstxt_reseller_android_total_apps": get_unique_counts(
is_google & is_app_ads_reseller,
),
}
overview.update_stats("all", **overall_stats)

Expand All @@ -293,11 +346,17 @@ def get_unique_counts(mask: pd.Series) -> int:
"total_apps": get_unique_counts(cat_mask),
"sdk_ios_total_apps": get_unique_counts(cat_mask & is_apple & is_sdk),
"sdk_android_total_apps": get_unique_counts(cat_mask & is_google & is_sdk),
"adstxt_ios_total_apps": get_unique_counts(
cat_mask & is_apple & is_app_ads,
"adstxt_direct_ios_total_apps": get_unique_counts(
cat_mask & is_apple & is_app_ads_direct,
),
"adstxt_direct_android_total_apps": get_unique_counts(
cat_mask & is_google & is_app_ads_direct,
),
"adstxt_android_total_apps": get_unique_counts(
cat_mask & is_google & is_app_ads,
"adstxt_reseller_ios_total_apps": get_unique_counts(
cat_mask & is_apple & is_app_ads_reseller,
),
"adstxt_reseller_android_total_apps": get_unique_counts(
cat_mask & is_google & is_app_ads_reseller,
),
}
overview.update_stats(cat, **cat_stats)
Expand All @@ -312,10 +371,14 @@ def make_category_sums(df: pd.DataFrame) -> CategoryOverview:
"sdk_ios": (df["store"].str.contains("Apple")) & (df["tag_source"] == "sdk"),
"sdk_android": (df["store"].str.contains("Google"))
& (df["tag_source"] == "sdk"),
"adstxt_ios": (df["store"].str.contains("Apple"))
& (df["tag_source"] == "app_ads"),
"adstxt_android": (df["store"].str.contains("Google"))
& (df["tag_source"] == "app_ads"),
"adstxt_direct_ios": (df["store"].str.contains("Apple"))
& (df["tag_source"] == "app_ads_direct"),
"adstxt_direct_android": (df["store"].str.contains("Google"))
& (df["tag_source"] == "app_ads_direct"),
"adstxt_reseller_ios": (df["store"].str.contains("Apple"))
& (df["tag_source"] == "app_ads_reseller"),
"adstxt_reseller_android": (df["store"].str.contains("Google"))
& (df["tag_source"] == "app_ads_reseller"),
}

# Calculate sums for all conditions in one go
Expand All @@ -328,27 +391,35 @@ def make_category_sums(df: pd.DataFrame) -> CategoryOverview:
(
sdk_ios_total_apps,
sdk_android_total_apps,
adstxt_ios_total_apps,
adstxt_android_total_apps,
adstxt_direct_ios_total_apps,
adstxt_direct_android_total_apps,
adstxt_reseller_ios_total_apps,
adstxt_reseller_android_total_apps,
) = (
results["sdk_ios"],
results["sdk_android"],
results["adstxt_ios"],
results["adstxt_android"],
results["adstxt_direct_ios"],
results["adstxt_direct_android"],
results["adstxt_reseller_ios"],
results["adstxt_reseller_android"],
)

total_apps = (
sdk_ios_total_apps
+ sdk_android_total_apps
+ adstxt_ios_total_apps
+ adstxt_android_total_apps
+ adstxt_direct_ios_total_apps
+ adstxt_direct_android_total_apps
+ adstxt_reseller_ios_total_apps
+ adstxt_reseller_android_total_apps
)

overview.update_stats(
"all",
total_apps=total_apps,
adstxt_ios_total_apps=adstxt_ios_total_apps,
adstxt_android_total_apps=adstxt_android_total_apps,
adstxt_direct_ios_total_apps=adstxt_direct_ios_total_apps,
adstxt_direct_android_total_apps=adstxt_direct_android_total_apps,
adstxt_reseller_ios_total_apps=adstxt_reseller_ios_total_apps,
adstxt_reseller_android_total_apps=adstxt_reseller_android_total_apps,
sdk_ios_total_apps=sdk_ios_total_apps,
sdk_android_total_apps=sdk_android_total_apps,
)
Expand All @@ -361,11 +432,17 @@ def make_category_sums(df: pd.DataFrame) -> CategoryOverview:
"sdk_android": (df["store"].str.contains("Google"))
& (df["tag_source"] == "sdk")
& (df["app_category"] == cat),
"adstxt_ios": (df["store"].str.contains("Apple"))
& (df["tag_source"] == "app_ads")
"adstxt_direct_ios": (df["store"].str.contains("Apple"))
& (df["tag_source"] == "app_ads_direct")
& (df["app_category"] == cat),
"adstxt_direct_android": (df["store"].str.contains("Google"))
& (df["tag_source"] == "app_ads_direct")
& (df["app_category"] == cat),
"adstxt_reseller_ios": (df["store"].str.contains("Apple"))
& (df["tag_source"] == "app_ads_reseller")
& (df["app_category"] == cat),
"adstxt_android": (df["store"].str.contains("Google"))
& (df["tag_source"] == "app_ads")
"adstxt_reseller_android": (df["store"].str.contains("Google"))
& (df["tag_source"] == "app_ads_reseller")
& (df["app_category"] == cat),
}

Expand All @@ -379,27 +456,35 @@ def make_category_sums(df: pd.DataFrame) -> CategoryOverview:
(
sdk_ios_total_apps,
sdk_android_total_apps,
adstxt_ios_total_apps,
adstxt_android_total_apps,
adstxt_direct_ios_total_apps,
adstxt_direct_android_total_apps,
adstxt_reseller_ios_total_apps,
adstxt_reseller_android_total_apps,
) = (
results["sdk_ios"],
results["sdk_android"],
results["adstxt_ios"],
results["adstxt_android"],
results["adstxt_direct_ios"],
results["adstxt_direct_android"],
results["adstxt_reseller_ios"],
results["adstxt_reseller_android"],
)

total_apps = (
sdk_ios_total_apps
+ sdk_android_total_apps
+ adstxt_ios_total_apps
+ adstxt_android_total_apps
+ adstxt_direct_ios_total_apps
+ adstxt_direct_android_total_apps
+ adstxt_reseller_ios_total_apps
+ adstxt_reseller_android_total_apps
)

overview.update_stats(
cat,
total_apps=total_apps,
adstxt_ios_total_apps=adstxt_ios_total_apps,
adstxt_android_total_apps=adstxt_android_total_apps,
adstxt_direct_ios_total_apps=adstxt_direct_ios_total_apps,
adstxt_direct_android_total_apps=adstxt_direct_android_total_apps,
adstxt_reseller_ios_total_apps=adstxt_reseller_ios_total_apps,
adstxt_reseller_android_total_apps=adstxt_reseller_android_total_apps,
sdk_ios_total_apps=sdk_ios_total_apps,
sdk_android_total_apps=sdk_android_total_apps,
)
Expand Down
12 changes: 8 additions & 4 deletions backend/api_app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,10 @@ class CategoryAppStats:
"""Contains a list of CompanyDetail objects representing the top networks identified."""

total_apps: int = 0
adstxt_ios_total_apps: int = 0
adstxt_android_total_apps: int = 0
adstxt_direct_ios_total_apps: int = 0
adstxt_direct_android_total_apps: int = 0
adstxt_reseller_ios_total_apps: int = 0
adstxt_reseller_android_total_apps: int = 0
sdk_ios_total_apps: int = 0
sdk_android_total_apps: int = 0

Expand Down Expand Up @@ -234,7 +236,8 @@ class CompaniesOverview:
"""Contains a list of CompanyDetail objects representing the top networks identified."""

sdk: PlatformCompanies
adstxt: PlatformCompanies
adstxt_direct: PlatformCompanies
adstxt_reseller: PlatformCompanies
categories: CategoryOverview


Expand All @@ -253,7 +256,8 @@ class CompanyAppsOverview:
"""Overview of a company's apps on different platforms."""

sdk: CompanyPlatformOverview
adstxt: CompanyPlatformOverview
adstxt_direct: CompanyPlatformOverview
adstxt_reseller: CompanyPlatformOverview


@dataclass
Expand Down
6 changes: 4 additions & 2 deletions backend/dbcon/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,25 +226,27 @@ def get_app_package_details(store_id: str) -> pd.DataFrame:

def get_companies_parent_overview(app_category: str | None = None) -> pd.DataFrame:
    """Query the parent-company overview and return it as a DataFrame.

    Runs ``QUERY_COMPANIES_PARENT_OVERVIEW`` with ``app_category`` bound as a
    query parameter, then normalises two columns for downstream consumers.

    Args:
        app_category: Category value handed to the query as ``app_category``.
            NOTE(review): presumably ``None`` means "all categories" -- confirm
            against the SQL behind ``QUERY_COMPANIES_PARENT_OVERVIEW``.

    Returns:
        The query result, with numeric ``store`` ids 1 and 2 replaced by
        ``"Google Play"`` and ``"Apple App Store"``, and missing
        ``app_category`` values replaced by the literal string ``"None"``.

    """
    # One start/return pair per query; the generic "query companies overview"
    # message was shared verbatim with get_companies_top, making the two
    # queries indistinguishable in the logs.
    logger.info("query companies parent overview start")
    df = pd.read_sql(
        QUERY_COMPANIES_PARENT_OVERVIEW,
        DBCON.engine,
        params={"app_category": app_category},
    )
    # Callers filter on the store *name* (e.g. str.contains("Google")), so map
    # the numeric ids to display names here.
    df["store"] = df["store"].replace({1: "Google Play", 2: "Apple App Store"})
    # Use the string "None" (not NaN) so rows without a category still compare
    # equal to a concrete category key.
    df.loc[df["app_category"].isna(), "app_category"] = "None"
    logger.info("query companies parent overview return")
    return df


def get_companies_top(app_category: str | None = None, limit: int = 10) -> pd.DataFrame:
    """Query the top companies and return them as a DataFrame.

    Runs ``QUERY_COMPANIES_PARENT_TOP`` with ``app_category`` and ``limit``
    bound as query parameters; the result is returned unmodified.

    Args:
        app_category: Category value handed to the query as ``app_category``.
            NOTE(review): presumably ``None`` means "all categories" -- confirm
            against the SQL behind ``QUERY_COMPANIES_PARENT_TOP``.
        limit: Row limit handed to the query as ``mylimit``.
            NOTE(review): whether the limit applies overall or per
            ``tag_source`` is decided by the SQL -- confirm there.

    Returns:
        The raw query result.

    """
    # One start/return pair per query; the generic "query companies overview"
    # message was shared verbatim with get_companies_parent_overview.
    logger.info("query companies parent top start")
    df = pd.read_sql(
        QUERY_COMPANIES_PARENT_TOP,
        DBCON.engine,
        params={"app_category": app_category, "mylimit": limit},
    )
    logger.info("query companies parent top return")
    return df


Expand Down
3 changes: 2 additions & 1 deletion frontend/src/lib/CompanyTableGrid.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@

<!-- App Ads.txt Section -->
<div class="bg-white p-6 rounded-lg shadow-md">
<h2 class="text-xl font-bold text-gray-800 mb-4">App Ads.txt</h2>
<h2 class="text-xl font-bold text-gray-800">App Ads.txt</h2>
<p class="text-small font-bold text-gray-800 mb-4">('DIRECT')</p>
<p class="text-lg text-gray-700 mb-2">
App-ads.txt files are an open standard by the IAB to help combat ad fraud. This data was
crawled from the URLs on the app's developer pages. Not all apps have app-ads.txt, many do
Expand Down
Loading

0 comments on commit a13a164

Please sign in to comment.