Skip to content

Commit

Permalink
typing
Browse files (browse the repository at this point in the history)
  • Loading branch information
pablonyx committed Oct 26, 2024
1 parent 2f269ea commit 8838b28
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 8 deletions.
4 changes: 2 additions & 2 deletions backend/alembic/versions/949b4a92a401_remove_rt.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,7 +25,7 @@
depends_on = None


def upgrade():
def upgrade() -> None:
# Deletes all RequestTracker connectors and associated data
bind = op.get_bind()
session = Session(bind=bind)
Expand Down Expand Up @@ -69,6 +69,6 @@ def upgrade():
session.commit()


def downgrade():
def downgrade() -> None:
# No-op downgrade as we cannot restore deleted data
pass
12 changes: 6 additions & 6 deletions backend/danswer/utils/sitemap.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
import re
import xml.etree.ElementTree as ET
from typing import List
from typing import Set
from urllib.parse import urljoin

Expand All @@ -13,7 +12,7 @@

def _get_sitemap_locations_from_robots(base_url: str) -> Set[str]:
"""Extract sitemap URLs from robots.txt"""
sitemap_urls = set()
sitemap_urls: set = set()
try:
robots_url = urljoin(base_url, "/robots.txt")
resp = requests.get(robots_url, timeout=10)
Expand All @@ -29,7 +28,7 @@ def _get_sitemap_locations_from_robots(base_url: str) -> Set[str]:

def _extract_urls_from_sitemap(sitemap_url: str) -> Set[str]:
"""Extract URLs from a sitemap XML file"""
urls = set()
urls: set[str] = set()
try:
resp = requests.get(sitemap_url, timeout=10)
if resp.status_code != 200:
Expand All @@ -45,8 +44,9 @@ def _extract_urls_from_sitemap(sitemap_url: str) -> Set[str]:
if root.tag == f"{ns}sitemapindex":
# This is a sitemap index
for sitemap in root.findall(f".//{ns}loc"):
sub_urls = _extract_urls_from_sitemap(sitemap.text)
urls.update(sub_urls)
if sitemap.text:
sub_urls = _extract_urls_from_sitemap(sitemap.text)
urls.update(sub_urls)
else:
# This is a regular sitemap
for url in root.findall(f".//{ns}loc"):
Expand All @@ -59,7 +59,7 @@ def _extract_urls_from_sitemap(sitemap_url: str) -> Set[str]:
return urls


def list_pages_for_site(site: str) -> List[str]:
def list_pages_for_site(site: str) -> list[str]:
"""Get list of pages from a site's sitemaps"""
site = site.rstrip("/")
all_urls = set()
Expand Down

0 comments on commit 8838b28

Please sign in to comment.