diff --git a/.gitmodules b/.gitmodules
index 1312579..968bbea 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,3 @@
 [submodule "isisdl.wiki"]
     path = isisdl.wiki
     url = git@github.com:Emily3403/isisdl.wiki.git
-[submodule "aur"]
-    path = aur
-    url = https://aur.archlinux.org/isisdl.git
-[submodule "tldr"]
-    path = tldr
-    url = git@github.com:Emily3403/tldr.git
diff --git a/aur b/aur
deleted file mode 160000
index aa3cc7d..0000000
--- a/aur
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit aa3cc7d64ecebede506a251ae254e716c3931753
diff --git a/bin/.gitignore b/bin/.gitignore
new file mode 100644
index 0000000..6a303f7
--- /dev/null
+++ b/bin/.gitignore
@@ -0,0 +1,9 @@
+# compile-isisdl
+static-compile/*.build
+static-compile/*.dist
+static-compile/*.onefile-build
+static-compile/isisdl-linux.bin
+static-compile/isisdl-windows.exe
+
+# upload-PyPI
+build/
diff --git a/checkCode.sh b/bin/check-code.sh
similarity index 100%
rename from checkCode.sh
rename to bin/check-code.sh
diff --git a/compile/compile_isisdl.bat b/bin/compile-isisdl.bat
old mode 100644
new mode 100755
similarity index 100%
rename from compile/compile_isisdl.bat
rename to bin/compile-isisdl.bat
diff --git a/bin/compile-isisdl.sh b/bin/compile-isisdl.sh
new file mode 100755
index 0000000..1a75622
--- /dev/null
+++ b/bin/compile-isisdl.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+set -e
+SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
+
+rm -rf "$SCRIPT_DIR"/static-compile/isisdl.*
+rm -rf "$SCRIPT_DIR"/static-compile/venv
+
+python3.11 -m venv "$SCRIPT_DIR"/static-compile/venv
+source "$SCRIPT_DIR"/static-compile/venv/bin/activate
+pip install "$SCRIPT_DIR"/..
+
+python3 -c 'from isisdl.settings import is_static
+assert is_static, "Error: For the static build, is_static must be True"
+' || exit 1
+
+
+pip install zstandard ordered-set nuitka
+nuitka3 --standalone --onefile \
+    --linux-onefile-icon="$SCRIPT_DIR"/static-compile/isisdl_icon.png \
+    --output-dir="$SCRIPT_DIR"/static-compile \
+    --output-filename=isisdl-linux.bin \
+    "$SCRIPT_DIR"/../src/isisdl/__main__.py
+
+echo "new sha256sum is"
+sha256sum "$SCRIPT_DIR"/static-compile/isisdl-linux.bin
diff --git a/compile/python_icon.png b/bin/isisdl.png
similarity index 100%
rename from compile/python_icon.png
rename to bin/isisdl.png
diff --git a/bin/static-compile/isisdl_icon.png b/bin/static-compile/isisdl_icon.png
new file mode 100644
index 0000000..b75cfea
Binary files /dev/null and b/bin/static-compile/isisdl_icon.png differ
diff --git a/bin/upload-PyPI.sh b/bin/upload-PyPI.sh
new file mode 100755
index 0000000..1b08012
--- /dev/null
+++ b/bin/upload-PyPI.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+set -e
+SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
+
+cd "$SCRIPT_DIR"/.. || exit 1
+
+mkdir -p "$SCRIPT_DIR"/dist/
+rm "$SCRIPT_DIR"/dist/* 2> /dev/null
+python3 -m build --outdir "$SCRIPT_DIR/dist/"
+
+twine upload "$SCRIPT_DIR"/dist/*
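The inline `python3 -c` snippet in bin/compile-isisdl.sh above guards against building a non-static package before Nuitka runs. A standalone sketch of the same check in Python (the flag is the one from the script; the exit message mirrors it, nothing else is assumed):

```python
# Standalone version of the guard embedded in bin/compile-isisdl.sh:
# refuse to build unless the installed package was configured as static.
from isisdl.settings import is_static

if not is_static:
    raise SystemExit("Error: For the static build, is_static must be True")
```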
diff --git a/compile/.gitignore b/compile/.gitignore
deleted file mode 100644
index b72e57f..0000000
--- a/compile/.gitignore
+++ /dev/null
@@ -1,7 +0,0 @@
-*.build
-*.dist
-*.onefile-build
-isisdl-linux.bin
-isisdl-compress-linux.bin
-isisdl-sync-linux.bin
-isisdl-config-linux.bin
diff --git a/compile/compile_isisdl.sh b/compile/compile_isisdl.sh
deleted file mode 100755
index 15bf902..0000000
--- a/compile/compile_isisdl.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-rm -rf __main__.*
-rm -rf venv
-
-python3.10 -m venv venv
-source venv/bin/activate
-pip install ..
-
-
-python3 -c "from isisdl.settings import is_static
-assert(is_static)
-" || exit 1
-
-
-pip install zstandard ordered-set
-pip install nuitka
-
-nuitka3 --standalone --onefile --linux-onefile-icon=python_icon.png ../src/isisdl/__main__.py
-
-mv ./__main__.bin ./isisdl-linux.bin
-
-echo "new sha256sum is"
-sha256sum ./isisdl-linux.bin
\ No newline at end of file
diff --git a/compile/isisdl.png b/compile/isisdl.png
deleted file mode 100755
index a81b04c..0000000
Binary files a/compile/isisdl.png and /dev/null differ
diff --git a/isisdl.wiki b/isisdl.wiki
deleted file mode 160000
index f29ec53..0000000
--- a/isisdl.wiki
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit f29ec5391b3ab648023aa18edf84d90e02614ab6
diff --git a/src/isisdl/__main__.py b/src/isisdl/__main__.py
index e8c1de0..1261732 100644
--- a/src/isisdl/__main__.py
+++ b/src/isisdl/__main__.py
@@ -2,12 +2,12 @@
 import asyncio
 import sys
 
-import isisdl.compress as compress
+import isisdl.frontend.compress as compress
 from isisdl.api.crud import authenticate_new_session
-from isisdl.api.downloading import download_media_urls, gather_media_urls
+from isisdl.api.download import download_media_urls, gather_media_urls
 from isisdl.api.endpoints import UserCourseListAPI
-from isisdl.backend import sync_database
-from isisdl.backend.config import init_wizard, config_wizard
+from isisdl.frontend import sync_database
+from isisdl.frontend.config import init_wizard, config_wizard
 from isisdl.backend.crud import read_config, read_user
 from isisdl.backend.request_helper import CourseDownloader
 from isisdl.db_conf import init_database, DatabaseSessionMaker
@@ -53,7 +53,7 @@ async def _new_main() -> None:
         return None
 
     downloaded_content = await download_media_urls(db, urls)
-    # - After downloading everything, run the hardlink resolution, this time based on checksums.
+    # After downloading everything, run the hardlink resolution, this time based on checksums.
     _ = downloaded_content
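The rewritten comment in `_new_main` refers to a hardlink-resolution pass that this patch does not implement yet. A minimal sketch of what checksum-based hardlink resolution could look like — the function name, the sha256 grouping key, and the `os.link` strategy are all assumptions, not code from this patch:

```python
# Hypothetical sketch: deduplicate downloaded files by content checksum.
import hashlib
import os
from collections import defaultdict
from pathlib import Path


def resolve_hardlinks(paths: list[Path]) -> None:
    by_checksum: defaultdict[str, list[Path]] = defaultdict(list)
    for path in paths:
        checksum = hashlib.sha256(path.read_bytes()).hexdigest()
        by_checksum[checksum].append(path)

    for duplicates in by_checksum.values():
        original, *copies = duplicates
        for copy in copies:
            copy.unlink()            # remove the duplicate ...
            os.link(original, copy)  # ... and replace it with a hardlink
```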
diff --git a/src/isisdl/api/crud.py b/src/isisdl/api/crud.py
index a7598b6..4086051 100644
--- a/src/isisdl/api/crud.py
+++ b/src/isisdl/api/crud.py
@@ -2,16 +2,19 @@
 
 import re
 from base64 import standard_b64decode
+from collections import defaultdict
 from html import unescape
-from typing import Any
+from itertools import chain
+from typing import Any, Literal, cast, DefaultDict
 
 from aiohttp import ClientSession as InternetSession
 from sqlalchemy import select
 from sqlalchemy.orm import Session as DatabaseSession
 
-from isisdl.api.models import AuthenticatedSession, Course, MediaURL, MediaType
+from isisdl.api.models import AuthenticatedSession, Course, MediaURL, MediaType, NormalizedDocument
 from isisdl.backend.models import User, Config
 from isisdl.db_conf import add_or_update_objects_to_database
+from isisdl.settings import url_finder, isis_ignore, extern_ignore, regex_is_isis_document, regex_is_isis_video
 from isisdl.utils import datetime_fromtimestamp_with_None, flat_map
 from isisdl.version import __version__
 
@@ -73,6 +76,12 @@ async def authenticate_new_session(user: User, config: Config) -> AuthenticatedS
     return AuthenticatedSession(session, session_key=session_key, api_token=api_token)
 
 
+# --- Courses ---
+
+def read_courses(db: DatabaseSession) -> list[Course]:
+    return list(db.execute(select(Course)).scalars().all())
+
+
 def parse_courses_from_API(db: DatabaseSession, courses: list[dict[str, Any]], config: Config) -> list[Course] | None:
     existing_courses = {it.id: it for it in read_courses(db)}
 
@@ -85,11 +94,145 @@ def parse_courses_from_API(db: DatabaseSession, courses: list[dict[str, Any]], c
     )
 
 
-def create_videos_from_API(db: DatabaseSession, videos: list[dict[str, Any]], course_id: int) -> list[MediaURL] | None:
+# --- Documents ---
+
+def read_media_urls(db: DatabaseSession) -> dict[int, dict[str, MediaURL]]:
+    final: DefaultDict[int, dict[str, MediaURL]] = defaultdict(dict)
+    for it in db.execute(select(MediaURL)).scalars().all():
+        final[it.course_id][it.url] = it
+
+    return dict(final)
+
+
+def create_documents_from_API(db: DatabaseSession, data: list[NormalizedDocument], existing_documents: dict[str, MediaURL]) -> list[MediaURL] | None:
+    _data = cast(list[dict[str, Any]], data)  # Erase the `NormalizedDocument` signature to make mypy happy
+
+    return add_or_update_objects_to_database(
+        db, existing_documents, _data, MediaURL, lambda doc: doc["url"],
+        {it: it for it in NormalizedDocument.__annotations__.keys()},
+        {"time_created": datetime_fromtimestamp_with_None, "time_modified": datetime_fromtimestamp_with_None},
+    )
+
+
+def parse_documents_from_API(db: DatabaseSession, course_id: int, documents: list[dict[str, Any]], existing_documents: dict[str, MediaURL]) -> list[MediaURL]:
+    """
+    TODO: Revise this docstring as it is no longer accurate. Maybe there is a way to use a single transaction, but I don't see it.
+
+    Note that this function should be called with a `db.begin()` (transaction) as the db parameter, as it will otherwise create #Courses commits to the database.
+    To save trips to the database, the existing_documents parameter has to be passed to this function.
+ """ + + api_data = list( + filter( + lambda it: it != {}, + + flat_map( + lambda it: it.get("contents", [{}]), + flat_map( + lambda it: it.get("modules", [{}]), + documents + ) + ) + ) + ) + + regex_data = parse_course_page_with_regex(documents, course_id) + data = filter_duplicates_and_normalize_documents(api_data, regex_data, course_id) + + return create_documents_from_API(db, data, existing_documents) or [] + + +def parse_course_page_with_regex(documents: list[dict[str, Any]], course_id: int) -> list[dict[str, Any]]: + files = [] + + for url in url_finder.findall(str(documents)): + if isis_ignore.match(url) is not None or extern_ignore.match(url) is not None: + continue + + files.append({"fileurl": url, "course_id": course_id, "relative_path": "", "filename": None, "filesize": None, "timecreated": None, "timemodified": None, "type": "url"}) + + return files + + +def filter_duplicates_and_normalize_documents(documents_data: list[dict[str, Any]], regex_data: list[dict[str, Any]], course_id: int) -> list[NormalizedDocument]: + duplicates = defaultdict(list) + + for it in chain(documents_data, regex_data): + file = normalize_file(it, course_id) + if file is None: + continue + + duplicates[it["fileurl"]].append(file) + + return [resolve_duplicates(files) for files in duplicates.values()] + + +def normalize_file(file: dict[str, Any], course_id: int) -> NormalizedDocument | None: + url = file.get("fileurl") + if url is None: + return None + + if url.endswith("?forcedownload=1"): + url = url[:-len("?forcedownload=1")] + + if isis_ignore.match(url) is not None or extern_ignore.match(url) is not None: + return None + + if regex_is_isis_video.match(url) is not None: + media_type = MediaType.video + elif regex_is_isis_document.match(url) is not None or file.get("type") != "url": + media_type = MediaType.document + else: + media_type = MediaType.extern + + return { + "url": url, + "course_id": course_id, + "media_type": media_type, + "relative_path": (file.get("filepath") or "").lstrip("/"), + "name": file.get("filename"), + "size": file.get("filesize"), + "time_created": file.get("timecreated") or file.get("timemodified"), + "time_modified": file.get("timemodified") or file.get("timecreated"), + } + + +def resolve_duplicates(files: list[NormalizedDocument]) -> NormalizedDocument: + """ + Determinism: + Files are sorted deterministicly by partitioning each attribute into the "Some" and "None" category. + Then, each attribute is sorted based on the "Some" category. + If there are multiple files with different attribute, the first one according to the sort order is chosen. 
+ """ + if len(files) == 1: + return files[0] + + def resolve_conflict(attr: Literal["url"] | Literal["course_id"] | Literal["media_type"] | Literal["relative_path"] | Literal["name"] | Literal["size"] | Literal["time_created"] | Literal["time_modified"]) -> Any: + conflicting_attrs = sorted({it for file in files if (it := file[attr]) is not None}) + if len(conflicting_attrs) == 0: + return None + + return conflicting_attrs[0] + + return { + "url": resolve_conflict("url"), + "course_id": resolve_conflict("course_id"), + "media_type": resolve_conflict("media_type"), + "relative_path": resolve_conflict("relative_path"), + "name": resolve_conflict("name"), + "size": resolve_conflict("size"), + "time_created": resolve_conflict("time_created"), + "time_modified": resolve_conflict("time_modified"), + } + + +# --- Videos --- + + +def create_videos_from_API(db: DatabaseSession, videos: list[dict[str, Any]], course_id: int, existing_videos: dict[str, MediaURL]) -> list[MediaURL] | None: # Filter out duplicate videos videos = list({video["url"]: video for video in videos}.values()) - existing_videos = {it.url: it for it in read_media_urls(db) if it.media_type == MediaType.video} videos = list(map(lambda it: it | {"course_id": course_id, "media_type": MediaType.video, "relative_path": "Videos", "size": None, "time_modified": None}, videos)) return add_or_update_objects_to_database( @@ -103,20 +246,15 @@ def parse_videos_from_API(db: DatabaseSession, videos: list[dict[str, Any]], con if config.dl_download_videos is False: return [] + existing_videos = read_media_urls(db) + + # TODO: Make this a single transaction instead of one for each course return list( filter( lambda it: it is not None, flat_map( - lambda data: create_videos_from_API(db, data.get("videos"), data.get("courseid")) or [], + lambda data: create_videos_from_API(db, data.get("videos"), data.get("courseid"), existing_videos[data["courseid"]]) or [], map(lambda it: it.get("data", {}), videos) ) ) ) - - -def read_courses(db: DatabaseSession) -> list[Course]: - return list(db.execute(select(Course)).scalars().all()) - - -def read_media_urls(db: DatabaseSession) -> list[MediaURL]: - return list(db.execute(select(MediaURL)).scalars().all()) diff --git a/src/isisdl/api/downloading.py b/src/isisdl/api/download.py similarity index 95% rename from src/isisdl/api/downloading.py rename to src/isisdl/api/download.py index a62bb9b..bb5e424 100644 --- a/src/isisdl/api/downloading.py +++ b/src/isisdl/api/download.py @@ -4,7 +4,7 @@ from sqlalchemy.orm import Session as DatabaseSession -from isisdl.api.endpoints import VideoListAPI, DocumentListAPI +from isisdl.api.endpoints import DocumentListAPI, VideoListAPI from isisdl.api.models import MediaContainer, MediaURL, AuthenticatedSession, Course __all__ = ["download_media_urls"] diff --git a/src/isisdl/api/endpoints.py b/src/isisdl/api/endpoints.py index 6a09d13..cc3a6ad 100644 --- a/src/isisdl/api/endpoints.py +++ b/src/isisdl/api/endpoints.py @@ -2,18 +2,15 @@ import asyncio from abc import abstractmethod -from collections import defaultdict from json import JSONDecodeError from typing import Any, cast, TYPE_CHECKING from sqlalchemy.orm import Session as DatabaseSession -from isisdl.api.crud import parse_courses_from_API, read_media_urls, parse_videos_from_API -from isisdl.api.models import AuthenticatedSession, Course, Error, MediaType, MediaURL +from isisdl.api.crud import parse_courses_from_API, read_media_urls, parse_videos_from_API, parse_documents_from_API +from isisdl.api.models import 
diff --git a/src/isisdl/api/downloading.py b/src/isisdl/api/download.py
similarity index 95%
rename from src/isisdl/api/downloading.py
rename to src/isisdl/api/download.py
index a62bb9b..bb5e424 100644
--- a/src/isisdl/api/downloading.py
+++ b/src/isisdl/api/download.py
@@ -4,7 +4,7 @@
 
 from sqlalchemy.orm import Session as DatabaseSession
 
-from isisdl.api.endpoints import VideoListAPI, DocumentListAPI
+from isisdl.api.endpoints import DocumentListAPI, VideoListAPI
 from isisdl.api.models import MediaContainer, MediaURL, AuthenticatedSession, Course
 
 __all__ = ["download_media_urls"]
diff --git a/src/isisdl/api/endpoints.py b/src/isisdl/api/endpoints.py
index 6a09d13..cc3a6ad 100644
--- a/src/isisdl/api/endpoints.py
+++ b/src/isisdl/api/endpoints.py
@@ -2,18 +2,15 @@
 
 import asyncio
 from abc import abstractmethod
-from collections import defaultdict
 from json import JSONDecodeError
 from typing import Any, cast, TYPE_CHECKING
 
 from sqlalchemy.orm import Session as DatabaseSession
 
-from isisdl.api.crud import parse_courses_from_API, read_media_urls, parse_videos_from_API
-from isisdl.api.models import AuthenticatedSession, Course, Error, MediaType, MediaURL
+from isisdl.api.crud import parse_courses_from_API, read_media_urls, parse_videos_from_API, parse_documents_from_API
+from isisdl.api.models import AuthenticatedSession, Course, Error, MediaURL
 from isisdl.backend.models import User, Config
-from isisdl.db_conf import add_or_update_objects_to_database
-from isisdl.settings import isis_ignore, url_finder, extern_ignore, regex_is_isis_document, DEBUG_ASSERTS
-from isisdl.utils import datetime_fromtimestamp_with_None, normalize_url, flat_map
+from isisdl.settings import DEBUG_ASSERTS
 
 
 # TODO: AJAX
@@ -172,196 +169,24 @@ async def _get(cls, session: AuthenticatedSession, data: dict[str, Any] | list[d
         if isinstance(response, Error) or not response.ok:
             return None
 
+        # TODO: Error handling
         return await response.json(), data["courseid"]
 
-    @staticmethod
-    def _normalize_file(file: dict[str, Any], url: str, course_id: int) -> dict[str, Any]:
-        file["fileurl"] = normalize_url(url)
-        file["filesize"] = file["filesize"] or None
-        file["timecreated"] = file["timecreated"] or file["timemodified"]
-        file["timemodified"] = file["timemodified"] or file["timecreated"]
-
-        file_type = file.get("type", None)
-        if file_type == "url":
-            file["media_type"] = MediaType.extern
-        else:
-            file["media_type"] = MediaType.document
-
-        file["course_id"] = course_id
-        file["relative_path"] = (file.get("filepath") or "").lstrip("/")
-
-        return file
-
-    @staticmethod
-    def _parse_files_from_regex(course_contents_str: str, course_id: int) -> list[dict[str, Any]]:
-        all_files = []
-
-        for url in url_finder.findall(course_contents_str):
-            if isis_ignore.match(url) is not None or extern_ignore.match(url) is not None:
-                continue
-
-            all_files.append({
-                "fileurl": url, "course_id": course_id, "media_type": MediaType.document if regex_is_isis_document.match(url) is not None else MediaType.extern,
-                "relative_path": "", "filename": None, "filesize": None, "timecreated": None, "timemodified": None
-            })
-
-        return all_files
-
-    @staticmethod
-    def _filter_duplicates_from_files(duplicates: list[dict[str, Any]]) -> list[dict[str, Any]]:
-        maybe_duplicates = defaultdict(list)
-
-        for duplicate in duplicates:
-            maybe_duplicates[(duplicate["fileurl"], duplicate["course_id"])].append(duplicate)
-
-        files_without_duplicates = []
-        for files in maybe_duplicates.values():
-            if len(files) == 1:
-                files_without_duplicates.append(files[0])
-                continue
-
-            # TODO: This process is not necessarily deterministic. Make it as such
-            files_without_duplicates.append({
-                "fileurl": files[0]["fileurl"], "course_id": files[0]["course_id"],
-                "media_type": MediaType.document if any(file["media_type"] == MediaType.document for file in files) else MediaType.extern,
-                "relative_path": next((relative_path for file in files if (relative_path := file["relative_path"])), ""),
-                "filename": next((name for file in files if (name := file["filename"]) is not None), None),
-                "filesize": next((name for file in files if (name := file["filesize"]) is not None), None),
-                "timecreated": next((name for file in files if (name := file["timecreated"]) is not None), None),
-                "timemodified": next((name for file in files if (name := file["timemodified"]) is not None), None),
-            })
-
-        return files_without_duplicates
-
     @classmethod
-    async def get(cls, db: DatabaseSession, session: AuthenticatedSession, courses: list[Course]) -> Any:
+    async def get(cls, db: DatabaseSession, session: AuthenticatedSession, courses: list[Course]) -> list[MediaURL]:
         requests = [cls._get(session, data={"courseid": course.id}) for course in courses]
 
-        normalized_files_with_duplicates: list[dict[str, int | None | MediaType | str]] = []
+        existing_documents = read_media_urls(db)
+        all_documents = []
 
         for _response in asyncio.as_completed(requests):
             response = await _response
             if response is None:
                 continue
 
-            course_contents, course_id = response
-            # TODO: Profile if using a faster json parser is worth it
-
-            files_to_filter: list[dict[str, Any]] = list(
-                filter(
-                    lambda it: it != {},
-
-                    flat_map(
-                        lambda it: it.get("contents", [{}]),
-                        flat_map(
-                            lambda it: it.get("modules", [{}]),
-                            course_contents
-                        )
-                    )
-                )
-            )
-
-            normalized_files_with_duplicates.extend(
-                cls._normalize_file(file, url, course_id) for file in files_to_filter
-                if (url := file.get("fileurl", None)) is not None and isis_ignore.match(url) is None and extern_ignore.match(url) is None
-            )
-
-            parsed_files_from_regex = cls._parse_files_from_regex(str(course_contents), course_id)
-            normalized_files_with_duplicates.extend(parsed_files_from_regex)
-
-        files = cls._filter_duplicates_from_files(normalized_files_with_duplicates)
-
-        existing_containers = {(it.course_id, normalize_url(it.url)): it for it in read_media_urls(db) if it.media_type in {MediaType.document, MediaType.extern}}
-
-        return add_or_update_objects_to_database(
-            db, existing_containers, files, MediaURL, lambda x: (x["course_id"], normalize_url(x["fileurl"])),
-            {"url": "fileurl", "course_id": "course_id", "media_type": "media_type", "relative_path": "relative_path",
-             "name": "filename", "size": "filesize", "time_created": "timecreated", "time_modified": "timemodified"},
-            {"url": normalize_url, "time_created": datetime_fromtimestamp_with_None, "time_modified": datetime_fromtimestamp_with_None}
-        )
-
-    @classmethod
-    async def old_get(cls, db: DatabaseSession, session: AuthenticatedSession, courses: list[Course]) -> Any:
-        requests = [cls._get(session, data={"courseid": course.id}) for course in courses]
-
-        new_data_with_duplicates = []
-
-        # TODO: Performance benchmarks between asyncio.gather and asyncio.as_completed
-        # TODO: Profile the parsing and maybe improve the runtime
-        for _response in asyncio.as_completed(requests):
-            response = await _response
-            if response is None:
-                continue
-
-            course_contents, course_id = response
-
-            # Unfortunately, it doesn't seam as if python supports matching of nested dicts / lists
-            for week in course_contents:
-                match week:
-                    case {"modules": modules}:
-                        for module in modules:
-                            match module:
-                                case {"contents": files}:
-                                    for file in files:
-                                        match file:
-                                            case {"fileurl": url, "type": file_type, "filepath": relative_path}:
-                                                # if isis_ignore.match(url) is None and extern_ignore.match(url) is None:
-                                                # Normalize attributes
-                                                file["fileurl"] = normalize_url(url)
-                                                file["filesize"] = file["filesize"] or None
-                                                file["timecreated"] = file["timecreated"] or file["timemodified"]
-
-                                                file["media_type"] = MediaType.extern
-                                                file["course_id"] = course_id
-                                                file["relative_path"] = (relative_path or "").lstrip("/")
-
-                                                if file_type == "url":
-                                                    file["media_type"] = MediaType.extern
-                                                else:
-                                                    file["media_type"] = MediaType.document
-
-                                                new_data_with_duplicates.append(file)
-
-                    case _:
-                        pass
-
-            # Now try to find as many urls as possible in the Course
-            regex_url_matches = {normalize_url(url) for url in url_finder.findall(str(course_contents)) if isis_ignore.match(url) is None and extern_ignore.match(url) is None}
-            for url in regex_url_matches:
-                new_data_with_duplicates.append({
-                    "fileurl": url, "course_id": course_id, "media_type": MediaType.document if regex_is_isis_document.match(url) is not None else MediaType.extern,
-                    "relative_path": "", "filename": None, "filesize": None, "timecreated": None, "timemodified": None
-                })
-
-        # Remove the duplicate files
-        maybe_duplicate_files = defaultdict(list)
-        for file in new_data_with_duplicates:
-            maybe_duplicate_files[(file["fileurl"], file["course_id"])].append(file)
-
-        new_data = []
-        for files in maybe_duplicate_files.values():
-            if len(files) == 1:
-                new_data.append(files[0])
-                continue
+            documents, course_id = response
+            all_documents.extend(parse_documents_from_API(db, course_id, documents, existing_documents.get(course_id, {})))
-            # TODO: This process is not necessarily deterministic. Make it as such
-            new_data.append({
-                "fileurl": files[0]["fileurl"], "course_id": files[0]["course_id"],
-                "media_type": MediaType.document if any(file["media_type"] == MediaType.document for file in files) else MediaType.extern,
-                "relative_path": next((relative_path for file in files if (relative_path := file["relative_path"])), ""),
-                "filename": next((name for file in files if (name := file["filename"]) is not None), None),
-                "filesize": next((name for file in files if (name := file["filesize"]) is not None), None),
-                "timecreated": next((name for file in files if (name := file["timecreated"]) is not None), None),
-                "timemodified": next((name for file in files if (name := file["timemodified"]) is not None), None),
-            })
-
-        existing_containers = {(it.course_id, it.url): it for it in read_media_urls(db)}
-
-        return add_or_update_objects_to_database(
-            db, existing_containers, new_data, MediaURL, lambda x: (x["course_id"], x["fileurl"]),
-            {"url": "fileurl", "course_id": "course_id", "media_type": "media_type", "relative_path": "relative_path",
-             "name": "filename", "size": "filesize", "time_created": "timecreated", "time_modified": "timemodified"},
-            {"url": normalize_url, "time_created": datetime_fromtimestamp_with_None, "time_modified": datetime_fromtimestamp_with_None}
-        )
+        return all_documents
 
 
 class CourseEnrollmentAPI(MoodleAPIEndpoint):
diff --git a/src/isisdl/api/models.py b/src/isisdl/api/models.py
index da9c9e2..f033257 100644
--- a/src/isisdl/api/models.py
+++ b/src/isisdl/api/models.py
@@ -5,7 +5,7 @@
 from datetime import datetime
 from enum import Enum
 from types import TracebackType
-from typing import Any, Type
+from typing import Any, Type, TypedDict
 
 from aiohttp import ClientSession as InternetSession
 from aiohttp.client import _RequestContextManager
@@ -38,10 +38,31 @@ class MediaType(Enum):
     document = 1
     extern = 2
     video = 3
-    corrupted = 4
-    hardlink = 5
+    # TODO: Do I really need that big of a distinction?
+    corrupted_on_disk = 10
+    not_available = 11
+    not_available_for_legal_reasons = 12
+    hardlink = 20
+
+    def __gt__(self, other: MediaType) -> bool:
+        return self.value > other.value
+
+
+class NormalizedDocument(TypedDict):
+    url: str
+    course_id: int
+    media_type: MediaType
+    relative_path: str
+
+    name: str | None
+    size: int | None
+    time_created: int | None
+    time_modified: int | None
+
+
+# https://mypy.readthedocs.io/en/stable/literal_types.html#tagged-unions for size
 
 
 class MediaURL(DataBase):  # type:ignore[valid-type, misc]
     """
     This class is a glorified URL with some metadata associated with it.
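`NormalizedDocument` is a plain `TypedDict`, and `MediaType` now orders by its integer value, so the new failure states (10, 11, 12, 20) sort after the regular media types (1, 2, 3). A short illustration of both, assuming the models above are importable (all values are made up):

```python
from isisdl.api.models import MediaType, NormalizedDocument

# Hypothetical record; a TypedDict is just a dict with a checked shape.
doc: NormalizedDocument = {
    "url": "https://isis.tu-berlin.de/pluginfile.php/1/slides.pdf",
    "course_id": 42,
    "media_type": MediaType.document,
    "relative_path": "Week 1",
    "name": "slides.pdf",
    "size": 2048,
    "time_created": 1700000000,
    "time_modified": 1700000000,
}

# __gt__ compares the underlying integer values.
assert MediaType.video > MediaType.document
assert MediaType.corrupted_on_disk > MediaType.video
assert MediaType.hardlink > MediaType.not_available_for_legal_reasons
```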
diff --git a/src/isisdl/backend/crud.py b/src/isisdl/backend/crud.py
index d2a8cc0..b3e7ad9 100644
--- a/src/isisdl/backend/crud.py
+++ b/src/isisdl/backend/crud.py
@@ -11,7 +11,7 @@
 from isisdl.settings import master_password, error_exit
 
 
-async def store_user(db: DatabaseSession, config: Config, username: str, password: str, password_to_encrypt: str | None = None, user_id: int | None = None) -> User | None:
+async def store_user(db: DatabaseSession, config: Config, username: str, password: str, password_to_encrypt: str | None = None, user_id: int | None = None) -> User | None:
     the_password_to_encrypt = password_to_encrypt if config.pw_encrypt_password else master_password
     if the_password_to_encrypt is None:
         return None
@@ -46,4 +46,6 @@ def read_config(db: DatabaseSession) -> Config:
     if len(configs) != 1:
         error_exit(2, f"Could not load config! Got {len(configs)} config database entries, expected 1\nPlease make sure you have set the config with `isisdl --init`!")
 
+    # Now factor in the configuration file
+
     return configs[0]
diff --git a/src/isisdl/backend/database_helper.py b/src/isisdl/backend/database_helper.py
index cf3f6b7..be36da5 100644
--- a/src/isisdl/backend/database_helper.py
+++ b/src/isisdl/backend/database_helper.py
@@ -10,8 +10,7 @@
 from threading import Lock
 from typing import TYPE_CHECKING, cast, Set, Dict, List, Any, Union, DefaultDict, Iterable, Tuple
 
-from isisdl.settings import database_file_location, error_text, bad_url_cache_reeval_times_mul, bad_url_cache_reeval_exp, \
-    bad_url_cache_reeval_static_mul, random_salt_length
+from isisdl.settings import database_file_location, error_text, bad_url_cache_reeval_times_mul, bad_url_cache_reeval_exp, bad_url_cache_reeval_static_mul, random_salt_length
 
 if TYPE_CHECKING:
     from isisdl.backend.request_helper import MediaContainer
diff --git a/src/isisdl/backend/request_helper.py b/src/isisdl/backend/request_helper.py
index 9d41314..b53f840 100644
--- a/src/isisdl/backend/request_helper.py
+++ b/src/isisdl/backend/request_helper.py
@@ -26,12 +26,10 @@
 from isisdl.backend.crypt import get_credentials
 from isisdl.backend.status import StatusOptions, DownloadStatus, RequestHelperStatus
-from isisdl.settings import download_base_timeout, download_timeout_multiplier, download_static_sleep_time, num_tries_download, status_time, perc_diff_for_checksum, error_text, extern_ignore, \
-    log_file_location, datetime_str, regex_is_isis_document, token_queue_bandwidths_save_for, download_chunk_size, download_progress_bar_resolution, bandwidth_download_files_mavg_perc, \
-    checksum_algorithm
+from isisdl.settings import download_base_timeout, download_timeout_multiplier, download_static_sleep_time, num_tries_download, status_time, perc_diff_for_checksum, error_text, extern_ignore, log_file_location, datetime_str, regex_is_isis_document, token_queue_bandwidths_save_for, \
+    download_chunk_size, download_progress_bar_resolution, bandwidth_download_files_mavg_perc, checksum_algorithm
 from isisdl.settings import enable_multithread, discover_num_threads, is_windows, is_macos, is_testing, testing_bad_urls, url_finder, isis_ignore
-from isisdl.utils import User, path, sanitize_name, args, on_kill, database_helper, config, generate_error_message, logger, DownloadThrottler, MediaType, HumanBytes, normalize_url, \
-    get_download_url_from_url
+from isisdl.utils import User, path, sanitize_name, args, on_kill, database_helper, config, generate_error_message, logger, DownloadThrottler, MediaType, HumanBytes, get_download_url_from_url
 from isisdl.utils import calculate_local_checksum
 from isisdl.version import __version__
diff --git a/src/isisdl/db_conf.py b/src/isisdl/db_conf.py
index 6c5a56b..5afa9c4 100644
--- a/src/isisdl/db_conf.py
+++ b/src/isisdl/db_conf.py
@@ -45,7 +45,7 @@ def __repr__(self) -> str:
 
 
 # This Callable can be used to create new Session objects for interacting with a database
-DatabaseSessionMaker = sessionmaker(autocommit=False, bind=database_engine)
+DatabaseSessionMaker = sessionmaker(bind=database_engine)
 
 DataBase: Type[DeclarativeMeta] = declarative_base(cls=DatabaseObject)
 DB_T = TypeVar("DB_T", bound=DatabaseObject)
@@ -122,29 +122,29 @@ def translate_attribute(db_attr: str, data_attr: str, dict_item: dict[str, T]) -
 
     all_objects: list[DB_T] = []
 
-    for dict_item in new_data:
-        maybe_item = existing_items.get(lookup_func(dict_item))
+    try:
+        for dict_item in new_data:
+            maybe_item = existing_items.get(lookup_func(dict_item))
 
-        if maybe_item is None:
-            kwargs = {}
-            for db_type_attr, new_data_attr in attr_translator.items():
-                kwargs[db_type_attr] = translate_attribute(db_type_attr, new_data_attr, dict_item)
+            if maybe_item is None:
+                kwargs = {}
+                for db_type_attr, new_data_attr in attr_translator.items():
+                    kwargs[db_type_attr] = translate_attribute(db_type_attr, new_data_attr, dict_item)
 
-            db_item = db_type(**kwargs)
+                db_item = db_type(**kwargs)
 
-        else:
-            for db_type_attr, new_data_attr in attr_translator.items():
-                if attr_update_blacklist is not None and db_type_attr in attr_update_blacklist:
-                    continue
+            else:
+                for db_type_attr, new_data_attr in attr_translator.items():
+                    if attr_update_blacklist is not None and db_type_attr in attr_update_blacklist:
+                        continue
 
-                setattr(maybe_item, db_type_attr, translate_attribute(db_type_attr, new_data_attr, dict_item))
+                    setattr(maybe_item, db_type_attr, translate_attribute(db_type_attr, new_data_attr, dict_item))
 
-            db_item = maybe_item
+                db_item = maybe_item
 
-        db.add(db_item)
-        all_objects.append(db_item)
+            db.add(db_item)
+            all_objects.append(db_item)
 
-    try:
         db.commit()
     except SQLAlchemyError as e:
         error(f"Merging into the database failed: \"{e}\"")
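Note that `add_or_update_objects_to_database` still issues its own `db.commit()`, which is why the TODO in `parse_videos_from_API` ("make this a single transaction") and the caveat in the `parse_documents_from_API` docstring remain open. For reference, a minimal sketch of the `session.begin()` idiom a single-transaction version would build on (generic SQLAlchemy, not wired into this codebase; the table and values are placeholders):

```python
# Generic SQLAlchemy transaction idiom: everything inside session.begin()
# commits once on success and rolls back together on error.
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

engine = create_engine("sqlite://")
SessionMaker = sessionmaker(bind=engine)

with SessionMaker() as session:
    with session.begin():
        session.execute(text("CREATE TABLE media (url TEXT PRIMARY KEY)"))
        session.execute(text("INSERT INTO media VALUES ('https://example.org/a.pdf')"))
# Leaving the inner block committed both statements atomically.
```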
diff --git a/src/isisdl/autorun.py b/src/isisdl/frontend/autorun.py
similarity index 100%
rename from src/isisdl/autorun.py
rename to src/isisdl/frontend/autorun.py
diff --git a/src/isisdl/compress.py b/src/isisdl/frontend/compress.py
similarity index 99%
rename from src/isisdl/compress.py
rename to src/isisdl/frontend/compress.py
index 520dd5b..4095b87 100644
--- a/src/isisdl/compress.py
+++ b/src/isisdl/frontend/compress.py
@@ -18,8 +18,8 @@
 from isisdl.backend.crypt import get_credentials
 from isisdl.backend.request_helper import RequestHelper, MediaContainer
 from isisdl.backend.status import print_log_messages, RequestHelperStatus
-from isisdl.settings import is_windows, has_ffmpeg, status_time, ffmpeg_args, enable_multithread, compress_duration_for_to_low_efficiency, compress_std_mavg_size, \
-    compress_minimum_stdev, compress_minimum_score, compress_score_mavg_size, compress_insta_kill_score, compress_duration_for_insta_kill, is_first_time, error_text
+from isisdl.settings import is_windows, has_ffmpeg, status_time, ffmpeg_args, enable_multithread, compress_duration_for_to_low_efficiency, compress_std_mavg_size, compress_minimum_stdev, compress_minimum_score, compress_score_mavg_size, compress_insta_kill_score, compress_duration_for_insta_kill, \
+    is_first_time, error_text
 from isisdl.utils import on_kill, HumanBytes, do_ffprobe, generate_error_message, OnKill, database_helper, MediaType
diff --git a/src/isisdl/backend/config.py b/src/isisdl/frontend/config.py
similarity index 100%
rename from src/isisdl/backend/config.py
rename to src/isisdl/frontend/config.py
diff --git a/src/isisdl/backend/sync_database.py b/src/isisdl/frontend/sync_database.py
similarity index 100%
rename from src/isisdl/backend/sync_database.py
rename to src/isisdl/frontend/sync_database.py
diff --git a/src/isisdl/resources/changelog/1.3/1.3.13.md b/src/isisdl/resources/changelog/1.3/1.3.13.md
index 35981df..310bd03 100644
--- a/src/isisdl/resources/changelog/1.3/1.3.13.md
+++ b/src/isisdl/resources/changelog/1.3/1.3.13.md
@@ -1,23 +1,23 @@
 # Changelog version 1.3.13
 
 - This changelog
-    - Now you will get notified about all new features through this text.
-    - This should be more convenient than tracking down individual release notes or going through commit messages
+  - Now you will get notified about all new features through this text.
+  - This should be more convenient than tracking down individual release notes or going through commit messages.
 
 - Fixed (Apple) ARM bug with isisdl
-    - When executing isisdl on a ARM machine the following error would arise
+  - When executing isisdl on an ARM machine, the following error would arise:
    > `UnicodeError: encoding with 'idna' codec failed (UnicodeError: label too long)`
-    - This bug has now been resolved by bypassing all proxies.
+  - This bug has now been resolved by bypassing all proxies.
 
 - Changed the storage place of documents to be strictly in the root of the course.
-    - Previously some documents that originate from `isis.tu-berlin.de` were placed in the `Extern/` directory.
-    - The origin is now tracked and the files are placed in their respective directories accordingly.
+  - Previously, some documents that originate from `isis.tu-berlin.de` were placed in the `Extern/` directory.
+  - The origin is now tracked and the files are placed in their respective directories accordingly.
 
 - More Content
-    - Due to a bug in the conflict-checker, some videos were not downloaded. This behaviour is now fixed.
+  - Due to a bug in the conflict-checker, some videos were not downloaded. This behaviour is now fixed.
 
 - Download diff
-    - There is now a new subprogram to compare the downloaded files of an arbitrary directory and compare the differences to the `isisdl` directory.
-    - This program is especially useful when comparing different ISIS / Moodle-downloaders and checking if isisdl grabs all the content.
+  - There is now a new subprogram that compares the downloaded files of an arbitrary directory against the `isisdl` directory.
+  - This program is especially useful when comparing different ISIS / Moodle downloaders and checking whether isisdl grabs all the content.
 
 (I realize that this changelog is probably not going to be read as the previous version of isisdl did not include a mechanism to view the changelog)
\ No newline at end of file
diff --git a/src/isisdl/resources/changelog/1.3/1.3.15.md b/src/isisdl/resources/changelog/1.3/1.3.15.md
index 26ae462..942e17a 100644
--- a/src/isisdl/resources/changelog/1.3/1.3.15.md
+++ b/src/isisdl/resources/changelog/1.3/1.3.15.md
@@ -1,4 +1,4 @@
 # Changelog version 1.3.14
 
 - Download of all assignments
-    - There existed a bug since isisdl version 1.3.7 which lead to all the assignments not being downloaded anymore.
\ No newline at end of file
+  - There existed a bug since isisdl version 1.3.7 which led to all the assignments not being downloaded anymore.
\ No newline at end of file
diff --git a/src/isisdl/resources/changelog/1.4/1.4.0.md b/src/isisdl/resources/changelog/1.4/1.4.0.md
index de855cd..5c4ef7d 100644
--- a/src/isisdl/resources/changelog/1.4/1.4.0.md
+++ b/src/isisdl/resources/changelog/1.4/1.4.0.md
@@ -4,8 +4,8 @@
 - [ ] more time into compression → server, bug hunting
 - [ ] Case insensitive filesystem
 - [ ] Completions for zsh / fish
-    - Add step in the config wizard
-    - Place the config files in the according location
+  - Add a step in the config wizard
+  - Place the config files in the appropriate location
 - [ ] More / better data visualization → Carsten
 - [ ] More content → isia-tub / moodle-dl
 
@@ -13,8 +13,8 @@
 # Goals for Update 1.4
 
 - [ ] Database update
-    - [ ] New MediaType: Link that can be checked back later
-    - [ ] Fix synchronisation
+  - [ ] New MediaType: Link that can be checked again later
+  - [ ] Fix synchronisation
 - [ ] Ncurses UI?
 - [ ] More metadata analysis
diff --git a/src/isisdl/settings.py b/src/isisdl/settings.py
index 0192c89..d1fd677 100644
--- a/src/isisdl/settings.py
+++ b/src/isisdl/settings.py
@@ -19,7 +19,7 @@
 from psutil._common import sdiskpart
 from yaml import YAMLError, safe_load
 
-import isisdl.autorun
+import isisdl.frontend.autorun
 
 # --- Options for this executable ---
@@ -80,7 +80,7 @@ def error_exit(code: int, reason: str) -> NoReturn:
 has_ffmpeg = shutil.which("ffmpeg") is not None
 
 # Check if being automatically run
-is_autorun = sys.argv[0] == isisdl.autorun.__file__
+is_autorun = sys.argv[0] == isisdl.frontend.autorun.__file__
 
 # The location of the source code on disk
 source_code_location = Path(isisdl.__file__).parent
@@ -293,9 +293,8 @@ def error_exit(code: int, reason: str) -> NoReturn:
 )
 # @formatter:on
 
-regex_is_isis_document = re.compile(
-    r".*isis\.tu-berlin\.de/(?:webservice/|)pluginfile\.php/.*"
-)
+regex_is_isis_document = re.compile(r".*isis\.tu-berlin\.de/(?:webservice/|)pluginfile\.php/.*", re.IGNORECASE)
+regex_is_isis_video = re.compile(r".*isis\.tu-berlin\.de/videoservice/file.php/.*", re.IGNORECASE)
 
 # @formatter:off
 extern_ignore = re.compile(
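The two URL classifiers are now compiled with `re.IGNORECASE`, and videos get their own regex instead of being lumped in with documents. A quick sanity check of what each one matches, using the patterns exactly as defined above (the URLs are illustrative):

```python
import re

regex_is_isis_document = re.compile(r".*isis\.tu-berlin\.de/(?:webservice/|)pluginfile\.php/.*", re.IGNORECASE)
regex_is_isis_video = re.compile(r".*isis\.tu-berlin\.de/videoservice/file.php/.*", re.IGNORECASE)

# Documents match with or without the webservice/ prefix, in any letter case.
assert regex_is_isis_document.match("https://ISIS.tu-berlin.de/webservice/pluginfile.php/1/intro.pdf")
assert regex_is_isis_document.match("https://isis.tu-berlin.de/pluginfile.php/1/intro.pdf")

# Videos are served from the videoservice endpoint instead.
assert regex_is_isis_video.match("https://isis.tu-berlin.de/videoservice/file.php/2/lecture.mp4")
assert regex_is_isis_video.match("https://isis.tu-berlin.de/pluginfile.php/1/intro.pdf") is None
```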
diff --git a/src/isisdl/utils.py b/src/isisdl/utils.py
index 9665813..5d9beb8 100644
--- a/src/isisdl/utils.py
+++ b/src/isisdl/utils.py
@@ -43,12 +43,10 @@
 
 from isisdl import settings
 from isisdl.backend.database_helper import DatabaseHelper
-from isisdl.settings import download_chunk_size, token_queue_bandwidths_save_for, forbidden_chars, replace_dot_at_end_of_dir_name, force_filesystem, has_ffmpeg, fstype, log_file_location, \
-    source_code_location, intern_dir_location
-from isisdl.settings import working_dir_location, is_windows, checksum_algorithm, checksum_num_bytes, example_config_file_location, config_dir_location, database_file_location, status_time, \
-    discover_num_threads, status_progress_bar_resolution, download_progress_bar_resolution, config_file_location, is_first_time, is_autorun, parse_config_file, lock_file_location, \
-    enable_lock, error_directory_location, systemd_dir_location, master_password, is_testing, systemd_timer_file_location, systemd_service_file_location, export_config_file_location, \
-    python_executable, is_static, enable_multithread, subscribe_num_threads, subscribed_courses_file_location, error_text, token_queue_refresh_rate
+from isisdl.settings import download_chunk_size, token_queue_bandwidths_save_for, forbidden_chars, replace_dot_at_end_of_dir_name, force_filesystem, has_ffmpeg, fstype, log_file_location, source_code_location, intern_dir_location
+from isisdl.settings import working_dir_location, is_windows, checksum_algorithm, checksum_num_bytes, example_config_file_location, config_dir_location, database_file_location, status_time, discover_num_threads, status_progress_bar_resolution, download_progress_bar_resolution, config_file_location, \
+    is_first_time, is_autorun, parse_config_file, lock_file_location, enable_lock, error_directory_location, systemd_dir_location, master_password, is_testing, systemd_timer_file_location, systemd_service_file_location, export_config_file_location, python_executable, is_static, enable_multithread, \
+    subscribe_num_threads, subscribed_courses_file_location, error_text, token_queue_refresh_rate
 from isisdl.version import __version__
 
 if TYPE_CHECKING:
@@ -829,13 +827,6 @@ def path(*args: str) -> Path:
     return Path(working_dir_location, *args)
 
 
-def normalize_url(url: str) -> str:
-    if url.endswith("?forcedownload=1"):
-        url = url[:-len("?forcedownload=1")]
-
-    return url
-
-
 def remove_systemd_timer() -> None:
     if not os.path.exists(systemd_timer_file_location):
         return
diff --git a/src/isisdl/version.py b/src/isisdl/version.py
index 3e8d9f9..8c0d5d5 100644
--- a/src/isisdl/version.py
+++ b/src/isisdl/version.py
@@ -1 +1 @@
-__version__ = "1.4.0"
+__version__ = "2.0.0"
diff --git a/tests/test_0_config.py b/tests/test_0_config.py
index 83b8016..c0cf6fa 100644
--- a/tests/test_0_config.py
+++ b/tests/test_0_config.py
@@ -4,7 +4,7 @@
 
 from yaml import safe_load
 
-from isisdl.backend.config import authentication_prompt, update_policy_prompt, whitelist_prompt, filename_prompt, throttler_prompt
+from isisdl.frontend.config import authentication_prompt, update_policy_prompt, whitelist_prompt, filename_prompt, throttler_prompt
 from isisdl.backend.crypt import decryptor
 from isisdl.settings import export_config_file_location, master_password, is_windows
 from isisdl.utils import config, export_config, startup
diff --git a/tldr b/tldr
deleted file mode 160000
index 2599bfe..0000000
--- a/tldr
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 2599bfedd3d608b13e3a710366094021f8031694
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index e6f2dd6..0000000
--- a/tox.ini
+++ /dev/null
@@ -1,31 +0,0 @@
-[tox]
-minversion = 3.10.0
-envlist = py310, py311, flake8, mypy
-isolated_build = true
-
-[gh-actions]
-python =
-    3.10: py310
-    3.11: py311
-
-[testenv]
-setenv =
-    PYTHONPATH = {toxinidir}
-    ISISDL_ACTUAL_USERNAME = {env:ISISDL_ACTUAL_USERNAME}
-    ISISDL_ACTUAL_PASSWORD = {env:ISISDL_ACTUAL_PASSWORD}
-deps =
-    -r{toxinidir}/requirements_dev.txt
-commands =
-    pytest tests/api -vv -rA --basetemp={envtmpdir}
-
-[testenv:flake8]
-basepython = python3.10
-deps = flake8
-commands = flake8 src/isisdl/backend/crud.py src/isisdl/backend/models.py src/isisdl/api src/isisdl/db_conf.py src/isisdl/__main__.py src/isisdl/settings.py src/isisdl/utils.py tests/api
-
-[testenv:mypy]
-basepython = python3.10
-deps =
-    -r{toxinidir}/requirements_dev.txt
-commands = mypy src/isisdl/backend/crud.py src/isisdl/backend/models.py src/isisdl/api src/isisdl/db_conf.py src/isisdl/__main__.py src/isisdl/settings.py src/isisdl/utils.py tests/api
-
diff --git a/uploadPyPI.sh b/uploadPyPI.sh
deleted file mode 100755
index 5330dd1..0000000
--- a/uploadPyPI.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-
-mkdir -p dist/
-rm dist/* 2> /dev/null
-python3 -m build
-
-twine upload dist/*