diff --git a/scrapers/Iwara.yml b/scrapers/Iwara.yml deleted file mode 100644 index 2dbf0203a..000000000 --- a/scrapers/Iwara.yml +++ /dev/null @@ -1,73 +0,0 @@ -name: iwara.tv -sceneByURL: - - action: scrapeJson - url: - - iwara.tv/video/ - queryURL: "{url}" - queryURLReplace: - url: - # only capture video id from url - - regex: .*video/([^/]+)(?:/[^/]*)?$ - with: https://api.iwara.tv/video/$1 - scraper: sceneScraper -sceneByFragment: - action: scrapeJson - queryURL: https://api.iwara.tv/video/{filename} - queryURLReplace: - filename: # expects an id in square brackets before extension, as saved by yt-dlp e.g. VIDEO_TITLE [VIDEO_ID].mp4 - - regex: .*\[([0-9a-zA-Z]{13,})\]\.[^\.]+$ - with: $1 - scraper: sceneScraper - -jsonScrapers: - sceneScraper: - scene: - Title: title - URL: - selector: "[id,slug]" - concat: / - postProcess: - - replace: - - regex: ^ - with: https://www.iwara.tv/video/ - Date: - selector: file.createdAt - postProcess: - - replace: - - regex: (\d{4}-\d{2}-\d{2}).* - with: $1 - - parseDate: "2006-01-02" - Studio: - Name: user.name - URL: - selector: user.username - postProcess: - - replace: - - regex: ^ - with: https://www.iwara.tv/profile/ - Tags: - Name: tags.#.id - Details: body - # Some videos have a custom thumbnail and we'll want to use that if possible - # example of scene with custom thumbnail: https://www.iwara.tv/video/J7W7n4VdKtohQ7/ - # example of scene with normal thumbnail: https://www.iwara.tv/video/2DORyCe5fVqXz6/ - Image: - selector: "[customThumbnail.id,file.id,thumbnail]" - concat: "," - postProcess: - - replace: - # If we found a customThumbnail use it and discard the rest - - regex: ([^,]+),[^,]+,.* - with: https://i.iwara.tv/image/original/$1/$1.jpg - # zero pad if thumbnail is less than 10 - - regex: \b([0-9])\b - with: 0$1 - - regex: (.*),(.*) - with: https://i.iwara.tv/image/original/$1/thumbnail-$2.jpg -# driver: -# headers: -# - Key: Authorization -# # look for token cookie starts with 
"""Stash script scraper for iwara.tv.

Stash runs this file as ``python Iwara.py <sceneByURL|sceneByFragment>`` and
writes the query fragment as JSON to stdin.  The scraped scene is printed as a
single JSON object on stdout; diagnostics go to stderr.
"""
import json
import re
import sys
from datetime import datetime
from typing import NoReturn

import requests

import py_common.log as log
from py_common.cache import cache_to_disk

# Replace both placeholders to scrape videos that require a logged-in account.
USERNAME = "YOUR_USERNAME"
PASSWORD = "YOUR_PASSWORD"

API_BASE = "https://api.iwara.tv"
SITE_BASE = "https://www.iwara.tv"
REQUEST_TIMEOUT = 30  # seconds; requests waits forever without an explicit timeout

# Token value sent when no credentials are configured (kept from the original
# implementation, which returned this string "to bypass login").
ANONYMOUS_TOKEN = "undefined"

# Video id as it appears in a site URL, with or without a trailing slug.
_URL_ID_RE = re.compile(r"iwara\.tv/video/([^/?#]+)")
# Video id in square brackets at the end of a file name or title, as saved by
# yt-dlp, e.g. "VIDEO_TITLE [VIDEO_ID].mp4".  The extension is optional so the
# same pattern also works on a title.
_FILENAME_ID_RE = re.compile(r"\[([0-9a-zA-Z]{13,})\](?:\.[^.\\/]+)?$")


def fail(message: str) -> NoReturn:
    """Print *message* to stderr and terminate the scraper with exit code 1."""
    print(f"Error: {message}", file=sys.stderr)
    sys.exit(1)


def _credentials_configured() -> bool:
    """Return True when USERNAME has been changed from its placeholder."""
    return bool(USERNAME) and USERNAME != "YOUR_USERNAME"


def _as_dict(value) -> dict:
    """Return *value* if it is a dict, else an empty dict (API fields may be null)."""
    return value if isinstance(value, dict) else {}


def _request_token() -> str:
    """Perform the login request and return the token from the response.

    Exits the process when the request fails, the status is not 200, or the
    response carries no token.
    """
    try:
        response = requests.post(
            f"{API_BASE}/user/login",
            json={"email": USERNAME, "password": PASSWORD},
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as e:
        fail(f"Iwara login request failed: {e}")

    if response.status_code != 200:
        log.error("Iwara login failed")
        sys.exit(1)

    try:
        token = _as_dict(response.json()).get("token")
    except ValueError:
        token = None
    if not token:
        # Never hand back (or cache) an empty token.
        fail("Iwara login response did not contain a token")
    return token


# NOTE(review): the cache_to_disk arguments are copied unchanged from the
# original decorator call; its implementation is not visible here.
@cache_to_disk(key="iwara_auth_token", ttl=86400)
def _cached_token() -> str:
    """Return a login token, reusing the cached one where the cache allows."""
    return _request_token()


def login(force=False):
    """Return an auth token for the Iwara API.

    Args:
        force: when true, skip the cached token and log in again.

    Returns:
        The token string, or ``ANONYMOUS_TOKEN`` when no credentials are
        configured.  The anonymous placeholder is decided outside the cached
        function so it is never cached.
    """
    if not _credentials_configured():
        log.info("Iwara login not specified")
        return ANONYMOUS_TOKEN
    if force:
        return _request_token()
    return _cached_token()


def relogin():
    """Force a new login and return the fresh token.

    Calls the login request directly; the original delegated to
    ``login(force=True)``, which called back into ``relogin()`` and recursed
    without end.
    """
    if not _credentials_configured():
        return ANONYMOUS_TOKEN
    return _request_token()


def _parse_date(timestamp):
    """Return the ``YYYY-MM-DD`` part of an ISO timestamp, or None if unusable."""
    if not isinstance(timestamp, str):
        return None
    try:
        # Only the leading date is needed, so timestamps with or without
        # fractional seconds are both accepted.
        return datetime.strptime(timestamp[:10], "%Y-%m-%d").date().isoformat()
    except ValueError:
        return None


def _thumbnail_url(video_data):
    """Return the thumbnail URL for the video, or None when it has no file id."""
    file_id = _as_dict(video_data.get("file")).get("id")
    if not file_id:
        return None
    # `thumbnail` is the index of the selected thumbnail (the previous YAML
    # scraper zero-padded it the same way); fall back to 0 when it is absent.
    index = video_data.get("thumbnail")
    if isinstance(index, bool) or not isinstance(index, int) or index < 0:
        index = 0
    return f"https://files.iwara.tv/image/thumbnail/{file_id}/thumbnail-{index:02d}.jpg"


def _build_scene(video_id, video_data):
    """Map the API's video object onto the scene dict printed for Stash."""
    scene = {
        "title": video_data.get("title"),
        # Singular "/video/": the form the scraper's URL matcher recognises.
        "url": f"{SITE_BASE}/video/{video_id}",
        "details": video_data.get("body"),
        "tags": [
            {"name": tag["id"]}
            for tag in video_data.get("tags") or []
            if isinstance(tag, dict) and tag.get("id")
        ],
    }

    image = _thumbnail_url(video_data)
    if image:
        scene["image"] = image

    date = _parse_date(video_data.get("createdAt"))
    if date:
        scene["date"] = date

    user = _as_dict(video_data.get("user"))
    name, username = user.get("name"), user.get("username")
    if name or username:
        studio = {"name": name or username}
        if username:
            studio["url"] = f"{SITE_BASE}/profile/{username}"
        scene["studio"] = studio

    return scene


def get_video_details(video_id, token, *, allow_relogin=True):
    """Fetch a video from the Iwara API and return it as a scene dict.

    Args:
        video_id: the Iwara video id.
        token: bearer token sent in the Authorization header.
        allow_relogin: when true, a 401 response triggers one fresh login and
            one retry.  The retry passes False so it cannot loop.

    Returns:
        A dict with ``title``, ``url``, ``details`` and ``tags``, plus
        ``image``, ``date`` and ``studio`` when the API provides them.

    Exits the process (via ``fail``) on network errors, HTTP errors, or an
    undecodable response.
    """
    try:
        response = requests.get(
            f"{API_BASE}/video/{video_id}",
            headers={"Authorization": f"Bearer {token}"},
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as e:
        fail(f"Failed to fetch video data: {e}")

    # These checks must come before raise_for_status(), which would otherwise
    # turn 401/404 into a generic HTTPError first.
    if response.status_code == 401:
        if allow_relogin and _credentials_configured():
            return get_video_details(video_id, relogin(), allow_relogin=False)
        fail("401 - Unauthorized, check the Iwara credentials")
    if response.status_code == 404:
        fail("404 - Video might be behind login wall")

    try:
        response.raise_for_status()
    except requests.RequestException as e:
        fail(f"Failed to fetch video data: {e}")

    try:
        video_data = response.json()
    except ValueError:
        # json.JSONDecodeError and requests' own JSONDecodeError are both
        # ValueError subclasses.
        fail("Failed to decode JSON from response")

    if not isinstance(video_data, dict):
        fail("Unexpected response format from Iwara API")

    return _build_scene(video_id, video_data)


def _video_id_from_url(url):
    """Return the video id embedded in an iwara.tv video URL, or None."""
    if not isinstance(url, str):
        return None
    match = _URL_ID_RE.search(url)
    return match.group(1) if match else None


def _video_id_from_fragment(params):
    """Return the video id found in a scene fragment, or None.

    NOTE(review): the fragment schema is defined by Stash and is not visible
    in this file, so the keys below are probed defensively — confirm against
    the Stash scraper documentation.
    """
    explicit = params.get("video_id")  # key read by the original implementation
    if explicit:
        return str(explicit)

    urls = [params.get("url")] + list(params.get("urls") or [])
    for url in urls:
        video_id = _video_id_from_url(url)
        if video_id:
            return video_id

    names = [_as_dict(f).get("path") for f in params.get("files") or []]
    names += [params.get("filename"), params.get("title")]
    for name in names:
        if isinstance(name, str):
            match = _FILENAME_ID_RE.search(name)
            if match:
                return match.group(1)
    return None


def sceneByURL(params):
    """Scrape the scene whose iwara.tv video URL is in ``params['url']``."""
    video_id = _video_id_from_url(_as_dict(params).get("url"))
    if not video_id:
        fail("Invalid video URL")
    return get_video_details(video_id, login())


def sceneByFragment(params):
    """Scrape a scene identified by a URL or a ``[VIDEO_ID]`` file name in the fragment."""
    video_id = _video_id_from_fragment(_as_dict(params))
    if not video_id:
        fail("Could not find an Iwara video id in the scene fragment")
    return get_video_details(video_id, login())


def main():
    """Dispatch to the scrape method named in argv[1], reading JSON from stdin."""
    if len(sys.argv) < 2:
        fail("No scrape method given")
    try:
        params = json.loads(sys.stdin.read())
    except ValueError:
        fail("Input is not valid JSON")

    handlers = {
        "sceneByURL": sceneByURL,
        "sceneByFragment": sceneByFragment,
    }
    handler = handlers.get(sys.argv[1])
    if handler is None:
        fail("This scrape method has not been implemented!")
    print(json.dumps(handler(params)))


if __name__ == "__main__":
    main()