improvements and bug fixes
fix: #19
fix: #20
refactor: add comments in furbooru.py
Official-Husko authored Mar 22, 2024 · 2 parents 3797ed0 + 7e69fda · commit ecd7954
Showing 6 changed files with 180 additions and 189 deletions.
README.md (9 changes: 8 additions & 1 deletion)
@@ -9,6 +9,14 @@ Welcome to the successor of the [multporn image downloader v1][2] & [v2][1] and

<br />

#### Features:
- Portable
- Proxy Support
- AI Training Compatible
- Avoid Duplicates

<br />

#### Currently Supported:
- [Rule34][3] (API)
- [E621][4] (API)
@@ -46,7 +54,6 @@ Welcome to the successor of the [multporn image downloader v1][2] & [v2][1] and

Further sites can be added. Just open a [support ticket][11] with the URL to the site.

<br />
<br />
<br />

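The "Avoid Duplicates" feature added to this list is backed by plain-text files under `db/`, one downloaded post ID per line, as the `db/{site}.db` reads and writes in the diffs below show. A minimal sketch of that mechanism (the helper names here are hypothetical, not the project's actual functions):

```python
import os

def load_seen_ids(site: str) -> set:
    """Return the post IDs already downloaded for a site.

    Mirrors the per-site files main.py reads (e.g. db/e621.db),
    one ID per line.
    """
    path = f"db/{site}.db"
    if not os.path.exists(path):
        return set()
    with open(path, "r") as db_reader:
        return set(db_reader.read().splitlines())

def mark_downloaded(site: str, image_id: int) -> None:
    # Append the ID so later runs skip this post.
    with open(f"db/{site}.db", "a") as db_writer:
        db_writer.write(f"{image_id}\n")
```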
main.py (38 changes: 23 additions & 15 deletions)
@@ -6,7 +6,7 @@
import sys
import inquirer

version = "1.6.1"
version = "1.6.2"

if os.name == 'nt':
from ctypes import windll
@@ -15,7 +15,7 @@
proxy_list = []
header = {"User-Agent":f"nn-downloader/{version} (by Official Husko on GitHub)"}
needed_folders = ["db", "media"]
database_list = ["e621", "furbooru", "rule34", "e6ai", "e926"]
database_list = ["e621", "e6ai", "e926", "furbooru", "rule34"]
unsafe_chars = ["/", "\\", ":", "*", "?", "\"", "<", ">", "|", "\0", "$", "#", "@", "&", "%", "!", "`", "^", "(", ")", "{", "}", "[", "]", "=", "+", "~", ",", ";"]

if sys.gettrace() is not None:
@@ -107,7 +107,7 @@ def clear_screen():

site = answers.get("selection").lower()

if site in ["e621", "e6ai", "e926"]:
if site in ["e621", "e6ai", "e926", "furbooru", "rule34"]:

print(colored("Please enter the tags you want to use.", "green"))
user_tags = input(">> ").lower()
@@ -121,33 +121,41 @@ def clear_screen():
max_sites = input(">> ").lower()
print("")

apiUser = config["user_credentials"][site]["apiUser"]
apiKey = config["user_credentials"][site]["apiKey"]
if site in ["e621", "e6ai", "e926"]:
api_user = config.get("user_credentials",{}).get(site, {}).get("apiUser", "")
api_key = config.get("user_credentials", {}).get(site, {}).get("apiKey", "")
if oneTimeDownload == True:
with open(f"db/{site}.db", "r") as db_reader:
database = db_reader.read().splitlines()
if apiKey == "" or apiUser == "":
print(colored("Please add your Api Key into the config.json", "red"))
sleep(5)
else:
output = E6System.Fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], apiUser=apiUser, apiKey=apiKey, header=header, db=database, site=site, ai_training=ai_training)

database = False
if api_key == "" or api_user == "":
print(colored("Please add your API Key into the config.json", "red"))
sleep(10)
sys.exit(0)
else:
output = E6System.fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], api_user=api_user, api_key=api_key, header=header, db=database, site=site, ai_training=ai_training)

elif site == "rule34":
if oneTimeDownload == True:
with open("db/rule34.db", "r") as db_reader:
database = db_reader.read().splitlines()
output = RULE34.Fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], header=header, db=database)
else:
database = False
output = RULE34.fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], header=header, db=database)

elif site == "furbooru":
apiKey = config["user_credentials"]["furbooru"]["apiKey"]
api_key = config.get("user_credentials", {}).get(site, {}).get("apiKey", "")
if oneTimeDownload == True:
with open("db/furbooru.db", "r") as db_reader:
database = db_reader.read().splitlines()
if apiKey == "":
print(colored("Please add your Api Key into the config.json", "red"))
else:
database = False
if api_key == "":
print(colored("Please add your API Key into the config.json", "red"))
sleep(5)
else:
output = FURBOORU.Fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], apiKey=apiKey, header=header, db=database)
output = FURBOORU.fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], api_key=api_key, header=header, db=database)

elif site == "multporn":
print(colored("Please enter the link. (e.g. https://multporn.net/comics/double_trouble_18)", "green"))
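The headline change in main.py above is replacing direct subscripting of config.json with chained `dict.get()` lookups, so an incomplete `user_credentials` section now degrades to an empty string instead of crashing with a `KeyError`. A self-contained illustration (the sample config dict below is made up for the demo):

```python
config = {"user_credentials": {"e621": {"apiUser": "name", "apiKey": "secret"}}}

site = "furbooru"

# Old style: raises KeyError because "furbooru" is missing from the config.
# api_key = config["user_credentials"][site]["apiKey"]

# New style from this commit: each .get() falls back to an empty dict or
# string, so a missing entry yields "" and the script can warn the user.
api_key = config.get("user_credentials", {}).get(site, {}).get("apiKey", "")
print(repr(api_key))  # '' -> takes the "Please add your API Key" branch
```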
modules/e6systems.py (131 changes: 53 additions & 78 deletions)
@@ -1,119 +1,94 @@
from requests.auth import HTTPBasicAuth
import requests
import os
import json
import random
import requests
from requests.auth import HTTPBasicAuth
from termcolor import colored
from alive_progress import alive_bar
from time import sleep
from datetime import datetime
import os
import json

from main import unsafe_chars
now = datetime.now()
dt_now = now.strftime("%d-%m-%Y_%H-%M-%S")

class E6System():
def Fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, apiUser ,apiKey, header, db, site, ai_training):
class E6System:
@staticmethod
def fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, api_user, api_key, header, db, site, ai_training):
try:
approved_list = []
now = datetime.now()
dt_now = now.strftime("%d-%m-%Y_%H-%M-%S")
page = 1

while True:
URL = f"https://{site}.net/posts.json?tags={user_tags}&limit=320&page={page}"
if user_proxies == True:
proxy = random.choice(proxy_list)
raw_req = requests.get(URL, headers=header, proxies=proxy, auth=HTTPBasicAuth(apiUser, apiKey))
else:
raw_req = requests.get(URL, headers=header, auth=HTTPBasicAuth(apiUser, apiKey))

proxy = random.choice(proxy_list) if user_proxies else None
raw_req = requests.get(URL, headers=header, proxies=proxy, auth=HTTPBasicAuth(api_user, api_key))
req = raw_req.json()

try:
if req["message"] == "You cannot go beyond page 750. Please narrow your search terms.":
print(colored(req["message"] + " (API limit)", "red"))
sleep(5)
break
except:
pass

if req["posts"] == []:
if "message" in req and req["message"] == "You cannot go beyond page 750. Please narrow your search terms.":
print(colored(req["message"] + " (API limit)", "red"))
sleep(5)
break

if not req["posts"]:
print(colored("No images found or all downloaded! Try different tags.", "yellow"))
sleep(5)
break

elif page == max_sites:
print(colored(f"Finished Downloading {max_sites} of {max_sites} pages.", "yellow"))
sleep(5)
break

else:
else:
for item in req["posts"]:
image_id = item["id"]
image_address = item["file"]["url"]
post_tags1 = item["tags"]["general"]
post_tags2 = item["tags"]["species"]
post_tags3 = item["tags"]["character"]
if site == "e6ai":
post_tags4 = item["tags"]["director"]
post_tags5 = item["tags"]["meta"]
else:
post_tags4 = item["tags"]["copyright"]
post_tags5 = item["tags"]["artist"]

if ai_training == True:
meta_tags = item["tags"]
else:
meta_tags = []
image_address = item["file"].get("url")
meta_tags = item["tags"] if ai_training else []
post_tags = [item["tags"][tag_type] for tag_type in ["general", "species", "character"]]
post_tags += [item["tags"]["director"], item["tags"]["meta"]] if site == "e6ai" else [item["tags"]["copyright"], item["tags"]["artist"]]
post_tags = sum(post_tags, [])
user_blacklist_length = len(user_blacklist)

passed = sum(blacklisted_tag in post_tags for blacklisted_tag in user_blacklist)

post_tags = post_tags1 + post_tags2 + post_tags3 + post_tags4 + post_tags5
image_format = item["file"]["ext"]
user_blacklist_lenght = len(user_blacklist)
passed = 0
if passed == 0 and not db and image_address and not any(tag in user_blacklist for tag in post_tags):
image_data = {"image_address": image_address, "image_format": item["file"]["ext"], "image_id": image_id, "meta_tags": meta_tags}
approved_list.append(image_data)

for blacklisted_tag in user_blacklist:
if blacklisted_tag in post_tags:
break
else:
passed += 1
if passed == user_blacklist_lenght and str(image_id) not in db and image_address != None:
image_data = {"image_address": image_address, "image_format": image_format, "image_id": image_id, "meta_tags": meta_tags}
elif db and str(image_id) not in db and image_address and not any(tag in user_blacklist for tag in post_tags):
image_data = {"image_address": image_address, "image_format": item["file"]["ext"], "image_id": image_id, "meta_tags": meta_tags}
approved_list.append(image_data)
else:
pass

# Download Each file
with alive_bar(len(approved_list), calibrate=1, dual_line=True, title='Downloading') as bar:
for data in approved_list:
image_address = data["image_address"]
image_format = data["image_format"]
image_id = data["image_id"]
meta_tags = data["meta_tags"]
image_address = data.get("image_address")
image_format = data.get("image_format")
image_id = data.get("image_id")
meta_tags = data.get("meta_tags")
bar.text = f'-> Downloading: {image_id}, please wait...'
if user_proxies == True:
proxy = random.choice(proxy_list)
img_data = requests.get(image_address, proxies=proxy).content
else:
sleep(1)
img_data = requests.get(image_address).content

safe_user_tags = user_tags.replace(" ", "_")
for char in unsafe_chars:
safe_user_tags = safe_user_tags.replace(char, "")

if not os.path.exists(f"media/{dt_now}_{safe_user_tags}"):
os.mkdir(f"media/{dt_now}_{safe_user_tags}")
proxy = random.choice(proxy_list) if user_proxies else None
img_data = requests.get(image_address, proxies=proxy).content if user_proxies else requests.get(image_address).content

if not os.path.exists(f"media/{dt_now}_{safe_user_tags}/meta") and ai_training == True:
os.mkdir(f"media/{dt_now}_{safe_user_tags}/meta")
safe_user_tags = "".join(char for char in user_tags if char not in unsafe_chars).replace(" ", "_")
directory = f"media/{dt_now}_{safe_user_tags}"
meta_directory = f"{directory}/meta"

with open(f"media/{dt_now}_{safe_user_tags}/{str(image_id)}.{image_format}", 'wb') as handler:
handler.write(img_data)
os.makedirs(directory, exist_ok=True)

if ai_training == True:
with open(f"media/{dt_now}_{safe_user_tags}/meta/{str(image_id)}.json", 'w') as handler:
os.makedirs(meta_directory, exist_ok=True)
with open(f"{meta_directory}/{str(image_id)}.json", 'w') as handler:
json.dump(meta_tags, handler, indent=6)

with open(f"db/{site}.db", "a") as db_writer:
db_writer.write(f"{str(image_id)}\n")
with open(f"{directory}/{str(image_id)}.{image_format}", 'wb') as handler:
handler.write(img_data)

if db != False:
with open(f"db/{site}.db", "a") as db_writer:
db_writer.write(f"{str(image_id)}\n")

bar()

print(colored(f"Page {page} Completed", "green"))
@@ -124,4 +99,4 @@ def Fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, apiU
return {"status": "ok"}

except Exception as e:
return {"status": "error", "uinput": user_tags, "exception": str(e), "extra": raw_req.content}
return {"status": "error", "uinput": user_tags, "exception": str(e), "extra": raw_req.content}