From c0cefac546887620f6a99575e7040aa9de0b141f Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Thu, 12 Dec 2024 23:27:31 +0100 Subject: [PATCH 1/8] create flask frontend --- .gitignore | 5 ++ app.py | 174 ++++++++++++++++++++++++++++++++++++ maigret/resources/data.json | 2 +- templates/base.html | 44 +++++++++ templates/index.html | 35 ++++++++ templates/results.html | 83 +++++++++++++++++ 6 files changed, 342 insertions(+), 1 deletion(-) create mode 100644 app.py create mode 100644 templates/base.html create mode 100644 templates/index.html create mode 100644 templates/results.html diff --git a/.gitignore b/.gitignore index 4b887a9c..0cd418f1 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,8 @@ settings.json # other *.egg-info build +lib/vis-9.1.2/vis-network.min.js +lib/bindings/utils.js +lib/tom-select/tom-select.complete.min.js +lib/tom-select/tom-select.css +lib/vis-9.1.2/vis-network.css diff --git a/app.py b/app.py new file mode 100644 index 00000000..9a331166 --- /dev/null +++ b/app.py @@ -0,0 +1,174 @@ +# app.py +from flask import Flask, render_template, request, send_file, Response, flash +import logging +import asyncio +import os +from datetime import datetime +import maigret +from maigret.sites import MaigretDatabase +from maigret.report import generate_report_context + +app = Flask(__name__) +app.secret_key = 'your-secret-key-here' + +# Configuration +MAIGRET_DB_FILE = os.path.join('maigret', 'resources', 'data.json') +COOKIES_FILE = "cookies.txt" +UPLOAD_FOLDER = 'uploads' +REPORTS_FOLDER = 'reports' + +os.makedirs(UPLOAD_FOLDER, exist_ok=True) +os.makedirs(REPORTS_FOLDER, exist_ok=True) + +def setup_logger(log_level, name): + logger = logging.getLogger(name) + logger.setLevel(log_level) + return logger + +async def maigret_search(username, options): + logger = setup_logger(logging.WARNING, 'maigret') + + try: + db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE) + sites = db.ranked_sites_dict(top=int(options.get('top_sites', 500))) + + results = await maigret.search( + username=username, + site_dict=sites, + timeout=int(options.get('timeout', 30)), + logger=logger, + id_type=options.get('id_type', 'username'), + cookies=COOKIES_FILE if options.get('use_cookies') else None, + ) + + return results + except Exception as e: + logger.error(f"Error during search: {str(e)}") + raise + +async def search_multiple_usernames(usernames, options): + results = [] + for username in usernames: + try: + search_results = await maigret_search(username.strip(), options) + results.append((username.strip(), options['id_type'], search_results)) + except Exception as e: + logging.error(f"Error searching username {username}: {str(e)}") + return results + +@app.route('/') +def index(): + return render_template('index.html') + +@app.route('/search', methods=['POST']) +def search(): + usernames_input = request.form.get('usernames', '').strip() + if not usernames_input: + return render_template('index.html', error="At least one username is required") + + try: + # Split usernames by common separators + usernames = [u.strip() for u in usernames_input.replace(',', ' ').split() if u.strip()] + + # Create timestamp for this search session + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}") + os.makedirs(session_folder, exist_ok=True) + + # Collect options from form + options = { + 'top_sites': request.form.get('top_sites', '500'), + 'timeout': request.form.get('timeout', '30'), + 'id_type': request.form.get('id_type', 'username'), + 'use_cookies': 'use_cookies' in request.form, + } + + # Run search asynchronously for all usernames + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + general_results = loop.run_until_complete(search_multiple_usernames(usernames, options)) + + # Save the combined graph in the session folder + graph_path = os.path.join(session_folder, "combined_graph.html") + maigret.report.save_graph_report(graph_path, general_results, MaigretDatabase().load_from_path(MAIGRET_DB_FILE)) + + # Save individual reports for each username + individual_reports = [] + for username, id_type, results in general_results: + report_base = os.path.join(session_folder, f"report_{username}") + + # Save reports in different formats + csv_path = f"{report_base}.csv" + json_path = f"{report_base}.json" + pdf_path = f"{report_base}.pdf" + html_path = f"{report_base}.html" + + context = generate_report_context(general_results) + + maigret.report.save_csv_report(csv_path, username, results) + maigret.report.save_json_report(json_path, username, results, report_type='ndjson') + maigret.report.save_pdf_report(pdf_path, context) + maigret.report.save_html_report(html_path, context) + + # Extract claimed profiles + claimed_profiles = [] + for site_name, site_data in results.items(): + if (site_data.get('status') and + site_data['status'].status == maigret.result.MaigretCheckStatus.CLAIMED): + claimed_profiles.append({ + 'site_name': site_name, + 'url': site_data.get('url_user', ''), + 'tags': site_data.get('status').tags if site_data.get('status') else [] + }) + + individual_reports.append({ + 'username': username, + 'csv_file': os.path.relpath(csv_path, REPORTS_FOLDER), + 'json_file': os.path.relpath(json_path, REPORTS_FOLDER), + 'pdf_file': os.path.relpath(pdf_path, REPORTS_FOLDER), + 'html_file': os.path.relpath(html_path, REPORTS_FOLDER), + 'claimed_profiles': claimed_profiles, + }) + + return render_template( + 'results.html', + usernames=usernames, + graph_file=os.path.relpath(graph_path, REPORTS_FOLDER), + individual_reports=individual_reports, + timestamp=timestamp + ) + + except Exception as e: + logging.error(f"Error processing search: {str(e)}", exc_info=True) + return render_template('index.html', error=f"An error occurred: {str(e)}") + +@app.route('/reports/') +def download_report(filename): + """Serve report files""" + try: + return send_file(os.path.join(REPORTS_FOLDER, filename)) + except Exception as e: + logging.error(f"Error serving file {filename}: {str(e)}") + return "File not found", 404 + +@app.route('/view_graph/') +def view_graph(graph_path): + """Serve the graph HTML directly""" + graph_file = os.path.join(REPORTS_FOLDER, graph_path) + try: + with open(graph_file, 'r', encoding='utf-8') as f: + content = f.read() + return content + except FileNotFoundError: + logging.error(f"Graph file not found: {graph_file}") + return "Graph not found", 404 + except Exception as e: + logging.error(f"Error serving graph {graph_file}: {str(e)}") + return "Error loading graph", 500 + +if __name__ == '__main__': + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + app.run(debug=True) \ No newline at end of file diff --git a/maigret/resources/data.json b/maigret/resources/data.json index 70c07c11..04e1528b 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -17474,7 +17474,7 @@ "method": "vimeo" }, "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM5Njc3MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNGJkNDE4NzktM2VhOS00ZWRiLWIzZDUtNjAyNjQ3YjMyNTVhIn0.kPbKREujSfYsisyF0pS_HskTapRlHBfVLRw4cis1ezk" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzQwMzc5MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiM2U2ZWQ1MDYtZTU0OC00ZGIwLWI4YTMtMzdiZWMyYzRiYTJiIn0.vojHtXWsDNBtjQjoVm6DSV9XHhWzu-PUMwjOJouMkG8" }, "urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1", "checkType": "status_code", diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 00000000..48008744 --- /dev/null +++ b/templates/base.html @@ -0,0 +1,44 @@ + + + + + + +Maigret Web Interface + + + + +
+
+ +
+ {% block content %}{% endblock %} +
+ + + + diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 00000000..32182002 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,35 @@ +{% extends "base.html" %} +{% block content %} +
+

Maigret Web Interface

+ + {% if error %} +
{{ error }}
+ {% endif %} + +
+
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+ + +
+
+{% endblock %} \ No newline at end of file diff --git a/templates/results.html b/templates/results.html new file mode 100644 index 00000000..a1ab0914 --- /dev/null +++ b/templates/results.html @@ -0,0 +1,83 @@ +{% extends "base.html" %} +{% block content %} +
+

Search Results

+

Search session: {{ timestamp }}

+ +
+

Combined Network Graph

+ +
+ +
+

Individual Reports

+
+ {% for report in individual_reports %} +
+

+ +

+
+
+ + + {% if report.claimed_profiles %} +
+
+ Found Profiles +
+
    + {% for profile in report.claimed_profiles %} +
  • +
    +
    + {{ profile.site_name }} + + {{ profile.url }} + +
    + {% if profile.tags %} +
    + {% for tag in profile.tags %} + {{ tag }} + {% endfor %} +
    + {% endif %} +
    +
  • + {% endfor %} +
+
+ {% endif %} +
+
+
+ {% endfor %} +
+
+ + New Search +
+{% endblock %} \ No newline at end of file From f43ebbb6fae2afbf8b206adfe96714b47636d67e Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 13 Dec 2024 10:59:01 +0100 Subject: [PATCH 2/8] update webinterface --- .gitignore | 3 +++ maigret/maigret.py | 12 +++++++++ maigret/resources/settings.json | 3 ++- maigret/settings.py | 1 + app.py => maigret/web/app.py | 25 ++++++++----------- .../web/templates}/base.html | 0 .../web/templates}/index.html | 0 .../web/templates}/results.html | 0 pyproject.toml | 2 ++ 9 files changed, 31 insertions(+), 15 deletions(-) rename app.py => maigret/web/app.py (91%) rename {templates => maigret/web/templates}/base.html (100%) rename {templates => maigret/web/templates}/index.html (100%) rename {templates => maigret/web/templates}/results.html (100%) diff --git a/.gitignore b/.gitignore index 0cd418f1..76a6d4a2 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,6 @@ lib/bindings/utils.js lib/tom-select/tom-select.complete.min.js lib/tom-select/tom-select.css lib/vis-9.1.2/vis-network.css +poetry.lock +filetree.sh +poetry.lock diff --git a/maigret/maigret.py b/maigret/maigret.py index eb1b8a92..6d5d205c 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -324,6 +324,14 @@ def setup_arguments_parser(settings: Settings): default=False, help="Show database statistics (most frequent sites engines and tags).", ) + modes_group.add_argument( + "--web", + action="store", + metavar='PORT', + type=int, + default=settings.web_interface_port, + help="Launches the web interface on the specified port (Default: 5000).", + ) output_group = parser.add_argument_group( 'Output options', 'Options to change verbosity and view of the console output' @@ -484,6 +492,10 @@ async def main(): elif args.verbose: log_level = logging.WARNING logger.setLevel(log_level) + + if args.web: + from maigret.web.app import app + app.run(port=args.web) # Use the port from arguments # Usernames initial list usernames = { diff --git a/maigret/resources/settings.json b/maigret/resources/settings.json index 656f3587..82f90f77 100644 --- a/maigret/resources/settings.json +++ b/maigret/resources/settings.json @@ -53,5 +53,6 @@ "xmind_report": false, "graph_report": false, "pdf_report": false, - "html_report": false + "html_report": false, + "web_interface_port": 5000 } \ No newline at end of file diff --git a/maigret/settings.py b/maigret/settings.py index 31b9ca51..a355d39a 100644 --- a/maigret/settings.py +++ b/maigret/settings.py @@ -42,6 +42,7 @@ class Settings: pdf_report: bool html_report: bool graph_report: bool + web_interface_port: int # submit mode settings presence_strings: list diff --git a/app.py b/maigret/web/app.py similarity index 91% rename from app.py rename to maigret/web/app.py index 9a331166..672d7aa5 100644 --- a/app.py +++ b/maigret/web/app.py @@ -151,20 +151,17 @@ def download_report(filename): logging.error(f"Error serving file {filename}: {str(e)}") return "File not found", 404 -@app.route('/view_graph/') -def view_graph(graph_path): - """Serve the graph HTML directly""" - graph_file = os.path.join(REPORTS_FOLDER, graph_path) - try: - with open(graph_file, 'r', encoding='utf-8') as f: - content = f.read() - return content - except FileNotFoundError: - logging.error(f"Graph file not found: {graph_file}") - return "Graph not found", 404 - except Exception as e: - logging.error(f"Error serving graph {graph_file}: {str(e)}") - return "Error loading graph", 500 +#@app.route('/view_graph/') +#def view_graph(graph_path): +# """Serve the graph HTML directly""" +# graph_file = os.path.join(REPORTS_FOLDER, graph_path) +# try: +# with open(graph_file, 'r', encoding='utf-8') as f: +# content = f.read() +# return content +# except Exception as e: +# logging.error(f"Error serving graph {graph_file}: {str(e)}") +# return "Error loading graph", 500 if __name__ == '__main__': logging.basicConfig( diff --git a/templates/base.html b/maigret/web/templates/base.html similarity index 100% rename from templates/base.html rename to maigret/web/templates/base.html diff --git a/templates/index.html b/maigret/web/templates/index.html similarity index 100% rename from templates/index.html rename to maigret/web/templates/index.html diff --git a/templates/results.html b/maigret/web/templates/results.html similarity index 100% rename from templates/results.html rename to maigret/web/templates/results.html diff --git a/pyproject.toml b/pyproject.toml index 19e16330..b6c6f0f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,8 @@ networkx = "^2.6.3" pyvis = "^0.3.2" reportlab = "^4.2.0" cloudscraper = "^1.2.71" +flask = {extras = ["async"], version = "^3.1.0"} +asgiref = "^3.8.1" [tool.poetry.group.dev.dependencies] From a862309682bf1a18039d7d475c03524c8efd9994 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 13 Dec 2024 14:51:05 +0100 Subject: [PATCH 3/8] update --- maigret/web/app.py | 194 ++++++++++++++++++++--------- maigret/web/templates/base.html | 2 +- maigret/web/templates/results.html | 98 ++++----------- maigret/web/templates/status.html | 26 ++++ 4 files changed, 186 insertions(+), 134 deletions(-) create mode 100644 maigret/web/templates/status.html diff --git a/maigret/web/app.py b/maigret/web/app.py index 672d7aa5..be194973 100644 --- a/maigret/web/app.py +++ b/maigret/web/app.py @@ -1,9 +1,10 @@ # app.py -from flask import Flask, render_template, request, send_file, Response, flash +from flask import Flask, render_template, request, send_file, Response, flash, redirect, url_for import logging -import asyncio import os +import asyncio from datetime import datetime +from threading import Thread import maigret from maigret.sites import MaigretDatabase from maigret.report import generate_report_context @@ -11,6 +12,10 @@ app = Flask(__name__) app.secret_key = 'your-secret-key-here' +# Add background job tracking +background_jobs = {} +job_results = {} + # Configuration MAIGRET_DB_FILE = os.path.join('maigret', 'resources', 'data.json') COOKIES_FILE = "cookies.txt" @@ -26,7 +31,7 @@ def setup_logger(log_level, name): return logger async def maigret_search(username, options): - logger = setup_logger(logging.WARNING, 'maigret') + logger = setup_logger(logging.DEBUG, 'maigret') try: db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE) @@ -56,43 +61,24 @@ async def search_multiple_usernames(usernames, options): logging.error(f"Error searching username {username}: {str(e)}") return results -@app.route('/') -def index(): - return render_template('index.html') - -@app.route('/search', methods=['POST']) -def search(): - usernames_input = request.form.get('usernames', '').strip() - if not usernames_input: - return render_template('index.html', error="At least one username is required") - +def process_search_task(usernames, options, timestamp): try: - # Split usernames by common separators - usernames = [u.strip() for u in usernames_input.replace(',', ' ').split() if u.strip()] - - # Create timestamp for this search session - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}") - os.makedirs(session_folder, exist_ok=True) - - # Collect options from form - options = { - 'top_sites': request.form.get('top_sites', '500'), - 'timeout': request.form.get('timeout', '30'), - 'id_type': request.form.get('id_type', 'username'), - 'use_cookies': 'use_cookies' in request.form, - } - - # Run search asynchronously for all usernames + # Setup event loop for async operations loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) + + # Run the search general_results = loop.run_until_complete(search_multiple_usernames(usernames, options)) - # Save the combined graph in the session folder + # Create session folder + session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}") + os.makedirs(session_folder, exist_ok=True) + + # Save the combined graph graph_path = os.path.join(session_folder, "combined_graph.html") maigret.report.save_graph_report(graph_path, general_results, MaigretDatabase().load_from_path(MAIGRET_DB_FILE)) - # Save individual reports for each username + # Save individual reports individual_reports = [] for username, id_type, results in general_results: report_base = os.path.join(session_folder, f"report_{username}") @@ -123,46 +109,136 @@ def search(): individual_reports.append({ 'username': username, - 'csv_file': os.path.relpath(csv_path, REPORTS_FOLDER), - 'json_file': os.path.relpath(json_path, REPORTS_FOLDER), - 'pdf_file': os.path.relpath(pdf_path, REPORTS_FOLDER), - 'html_file': os.path.relpath(html_path, REPORTS_FOLDER), + # Must create paths relative to REPORTS_FOLDER + 'csv_file': os.path.join(f"search_{timestamp}", f"report_{username}.csv"), + 'json_file': os.path.join(f"search_{timestamp}", f"report_{username}.json"), + 'pdf_file': os.path.join(f"search_{timestamp}", f"report_{username}.pdf"), + 'html_file': os.path.join(f"search_{timestamp}", f"report_{username}.html"), 'claimed_profiles': claimed_profiles, }) - return render_template( - 'results.html', - usernames=usernames, - graph_file=os.path.relpath(graph_path, REPORTS_FOLDER), - individual_reports=individual_reports, - timestamp=timestamp - ) - + # Save results and mark job as complete + job_results[timestamp] = { + 'status': 'completed', + 'session_folder': f"search_{timestamp}", + 'graph_file': os.path.join(f"search_{timestamp}", "combined_graph.html"), + 'usernames': usernames, + 'individual_reports': individual_reports + } except Exception as e: - logging.error(f"Error processing search: {str(e)}", exc_info=True) - return render_template('index.html', error=f"An error occurred: {str(e)}") + job_results[timestamp] = { + 'status': 'failed', + 'error': str(e) + } + finally: + background_jobs[timestamp]['completed'] = True + +@app.route('/') +def index(): + return render_template('index.html') + +@app.route('/search', methods=['POST']) +def search(): + usernames_input = request.form.get('usernames', '').strip() + if not usernames_input: + flash('At least one username is required', 'danger') + return redirect(url_for('index')) + + # Split usernames by common separators + usernames = [u.strip() for u in usernames_input.replace(',', ' ').split() if u.strip()] + + # Create timestamp for this search session + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + logging.info(f"Starting search for usernames: {usernames}") + + # Collect options from form + options = { + 'top_sites': request.form.get('top_sites', '500'), + 'timeout': request.form.get('timeout', '30'), + 'id_type': request.form.get('id_type', 'username'), + 'use_cookies': 'use_cookies' in request.form, + } + + # Start background job + background_jobs[timestamp] = { + 'completed': False, + 'thread': Thread(target=process_search_task, args=(usernames, options, timestamp)) + } + background_jobs[timestamp]['thread'].start() + + logging.info(f"Search job started with timestamp: {timestamp}") + flash('Search started. Please wait while we process your request...', 'info') + + # Redirect to status page + return redirect(url_for('status', timestamp=timestamp)) + + +@app.route('/status/') +def status(timestamp): + logging.info(f"Status check for timestamp: {timestamp}") + + # Validate timestamp + if timestamp not in background_jobs: + flash('Invalid search session', 'danger') + return redirect(url_for('index')) + + # Check if job is completed + if background_jobs[timestamp]['completed']: + result = job_results.get(timestamp) + if not result: + flash('No results found for this search session', 'warning') + return redirect(url_for('index')) + + if result['status'] == 'completed': + # Redirect to results page + return redirect(url_for('results', session_id=result['session_folder'])) + else: + error_msg = result.get('error', 'Unknown error occurred') + flash(f'Search failed: {error_msg}', 'danger') + return redirect(url_for('index')) + + # If job is still running, show status page + logging.info(f"Job still running for timestamp: {timestamp}") + return render_template('status.html', timestamp=timestamp) + + +@app.route('/results/') +def results(session_id): + # Validate session_id format + if not session_id.startswith('search_'): + flash('Invalid results session format', 'danger') + return redirect(url_for('index')) + + # Find matching result data + result_data = next( + (r for r in job_results.values() + if r.get('status') == 'completed' and r['session_folder'] == session_id), + None + ) + + if not result_data: + flash('Results not found or search is still in progress', 'warning') + return redirect(url_for('index')) + + return render_template( + 'results.html', + usernames=result_data['usernames'], + graph_file=result_data['graph_file'], + individual_reports=result_data['individual_reports'], + timestamp=session_id.replace('search_', '') + ) @app.route('/reports/') def download_report(filename): """Serve report files""" try: - return send_file(os.path.join(REPORTS_FOLDER, filename)) + file_path = os.path.join(REPORTS_FOLDER, filename) + return send_file(file_path) except Exception as e: logging.error(f"Error serving file {filename}: {str(e)}") return "File not found", 404 -#@app.route('/view_graph/') -#def view_graph(graph_path): -# """Serve the graph HTML directly""" -# graph_file = os.path.join(REPORTS_FOLDER, graph_path) -# try: -# with open(graph_file, 'r', encoding='utf-8') as f: -# content = f.read() -# return content -# except Exception as e: -# logging.error(f"Error serving graph {graph_file}: {str(e)}") -# return "Error loading graph", 500 - if __name__ == '__main__': logging.basicConfig( level=logging.INFO, diff --git a/maigret/web/templates/base.html b/maigret/web/templates/base.html index 48008744..2ab4d2f6 100644 --- a/maigret/web/templates/base.html +++ b/maigret/web/templates/base.html @@ -1,6 +1,6 @@ - + diff --git a/maigret/web/templates/results.html b/maigret/web/templates/results.html index a1ab0914..5d349ad7 100644 --- a/maigret/web/templates/results.html +++ b/maigret/web/templates/results.html @@ -1,83 +1,33 @@ {% extends "base.html" %} {% block content %}
-

Search Results

-

Search session: {{ timestamp }}

+

Processing Search

-
-

Combined Network Graph

- -
- -
-

Individual Reports

-
- {% for report in individual_reports %} -
-

- -

-
-
- - - {% if report.claimed_profiles %} -
-
- Found Profiles -
-
    - {% for profile in report.claimed_profiles %} -
  • -
    -
    - {{ profile.site_name }} - - {{ profile.url }} - -
    - {% if profile.tags %} -
    - {% for tag in profile.tags %} - {{ tag }} - {% endfor %} -
    - {% endif %} -
    -
  • - {% endfor %} -
-
- {% endif %} -
-
-
+ {% with messages = get_flashed_messages() %} + {% if messages %} + {% for message in messages %} +
{{ message }}
{% endfor %} + {% endif %} + {% endwith %} + +
+
+ Loading...
+

Processing your search request. Please wait...

- - New Search + +
{% endblock %} \ No newline at end of file diff --git a/maigret/web/templates/status.html b/maigret/web/templates/status.html new file mode 100644 index 00000000..d64b9a1b --- /dev/null +++ b/maigret/web/templates/status.html @@ -0,0 +1,26 @@ +{% extends "base.html" %} +{% block content %} +
+
+
+

Processing Search Request

+
+ Your search is being processed. This page will automatically redirect to the results when complete. +
+
+
+
+
+ Loading... +
+
+
+ + +
+{% endblock %} \ No newline at end of file From a03b36fb5a02dd24b94ebafd34dac1c92ac95e0a Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Sat, 14 Dec 2024 00:58:51 +0100 Subject: [PATCH 4/8] updates to webinterface --- maigret/maigret.py | 3 +- maigret/web/app.py | 29 ++++---------- maigret/web/templates/results.html | 61 ++++++++++++++++++++---------- maigret/web/templates/status.html | 30 +++++---------- 4 files changed, 62 insertions(+), 61 deletions(-) diff --git a/maigret/maigret.py b/maigret/maigret.py index 6d5d205c..ca56b994 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -495,7 +495,8 @@ async def main(): if args.web: from maigret.web.app import app - app.run(port=args.web) # Use the port from arguments + port = args.web if isinstance(args.web, int) else 5000 + app.run(port=port) # Use the port from arguments # Usernames initial list usernames = { diff --git a/maigret/web/app.py b/maigret/web/app.py index be194973..a4c1bb5b 100644 --- a/maigret/web/app.py +++ b/maigret/web/app.py @@ -6,6 +6,7 @@ from datetime import datetime from threading import Thread import maigret +import maigret.settings from maigret.sites import MaigretDatabase from maigret.report import generate_report_context @@ -20,7 +21,7 @@ MAIGRET_DB_FILE = os.path.join('maigret', 'resources', 'data.json') COOKIES_FILE = "cookies.txt" UPLOAD_FOLDER = 'uploads' -REPORTS_FOLDER = 'reports' +REPORTS_FOLDER = os.path.abspath('/tmp/maigret_reports') os.makedirs(UPLOAD_FOLDER, exist_ok=True) os.makedirs(REPORTS_FOLDER, exist_ok=True) @@ -31,8 +32,7 @@ def setup_logger(log_level, name): return logger async def maigret_search(username, options): - logger = setup_logger(logging.DEBUG, 'maigret') - + logger = setup_logger(logging.WARNING, 'maigret') try: db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE) sites = db.ranked_sites_dict(top=int(options.get('top_sites', 500))) @@ -45,7 +45,6 @@ async def maigret_search(username, options): id_type=options.get('id_type', 'username'), cookies=COOKIES_FILE if options.get('use_cookies') else None, ) - return results except Exception as e: logger.error(f"Error during search: {str(e)}") @@ -83,7 +82,6 @@ def process_search_task(usernames, options, timestamp): for username, id_type, results in general_results: report_base = os.path.join(session_folder, f"report_{username}") - # Save reports in different formats csv_path = f"{report_base}.csv" json_path = f"{report_base}.json" pdf_path = f"{report_base}.pdf" @@ -96,7 +94,6 @@ def process_search_task(usernames, options, timestamp): maigret.report.save_pdf_report(pdf_path, context) maigret.report.save_html_report(html_path, context) - # Extract claimed profiles claimed_profiles = [] for site_name, site_data in results.items(): if (site_data.get('status') and @@ -109,7 +106,6 @@ def process_search_task(usernames, options, timestamp): individual_reports.append({ 'username': username, - # Must create paths relative to REPORTS_FOLDER 'csv_file': os.path.join(f"search_{timestamp}", f"report_{username}.csv"), 'json_file': os.path.join(f"search_{timestamp}", f"report_{username}.json"), 'pdf_file': os.path.join(f"search_{timestamp}", f"report_{username}.pdf"), @@ -144,7 +140,6 @@ def search(): flash('At least one username is required', 'danger') return redirect(url_for('index')) - # Split usernames by common separators usernames = [u.strip() for u in usernames_input.replace(',', ' ').split() if u.strip()] # Create timestamp for this search session @@ -152,11 +147,10 @@ def search(): logging.info(f"Starting search for usernames: {usernames}") - # Collect options from form options = { 'top_sites': request.form.get('top_sites', '500'), 'timeout': request.form.get('timeout', '30'), - 'id_type': request.form.get('id_type', 'username'), + 'id_type': 'username', # fixed as username 'use_cookies': 'use_cookies' in request.form, } @@ -168,7 +162,6 @@ def search(): background_jobs[timestamp]['thread'].start() logging.info(f"Search job started with timestamp: {timestamp}") - flash('Search started. Please wait while we process your request...', 'info') # Redirect to status page return redirect(url_for('status', timestamp=timestamp)) @@ -191,35 +184,30 @@ def status(timestamp): return redirect(url_for('index')) if result['status'] == 'completed': - # Redirect to results page + # Redirect to results page once done return redirect(url_for('results', session_id=result['session_folder'])) else: error_msg = result.get('error', 'Unknown error occurred') flash(f'Search failed: {error_msg}', 'danger') return redirect(url_for('index')) - # If job is still running, show status page - logging.info(f"Job still running for timestamp: {timestamp}") + # If job is still running, show status page with a simple spinner return render_template('status.html', timestamp=timestamp) @app.route('/results/') def results(session_id): - # Validate session_id format if not session_id.startswith('search_'): flash('Invalid results session format', 'danger') return redirect(url_for('index')) - # Find matching result data result_data = next( (r for r in job_results.values() if r.get('status') == 'completed' and r['session_folder'] == session_id), None ) - if not result_data: - flash('Results not found or search is still in progress', 'warning') - return redirect(url_for('index')) + return render_template( 'results.html', @@ -231,7 +219,6 @@ def results(session_id): @app.route('/reports/') def download_report(filename): - """Serve report files""" try: file_path = os.path.join(REPORTS_FOLDER, filename) return send_file(file_path) @@ -244,4 +231,4 @@ def download_report(filename): level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) - app.run(debug=True) \ No newline at end of file + app.run(debug=True) diff --git a/maigret/web/templates/results.html b/maigret/web/templates/results.html index 5d349ad7..7ce144e3 100644 --- a/maigret/web/templates/results.html +++ b/maigret/web/templates/results.html @@ -1,7 +1,7 @@ {% extends "base.html" %} {% block content %}
-

Processing Search

+

Search Results

{% with messages = get_flashed_messages() %} {% if messages %} @@ -11,23 +11,46 @@

Processing Search

{% endif %} {% endwith %} -
-
- Loading... -
-

Processing your search request. Please wait...

-
+

The search has completed. Below are the results:

- + + {% if graph_file %} +

Combined Graph

+ + {% endif %} + +
+ + + {% if individual_reports %} +

Individual Reports

+
    + {% for report in individual_reports %} +
  • +
    {{ report.username }}
    +

    + CSV Report | + JSON Report | + PDF Report | + HTML Report +

    + {% if report.claimed_profiles %} + Claimed Profiles: +
      + {% for profile in report.claimed_profiles %} +
    • + {{ profile.site_name }} (Tags: {{ profile.tags|join(', ') }}) +
    • + {% endfor %} +
    + {% else %} +

    No claimed profiles found.

    + {% endif %} +
  • + {% endfor %} +
+ {% else %} +

No individual reports available.

+ {% endif %}
-{% endblock %} \ No newline at end of file +{% endblock %} diff --git a/maigret/web/templates/status.html b/maigret/web/templates/status.html index d64b9a1b..3fca1255 100644 --- a/maigret/web/templates/status.html +++ b/maigret/web/templates/status.html @@ -1,26 +1,16 @@ {% extends "base.html" %} {% block content %} -
-
-
-

Processing Search Request

-
- Your search is being processed. This page will automatically redirect to the results when complete. -
-
-
-
-
- Loading... -
-
+
+

Search in progress...

+

Your request is being processed in the background. This page will automatically redirect once the results are ready.

+
+ Loading...
-
-{% endblock %} \ No newline at end of file +{% endblock %} From dac9abeb797570cebc144857287c6a0fb2593aaf Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Sat, 14 Dec 2024 01:01:40 +0100 Subject: [PATCH 5/8] webinterface: minor changes --- maigret/maigret.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/maigret/maigret.py b/maigret/maigret.py index ca56b994..133c23c5 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -326,13 +326,15 @@ def setup_arguments_parser(settings: Settings): ) modes_group.add_argument( "--web", - action="store", metavar='PORT', - type=int, - default=settings.web_interface_port, - help="Launches the web interface on the specified port (Default: 5000).", + type=int, + nargs='?', + const=5000, # default if --web is provided without a port + default=None, + help="Launches the web interface on the specified port (default: 5000 if no PORT is provided).", ) + output_group = parser.add_argument_group( 'Output options', 'Options to change verbosity and view of the console output' ) @@ -493,10 +495,11 @@ async def main(): log_level = logging.WARNING logger.setLevel(log_level) - if args.web: + if args.web is not None: from maigret.web.app import app - port = args.web if isinstance(args.web, int) else 5000 - app.run(port=port) # Use the port from arguments + port = args.web if args.web else 5000 # args.web is either the specified port or 5000 by const + app.run(port=port) + # Usernames initial list usernames = { From c7639b9eec247fe97ccaf4bf6858c9127286a322 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Sat, 14 Dec 2024 01:19:20 +0100 Subject: [PATCH 6/8] fix to make pull request --- .gitignore | 10 +--------- maigret/resources/data.json | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 76a6d4a2..99a9ca03 100644 --- a/.gitignore +++ b/.gitignore @@ -42,12 +42,4 @@ settings.json # other *.egg-info -build -lib/vis-9.1.2/vis-network.min.js -lib/bindings/utils.js -lib/tom-select/tom-select.complete.min.js -lib/tom-select/tom-select.css -lib/vis-9.1.2/vis-network.css -poetry.lock -filetree.sh -poetry.lock +build \ No newline at end of file diff --git a/maigret/resources/data.json b/maigret/resources/data.json index c9b66988..45e80d57 100644 --- a/maigret/resources/data.json +++ b/maigret/resources/data.json @@ -17476,7 +17476,7 @@ "method": "vimeo" }, "headers": { - "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzQwMzc5MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiM2U2ZWQ1MDYtZTU0OC00ZGIwLWI4YTMtMzdiZWMyYzRiYTJiIn0.vojHtXWsDNBtjQjoVm6DSV9XHhWzu-PUMwjOJouMkG8" + "Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM5Njc3MjAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiNGJkNDE4NzktM2VhOS00ZWRiLWIzZDUtNjAyNjQ3YjMyNTVhIn0.kPbKREujSfYsisyF0pS_HskTapRlHBfVLRw4cis1ezk" }, "urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1", "checkType": "status_code", From f8f7c996caccbe619e9378f1257d362646ef03b5 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Sat, 14 Dec 2024 12:20:51 +0100 Subject: [PATCH 7/8] fix poetry --- poetry.lock | 72 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 67 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 44ced517..6fa075b5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -151,13 +151,13 @@ python-socks = {version = ">=2.4.3,<3.0.0", extras = ["asyncio"]} [[package]] name = "aiosignal" -version = "1.3.1" +version = "1.3.2" description = "aiosignal: a list of registered asynchronous callbacks" optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, - {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, + {file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"}, + {file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"}, ] [package.dependencies] @@ -192,6 +192,23 @@ files = [ [package.extras] with-fonttools = ["fonttools (>=4.0)"] +[[package]] +name = "asgiref" +version = "3.8.1" +description = "ASGI specs, helper code, and adapters" +optional = false +python-versions = ">=3.8" +files = [ + {file = "asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47"}, + {file = "asgiref-3.8.1.tar.gz", hash = "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} + +[package.extras] +tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] + [[package]] name = "asn1crypto" version = "1.5.1" @@ -315,6 +332,17 @@ d = ["aiohttp (>=3.10)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "blinker" +version = "1.9.0" +description = "Fast, simple object-to-object and broadcast signaling" +optional = false +python-versions = ">=3.9" +files = [ + {file = "blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc"}, + {file = "blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf"}, +] + [[package]] name = "certifi" version = "2024.8.30" @@ -771,6 +799,29 @@ mccabe = ">=0.7.0,<0.8.0" pycodestyle = ">=2.12.0,<2.13.0" pyflakes = ">=3.2.0,<3.3.0" +[[package]] +name = "flask" +version = "3.1.0" +description = "A simple framework for building complex web applications." +optional = false +python-versions = ">=3.9" +files = [ + {file = "flask-3.1.0-py3-none-any.whl", hash = "sha256:d667207822eb83f1c4b50949b1623c8fc8d51f2341d65f72e1a1815397551136"}, + {file = "flask-3.1.0.tar.gz", hash = "sha256:5f873c5184c897c8d9d1b05df1e3d01b14910ce69607a117bd3277098a5836ac"}, +] + +[package.dependencies] +asgiref = {version = ">=3.2", optional = true, markers = "extra == \"async\""} +blinker = ">=1.9" +click = ">=8.1.3" +itsdangerous = ">=2.2" +Jinja2 = ">=3.1.2" +Werkzeug = ">=3.1" + +[package.extras] +async = ["asgiref (>=3.2)"] +dotenv = ["python-dotenv"] + [[package]] name = "frozenlist" version = "1.5.0" @@ -997,6 +1048,17 @@ qtconsole = ["qtconsole"] test = ["packaging", "pickleshare", "pytest", "pytest-asyncio (<0.22)", "testpath"] test-extra = ["curio", "ipython[test]", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "trio"] +[[package]] +name = "itsdangerous" +version = "2.2.0" +description = "Safely pass data to untrusted environments and back." +optional = false +python-versions = ">=3.8" +files = [ + {file = "itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef"}, + {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"}, +] + [[package]] name = "jedi" version = "0.19.2" @@ -2950,4 +3012,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "8074573cbda8b96a0c5e85c5ab04b5f1a62a6e84dffbad5fd7a1c4cdff8a0a82" +content-hash = "b25ba6ce790999bbdbd4e6892dd56c84d359684824b49c4f0dd882e1dcbedc0d" From 86ea0b9212901eb89c7181d23e22f27f3d20427d Mon Sep 17 00:00:00 2001 From: Soxoj Date: Sun, 15 Dec 2024 12:57:01 +0100 Subject: [PATCH 8/8] CLI test fixes --- Makefile | 2 +- maigret/maigret.py | 12 ++-- maigret/web/app.py | 156 +++++++++++++++++++++++++++++---------------- tests/test_cli.py | 13 ++-- 4 files changed, 117 insertions(+), 66 deletions(-) diff --git a/Makefile b/Makefile index 534ddf9b..1e0bdf0f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ LINT_FILES=maigret wizard.py tests test: - coverage run --source=./maigret -m pytest tests + coverage run --source=./maigret,./maigret/web -m pytest tests coverage report -m coverage html diff --git a/maigret/maigret.py b/maigret/maigret.py index 133c23c5..01620eff 100755 --- a/maigret/maigret.py +++ b/maigret/maigret.py @@ -330,11 +330,9 @@ def setup_arguments_parser(settings: Settings): type=int, nargs='?', const=5000, # default if --web is provided without a port - default=None, + default=settings.web_interface_port, help="Launches the web interface on the specified port (default: 5000 if no PORT is provided).", ) - - output_group = parser.add_argument_group( 'Output options', 'Options to change verbosity and view of the console output' ) @@ -494,12 +492,14 @@ async def main(): elif args.verbose: log_level = logging.WARNING logger.setLevel(log_level) - + if args.web is not None: from maigret.web.app import app - port = args.web if args.web else 5000 # args.web is either the specified port or 5000 by const - app.run(port=port) + port = ( + args.web if args.web else 5000 + ) # args.web is either the specified port or 5000 by const + app.run(port=port) # Usernames initial list usernames = { diff --git a/maigret/web/app.py b/maigret/web/app.py index a4c1bb5b..1494e6f1 100644 --- a/maigret/web/app.py +++ b/maigret/web/app.py @@ -1,5 +1,14 @@ # app.py -from flask import Flask, render_template, request, send_file, Response, flash, redirect, url_for +from flask import ( + Flask, + render_template, + request, + send_file, + Response, + flash, + redirect, + url_for, +) import logging import os import asyncio @@ -26,17 +35,19 @@ os.makedirs(UPLOAD_FOLDER, exist_ok=True) os.makedirs(REPORTS_FOLDER, exist_ok=True) + def setup_logger(log_level, name): logger = logging.getLogger(name) logger.setLevel(log_level) return logger + async def maigret_search(username, options): logger = setup_logger(logging.WARNING, 'maigret') try: db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE) sites = db.ranked_sites_dict(top=int(options.get('top_sites', 500))) - + results = await maigret.search( username=username, site_dict=sites, @@ -50,6 +61,7 @@ async def maigret_search(username, options): logger.error(f"Error during search: {str(e)}") raise + async def search_multiple_usernames(usernames, options): results = [] for username in usernames: @@ -60,109 +72,140 @@ async def search_multiple_usernames(usernames, options): logging.error(f"Error searching username {username}: {str(e)}") return results + def process_search_task(usernames, options, timestamp): try: # Setup event loop for async operations loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) - + # Run the search - general_results = loop.run_until_complete(search_multiple_usernames(usernames, options)) - + general_results = loop.run_until_complete( + search_multiple_usernames(usernames, options) + ) + # Create session folder session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}") os.makedirs(session_folder, exist_ok=True) - + # Save the combined graph graph_path = os.path.join(session_folder, "combined_graph.html") - maigret.report.save_graph_report(graph_path, general_results, MaigretDatabase().load_from_path(MAIGRET_DB_FILE)) - + maigret.report.save_graph_report( + graph_path, + general_results, + MaigretDatabase().load_from_path(MAIGRET_DB_FILE), + ) + # Save individual reports individual_reports = [] for username, id_type, results in general_results: report_base = os.path.join(session_folder, f"report_{username}") - + csv_path = f"{report_base}.csv" json_path = f"{report_base}.json" pdf_path = f"{report_base}.pdf" html_path = f"{report_base}.html" - + context = generate_report_context(general_results) - + maigret.report.save_csv_report(csv_path, username, results) - maigret.report.save_json_report(json_path, username, results, report_type='ndjson') + maigret.report.save_json_report( + json_path, username, results, report_type='ndjson' + ) maigret.report.save_pdf_report(pdf_path, context) maigret.report.save_html_report(html_path, context) claimed_profiles = [] for site_name, site_data in results.items(): - if (site_data.get('status') and - site_data['status'].status == maigret.result.MaigretCheckStatus.CLAIMED): - claimed_profiles.append({ - 'site_name': site_name, - 'url': site_data.get('url_user', ''), - 'tags': site_data.get('status').tags if site_data.get('status') else [] - }) - - individual_reports.append({ - 'username': username, - 'csv_file': os.path.join(f"search_{timestamp}", f"report_{username}.csv"), - 'json_file': os.path.join(f"search_{timestamp}", f"report_{username}.json"), - 'pdf_file': os.path.join(f"search_{timestamp}", f"report_{username}.pdf"), - 'html_file': os.path.join(f"search_{timestamp}", f"report_{username}.html"), - 'claimed_profiles': claimed_profiles, - }) - + if ( + site_data.get('status') + and site_data['status'].status + == maigret.result.MaigretCheckStatus.CLAIMED + ): + claimed_profiles.append( + { + 'site_name': site_name, + 'url': site_data.get('url_user', ''), + 'tags': ( + site_data.get('status').tags + if site_data.get('status') + else [] + ), + } + ) + + individual_reports.append( + { + 'username': username, + 'csv_file': os.path.join( + f"search_{timestamp}", f"report_{username}.csv" + ), + 'json_file': os.path.join( + f"search_{timestamp}", f"report_{username}.json" + ), + 'pdf_file': os.path.join( + f"search_{timestamp}", f"report_{username}.pdf" + ), + 'html_file': os.path.join( + f"search_{timestamp}", f"report_{username}.html" + ), + 'claimed_profiles': claimed_profiles, + } + ) + # Save results and mark job as complete job_results[timestamp] = { 'status': 'completed', 'session_folder': f"search_{timestamp}", 'graph_file': os.path.join(f"search_{timestamp}", "combined_graph.html"), 'usernames': usernames, - 'individual_reports': individual_reports + 'individual_reports': individual_reports, } except Exception as e: - job_results[timestamp] = { - 'status': 'failed', - 'error': str(e) - } + job_results[timestamp] = {'status': 'failed', 'error': str(e)} finally: background_jobs[timestamp]['completed'] = True + @app.route('/') def index(): return render_template('index.html') + @app.route('/search', methods=['POST']) def search(): usernames_input = request.form.get('usernames', '').strip() if not usernames_input: flash('At least one username is required', 'danger') return redirect(url_for('index')) - - usernames = [u.strip() for u in usernames_input.replace(',', ' ').split() if u.strip()] - + + usernames = [ + u.strip() for u in usernames_input.replace(',', ' ').split() if u.strip() + ] + # Create timestamp for this search session timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - + logging.info(f"Starting search for usernames: {usernames}") - + options = { 'top_sites': request.form.get('top_sites', '500'), 'timeout': request.form.get('timeout', '30'), 'id_type': 'username', # fixed as username 'use_cookies': 'use_cookies' in request.form, } - + # Start background job background_jobs[timestamp] = { 'completed': False, - 'thread': Thread(target=process_search_task, args=(usernames, options, timestamp)) + 'thread': Thread( + target=process_search_task, args=(usernames, options, timestamp) + ), } background_jobs[timestamp]['thread'].start() - + logging.info(f"Search job started with timestamp: {timestamp}") - + # Redirect to status page return redirect(url_for('status', timestamp=timestamp)) @@ -170,19 +213,19 @@ def search(): @app.route('/status/') def status(timestamp): logging.info(f"Status check for timestamp: {timestamp}") - + # Validate timestamp if timestamp not in background_jobs: flash('Invalid search session', 'danger') return redirect(url_for('index')) - + # Check if job is completed if background_jobs[timestamp]['completed']: result = job_results.get(timestamp) if not result: flash('No results found for this search session', 'warning') return redirect(url_for('index')) - + if result['status'] == 'completed': # Redirect to results page once done return redirect(url_for('results', session_id=result['session_folder'])) @@ -190,7 +233,7 @@ def status(timestamp): error_msg = result.get('error', 'Unknown error occurred') flash(f'Search failed: {error_msg}', 'danger') return redirect(url_for('index')) - + # If job is still running, show status page with a simple spinner return render_template('status.html', timestamp=timestamp) @@ -200,23 +243,25 @@ def results(session_id): if not session_id.startswith('search_'): flash('Invalid results session format', 'danger') return redirect(url_for('index')) - + result_data = next( - (r for r in job_results.values() - if r.get('status') == 'completed' and r['session_folder'] == session_id), - None + ( + r + for r in job_results.values() + if r.get('status') == 'completed' and r['session_folder'] == session_id + ), + None, ) - - return render_template( 'results.html', usernames=result_data['usernames'], graph_file=result_data['graph_file'], individual_reports=result_data['individual_reports'], - timestamp=session_id.replace('search_', '') + timestamp=session_id.replace('search_', ''), ) + @app.route('/reports/') def download_report(filename): try: @@ -226,9 +271,10 @@ def download_report(filename): logging.error(f"Error serving file {filename}: {str(e)}") return "File not found", 404 + if __name__ == '__main__': logging.basicConfig( level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', ) app.run(debug=True) diff --git a/tests/test_cli.py b/tests/test_cli.py index 16d78a25..60e0d9eb 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -42,6 +42,7 @@ 'use_disabled_sites': False, 'username': [], 'verbose': False, + 'web': 5000, 'with_domains': False, 'xmind': False, } @@ -55,7 +56,8 @@ def test_args_search_mode(argparser): want_args = dict(DEFAULT_ARGS) want_args.update({'username': ['username']}) - assert args == Namespace(**want_args) + for arg in vars(args): + assert getattr(args, arg) == want_args[arg] def test_args_search_mode_several_usernames(argparser): @@ -66,7 +68,8 @@ def test_args_search_mode_several_usernames(argparser): want_args = dict(DEFAULT_ARGS) want_args.update({'username': ['username1', 'username2']}) - assert args == Namespace(**want_args) + for arg in vars(args): + assert getattr(args, arg) == want_args[arg] def test_args_self_check_mode(argparser): @@ -81,7 +84,8 @@ def test_args_self_check_mode(argparser): } ) - assert args == Namespace(**want_args) + for arg in vars(args): + assert getattr(args, arg) == want_args[arg] def test_args_multiple_sites(argparser): @@ -97,4 +101,5 @@ def test_args_multiple_sites(argparser): } ) - assert args == Namespace(**want_args) + for arg in vars(args): + assert getattr(args, arg) == want_args[arg]