diff --git a/README.md b/README.md index e4b7973e5..023a3f376 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,7 @@ In its current state, this bot does exactly what it needs to do. However, improv I have tried to simplify the code so anyone can read it and start contributing at any skill level. Don't be shy :) contribute! - [ ] Creating better documentation and adding a command line interface. +- [ ] Login using cookie to bypass captcha - [x] Allowing the user to choose background music for their videos. - [x] Allowing users to choose a reddit thread instead of being randomized. - [x] Allowing users to choose a background that is picked instead of the Minecraft one. diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 6b56e9993..839334594 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -73,7 +73,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): with sync_playwright() as p: print_substep("Launching Headless Browser...") - browser = p.chromium.launch( + browser = p.firefox.launch( headless=True ) # headless=False will show the browser for debugging purposes # Device scale factor (or dsf for short) allows us to increase the resolution of the screenshots @@ -99,6 +99,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): page.goto("https://www.reddit.com/login", timeout=0) page.set_viewport_size(ViewportSize(width=1920, height=1080)) page.wait_for_load_state() + page.wait_for_timeout(timeout=500) page.locator(f'input[name="username"]').fill(settings.config["reddit"]["creds"]["username"]) page.locator(f'input[name="password"]').fill(settings.config["reddit"]["creds"]["password"]) @@ -160,10 +161,13 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): reddit_object["thread_title"], to_language=lang, translator="google", - ) + ) + + page.wait_for_timeout(2000) #small delay + page.wait_for_selector('shreddit-post > h1', timeout=5000) # we wait for the title to appere page.evaluate( - "tl_content => document.querySelector('[data-adclicklocation=\"title\"] > div > div > h1').textContent = tl_content", + "tl_content => document.querySelector('shreddit-post > h1').textContent = tl_content", texts_in_tl, ) else: @@ -177,12 +181,12 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): # zoom the body of the page page.evaluate("document.body.style.zoom=" + str(zoom)) # as zooming the body doesn't change the properties of the divs, we need to adjust for the zoom - location = page.locator('[data-test-id="post-content"]').bounding_box() + location = page.locator('shreddit-post').bounding_box() for i in location: location[i] = float("{:.2f}".format(location[i] * zoom)) page.screenshot(clip=location, path=postcontentpath) else: - page.locator('[data-test-id="post-content"]').screenshot(path=postcontentpath) + page.locator('shreddit-post').screenshot(path=postcontentpath) except Exception as e: print_substep("Something went wrong!", style="red") resp = input( @@ -220,7 +224,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): if page.locator('[data-testid="content-gate"]').is_visible(): page.locator('[data-testid="content-gate"] button').click() - page.goto(f"https://new.reddit.com/{comment['comment_url']}") + page.goto(f"https://new.reddit.com{comment['comment_url']}") # translate code @@ -230,30 +234,63 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): translator="google", to_language=settings.config["reddit"]["thread"]["post_lang"], ) + + page.wait_for_timeout(timeout=1500) page.evaluate( - '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content', + '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id}-comment-rtjson-content > div:nth-child(1) > p`).textContent = tl_content', [comment_tl, comment["comment_id"]], ) try: if settings.config["settings"]["zoom"] != 1: - # store zoom settings + if settings.config["settings"]["zoom"] != 1: + # Store zoom settings zoom = settings.config["settings"]["zoom"] - # zoom the body of the page - page.evaluate("document.body.style.zoom=" + str(zoom)) - # scroll comment into view - page.locator(f"#t1_{comment['comment_id']}").scroll_into_view_if_needed() - # as zooming the body doesn't change the properties of the divs, we need to adjust for the zoom - location = page.locator(f"#t1_{comment['comment_id']}").bounding_box() - for i in location: - location[i] = float("{:.2f}".format(location[i] * zoom)) - page.screenshot( - clip=location, - path=f"assets/temp/{reddit_id}/png/comment_{idx}.png", - ) + + # Zoom the body of the page + page.evaluate(f"document.body.style.zoom = {zoom}") + + # Scroll the parent of the comment into view + parent_locator = page.locator(f"#t1_{comment['comment_id']}-comment-rtjson-content").locator('xpath=..') + parent_locator.scroll_into_view_if_needed() + + # Get the bounding box of the parent element + location = parent_locator.bounding_box() + + # Ensure bounding box is valid + if location: + # Adjust for zoom (since zooming doesn't change the div's actual properties) + location['x'] = float("{:.2f}".format(location['x'] * zoom)) + location['y'] = float("{:.2f}".format(location['y'] * zoom)) + location['width'] = float("{:.2f}".format(location['width'] * zoom)) + location['height'] = float("{:.2f}".format(location['height'] * zoom)) + + # Adjust the height to be exactly 200px + location['height'] = 200 + + # Take a screenshot of the parent element with the adjusted height + page.screenshot( + clip=location, + path=f"assets/temp/{reddit_id}/png/comment_{idx}.png", + ) + else: + print("Could not get the bounding box of the parent element.") else: - page.locator(f"#t1_{comment['comment_id']}").screenshot( - path=f"assets/temp/{reddit_id}/png/comment_{idx}.png" - ) + # getting the bounding_box + parent_locator = page.locator(f"#t1_{comment['comment_id']}-comment-rtjson-content").locator('xpath=..') + location = parent_locator.bounding_box() + + # bounding_box is valid ? + if location: + # ajust 200px + location['height'] = 200 + + # make screenshot + page.screenshot( + path=f"assets/temp/{reddit_id}/png/comment_{idx}.png", + clip=location # resize to 200px + ) + else: + print("No se pudo obtener el cuadro delimitador del elemento.") except TimeoutError: del reddit_object["comments"] screenshot_num += 1