Skip to content

Commit

Permalink
cli param for ytdlp_tmpdir
Browse files Browse the repository at this point in the history
  • Loading branch information
Barbara Miller committed Dec 12, 2024
1 parent a49b978 commit a86962e
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 2 deletions.
1 change: 1 addition & 0 deletions brozzler/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ def browse_page(
skip_extract_outlinks=False,
skip_visit_hashtags=False,
skip_youtube_dl=False,
ytdlp_tmpdir = '/tmp',
simpler404=False,
page_timeout=300,
behavior_timeout=900,
Expand Down
14 changes: 14 additions & 0 deletions brozzler/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,12 @@ def brozzle_page(argv=None):
arg_parser.add_argument(
"--skip-youtube-dl", dest="skip_youtube_dl", action="store_true"
)
# Base directory for yt-dlp temporary files; the parsed value is passed on
# below as the ytdlp_tmpdir keyword argument. Defaults to /tmp.
arg_parser.add_argument(
"--ytdlp_tmpdir",
dest="ytdlp_tmpdir",
default="/tmp",
help="specify a temp dir for ytdlp; defaults to /tmp",
)
arg_parser.add_argument("--simpler404", dest="simpler404", action="store_true")
add_common_options(arg_parser, argv)

Expand Down Expand Up @@ -292,6 +298,7 @@ def brozzle_page(argv=None):
skip_extract_outlinks=args.skip_extract_outlinks,
skip_visit_hashtags=args.skip_visit_hashtags,
skip_youtube_dl=args.skip_youtube_dl,
ytdlp_tmpdir = args.ytdlp_tmpdir,
simpler404=args.simpler404,
screenshot_full_page=args.screenshot_full_page,
download_throughput=args.download_throughput,
Expand Down Expand Up @@ -533,6 +540,12 @@ def brozzler_worker(argv=None):
action="store_true",
help=argparse.SUPPRESS,
)
# Hidden worker option: base directory under which the per-page yt-dlp
# temporary directories are created (defaults to /tmp).
# help must be the argparse.SUPPRESS sentinel itself, not the string
# "argparse.SUPPRESS"; the quoted form is printed verbatim in --help
# instead of hiding the option.
arg_parser.add_argument(
    "--ytdlp_tmpdir",
    dest="ytdlp_tmpdir",
    default="/tmp",
    help=argparse.SUPPRESS,
)
arg_parser.add_argument(
"--stealth",
dest="stealth",
Expand Down Expand Up @@ -613,6 +626,7 @@ def get_skip_av_seeds():
skip_extract_outlinks=args.skip_extract_outlinks,
skip_visit_hashtags=args.skip_visit_hashtags,
skip_youtube_dl=args.skip_youtube_dl,
ytdlp_tmpdir=args.ytdlp_tmpdir,
stealth=args.stealth,
metrics_port=args.metrics_port,
registry_url=args.registry_url,
Expand Down
3 changes: 3 additions & 0 deletions brozzler/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(
skip_extract_outlinks=False,
skip_visit_hashtags=False,
skip_youtube_dl=False,
ytdlp_tmpdir='/tmp',
simpler404=False,
screenshot_full_page=False,
page_timeout=300,
Expand All @@ -89,6 +90,7 @@ def __init__(
self._skip_extract_outlinks = skip_extract_outlinks
self._skip_visit_hashtags = skip_visit_hashtags
self._skip_youtube_dl = skip_youtube_dl
self._ytdlp_tmpdir = ytdlp_tmpdir
self._simpler404 = simpler404
self._screenshot_full_page = screenshot_full_page
self._page_timeout = page_timeout
Expand Down Expand Up @@ -445,6 +447,7 @@ def _on_service_worker_version_updated(chrome_msg):
skip_extract_outlinks=self._skip_extract_outlinks,
skip_visit_hashtags=self._skip_visit_hashtags,
skip_youtube_dl=self._skip_youtube_dl,
ytdlp_tmpdir = self._ytdlp_tmpdir,
simpler404=self._simpler404,
screenshot_full_page=self._screenshot_full_page,
page_timeout=self._page_timeout,
Expand Down
4 changes: 2 additions & 2 deletions brozzler/ydl.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
thread_local = threading.local()

ytdlp_proxy = ""
ytdlp_tmp = "/tmp"
ytdlp_wait = 10
max_ytdlp_attempts = 4

Expand Down Expand Up @@ -421,7 +420,8 @@ def do_youtube_dl(worker, site, page):
Returns:
`list` of `str`: outlink urls
"""
with tempfile.TemporaryDirectory(prefix="brzl-ydl-", dir=ytdlp_tmp) as tempdir:
with tempfile.TemporaryDirectory(prefix="brzl-ydl-", dir=worker._ytdlp_tmpdir) as tempdir:
logging.info("using temporary directory: %s", tempdir)
ydl = _build_youtube_dl(worker, tempdir, site, page)
ie_result = _try_youtube_dl(worker, ydl, site, page)
outlinks = set()
Expand Down

0 comments on commit a86962e

Please sign in to comment.