From d3fd6f2d394b866dc27e29ab50b7ab19f1eb2e37 Mon Sep 17 00:00:00 2001 From: Peter Lamberg Date: Mon, 22 Jun 2015 19:16:53 +0300 Subject: [PATCH 1/5] Don't use open-uri to download streams, because it has a known bug in handling redirects and at least one feed (https://feeds.feedburner.com/FurloBros) triggers it. (cherry picked from commit d985e90) --- bin/podcatcher | 64 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/bin/podcatcher b/bin/podcatcher index 38efb0b..e004c47 100755 --- a/bin/podcatcher +++ b/bin/podcatcher @@ -1339,8 +1339,40 @@ class Cache @cache.sort!() do |e,e2| e.file.mtime() <=> e2.file.mtime() end - end - def createplaylist(urls) + end + #Work around bug in open-uri redirect handling. http://stackoverflow.com/q/27407938/1148030 + #http://docs.ruby-lang.org/en/2.0.0/Net/HTTP.html#class-Net::HTTP-label-Response+Data + def download(download_url, content) + content.redirection_url = nil + done = false + for redirect_count in 0...10 + Net::HTTP.start(download_url.host, download_url.port, :use_ssl => download_url.scheme == 'https') do |http| + request = Net::HTTP::Get.new download_url + request['User-Agent'] = USER_AGENT + request['Referer'] = content.feedurl if content.feedurl and (content.feedurl =~ %r{^http:} or content.feedurl =~ %r{^ftp:}) + response = http.request(request) + case response + when Net::HTTPSuccess + content.file = filename(content, @cache_dir) + content.file.open("wb") do |fout| + response.read_body do |chunk| + fout.write chunk + end + end + done = true + when Net::HTTPRedirection + redirection_url = URI(response['location']) + $stderr.puts "Redirected to #{redirection_url}" if @opt.verbose + content.redirection_url = redirection_url # content.redirection_url is used for finding the correct filename in case of redirection + download_url = redirection_url + else + raise "Unknown response #{response} from #{download_url}" + end + end + end + raise ArgumentError, 'too many HTTP redirects' if not done + end + def createplaylist(urls) playlist = Playlist.new @opt.playlist_type if @opt.strategy == :cache playlist.start @@ -1911,29 +1943,9 @@ class Cache end else $stderr.puts "Fetching: #{content.url} (#{content.size.to_s} bytes)" if @opt.verbose and i == 1 - if not @opt.simulate - headers = {"User-Agent" => USER_AGENT} - headers["Referer"] = content.feedurl if content.feedurl and (content.feedurl =~ %r{^http:} or content.feedurl =~ %r{^ftp:}) + if not @opt.simulate content.download_url = content.url unless content.download_url - open(content.download_url, headers) do |fin| - if fin.base_uri.instance_of?(URI::HTTP) - if fin.status[0] =~ Regexp.new('^3') - content.download_url = fin.meta['location'] - raise "redirecting" - elsif fin.status[0] !~ Regexp.new('^2') - raise 'failed' - end - end - # write content to cache - content.redirection_url = fin.base_uri.to_s # content.redirection_url is used for finding the correct filename in case of redirection - content.redirection_url = nil if content.redirection_url.eql?(content.url) - content.file = filename(content, @cache_dir) - content.file.open("wb") do |fout| - fin.each_byte() do |b| - fout.putc b - end - end - end + download(URI(content.download_url), content) content.size = content.file.size @history.add content end @@ -1946,7 +1958,7 @@ class Cache rescue SystemExit exit 1 rescue Exception - end + end $stderr.puts "Attempt #{i} aborted" if @opt.verbose if content.file and i == @opt.retries if content.file.exist? @@ -2058,7 +2070,7 @@ private rescue SystemExit exit 1 rescue Exception - end + end $stderr.puts "Attempt #{i} aborted" if @opt.verbose doc = "" sleep 5 From 82693f9813636035f9e8c8ef591e2b422c1f5904 Mon Sep 17 00:00:00 2001 From: Peter Lamberg Date: Mon, 22 Jun 2015 22:06:08 +0300 Subject: [PATCH 2/5] .request_uri is 1.9 interim hack https://bugs.ruby-lang.org/issues/7973 (cherry picked from commit f8d5e49) --- bin/podcatcher | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/podcatcher b/bin/podcatcher index e004c47..25b51e1 100755 --- a/bin/podcatcher +++ b/bin/podcatcher @@ -1347,7 +1347,7 @@ class Cache done = false for redirect_count in 0...10 Net::HTTP.start(download_url.host, download_url.port, :use_ssl => download_url.scheme == 'https') do |http| - request = Net::HTTP::Get.new download_url + request = Net::HTTP::Get.new download_url.request_uri #.request_uri is 1.9 interim hack https://bugs.ruby-lang.org/issues/7973 request['User-Agent'] = USER_AGENT request['Referer'] = content.feedurl if content.feedurl and (content.feedurl =~ %r{^http:} or content.feedurl =~ %r{^ftp:}) response = http.request(request) From 05b0128ec7cef015f79d693bfcaeff22b8012884 Mon Sep 17 00:00:00 2001 From: Peter Lamberg Date: Mon, 22 Jun 2015 22:53:57 +0300 Subject: [PATCH 3/5] Fix handling of response in download. --- bin/podcatcher | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/bin/podcatcher b/bin/podcatcher index 25b51e1..141c7c1 100755 --- a/bin/podcatcher +++ b/bin/podcatcher @@ -1350,23 +1350,24 @@ class Cache request = Net::HTTP::Get.new download_url.request_uri #.request_uri is 1.9 interim hack https://bugs.ruby-lang.org/issues/7973 request['User-Agent'] = USER_AGENT request['Referer'] = content.feedurl if content.feedurl and (content.feedurl =~ %r{^http:} or content.feedurl =~ %r{^ftp:}) - response = http.request(request) - case response - when Net::HTTPSuccess - content.file = filename(content, @cache_dir) - content.file.open("wb") do |fout| - response.read_body do |chunk| - fout.write chunk + http.request(request) do |response| + case response + when Net::HTTPSuccess + content.file = filename(content, @cache_dir) + content.file.open("wb") do |fout| + response.read_body do |chunk| + fout.write chunk + end end + done = true + when Net::HTTPRedirection + redirection_url = URI(response['location']) + $stderr.puts "Redirected to #{redirection_url}" if @opt.verbose + content.redirection_url = redirection_url # content.redirection_url is used for finding the correct filename in case of redirection + download_url = redirection_url + else + raise "Unknown response #{response} from #{download_url}" end - done = true - when Net::HTTPRedirection - redirection_url = URI(response['location']) - $stderr.puts "Redirected to #{redirection_url}" if @opt.verbose - content.redirection_url = redirection_url # content.redirection_url is used for finding the correct filename in case of redirection - download_url = redirection_url - else - raise "Unknown response #{response} from #{download_url}" end end end From 2efd62a6ca8c9057cf2e102efb7de6c2a0a7f1c1 Mon Sep 17 00:00:00 2001 From: Peter Lamberg Date: Mon, 22 Jun 2015 23:09:15 +0300 Subject: [PATCH 4/5] Fix redirection loop logic. --- bin/podcatcher | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/podcatcher b/bin/podcatcher index 141c7c1..918b82c 100755 --- a/bin/podcatcher +++ b/bin/podcatcher @@ -1345,7 +1345,8 @@ class Cache def download(download_url, content) content.redirection_url = nil done = false - for redirect_count in 0...10 + redirect_count = 0 + while redirect_count < 10 and not done Net::HTTP.start(download_url.host, download_url.port, :use_ssl => download_url.scheme == 'https') do |http| request = Net::HTTP::Get.new download_url.request_uri #.request_uri is 1.9 interim hack https://bugs.ruby-lang.org/issues/7973 request['User-Agent'] = USER_AGENT @@ -1365,6 +1366,7 @@ class Cache $stderr.puts "Redirected to #{redirection_url}" if @opt.verbose content.redirection_url = redirection_url # content.redirection_url is used for finding the correct filename in case of redirection download_url = redirection_url + redirect_count += 1 else raise "Unknown response #{response} from #{download_url}" end From 93dcfa29cd103ca89e1cca1dea19ce927c121f8f Mon Sep 17 00:00:00 2001 From: Peter Lamberg Date: Sun, 9 Aug 2015 20:15:43 +0300 Subject: [PATCH 5/5] Make sure half downloaded files are deleted. --- bin/podcatcher | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/bin/podcatcher b/bin/podcatcher index 918b82c..cce7c73 100755 --- a/bin/podcatcher +++ b/bin/podcatcher @@ -1343,37 +1343,42 @@ class Cache #Work around bug in open-uri redirect handling. http://stackoverflow.com/q/27407938/1148030 #http://docs.ruby-lang.org/en/2.0.0/Net/HTTP.html#class-Net::HTTP-label-Response+Data def download(download_url, content) + content.file = nil content.redirection_url = nil done = false redirect_count = 0 - while redirect_count < 10 and not done - Net::HTTP.start(download_url.host, download_url.port, :use_ssl => download_url.scheme == 'https') do |http| - request = Net::HTTP::Get.new download_url.request_uri #.request_uri is 1.9 interim hack https://bugs.ruby-lang.org/issues/7973 - request['User-Agent'] = USER_AGENT - request['Referer'] = content.feedurl if content.feedurl and (content.feedurl =~ %r{^http:} or content.feedurl =~ %r{^ftp:}) - http.request(request) do |response| - case response - when Net::HTTPSuccess - content.file = filename(content, @cache_dir) - content.file.open("wb") do |fout| - response.read_body do |chunk| - fout.write chunk + begin + while redirect_count < 10 and not done + Net::HTTP.start(download_url.host, download_url.port, :use_ssl => download_url.scheme == 'https') do |http| + request = Net::HTTP::Get.new download_url.request_uri #.request_uri is 1.9 interim hack https://bugs.ruby-lang.org/issues/7973 + request['User-Agent'] = USER_AGENT + request['Referer'] = content.feedurl if content.feedurl and (content.feedurl =~ %r{^http:} or content.feedurl =~ %r{^ftp:}) + http.request(request) do |response| + case response + when Net::HTTPSuccess + content.file = filename(content, @cache_dir) + content.file.open("wb") do |fout| + response.read_body do |chunk| + fout.write chunk + end end + done = true + when Net::HTTPRedirection + redirection_url = URI(response['location']) + $stderr.puts "Redirected to #{redirection_url}" if @opt.verbose + content.redirection_url = redirection_url # content.redirection_url is used for finding the correct filename in case of redirection + download_url = redirection_url + redirect_count += 1 + else + raise "Unknown response #{response} from #{download_url}" end - done = true - when Net::HTTPRedirection - redirection_url = URI(response['location']) - $stderr.puts "Redirected to #{redirection_url}" if @opt.verbose - content.redirection_url = redirection_url # content.redirection_url is used for finding the correct filename in case of redirection - download_url = redirection_url - redirect_count += 1 - else - raise "Unknown response #{response} from #{download_url}" end end end + raise ArgumentError, 'too many HTTP redirects' if not done + rescue + content.file.unlink unless content.file.nil? # don't leave possible half downloaded file lying around end - raise ArgumentError, 'too many HTTP redirects' if not done end def createplaylist(urls) playlist = Playlist.new @opt.playlist_type