Skip to content

Commit

Permalink
[minhateca] Add extractor (Fixes #4094)
Browse files Browse the repository at this point in the history
  • Loading branch information
phihag committed Dec 4, 2014
1 parent 9776bc7 commit 4349c07
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 2 deletions.
1 change: 1 addition & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ def test_parse_filesize(self):
self.assertEqual(parse_filesize('2 MiB'), 2097152)
self.assertEqual(parse_filesize('5 GB'), 5000000000)
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
self.assertEqual(parse_filesize('1,24 KB'), 1240)

if __name__ == '__main__':
unittest.main()
1 change: 1 addition & 0 deletions youtube_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mgoon import MgoonIE
from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE
Expand Down
71 changes: 71 additions & 0 deletions youtube_dl/extractor/minhateca.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
int_or_none,
parse_filesize,
)


class MinhatecaIE(InfoExtractor):
_VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
_TEST = {
'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
'info_dict': {
'id': '125848331',
'ext': 'mp4',
'title': 'youtube-dl test video',
'thumbnail': 're:^https?://.*\.jpg$',
'filesize_approx': 1530000,
'duration': 9,
'view_count': int,
}
}

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

token = self._html_search_regex(
r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
webpage, 'request token')
token_data = [
('fileId', video_id),
('__RequestVerificationToken', token),
]
req = compat_urllib_request.Request(
'http://minhateca.com.br/action/License/Download',
data=compat_urllib_parse.urlencode(token_data))
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
data = self._download_json(
req, video_id, note='Downloading metadata')

video_url = data['redirectUrl']
title_str = self._html_search_regex(
r'<h1.*?>(.*?)</h1>', webpage, 'title')
title, _, ext = title_str.rpartition('.')
filesize_approx = parse_filesize(self._html_search_regex(
r'<p class="fileSize">(.*?)</p>',
webpage, 'file size approximation', fatal=False))
duration = int_or_none(self._html_search_regex(
r'(?s)<p class="fileLeng[ht][th]">.*?([0-9]+)\s*s',
webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex(
r'<p class="downloadsCounter">([0-9]+)</p>',
webpage, 'view count', fatal=False))

return {
'id': video_id,
'url': video_url,
'title': title,
'ext': ext,
'filesize_approx': filesize_approx,
'duration': duration,
'view_count': view_count,
'thumbnail': self._og_search_thumbnail(webpage),
}
7 changes: 5 additions & 2 deletions youtube_dl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,11 +1090,14 @@ def parse_filesize(s):
}

units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
m = re.match(
r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
if not m:
return None

return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')])
num_str = m.group('num').replace(',', '.')
mult = _UNIT_TABLE[m.group('unit')]
return int(float(num_str) * mult)


def get_term_width():
Expand Down

0 comments on commit 4349c07

Please sign in to comment.