mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-09 20:19:28 +01:00
[youtube] Improve cache and add an option to print the extracted signatures
This commit is contained in:
parent
c4417ddb61
commit
edf3e38ebd
@ -40,7 +40,7 @@ class FileDownloader(object):
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
cachedir: Location of the cache files in the filesystem.
|
||||
False to disable filesystem cache.
|
||||
"NONE" to disable filesystem cache.
|
||||
"""
|
||||
|
||||
params = None
|
||||
|
@ -167,6 +167,7 @@ def parseOpts(overrideArguments=None):
|
||||
help='Output descriptions of all supported extractors', default=False)
|
||||
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||
general.add_option('--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache', help='Location in the filesystem where youtube-dl can store downloaded information permanently. NONE to disable filesystem caching, %default by default')
|
||||
|
||||
|
||||
selection.add_option('--playlist-start',
|
||||
@ -272,6 +273,10 @@ def parseOpts(overrideArguments=None):
|
||||
verbosity.add_option('--dump-intermediate-pages',
|
||||
action='store_true', dest='dump_intermediate_pages', default=False,
|
||||
help='print downloaded pages to debug problems(very verbose)')
|
||||
verbosity.add_option('--youtube-print-sig-code',
|
||||
action='store_true', dest='youtube_print_sig_code', default=False,
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
|
||||
|
||||
filesystem.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
|
||||
@ -613,6 +618,7 @@ def _real_main(argv=None):
|
||||
'min_filesize': opts.min_filesize,
|
||||
'max_filesize': opts.max_filesize,
|
||||
'daterange': date,
|
||||
'youtube_print_sig_code': opts.youtube_print_sig_code
|
||||
})
|
||||
|
||||
if opts.verbose:
|
||||
|
@ -1,13 +1,13 @@
|
||||
# coding: utf-8
|
||||
|
||||
import collections
|
||||
import errno
|
||||
import itertools
|
||||
import io
|
||||
import json
|
||||
import operator
|
||||
import os.path
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import string
|
||||
import struct
|
||||
@ -17,6 +17,7 @@ import zlib
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_chr,
|
||||
compat_http_client,
|
||||
compat_parse_qs,
|
||||
compat_urllib_error,
|
||||
@ -30,6 +31,7 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
orderedSet,
|
||||
write_json_file,
|
||||
)
|
||||
|
||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
@ -433,18 +435,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
# Read from filesystem cache
|
||||
func_id = '%s_%s_%d' % (player_type, player_id, slen)
|
||||
assert os.path.basename(func_id) == func_id
|
||||
cache_dir = self.downloader.params.get('cachedir',
|
||||
u'~/.youtube-dl/cache')
|
||||
cache_dir = self._downloader.params.get('cachedir',
|
||||
u'~/.youtube-dl/cache')
|
||||
|
||||
if cache_dir is not False:
|
||||
if cache_dir != u'NONE':
|
||||
cache_fn = os.path.join(os.path.expanduser(cache_dir),
|
||||
u'youtube-sigfuncs',
|
||||
func_id + '.json')
|
||||
try:
|
||||
with io.open(cache_fn, '', encoding='utf-8') as cachef:
|
||||
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
|
||||
cache_spec = json.load(cachef)
|
||||
return lambda s: u''.join(s[i] for i in cache_spec)
|
||||
except OSError:
|
||||
except IOError:
|
||||
pass # No cache available
|
||||
|
||||
if player_type == 'js':
|
||||
@ -464,13 +466,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
assert False, 'Invalid player type %r' % player_type
|
||||
|
||||
if cache_dir is not False:
|
||||
cache_res = res(map(compat_chr, range(slen)))
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
shutil.makedirs(os.path.dirname(cache_fn))
|
||||
write_json_file(cache_spec, cache_fn)
|
||||
try:
|
||||
cache_res = res(map(compat_chr, range(slen)))
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
try:
|
||||
os.makedirs(os.path.dirname(cache_fn))
|
||||
except OSError as ose:
|
||||
if ose.errno != errno.EEXIST:
|
||||
raise
|
||||
write_json_file(cache_spec, cache_fn)
|
||||
except Exception as e:
|
||||
tb = traceback.format_exc()
|
||||
self._downloader.report_warning(
|
||||
u'Writing cache to %r failed: %s' % (cache_fn, tb))
|
||||
|
||||
return res
|
||||
|
||||
def _print_sig_code(self, func, slen):
    """Print Python code equivalent to an extracted signature function.

    func is applied to a probe string whose character at position k has
    code point k, so the code points of the result spell out the index
    permutation that func performs.  That permutation is rendered as a
    sum of index and slice expressions on ``s`` and written out with
    self.to_screen.

    func -- callable mapping a signature string to its decrypted form
    slen -- signature length the generated code applies to
    """
    def gen_sig_code(idxs):
        # Yield one expression per maximal run of consecutive indices
        # (ascending or descending); isolated indices become s[k].
        def _genslice(start, end, step):
            starts = u'' if start == 0 else str(start)
            stop = end + step
            # A descending run that reaches index 0 would need a stop of
            # -1, which Python reads as "the last element" and so yields
            # an empty slice.  Leave the stop out in that case.
            ends = (u':%d' % stop) if stop >= 0 else u':'
            steps = u'' if step == 1 else (u':%d' % step)
            return u's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            return

        step = None
        start = None
        # Pre-bind i so that a single-element list, for which the pair
        # loop below never runs, still emits its one index.
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue  # The current run keeps going
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Two neighbouring indices open a new run
                step = i - prev
                start = prev
                continue
            else:
                yield u's[%d]' % prev
        if step is None:
            yield u's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Build a real string: func is called with a string in normal use,
    # and on Python 3 a bare map object can be neither indexed nor sliced.
    test_string = u''.join(map(compat_chr, range(slen)))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    # An empty permutation would otherwise produce a dangling "return".
    expr_code = u' + '.join(gen_sig_code(cache_spec)) or u"''"
    code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
    self.to_screen(u'Extracted signature:\n' + code)
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
funcname = self._search_regex(
|
||||
r'signature=([a-zA-Z]+)', jscode,
|
||||
@ -1007,7 +1051,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
video_id, player_url, len(s)
|
||||
)
|
||||
self._player_cache[player_url] = func
|
||||
return self._player_cache[player_url](s)
|
||||
func = self._player_cache[player_url]
|
||||
if self._downloader.params.get('youtube_print_sig_code'):
|
||||
self._print_sig_code(func, len(s))
|
||||
return func(s)
|
||||
except Exception as e:
|
||||
tb = traceback.format_exc()
|
||||
self._downloader.report_warning(
|
||||
|
Loading…
Reference in New Issue
Block a user