diff --git a/.gitignore b/.gitignore index ecb98b18..603c63e8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,7 @@ test-results.xml # production for react-app build + +# python bytecode +.pyc +__pycache__ diff --git a/src/Utils/Logger.ts b/src/Utils/Logger.ts index 490e163b..090c6151 100644 --- a/src/Utils/Logger.ts +++ b/src/Utils/Logger.ts @@ -33,26 +33,38 @@ const isDebugMode = process.env.VSCODE_DEBUG_MODE === "true"; * - message: watch out */ function _logStr(severity: string, tag: string, ...msgs: MsgList) { - let logStrList = []; - if (msgs.length === 0) { // Do not print return ""; } - for (let m of msgs) { - if (m instanceof Error) { - const err = m as Error; - logStrList.push( - `\nError was thrown:\n- name: ${err.name}\n- message: ${err.message}` - ); - } else if (typeof m === "object") { - logStrList.push(`\n${m.constructor.name}: ${JSON.stringify(m)}`); - } else { - logStrList.push(`${m}`); + const flatten = (msgs: MsgList) => { + let logStrList = []; + for (let m of msgs) { + if (m instanceof Error) { + const err = m as Error; + logStrList.push( + `\nError was thrown:\n- name: ${err.name}\n- message: ${err.message}` + ); + } else if (typeof m === "object") { + logStrList.push(`\n${m.constructor.name}: ${JSON.stringify(m)}`); + } else { + logStrList.push(`${m}`); + } } - } - const msg = logStrList.join(" "); + return logStrList.join(" "); + }; + + const redact = (msg: string) => { + // Replace Github Personal Access Tokens with ******** + const classicPAT = "ghp_[a-zA-Z0-9]+"; + const findGrainedPAT = "github_pat_[a-zA-Z0-9_]+"; + const regex = new RegExp(`(${classicPAT})|(${findGrainedPAT})`, "g"); + + return msg.replace(regex, "*********************"); + }; + + const msg = redact(flatten(msgs)); const time = new Date().toLocaleString(); return `[${time}][${tag}][${severity}] ${msg}`; @@ -116,6 +128,8 @@ export class Logger { * @brief Print msg and a line feed character without adding '[time][tag][severity]' * @detail When log is long and need to 
be splitted into many chunks, append() could be used * after the first chunk. + * + * @todo streamify logger to format consistently (ex. redact is not applied to this function) */ public static appendLine(msg: string) { Logger.checkShow(); @@ -126,6 +140,8 @@ export class Logger { * @brief Print msg without adding '[time][tag][severity]' * @detail When log is long and need to be splitted into many chunks, append() could be used * after the first chunk. + * + * @todo streamify logger to format consistently (ex. redact is not applied to this function) */ public static append(msg: string) { Logger.checkShow(); diff --git a/third_party/catapult/common/py_vulcanize/README.chromium b/third_party/catapult/common/py_vulcanize/README.chromium new file mode 100644 index 00000000..128566e4 --- /dev/null +++ b/third_party/catapult/common/py_vulcanize/README.chromium @@ -0,0 +1,9 @@ +Name: py_vulcanize +URL: N/A +Version: N/A +Shipped: yes + +Description: +Py-vulcanize, formerly known as TVCM (trace-viewer component model). +This code doesn't actually live anywhere else currently, but it may +be split out into a separate repository in the future. diff --git a/third_party/catapult/common/py_vulcanize/py_vulcanize/__init__.py b/third_party/catapult/common/py_vulcanize/py_vulcanize/__init__.py new file mode 100644 index 00000000..087a104e --- /dev/null +++ b/third_party/catapult/common/py_vulcanize/py_vulcanize/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Trace-viewer component model. + +This module implements trace-viewer's component model. 
+""" + +from __future__ import absolute_import +from py_vulcanize.generate import * # pylint: disable=wildcard-import +from py_vulcanize.project import Project diff --git a/third_party/catapult/common/py_vulcanize/py_vulcanize/fake_fs.py b/third_party/catapult/common/py_vulcanize/py_vulcanize/fake_fs.py new file mode 100644 index 00000000..a26b92fc --- /dev/null +++ b/third_party/catapult/common/py_vulcanize/py_vulcanize/fake_fs.py @@ -0,0 +1,168 @@ +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import builtins +import codecs +import collections +import os +import six + +from io import BytesIO + + +class WithableStringIO(six.StringIO): + + def __enter__(self, *args): + return self + + def __exit__(self, *args): + pass + +class WithableBytesIO(BytesIO): + + def __enter__(self, *args): + return self + + def __exit__(self, *args): + pass + +class FakeFS(object): + + def __init__(self, initial_filenames_and_contents=None): + self._file_contents = {} + if initial_filenames_and_contents: + for k, v in six.iteritems(initial_filenames_and_contents): + self._file_contents[k] = v + + self._bound = False + self._real_codecs_open = codecs.open + self._real_open = builtins.open + + self._real_abspath = os.path.abspath + self._real_exists = os.path.exists + self._real_walk = os.walk + self._real_listdir = os.listdir + + def __enter__(self): + self.Bind() + return self + + def __exit__(self, *args): + self.Unbind() + + def Bind(self): + assert not self._bound + codecs.open = self._FakeCodecsOpen + builtins.open = self._FakeOpen + os.path.abspath = self._FakeAbspath + os.path.exists = self._FakeExists + os.walk = self._FakeWalk + os.listdir = self._FakeListDir + self._bound = True + + def Unbind(self): + assert self._bound + codecs.open = 
self._real_codecs_open + builtins.open = self._real_open + os.path.abspath = self._real_abspath + os.path.exists = self._real_exists + os.walk = self._real_walk + os.listdir = self._real_listdir + self._bound = False + + def AddFile(self, path, contents): + assert path not in self._file_contents + path = os.path.normpath(path) + self._file_contents[path] = contents + + def _FakeOpen(self, path, mode=None): + if mode is None: + mode = 'r' + if mode == 'r' or mode == 'rU' or mode == 'rb': + if path not in self._file_contents: + return self._real_open(path, mode) + + if mode == 'rb': + return WithableBytesIO(self._file_contents[path]) + else: + return WithableStringIO(self._file_contents[path]) + + raise NotImplementedError() + + def _FakeCodecsOpen(self, path, mode=None, + encoding=None): # pylint: disable=unused-argument + if mode is None: + mode = 'r' + if mode == 'r' or mode == 'rU' or mode == 'rb': + if path not in self._file_contents: + return self._real_open(path, mode) + + if mode == 'rb': + return WithableBytesIO(self._file_contents[path]) + else: + return WithableStringIO(self._file_contents[path]) + + raise NotImplementedError() + + def _FakeAbspath(self, path): + """Normalize the path and ensure it starts with os.path.sep. + + The tests all assume paths start with things like '/my/project', + and this abspath implementaion makes that assumption work correctly + on Windows. 
+ """ + normpath = os.path.normpath(path) + if not normpath.startswith(os.path.sep): + normpath = os.path.sep + normpath + return normpath + + def _FakeExists(self, path): + if path in self._file_contents: + return True + return self._real_exists(path) + + def _FakeWalk(self, top): + assert os.path.isabs(top) + all_filenames = list(self._file_contents.keys()) + pending_prefixes = collections.deque() + pending_prefixes.append(top) + visited_prefixes = set() + while len(pending_prefixes): + prefix = pending_prefixes.popleft() + if prefix in visited_prefixes: + continue + visited_prefixes.add(prefix) + if prefix.endswith(os.path.sep): + prefix_with_trailing_sep = prefix + else: + prefix_with_trailing_sep = prefix + os.path.sep + + dirs = set() + files = [] + for filename in all_filenames: + if not filename.startswith(prefix_with_trailing_sep): + continue + relative_to_prefix = os.path.relpath(filename, prefix) + + dirpart = os.path.dirname(relative_to_prefix) + if len(dirpart) == 0: + files.append(relative_to_prefix) + continue + parts = dirpart.split(os.sep) + if len(parts) == 0: + dirs.add(dirpart) + else: + pending = os.path.join(prefix, parts[0]) + dirs.add(parts[0]) + pending_prefixes.appendleft(pending) + + dirs = sorted(dirs) + yield prefix, dirs, files + + def _FakeListDir(self, dirname): + raise NotImplementedError() diff --git a/third_party/catapult/common/py_vulcanize/py_vulcanize/fake_fs_unittest.py b/third_party/catapult/common/py_vulcanize/py_vulcanize/fake_fs_unittest.py new file mode 100644 index 00000000..b8516f71 --- /dev/null +++ b/third_party/catapult/common/py_vulcanize/py_vulcanize/fake_fs_unittest.py @@ -0,0 +1,54 @@ +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +from __future__ import absolute_import +import os +import unittest + +from py_vulcanize import fake_fs + + +class FakeFSUnittest(unittest.TestCase): + + def testBasic(self): + fs = fake_fs.FakeFS() + fs.AddFile('/blah/x', 'foobar') + with fs: + assert os.path.exists(os.path.normpath('/blah/x')) + self.assertEquals( + 'foobar', + open(os.path.normpath('/blah/x'), 'r').read()) + + def testWithableOpen(self): + fs = fake_fs.FakeFS() + fs.AddFile('/blah/x', 'foobar') + with fs: + with open(os.path.normpath('/blah/x'), 'r') as f: + self.assertEquals('foobar', f.read()) + + def testWalk(self): + fs = fake_fs.FakeFS() + fs.AddFile('/x/w2/w3/z3.txt', '') + fs.AddFile('/x/w/z.txt', '') + fs.AddFile('/x/y.txt', '') + fs.AddFile('/a.txt', 'foobar') + with fs: + gen = os.walk(os.path.normpath('/')) + r = next(gen) + self.assertEquals((os.path.normpath('/'), ['x'], ['a.txt']), r) + + r = next(gen) + self.assertEquals((os.path.normpath('/x'), ['w', 'w2'], ['y.txt']), r) + + r = next(gen) + self.assertEquals((os.path.normpath('/x/w'), [], ['z.txt']), r) + + r = next(gen) + self.assertEquals((os.path.normpath('/x/w2'), ['w3'], []), r) + + r = next(gen) + self.assertEquals((os.path.normpath('/x/w2/w3'), [], ['z3.txt']), r) + + with self.assertRaises(StopIteration): + next(gen) diff --git a/third_party/catapult/common/py_vulcanize/py_vulcanize/generate.py b/third_party/catapult/common/py_vulcanize/py_vulcanize/generate.py new file mode 100644 index 00000000..7d7e6303 --- /dev/null +++ b/third_party/catapult/common/py_vulcanize/py_vulcanize/generate.py @@ -0,0 +1,301 @@ +# Copyright (c) 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import subprocess +import sys +import tempfile + +from py_vulcanize import html_generation_controller + +try: + from six import StringIO +except ImportError: + from io import StringIO + + + +html_warning_message = """ + + + +""" + +js_warning_message = """ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +/* WARNING: This file is auto generated. + * + * Do not edit directly. + */ +""" + +css_warning_message = """ +/* Copyright 2015 The Chromium Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. */ + +/* WARNING: This file is auto-generated. + * + * Do not edit directly. + */ +""" + +origin_trial_tokens = [ + # WebComponent V0 origin trial token for googleusercontent.com + subdomains. + # This is the domain from which traces in cloud storage are served. + # Expires Nov 5, 2020. See https://crbug.com/1021137 + "AnYuQDtUf6OrWCmR9Okd67JhWVTbmnRedvPi1TEvAxac8+1p6o9q08FoDO6oCbLD0xEqev+SkZFiIhFSzlY9HgUAAABxeyJvcmlnaW4iOiJodHRwczovL2dvb2dsZXVzZXJjb250ZW50LmNvbTo0NDMiLCJmZWF0dXJlIjoiV2ViQ29tcG9uZW50c1YwIiwiZXhwaXJ5IjoxNjA0NjE0NTM4LCJpc1N1YmRvbWFpbiI6dHJ1ZX0=", + # This is for chromium-build-stats.appspot.com (ukai@) + # Expires Feb 2, 2021. 
see https://crbug.com/1050215 + "AkFXw3wHnOs/XXYqFXpc3diDLrRFd9PTgGs/gs43haZmngI/u1g8L4bDnSKLZkB6fecjmjTwcAMQFCpWMAoHSQEAAAB8eyJvcmlnaW4iOiJodHRwczovL2Nocm9taXVtLWJ1aWxkLXN0YXRzLmFwcHNwb3QuY29tOjQ0MyIsImZlYXR1cmUiOiJXZWJDb21wb25lbnRzVjAiLCJleHBpcnkiOjE2MTIyMjM5OTksImlzU3ViZG9tYWluIjp0cnVlfQ==", + # This is for chromium-build-stats-staging.appspot.com (ukai@) + # Expires Feb 2, 2021, see https://crbug.com/1050215 + "AtQY4wpX9+nj+Vn27cTgygzIPbtB2WoAoMQR5jK9mCm/H2gRIDH6MmGVAaziv9XnYTDKjhBnQYtecbTiIHCQiAIAAACEeyJvcmlnaW4iOiJodHRwczovL2Nocm9taXVtLWJ1aWxkLXN0YXRzLXN0YWdpbmcuYXBwc3BvdC5jb206NDQzIiwiZmVhdHVyZSI6IldlYkNvbXBvbmVudHNWMCIsImV4cGlyeSI6MTYxMjIyMzk5OSwiaXNTdWJkb21haW4iOnRydWV9" + # + # Add more tokens here if traces are served from other domains. + # WebComponent V0 origin tiral token is generated on + # https://developers.chrome.com/origintrials/#/trials/active +] + +def _AssertIsUTF8(f): + if isinstance(f, StringIO): + return + assert f.encoding == 'utf-8' + + +def _MinifyJS(input_js): + py_vulcanize_path = os.path.abspath(os.path.join( + os.path.dirname(__file__), '..')) + rjsmin_path = os.path.abspath( + os.path.join(py_vulcanize_path, 'third_party', 'rjsmin', 'rjsmin.py')) + + with tempfile.NamedTemporaryFile() as _: + args = [ + sys.executable, + rjsmin_path + ] + p = subprocess.Popen(args, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + res = p.communicate(input=input_js.encode('utf-8')) + errorcode = p.wait() + if errorcode != 0: + sys.stderr.write('rJSmin exited with error code %d' % errorcode) + sys.stderr.write(res[1].decode('utf-8')) + raise Exception('Failed to minify, omgah') + return res[0].decode('utf-8') + + +def GenerateJS(load_sequence, + use_include_tags_for_scripts=False, + dir_for_include_tag_root=None, + minify=False, + report_sizes=False): + f = StringIO() + GenerateJSToFile(f, + load_sequence, + use_include_tags_for_scripts, + dir_for_include_tag_root, + minify=minify, + report_sizes=report_sizes) + + return 
f.getvalue() + + +def GenerateJSToFile(f, + load_sequence, + use_include_tags_for_scripts=False, + dir_for_include_tag_root=None, + minify=False, + report_sizes=False): + _AssertIsUTF8(f) + if use_include_tags_for_scripts and dir_for_include_tag_root is None: + raise Exception('Must provide dir_for_include_tag_root') + + f.write(js_warning_message) + f.write('\n') + + if not minify: + flatten_to_file = f + else: + flatten_to_file = StringIO() + + for module in load_sequence: + module.AppendJSContentsToFile(flatten_to_file, + use_include_tags_for_scripts, + dir_for_include_tag_root) + if minify: + js = flatten_to_file.getvalue() + minified_js = _MinifyJS(js) + f.write(minified_js) + f.write('\n') + + if report_sizes: + for module in load_sequence: + s = StringIO() + module.AppendJSContentsToFile(s, + use_include_tags_for_scripts, + dir_for_include_tag_root) + + # Add minified size info. + js = s.getvalue() + min_js_size = str(len(_MinifyJS(js))) + + # Print names for this module. Some domain-specific simplifications + # are included to make pivoting more obvious. 
+ parts = module.name.split('.') + if parts[:2] == ['base', 'ui']: + parts = ['base_ui'] + parts[2:] + if parts[:2] == ['tracing', 'importer']: + parts = ['importer'] + parts[2:] + tln = parts[0] + sln = '.'.join(parts[:2]) + + # Output + print(('%i\t%s\t%s\t%s\t%s' % + (len(js), min_js_size, module.name, tln, sln))) + sys.stdout.flush() + + +class ExtraScript(object): + + def __init__(self, script_id=None, text_content=None, content_type=None): + if script_id is not None: + assert script_id[0] != '#' + self.script_id = script_id + self.text_content = text_content + self.content_type = content_type + + def WriteToFile(self, output_file): + _AssertIsUTF8(output_file) + attrs = [] + if self.script_id: + attrs.append('id="%s"' % self.script_id) + if self.content_type: + attrs.append('content-type="%s"' % self.content_type) + + if len(attrs) > 0: + output_file.write('\n') + + +def _MinifyCSS(css_text): + py_vulcanize_path = os.path.abspath(os.path.join( + os.path.dirname(__file__), '..')) + rcssmin_path = os.path.abspath( + os.path.join(py_vulcanize_path, 'third_party', 'rcssmin', 'rcssmin.py')) + + with tempfile.NamedTemporaryFile() as _: + rcssmin_args = [sys.executable, rcssmin_path] + p = subprocess.Popen(rcssmin_args, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + res = p.communicate(input=css_text.encode('utf-8')) + errorcode = p.wait() + if errorcode != 0: + sys.stderr.write('rCSSmin exited with error code %d' % errorcode) + sys.stderr.write(res[1]) + raise Exception('Failed to generate css for %s.' 
% css_text) + return res[0].decode('utf-8') + + +def GenerateStandaloneHTMLAsString(*args, **kwargs): + f = StringIO() + GenerateStandaloneHTMLToFile(f, *args, **kwargs) + return f.getvalue() + +def _WriteOriginTrialTokens(output_file): + for token in origin_trial_tokens: + output_file.write(' \n') + +def GenerateStandaloneHTMLToFile(output_file, + load_sequence, + title=None, + flattened_js_url=None, + extra_scripts=None, + minify=False, + report_sizes=False, + output_html_head_and_body=True): + """Writes a HTML file with the content of all modules in a load sequence. + + The load_sequence is a list of (HTML or JS) Module objects; the order that + they're inserted into the file depends on their type and position in the load + sequence. + """ + _AssertIsUTF8(output_file) + extra_scripts = extra_scripts or [] + + if output_html_head_and_body: + output_file.write( + '\n' + '\n' + '
\n' + ' \n') + _WriteOriginTrialTokens(output_file) + if title: + output_file.write('"|\'|)(?P[^"\'()]*)(?P=quote)\)', + InlineUrl, self.contents) + + def AppendDirectlyDependentFilenamesTo(self, dependent_filenames): + for i in self.images: + dependent_filenames.append(i.resource.absolute_path) + + def _Load(self, containing_dirname): + if self.contents.find('@import') != -1: + raise Exception('@imports are not supported') + + matches = re.findall( + 'url\((?:["|\']?)([^"\'()]*)(?:["|\']?)\)', + self.contents) + + def resolve_url(url): + if os.path.isabs(url): + # FIXME: module is used here, but py_vulcanize.module is never imported. + # However, py_vulcanize.module cannot be imported since py_vulcanize.module may import + # style_sheet, leading to an import loop. + raise module.DepsException('URL references must be relative') + # URLS are relative to this module's directory + abs_path = os.path.abspath(os.path.join(containing_dirname, url)) + image = self.loader.LoadImage(abs_path) + image.aliases.append(url) + return image + + self._images = [resolve_url(x) for x in matches] + + +class StyleSheet(object): + """Represents a stylesheet resource referenced by a module via the + base.requireStylesheet(xxx) directive.""" + + def __init__(self, loader, name, resource): + self.loader = loader + self.name = name + self.resource = resource + self._parsed_style_sheet = None + + @property + def filename(self): + return self.resource.absolute_path + + @property + def contents(self): + return self.resource.contents + + def __repr__(self): + return 'StyleSheet(%s)' % self.name + + @property + def images(self): + self._InitParsedStyleSheetIfNeeded() + return self._parsed_style_sheet.images + + def AppendDirectlyDependentFilenamesTo(self, dependent_filenames): + self._InitParsedStyleSheetIfNeeded() + + dependent_filenames.append(self.resource.absolute_path) + self._parsed_style_sheet.AppendDirectlyDependentFilenamesTo( + dependent_filenames) + + @property + def 
contents_with_inlined_images(self): + self._InitParsedStyleSheetIfNeeded() + return self._parsed_style_sheet.contents_with_inlined_images + + def load(self): + self._InitParsedStyleSheetIfNeeded() + + def _InitParsedStyleSheetIfNeeded(self): + if self._parsed_style_sheet: + return + module_dirname = os.path.dirname(self.resource.absolute_path) + self._parsed_style_sheet = ParsedStyleSheet( + self.loader, module_dirname, self.contents) diff --git a/third_party/catapult/common/py_vulcanize/py_vulcanize/style_sheet_unittest.py b/third_party/catapult/common/py_vulcanize/py_vulcanize/style_sheet_unittest.py new file mode 100644 index 00000000..89bc0f47 --- /dev/null +++ b/third_party/catapult/common/py_vulcanize/py_vulcanize/style_sheet_unittest.py @@ -0,0 +1,68 @@ +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +from __future__ import absolute_import +import base64 +import os +import unittest + +from py_vulcanize import project as project_module +from py_vulcanize import resource_loader +from py_vulcanize import fake_fs +from py_vulcanize import module + + +class StyleSheetUnittest(unittest.TestCase): + + def testImages(self): + fs = fake_fs.FakeFS() + fs.AddFile('/src/foo/x.css', """ +.x .y { + background-image: url(../images/bar.jpeg); +} +""") + fs.AddFile('/src/images/bar.jpeg', b'hello world') + with fs: + project = project_module.Project([os.path.normpath('/src/')]) + loader = resource_loader.ResourceLoader(project) + + foo_x = loader.LoadStyleSheet('foo.x') + self.assertEquals(1, len(foo_x.images)) + + r0 = foo_x.images[0] + self.assertEquals(os.path.normpath('/src/images/bar.jpeg'), + r0.absolute_path) + + inlined = foo_x.contents_with_inlined_images + self.assertEquals(""" +.x .y { + background-image: url(data:image/jpeg;base64,%s); +} +""" % base64.standard_b64encode(b'hello world').decode('utf-8'), inlined) + + def testURLResolveFails(self): 
+ fs = fake_fs.FakeFS() + fs.AddFile('/src/foo/x.css', """ +.x .y { + background-image: url(../images/missing.jpeg); +} +""") + with fs: + project = project_module.Project([os.path.normpath('/src')]) + loader = resource_loader.ResourceLoader(project) + + self.assertRaises(module.DepsException, + lambda: loader.LoadStyleSheet('foo.x')) + + def testImportsCauseFailure(self): + fs = fake_fs.FakeFS() + fs.AddFile('/src/foo/x.css', """ +@import url(awesome.css); +""") + with fs: + project = project_module.Project([os.path.normpath('/src')]) + loader = resource_loader.ResourceLoader(project) + + self.assertRaises(Exception, + lambda: loader.LoadStyleSheet('foo.x')) diff --git a/third_party/catapult/third_party/beautifulsoup4-4.9.3/COPYING.txt b/third_party/catapult/third_party/beautifulsoup4-4.9.3/COPYING.txt new file mode 100644 index 00000000..fb6ae69c --- /dev/null +++ b/third_party/catapult/third_party/beautifulsoup4-4.9.3/COPYING.txt @@ -0,0 +1,27 @@ +Beautiful Soup is made available under the MIT license: + + Copyright (c) 2004-2017 Leonard Richardson + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + +Beautiful Soup incorporates code from the html5lib library, which is +also made available under the MIT license. Copyright (c) 2006-2013 +James Graham and other contributors diff --git a/third_party/catapult/third_party/beautifulsoup4-4.9.3/LICENSE b/third_party/catapult/third_party/beautifulsoup4-4.9.3/LICENSE new file mode 100644 index 00000000..4c068bab --- /dev/null +++ b/third_party/catapult/third_party/beautifulsoup4-4.9.3/LICENSE @@ -0,0 +1,30 @@ +Beautiful Soup is made available under the MIT license: + + Copyright (c) 2004-2019 Leonard Richardson + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+ +Beautiful Soup incorporates code from the html5lib library, which is +also made available under the MIT license. Copyright (c) 2006-2013 +James Graham and other contributors + +Beautiful Soup depends on the soupsieve library, which is also made +available under the MIT license. Copyright (c) 2018 Isaac Muse diff --git a/third_party/catapult/third_party/beautifulsoup4-4.9.3/MANIFEST.in b/third_party/catapult/third_party/beautifulsoup4-4.9.3/MANIFEST.in new file mode 100644 index 00000000..33821b97 --- /dev/null +++ b/third_party/catapult/third_party/beautifulsoup4-4.9.3/MANIFEST.in @@ -0,0 +1,10 @@ +include test-all-versions +include convert-py3k +include LICENSE +include *.txt +include doc*/Makefile +include doc*/source/*.py +include doc*/source/*.rst +include doc*/source/*.jpg +include scripts/*.py +include scripts/*.txt diff --git a/third_party/catapult/third_party/beautifulsoup4-4.9.3/NEWS.txt b/third_party/catapult/third_party/beautifulsoup4-4.9.3/NEWS.txt new file mode 100644 index 00000000..625bb34c --- /dev/null +++ b/third_party/catapult/third_party/beautifulsoup4-4.9.3/NEWS.txt @@ -0,0 +1,1547 @@ += 4.9.3 (20201003) + +* Implemented a significant performance optimization to the process of + searching the parse tree. Patch by Morotti. [bug=1898212] + += 4.9.2 (20200926) + +* Fixed a bug that caused too many tags to be popped from the tag + stack during tree building, when encountering a closing tag that had + no matching opening tag. [bug=1880420] + +* Fixed a bug that inconsistently moved elements over when passing + a Tag, rather than a list, into Tag.extend(). [bug=1885710] + +* Specify the soupsieve dependency in a way that complies with + PEP 508. Patch by Mike Nerone. [bug=1893696] + +* Change the signatures for BeautifulSoup.insert_before and insert_after + (which are not implemented) to match PageElement.insert_before and + insert_after, quieting warnings in some IDEs. 
[bug=1897120] + += 4.9.1 (20200517) + +* Added a keyword argument 'on_duplicate_attribute' to the + BeautifulSoupHTMLParser constructor (used by the html.parser tree + builder) which lets you customize the handling of markup that + contains the same attribute more than once, as in: + [bug=1878209] + +* Added a distinct subclass, GuessedAtParserWarning, for the warning + issued when BeautifulSoup is instantiated without a parser being + specified. [bug=1873787] + +* Added a distinct subclass, MarkupResemblesLocatorWarning, for the + warning issued when BeautifulSoup is instantiated with 'markup' that + actually seems to be a URL or the path to a file on + disk. [bug=1873787] + +* The new NavigableString subclasses (Stylesheet, Script, and + TemplateString) can now be imported directly from the bs4 package. + +* If you encode a document with a Python-specific encoding like + 'unicode_escape', that encoding is no longer mentioned in the final + XML or HTML document. Instead, encoding information is omitted or + left blank. [bug=1874955] + +* Fixed test failures when run against soupselect 2.0. Patch by Tomáš + Chvátal. [bug=1872279] + += 4.9.0 (20200405) + +* Added PageElement.decomposed, a new property which lets you + check whether you've already called decompose() on a Tag or + NavigableString. + +* Embedded CSS and Javascript is now stored in distinct Stylesheet and + Script tags, which are ignored by methods like get_text() since most + people don't consider this sort of content to be 'text'. This + feature is not supported by the html5lib treebuilder. [bug=1868861] + +* Added a Russian translation by 'authoress' to the repository. + +* Fixed an unhandled exception when formatting a Tag that had been + decomposed.[bug=1857767] + +* Fixed a bug that happened when passing a Unicode filename containing + non-ASCII characters as markup into Beautiful Soup, on a system that + allows Unicode filenames. 
[bug=1866717] + +* Added a performance optimization to PageElement.extract(). Patch by + Arthur Darcet. + += 4.8.2 (20191224) + +* Added Python docstrings to all public methods of the most commonly + used classes. + +* Added a Chinese translation by Deron Wang and a Brazilian Portuguese + translation by Cezar Peixeiro to the repository. + +* Fixed two deprecation warnings. Patches by Colin + Watson and Nicholas Neumann. [bug=1847592] [bug=1855301] + +* The html.parser tree builder now correctly handles DOCTYPEs that are + not uppercase. [bug=1848401] + +* PageElement.select() now returns a ResultSet rather than a regular + list, making it consistent with methods like find_all(). + += 4.8.1 (20191006) + +* When the html.parser or html5lib parsers are in use, Beautiful Soup + will, by default, record the position in the original document where + each tag was encountered. This includes line number (Tag.sourceline) + and position within a line (Tag.sourcepos). Based on code by Chris + Mayo. [bug=1742921] + +* When instantiating a BeautifulSoup object, it's now possible to + provide a dictionary ('element_classes') of the classes you'd like to be + instantiated instead of Tag, NavigableString, etc. + +* Fixed the definition of the default XML namespace when using + lxml 4.4. Patch by Isaac Muse. [bug=1840141] + +* Fixed a crash when pretty-printing tags that were not created + during initial parsing. [bug=1838903] + +* Copying a Tag preserves information that was originally obtained from + the TreeBuilder used to build the original Tag. [bug=1838903] + +* Raise an explanatory exception when the underlying parser + completely rejects the incoming markup. [bug=1838877] + +* Avoid a crash when trying to detect the declared encoding of a + Unicode document. [bug=1838877] + +* Avoid a crash when unpickling certain parse trees generated + using html5lib on Python 3. 
[bug=1843545] + += 4.8.0 (20190720, "One Small Soup") + +This release focuses on making it easier to customize Beautiful Soup's +input mechanism (the TreeBuilder) and output mechanism (the Formatter). + +* You can customize the TreeBuilder object by passing keyword + arguments into the BeautifulSoup constructor. Those keyword + arguments will be passed along into the TreeBuilder constructor. + + The main reason to do this right now is to change how which + attributes are treated as multi-valued attributes (the way 'class' + is treated by default). You can do this with the + 'multi_valued_attributes' argument. [bug=1832978] + +* The role of Formatter objects has been greatly expanded. The Formatter + class now controls the following: + + - The function to call to perform entity substitution. (This was + previously Formatter's only job.) + - Which tags should be treated as containing CDATA and have their + contents exempt from entity substitution. + - The order in which a tag's attributes are output. [bug=1812422] + - Whether or not to put a '/' inside a void element, e.g. '
<br/>' vs '<br>
' + + All preexisting code should work as before. + +* Added a new method to the API, Tag.smooth(), which consolidates + multiple adjacent NavigableString elements. [bug=1697296] + +* &apos; (which is valid in XML, XHTML, and HTML 5, but not HTML 4) is always + recognized as a named entity and converted to a single quote. [bug=1818721] + += 4.7.1 (20190106) + +* Fixed a significant performance problem introduced in 4.7.0. [bug=1810617] + +* Fixed an incorrectly raised exception when inserting a tag before or + after an identical tag. [bug=1810692] + +* Beautiful Soup will no longer try to keep track of namespaces that + are not defined with a prefix; this can confuse soupselect. [bug=1810680] + +* Tried even harder to avoid the deprecation warning originally fixed in + 4.6.1. [bug=1778909] + += 4.7.0 (20181231) + +* Beautiful Soup's CSS Selector implementation has been replaced by a + dependency on Isaac Muse's SoupSieve project (the soupsieve package + on PyPI). The good news is that SoupSieve has a much more robust and + complete implementation of CSS selectors, resolving a large number + of longstanding issues. The bad news is that from this point onward, + SoupSieve must be installed if you want to use the select() method. + + You don't have to change anything if you installed Beautiful Soup + through pip (SoupSieve will be automatically installed when you + upgrade Beautiful Soup) or if you don't use CSS selectors from + within Beautiful Soup. + + SoupSieve documentation: https://facelessuser.github.io/soupsieve/ + +* Added the PageElement.extend() method, which works like list.append(). + [bug=1514970] + +* PageElement.insert_before() and insert_after() now take a variable + number of arguments. [bug=1514970] + +* Fix a number of problems with the tree builder that caused + trees that were superficially okay, but which fell apart when bits + were extracted. Patch by Isaac Muse.
[bug=1782928,1809910] + +* Fixed a problem with the tree builder in which elements that + contained no content (such as empty comments and all-whitespace + elements) were not being treated as part of the tree. Patch by Isaac + Muse. [bug=1798699] + +* Fixed a problem with multi-valued attributes where the value + contained whitespace. Thanks to Jens Svalgaard for the + fix. [bug=1787453] + +* Clarified ambiguous license statements in the source code. Beautiful + Soup is released under the MIT license, and has been since 4.4.0. + +* This file has been renamed from NEWS.txt to CHANGELOG. + += 4.6.3 (20180812) + +* Exactly the same as 4.6.2. Re-released to make the README file + render properly on PyPI. + += 4.6.2 (20180812) + +* Fix an exception when a custom formatter was asked to format a void + element. [bug=1784408] + += 4.6.1 (20180728) + +* Stop data loss when encountering an empty numeric entity, and + possibly in other cases. Thanks to tos.kamiya for the fix. [bug=1698503] + +* Preserve XML namespaces introduced inside an XML document, not just + the ones introduced at the top level. [bug=1718787] + +* Added a new formatter, "html5", which represents void elements + as "<br>" rather than "<br/>". [bug=1716272] + +* Fixed a problem where the html.parser tree builder interpreted + a string like "&foo " as the character entity "&foo;" [bug=1728706] + +* Correctly handle invalid HTML numeric character entities + which reference code points that are not Unicode code points. Note + that this is only fixed when Beautiful Soup is used with the + html.parser parser -- html5lib already worked and I couldn't fix it + with lxml. [bug=1782933] + +* Improved the warning given when no parser is specified. [bug=1780571] + +* When markup contains duplicate elements, a select() call that + includes multiple match clauses will match all relevant + elements. [bug=1770596] + +* Fixed code that was causing deprecation warnings in recent Python 3 + versions.
Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496] + +* Fixed a Windows crash in diagnose() when checking whether a long + markup string is a filename. [bug=1737121] + +* Stopped HTMLParser from raising an exception in very rare cases of + bad markup. [bug=1708831] + +* Fixed a bug where find_all() was not working when asked to find a + tag with a namespaced name in an XML document that was parsed as + HTML. [bug=1723783] + +* You can get finer control over formatting by subclassing + bs4.element.Formatter and passing a Formatter instance into (e.g.) + encode(). [bug=1716272] + +* You can pass a dictionary of `attrs` into + BeautifulSoup.new_tag. This makes it possible to create a tag with + an attribute like 'name' that would otherwise be masked by another + argument of new_tag. [bug=1779276] + +* Clarified the deprecation warning when accessing tag.fooTag, to cover + the possibility that you might really have been looking for a tag + called 'fooTag'. + += 4.6.0 (20170507) = + +* Added the `Tag.get_attribute_list` method, which acts like `Tag.get` for + getting the value of an attribute, but which always returns a list, + whether or not the attribute is a multi-value attribute. [bug=1678589] + +* It's now possible to use a tag's namespace prefix when searching, + e.g. soup.find('namespace:tag') [bug=1655332] + +* Improved the handling of empty-element tags like
<br/> when using the + html.parser parser. [bug=1676935] + +* HTML parsers treat all HTML4 and HTML5 empty element tags (aka void + element tags) correctly. [bug=1656909] + +* Namespace prefix is preserved when an XML tag is copied. Thanks + to Vikas for a patch and test. [bug=1685172] + += 4.5.3 (20170102) = + +* Fixed foster parenting when html5lib is the tree builder. Thanks to + Geoffrey Sneddon for a patch and test. + +* Fixed yet another problem that caused the html5lib tree builder to + create a disconnected parse tree. [bug=1629825] + += 4.5.2 (20170102) = + +* Apart from the version number, this release is identical to + 4.5.3. Due to user error, it could not be completely uploaded to + PyPI. Use 4.5.3 instead. + += 4.5.1 (20160802) = + +* Fixed a crash when passing Unicode markup that contained a + processing instruction into the lxml HTML parser on Python + 3. [bug=1608048] + += 4.5.0 (20160719) = + +* Beautiful Soup is no longer compatible with Python 2.6. This + actually happened a few releases ago, but it's now official. + +* Beautiful Soup will now work with versions of html5lib greater than + 0.99999999. [bug=1603299] + +* If a search against each individual value of a multi-valued + attribute fails, the search will be run one final time against the + complete attribute value considered as a single string. That is, if + a tag has class="foo bar" and neither "foo" nor "bar" matches, but + "foo bar" does, the tag is now considered a match. + + This happened in previous versions, but only when the value being + searched for was a string. Now it also works when that value is + a regular expression, a list of strings, etc. [bug=1476868] + +* Fixed a bug that deranged the tree when a whitespace element was + reparented into a tag that contained an identical whitespace + element. [bug=1505351] + +* Added support for CSS selector values that contain quoted spaces, + such as tag[style="display: foo"].
[bug=1540588] + +* Corrected handling of XML processing instructions. [bug=1504393] + +* Corrected an encoding error that happened when a BeautifulSoup + object was copied. [bug=1554439] + +* The contents of