From 7806ecb97d8d4b91a652c5ea9b51f4dd2fd74e37 Mon Sep 17 00:00:00 2001 From: Justin Mayer Date: Mon, 8 Jul 2024 19:25:40 +0200 Subject: [PATCH] Modernize and bring up to latest plugin standards --- .cruft.json | 27 ++++++ .editorconfig | 15 ++++ .github/FUNDING.yml | 2 + .github/workflows/main.yml | 68 +++++++++++++++ .gitignore | 3 + .pre-commit-config.yaml | 28 ++++++ CONTRIBUTING.md | 9 ++ README.md | 72 ++++++++-------- pelican/plugins/deadlinks/__init__.py | 6 +- pelican/plugins/deadlinks/deadlinks.py | 109 +++++++++++------------- pyproject.toml | 104 +++++++++++++++++++++++ tasks.py | 113 +++++++++++++++++++++++++ 12 files changed, 457 insertions(+), 99 deletions(-) create mode 100644 .cruft.json create mode 100644 .editorconfig create mode 100644 .github/FUNDING.yml create mode 100644 .github/workflows/main.yml create mode 100644 .pre-commit-config.yaml create mode 100644 CONTRIBUTING.md create mode 100644 pyproject.toml create mode 100644 tasks.py diff --git a/.cruft.json b/.cruft.json new file mode 100644 index 0000000..764f51e --- /dev/null +++ b/.cruft.json @@ -0,0 +1,27 @@ +{ + "template": "https://github.com/getpelican/cookiecutter-pelican-plugin", + "commit": "b4b9b4f83b624cfd7728a3f1527d272be28ee916", + "checkout": null, + "context": { + "cookiecutter": { + "plugin_name": "Dead Links", + "repo_name": "deadlinks", + "package_name": "deadlinks", + "distribution_name": "pelican-deadlinks", + "version": "0.0.0", + "description": "Pelican plugin to scan links and check their status codes", + "authors": "{name = \"Justin Mayer\", email = \"entroP@gmail.com\"}", + "keywords": "\"pelican\", \"plugin\", \"link\", \"checker\"", + "readme": "README.md", + "contributing": "CONTRIBUTING.md", + "license": "MIT License|MIT", + "repo_url": "https://github.com/pelican-plugins/deadlinks", + "dev_status": "5 - Production/Stable", + "tests_exist": false, + "python_version": ">=3.8.1,<4.0", + "pelican_version": ">=4.5", + "_template": 
"https://github.com/getpelican/cookiecutter-pelican-plugin" + } + }, + "directory": null +} diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..862c1e1 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,15 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true + +[*.py] +max_line_length = 88 + +[*.yml] +indent_size = 2 diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..c2e5ca8 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: justinmayer +liberapay: pelican diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..2deb7da --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,68 @@ +name: build + +on: [push, pull_request] + +env: + PYTEST_ADDOPTS: "--color=yes" + +permissions: + contents: read + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python & PDM + uses: pdm-project/setup-pdm@v4 + with: + python-version: "3.10" + + - name: Install dependencies + run: pdm install + + - name: Run linters + run: pdm run invoke lint --diff + + deploy: + name: Deploy + environment: Deployment + needs: [lint] + runs-on: ubuntu-latest + if: github.ref=='refs/heads/main' && github.event_name!='pull_request' + + permissions: + contents: write + id-token: write + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Check release + id: check_release + run: | + python -m pip install autopub httpx + python -m pip install https://github.com/scikit-build/github-release/archive/master.zip + autopub check + + - name: Publish + if: ${{ steps.check_release.outputs.autopub_release=='true' }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + autopub prepare + autopub commit + autopub build + autopub 
githubrelease + + - name: Upload package to PyPI + if: ${{ steps.check_release.outputs.autopub_release=='true' }} + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index 73ca9e8..37c60f8 100644 --- a/.gitignore +++ b/.gitignore @@ -92,3 +92,6 @@ ENV/ *.swp *.orig +# PDM +.pdm-python +pdm.lock diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..82c5d8f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +--- +ci: + autoupdate_schedule: quarterly + +# See https://pre-commit.com/hooks.html for info on hooks +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-added-large-files + - id: check-ast + - id: check-case-conflict + - id: check-docstring-first + - id: check-merge-conflict + - id: check-toml + - id: check-yaml + - id: debug-statements + - id: detect-private-key + - id: end-of-file-fixer + - id: forbid-new-submodules + - id: trailing-whitespace + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.1 + hooks: + - id: ruff + - id: ruff-format + args: ["--check"] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..c1c6fa2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,9 @@ +Contributing +============ + +Contributions are welcome and much appreciated. Every little bit helps. You can contribute by improving the documentation, adding missing features, and fixing bugs. You can also help out by reviewing and commenting on [existing issues][]. + +To start contributing to this plugin, review the [Contributing to Pelican][] documentation, beginning with the **Contributing Code** section. 
+ +[existing issues]: https://github.com/pelican-plugins/deadlinks/issues +[Contributing to Pelican]: https://docs.getpelican.com/en/latest/contribute.html diff --git a/README.md b/README.md index 64deeaa..edaa5bc 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,39 @@ -# Dead Links +Dead Links: A Plugin for Pelican +================================ -This plugin scans for links and checks status code of requested url. -For responses such as 403 or 404, the plugin adds a "disabled" class -to the anchor, extends anchor with a span label and dumps warning to -the logger. +[![Build Status](https://img.shields.io/github/actions/workflow/status/pelican-plugins/deadlinks/main.yml?branch=main)](https://github.com/pelican-plugins/deadlinks/actions) +[![PyPI Version](https://img.shields.io/pypi/v/pelican-deadlinks)](https://pypi.org/project/pelican-deadlinks/) +[![Downloads](https://img.shields.io/pypi/dm/pelican-deadlinks)](https://pypi.org/project/pelican-deadlinks/) +![License](https://img.shields.io/pypi/l/pelican-deadlinks?color=blue) +This Pelican plugin scans links and checks their status codes. For responses such as 403 or 404, the plugin adds a `disabled` class to the anchor, extends the anchor with a `span` label, and prints a warning to the console logger. -# Requirements +Installation +------------ -BeautifulSoup4, requests +This plugin can be installed via: -To install them using pip, type: `pip install bs4 requests` + python -m pip install pelican-deadlinks -# Installation +As long as you have not explicitly added a `PLUGINS` setting to your Pelican settings file, then the newly-installed plugin should be automatically detected and enabled. Otherwise, you must add `deadlinks` to your existing `PLUGINS` list. For more information, please see the [How to Use Plugins](https://docs.getpelican.com/en/latest/plugins.html#how-to-use-plugins) documentation. 
-Clone repository somewhere (let's assume destination is ./plugins/custom/deadlinks) -and edit configuration file: +Usage +----- -```python - PLUGINS_PATH = [ - # [...] - 'plugins/custom' - ] - PLUGINS = [ - # [...] - 'deadlinks' - ] -``` - -# Settings +To enable the dead link checker, set the `DEADLINKS_VALIDATION` option in your Pelican settings file to `True`. Alternatively, if you don’t want to validate links every time, you can selectively enable link validation at run-time via: -To enable dead link checker, set the `DEADLINK_VALIDATION` option in your -Pelican configuration file to True. + pelican content -e DEADLINKS_VALIDATION=true -Additionally following options might be changed: +Additionally, the following options can be changed: ```python - DEADLINK_OPTS = { - 'archive': True, - 'classes': ['custom-class1', 'disabled'], - 'labels': True, - 'timeout_duration_ms': 1000, - 'timeout_is_error': False, - } +DEADLINKS_OPTIONS = { + "archive": True, + "classes": ["custom-class1", "disabled"], + "labels": True, + "timeout_duration_ms": 1000, + "timeout_is_error": False, +} ``` Options: @@ -54,3 +45,18 @@ Options: | `labels` | Insert bootstrap's label after the anchor element | False | | `timeout_duration_ms` | Time in ms after which request is considered as timed out | 1000 | | `timeout_is_error` | True/False. When enabled every time out is considered as dead link | False | + +Contributing +------------ + +Contributions are welcome and much appreciated. Every little bit helps. You can contribute by improving the documentation, adding missing features, and fixing bugs. You can also help out by reviewing and commenting on [existing issues][]. + +To start contributing to this plugin, review the [Contributing to Pelican][] documentation, beginning with the **Contributing Code** section. 
+ +[existing issues]: https://github.com/pelican-plugins/deadlinks/issues +[Contributing to Pelican]: https://docs.getpelican.com/en/latest/contribute.html + +License +------- + +This project is licensed under the MIT license. diff --git a/pelican/plugins/deadlinks/__init__.py b/pelican/plugins/deadlinks/__init__.py index 4daa840..866fb84 100644 --- a/pelican/plugins/deadlinks/__init__.py +++ b/pelican/plugins/deadlinks/__init__.py @@ -1,5 +1 @@ -# -*- coding: utf8 -*- - -from .deadlinks import * - - +from .deadlinks import * # noqa: F403,PGH004,RUF100 diff --git a/pelican/plugins/deadlinks/deadlinks.py b/pelican/plugins/deadlinks/deadlinks.py index 3a00cbe..8322f50 100644 --- a/pelican/plugins/deadlinks/deadlinks.py +++ b/pelican/plugins/deadlinks/deadlinks.py @@ -1,11 +1,10 @@ -# -*- coding: utf8 -*- - import logging + from bs4 import BeautifulSoup -from pelican import signals import requests -from requests.exceptions import Timeout, RequestException +from requests.exceptions import RequestException, Timeout +from pelican import signals log = logging.getLogger(__name__) @@ -13,27 +12,26 @@ MS_IN_SECOND = 1000.0 DEFAULT_OPTS = { - 'archive': True, - 'classes': [], - 'labels': False, - 'timeout_duration_ms': 1000, - 'timeout_is_error': False, + "archive": True, + "classes": [], + "labels": False, + "timeout_duration_ms": 1000, + "timeout_is_error": False, } -SPAN_WARNING = u'<span class="label label-warning"></span>' -SPAN_DANGER = u'<span class="label label-danger"></span>' -ARCHIVE_URL = u'http://web.archive.org/web/*/{url}' +SPAN_WARNING = '<span class="label label-warning"></span>' +SPAN_DANGER = '<span class="label label-danger"></span>' +ARCHIVE_URL = "https://web.archive.org/web/*/{url}" def get_status_code(url, opts): - """ - Open connection to the given url and check status code. + """Open connection to the given url and check status code. 
:param url: URL of the website to be checked :return: (availibility, success, HTTP code) """ availibility, success, code = (False, False, None) - timeout_duration_seconds = get_opt(opts, 'timeout_duration_ms') / MS_IN_SECOND + timeout_duration_seconds = get_opt(opts, "timeout_duration_ms") / MS_IN_SECOND try: r = requests.get(url, timeout=timeout_duration_seconds) code = r.status_code @@ -49,8 +47,7 @@ def get_status_code(url, opts): def user_enabled(inst, opt): - """ - Check whether the option is enabled. + """Check whether the option is enabled. :param inst: instance from content object init :param url: Option to be checked @@ -60,8 +57,7 @@ def user_enabled(inst, opt): def get_opt(opts, name): - """ - Get value of the given option + """Get value of the given option. :param opts: Table with options :param name: Name of option @@ -71,91 +67,84 @@ def get_opt(opts, name): def add_class(node, name): - """ - Add class value to a given tag + """Add class value to a given tag. :param node: HTML tag :param name: class attribute value to add """ - node['class'] = node.get('class', []) + [name, ] + node["class"] = [*node.get("class", []), name] def change_to_archive(anchor): - """ - Modify href attribute to point to archive.org instead of url directly. - """ - src = anchor['href'] + """Modify href attribute to point to archive.org instead of url directly.""" + src = anchor["href"] dst = ARCHIVE_URL.format(url=src) - anchor['href'] = dst + anchor["href"] = dst def on_connection_error(anchor, opts): - """ - Called on connection error (URLError being thrown) + """Call on connection error (URLError being thrown). 
+ :param anchor: Anchor element (<a>) :param opts: Dict with user options """ - classes = get_opt(opts, 'classes') + classes = get_opt(opts, "classes") for cls in classes: add_class(anchor, cls) - labels = get_opt(opts, 'labels') + labels = get_opt(opts, "labels") if labels: - soup = BeautifulSoup(SPAN_DANGER, 'html.parser') - soup.span.append('not available') + soup = BeautifulSoup(SPAN_DANGER, "html.parser") + soup.span.append("not available") idx = anchor.parent.contents.index(anchor) + 1 anchor.parent.insert(idx, soup) - archive = get_opt(opts, 'archive') + archive = get_opt(opts, "archive") if archive: change_to_archive(anchor) def on_access_error(anchor, code, opts): - """ - Called on access error (such as 403, 404) + """Call on access error (such as 403, 404). :param anchor: Anchor element (<a>) :param code: Error code (403, 404, ...) :param opts: Dict with user options """ - classes = get_opt(opts, 'classes') + classes = get_opt(opts, "classes") for cls in classes: add_class(anchor, cls) - labels = get_opt(opts, 'labels') + labels = get_opt(opts, "labels") if labels: - soup = BeautifulSoup(SPAN_WARNING, 'html.parser') + soup = BeautifulSoup(SPAN_WARNING, "html.parser") soup.span.append(str(code)) idx = anchor.parent.contents.index(anchor) + 1 anchor.parent.insert(idx, soup) - archive = get_opt(opts, 'archive') + archive = get_opt(opts, "archive") if archive: change_to_archive(anchor) -def content_object_init(instance): - """ - Pelican callback - """ +def content_object_init(instance): # noqa: PLR0912 + """Pelican callback.""" if instance._content is None: return - if not user_enabled(instance, 'DEADLINK_VALIDATION'): + if not user_enabled(instance, "DEADLINKS_VALIDATION"): log.debug("Configured not to validate links") return settings = instance.settings - siteurl = settings.get('SITEURL', '') - opts = settings.get('DEADLINK_OPTS', DEFAULT_OPTS) + siteurl = settings.get("SITEURL", "") + opts = settings.get("DEADLINKS_OPTIONS", DEFAULT_OPTS) cache = {} - soup_doc = 
BeautifulSoup(instance._content, 'html.parser') + soup_doc = BeautifulSoup(instance._content, "html.parser") - for anchor in soup_doc(['a', 'object']): - if 'href' not in anchor.attrs: + for anchor in soup_doc(["a", "object"]): + if "href" not in anchor.attrs: continue - url = anchor['href'] + url = anchor["href"] # local files and other links are not really intresting - if not url.startswith('http'): + if not url.startswith("http"): continue # Previous case works also for debugging environment (with SITEURL @@ -174,17 +163,17 @@ def content_object_init(instance): cache[url] = (avail, success, code) if not avail: - timeout_is_error = get_opt(opts, 'timeout_is_error') + timeout_is_error = get_opt(opts, "timeout_is_error") if timeout_is_error: - log.warning('Dead link: %s (not available)', url) + log.warning("Dead link: %s (not available)", url) on_connection_error(anchor, opts) else: - log.warning('Skipping: %s (not available)', url) + log.warning("Skipping: %s (not available)", url) continue elif not success: - if code >= 400 and code < 500: - log.warning('Dead link: %s (error code: %d)', url, code) + if code >= 400 and code < 500: # noqa: PLR2004 + log.warning("Dead link: %s (error code: %d)", url, code) on_access_error(anchor, code, opts) continue else: @@ -192,13 +181,11 @@ def content_object_init(instance): pass # Error codes from out of range [400, 500) are considered good too - log.debug('Good link: %s (%d)', url, code) + log.debug("Good link: %s (%d)", url, code) instance._content = soup_doc.decode() def register(): - """ - Part of Pelican API - """ + """Register the plugin.""" signals.content_object_init.connect(content_object_init) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b85f64d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,104 @@ +[project] +name = "pelican-deadlinks" +version = "0.0.0" +description = "Pelican plugin to scan links and check their status codes" +authors = [{name = "Justin Mayer", email = 
"entroP@gmail.com"}] +license = {text = "MIT"} +readme = "README.md" +keywords = ["pelican", "plugin", "link", "checker"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Framework :: Pelican", + "Framework :: Pelican :: Plugins", + "Intended Audience :: End Users/Desktop", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Software Development :: Libraries :: Python Modules", +] +requires-python = ">=3.8.1,<4.0" +dependencies = [ + "pelican>=4.5", + "beautifulsoup4>=4.12.3", + "requests>=2.32.3", +] + +[project.urls] +Homepage = "https://github.com/pelican-plugins/deadlinks" +"Issue Tracker" = "https://github.com/pelican-plugins/deadlinks/issues" +Funding = "https://donate.getpelican.com/" + +[project.optional-dependencies] +markdown = ["markdown>=3.4"] + +[tool.pdm] + +[tool.pdm.dev-dependencies] +lint = [ + "invoke>=2.2", + "ruff>=0.5.0,<0.6.0" +] + +[tool.pdm.build] +source-includes = [ + "CHANGELOG.md", + "CONTRIBUTING.md", +] +includes = ["pelican/"] +excludes = ["**/.DS_Store", "**/test_data/**", "tasks.py"] + +[tool.autopub] +project-name = "Dead Links" +git-username = "botpub" +git-email = "52496925+botpub@users.noreply.github.com" +append-github-contributor = true + +[tool.ruff.lint] +select = [ + "B", # flake8-bugbear + "BLE", # flake8-blind-except + "C4", # flake8-comprehensions + "D", # pydocstyle + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "ICN", # flake8-import-conventions + "ISC", # flake8-implicit-str-concat + "PGH", # pygrep-hooks + "PL", # pylint + "RET", # flake8-return + "RUF", # ruff-specific rules + "SIM", # flake8-simplify + "T10", # flake8-debugger 
+ "T20", # flake8-print + "TID", # flake8-tidy-imports + "TRY", # tryceratops + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] + +ignore = [ + "D100", # missing docstring in public module + "D104", # missing docstring in public package + "D203", # blank line before class docstring + "D213", # multi-line docstring summary should start at the second line + "ISC001", # disabled so `ruff format` works without warning + "RET507", # unnecessary `else`/`elif` after `continue` statement +] + +[tool.ruff.lint.isort] +combine-as-imports = true +force-sort-within-sections = true +known-first-party = ["pelican"] + +[build-system] +requires = ["pdm-backend"] +build-backend = "pdm.backend" diff --git a/tasks.py b/tasks.py new file mode 100644 index 0000000..6827bac --- /dev/null +++ b/tasks.py @@ -0,0 +1,113 @@ +from inspect import cleandoc +import logging +import os +from pathlib import Path +from shutil import which + +from invoke import task + +logger = logging.getLogger(__name__) + +PKG_NAME = "deadlinks" +PKG_PATH = Path(f"pelican/plugins/{PKG_NAME}") + +ACTIVE_VENV = os.environ.get("VIRTUAL_ENV", None) +VENV_HOME = Path(os.environ.get("WORKON_HOME", "~/.local/share/virtualenvs")) +VENV_PATH = Path(ACTIVE_VENV) if ACTIVE_VENV else (VENV_HOME.expanduser() / PKG_NAME) +VENV = str(VENV_PATH.expanduser()) +BIN_DIR = "bin" if os.name != "nt" else "Scripts" +VENV_BIN = Path(VENV) / Path(BIN_DIR) + +TOOLS = ("cruft", "pdm", "pre-commit") +PDM = which("pdm") if which("pdm") else (VENV_BIN / "pdm") +CMD_PREFIX = f"{VENV_BIN}/" if ACTIVE_VENV else f"{PDM} run " +CRUFT = which("cruft") if which("cruft") else f"{CMD_PREFIX}cruft" +PRECOMMIT = which("pre-commit") if which("pre-commit") else f"{CMD_PREFIX}pre-commit" +PTY = os.name != "nt" + + +@task +def tests(c, deprecations=False): + """Run the test suite, optionally with `--deprecations`.""" + deprecations_flag = "" if deprecations else "-W ignore::DeprecationWarning" + c.run(f"{CMD_PREFIX}pytest 
{deprecations_flag}", pty=PTY) + + +@task +def format(c, check=False, diff=False): + """Run Ruff's auto-formatter, optionally with `--check` or `--diff`.""" + check_flag, diff_flag = "", "" + if check: + check_flag = "--check" + if diff: + diff_flag = "--diff" + c.run( + f"{CMD_PREFIX}ruff format {check_flag} {diff_flag} {PKG_PATH} tasks.py", pty=PTY + ) + + +@task +def ruff(c, fix=False, diff=False): + """Run Ruff to ensure code meets project standards.""" + diff_flag, fix_flag = "", "" + if fix: + fix_flag = "--fix" + if diff: + diff_flag = "--diff" + c.run(f"{CMD_PREFIX}ruff check {diff_flag} {fix_flag} .", pty=PTY) + + +@task +def lint(c, fix=False, diff=False): + """Check code style via linting tools.""" + ruff(c, fix=fix, diff=diff) + format(c, check=(not fix), diff=diff) + + +@task +def tools(c): + """Install development tools in the virtual environment if not already on PATH.""" + for tool in TOOLS: + if not which(tool): + logger.info(f"** Installing {tool} **") + c.run(f"{CMD_PREFIX}pip install {tool}") + + +@task +def precommit(c): + """Install pre-commit hooks to .git/hooks/pre-commit.""" + logger.info("** Installing pre-commit hooks **") + c.run(f"{PRECOMMIT} install") + + +@task +def update(c, check=False): + """Apply upstream plugin template changes to this project.""" + if check: + logger.info("** Checking for upstream template changes **") + c.run(f"{CRUFT} check", pty=PTY) + else: + logger.info("** Updating project from upstream template **") + c.run(f"{CRUFT} update", pty=PTY) + + +@task +def setup(c): + """Set up the development environment.""" + if which("pdm") or ACTIVE_VENV: + tools(c) + c.run(f"{CMD_PREFIX}python -m pip install --upgrade pip", pty=PTY) + c.run(f"{PDM} update --dev", pty=PTY) + precommit(c) + logger.info("\nDevelopment environment should now be set up and ready!\n") + else: + error_message = """ + PDM is not installed, and there is no active virtual environment available. 
+ You can either manually create and activate a virtual environment, or you can + install PDM via: + + curl -sSL https://raw.githubusercontent.com/pdm-project/pdm/main/install-pdm.py | python3 - + + Once you have taken one of the above two steps, run `invoke setup` again. + """ # noqa: E501 + raise SystemExit(cleandoc(error_message))