Skip to content

Commit

Permalink
Script to check broken links
Browse files Browse the repository at this point in the history
  • Loading branch information
ruipedro16 committed Oct 7, 2024
1 parent b9eff6c commit 5de0523
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 0 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/check-broken-links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
name: Check Broken Links

# Re-check all outbound links on every push to main; can also be
# triggered manually from the Actions tab.
on:
  push:
    branches:
      - main
  workflow_dispatch:

# The job only reads the repository contents; no write scopes needed.
permissions:
  contents: read

jobs:
  check-links:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python environment
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML cannot coerce the version to a float
          # (an unquoted 3.10 would parse as 3.1).
          python-version: "3.9"

      - name: Install dependencies
        run: |
          pip install -r scripts/requirements.txt

      - name: Run Broken Links Check
        run: |
          python scripts/check_broken_links.py
1 change: 1 addition & 0 deletions scripts/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
venv/
81 changes: 81 additions & 0 deletions scripts/check_broken_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import requests
import re
import os
import sys

from colorama import Fore, Style

# Resolve the posts directory relative to this script's own location so
# the checker works from any current working directory. (The CI workflow
# invokes it as `python scripts/check_broken_links.py` from the repo
# root, where a bare "../content/posts" would point outside the repo.)
posts_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "..", "content", "posts"
)

# Matches http(s) URLs. The character class deliberately excludes
# markdown delimiters such as ')' so links inside [text](url) do not
# over-capture the closing parenthesis.
URL_REGEX = re.compile(r"https?://(?:www\.)?[a-zA-Z0-9./_-]+")

# Every URL that failed the reachability check; module-level so
# check_url() can append and main() can report.
broken_links = []


def extract_links_from_file(file_path):
    """Return all http(s) URLs found in the text file at *file_path*."""
    with open(file_path, "r", encoding="utf-8") as handle:
        return URL_REGEX.findall(handle.read())


def check_url(url):
    """Probe *url* and record it in the module-level ``broken_links``
    list if it appears unreachable.

    A HEAD request is tried first because it is cheap; many servers,
    however, reject HEAD (405/403) while serving GET normally, so a GET
    is retried before the URL is declared broken.
    """
    print(f" - Checking link: {url}: ", end="")
    try:
        response = requests.head(url, allow_redirects=True, timeout=5)

        if response.status_code not in range(200, 400):
            # HEAD was refused or errored — retry with a streamed GET so
            # the response body is not actually downloaded.
            response = requests.get(url, allow_redirects=True, timeout=5, stream=True)
            response.close()

        # Consider the URL reachable if the status code is 200-399
        if response.status_code in range(200, 400):
            print(f"{Fore.GREEN}OK{Style.RESET_ALL}")
        else:
            print(f"{Fore.RED}Broken{Style.RESET_ALL}")
            broken_links.append(url)

    except requests.RequestException:
        # Any network-level failure (DNS, timeout, invalid URL, ...)
        # counts as broken.
        print(f"{Fore.RED}Broken{Style.RESET_ALL}")
        broken_links.append(url)


def extract_links_from_posts():
    """Walk the posts directory, check every URL found in ``.md`` files,
    and return the set of distinct URLs encountered.

    Each distinct URL is probed at most once even when it appears in
    several posts — the previous per-occurrence check re-fetched
    duplicate links and could append the same broken URL to
    ``broken_links`` multiple times.
    """
    all_links = set()

    for root, _, files in os.walk(posts_path):
        for file_name in files:
            if not file_name.endswith(".md"):
                continue

            file_path = os.path.join(root, file_name)
            links = extract_links_from_file(file_path)

            if not links:
                print(f"No links found in {file_path}.")
                continue

            print(f"Links found in {file_path}:")
            for link in links:
                print(f" - {link}")
                # Probe each distinct URL only once across all posts.
                if link not in all_links:
                    check_url(link)
                all_links.add(link)

    return all_links


def main():
    """Run the link check over every post and report the results.

    Returns a process exit code: 0 when every link was reachable,
    1 when at least one broken link was recorded.
    """
    extract_links_from_posts()

    print("")
    print("=" * 100, end="\n\n")

    # Guard clause: report failures first, success is the fall-through.
    if broken_links:
        print(f"{Fore.RED}Broken links found:{Style.RESET_ALL}")
        for link in broken_links:
            print(f" - {link}")
        return 1

    print(f"{Fore.GREEN}No broken links found{Style.RESET_ALL}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
6 changes: 6 additions & 0 deletions scripts/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
certifi==2024.8.30
charset-normalizer==3.3.2
colorama==0.4.6
idna==3.10
requests==2.32.3
urllib3==2.2.3

0 comments on commit 5de0523

Please sign in to comment.