Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
repos:
- repo: "https://github.com/pre-commit/pre-commit-hooks"
rev: "v5.0.0"
rev: "v6.0.0"
hooks:
- id: "trailing-whitespace"
exclude: "^pulpproject.org"
Expand Down
10 changes: 10 additions & 0 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
- id: "linkchecker"
name: "Link Checker"
description: "Validates site: links in markdown files"
entry: "linkchecker"
language: "python"
types: ["markdown"]
pass_filenames: true
require_serial: true
...
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ readme = "README.md"

[project.scripts]
pulp-docs = "pulp_docs.cli:main"
linkchecker = "linkchecker.cli:main"

[project.entry-points."mkdocs.plugins"]
PulpDocs = "pulp_docs.plugin:PulpDocsPlugin"
Expand All @@ -34,7 +35,7 @@ PulpDocs = "pulp_docs.plugin:PulpDocsPlugin"
###########

[tool.hatch.build.targets.wheel]
packages = ["src/pulp_docs"]
packages = ["src/pulp_docs", "src/linkchecker"]

[tool.hatch.build.targets.wheel.force-include]
# enable using the installed package directly for development
Expand Down
5 changes: 5 additions & 0 deletions src/linkchecker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from linkchecker.cli import linkchecker

__all__ = [
"linkchecker",
]
137 changes: 137 additions & 0 deletions src/linkchecker/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import argparse
import os
from typing import NamedTuple

HEADER_ERROR = "Found {n} broken links:"


class LinkError(NamedTuple):
    """Record of one broken ``site:`` link found while scanning a file."""

    # The link exactly as written in the source, including the "site:" prefix.
    link_target: str
    # Path of the file that contains the link, as given to check_file().
    src_filename: str
    # Full text of the line on which the link was found.
    src_line: str
    # Zero-based line index (from enumerate); report_errors() prints it + 1.
    src_lineno: int


def linkchecker(component_rootdir: str, filenames: list[str]) -> int:
    """Check every given file for broken ``site:`` links and report them.

    Args:
        component_rootdir: Root directory of the component being checked.
        filenames: Paths of the files to scan.

    Returns:
        1 when at least one broken link was found, otherwise 0.
    """
    all_errors = [
        error
        for filename in filenames
        for error in check_file(component_rootdir, filename)
    ]
    report_errors(link_errors=all_errors, component_rootdir=component_rootdir)
    return 1 if all_errors else 0


def check_file(component_rootdir: str, src_filename: str) -> list[LinkError]:
    """Scan one file and collect its broken ``site:`` links.

    Args:
        component_rootdir: Root directory of the component; forwarded to
            check_line() to resolve link targets.
        src_filename: Path of the file to scan.

    Returns:
        One LinkError per broken link, in file order. The list is empty when
        ``src_filename`` is not an existing regular file.
    """
    # file_exists() is deliberately not used here: it also accepts directories
    # and "<name>.md" siblings, neither of which open() below can read.
    if not os.path.isfile(src_filename):
        return []

    link_errors = []
    # Decode explicitly as UTF-8 so results do not depend on the locale.
    with open(src_filename, "r", encoding="utf-8") as fd:
        for src_lineno, src_line in enumerate(fd):
            link_errors.extend(
                LinkError(
                    link_target=link_target,
                    src_line=src_line,
                    src_filename=src_filename,
                    src_lineno=src_lineno,
                )
                for link_target in check_line(src_line, component_rootdir)
            )
    return link_errors


def check_line(line: str, basedir: str) -> list[str]:
    """Return the broken same-component ``site:`` links found in *line*.

    Args:
        line: One line of markdown text.
        basedir: Root directory of the component. Its last path segment is
            taken as the component name and its parent directory is where
            link paths are resolved.

    Returns:
        The invalid links in their original ``site:...`` form. Links that
        point at other components are ignored.
    """
    invalid_links = []
    # Normalize first: with a trailing separator os.path.split() would return
    # an empty component name and every link would be silently skipped.
    relative_path, component_name = os.path.split(os.path.abspath(basedir))

    for original_link in get_links(line):
        link = original_link.removeprefix("site:")
        # Filter out external component links
        if not link.startswith(f"{component_name}/"):
            continue
        # A "#fragment" addresses a heading inside the page, not a file on
        # disk, so it must not take part in the existence check.
        link_path = link.partition("#")[0]
        link_target = os.path.join(relative_path, link_path)
        if not file_exists(link_target):
            # Store original site: format
            invalid_links.append(original_link)
    return invalid_links


def get_links(line: str) -> list[str]:
    """Extract ``site:`` links from a markdown line.

    Two forms are recognized: inline links (``[text](site:path)``, optionally
    followed by a link title) and reference definitions (``[ref]: site:path``).

    Args:
        line: One line of markdown text.

    Returns:
        The ``site:...`` targets: all inline links first, then all reference
        links, each group in order of appearance.
    """
    import re

    # Match inline links: [text](site:path) or [text](site:path "Title").
    # The target stops at whitespace so that an optional title is not
    # captured as part of the path (same rule as the reference pattern).
    inline_pattern = r"\[[^\]]+\]\(\s*(site:[^\s)]+)(?:\s[^)]*)?\)"
    # Match reference links: [ref]: site:path
    reference_pattern = r"\[[^\]]+\]:\s*(site:[^\s]+)"

    links = re.findall(inline_pattern, line)
    links.extend(re.findall(reference_pattern, line))
    return links


def file_exists(file: str) -> bool:
    """Check if a path exists, treating the ``.md`` extension as optional.

    Args:
        file: Path to test, as a string or an ``os.PathLike`` object.

    Returns:
        True when either ``file`` or ``file`` + ".md" exists.
    """
    # os.fspath() lets callers pass pathlib.Path objects too; concatenating
    # ".md" directly onto a Path would raise TypeError.
    path = os.fspath(file)
    return os.path.exists(path) or os.path.exists(path + ".md")


def report_errors(link_errors: list[LinkError], component_rootdir: str):
    """Print link errors to stdout.

    Nothing is printed for an empty list. Otherwise a header with the number
    of errors is followed by one ``path:line:link`` row per error, where the
    path is shown relative to *component_rootdir*.
    """
    if link_errors:
        print(HEADER_ERROR.format(n=len(link_errors)))
    for err in link_errors:
        relative_name = os.path.relpath(err.src_filename, component_rootdir)
        # src_lineno is zero-based; the report counts lines from one.
        one_based = err.src_lineno + 1
        print(f"{relative_name}:{one_based}:{err.link_target}")


def parse_arguments():
    """Parse and validate the command line.

    Returns:
        A ``(basedir, files)`` tuple: the base directory as an absolute path
        and the list of file arguments.

    The process exits through ``parser.error`` when the base directory does
    not exist or is not a directory.
    """
    parser = argparse.ArgumentParser(description="Check markdown links")
    parser.add_argument(
        "--basedir",
        default=".",
        help="Base directory for link checking (default: current directory)",
    )
    parser.add_argument("files", nargs="+", help="Markdown files to check")
    namespace = parser.parse_args()

    # Expand "~" and environment variables, then make the path absolute.
    expanded = os.path.expandvars(os.path.expanduser(namespace.basedir))
    basedir = os.path.abspath(expanded)

    if not os.path.exists(basedir):
        parser.error(f"basedir does not exist: {basedir}")
    elif not os.path.isdir(basedir):
        parser.error(f"basedir is not a directory: {basedir}")

    return basedir, namespace.files


def main():
    """CLI entry point for the linkchecker command.

    Parses the command line, runs the checker and terminates the process with
    the checker's return value as exit status.

    Raises:
        SystemExit: Always, carrying the value returned by linkchecker().
    """
    basedir, files = parse_arguments()
    # The bare exit() builtin is injected by the site module for interactive
    # use and is missing under "python -S"; SystemExit is always available.
    raise SystemExit(linkchecker(basedir, files))


if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions test_requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pre-commit~=4.5
pytest~=8.4
9 changes: 9 additions & 0 deletions tests/assets/invalid_links/component-a/bar.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[valid](site:component-a/foo)
[invalid](site:component-a/NOEXIST1)
[ignored](site:component-b/foo)
[ignored](site:component-b/bar)

[valid]: site:component-a/foo
[invalid]: site:component-a/NOEXIST2
[ignored]: site:component-b/foo
[ignored]: site:component-b/bar
Empty file.
9 changes: 9 additions & 0 deletions tests/assets/invalid_links/component-b/bar.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[valid](site:component-b/foo)
[invalid](site:component-b/NOEXIST)
[ignored](site:component-a/foo)
[ignored](site:component-a/bar)

[valid]: (site:component-b/foo
[invalid]: (site:component-b/NOEXIST
[ignored]: (site:component-a/foo
[ignored]: (site:component-a/bar
Empty file.
72 changes: 72 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import os
import shutil
import subprocess
from pathlib import Path
from textwrap import dedent

import pytest


@pytest.fixture
def create_file(tmp_path: Path):
    """Provide a factory that writes dedented text files below ``tmp_path``."""

    def _create_file(filename: str, content: str) -> Path:
        """Write *content* (dedented) to ``tmp_path / filename`` and return it."""
        if not filename:
            raise ValueError("filename can't be empty")
        target = tmp_path.joinpath(filename)
        # Intermediate directories are created on demand.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(dedent(content))
        return target

    return _create_file


@pytest.fixture
def create_tree(create_file, tmp_path):
    """Provide a factory that materializes a textual file-tree description.

    The description is a sequence of fragments, each introduced by
    ``=== <relative path>`` on its own line and followed by the file content.
    A fragment without content gets a placeholder text.
    """

    def _create_tree(tree_text: str) -> tuple[Path, list[Path]]:
        """Create the described files; return ``(tmp_path, created files)``."""
        files = []
        for fragment in dedent(tree_text).split("=== "):
            if not fragment.strip():
                continue
            # partition() also accepts a header that is not followed by a
            # newline (e.g. the last fragment of a string without a trailing
            # line break); a two-way split would fail to unpack there.
            filename, _, content = fragment.partition("\n")
            content = content.strip() or f"Placeholder for {filename=}"
            files.append(create_file(filename.strip(), content))
        return tmp_path, files

    return _create_tree


@pytest.fixture
def repository_root() -> Path:
    """Return the resolved directory two levels above this file."""
    containing_dir = Path(__file__).parent
    return containing_dir.parent.resolve()


@pytest.fixture
def assets_tmpdir(repository_root: Path, tmp_path: Path) -> Path:
    """Copy ``tests/assets`` into ``tmp_path`` and return the resolved copy.

    Entries named ``.git`` are left out of the copy.
    """
    source = repository_root / "tests" / "assets"
    destination = tmp_path / "assets"
    shutil.copytree(source, destination, ignore=lambda _src, _names: [".git"])
    return destination.resolve()


@pytest.fixture
def precommit_test(repository_root: Path, assets_tmpdir: Path):
    """Provide a runner that executes a pre-commit hook against a test asset.

    The returned callable takes a hook id and the name of a directory below
    the copied assets. It turns that directory into a git repository with all
    files staged, runs ``pre-commit try-repo`` for the hook on all files and
    returns ``(returncode, stdout, stderr)``.
    """

    def git(workdir: Path, *args: str) -> None:
        # Safety net against running "git init" / "git add" in a real
        # checkout: the working directory must lie inside the temporary copy
        # of the assets. Comparing against assets_tmpdir instead of a
        # hard-coded "/tmp" keeps the guard valid wherever the temporary
        # directory lives.
        if assets_tmpdir not in (workdir, *workdir.parents):
            raise RuntimeError("This must be used in a temporary directory.")
        subprocess.check_call(("git",) + args, cwd=workdir)

    def _precommit_test(hookid: str, fixture: str) -> tuple[int, str, str]:
        component_dir = assets_tmpdir / fixture
        # Child processes get their working directory through cwd= rather
        # than os.chdir(), so the test process keeps its own cwd and other
        # tests are not affected.
        git(component_dir, "init")
        git(component_dir, "add", ".")
        cmd = ["pre-commit", "try-repo", str(repository_root), hookid, "-a", "-v"]
        result = subprocess.run(cmd, capture_output=True, cwd=component_dir)
        return result.returncode, result.stdout.decode(), result.stderr.decode()

    return _precommit_test
76 changes: 76 additions & 0 deletions tests/test_linkchecker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from textwrap import dedent
from typing import NamedTuple

import pytest

from linkchecker.cli import HEADER_ERROR, linkchecker


class Scenario(NamedTuple):
    """One parametrized case for test_linkchecker_main."""

    # File tree description in the "=== <path>" format given to create_tree.
    tree: str
    # Component directory, relative to the tree root, passed to linkchecker().
    component_dir: str
    # Expected return value of linkchecker().
    exit_code: int
    # Expected stdout; compared after dedent() and strip().
    output: str


# Files shared by every scenario. They are listed without content, so
# create_tree fills each one with its placeholder text.
COMMON_TREE = """
=== A/docs/guides/foo.md
=== A/docs/reference/bar.md
=== B/docs/guides/foo.md
=== B/docs/reference/bar.md
"""

# Scenarios fed to test_linkchecker_main through pytest.mark.parametrize.
cases = [
# All links resolve, with and without the ".md" suffix: no output, exit 0.
Scenario(
tree=f"""
=== A/docs/index.md
[valid](site:A/docs/guides/foo)
[valid](site:A/docs/guides/foo.md)

[valid]: site:A/docs/reference/bar
[valid]: site:A/docs/reference/bar.md
{COMMON_TREE}
""",
component_dir="A",
exit_code=0,
output="",
),
# Two links point at files that are not in the tree: they are reported with
# their one-based line numbers and the exit code is 1.
Scenario(
tree=f"""
=== A/docs/index.md
[valid](site:A/docs/guides/foo)
[valid](site:A/docs/reference/bar)
[invalid](site:A/docs/guides/NOEXIT.md)
[invalid](site:A/docs/reference/NOEXIT.md)
{COMMON_TREE}
""",
component_dir="A",
exit_code=1,
output=f"""
{HEADER_ERROR.format(n=2)}
docs/index.md:3:site:A/docs/guides/NOEXIT.md
docs/index.md:4:site:A/docs/reference/NOEXIT.md
""",
),
]


@pytest.mark.parametrize("case", cases)
def test_linkchecker_main(case: Scenario, create_tree, capsys):
    """Run linkchecker over a generated tree; compare exit code and stdout."""
    root, created_files = create_tree(case.tree)
    component_root = str(root / case.component_dir)
    actual_code = linkchecker(component_root, created_files)
    captured = capsys.readouterr()
    assert actual_code == case.exit_code
    assert captured.out.strip() == dedent(case.output).strip()


def test_precommit_hook(precommit_test):
    """The linkchecker hook fails on component-a and names both broken links."""
    exitcode, stdout, _stderr = precommit_test(
        hookid="linkchecker",
        fixture="invalid_links/component-a",
    )
    # Echo the hook output so it shows up in the report of a failing run.
    print(stdout)
    assert exitcode == 1
    assert HEADER_ERROR.format(n=2) in stdout
    for broken_link in ("site:component-a/NOEXIST1", "site:component-a/NOEXIST2"):
        assert broken_link in stdout
26 changes: 26 additions & 0 deletions tests/test_testutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
def test_create_tree(create_tree):
    """create_tree writes every "=== <path>" fragment into its own file."""
    tree = """
    === docs/index.md
    # hello world

    [foo](bar.md)

    === docs/foo.md
    This is the foo file
    """
    rootdir, files = create_tree(tree)
    # Key by POSIX-style relative path so the expectations below do not
    # depend on the platform's path separator.
    created_file_content = {
        path.relative_to(rootdir).as_posix(): path.read_text()
        for path in rootdir.rglob("*")
        if path.is_file()
    }

    assert len(files) == 2
    # rglob() yields entries in filesystem order, which is not guaranteed to
    # be sorted, so compare the sorted names.
    assert sorted(created_file_content) == ["docs/foo.md", "docs/index.md"]

    assert "[foo](bar.md)" in created_file_content["docs/index.md"]
    assert "[foo](bar.md)" not in created_file_content["docs/foo.md"]

    assert "This is the foo file" in created_file_content["docs/foo.md"]
    assert "This is the foo file" not in created_file_content["docs/index.md"]