| #!/usr/bin/env python3 |
| # SPDX-License-Identifier: GPL-2.0 |
| # Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>. |
| # |
| # pylint: disable=R0903,R0912,R0913,R0914,R0915,R0917 |
| |
| """ |
docdiff - Check differences in kernel-doc output between two different
commits.
| |
| Examples |
| -------- |
| |
Compare the kernel-doc output between the v6.18 and v6.19 releases::
| |
| $ kdoc_diff v6.18..v6.19 |
| |
| Both outputs are cached |
| |
| Force a complete documentation scan and clean any previous cache from |
| 6.19 to the current HEAD:: |
| |
| $ kdoc_diff 6.19.. --full --clean |
| |
| Check differences only on a single driver since origin/main:: |
| |
| $ kdoc_diff origin/main drivers/media |
| |
Generate a YAML file and use it to check for regressions::
| |
| $ kdoc_diff HEAD~ drivers/media --regression |
| |
| |
| """ |
| |
| import os |
| import sys |
| import argparse |
| import subprocess |
| import shutil |
| import re |
| import signal |
| |
| from glob import iglob |
| |
| |
| SRC_DIR = os.path.dirname(os.path.realpath(__file__)) |
| WORK_DIR = os.path.abspath(os.path.join(SRC_DIR, "../..")) |
| |
| KDOC_BINARY = os.path.join(SRC_DIR, "kernel-doc") |
| KDOC_PARSER_TEST = os.path.join(WORK_DIR, "tools/unittests/test_kdoc_parser.py") |
| |
| CACHE_DIR = ".doc_diff_cache" |
| YAML_NAME = "out.yaml" |
| |
| DIR_NAME = { |
| "full": os.path.join(CACHE_DIR, "full"), |
| "partial": os.path.join(CACHE_DIR, "partial"), |
| "no-cache": os.path.join(CACHE_DIR, "no_cache"), |
| "tmp": os.path.join(CACHE_DIR, "__tmp__"), |
| } |
| |
class GitHelper:
    """Handles all Git operations used by this tool."""

    def __init__(self, work_dir=None):
        # Directory where git commands run; None means the current directory.
        self.work_dir = work_dir

    def is_inside_repository(self):
        """Check if we're inside a Git repository."""
        try:
            output = subprocess.check_output(["git", "rev-parse",
                                              "--is-inside-work-tree"],
                                             cwd=self.work_dir,
                                             stderr=subprocess.STDOUT,
                                             universal_newlines=True)

            return output.strip() == "true"
        except subprocess.CalledProcessError:
            return False

    def is_valid_commit(self, commit_hash):
        """
        Validate that a ref (branch, tag, commit hash, etc.) can be
        resolved to a commit.
        """
        try:
            subprocess.check_output(["git", "rev-parse", commit_hash],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
            return True
        except subprocess.CalledProcessError:
            return False

    def get_short_hash(self, commit_hash):
        """Return the short commit hash for a ref, or "" if unresolvable."""
        try:
            return subprocess.check_output(["git", "rev-parse", "--short",
                                            commit_hash],
                                           cwd=self.work_dir,
                                           stderr=subprocess.STDOUT,
                                           universal_newlines=True).strip()
        except subprocess.CalledProcessError:
            return ""

    def has_uncommitted_changes(self):
        """Check for uncommitted changes (diff-index exits non-zero if any)."""
        try:
            subprocess.check_output(["git", "diff-index",
                                     "--quiet", "HEAD", "--"],
                                    cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)
            return False
        except subprocess.CalledProcessError:
            return True

    def get_current_branch(self):
        """Get current branch name (empty string on a detached HEAD)."""
        return subprocess.check_output(["git", "branch", "--show-current"],
                                       cwd=self.work_dir,
                                       universal_newlines=True).strip()

    def checkout_commit(self, commit_hash, quiet=True):
        """
        Checkout a commit safely.

        Returns True on success, False if the checkout command failed.
        Raises RuntimeError if the command succeeded but HEAD does not
        actually point at the requested ref.
        """
        args = ["git", "checkout", "-f"]
        if quiet:
            args.append("-q")
        args.append(commit_hash)
        try:
            subprocess.check_output(args, cwd=self.work_dir,
                                    stderr=subprocess.STDOUT)

            # Double-check if the checkout actually switched HEAD.
            # Resolve the requested ref to its short hash before comparing:
            # comparing the raw ref string (e.g. a tag or branch name)
            # against HEAD's short hash would always mismatch.
            branch = self.get_short_hash("HEAD")
            target = self.get_short_hash(commit_hash)
            if target != branch:
                raise RuntimeError(f"Branch changed to '{branch}' instead of '{commit_hash}'")

            return True
        except subprocess.CalledProcessError as e:
            print(f"ERROR: Failed to checkout {commit_hash}: {e}",
                  file=sys.stderr)
            return False
| |
class CacheManager:
    """Manages persistent cache directories."""

    def __init__(self, work_dir):
        self.work_dir = work_dir

    def initialize(self):
        """Create cache directories if they don't exist."""
        for rel_dir in DIR_NAME.values():
            target = os.path.join(self.work_dir, rel_dir)
            if os.path.exists(target):
                continue
            os.makedirs(target, exist_ok=True, mode=0o755)

    def get_commit_cache(self, commit_hash, path):
        """Return the cache path for commit_hash under path."""
        # Fall back to the raw ref when the hash cannot be resolved
        # (get_short_hash() returns "" in that case).
        short = GitHelper(self.work_dir).get_short_hash(commit_hash) or commit_hash
        return os.path.join(path, short)
| |
class KernelDocRunner:
    """Runs the kernel-doc documentation generator."""

    def __init__(self, work_dir, kdoc_binary):
        self.work_dir = work_dir
        self.kdoc_binary = kdoc_binary
        # Cached result of find_kdoc_references(); None means "not scanned yet"
        self.kdoc_files = None

    def find_kdoc_references(self):
        """
        Find all files referenced by ".. kernel-doc::" directives under
        Documentation/. The result is cached and sorted.
        """
        # Compare against None so an empty scan result is cached too,
        # instead of triggering a full re-scan on every call.
        if self.kdoc_files is not None:
            print("Using cached Kdoc refs")
            return self.kdoc_files

        print("Finding kernel-doc entries in Documentation...")

        files = os.path.join(self.work_dir, 'Documentation/**/*.rst')
        pattern = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
        kdoc_files = set()

        for file_path in iglob(files, recursive=True):
            try:
                with open(file_path, 'r', encoding='utf-8') as fp:
                    for line in fp:
                        match = pattern.match(line.strip())
                        if match:
                            kdoc_files.add(match.group(1))

            except OSError:
                continue

        # Sets have no stable order; sort so repeated runs produce a
        # deterministic command line (and therefore stable cached output).
        self.kdoc_files = sorted(kdoc_files)

        return self.kdoc_files

    def gen_yaml(self, yaml_file, kdoc_files):
        """Runs kernel-doc to generate a yaml file with man and rst."""
        cmd = [self.kdoc_binary, "--man", "--rst", "--yaml", yaml_file]
        cmd += kdoc_files

        print(f"YAML regression test file will be stored at: {yaml_file}")

        try:
            subprocess.check_call(cmd, cwd=self.work_dir,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return False

        return True

    def run_unittest(self, yaml_file):
        """
        Run unit tests with the generated yaml file.

        Always returns True: a failing run only prints a hint on how to
        reproduce it, it is not treated as a fatal error.
        """
        cmd = [KDOC_PARSER_TEST, "-q", "--yaml", yaml_file]
        result = subprocess.run(cmd, cwd=self.work_dir, check=False)

        if result.returncode:
            print("To check for problems, try to run it again with -v\n")
            print("Use -k <regex> to filter results\n\n\t$", end="")
            print(" ".join(cmd) + "\n")

        return True

    def normal_run(self, tmp_dir, output_dir, kdoc_files):
        """Generate man, rst and errors, storing them at tmp_dir."""
        os.makedirs(tmp_dir, exist_ok=True)

        try:
            with open(os.path.join(tmp_dir, "man.log"), "w", encoding="utf-8") as out:
                subprocess.check_call([self.kdoc_binary, "--man"] + kdoc_files,
                                      cwd=self.work_dir,
                                      stdout=out, stderr=subprocess.DEVNULL)

            with open(os.path.join(tmp_dir, "rst.log"), "w", encoding="utf-8") as out:
                with open(os.path.join(tmp_dir, "err.log"), "w", encoding="utf-8") as err:
                    subprocess.check_call([self.kdoc_binary, "--rst"] + kdoc_files,
                                          cwd=self.work_dir,
                                          stdout=out, stderr=err)
        except subprocess.CalledProcessError:
            return False

        # Promote the temporary results to the cache location in one step
        if output_dir:
            os.replace(tmp_dir, output_dir)

        return True

    def run(self, commit_hash, tmp_dir, output_dir, kdoc_files, is_regression,
            is_end):
        """
        Run kernel-doc in one of its modes after checking out commit_hash:

        - normal mode: generate man/rst/error logs (cached at output_dir);
        - regression mode, first commit: generate the YAML baseline;
        - regression mode, last commit: run the unit tests against it.
        """
        if not kdoc_files:
            raise RuntimeError("No kernel-doc references found")

        git_helper = GitHelper(self.work_dir)
        if not git_helper.checkout_commit(commit_hash, quiet=True):
            raise RuntimeError(f"ERROR: can't checkout commit {commit_hash}")

        print(f"Processing {commit_hash}...")

        if not is_regression:
            return self.normal_run(tmp_dir, output_dir, kdoc_files)

        yaml_file = os.path.join(tmp_dir, YAML_NAME)

        if not is_end:
            return self.gen_yaml(yaml_file, kdoc_files)

        return self.run_unittest(yaml_file)
| |
class DiffManager:
    """Compare documentation output directories with an external diff."""

    def __init__(self, diff_tool="diff", diff_args=None):
        self.diff_tool = diff_tool
        # default: unified, no context, ignore whitespace changes
        if diff_args is None:
            diff_args = ["-u0", "-w"]
        self.diff_args = diff_args

    def diff_directories(self, dir1, dir2):
        """Compare two directories using an external diff."""
        print(f"\nDiffing {dir1} and {dir2}:")

        def relative_files(base):
            # Collect every file under base, as a path relative to base.
            found = set()
            for root, _, names in os.walk(base):
                for name in names:
                    found.add(os.path.relpath(os.path.join(root, name), base))
            return found

        dir1_files = relative_files(dir1)
        dir2_files = relative_files(dir2)
        has_diff = False

        # Diff files that exist on both sides
        for file in sorted(dir1_files & dir2_files):
            cmd = [self.diff_tool] + self.diff_args
            cmd.append(os.path.join(dir1, file))
            cmd.append(os.path.join(dir2, file))
            try:
                result = subprocess.run(cmd, capture_output=True,
                                        text=True, check=False)
            except FileNotFoundError:
                print(f"ERROR: {self.diff_tool} not found")
                sys.exit(1)
            if result.stdout:
                has_diff = True
                print(f"\n{file}")
                print(result.stdout, end="")

        # Show files that exist only in one directory
        only_in_dir1 = dir1_files - dir2_files
        only_in_dir2 = dir2_files - dir1_files
        if only_in_dir1 or only_in_dir2:
            has_diff = True
            print("\nDifferential files:")
            for f in sorted(only_in_dir1):
                print(f" - {f} (only in {dir1})")
            for f in sorted(only_in_dir2):
                print(f" + {f} (only in {dir2})")

        if not has_diff:
            print("\nNo differences between those two commits")
| |
class SignalHandler:
    """Context manager restoring the original git HEAD on exit or signal."""

    def __init__(self, git_helper, original_head):
        self.git_helper = git_helper
        self.original_head = original_head
        self.old_handler = {}
        self.restored = False

    def restore(self, force_exit=False):
        """Restore original HEAD state."""
        if self.restored:
            return

        print(f"Restoring original branch: {self.original_head}")
        try:
            subprocess.check_call(
                ["git", "checkout", "-f", self.original_head],
                cwd=self.git_helper.work_dir,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            print(f"Failed to restore: {e}", file=sys.stderr)

        # Put the previously installed signal handlers back in place
        for signum, previous in self.old_handler.items():
            signal.signal(signum, previous)

        self.restored = True

        if force_exit:
            sys.exit(1)

    def signal_handler(self, sig, _):
        """Handle interrupt signals."""
        print(f"\nSignal {sig} received. Restoring original state...")
        self.restore(force_exit=True)

    def __enter__(self):
        """Install handlers for SIGINT/SIGTERM, remembering the old ones."""
        for signum in (signal.SIGINT, signal.SIGTERM):
            self.old_handler[signum] = signal.getsignal(signum)
            signal.signal(signum, self.signal_handler)
        return self

    def __exit__(self, *args):
        """Restore signals at the end of with block."""
        self.restore()
| |
def parse_commit_range(value):
    """
    Parse a commit range for argparse.

    Accepts "old..new", "old.." (end defaults to HEAD) or a bare "old"
    (also compared against HEAD).

    Returns a (begin, end) tuple; raises argparse.ArgumentTypeError
    when the beginning ref is missing.
    """
    if ".." not in value:
        begin = value
        end = "HEAD"
    else:
        begin, _, end = value.partition("..")
        if not end:
            end = "HEAD"

    if not begin:
        # Fixed typo in the original message ("begginning")
        raise argparse.ArgumentTypeError("Need a commit beginning")

    print(f"Range: {begin} to {end}")

    return begin, end
| |
| |
def main():
    """Entry point: parse arguments, generate docs for both commits, diff."""
    parser = argparse.ArgumentParser(description="Compare kernel documentation between commits")
    parser.add_argument("commits", type=parse_commit_range,
                        help="commit range like old..new")
    parser.add_argument("files", nargs="*",
                        help="files to process – if supplied the --full flag is ignored")

    parser.add_argument("--full", "-f", action="store_true",
                        help="Force a full scan of Documentation/*")

    parser.add_argument("--regression", "-r", action="store_true",
                        help="Use YAML format to check for regressions")

    parser.add_argument("--work-dir", "-w", default=WORK_DIR,
                        help="work dir (default: %(default)s)")

    parser.add_argument("--clean", "-c", action="store_true",
                        help="Clean caches")

    args = parser.parse_args()

    if args.files and args.full:
        # parser.error() prints usage and exits. The previous
        # argparse.ArgumentError(args.full, ...) was broken: ArgumentError
        # expects an argparse Action object, not a bool value.
        parser.error("cannot combine '--full' with an explicit file list")

    work_dir = os.path.abspath(args.work_dir)

    # Initialize cache
    cache = CacheManager(work_dir)
    cache.initialize()

    # Validate git repository
    git_helper = GitHelper(work_dir)
    if not git_helper.is_inside_repository():
        raise RuntimeError("Must run inside Git repository")

    old_commit, new_commit = args.commits

    old_commit = git_helper.get_short_hash(old_commit)
    new_commit = git_helper.get_short_hash(new_commit)

    # Validate commits
    for commit in [old_commit, new_commit]:
        if not git_helper.is_valid_commit(commit):
            raise RuntimeError(f"Commit '{commit}' does not exist")

    # Check for uncommitted changes
    if git_helper.has_uncommitted_changes():
        raise RuntimeError("Uncommitted changes present. Commit or stash first.")

    runner = KernelDocRunner(git_helper.work_dir, KDOC_BINARY)

    # Get files to be parsed. This must be an if/elif/else chain: with
    # a plain "if not args.files:" the partial branch would always
    # override a previously selected --full scan.
    cache_msg = " (results will be cached)"
    if args.full:
        kdoc_files = ["."]
        diff_type = "full"
        print(f"Parsing all files at {work_dir}")
    elif not args.files:
        diff_type = "partial"
        kdoc_files = runner.find_kdoc_references()
        print(f"Parsing files with kernel-doc markups at {work_dir}/Documentation")
    else:
        diff_type = "no-cache"
        cache_msg = ""
        kdoc_files = args.files

    tmp_dir = DIR_NAME["tmp"]
    out_path = DIR_NAME[diff_type]

    if not args.regression:
        print(f"Output will be stored at: {out_path}{cache_msg}")

    # Just in case - should never happen in practice
    if not kdoc_files:
        parser.error("No kernel-doc references found")

    # "git branch --show-current" is empty on a detached HEAD; fall back
    # to the commit hash so the final restore still works.
    original_head = git_helper.get_current_branch() or \
        git_helper.get_short_hash("HEAD")

    old_cache = cache.get_commit_cache(old_commit, out_path)
    new_cache = cache.get_commit_cache(new_commit, out_path)

    with SignalHandler(git_helper, original_head):
        # Drop stale caches when requested, or when results can't be cached
        if args.clean or diff_type == "no-cache":
            for cache_dir in [old_cache, new_cache]:
                if cache_dir and os.path.exists(cache_dir):
                    shutil.rmtree(cache_dir)

        # Regression mode always re-runs; otherwise reuse cached output
        if args.regression or not os.path.exists(old_cache):
            old_success = runner.run(old_commit, tmp_dir, old_cache, kdoc_files,
                                     args.regression, False)
        else:
            old_success = True

        if args.regression or not os.path.exists(new_cache):
            new_success = runner.run(new_commit, tmp_dir, new_cache, kdoc_files,
                                     args.regression, True)
        else:
            new_success = True

        if not (old_success and new_success):
            raise RuntimeError("Failed to generate documentation")

        if not args.regression:
            diff_manager = DiffManager()
            diff_manager.diff_directories(old_cache, new_cache)
| |
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()