diff options
author | Peter Krempa <pkrempa@redhat.com> | 2023-02-14 13:14:25 +0100 |
---|---|---|
committer | Peter Krempa <pkrempa@redhat.com> | 2023-02-15 12:38:45 +0100 |
commit | 61dcca1b2361c7a05fff194314b1781e73830977 (patch) | |
tree | 3485bfda61c4044375575142d533c746f282c6f9 | |
parent | 5f3a897b936dc82376ab2b3525020376bb3ff50d (diff) | |
download | libvirt-61dcca1b2361c7a05fff194314b1781e73830977.tar.gz |
scripts: check-html-references: Detect pages that are not linked to
Prevent sub-pages without a way to reach them.
Signed-off-by: Peter Krempa <pkrempa@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
-rwxr-xr-x | scripts/check-html-references.py | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/scripts/check-html-references.py b/scripts/check-html-references.py
index 8e726928e0..4f08feab59 100755
--- a/scripts/check-html-references.py
+++ b/scripts/check-html-references.py
@@ -40,6 +40,8 @@ def get_file_list(prefix):
 
             filelist.append(os.path.join(root, file))
 
+    filelist.sort()
+
     return filelist
 
 
@@ -118,9 +120,54 @@ def check_targets(targets, anchors):
     return False
 
 
+def check_usage_crawl(page, targets, visited):
+    visited.append(page)
+
+    tocrawl = []
+
+    for filename, docname, target, _ in targets:
+        if page != filename:
+            continue
+
+        targetpage = target.split("#", 1)[0]
+
+        if targetpage not in visited and targetpage not in tocrawl:
+            tocrawl.append(targetpage)
+
+    for crawl in tocrawl:
+        check_usage_crawl(crawl, targets, visited)
+
+
+# crawls the document references starting from entrypoint and tries to find
+# unreachable pages
+def check_usage(targets, files, entrypoint):
+    visited = []
+    fail = False
+
+    check_usage_crawl(entrypoint, targets, visited)
+
+    for file in files:
+        if file not in visited:
+            brokendoc = file
+
+            for filename, docname, _, _ in targets:
+                if filename != file:
+                    continue
+                if docname:
+                    brokendoc = docname
+                    break
+
+            print(f'ERROR: \'{brokendoc}\': is not referenced from anywhere')
+            fail = True
+
+    return fail
+
+
 parser = argparse.ArgumentParser(description='HTML reference checker')
 parser.add_argument('--webroot', required=True,
                     help='path to the web root')
+parser.add_argument('--entrypoint', default="index.html",
+                    help='file name of web entry point relative to --webroot')
 parser.add_argument('--external', action="store_true",
                     help='print external references instead')
 
@@ -128,8 +175,12 @@ args = parser.parse_args()
 
 files = get_file_list(os.path.abspath(args.webroot))
 
+entrypoint = os.path.join(os.path.abspath(args.webroot), args.entrypoint)
+
 targets, anchors = process_all(files)
 
+fail = False
+
 if args.external:
     prev = None
     externallinks.sort()
@@ -140,6 +191,12 @@ if args.external:
             prev = ext
 else:
     if check_targets(targets, anchors):
+        fail = True
+
+    if check_usage(targets, files, entrypoint):
+        fail = True
+
+    if fail:
         sys.exit(1)
 
     sys.exit(0)