mirror of
https://gitlab.com/libvirt/libvirt.git
synced 2024-12-22 17:34:18 +03:00
scripts: check-html-references: Add checking for image file usage
Check both that every image file is referenced from our pages and that pages reference only existing images. The mode for dumping external references now also dumps images. '--ignore-images' can be used repeatedly to suppress errors for specific images. Signed-off-by: Peter Krempa <pkrempa@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
This commit is contained in:
parent
0e3970bf9b
commit
edac6ca756
@ -24,25 +24,32 @@ import xml.etree.ElementTree as ET
|
|||||||
|
|
||||||
ns = {'html': 'http://www.w3.org/1999/xhtml'}
|
ns = {'html': 'http://www.w3.org/1999/xhtml'}
|
||||||
externallinks = []
|
externallinks = []
|
||||||
|
externalimages = []
|
||||||
|
|
||||||
|
|
||||||
def get_file_list(prefix):
    """Collect the HTML pages and image files found below 'prefix'.

    The directory tree rooted at 'prefix' is walked with os.walk() and every
    file is classified by its extension:

      - '.html' files are treated as pages, except for files whose name
        contains '404.html', which are skipped
      - '.jpg', '.svg' and '.png' files are treated as images
      - anything else is ignored

    Returns a tuple (filelist, imagelist); both are sorted lists of paths
    built by joining the walked directory with the file name.
    """
    filelist = []
    imagelist = []
    imageformats = ('.jpg', '.svg', '.png')

    # the list of sub-directories is not needed; it is named '_dirs' so that
    # the 'dir' builtin is not shadowed
    for root, _dirs, files in os.walk(prefix):
        for file in files:
            ext = os.path.splitext(file)[1]
            path = os.path.join(root, file)

            if ext == '.html':
                # the 404 page doesn't play well
                if '404.html' in file:
                    continue

                filelist.append(path)

            elif ext in imageformats:
                imagelist.append(path)

    filelist.sort()
    imagelist.sort()

    return filelist, imagelist
|
||||||
|
|
||||||
|
|
||||||
# loads an XHTML and extracts all anchors, local and remote links for the one file
|
# loads an XHTML and extracts all anchors, local and remote links for the one file
|
||||||
@ -50,12 +57,14 @@ def process_file(filename):
|
|||||||
tree = ET.parse(filename)
|
tree = ET.parse(filename)
|
||||||
root = tree.getroot()
|
root = tree.getroot()
|
||||||
docname = root.get('data-sourcedoc')
|
docname = root.get('data-sourcedoc')
|
||||||
|
dirname = os.path.dirname(filename)
|
||||||
|
|
||||||
if not docname:
|
if not docname:
|
||||||
docname = filename
|
docname = filename
|
||||||
|
|
||||||
anchors = [filename]
|
anchors = [filename]
|
||||||
targets = []
|
targets = []
|
||||||
|
images = []
|
||||||
|
|
||||||
for elem in root.findall('.//html:a', ns):
|
for elem in root.findall('.//html:a', ns):
|
||||||
target = elem.get('href')
|
target = elem.get('href')
|
||||||
@ -68,7 +77,6 @@ def process_file(filename):
|
|||||||
if re.search('://', target):
|
if re.search('://', target):
|
||||||
externallinks.append(target)
|
externallinks.append(target)
|
||||||
elif target[0] != '#' and 'mailto:' not in target:
|
elif target[0] != '#' and 'mailto:' not in target:
|
||||||
dirname = os.path.dirname(filename)
|
|
||||||
targetfull = os.path.normpath(os.path.join(dirname, target))
|
targetfull = os.path.normpath(os.path.join(dirname, target))
|
||||||
|
|
||||||
targets.append((filename, docname, targetfull, target))
|
targets.append((filename, docname, targetfull, target))
|
||||||
@ -87,20 +95,33 @@ def process_file(filename):
|
|||||||
if an:
|
if an:
|
||||||
anchors.append(filename + '#' + an)
|
anchors.append(filename + '#' + an)
|
||||||
|
|
||||||
return (anchors, targets)
|
# find local images
|
||||||
|
for elem in root.findall('.//html:img', ns):
|
||||||
|
src = elem.get('src')
|
||||||
|
|
||||||
|
if src:
|
||||||
|
if re.search('://', src):
|
||||||
|
externalimages.append(src)
|
||||||
|
else:
|
||||||
|
imagefull = os.path.normpath(os.path.join(dirname, src))
|
||||||
|
images.append((imagefull, docname))
|
||||||
|
|
||||||
|
return (anchors, targets, images)
|
||||||
|
|
||||||
|
|
||||||
def process_all(filelist):
    """Run process_file() on every file and merge the per-file results.

    'filelist' is an iterable of file names handed to process_file() one by
    one. Each call is expected to return a tuple of three lists
    (anchors, targets, images).

    Returns a tuple (targets, anchors, images) with the concatenated lists.
    Note that the order of the first two members is swapped compared to what
    process_file() returns.
    """
    anchors = []
    targets = []
    images = []

    for file in filelist:
        anchor, target, image = process_file(file)

        # extend in place rather than building a new list on every iteration
        targets.extend(target)
        anchors.extend(anchor)
        images.extend(image)

    return (targets, anchors, images)
|
||||||
|
|
||||||
|
|
||||||
def check_targets(targets, anchors):
|
def check_targets(targets, anchors):
|
||||||
@ -163,6 +184,46 @@ def check_usage(targets, files, entrypoint):
|
|||||||
return fail
|
return fail
|
||||||
|
|
||||||
|
|
||||||
|
# checks that images present in the directory are being used and also that
|
||||||
|
# pages link to existing images. For favicons, which are not referenced from
|
||||||
|
# the '.html' files there's a builtin set of exceptions.
|
||||||
|
def check_images(usedimages, imagefiles, ignoreimages):
    """Cross-check images referenced by pages against images on disk.

    Arguments:
      usedimages   - list of (imagepath, docname) tuples, one for each image
                     reference found in a page
      imagefiles   - list of paths of the image files that exist
      ignoreimages - optional list (may be None or empty) of strings; an
                     image file whose path contains any of them is
                     considered used

    Two kinds of problems are reported on stdout:
      - a page references an image which is not among 'imagefiles'
      - an image file is neither referenced by any page nor matched by the
        builtin favicon list or 'ignoreimages'

    Returns True if at least one problem was reported, False otherwise.
    """
    # favicons are not referenced from the '.html' files, so they are always
    # considered to be used
    exceptions = [
        'android-chrome-192x192.png',
        'android-chrome-256x256.png',
        'apple-touch-icon.png',
        'favicon-16x16.png',
        'favicon-32x32.png',
        'mstile-150x150.png',
    ]

    if ignoreimages:
        exceptions = exceptions + ignoreimages

    # build the lookup sets once instead of scanning the lists for every item
    existing = set(imagefiles)
    referenced = {image for image, _ in usedimages}

    fail = False

    for usedimage, docname in usedimages:
        if usedimage not in existing:
            print(f'ERROR: \'{docname}\' references image \'{usedimage}\' not among images')
            fail = True

    for imagefile in imagefiles:
        if imagefile in referenced:
            continue

        # exceptions are matched as substrings of the image path
        if any(exception in imagefile for exception in exceptions):
            continue

        print(f'ERROR: Image \'{imagefile}\' is not used by any page')
        fail = True

    return fail
|
||||||
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='HTML reference checker')
|
parser = argparse.ArgumentParser(description='HTML reference checker')
|
||||||
parser.add_argument('--webroot', required=True,
|
parser.add_argument('--webroot', required=True,
|
||||||
help='path to the web root')
|
help='path to the web root')
|
||||||
@ -170,14 +231,16 @@ parser.add_argument('--entrypoint', default="index.html",
|
|||||||
help='file name of web entry point relative to --webroot')
|
help='file name of web entry point relative to --webroot')
|
||||||
parser.add_argument('--external', action="store_true",
|
parser.add_argument('--external', action="store_true",
|
||||||
help='print external references instead')
|
help='print external references instead')
|
||||||
|
parser.add_argument('--ignore-images', action='append',
|
||||||
|
help='paths to images that should be considered as used')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
files = get_file_list(os.path.abspath(args.webroot))
|
files, imagefiles = get_file_list(os.path.abspath(args.webroot))
|
||||||
|
|
||||||
entrypoint = os.path.join(os.path.abspath(args.webroot), args.entrypoint)
|
entrypoint = os.path.join(os.path.abspath(args.webroot), args.entrypoint)
|
||||||
|
|
||||||
targets, anchors = process_all(files)
|
targets, anchors, usedimages = process_all(files)
|
||||||
|
|
||||||
fail = False
|
fail = False
|
||||||
|
|
||||||
@ -186,7 +249,14 @@ if args.external:
|
|||||||
externallinks.sort()
|
externallinks.sort()
|
||||||
for ext in externallinks:
|
for ext in externallinks:
|
||||||
if ext != prev:
|
if ext != prev:
|
||||||
print(ext)
|
print(f'link: {ext}')
|
||||||
|
|
||||||
|
prev = ext
|
||||||
|
|
||||||
|
externalimages.sort()
|
||||||
|
for ext in externalimages:
|
||||||
|
if ext != prev:
|
||||||
|
print(f'image: {ext}')
|
||||||
|
|
||||||
prev = ext
|
prev = ext
|
||||||
else:
|
else:
|
||||||
@ -196,6 +266,9 @@ else:
|
|||||||
if check_usage(targets, files, entrypoint):
|
if check_usage(targets, files, entrypoint):
|
||||||
fail = True
|
fail = True
|
||||||
|
|
||||||
|
if check_images(usedimages, imagefiles, args.ignore_images):
|
||||||
|
fail = True
|
||||||
|
|
||||||
if fail:
|
if fail:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user