#!/usr/bin/env python3 # SPDX-License-Identifier: LGPL-2.1-or-later import collections import re import sys from copy import deepcopy from xml_helper import tree, xml_parse, xml_print COLOPHON = '''\ This index contains {count} entries in {sections} sections, referring to {pages} individual manual pages. ''' def _extract_directives(directive_groups, formatting, page): t = xml_parse(page) section = t.find('./refmeta/manvolnum').text pagename = t.find('./refmeta/refentrytitle').text storopt = directive_groups['options'] for variablelist in t.iterfind('.//variablelist'): klass = variablelist.attrib.get('class') searchpath = variablelist.attrib.get('xpath','./varlistentry/term/varname') storvar = directive_groups[klass or 'miscellaneous'] # <option>s go in OPTIONS, unless class is specified for xpath, stor in ((searchpath, storvar), ('./varlistentry/term/option', storvar if klass else storopt)): for name in variablelist.iterfind(xpath): text = re.sub(r'([= ]).*', r'\1', name.text).rstrip() if text.startswith('-'): # for options, merge options with and without mandatory arg text = text.partition('=')[0] stor[text].append((pagename, section)) if text not in formatting: # use element as formatted display if name.text[-1] in "= '": name.clear() else: name.tail = '' name.text = text formatting[text] = name extra = variablelist.attrib.get('extra-ref') if extra: stor[extra].append((pagename, section)) if extra not in formatting: elt = tree.Element("varname") elt.text= extra formatting[extra] = elt storfile = directive_groups['filenames'] for xpath, absolute_only in (('.//refsynopsisdiv//filename', False), ('.//refsynopsisdiv//command', False), ('.//filename', True)): for name in t.iterfind(xpath): if absolute_only and not (name.text and name.text.startswith('/')): continue if name.attrib.get('index') == 'false': continue name.tail = '' if name.text: if name.text.endswith('*'): name.text = name.text[:-1] if not name.text.startswith('.'): text = name.text.partition(' ')[0] if text != name.text: name.clear() name.text = text if text.endswith('/'): text = text[:-1] storfile[text].append((pagename, section)) if text not in formatting: # use element as formatted display formatting[text] = name else: text = ' '.join(name.itertext()) storfile[text].append((pagename, section)) formatting[text] = name for name in t.iterfind('.//constant'): if name.attrib.get('index') == 'false': continue name.tail = '' if name.text.startswith('('): # a cast, strip it name.text = name.text.partition(' ')[2] klass = name.attrib.get('class') or 'constants' storfile = directive_groups[klass] storfile[name.text].append((pagename, section)) formatting[name.text] = name storfile = directive_groups['specifiers'] for name in t.iterfind(".//table[@class='specifiers']//entry/literal"): if name.text[0] != '%' or name.getparent().text is not None: continue if name.attrib.get('index') == 'false': continue storfile[name.text].append((pagename, section)) formatting[name.text] = name for name in t.iterfind(".//literal[@class='specifiers']"): storfile[name.text].append((pagename, section)) formatting[name.text] = name def _make_section(template, name, directives, formatting): varlist = template.find(f".//*[@id='{name}']") for varname, manpages in sorted(directives.items()): entry = tree.SubElement(varlist, 'varlistentry') term = tree.SubElement(entry, 'term') display = deepcopy(formatting[varname]) term.append(display) para = tree.SubElement(tree.SubElement(entry, 'listitem'), 'para') b = None for manpage, manvolume in sorted(set(manpages)): if b is not None: b.tail = ', ' b = tree.SubElement(para, 'citerefentry') c = tree.SubElement(b, 'refentrytitle') c.text = manpage c.attrib['target'] = varname d = tree.SubElement(b, 'manvolnum') d.text = manvolume entry.tail = '\n\n' def _make_colophon(template, groups): count = 0 pages = set() for group in groups: count += len(group) for pagelist in group.values(): pages |= set(pagelist) para = template.find(".//para[@id='colophon']") para.text = COLOPHON.format(count=count, sections=len(groups), pages=len(pages)) def _make_page(template, directive_groups, formatting): """Create an XML tree from directive_groups. directive_groups = { 'class': {'variable': [('manpage', 'manvolume'), ...], 'variable2': ...}, ... } """ for name, directives in directive_groups.items(): _make_section(template, name, directives, formatting) _make_colophon(template, directive_groups.values()) return template def make_page(template_path, xml_files): "Extract directives from xml_files and return XML index tree." template = xml_parse(template_path) names = [vl.get('id') for vl in template.iterfind('.//variablelist')] directive_groups = {name:collections.defaultdict(list) for name in names} formatting = {} for page in xml_files: try: _extract_directives(directive_groups, formatting, page) except Exception as e: raise ValueError("failed to process " + page) from e return _make_page(template, directive_groups, formatting) if __name__ == '__main__': with open(sys.argv[1], 'wb') as f: _template_path = sys.argv[2] _xml_files = sys.argv[3:] _xml = make_page(_template_path, _xml_files) f.write(xml_print(_xml))