2017-05-01 02:26:56 +02:00
#!/usr/bin/env python3
2020-11-09 13:23:58 +09:00
# SPDX-License-Identifier: LGPL-2.1-or-later
2012-11-13 18:39:18 +01:00
2012-08-09 18:08:14 +02:00
import collections
2013-01-26 10:47:16 -05:00
import re
2023-07-17 19:58:38 +02:00
import sys
2021-05-14 17:53:31 +02:00
from copy import deepcopy
2012-08-09 18:08:14 +02:00
2023-07-17 19:58:38 +02:00
from xml_helper import tree , xml_parse , xml_print
2013-01-14 21:34:19 -05:00
# Text of the colophon paragraph.  _make_colophon() fills it in through
# str.format() using the keyword fields "count", "sections" and "pages", so
# the placeholder names must match those keywords exactly (no padding).
COLOPHON = '''\
This index contains {count} entries in {sections} sections,
referring to {pages} individual manual pages.
'''
2021-05-14 17:53:31 +02:00
def _index_variablelists(t, directive_groups, formatting, ref):
    """Index the <varname> and <option> terms of every <variablelist> in t."""
    storopt = directive_groups['options']
    for variablelist in t.iterfind('.//variablelist'):
        klass = variablelist.attrib.get('class')
        searchpath = variablelist.attrib.get('xpath', './varlistentry/term/varname')
        storvar = directive_groups[klass or 'miscellaneous']
        # <option>s go in OPTIONS, unless class is specified
        optstor = storvar if klass else storopt
        for xpath, stor in ((searchpath, storvar),
                            ('./varlistentry/term/option', optstor)):
            for name in variablelist.iterfind(xpath):
                # Drop everything after the first '=' or ' '; the separator
                # itself is kept and trailing whitespace is then stripped.
                text = re.sub(r'([= ]).*', r'\1', name.text).rstrip()
                if text.startswith('-'):
                    # for options, merge options with and without mandatory arg
                    text = text.partition('=')[0]
                stor[text].append(ref)
                if text not in formatting:
                    # use element as formatted display
                    if name.text[-1] in "= '":
                        name.clear()
                    else:
                        name.tail = ''
                    name.text = text
                    formatting[text] = name

        extra = variablelist.attrib.get('extra-ref')
        if extra:
            # Filed in the same store as this list's <option> terms.
            optstor[extra].append(ref)
            if extra not in formatting:
                elt = tree.Element("varname")
                elt.text = extra
                formatting[extra] = elt


def _index_filenames(t, directive_groups, formatting, ref):
    """Index <filename>/<command> elements of t in the 'filenames' group."""
    storfile = directive_groups['filenames']
    # Anything inside <refsynopsisdiv> is taken as is; elsewhere only
    # <filename> elements whose text starts with '/' are indexed.
    for xpath, absolute_only in (('.//refsynopsisdiv//filename', False),
                                 ('.//refsynopsisdiv//command', False),
                                 ('.//filename', True)):
        for name in t.iterfind(xpath):
            if absolute_only and not (name.text and name.text.startswith('/')):
                continue
            if name.attrib.get('index') == 'false':
                continue
            name.tail = ''
            if name.text:
                if name.text.endswith('*'):
                    name.text = name.text[:-1]
                if not name.text.startswith('.'):
                    # Only the first space-separated word is the key.
                    text = name.text.partition(' ')[0]
                    if text != name.text:
                        name.clear()
                        name.text = text
                    if text.endswith('/'):
                        text = text[:-1]
                    storfile[text].append(ref)
                    if text not in formatting:
                        # use element as formatted display
                        formatting[text] = name
            else:
                # No leading text: build the key from all nested text.
                text = ' '.join(name.itertext())
                storfile[text].append(ref)
                formatting[text] = name


def _index_constants(t, directive_groups, formatting, ref):
    """Index <constant> elements of t, by 'class' attribute or 'constants'."""
    for name in t.iterfind('.//constant'):
        if name.attrib.get('index') == 'false':
            continue
        name.tail = ''
        if name.text.startswith('('):  # a cast, strip it
            name.text = name.text.partition(' ')[2]
        klass = name.attrib.get('class') or 'constants'
        directive_groups[klass][name.text].append(ref)
        formatting[name.text] = name


def _index_specifiers(t, directive_groups, formatting, ref):
    """Index '%' specifier <literal> elements of t in the 'specifiers' group."""
    storspec = directive_groups['specifiers']
    for name in t.iterfind(".//table[@class='specifiers']//entry/literal"):
        # Only literals that start with '%' and are not preceded by text
        # inside their parent <entry>.
        # NOTE(review): getparent() is an lxml element method; this assumes
        # xml_helper's "tree" is lxml.etree - confirm.
        if name.text[0] != '%' or name.getparent().text is not None:
            continue
        if name.attrib.get('index') == 'false':
            continue
        storspec[name.text].append(ref)
        formatting[name.text] = name
    for name in t.iterfind(".//literal[@class='specifiers']"):
        storspec[name.text].append(ref)
        formatting[name.text] = name


def _extract_directives(directive_groups, formatting, page):
    """Parse one manual page and record every indexable item found in it.

    directive_groups maps a group name to a mapping of
    key -> list of (pagename, section) tuples; each inner mapping must create
    missing keys on access (make_page() passes collections.defaultdict(list)).
    The groups 'options', 'filenames' and 'specifiers' are always looked up;
    'miscellaneous', 'constants' and any name used in a 'class' attribute are
    looked up when the page needs them, and a missing group raises KeyError.

    formatting maps each key to the XML element used to display it.  Elements
    taken from the page are modified in place (text trimmed, tail cleared).

    page is handed to xml_parse() unchanged.

    Elements without leading text are not skipped (except in the filename
    scan): they make the string operations below raise, which the caller
    reports as a failure for this page.
    """
    t = xml_parse(page)
    section = t.find('./refmeta/manvolnum').text
    pagename = t.find('./refmeta/refentrytitle').text
    ref = (pagename, section)

    # The order matters: earlier scans rewrite elements that later scans may
    # visit again.
    _index_variablelists(t, directive_groups, formatting, ref)
    _index_filenames(t, directive_groups, formatting, ref)
    _index_constants(t, directive_groups, formatting, ref)
    _index_specifiers(t, directive_groups, formatting, ref)
2020-05-07 14:59:24 +02:00
2013-01-26 11:27:35 -05:00
def _make_section(template, name, directives, formatting):
    """Append one <varlistentry> per directive to the template element *name*.

    template    -- parsed XML template; the element whose "id" attribute
                   equals *name* receives the new entries.
    name        -- id of that element.
    directives  -- mapping key -> list of (manpage, manvolume) tuples.
    formatting  -- mapping key -> element used to display the key.

    Entries are added in sorted key order.  Each entry's <para> lists the
    distinct pages in sorted order as <citerefentry> elements joined by ', '.

    Raises ValueError if there are directives to add but the template has no
    element with that id.  An empty group for a missing element is ignored.
    """
    varlist = template.find(f".//*[@id='{name}']")
    if varlist is None:
        if directives:
            raise ValueError(f"template has no element with id {name!r}")
        return

    for varname, manpages in sorted(directives.items()):
        entry = tree.SubElement(varlist, 'varlistentry')
        term = tree.SubElement(entry, 'term')
        # Insert a copy so the element stored in "formatting" stays untouched
        # and can be used again for another entry.
        term.append(deepcopy(formatting[varname]))

        para = tree.SubElement(tree.SubElement(entry, 'listitem'), 'para')

        previous = None
        for manpage, manvolume in sorted(set(manpages)):
            if previous is not None:
                # The separator goes after every reference but the last.
                previous.tail = ', '
            previous = tree.SubElement(para, 'citerefentry')
            title = tree.SubElement(previous, 'refentrytitle')
            title.text = manpage
            title.attrib['target'] = varname
            volume = tree.SubElement(previous, 'manvolnum')
            volume.text = manvolume
        entry.tail = '\n\n'
2013-01-14 21:34:19 -05:00
def _make_colophon(template, groups):
    """Write the summary text into the template's colophon paragraph.

    template -- parsed XML template containing <para id='colophon'>.
    groups   -- iterable of mappings key -> list of (manpage, manvolume)
                tuples, one mapping per index section.

    The text is COLOPHON formatted with the total number of keys over all
    groups, the number of groups, and the number of distinct
    (manpage, manvolume) tuples referenced anywhere.
    """
    # Materialize once: the groups are both iterated and counted, which would
    # fail for a generator or any other one-shot iterable.
    groups = list(groups)

    count = sum(len(group) for group in groups)
    pages = set()
    for group in groups:
        for pagelist in group.values():
            pages.update(pagelist)

    para = template.find(".//para[@id='colophon']")
    para.text = COLOPHON.format(count=count,
                                sections=len(groups),
                                pages=len(pages))
2013-01-26 11:27:35 -05:00
def _make_page(template, directive_groups, formatting):
    """Fill *template* from directive_groups and return it.

    directive_groups = {
       'class': {'variable': [('manpage', 'manvolume'), ...],
                 'variable2': ...},
       ...
    }

    Every group is written to the template by _make_section() under its own
    name, then _make_colophon() adds the summary.  The template is modified
    in place; the returned object is the same one that was passed in.
    """
    for section_id in directive_groups:
        _make_section(template, section_id,
                      directive_groups[section_id], formatting)

    _make_colophon(template, directive_groups.values())

    return template
2012-08-09 18:08:14 +02:00
2020-05-07 14:31:13 +02:00
def make_page(template_path, xml_files):
    """Extract directives from xml_files and return XML index tree.

    template_path -- template document, handed to xml_parse().  The "id" of
                     each <variablelist> in it names one index group.
    xml_files     -- iterable of manual pages, each handed to xml_parse().

    Returns the parsed template with all groups and the colophon filled in.

    Raises ValueError naming the offending page (with the original exception
    chained as its cause) if any page cannot be processed.
    """
    template = xml_parse(template_path)
    names = [vl.get('id') for vl in template.iterfind('.//variablelist')]
    directive_groups = {name: collections.defaultdict(list)
                        for name in names}
    formatting = {}
    for page in xml_files:
        try:
            _extract_directives(directive_groups, formatting, page)
        except Exception as e:
            # Format rather than concatenate, so a non-str page (for example
            # a pathlib.Path) cannot raise TypeError here and hide the cause.
            raise ValueError(f"failed to process {page}") from e

    return _make_page(template, directive_groups, formatting)
2012-08-09 18:08:14 +02:00
2021-05-14 17:53:31 +02:00
if __name__ == '__main__':
    # Command line: OUTPUT TEMPLATE [XML-PAGE...]
    if len(sys.argv) < 3:
        sys.exit(f'Usage: {sys.argv[0]} OUTPUT TEMPLATE [XML-PAGE...]')

    _output_path = sys.argv[1]
    _template_path = sys.argv[2]
    _xml_files = sys.argv[3:]

    # Build and serialize the whole index before opening the output file:
    # opening with 'wb' truncates it, so a failure while processing a page
    # would otherwise leave an empty output file behind.
    _xml = make_page(_template_path, _xml_files)
    _data = xml_print(_xml)

    # The file is opened in binary mode, so xml_print() is expected to
    # return bytes.
    with open(_output_path, 'wb') as f:
        f.write(_data)