2017-05-01 03:26:56 +03:00
#!/usr/bin/env python3
2020-11-09 07:23:58 +03:00
# SPDX-License-Identifier: LGPL-2.1-or-later
2012-11-13 21:39:18 +04:00
2012-08-09 20:08:14 +04:00
import sys
import collections
2013-01-26 19:47:16 +04:00
import re
2021-05-12 12:24:25 +03:00
import concurrent . futures
2017-07-03 03:26:32 +03:00
from xml_helper import xml_parse , xml_print , tree
2012-08-09 20:08:14 +04:00
2013-01-15 06:34:19 +04:00
# Template for the colophon paragraph of the generated index.  It is filled in
# with str.format() using the keyword fields ``count``, ``sections`` and
# ``pages``; the field names must therefore be written without padding.
COLOPHON = '''\
This index contains {count} entries in {sections} sections,
referring to {pages} individual manual pages.
'''
2021-05-12 12:24:25 +03:00
def _index_variablelists(t, directive_groups, formatting, ref):
    """Index the <varname>/<option> terms of every <variablelist> in *t*."""
    storopt = directive_groups['options']
    for variablelist in t.iterfind('.//variablelist'):
        klass = variablelist.attrib.get('class')
        searchpath = variablelist.attrib.get('xpath', './varlistentry/term/varname')
        storvar = directive_groups[klass or 'miscellaneous']
        # <option>s go in OPTIONS, unless class is specified
        optstor = storvar if klass else storopt

        for xpath, stor in ((searchpath, storvar),
                            ('./varlistentry/term/option', optstor)):
            for name in variablelist.iterfind(xpath):
                # Cut everything after the first "=" or space (the argument).
                text = re.sub(r'([= ]).*', r'\1', name.text).rstrip()
                if text.startswith('-'):
                    # for options, merge options with and without mandatory arg
                    text = text.partition('=')[0]
                stor[text].add(ref)
                if text not in formatting:
                    # use element as formatted display
                    if name.text[-1] in "= '":
                        # clear() also drops children, attributes and tail
                        name.clear()
                    else:
                        name.tail = ''
                    name.text = text
                    formatting[text] = name

        extra = variablelist.attrib.get('extra-ref')
        if extra:
            # The extra reference is filed in the same group as the list's
            # <option> terms (previously done via the leaked loop variable).
            optstor[extra].add(ref)
            if extra not in formatting:
                elt = tree.Element("varname")
                elt.text = extra
                formatting[extra] = elt


def _index_filenames(t, directive_groups, formatting, ref):
    """Index <filename>/<command> elements of *t* under 'filenames'."""
    storfile = directive_groups['filenames']
    # Anything in the synopsis is indexed; elsewhere only absolute paths are.
    for xpath, absolute_only in (('.//refsynopsisdiv//filename', False),
                                 ('.//refsynopsisdiv//command', False),
                                 ('.//filename', True)):
        for name in t.iterfind(xpath):
            if absolute_only and not (name.text and name.text.startswith('/')):
                continue
            if name.attrib.get('index') == 'false':
                continue
            name.tail = ''
            if name.text:
                if name.text.endswith('*'):
                    name.text = name.text[:-1]
                if not name.text.startswith('.'):
                    # Keep only the first word of the element text.
                    text = name.text.partition(' ')[0]
                    if text != name.text:
                        name.clear()
                        name.text = text
                    # The index key has no trailing slash; the display keeps it.
                    if text.endswith('/'):
                        text = text[:-1]
                    storfile[text].add(ref)
                    if text not in formatting:
                        # use element as formatted display
                        formatting[text] = name
            else:
                # No direct text: build the key from the nested text nodes.
                text = ' '.join(name.itertext())
                storfile[text].add(ref)
                formatting[text] = name


def _index_constants(t, directive_groups, formatting, ref):
    """Index <constant> elements of *t* under 'constants'."""
    storconst = directive_groups['constants']
    for name in t.iterfind('.//constant'):
        if name.attrib.get('index') == 'false':
            continue
        name.tail = ''
        if name.text.startswith('('):  # a cast, strip it
            name.text = name.text.partition(' ')[2]
        storconst[name.text].add(ref)
        formatting[name.text] = name


def _index_specifiers(t, directive_groups, formatting, ref):
    """Index '%' specifier literals of *t* under 'specifiers'."""
    storspec = directive_groups['specifiers']
    for name in t.iterfind(".//table[@class='specifiers']//entry/literal"):
        # Only literals that start with '%' and whose parent entry has no text
        # before its first child.  getparent() is not part of the stdlib
        # ElementTree API -- presumably the tree is an lxml one; TODO confirm.
        if name.text[0] != '%' or name.getparent().text is not None:
            continue
        if name.attrib.get('index') == 'false':
            continue
        storspec[name.text].add(ref)
        formatting[name.text] = name
    for name in t.iterfind(".//literal[@class='specifiers']"):
        storspec[name.text].add(ref)
        formatting[name.text] = name


def _extract_directives(page, names):
    """Collect the index entries found in one manual page.

    *page* is passed to xml_parse(); *names* are the group names to create.
    The page must provide ./refmeta/manvolnum and ./refmeta/refentrytitle,
    and the groups 'options', 'miscellaneous', 'filenames', 'constants' and
    'specifiers' are looked up by key as needed, so a missing group or
    element raises.

    Returns ``(directive_groups, formatting)`` where ``directive_groups`` maps
    group name -> {entry text -> set of (pagename, section)} and
    ``formatting`` maps entry text -> the xml_print() serialization of the
    element used to display that entry.

    Note that elements of the parsed tree are modified in place (text, tail,
    children) while they are turned into display elements.
    """
    directive_groups = {name: collections.defaultdict(set) for name in names}

    t = xml_parse(page)
    section = t.find('./refmeta/manvolnum').text
    pagename = t.find('./refmeta/refentrytitle').text
    ref = (pagename, section)

    formatting = {}

    # The phases share `formatting` and mutate elements, so order matters.
    _index_variablelists(t, directive_groups, formatting, ref)
    _index_filenames(t, directive_groups, formatting, ref)
    _index_constants(t, directive_groups, formatting, ref)
    _index_specifiers(t, directive_groups, formatting, ref)

    # Serialize to allow pickling
    formatting = {name: xml_print(value) for name, value in formatting.items()}

    return directive_groups, formatting
def extract_directives(arg):
    """Extract the directives of one page, given a ``(page, names)`` pair.

    Takes a single tuple argument so that it can be used directly with a
    one-argument ``map()``.  Returns whatever _extract_directives() returns.

    Raises ValueError naming the page if processing fails for any reason; the
    original exception is chained as ``__cause__`` so the root cause is kept.
    """
    page, names = arg
    try:
        return _extract_directives(page, names)
    except Exception as e:
        raise ValueError("Failed to process {}".format(page)) from e
2013-01-26 20:27:35 +04:00
def _make_section ( template , name , directives , formatting ) :
2013-01-15 06:02:49 +04:00
varlist = template . find ( " .//*[@id= ' {} ' ] " . format ( name ) )
2012-08-09 20:08:14 +04:00
for varname , manpages in sorted ( directives . items ( ) ) :
entry = tree . SubElement ( varlist , ' varlistentry ' )
2013-01-26 20:27:35 +04:00
term = tree . SubElement ( entry , ' term ' )
2021-05-12 12:24:25 +03:00
display = tree . fromstring ( formatting [ varname ] )
2013-05-30 06:31:20 +04:00
term . append ( display )
2013-01-26 20:27:35 +04:00
2012-08-09 20:08:14 +04:00
para = tree . SubElement ( tree . SubElement ( entry , ' listitem ' ) , ' para ' )
b = None
2013-01-26 19:47:16 +04:00
for manpage , manvolume in sorted ( set ( manpages ) ) :
2013-05-30 06:31:20 +04:00
if b is not None :
b . tail = ' , '
b = tree . SubElement ( para , ' citerefentry ' )
c = tree . SubElement ( b , ' refentrytitle ' )
c . text = manpage
2015-11-23 07:43:36 +03:00
c . attrib [ ' target ' ] = varname
2013-05-30 06:31:20 +04:00
d = tree . SubElement ( b , ' manvolnum ' )
d . text = manvolume
2012-08-09 20:08:14 +04:00
entry . tail = ' \n \n '
2013-01-15 06:34:19 +04:00
def _make_colophon(template, groups):
    """Write the summary text into the <para id='colophon'> of *template*.

    *groups* is a sized iterable of mappings (entry text -> iterable of page
    references).  The text reports the total number of entries, the number of
    groups, and the number of distinct page references across all groups.
    """
    count = 0
    pages = set()
    for group in groups:
        count += len(group)
        for pagelist in group.values():
            pages |= set(pagelist)

    para = template.find(".//para[@id='colophon']")
    para.text = COLOPHON.format(count=count,
                                sections=len(groups),
                                pages=len(pages))
2013-01-26 20:27:35 +04:00
def _make_page(template, directive_groups, formatting):
    """Populate *template* from directive_groups and return it.

    directive_groups has the shape

        {'class': {'variable': [('manpage', 'manvolume'), ...],
                   'variable2': ...},
         ...
        }

    Each group is written with _make_section(), then the colophon is
    filled in from all groups with _make_colophon().
    """
    for group_name in directive_groups:
        _make_section(template, group_name, directive_groups[group_name], formatting)

    all_groups = directive_groups.values()
    _make_colophon(template, all_groups)

    return template
2012-08-09 20:08:14 +04:00
2020-05-07 15:31:13 +03:00
def make_page(template_path, xml_files):
    """Extract directives from xml_files and return XML index tree.

    The group names are the ``id`` attributes of the <variablelist> elements
    of the template at *template_path*.  Every file in *xml_files* is
    processed by extract_directives() in a process pool, and the per-file
    results are merged before the template is filled in.
    """
    template = xml_parse(template_path)
    names = [vl.get('id') for vl in template.iterfind('.//variablelist')]

    with concurrent.futures.ProcessPoolExecutor() as pool:
        args = ((xml_file, names) for xml_file in xml_files)
        results = list(pool.map(extract_directives, args))

    directive_groups = {name: collections.defaultdict(set) for name in names}
    formatting = {}
    # Merge last-to-first: dict.update() overwrites, so for an entry seen in
    # several files the formatting from the earliest file is the one kept.
    for d_g, f in reversed(results):
        for group, mapping in d_g.items():
            for name, value in mapping.items():
                directive_groups[group][name].update(value)
        formatting.update(f)

    return _make_page(template, directive_groups, formatting)
2012-08-09 20:08:14 +04:00
2021-05-12 12:24:25 +03:00
def main(output, template_path, *xml_files):
    """Generate the index from *xml_files* and write it to *output*.

    The page is built and serialized before *output* is opened, so a failure
    while generating it neither creates nor truncates the output file.
    """
    xml = make_page(template_path, xml_files)
    data = xml_print(xml)
    with open(output, 'wb') as f:
        f.write(data)
2021-05-12 12:24:25 +03:00
# Command line: <output> <template> [<xml file>...]; the arguments are passed
# to main() positionally.
if __name__ == '__main__':
    main(*sys.argv[1:])