#!/usr/bin/python -u # # tries to parse the output of gtk-doc declaration files and make # an XML reusable description from them # # TODO: try to extracts comments from the DocBook output of import sys import string macros = {} variables = {} structs = {} typedefs = {} enums = {} functions = {} user_functions = {} ret_types = {} types = {} sections = [] files = {} identifiers_file = {} identifiers_type = {} ################################################################## # # Parsing: libxml-decl.txt # ################################################################## def mormalizeTypeSpaces(raw, function): global types tokens = string.split(raw) type = '' for token in tokens: if type != '': type = type + ' ' + token else: type = token if types.has_key(type): types[type].append(function) else: types[type] = [function] return type def removeComments(raw): while string.find(raw, '/*') > 0: e = string.find(raw, '/*') tmp = raw[0:e] raw = raw[e:] e = string.find(raw, '*/') if e > 0: raw = tmp + raw[e + 2:] else: raw = tmp return raw def extractArgs(raw, function): raw = removeComments(raw) raw = string.replace(raw, '\n', ' ') raw = string.replace(raw, '\r', ' ') list = string.split(raw, ",") ret = [] for arg in list: i = len(arg) if i == 0: continue i = i - 1 c = arg[i] while string.find(string.letters, c) >= 0 or \ string.find(string.digits, c) >= 0 or c == '_': i = i - 1 if i < 0: break c = arg[i] name = arg[i+1:] while string.find(string.whitespace, c) >= 0: i = i - 1 if i < 0: break c = arg[i] type = mormalizeTypeSpaces(arg[0:i+1], function) if name == 'void' and type == '': pass else: ret.append([type, name, '']) return ret def extractTypes(raw, function): global ret_types tokens = string.split(raw) type = '' for token in tokens: if type != '': type = type + ' ' + token else: type = token if ret_types.has_key(type): ret_types[type].append(function) else: ret_types[type] = [function] return type def parseMacro(): global input global macros global variables var = 1 line = input.readline()[:-1] while line != "": if line[0:6] == "" and line[-7:] == "": name = line[6:-7] elif string.find(line, "#define") >= 0: var = 0 line = input.readline()[:-1] if var == 1: variables[name] = ['', ''] # type, info identifiers_type[name] = "variable" else: macros[name] = [[], ''] # args, info identifiers_type[name] = "macro" def parseStruct(): global input global structs line = input.readline()[:-1] while line != "": if line[0:6] == "" and line[-7:] == "": name = line[6:-7] line = input.readline()[:-1] structs[name] = '' identifiers_type[name] = "struct" def parseTypedef(): global input global typedefs line = input.readline()[:-1] while line != "": if line[0:6] == "" and line[-7:] == "": name = line[6:-7] line = input.readline()[:-1] typedefs[name] = '' identifiers_type[name] = "typedef" def parseEnum(): global input global enums line = input.readline()[:-1] consts = [] while line != "": if line[0:6] == "" and line[-7:] == "": name = line[6:-7] elif string.find(line, 'enum') >= 0: pass elif string.find(line, '{') >= 0: pass elif string.find(line, '}') >= 0: pass elif string.find(line, ';') >= 0: pass else: comment = string.find(line, '/*') if comment >= 0: line = line[0:comment] decls = string.split(line, ",") for decl in decls: val = string.split(decl, "=")[0] tokens = string.split(val) if len(tokens) >= 1: token = tokens[0] if string.find(string.letters, token[0]) >= 0: consts.append(token) identifiers_type[token] = "const" line = input.readline()[:-1] enums[name] = [consts, ''] identifiers_type[name] = "enum" def parseStaticFunction(): global input global user_functions line = input.readline()[:-1] type = None signature = "" while line != "": if line[0:6] == "" and line[-7:] == "": name = line[6:-7] elif line[0:9] == "" and line[-10:] == "": type = extractTypes(line[9:-10], name) else: signature = signature + line line = input.readline()[:-1] args = extractArgs(signature, name) user_functions[name] = [[type, ''] , args, ''] identifiers_type[name] = "functype" def parseFunction(): global input global functions line = input.readline()[:-1] type = None signature = "" while line != "": if line[0:6] == "" and line[-7:] == "": name = line[6:-7] elif line[0:9] == "" and line[-10:] == "": type = extractTypes(line[9:-10], name) else: signature = signature + line line = input.readline()[:-1] args = extractArgs(signature, name) functions[name] = [[type, ''] , args, ''] identifiers_type[name] = "function" print "Parsing: libxml-decl.txt" input = open('libxml-decl.txt') while 1: line = input.readline() if not line: break line = line[:-1] if line == "": parseMacro() elif line == "": parseEnum() elif line == "": parseFunction() elif line == "": parseStruct() elif line == "": parseTypedef() elif line == "": parseStaticFunction() elif len(line) >= 1 and line[0] == "<": print "unhandled %s" % (line) print "Parsed: %d macros. %d structs, %d typedefs, %d enums" % ( len(macros.keys()), len(structs.keys()), len(typedefs.keys()), len(enums)) c = 0 for enum in enums.keys(): consts = enums[enum][0] c = c + len(consts) print " %d variables, %d constants, %d functions and %d functypes" % ( len(variables.keys()), c, len(functions.keys()), len(user_functions.keys())) print "The functions manipulates %d different types" % (len(types.keys())) print "The functions returns %d different types" % (len(ret_types.keys())) ################################################################## # # Parsing: libxml-decl-list.txt # ################################################################## def parseSection(): global input global sections global files global identifiers_file tokens = [] line = input.readline()[:-1] while line != "": if line[0:6] == "" and line[-7:] == "": name = line[6:-7] elif len(line) > 0: tokens.append(line) line = input.readline()[:-1] sections.append(name) files[name] = tokens for token in tokens: identifiers_file[token] = name # # Small transitivity for enum values # if enums.has_key(token): for const in enums[token][0]: identifiers_file[const] = name print "Parsing: libxml-decl-list.txt" input = open('libxml-decl-list.txt') while 1: line = input.readline() if not line: break line = line[:-1] if line == "
": parseSection() elif len(line) >= 1 and line[0] == "<": print "unhandled %s" % (line) print "Parsed: %d files %d identifiers" % (len(files), len(identifiers_file.keys())) ################################################################## # # Parsing: xml/*.xml # To enrich the existing info with extracted comments # ################################################################## nbcomments = 0 def insertParameterComment(id, name, value, is_param): global nbcomments if functions.has_key(id): if is_param == 1: args = functions[id][1] found = 0 for arg in args: if arg[1] == name: arg[2] = value found = 1 break if found == 0 and name != '...': print "Arg %s not found on function %s description" % (name, id) return else: ret = functions[id][0] ret[1] = value elif user_functions.has_key(id): if is_param == 1: args = user_functions[id][1] found = 0 for arg in args: if arg[1] == name: arg[2] = value found = 1 break if found == 0 and name != '...': print "Arg %s not found on functype %s description" % (name, id) print args return else: ret = user_functions[id][0] ret[1] = value elif macros.has_key(id): if is_param == 1: args = macros[id][0] found = 0 for arg in args: if arg[0] == name: arg[1] = value found = 1 break if found == 0: args.append([name, value]) else: print "Return info for macro %s: %s" % (id, value) # ret = macros[id][0] # ret[1] = value else: print "lost specific comment %s: %s: %s" % (id, name, value) return nbcomments = nbcomments + 1 def insertComment(name, title, value): global nbcomments if functions.has_key(name): functions[name][2] = value return "function" elif typedefs.has_key(name): typedefs[name] = value return "typedef" elif macros.has_key(name): macros[name][1] = value return "macro" elif variables.has_key(name): variables[name][1] = value return "variable" elif structs.has_key(name): structs[name] = value return "struct" elif enums.has_key(name): enums[name][1] = value return "enum" elif user_functions.has_key(name): user_functions[name][2] = value return "user_function" else: print "lost comment %s: %s" % (name, value) return "unknown" nbcomments = nbcomments + 1 import os import xmllib try: import sgmlop except ImportError: sgmlop = None # accelerator not available debug = 0 if sgmlop: class FastParser: """sgmlop based XML parser. this is typically 15x faster than SlowParser...""" def __init__(self, target): # setup callbacks self.finish_starttag = target.start self.finish_endtag = target.end self.handle_data = target.data # activate parser self.parser = sgmlop.XMLParser() self.parser.register(self) self.feed = self.parser.feed self.entity = { "amp": "&", "gt": ">", "lt": "<", "apos": "'", "quot": '"' } def close(self): try: self.parser.close() finally: self.parser = self.feed = None # nuke circular reference def handle_entityref(self, entity): # entity try: self.handle_data(self.entity[entity]) except KeyError: self.handle_data("&%s;" % entity) else: FastParser = None class SlowParser(xmllib.XMLParser): """slow but safe standard parser, based on the XML parser in Python's standard library.""" def __init__(self, target): self.unknown_starttag = target.start self.handle_data = target.data self.unknown_endtag = target.end xmllib.XMLParser.__init__(self) def getparser(target = None): # get the fastest available parser, and attach it to an # unmarshalling object. return both objects. if target == None: target = docParser() if FastParser: return FastParser(target), target return SlowParser(target), target class docParser: def __init__(self): self._methodname = None self._data = [] self.id = None self.title = None self.descr = None self.string = None def close(self): if debug: print "close" def getmethodname(self): return self._methodname def data(self, text): if debug: print "data %s" % text self._data.append(text) def start(self, tag, attrs): if debug: print "start %s, %s" % (tag, attrs) if tag == 'refsect2': self.id = None self.title = None self.descr = None self.string = None self.type = None self.in_parameter = 0 self.is_parameter = 0 self.parameter = None self.parameter_info = None self.entry = 0 elif tag == 'para': self._data = [] elif tag == 'title': self._data = [] elif tag == 'tgroup': self.in_parameter = 1 elif tag == 'row': self._data = [] self.entry = 0 elif tag == 'entry': self.entry = self.entry + 1 elif tag == 'parameter' and self.in_parameter == 1: self._data = [] elif tag == 'anchor' and self.id == None: if attrs.has_key('id'): self.id = attrs['id'] self.id = string.replace(self.id, '-CAPS', '') self.id = string.replace(self.id, '-', '_') def end(self, tag): if debug: print "end %s" % tag if tag == 'refsect2': self.type = insertComment(self.id, self.title, self.string) self.string = None elif tag == 'row': if self.parameter_info != None and self.parameter_info != '': insertParameterComment(self.id, self.parameter, self.parameter_info, self.is_parameter) self.parameter_info = None self.parameter = 0 self.is_parameter = 0 elif tag == 'parameter' and self.in_parameter == 1 and self.entry == 1: str = '' for c in self._data: str = str + c str = string.replace(str, '\n', ' ') str = string.replace(str, '\r', ' ') str = string.replace(str, ' ', ' ') str = string.replace(str, ' ', ' ') str = string.replace(str, ' ', ' ') while len(str) >= 1 and str[0] == ' ': str=str[1:] self.parameter = str self.is_parameter = 1 self._data = [] elif tag == 'para' or tag == 'entry': str = '' for c in self._data: str = str + c str = string.replace(str, '\n', ' ') str = string.replace(str, '\r', ' ') str = string.replace(str, ' ', ' ') str = string.replace(str, ' ', ' ') str = string.replace(str, ' ', ' ') while len(str) >= 1 and str[0] == ' ': str=str[1:] if self.string == None: self.string = str elif self.in_parameter == 1: self.parameter_info = str self._data = [] elif tag == 'title': str = '' for c in self._data: str = str + c str = string.replace(str, '\n', ' ') str = string.replace(str, '\r', ' ') str = string.replace(str, ' ', ' ') str = string.replace(str, ' ', ' ') str = string.replace(str, ' ', ' ') while len(str) >= 1 and str[0] == ' ': str=str[1:] self.title = str xmlfiles = 0 filenames = os.listdir("xml") for filename in filenames: try: f = open("xml/" + filename, 'r') except IOError, msg: print file, ":", msg continue data = f.read() (parser, target) = getparser() parser.feed(data) parser.close() xmlfiles = xmlfiles + 1 print "Parsed: %d XML files collexting %d comments" % (xmlfiles, nbcomments) ################################################################## # # Saving: libxml2-api.xml # ################################################################## def escape(raw): raw = string.replace(raw, '&', '&') raw = string.replace(raw, '<', '<') raw = string.replace(raw, '>', '>') raw = string.replace(raw, "'", ''') raw = string.replace(raw, '"', '"') return raw print "Saving XML description libxml2-api.xml" output = open("libxml2-api.xml", "w") output.write("\n") output.write(" \n") for file in files.keys(): output.write(" \n" % file) for symbol in files[file]: output.write(" \n" % (symbol)) output.write(" \n") output.write(" \n") output.write(" \n") symbols=macros.keys() for i in structs.keys(): symbols.append(i) for i in variables.keys(): variables.append(i) for i in typedefs.keys(): symbols.append(i) for i in enums.keys(): symbols.append(i) for j in enums[i][0]: symbols.append(j) for i in functions.keys(): symbols.append(i) for i in user_functions.keys(): symbols.append(i) symbols.sort() prev = None for i in symbols: if i == prev: # print "Symbol %s redefined" % (i) continue else: prev = i if identifiers_type.has_key(i): type = identifiers_type[i] if identifiers_file.has_key(i): file = identifiers_file[i] else: file = None output.write(" <%s name='%s'" % (type, i)) if file != None: output.write(" file='%s'" % (file)) if type == "function": output.write(">\n"); (ret, args, doc) = functions[i] if doc != None and doc != '': output.write(" %s\n" % (escape(doc))) if ret[1] != None and ret[1] != '': output.write(" \n" % ( ret[0], escape(ret[1]))) else: if ret[0] != 'void' and\ ret[0][0:4] != 'void': # This one is actually a bug in GTK Doc print "Description for return on %s is missing" % (i) output.write(" \n" % (ret[0])) for arg in args: if arg[2] != None and arg[2] != '': output.write(" \n" % (arg[1], arg[0], escape(arg[2]))) else: if arg[0] != '...': print "Description for %s on %s is missing" % (arg[1], i) output.write(" \n" % ( arg[1], arg[0])) output.write(" \n" % (type)); elif type == 'functype': output.write(">\n"); (ret, args, doc) = user_functions[i] if doc != None and doc != '': output.write(" %s\n" % (escape(doc))) if ret[1] != None and ret[1] != '': output.write(" \n" % ( ret[0], escape(ret[1]))) else: if ret[0] != 'void' and\ ret[0][0:4] != 'void': # This one is actually a bug in GTK Doc print "Description for return on %s is missing" % (i) output.write(" \n" % (ret[0])) for arg in args: if arg[2] != None and arg[2] != '': output.write(" \n" % (arg[1], arg[0], escape(arg[2]))) else: if arg[0] != '...': print "Description for %s on %s is missing" % (arg[1], i) output.write(" \n" % ( arg[1], arg[0])) output.write(" \n" % (type)); elif type == 'macro': output.write(">\n"); if macros[i][1] != None and macros[i][1] != '': output.write(" %s\n" % (escape(macros[i][1]))) else: print "Description for %s is missing" % (i) args = macros[i][0] for arg in args: if arg[1] != None and arg[1] != '': output.write(" \n" % (arg[0], escape(arg[1]))) else: print "Description for %s on %s is missing" % (arg[1], i) output.write(" \n" % (arg[0])) output.write(" \n" % (type)); elif type == 'struct': if structs[i] != None and structs[i] != '': output.write(" info='%s'/>\n" % (escape(structs[i]))) else: output.write("/>\n"); elif type == 'variable': if variables[i][1] != None and variables[i][1] != '': output.write(" info='%s'/>\n" % (escape(variables[i]))) else: output.write("/>\n"); elif type == 'typedef': if typedefs[i] != None and typedefs[i] != '': output.write(" info='%s'/>\n" % (escape(typedefs[i]))) else: output.write("/>\n"); else: output.write("/>\n"); else: print "Symbol %s not found in identifiers list" % (i) output.write(" \n") output.write("\n") print "generated XML for %d symbols" % (len(symbols))