mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-01-15 23:24:06 +03:00
257 lines
6.9 KiB
Python
257 lines
6.9 KiB
Python
|
#!/usr/bin/python -u
|
||
|
import sys
|
||
|
import string
|
||
|
import time
|
||
|
|
||
|
# Space-separated names of the Unicode data files this script reads; the
# string is embedded verbatim in the banner of the generated C files.
sources = "Blocks-4.txt UnicodeData-3.1.0.txt"

# The block description file is mandatory: abort when it cannot be opened.
try:
    blocks = open("Blocks-4.txt", "r")
except IOError:
    print("Missing Blocks-4.txt, aborting ...")
    sys.exit(1)

# BlockNames maps a block name (spaces removed) to a ("0x<start>", "0x<end>")
# pair.  The bounds stay strings because they are pasted as-is into the
# generated C comparisons.
BlockNames = {}
try:
    for line in blocks:
        # Comment lines are detected before stripping, so '#' has to be the
        # very first character of the raw line.
        if line.startswith('#'):
            continue
        line = line.strip()
        if line == '':
            continue
        try:
            # Expected layout: "<start>..<end>; <block name>"
            fields = line.split(';')
            (start, end) = fields[0].strip().split("..")
            name = fields[1].strip().replace(' ', '')
        except (IndexError, ValueError):
            # IndexError: there is no second ';'-separated field.
            # ValueError: the first field is not exactly "<start>..<end>".
            print("Failed to process line: %s" % (line))
            continue
        BlockNames[name] = ("0x" + start, "0x" + end)
finally:
    # Release the file even if an unexpected error interrupts the loop.
    blocks.close()
print("Parsed %d blocks descriptions" % (len(BlockNames)))
|
||
|
|
||
|
# The character database is mandatory: abort when it cannot be opened.
try:
    data = open("UnicodeData-3.1.0.txt", "r")
except IOError:
    print("Missing UnicodeData-3.1.0.txt, aborting ...")
    sys.exit(1)

# nbchar counts the successfully parsed entries.
# Categories maps a category name to the list of code points (ints) carrying
# it, in file order.  Every code point is recorded twice: under its full
# category (third ';'-separated field) and under that field's first letter.
nbchar = 0
Categories = {}
try:
    for line in data:
        # Comment lines are detected before stripping, so '#' has to be the
        # very first character of the raw line.
        if line.startswith('#'):
            continue
        line = line.strip()
        if line == '':
            continue
        try:
            fields = line.split(';')
            # First field is the code point in hexadecimal.  int() rejects
            # malformed or empty values instead of silently skipping the
            # offending characters.
            value = int(fields[0].strip(), 16)
            name = fields[2]
            # An empty category raises IndexError here and the line is
            # rejected; it would otherwise yield a generated function named
            # exactly "xmlUCSIsCat", clashing with the dispatcher.
            major = name[0]
        except (IndexError, ValueError):
            print("Failed to process line: %s" % (line))
            continue

        nbchar = nbchar + 1
        Categories.setdefault(name, []).append(value)
        Categories.setdefault(major, []).append(value)
        # NOTE(review): each line is treated as a single code point; if the
        # data file encodes ranges as First/Last marker pairs they are not
        # expanded here -- confirm whether that is intended.
finally:
    # Close the file that was actually opened above (the previous code closed
    # `blocks` a second time and leaked `data`).
    data.close()
print("Parsed %d char generating %d categories" % (nbchar, len(Categories)))
|
||
|
# Reduce each category's list of code points into inclusive (first, last)
# ranges.
def _collapse_ranges(values):
    """Merge runs of consecutive integers into inclusive (first, last) pairs.

    `values` is scanned in order; a new pair starts whenever a value is not
    exactly one more than the previous value, so the input is expected to be
    ascending.  An isolated value N becomes (N, N).  An empty input gives an
    empty list.
    """
    ranges = []
    first = last = None
    for val in values:
        if first is None:
            first = last = val
        elif val == last + 1:
            # Still inside the current run: just extend it.
            last = val
        else:
            # Gap (or repeated value): close the current run, start a new one.
            ranges.append((first, last))
            first = last = val
    if first is not None:
        ranges.append((first, last))
    return ranges


# sorted() yields an independent list of keys, so the dictionary can be
# updated safely while looping.
for cat in sorted(Categories):
    Categories[cat] = _collapse_ranges(Categories[cat])
|
||
|
|
||
|
#
# Generate the resulting files
#
# `header` receives the C prototypes (xmlunicode.h) and `output` the matching
# implementations (xmlunicode.c).  Both stay open for the code further down
# that appends the generated functions and finally closes them.
try:
    header = open("xmlunicode.h", "w")
except IOError:
    print("Failed to open xmlunicode.h")
    sys.exit(1)

try:
    output = open("xmlunicode.c", "w")
except IOError:
    # Do not leave the already opened header file dangling.
    header.close()
    print("Failed to open xmlunicode.c")
    sys.exit(1)

# Human readable local time stamp recorded in both generated banners.
date = time.asctime(time.localtime(time.time()))

# Banner, include guard and C++ linkage opening of the header file.
header.write(
"""/*
 * xmlunicode.h: this header exports interfaces for the Unicode character APIs
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Daniel Veillard <veillard@redhat.com>
 */

#ifndef __XML_UNICODE_H__
#define __XML_UNICODE_H__

#ifdef __cplusplus
extern "C" {
#endif

""" % (date, sources))

# Banner, includes and feature guard opening of the implementation file.
output.write(
"""/*
 * xmlunicode.c: this module implements the Unicode character APIs
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Daniel Veillard <veillard@redhat.com>
 */

#define IN_LIBXML
#include "libxml.h"

#ifdef LIBXML_UNICODE_ENABLED

#include <string.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlunicode.h>

""" % (date, sources))
|
||
|
|
||
|
# Emit one xmlUCSIs<Block>() range test per Unicode block, in alphabetical
# order of the block names, then the xmlUCSIsBlock() by-name dispatcher.
# sorted() replaces keys()/sort(), which only works on Python 2 lists.
keys = sorted(BlockNames)
for block in keys:
    (start, end) = BlockNames[block]
    # '-' is not allowed in a C identifier; the documentation comment and the
    # dispatcher below still use the unmodified block name.
    name = block.replace('-', '')
    header.write("int\txmlUCSIs%s\t(int code);\n" % name)
    output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
    output.write(" *\n * Check whether the character is part of %s UCS Block\n" %
                 (block))
    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
    output.write("int\nxmlUCSIs%s(int code) {\n" % name)
    # start/end are already "0x..." literals, pasted unchanged into the C code.
    output.write(" return((code >= %s) && (code <= %s));\n" % (start, end))
    output.write("}\n\n")

# Dispatcher: compares the requested name with every known block and calls
# the matching predicate; the generated function returns -1 for unknown names.
header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
output.write(" * @block: UCS block name\n")
output.write(" *\n * Check whether the caracter is part of the UCS Block\n")
output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n")
output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
for block in keys:
    name = block.replace('-', '')
    output.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
                 (block, name))
output.write(" return(-1);\n}\n\n")
|
||
|
|
||
|
|
||
|
# Emit one xmlUCSIsCat<Name>() predicate per category, in alphabetical order,
# then the xmlUCSIsCat() by-name dispatcher.  Each Categories value is
# expected to be a list of inclusive (begin, end) integer pairs.
# sorted() replaces keys()/sort(), which only works on Python 2 lists.
keys = sorted(Categories)
for name in keys:
    ranges = Categories[name]
    header.write("int\txmlUCSIsCat%s\t(int code);\n" % name)
    output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
    output.write(" *\n * Check whether the character is part of %s UCS Category\n" %
                 (name))
    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
    output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)

    # One C sub-expression per range: equality for a single code point,
    # a pair of bound checks otherwise.
    tests = []
    for (begin, end) in ranges:
        if begin == end:
            tests.append("(code == %s)" % (hex(begin)))
        else:
            tests.append("((code >= %s) && (code <= %s))" % (
                         hex(begin), hex(end)))
    if not tests:
        # Keep the generated C valid for a category without any range: the
        # function then always returns 0.
        tests.append("0")
    # The sub-expressions are OR-ed together, one per output line.
    output.write(" return(" + " ||\n ".join(tests))
    output.write(");\n}\n\n")

# Dispatcher: compares the requested name with every known category and calls
# the matching predicate; the generated function returns -1 for unknown names.
header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
output.write(" * @cat: UCS Category name\n")
output.write(" *\n * Check whether the caracter is part of the UCS Category\n")
output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n")
output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
for name in keys:
    output.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
                 (name, name))
output.write(" return(-1);\n}\n\n")
|
||
|
|
||
|
# Closing text of the generated header: ends the C++ linkage block and the
# include guard.
header_footer = (
    "\n"
    "#ifdef __cplusplus\n"
    "}\n"
    "#endif\n"
    "#endif /* __XML_UNICODE_H__ */\n"
)
# Closing text of the generated implementation: ends the feature guard.
source_footer = (
    "\n"
    "#endif /* LIBXML_UNICODE_ENABLED */\n"
)

# Write both footers first, then close both files, header before source.
for (stream, footer) in ((header, header_footer), (output, source_footer)):
    stream.write(footer)
for stream in (header, output):
    stream.close()
|