1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-01-29 21:46:59 +03:00

Port genChRanges.py to Python 3

This commit is contained in:
Nick Wellnhofer 2022-08-18 21:58:07 +02:00
parent 18d79460ab
commit b22b6deba9

View File

@ -16,7 +16,6 @@
#
import sys
import string
import time
#
@ -28,21 +27,21 @@ def makeRange(lst):
ret = []
pos = 0
while pos < len(lst):
try: # index generates exception if not present
s = lst[pos:].index(1) # look for start of next range
except:
break # if no more, finished
pos += s # pointer to start of possible range
try:
e = lst[pos:].index(0) # look for end of range
e += pos
except: # if no end, set to end of list
e = len(lst)
ret.append((pos, e-1)) # append range tuple to list
pos = e + 1 # ready to check for next range
try: # index generates exception if not present
s = lst[pos:].index(1) # look for start of next range
except:
break # if no more, finished
pos += s # pointer to start of possible range
try:
e = lst[pos:].index(0) # look for end of range
e += pos
except: # if no end, set to end of list
e = len(lst)
ret.append((pos, e-1)) # append range tuple to list
pos = e + 1 # ready to check for next range
return ret
sources = "chvalid.def" # input filename
sources = "chvalid.def" # input filename
# minTableSize gives the minimum number of ranges which must be present
# before a 256-byte lookup table is produced. If there are fewer than this
@ -57,13 +56,13 @@ state = 0
try:
defines = open("chvalid.def", "r")
except:
print "Missing chvalid.def, aborting ..."
print("Missing chvalid.def, aborting ...")
sys.exit(1)
#
# The lines in the .def file have three types:-
# name: Defines a new function block
# ur: Defines individual or ranges of unicode values
# ur: Defines individual or ranges of unicode values
# end: Indicates the end of the function block
#
# These lines are processed below.
@ -72,111 +71,111 @@ for line in defines.readlines():
# ignore blank lines, or lines beginning with '#'
if line[0] == '#':
continue
line = string.strip(line)
line = line.strip()
if line == '':
continue
# split line into space-separated fields, then split on type
try:
fields = string.split(line, ' ')
#
# name line:
# validate any previous function block already ended
# validate this function not already defined
# initialize an entry in the function dictionary
# including a mask table with no values yet defined
#
if fields[0] == 'name':
name = fields[1]
if state != 0:
print "'name' %s found before previous name" \
"completed" % (fields[1])
continue
state = 1
if Functs.has_key(name):
print "name '%s' already present - may give" \
" wrong results" % (name)
else:
# dict entry with two list elements (chdata, rangedata)
Functs[name] = [ [], [] ]
for v in range(256):
Functs[name][0].append(0)
#
# end line:
# validate there was a preceding function name line
# set state to show no current function active
#
elif fields[0] == 'end':
if state == 0:
print "'end' found outside of function block"
continue
state = 0
fields = line.split(' ')
#
# name line:
# validate any previous function block already ended
# validate this function not already defined
# initialize an entry in the function dictionary
# including a mask table with no values yet defined
#
if fields[0] == 'name':
name = fields[1]
if state != 0:
print("'name' %s found before previous name" \
"completed" % (fields[1]))
continue
state = 1
if name in Functs:
print("name '%s' already present - may give" \
" wrong results" % (name))
else:
# dict entry with two list elements (chdata, rangedata)
Functs[name] = [ [], [] ]
for v in range(256):
Functs[name][0].append(0)
#
# end line:
# validate there was a preceding function name line
# set state to show no current function active
#
elif fields[0] == 'end':
if state == 0:
print("'end' found outside of function block")
continue
state = 0
#
# ur line:
# validate function has been defined
# process remaining fields on the line, which may be either
# individual unicode values or ranges of values
#
elif fields[0] == 'ur':
if state != 1:
raise ValidationError, "'ur' found outside of 'name' block"
for el in fields[1:]:
pos = string.find(el, '..')
# pos <=0 means not a range, so must be individual value
if pos <= 0:
# cheap handling of hex or decimal values
if el[0:2] == '0x':
value = int(el[2:],16)
elif el[0] == "'":
value = ord(el[1])
else:
value = int(el)
if ((value < 0) | (value > 0x1fffff)):
raise ValidationError, 'Illegal value (%s) in ch for'\
' name %s' % (el,name)
# for ur we have only ranges (makes things simpler),
# so convert val to range
currange = (value, value)
# pos > 0 means this is a range, so isolate/validate
# the interval
else:
# split the range into its first-val, last-val
(first, last) = string.split(el, "..")
# convert values from text into binary
if first[0:2] == '0x':
start = int(first[2:],16)
elif first[0] == "'":
start = ord(first[1])
else:
start = int(first)
if last[0:2] == '0x':
end = int(last[2:],16)
elif last[0] == "'":
end = ord(last[1])
else:
end = int(last)
if (start < 0) | (end > 0x1fffff) | (start > end):
raise ValidationError, "Invalid range '%s'" % el
currange = (start, end)
# common path - 'currange' has the range, now take care of it
# We split on single-byte values vs. multibyte
if currange[1] < 0x100: # single-byte
for ch in range(currange[0],currange[1]+1):
# validate that value not previously defined
if Functs[name][0][ch]:
msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
raise ValidationError, msg
Functs[name][0][ch] = 1
else: # multi-byte
if currange in Functs[name][1]:
raise ValidationError, "range already defined in" \
" function"
else:
Functs[name][1].append(currange)
#
# ur line:
# validate function has been defined
# process remaining fields on the line, which may be either
# individual unicode values or ranges of values
#
elif fields[0] == 'ur':
if state != 1:
raise Exception("'ur' found outside of 'name' block")
for el in fields[1:]:
pos = el.find('..')
# pos <=0 means not a range, so must be individual value
if pos <= 0:
# cheap handling of hex or decimal values
if el[0:2] == '0x':
value = int(el[2:],16)
elif el[0] == "'":
value = ord(el[1])
else:
value = int(el)
if ((value < 0) | (value > 0x1fffff)):
raise Exception('Illegal value (%s) in ch for'\
' name %s' % (el,name))
# for ur we have only ranges (makes things simpler),
# so convert val to range
currange = (value, value)
# pos > 0 means this is a range, so isolate/validate
# the interval
else:
# split the range into its first-val, last-val
(first, last) = el.split("..")
# convert values from text into binary
if first[0:2] == '0x':
start = int(first[2:],16)
elif first[0] == "'":
start = ord(first[1])
else:
start = int(first)
if last[0:2] == '0x':
end = int(last[2:],16)
elif last[0] == "'":
end = ord(last[1])
else:
end = int(last)
if (start < 0) | (end > 0x1fffff) | (start > end):
raise Exception("Invalid range '%s'" % el)
currange = (start, end)
# common path - 'currange' has the range, now take care of it
# We split on single-byte values vs. multibyte
if currange[1] < 0x100: # single-byte
for ch in range(currange[0],currange[1]+1):
# validate that value not previously defined
if Functs[name][0][ch]:
msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
raise Exception(msg)
Functs[name][0][ch] = 1
else: # multi-byte
if currange in Functs[name][1]:
raise Exception("range already defined in" \
" function")
else:
Functs[name][1].append(currange)
except:
print "Failed to process line: %s" % (line)
raise
print("Failed to process line: %s" % (line))
raise
#
# At this point, the entire definition file has been processed. Now we
# enter the output phase, where we generate the two files chvalid.c and
@ -194,13 +193,13 @@ for line in defines.readlines():
try:
header = open("include/libxml/chvalid.h", "w")
except:
print "Failed to open include/libxml/chvalid.h"
print("Failed to open include/libxml/chvalid.h")
sys.exit(1)
try:
output = open("chvalid.c", "w")
except:
print "Failed to open chvalid.c"
print("Failed to open chvalid.c")
sys.exit(1)
date = time.asctime(time.localtime(time.time()))
@ -236,37 +235,37 @@ extern "C" {
typedef struct _xmlChSRange xmlChSRange;
typedef xmlChSRange *xmlChSRangePtr;
struct _xmlChSRange {
unsigned short low;
unsigned short high;
unsigned short\tlow;
unsigned short\thigh;
};
typedef struct _xmlChLRange xmlChLRange;
typedef xmlChLRange *xmlChLRangePtr;
struct _xmlChLRange {
unsigned int low;
unsigned int high;
unsigned int\tlow;
unsigned int\thigh;
};
typedef struct _xmlChRangeGroup xmlChRangeGroup;
typedef xmlChRangeGroup *xmlChRangeGroupPtr;
struct _xmlChRangeGroup {
int nbShortRange;
int nbLongRange;
const xmlChSRange *shortRange; /* points to an array of ranges */
const xmlChLRange *longRange;
int\t\t\tnbShortRange;
int\t\t\tnbLongRange;
const xmlChSRange\t*shortRange;\t/* points to an array of ranges */
const xmlChLRange\t*longRange;
};
/**
* Range checking routine
*/
XMLPUBFUN int XMLCALL
xmlCharInRange(unsigned int val, const xmlChRangeGroup *group);
\t\txmlCharInRange(unsigned int val, const xmlChRangeGroup *group);
""" % (date, sources));
output.write(
"""/*
* chvalid.c: this module implements the character range
* validation APIs
* chvalid.c:\tthis module implements the character range
*\t\tvalidation APIs
*
* This file is automatically generated from the cvs source
* definition files using the genChRanges.py Python script
@ -299,8 +298,7 @@ output.write(
# compares, otherwise we output a 256-byte table and a macro to use it.
#
fkeys = Functs.keys() # Dictionary of all defined functions
fkeys.sort() # Put some order to our output
fkeys = sorted(Functs.keys())
for f in fkeys:
@ -308,12 +306,12 @@ for f in fkeys:
# If the total number of such ranges is less than minTableSize, we generate
# an inline macro for direct comparisons; if greater, we generate a lookup
# table.
if max(Functs[f][0]) > 0: # only check if at least one entry
if max(Functs[f][0]) > 0: # only check if at least one entry
rangeTable = makeRange(Functs[f][0])
numRanges = len(rangeTable)
if numRanges >= minTableSize: # table is worthwhile
header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
header.write("""
numRanges = len(rangeTable)
if numRanges >= minTableSize: # table is worthwhile
header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
header.write("""
/**
* %s_ch:
* @c: char to validate
@ -321,29 +319,29 @@ for f in fkeys:
* Automatically generated by genChRanges.py
*/
""" % f)
header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))
header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))
# write the constant data to the code file
output.write("const unsigned char %s_tab[256] = {\n" % f)
pline = " "
for n in range(255):
pline += " 0x%02x," % Functs[f][0][n]
if len(pline) > 72:
output.write(pline + "\n")
pline = " "
output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])
# write the constant data to the code file
output.write("const unsigned char %s_tab[256] = {\n" % f)
pline = " "
for n in range(255):
pline += " 0x%02x," % Functs[f][0][n]
if len(pline) > 72:
output.write(pline + "\n")
pline = " "
output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])
else: # inline check is used
# first another little optimisation - if space is present,
# put it at the front of the list so it is checked first
try:
ix = rangeTable.remove((0x20, 0x20))
rangeTable.insert(0, (0x20, 0x20))
except:
pass
firstFlag = 1
header.write("""
else: # inline check is used
# first another little optimisation - if space is present,
# put it at the front of the list so it is checked first
try:
ix = rangeTable.remove((0x20, 0x20))
rangeTable.insert(0, (0x20, 0x20))
except:
pass
firstFlag = 1
header.write("""
/**
* %s_ch:
* @c: char to validate
@ -351,32 +349,32 @@ for f in fkeys:
* Automatically generated by genChRanges.py
*/
""" % f)
# okay, I'm tired of the messy lineup - let's automate it!
pline = "#define %s_ch(c)" % f
# 'ntab' is number of tabs needed to position to col. 33 from name end
ntab = 4 - (len(pline)) / 8
if ntab < 0:
ntab = 0
just = ""
for i in range(ntab):
just += "\t"
pline = pline + just + "("
for rg in rangeTable:
if not firstFlag:
pline += " || \\\n\t\t\t\t "
else:
firstFlag = 0
if rg[0] == rg[1]: # single value - check equal
pline += "((c) == 0x%x)" % rg[0]
else: # value range
# since we are doing char, also change range ending in 0xff
if rg[1] != 0xff:
pline += "((0x%x <= (c)) &&" % rg[0]
pline += " ((c) <= 0x%x))" % rg[1]
else:
pline += " (0x%x <= (c))" % rg[0]
pline += ")\n"
header.write(pline)
# okay, I'm tired of the messy lineup - let's automate it!
pline = "#define %s_ch(c)" % f
# 'ntab' is number of tabs needed to position to col. 33 from name end
ntab = 4 - (len(pline)) // 8
if ntab < 0:
ntab = 0
just = ""
for i in range(ntab):
just += "\t"
pline = pline + just + "("
for rg in rangeTable:
if not firstFlag:
pline += " || \\\n\t\t\t\t "
else:
firstFlag = 0
if rg[0] == rg[1]: # single value - check equal
pline += "((c) == 0x%x)" % rg[0]
else: # value range
# since we are doing char, also change range ending in 0xff
if rg[1] != 0xff:
pline += "((0x%x <= (c)) &&" % rg[0]
pline += " ((c) <= 0x%x))" % rg[1]
else:
pline += " (0x%x <= (c))" % rg[0]
pline += ")\n"
header.write(pline)
header.write("""
/**
@ -387,44 +385,44 @@ for f in fkeys:
*/
""" % f)
pline = "#define %sQ(c)" % f
ntab = 4 - (len(pline)) / 8
ntab = 4 - (len(pline)) // 8
if ntab < 0:
ntab = 0
ntab = 0
just = ""
for i in range(ntab):
just += "\t"
just += "\t"
header.write(pline + just + "(((c) < 0x100) ? \\\n\t\t\t\t ")
if max(Functs[f][0]) > 0:
header.write("%s_ch((c)) :" % f)
header.write("%s_ch((c)) :" % f)
else:
header.write("0 :")
header.write("0 :")
# if no ranges defined, value invalid if >= 0x100
numRanges = len(Functs[f][1])
if numRanges == 0:
header.write(" 0)\n\n")
header.write(" 0)\n\n")
else:
if numRanges >= minTableSize:
header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f)
else: # if < minTableSize, generate inline code
firstFlag = 1
for rg in Functs[f][1]:
if not firstFlag:
pline += " || \\\n\t\t\t\t "
else:
firstFlag = 0
pline = "\\\n\t\t\t\t("
if rg[0] == rg[1]: # single value - check equal
pline += "((c) == 0x%x)" % rg[0]
else: # value range
pline += "((0x%x <= (c)) &&" % rg[0]
pline += " ((c) <= 0x%x))" % rg[1]
pline += "))\n\n"
header.write(pline)
if numRanges >= minTableSize:
header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f)
else: # if < minTableSize, generate inline code
firstFlag = 1
for rg in Functs[f][1]:
if not firstFlag:
pline += " || \\\n\t\t\t\t "
else:
firstFlag = 0
pline = "\\\n\t\t\t\t("
if rg[0] == rg[1]: # single value - check equal
pline += "((c) == 0x%x)" % rg[0]
else: # value range
pline += "((0x%x <= (c)) &&" % rg[0]
pline += " ((c) <= 0x%x))" % rg[1]
pline += "))\n\n"
header.write(pline)
if len(Functs[f][1]) > 0:
header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
#
@ -432,49 +430,49 @@ for f in fkeys:
#
for f in fkeys:
if len(Functs[f][1]) > 0: # only generate if unicode ranges present
rangeTable = Functs[f][1]
rangeTable.sort() # ascending tuple sequence
numShort = 0
numLong = 0
for rg in rangeTable:
if rg[1] < 0x10000: # if short value
if numShort == 0: # first occurrence
pline = "static const xmlChSRange %s_srng[] = {" % f
else:
pline += ","
numShort += 1
if len(pline) > 60:
output.write(pline + "\n")
pline = " "
if len(Functs[f][1]) > 0: # only generate if unicode ranges present
rangeTable = Functs[f][1]
rangeTable.sort() # ascending tuple sequence
numShort = 0
numLong = 0
for rg in rangeTable:
if rg[1] < 0x10000: # if short value
if numShort == 0: # first occurrence
pline = "static const xmlChSRange %s_srng[] = {" % f
else:
pline += ","
numShort += 1
if len(pline) > 60:
output.write(pline + "\n")
pline = " "
else:
pline += " "
pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
else: # if long value
if numLong == 0: # first occurrence
if numShort > 0: # if there were shorts, finish them off
output.write(pline + "};\n")
pline = "static const xmlChLRange %s_lrng[] = { " % f
else:
pline += ", "
numLong += 1
if len(pline) > 60:
output.write(pline + "\n")
pline = " "
pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
output.write(pline + "};\n") # finish off last group
pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
else: # if long value
if numLong == 0: # first occurrence
if numShort > 0: # if there were shorts, finish them off
output.write(pline + "};\n")
pline = "static const xmlChLRange %s_lrng[] = { " % f
else:
pline += ", "
numLong += 1
if len(pline) > 60:
output.write(pline + "\n")
pline = " "
pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
output.write(pline + "};\n") # finish off last group
pline = "const xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort, numLong)
if numShort > 0:
pline += "%s_srng" % f
else:
pline += "(xmlChSRangePtr)0"
if numLong > 0:
pline += ", %s_lrng" % f
else:
pline += ", (xmlChLRangePtr)0"
output.write(pline + "};\n\n")
pline = "const xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort, numLong)
if numShort > 0:
pline += "%s_srng" % f
else:
pline += "(xmlChSRangePtr)0"
if numLong > 0:
pline += ", %s_lrng" % f
else:
pline += ", (xmlChLRangePtr)0"
output.write(pline + "};\n\n")
output.write(
"""
@ -495,43 +493,43 @@ xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
const xmlChLRange *lptr;
if (rptr == NULL) return(0);
if (val < 0x10000) { /* is val in 'short' or 'long' array? */
if (rptr->nbShortRange == 0)
return 0;
low = 0;
high = rptr->nbShortRange - 1;
sptr = rptr->shortRange;
while (low <= high) {
mid = (low + high) / 2;
if ((unsigned short) val < sptr[mid].low) {
high = mid - 1;
} else {
if ((unsigned short) val > sptr[mid].high) {
low = mid + 1;
} else {
return 1;
}
}
}
if (val < 0x10000) {\t/* is val in 'short' or 'long' array? */
\tif (rptr->nbShortRange == 0)
\t return 0;
\tlow = 0;
\thigh = rptr->nbShortRange - 1;
\tsptr = rptr->shortRange;
\twhile (low <= high) {
\t mid = (low + high) / 2;
\t if ((unsigned short) val < sptr[mid].low) {
\t\thigh = mid - 1;
\t } else {
\t\tif ((unsigned short) val > sptr[mid].high) {
\t\t low = mid + 1;
\t\t} else {
\t\t return 1;
\t\t}
\t }
\t}
} else {
if (rptr->nbLongRange == 0) {
return 0;
}
low = 0;
high = rptr->nbLongRange - 1;
lptr = rptr->longRange;
while (low <= high) {
mid = (low + high) / 2;
if (val < lptr[mid].low) {
high = mid - 1;
} else {
if (val > lptr[mid].high) {
low = mid + 1;
} else {
return 1;
}
}
}
\tif (rptr->nbLongRange == 0) {
\t return 0;
\t}
\tlow = 0;
\thigh = rptr->nbLongRange - 1;
\tlptr = rptr->longRange;
\twhile (low <= high) {
\t mid = (low + high) / 2;
\t if (val < lptr[mid].low) {
\t\thigh = mid - 1;
\t } else {
\t\tif (val > lptr[mid].high) {
\t\t low = mid + 1;
\t\t} else {
\t\t return 1;
\t\t}
\t }
\t}
}
return 0;
}