1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-12 16:58:16 +03:00

enhanced for range checking, updated to Unicode version 4.0.1 (API docs also updated)

* genUnicode.py, xmlunicode.c, include/libxml/xmlunicode.h,
  python/libxml2class.txt: enhanced for range checking,
  updated to Unicode version 4.0.1 (API docs also updated)
* python/generator.py: minor change to fix a warning
This commit is contained in:
William M. Brack 2003-11-09 12:45:26 +00:00
parent fe9fc792ed
commit ea939087b9
8 changed files with 2732 additions and 3224 deletions

View File

@ -1,3 +1,10 @@
Sun Nov 9 20:28:21 HKT 2003 William Brack <wbrack@mmm.com.hk>
* genUnicode.py, xmlunicode.c, include/libxml/xmlunicode.h,
python/libxml2class.txt: enhanced for range checking,
updated to Unicode version 4.0.1 (API docs also updated)
* python/generator.py: minor change to fix a warning
Wed Nov 5 23:46:36 CET 2003 Daniel Veillard <daniel@veillard.com>
* Makefile.am: apply fix from Karl Eichwalder for script path

File diff suppressed because it is too large Load Diff

View File

@ -1,40 +1,88 @@
#!/usr/bin/python -u
#
# Original script modified in November 2003 to take advantage of
# the character-validation range routines, and updated to the
# current Unicode information (Version 4.0.1)
#
# NOTE: there is an 'alias' facility for blocks which are not present in
# the current release, but are needed for ABI compatibility. This
# must be accomplished MANUALLY! Define the alias in the variable
# 'blockAliases', then MANUALLY provide a function to return the
# appropriate value.
#
import sys
import string
import time
sources = "Blocks-4.txt UnicodeData-3.1.0.txt"
webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1d5b.html"
sources = "Blocks-4.0.1d1b.txt UnicodeData-4.0.1d1b.txt"
blockAliases = "CombiningMarksforSymbols Greek PrivateUse"
# minTableSize is the threshold for producing a range table: a category
# gets a table only when it has more than this many ranges. Otherwise,
# inline comparisons are generated.
minTableSize = 8
(blockfile, catfile) = string.split(sources)
#
# First create a dictionary for the block names
#
BlockNames = {}
#
# Next put in aliases for blocks not currently present, but needed
# for ABI compatibility (THIS IS A HORRIBLE HACK!)
#
aliases = string.split(blockAliases, ' ')
for name in aliases:
BlockNames[name] = []
#
# Now process the "blocks" file, reducing it to a dictionary
# indexed by blockname, containing a tuple with the applicable
# block range
#
try:
blocks = open("Blocks-4.txt", "r")
blocks = open(blockfile, "r")
except:
print "Missing Blocks-4.txt, aborting ..."
print "Missing %s, aborting ..." % blockfile
sys.exit(1)
BlockNames = {}
for line in blocks.readlines():
if line[0] == '#':
continue
line = string.strip(line)
if line == '':
continue
continue
try:
fields = string.split(line, ';')
range = string.strip(fields[0])
(start, end) = string.split(range, "..")
name = string.strip(fields[1])
name = string.replace(name, ' ', '')
fields = string.split(line, ';')
range = string.strip(fields[0])
(start, end) = string.split(range, "..")
name = string.strip(fields[1])
name = string.replace(name, ' ', '')
except:
print "Failed to process line: %s" % (line)
continue
continue
BlockNames[name] = ("0x"+start, "0x"+end)
blocks.close()
print "Parsed %d blocks descriptions" % (len(BlockNames.keys()))
#
# Next process the Categories file. This is more complex, since
# the file is in code sequence, and we need to invert it. We use
# a dictionary with index category-name, with each entry containing
# all the ranges (codepoints) of that category. Note that category
# names comprise two parts - the general category, and the "subclass"
# within that category. Therefore, both "general category" (which is
# the first character of the 2-character category-name) and the full
# (2-character) name are entered into this dictionary.
#
try:
data = open("UnicodeData-3.1.0.txt", "r")
data = open(catfile, "r")
except:
print "Missing UnicodeData-3.1.0.txt, aborting ..."
print "Missing %s, aborting ..." % catfile
sys.exit(1)
nbchar = 0;
@ -44,44 +92,50 @@ for line in data.readlines():
continue
line = string.strip(line)
if line == '':
continue
continue
try:
fields = string.split(line, ';')
point = string.strip(fields[0])
value = 0
while point != '':
value = value * 16
if point[0] >= '0' and point[0] <= '9':
value = value + ord(point[0]) - ord('0')
elif point[0] >= 'A' and point[0] <= 'F':
value = value + 10 + ord(point[0]) - ord('A')
elif point[0] >= 'a' and point[0] <= 'f':
value = value + 10 + ord(point[0]) - ord('a')
point = point[1:]
name = fields[2]
fields = string.split(line, ';')
point = string.strip(fields[0])
value = 0
while point != '':
value = value * 16
if point[0] >= '0' and point[0] <= '9':
value = value + ord(point[0]) - ord('0')
elif point[0] >= 'A' and point[0] <= 'F':
value = value + 10 + ord(point[0]) - ord('A')
elif point[0] >= 'a' and point[0] <= 'f':
value = value + 10 + ord(point[0]) - ord('a')
point = point[1:]
name = fields[2]
except:
print "Failed to process line: %s" % (line)
continue
continue
nbchar = nbchar + 1
# update entry for "full name"
try:
Categories[name].append(value)
Categories[name].append(value)
except:
try:
Categories[name] = [value]
except:
print "Failed to process line: %s" % (line)
Categories[name] = [value]
except:
print "Failed to process line: %s" % (line)
# update "general category" name
try:
Categories[name[0]].append(value)
Categories[name[0]].append(value)
except:
try:
Categories[name[0]] = [value]
except:
print "Failed to process line: %s" % (line)
Categories[name[0]] = [value]
except:
print "Failed to process line: %s" % (line)
# Close the category data file read by the loop above; the blocks file
# was already closed right after it was parsed.
data.close()
print "Parsed %d char generating %d categories" % (nbchar, len(Categories.keys()))
#reduce the number list into ranges
#
# The data is now all read. Time to process it into a more useful form.
#
# reduce the number list into ranges
for cat in Categories.keys():
list = Categories[cat]
start = -1
@ -90,35 +144,45 @@ for cat in Categories.keys():
ranges = []
for val in list:
if start == -1:
start = val
prev = val
continue
elif val == prev + 1:
prev = val
continue
elif prev == start:
ranges.append((prev, prev))
start = val
prev = val
continue
else:
ranges.append((start, prev))
start = val
prev = val
continue
start = val
prev = val
continue
elif val == prev + 1:
prev = val
continue
elif prev == start:
ranges.append((prev, prev))
start = val
prev = val
continue
else:
ranges.append((start, prev))
start = val
prev = val
continue
if prev == start:
ranges.append((prev, prev))
else:
ranges.append((start, prev))
Categories[cat] = ranges
#
# Ensure all data is in alphabetic order, since we will be doing binary
# searches on the tables.
#
bkeys = BlockNames.keys()
bkeys.sort()
ckeys = Categories.keys()
ckeys.sort()
#
# Generate the resulting files
#
try:
header = open("xmlunicode.h", "w")
header = open("include/libxml/xmlunicode.h", "w")
except:
print "Failed to open xmlunicode.h"
print "Failed to open include/libxml/xmlunicode.h"
sys.exit(1)
try:
@ -135,7 +199,7 @@ header.write(
*
* This file is automatically generated from the
* UCS description files of the Unicode Character Database
* http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
* %s
* using the genUnicode.py Python script.
*
* Generation date: %s
@ -152,14 +216,15 @@ header.write(
extern "C" {
#endif
""" % (date, sources));
""" % (webpage, date, sources));
output.write(
"""/*
* xmlunicode.c: this module implements the Unicode character APIs
*
* This file is automatically generated from the
* UCS description files of the Unicode Character Database
* http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
* %s
* using the genUnicode.py Python script.
*
* Generation date: %s
@ -175,15 +240,124 @@ output.write(
#include <string.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlunicode.h>
#include <libxml/chvalid.h>
""" % (date, sources));
typedef int (xmlIntFunc)(int); /* just to keep one's mind untwisted */
keys = BlockNames.keys()
keys.sort()
for block in keys:
(start, end) = BlockNames[block]
typedef struct {
const char *rangename;
xmlIntFunc *func;
} xmlUnicodeRange;
typedef struct {
xmlUnicodeRange *table;
int numentries;
} xmlUnicodeNameTable;
static xmlIntFunc *xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname);
static xmlUnicodeRange xmlUnicodeBlocks[] = {
""" % (webpage, date, sources));
flag = 0
for block in bkeys:
name = string.replace(block, '-', '')
if flag:
output.write(',\n')
else:
flag = 1
output.write(' {"%s", xmlUCSIs%s}' % (name, name))
output.write('};\n\n')
output.write('static xmlUnicodeRange xmlUnicodeCats[] = {\n')
flag = 0;
for name in ckeys:
if flag:
output.write(',\n')
else:
flag = 1
output.write(' {"%s", xmlUCSIsCat%s}' % (name, name))
output.write('};\n\n')
#
# For any categories with more than minTableSize ranges we generate
# a range table suitable for xmlCharInRange
#
for name in ckeys:
if len(Categories[name]) > minTableSize:
numshort = 0
numlong = 0
ranges = Categories[name]
sptr = "NULL"
lptr = "NULL"
for range in ranges:
(low, high) = range
if high < 0x10000:
if numshort == 0:
pline = "static xmlChSRange xml%sS[] = {" % name
sptr = "xml%sS" % name
else:
pline += ", "
numshort += 1
else:
if numlong == 0:
if numshort > 0:
output.write(pline + " };\n")
pline = "static xmlChLRange xml%sL[] = {" % name
lptr = "xml%sL" % name
else:
pline += ", "
numlong += 1
if len(pline) > 60:
output.write(pline + "\n")
pline = " "
pline += "{%s, %s}" % (hex(low), hex(high))
output.write(pline + " };\nstatic xmlChRangeGroup xml%sG = {%s,%s,%s,%s};\n\n"
% (name, numshort, numlong, sptr, lptr))
output.write(
"""xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s};
xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s};
/**
* xmlUnicodeLookup:
* @tptr: pointer to the name table
* @name: name to be found
*
* binary table lookup for user-supplied name
*
* Returns pointer to range function if found, otherwise NULL
*/
static xmlIntFunc
*xmlUnicodeLookup(xmlUnicodeNameTable *tptr, const char *tname) {
int low, high, mid, cmp;
xmlUnicodeRange *sptr;
low = 0;
high = tptr->numentries - 1;
sptr = tptr->table;
while (low <= high) {
mid = (low + high) / 2;
if ((cmp=strcmp(tname, sptr[mid].rangename)) == 0)
return (sptr[mid].func);
if (cmp < 0)
high = mid - 1;
else
low = mid + 1;
}
return (NULL);
}
""" % (len(BlockNames), len(Categories)) )
for block in bkeys:
name = string.replace(block, '-', '')
header.write("XMLPUBFUN int XMLCALL xmlUCSIs%s\t(int code);\n" % name)
if len(BlockNames[block]) == 0: # ignore aliases
continue
(start, end) = BlockNames[block]
output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
(block))
@ -192,24 +366,30 @@ for block in keys:
output.write(" return((code >= %s) && (code <= %s));\n" % (start, end))
output.write("}\n\n")
header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
output.write(" * @block: UCS block name\n")
output.write(" *\n * Check whether the caracter is part of the UCS Block\n")
output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n");
output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
keys = BlockNames.keys()
keys.sort()
for block in keys:
name = string.replace(block, '-', '')
output.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
(block, name));
output.write(" return(-1);\n}\n\n")
header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsBlock\t(int code, const char *block);\n\n")
output.write(
"""/**
* xmlUCSIsBlock:
* @code: UCS code point
* @block: UCS block name
*
* Check whether the character is part of the UCS Block
*
* Returns 1 if true, 0 if false and -1 on unknown block
*/
int
xmlUCSIsBlock(int code, const char *block) {
xmlIntFunc *func;
func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block);
if (func == NULL)
return (-1);
return (func(code));
}
keys = Categories.keys()
keys.sort()
for name in keys:
""")
for name in ckeys:
ranges = Categories[name]
header.write("XMLPUBFUN int XMLCALL xmlUCSIsCat%s\t(int code);\n" % name)
output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
@ -217,33 +397,99 @@ for name in keys:
(name))
output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
start = 1
for range in ranges:
(begin, end) = range;
if start:
output.write(" return(");
start = 0
else:
output.write(" ||\n ");
if (begin == end):
output.write("(code == %s)" % (hex(begin)))
else:
output.write("((code >= %s) && (code <= %s))" % (
hex(begin), hex(end)))
if len(Categories[name]) > minTableSize:
output.write(" return(xmlCharInRange((unsigned int)code, &xml%sG)"
% name)
else:
start = 1
for range in ranges:
(begin, end) = range;
if start:
output.write(" return(");
start = 0
else:
output.write(" ||\n ");
if (begin == end):
output.write("(code == %s)" % (hex(begin)))
else:
output.write("((code >= %s) && (code <= %s))" % (
hex(begin), hex(end)))
output.write(");\n}\n\n")
header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
output.write(" * @cat: UCS Category name\n")
output.write(" *\n * Check whether the caracter is part of the UCS Category\n")
output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n");
output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
keys = Categories.keys()
keys.sort()
for name in keys:
output.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
(name, name));
output.write(" return(-1);\n}\n\n")
header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsCat\t(int code, const char *cat);\n")
output.write(
"""/**
* xmlUCSIsCat:
* @code: UCS code point
* @cat: UCS Category name
*
* Check whether the character is part of the UCS Category
*
* Returns 1 if true, 0 if false and -1 on unknown category
*/
int
xmlUCSIsCat(int code, const char *cat) {
xmlIntFunc *func;
func = xmlUnicodeLookup(&xmlUnicodeCatTbl, cat);
if (func == NULL)
return (-1);
return (func(code));
}
/*
The following routines are an UGLY HACK to provide aliases for block
names which are not in the current release, but are needed for ABI
compatibility.
*/
/**
* xmlUCSIsCombiningMarksforSymbols:
* @code: UCS code point
*
* Check whether the character is part of CombiningMarksforSymbols UCS Block
*
* Returns 1 if true 0 otherwise
*/
int
xmlUCSIsCombiningMarksforSymbols(int code) {
return((code >= 0x20D0) && (code <= 0x20FF));
}
/**
* xmlUCSIsGreek:
* @code: UCS code point
*
* Check whether the character is part of Greek UCS Block
*
* Returns 1 if true 0 otherwise
*/
int
xmlUCSIsGreek(int code) {
return((code >= 0x370) && (code <= 0x3FF));
}
/**
* xmlUCSIsPrivateUse:
* @code: UCS code point
*
* Check whether the character is part of PrivateUse UCS Block
*
* Returns 1 if true 0 otherwise
*/
int
xmlUCSIsPrivateUse(int code) {
if ( ((code >= 0xE000) && (code <= 0xF8FF)) ||
((code >= 0xF0000) && (code <= 0xFFFFD))||
((code >= 0x100000)&& (code <= 0x10FFFD)) )
return (1);
else
return (0);
}
#endif /* LIBXML_UNICODE_ENABLED */
""")
header.write("""
#ifdef __cplusplus
@ -251,8 +497,6 @@ header.write("""
#endif
#endif /* __XML_UNICODE_H__ */
""");
output.write("""
#endif /* LIBXML_UNICODE_ENABLED */
""");
header.close()
output.close()

View File

@ -3,11 +3,11 @@
*
* This file is automatically generated from the
* UCS description files of the Unicode Character Database
* http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
* http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1d5b.html
* using the genUnicode.py Python script.
*
* Generation date: Mon Aug 25 10:45:50 2003
* Sources: Blocks-4.txt UnicodeData-3.1.0.txt
* Generation date: Sun Nov 9 20:13:11 2003
* Sources: Blocks-4.0.1d1b.txt UnicodeData-4.0.1d1b.txt
* Daniel Veillard <veillard@redhat.com>
*/
@ -20,6 +20,7 @@
extern "C" {
#endif
XMLPUBFUN int XMLCALL xmlUCSIsAegeanNumbers (int code);
XMLPUBFUN int XMLCALL xmlUCSIsAlphabeticPresentationForms (int code);
XMLPUBFUN int XMLCALL xmlUCSIsArabic (int code);
XMLPUBFUN int XMLCALL xmlUCSIsArabicPresentationFormsA (int code);
@ -33,6 +34,7 @@ XMLPUBFUN int XMLCALL xmlUCSIsBopomofo (int code);
XMLPUBFUN int XMLCALL xmlUCSIsBopomofoExtended (int code);
XMLPUBFUN int XMLCALL xmlUCSIsBoxDrawing (int code);
XMLPUBFUN int XMLCALL xmlUCSIsBraillePatterns (int code);
XMLPUBFUN int XMLCALL xmlUCSIsBuhid (int code);
XMLPUBFUN int XMLCALL xmlUCSIsByzantineMusicalSymbols (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCJKCompatibility (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCJKCompatibilityForms (int code);
@ -45,11 +47,14 @@ XMLPUBFUN int XMLCALL xmlUCSIsCJKUnifiedIdeographsExtensionA (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCJKUnifiedIdeographsExtensionB (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCherokee (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCombiningDiacriticalMarks (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCombiningDiacriticalMarksforSymbols (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCombiningHalfMarks (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCombiningMarksforSymbols (int code);
XMLPUBFUN int XMLCALL xmlUCSIsControlPictures (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCurrencySymbols (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCypriotSyllabary (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCyrillic (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCyrillicSupplement (int code);
XMLPUBFUN int XMLCALL xmlUCSIsDeseret (int code);
XMLPUBFUN int XMLCALL xmlUCSIsDevanagari (int code);
XMLPUBFUN int XMLCALL xmlUCSIsDingbats (int code);
@ -62,12 +67,14 @@ XMLPUBFUN int XMLCALL xmlUCSIsGeorgian (int code);
XMLPUBFUN int XMLCALL xmlUCSIsGothic (int code);
XMLPUBFUN int XMLCALL xmlUCSIsGreek (int code);
XMLPUBFUN int XMLCALL xmlUCSIsGreekExtended (int code);
XMLPUBFUN int XMLCALL xmlUCSIsGreekandCoptic (int code);
XMLPUBFUN int XMLCALL xmlUCSIsGujarati (int code);
XMLPUBFUN int XMLCALL xmlUCSIsGurmukhi (int code);
XMLPUBFUN int XMLCALL xmlUCSIsHalfwidthandFullwidthForms (int code);
XMLPUBFUN int XMLCALL xmlUCSIsHangulCompatibilityJamo (int code);
XMLPUBFUN int XMLCALL xmlUCSIsHangulJamo (int code);
XMLPUBFUN int XMLCALL xmlUCSIsHangulSyllables (int code);
XMLPUBFUN int XMLCALL xmlUCSIsHanunoo (int code);
XMLPUBFUN int XMLCALL xmlUCSIsHebrew (int code);
XMLPUBFUN int XMLCALL xmlUCSIsHighPrivateUseSurrogates (int code);
XMLPUBFUN int XMLCALL xmlUCSIsHighSurrogates (int code);
@ -78,18 +85,26 @@ XMLPUBFUN int XMLCALL xmlUCSIsKanbun (int code);
XMLPUBFUN int XMLCALL xmlUCSIsKangxiRadicals (int code);
XMLPUBFUN int XMLCALL xmlUCSIsKannada (int code);
XMLPUBFUN int XMLCALL xmlUCSIsKatakana (int code);
XMLPUBFUN int XMLCALL xmlUCSIsKatakanaPhoneticExtensions (int code);
XMLPUBFUN int XMLCALL xmlUCSIsKhmer (int code);
XMLPUBFUN int XMLCALL xmlUCSIsKhmerSymbols (int code);
XMLPUBFUN int XMLCALL xmlUCSIsLao (int code);
XMLPUBFUN int XMLCALL xmlUCSIsLatin1Supplement (int code);
XMLPUBFUN int XMLCALL xmlUCSIsLatinExtendedA (int code);
XMLPUBFUN int XMLCALL xmlUCSIsLatinExtendedB (int code);
XMLPUBFUN int XMLCALL xmlUCSIsLatinExtendedAdditional (int code);
XMLPUBFUN int XMLCALL xmlUCSIsLetterlikeSymbols (int code);
XMLPUBFUN int XMLCALL xmlUCSIsLimbu (int code);
XMLPUBFUN int XMLCALL xmlUCSIsLinearBIdeograms (int code);
XMLPUBFUN int XMLCALL xmlUCSIsLinearBSyllabary (int code);
XMLPUBFUN int XMLCALL xmlUCSIsLowSurrogates (int code);
XMLPUBFUN int XMLCALL xmlUCSIsMalayalam (int code);
XMLPUBFUN int XMLCALL xmlUCSIsMathematicalAlphanumericSymbols (int code);
XMLPUBFUN int XMLCALL xmlUCSIsMathematicalOperators (int code);
XMLPUBFUN int XMLCALL xmlUCSIsMiscellaneousMathematicalSymbolsA (int code);
XMLPUBFUN int XMLCALL xmlUCSIsMiscellaneousMathematicalSymbolsB (int code);
XMLPUBFUN int XMLCALL xmlUCSIsMiscellaneousSymbols (int code);
XMLPUBFUN int XMLCALL xmlUCSIsMiscellaneousSymbolsandArrows (int code);
XMLPUBFUN int XMLCALL xmlUCSIsMiscellaneousTechnical (int code);
XMLPUBFUN int XMLCALL xmlUCSIsMongolian (int code);
XMLPUBFUN int XMLCALL xmlUCSIsMusicalSymbols (int code);
@ -99,26 +114,42 @@ XMLPUBFUN int XMLCALL xmlUCSIsOgham (int code);
XMLPUBFUN int XMLCALL xmlUCSIsOldItalic (int code);
XMLPUBFUN int XMLCALL xmlUCSIsOpticalCharacterRecognition (int code);
XMLPUBFUN int XMLCALL xmlUCSIsOriya (int code);
XMLPUBFUN int XMLCALL xmlUCSIsOsmanya (int code);
XMLPUBFUN int XMLCALL xmlUCSIsPhoneticExtensions (int code);
XMLPUBFUN int XMLCALL xmlUCSIsPrivateUse (int code);
XMLPUBFUN int XMLCALL xmlUCSIsPrivateUseArea (int code);
XMLPUBFUN int XMLCALL xmlUCSIsRunic (int code);
XMLPUBFUN int XMLCALL xmlUCSIsShavian (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSinhala (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSmallFormVariants (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSpacingModifierLetters (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSpecials (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSuperscriptsandSubscripts (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSupplementalArrowsA (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSupplementalArrowsB (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSupplementalMathematicalOperators (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSupplementaryPrivateUseAreaA (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSupplementaryPrivateUseAreaB (int code);
XMLPUBFUN int XMLCALL xmlUCSIsSyriac (int code);
XMLPUBFUN int XMLCALL xmlUCSIsTagalog (int code);
XMLPUBFUN int XMLCALL xmlUCSIsTagbanwa (int code);
XMLPUBFUN int XMLCALL xmlUCSIsTags (int code);
XMLPUBFUN int XMLCALL xmlUCSIsTaiLe (int code);
XMLPUBFUN int XMLCALL xmlUCSIsTaiXuanJingSymbols (int code);
XMLPUBFUN int XMLCALL xmlUCSIsTamil (int code);
XMLPUBFUN int XMLCALL xmlUCSIsTelugu (int code);
XMLPUBFUN int XMLCALL xmlUCSIsThaana (int code);
XMLPUBFUN int XMLCALL xmlUCSIsThai (int code);
XMLPUBFUN int XMLCALL xmlUCSIsTibetan (int code);
XMLPUBFUN int XMLCALL xmlUCSIsUgaritic (int code);
XMLPUBFUN int XMLCALL xmlUCSIsUnifiedCanadianAboriginalSyllabics (int code);
XMLPUBFUN int XMLCALL xmlUCSIsVariationSelectors (int code);
XMLPUBFUN int XMLCALL xmlUCSIsVariationSelectorsSupplement (int code);
XMLPUBFUN int XMLCALL xmlUCSIsYiRadicals (int code);
XMLPUBFUN int XMLCALL xmlUCSIsYiSyllables (int code);
XMLPUBFUN int XMLCALL xmlUCSIsYijingHexagramSymbols (int code);
XMLPUBFUN int XMLCALL xmlUCSIsBlock (int code,
const char *block);
XMLPUBFUN int XMLCALL xmlUCSIsBlock (int code, const char *block);
XMLPUBFUN int XMLCALL xmlUCSIsCatC (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCatCc (int code);
@ -157,8 +188,7 @@ XMLPUBFUN int XMLCALL xmlUCSIsCatZl (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCatZp (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCatZs (int code);
XMLPUBFUN int XMLCALL xmlUCSIsCat (int code,
const char *cat);
XMLPUBFUN int XMLCALL xmlUCSIsCat (int code, const char *cat);
#ifdef __cplusplus
}

View File

@ -570,6 +570,7 @@ def buildStubs():
wrapper.write("/* Generated */\n\n")
wrapper.write("#include <Python.h>\n")
# wrapper.write("#include \"config.h\"\n")
wrapper.write("#define IN_LIBXML\n")
wrapper.write("#include <libxml/xmlversion.h>\n")
wrapper.write("#include <libxml/tree.h>\n")
wrapper.write("#include <libxml/xmlschemastypes.h>\n")

View File

@ -229,6 +229,7 @@ schemaCleanupTypes()
schemaInitTypes()
# functions from module xmlunicode
uCSIsAegeanNumbers()
uCSIsAlphabeticPresentationForms()
uCSIsArabic()
uCSIsArabicPresentationFormsA()
@ -243,6 +244,7 @@ uCSIsBopomofo()
uCSIsBopomofoExtended()
uCSIsBoxDrawing()
uCSIsBraillePatterns()
uCSIsBuhid()
uCSIsByzantineMusicalSymbols()
uCSIsCJKCompatibility()
uCSIsCJKCompatibilityForms()
@ -292,11 +294,14 @@ uCSIsCatZp()
uCSIsCatZs()
uCSIsCherokee()
uCSIsCombiningDiacriticalMarks()
uCSIsCombiningDiacriticalMarksforSymbols()
uCSIsCombiningHalfMarks()
uCSIsCombiningMarksforSymbols()
uCSIsControlPictures()
uCSIsCurrencySymbols()
uCSIsCypriotSyllabary()
uCSIsCyrillic()
uCSIsCyrillicSupplement()
uCSIsDeseret()
uCSIsDevanagari()
uCSIsDingbats()
@ -309,12 +314,14 @@ uCSIsGeorgian()
uCSIsGothic()
uCSIsGreek()
uCSIsGreekExtended()
uCSIsGreekandCoptic()
uCSIsGujarati()
uCSIsGurmukhi()
uCSIsHalfwidthandFullwidthForms()
uCSIsHangulCompatibilityJamo()
uCSIsHangulJamo()
uCSIsHangulSyllables()
uCSIsHanunoo()
uCSIsHebrew()
uCSIsHighPrivateUseSurrogates()
uCSIsHighSurrogates()
@ -325,18 +332,26 @@ uCSIsKanbun()
uCSIsKangxiRadicals()
uCSIsKannada()
uCSIsKatakana()
uCSIsKatakanaPhoneticExtensions()
uCSIsKhmer()
uCSIsKhmerSymbols()
uCSIsLao()
uCSIsLatin1Supplement()
uCSIsLatinExtendedA()
uCSIsLatinExtendedAdditional()
uCSIsLatinExtendedB()
uCSIsLetterlikeSymbols()
uCSIsLimbu()
uCSIsLinearBIdeograms()
uCSIsLinearBSyllabary()
uCSIsLowSurrogates()
uCSIsMalayalam()
uCSIsMathematicalAlphanumericSymbols()
uCSIsMathematicalOperators()
uCSIsMiscellaneousMathematicalSymbolsA()
uCSIsMiscellaneousMathematicalSymbolsB()
uCSIsMiscellaneousSymbols()
uCSIsMiscellaneousSymbolsandArrows()
uCSIsMiscellaneousTechnical()
uCSIsMongolian()
uCSIsMusicalSymbols()
@ -346,23 +361,40 @@ uCSIsOgham()
uCSIsOldItalic()
uCSIsOpticalCharacterRecognition()
uCSIsOriya()
uCSIsOsmanya()
uCSIsPhoneticExtensions()
uCSIsPrivateUse()
uCSIsPrivateUseArea()
uCSIsRunic()
uCSIsShavian()
uCSIsSinhala()
uCSIsSmallFormVariants()
uCSIsSpacingModifierLetters()
uCSIsSpecials()
uCSIsSuperscriptsandSubscripts()
uCSIsSupplementalArrowsA()
uCSIsSupplementalArrowsB()
uCSIsSupplementalMathematicalOperators()
uCSIsSupplementaryPrivateUseAreaA()
uCSIsSupplementaryPrivateUseAreaB()
uCSIsSyriac()
uCSIsTagalog()
uCSIsTagbanwa()
uCSIsTags()
uCSIsTaiLe()
uCSIsTaiXuanJingSymbols()
uCSIsTamil()
uCSIsTelugu()
uCSIsThaana()
uCSIsThai()
uCSIsTibetan()
uCSIsUgaritic()
uCSIsUnifiedCanadianAboriginalSyllabics()
uCSIsVariationSelectors()
uCSIsVariationSelectorsSupplement()
uCSIsYiRadicals()
uCSIsYiSyllables()
uCSIsYijingHexagramSymbols()
# functions from module xmlversion
checkVersion()

View File

@ -674,6 +674,7 @@ xmlCreateIntSubset
xmlCreateMemoryParserCtxt
xmlCreatePushParserCtxt
xmlCreateURI
xmlCreateURLParserCtxt
xmlCtxtGetLastError
xmlCtxtReadDoc
xmlCtxtReadFd
@ -682,6 +683,7 @@ xmlCtxtReadIO
xmlCtxtReadMemory
xmlCtxtReset
xmlCtxtResetLastError
xmlCtxtResetPush
xmlCtxtUseOptions
xmlCurrentChar
#ifdef LIBXML_DEBUG_ENABLED
@ -1477,6 +1479,9 @@ xmlSchemaGetPredefinedType
xmlSchemaInitTypes
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
xmlSchemaNewDocParserCtxt
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
xmlSchemaNewFacet
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
@ -1580,6 +1585,7 @@ xmlStopParser
xmlStrEqual
xmlStrPrintf
xmlStrQEqual
xmlStrVPrintf
xmlStrcasecmp
xmlStrcasestr
xmlStrcat
@ -1747,6 +1753,9 @@ xmlThrDefSetStructuredErrorFunc
xmlThrDefSubstituteEntitiesDefaultValue
xmlThrDefTreeIndentString
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsAegeanNumbers
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsAlphabeticPresentationForms
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -1789,6 +1798,9 @@ xmlUCSIsBoxDrawing
xmlUCSIsBraillePatterns
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsBuhid
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsByzantineMusicalSymbols
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -1936,6 +1948,9 @@ xmlUCSIsCherokee
xmlUCSIsCombiningDiacriticalMarks
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsCombiningDiacriticalMarksforSymbols
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsCombiningHalfMarks
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -1948,9 +1963,15 @@ xmlUCSIsControlPictures
xmlUCSIsCurrencySymbols
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsCypriotSyllabary
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsCyrillic
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsCyrillicSupplement
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsDeseret
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -1987,6 +2008,9 @@ xmlUCSIsGreek
xmlUCSIsGreekExtended
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsGreekandCoptic
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsGujarati
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -2005,6 +2029,9 @@ xmlUCSIsHangulJamo
xmlUCSIsHangulSyllables
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsHanunoo
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsHebrew
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -2035,9 +2062,15 @@ xmlUCSIsKannada
xmlUCSIsKatakana
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsKatakanaPhoneticExtensions
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsKhmer
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsKhmerSymbols
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsLao
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -2056,6 +2089,15 @@ xmlUCSIsLatinExtendedB
xmlUCSIsLetterlikeSymbols
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsLimbu
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsLinearBIdeograms
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsLinearBSyllabary
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsLowSurrogates
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -2068,9 +2110,18 @@ xmlUCSIsMathematicalAlphanumericSymbols
xmlUCSIsMathematicalOperators
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsMiscellaneousMathematicalSymbolsA
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsMiscellaneousMathematicalSymbolsB
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsMiscellaneousSymbols
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsMiscellaneousSymbolsandArrows
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsMiscellaneousTechnical
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -2098,12 +2149,24 @@ xmlUCSIsOpticalCharacterRecognition
xmlUCSIsOriya
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsOsmanya
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsPhoneticExtensions
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsPrivateUse
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsPrivateUseArea
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsRunic
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsShavian
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsSinhala
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -2119,12 +2182,39 @@ xmlUCSIsSpecials
xmlUCSIsSuperscriptsandSubscripts
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsSupplementalArrowsA
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsSupplementalArrowsB
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsSupplementalMathematicalOperators
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsSupplementaryPrivateUseAreaA
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsSupplementaryPrivateUseAreaB
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsSyriac
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsTagalog
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsTagbanwa
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsTags
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsTaiLe
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsTaiXuanJingSymbols
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsTamil
#endif
#ifdef LIBXML_REGEXP_ENABLED
@ -2140,14 +2230,26 @@ xmlUCSIsThai
xmlUCSIsTibetan
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsUgaritic
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsUnifiedCanadianAboriginalSyllabics
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsVariationSelectors
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsVariationSelectorsSupplement
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsYiRadicals
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsYiSyllables
#endif
#ifdef LIBXML_REGEXP_ENABLED
xmlUCSIsYijingHexagramSymbols
#endif
xmlURIEscape
xmlURIEscapeStr
xmlURIUnescapeString
@ -2196,9 +2298,18 @@ xmlValidatePushElement
xmlValidateQName
xmlValidateRoot
#ifdef LIBXML_XINCLUDE_ENABLED
xmlXIncludeFreeContext
#endif
#ifdef LIBXML_XINCLUDE_ENABLED
xmlXIncludeNewContext
#endif
#ifdef LIBXML_XINCLUDE_ENABLED
xmlXIncludeProcess
#endif
#ifdef LIBXML_XINCLUDE_ENABLED
xmlXIncludeProcessNode
#endif
#ifdef LIBXML_XINCLUDE_ENABLED
xmlXIncludeProcessTree
#endif
#ifdef LIBXML_XPATH_ENABLED

File diff suppressed because it is too large Load Diff