diff --git a/doc/apibuild.py b/doc/apibuild.py index c2181828..7641dd1f 100755 --- a/doc/apibuild.py +++ b/doc/apibuild.py @@ -32,6 +32,7 @@ ignored_files = { "timsort.h": "Internal header only for xpath.c 2.9.0", "nanoftp.h": "empty", "SAX.h": "empty", + "xmlunicode.h": "empty", } ignored_words = { diff --git a/doc/libxml2-api.xml b/doc/libxml2-api.xml index 4399769c..8a773ca2 100644 --- a/doc/libxml2-api.xml +++ b/doc/libxml2-api.xml @@ -3102,177 +3102,6 @@ - - Unicode character APIs - API for the Unicode character APIs This file is automatically generated from the UCS description files of the Unicode Character Database - Daniel Veillard - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - compile-time version information compile-time version information for the XML library @@ -3303,7 +3132,6 @@ - @@ -3891,9 +3719,6 @@ Always enabled since 2.14.0 - - Whether the Unicode related interfaces are compiled in - Whether the DTD validation support is configured in @@ -15029,1004 +14854,6 @@ crash if you try to modify the tree)'/> - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of AegeanNumbers UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of AlphabeticPresentationForms UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Arabic UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of ArabicPresentationForms-A UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of ArabicPresentationForms-B UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Armenian UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Arrows UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of BasicLatin UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Bengali UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of the UCS Block - - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of BlockElements UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Bopomofo UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of BopomofoExtended UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of BoxDrawing UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of BraillePatterns UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Buhid UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of ByzantineMusicalSymbols UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CJKCompatibility UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CJKCompatibilityForms UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CJKCompatibilityIdeographs UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CJKCompatibilityIdeographsSupplement UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CJKRadicalsSupplement UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CJKSymbolsandPunctuation UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CJKUnifiedIdeographs UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CJKUnifiedIdeographsExtensionA UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CJKUnifiedIdeographsExtensionB UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of the UCS Category - - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of C UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Cc UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Cf UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Co UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Cs UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of L UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Ll UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Lm UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Lo UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Lt UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Lu UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of M UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Mc UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Me UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Mn UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of N UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Nd UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Nl UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of No UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of P UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Pc UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Pd UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Pe UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Pf UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Pi UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Po UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Ps UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of S UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Sc UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Sk UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Sm UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of So UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Z UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Zl UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Zp UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Zs UCS Category - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Cherokee UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CombiningDiacriticalMarks UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CombiningDiacriticalMarksforSymbols UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CombiningHalfMarks UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CombiningMarksforSymbols UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of ControlPictures UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CurrencySymbols UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CypriotSyllabary UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Cyrillic UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of CyrillicSupplement UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Deseret UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Devanagari UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Dingbats UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of EnclosedAlphanumerics UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of EnclosedCJKLettersandMonths UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Ethiopic UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of GeneralPunctuation UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of GeometricShapes UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Georgian UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Gothic UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Greek UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of GreekExtended UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of GreekandCoptic UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Gujarati UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Gurmukhi UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of HalfwidthandFullwidthForms UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of HangulCompatibilityJamo UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of HangulJamo UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of HangulSyllables UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Hanunoo UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Hebrew UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of HighPrivateUseSurrogates UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of HighSurrogates UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Hiragana UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of IPAExtensions UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of IdeographicDescriptionCharacters UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Kanbun UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of KangxiRadicals UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Kannada UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Katakana UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of KatakanaPhoneticExtensions UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Khmer UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of KhmerSymbols UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Lao UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Latin-1Supplement UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of LatinExtended-A UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of LatinExtendedAdditional UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of LatinExtended-B UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of LetterlikeSymbols UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Limbu UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of LinearBIdeograms UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of LinearBSyllabary UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of LowSurrogates UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Malayalam UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of MathematicalAlphanumericSymbols UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of MathematicalOperators UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of MiscellaneousMathematicalSymbols-A UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of MiscellaneousMathematicalSymbols-B UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of MiscellaneousSymbols UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of MiscellaneousSymbolsandArrows UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of MiscellaneousTechnical UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Mongolian UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of MusicalSymbols UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Myanmar UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of NumberForms UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Ogham UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of OldItalic UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of OpticalCharacterRecognition UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Oriya UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Osmanya UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of PhoneticExtensions UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of PrivateUse UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of PrivateUseArea UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Runic UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Shavian UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Sinhala UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of SmallFormVariants UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of SpacingModifierLetters UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Specials UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of SuperscriptsandSubscripts UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of SupplementalArrows-A UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of SupplementalArrows-B UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of SupplementalMathematicalOperators UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of SupplementaryPrivateUseArea-A UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of SupplementaryPrivateUseArea-B UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Syriac UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Tagalog UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Tagbanwa UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Tags UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of TaiLe UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of TaiXuanJingSymbols UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Tamil UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Telugu UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Thaana UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Thai UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Tibetan UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of Ugaritic UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of UnifiedCanadianAboriginalSyllabics UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of VariationSelectors UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of VariationSelectorsSupplement UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of YiRadicals UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of YiSyllables UCS Block - - - - - defined(LIBXML_UNICODE_ENABLED) - Check whether the character is part of YijingHexagramSymbols UCS Block - - - Escaping routine, does not do validity checks ! It will try to escape the chars needing this, but this is heuristic based it's impossible to be sure. diff --git a/include/libxml/xmlunicode.h b/include/libxml/xmlunicode.h index b6d795b2..efda81b2 100644 --- a/include/libxml/xmlunicode.h +++ b/include/libxml/xmlunicode.h @@ -2,365 +2,14 @@ * Summary: Unicode character APIs * Description: API for the Unicode character APIs * - * This file is automatically generated from the - * UCS description files of the Unicode Character Database - * http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html - * using the genUnicode.py Python script. - * - * Generation date: Tue Apr 30 17:30:38 2024 - * Sources: Blocks-4.0.1.txt UnicodeData-4.0.1.txt - * Author: Daniel Veillard + * Deprecated, don't use. */ #ifndef __XML_UNICODE_H__ #define __XML_UNICODE_H__ -#include - -#ifdef LIBXML_UNICODE_ENABLED - -#ifdef __cplusplus -extern "C" { +#ifdef __GNUC__ + #warning "libxml/xmlunicode.h is deprecated" #endif -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsAegeanNumbers (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsAlphabeticPresentationForms (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsArabic (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsArabicPresentationFormsA (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsArabicPresentationFormsB (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsArmenian (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsArrows (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsBasicLatin (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsBengali (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsBlockElements (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsBopomofo (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsBopomofoExtended (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsBoxDrawing (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsBraillePatterns (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsBuhid (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsByzantineMusicalSymbols (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCJKCompatibility (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCJKCompatibilityForms (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCJKCompatibilityIdeographs (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCJKCompatibilityIdeographsSupplement (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCJKRadicalsSupplement (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCJKSymbolsandPunctuation (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCJKUnifiedIdeographs (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCJKUnifiedIdeographsExtensionA (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCJKUnifiedIdeographsExtensionB (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCherokee (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCombiningDiacriticalMarks (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCombiningDiacriticalMarksforSymbols (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCombiningHalfMarks (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCombiningMarksforSymbols (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsControlPictures (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCurrencySymbols (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCypriotSyllabary (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCyrillic (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCyrillicSupplement (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsDeseret (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsDevanagari (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsDingbats (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsEnclosedAlphanumerics (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsEnclosedCJKLettersandMonths (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsEthiopic (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsGeneralPunctuation (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsGeometricShapes (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsGeorgian (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsGothic (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsGreek (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsGreekExtended (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsGreekandCoptic (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsGujarati (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsGurmukhi (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsHalfwidthandFullwidthForms (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsHangulCompatibilityJamo (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsHangulJamo (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsHangulSyllables (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsHanunoo (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsHebrew (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsHighPrivateUseSurrogates (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsHighSurrogates (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsHiragana (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsIPAExtensions (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsIdeographicDescriptionCharacters (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsKanbun (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsKangxiRadicals (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsKannada (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsKatakana (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsKatakanaPhoneticExtensions (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsKhmer (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsKhmerSymbols (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsLao (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsLatin1Supplement (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsLatinExtendedA (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsLatinExtendedB (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsLatinExtendedAdditional (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsLetterlikeSymbols (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsLimbu (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsLinearBIdeograms (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsLinearBSyllabary (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsLowSurrogates (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMalayalam (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMathematicalAlphanumericSymbols (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMathematicalOperators (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMiscellaneousMathematicalSymbolsA (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMiscellaneousMathematicalSymbolsB (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMiscellaneousSymbols (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMiscellaneousSymbolsandArrows (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMiscellaneousTechnical (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMongolian (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMusicalSymbols (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsMyanmar (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsNumberForms (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsOgham (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsOldItalic (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsOpticalCharacterRecognition (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsOriya (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsOsmanya (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsPhoneticExtensions (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsPrivateUse (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsPrivateUseArea (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsRunic (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsShavian (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSinhala (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSmallFormVariants (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSpacingModifierLetters (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSpecials (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSuperscriptsandSubscripts (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSupplementalArrowsA (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSupplementalArrowsB (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSupplementalMathematicalOperators (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSupplementaryPrivateUseAreaA (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSupplementaryPrivateUseAreaB (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsSyriac (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsTagalog (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsTagbanwa (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsTags (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsTaiLe (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsTaiXuanJingSymbols (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsTamil (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsTelugu (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsThaana (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsThai (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsTibetan (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsUgaritic (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsUnifiedCanadianAboriginalSyllabics (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsVariationSelectors (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsVariationSelectorsSupplement (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsYiRadicals (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsYiSyllables (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsYijingHexagramSymbols (int code); - -XMLPUBFUN int xmlUCSIsBlock (int code, const char *block); - -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatC (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatCc (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatCf (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatCo (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatCs (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatL (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatLl (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatLm (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatLo (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatLt (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatLu (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatM (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatMc (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatMe (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatMn (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatN (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatNd (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatNl (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatNo (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatP (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatPc (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatPd (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatPe (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatPf (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatPi (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatPo (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatPs (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatS (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatSc (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatSk (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatSm (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatSo (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatZ (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatZl (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatZp (int code); -XML_DEPRECATED -XMLPUBFUN int xmlUCSIsCatZs (int code); - -XMLPUBFUN int xmlUCSIsCat (int code, const char *cat); - -#ifdef __cplusplus -} -#endif - -#endif /* LIBXML_UNICODE_ENABLED */ - #endif /* __XML_UNICODE_H__ */ diff --git a/include/libxml/xmlversion.h.in b/include/libxml/xmlversion.h.in index 6fc9bd2f..c3f0e4db 100644 --- a/include/libxml/xmlversion.h.in +++ b/include/libxml/xmlversion.h.in @@ -243,11 +243,9 @@ /** * LIBXML_UNICODE_ENABLED: * - * Whether the Unicode related interfaces are compiled in + * Removed in 2.14 */ -#if @WITH_REGEXPS@ -#define LIBXML_UNICODE_ENABLED -#endif +#undef LIBXML_UNICODE_ENABLED /** * LIBXML_REGEXP_ENABLED: diff --git a/include/private/Makefile.am b/include/private/Makefile.am index 28def095..7bdba3d3 100644 --- a/include/private/Makefile.am +++ b/include/private/Makefile.am @@ -16,6 +16,7 @@ EXTRA_DIST = \ string.h \ threads.h \ tree.h \ + unicode.h \ xinclude.h \ xpath.h \ xzlib.h diff --git a/include/private/unicode.h b/include/private/unicode.h new file mode 100644 index 00000000..2765263d --- /dev/null +++ b/include/private/unicode.h @@ -0,0 +1,44 @@ +#ifndef XML_UNICODE_H_PRIVATE__ +#define XML_UNICODE_H_PRIVATE__ + +XML_HIDDEN int xmlUCSIsBlock (int code, const char *block); +XML_HIDDEN int xmlUCSIsCat (int code, const char *cat); + +XML_HIDDEN int xmlUCSIsCatC (int code); +XML_HIDDEN int xmlUCSIsCatCc (int code); +XML_HIDDEN int xmlUCSIsCatCf (int code); +XML_HIDDEN int xmlUCSIsCatCo (int code); +XML_HIDDEN int xmlUCSIsCatCs (int code); +XML_HIDDEN int xmlUCSIsCatL (int code); +XML_HIDDEN int xmlUCSIsCatLl (int code); +XML_HIDDEN int xmlUCSIsCatLm (int code); +XML_HIDDEN int xmlUCSIsCatLo (int code); +XML_HIDDEN int xmlUCSIsCatLt (int code); +XML_HIDDEN int xmlUCSIsCatLu (int code); +XML_HIDDEN int xmlUCSIsCatM (int code); +XML_HIDDEN int xmlUCSIsCatMc (int code); +XML_HIDDEN int xmlUCSIsCatMe (int code); +XML_HIDDEN int xmlUCSIsCatMn (int code); +XML_HIDDEN int xmlUCSIsCatN (int code); +XML_HIDDEN int xmlUCSIsCatNd (int code); +XML_HIDDEN int xmlUCSIsCatNl (int code); +XML_HIDDEN int xmlUCSIsCatNo (int code); +XML_HIDDEN int xmlUCSIsCatP (int code); +XML_HIDDEN int xmlUCSIsCatPc (int code); +XML_HIDDEN int xmlUCSIsCatPd (int code); +XML_HIDDEN int xmlUCSIsCatPe (int code); +XML_HIDDEN int xmlUCSIsCatPf (int code); +XML_HIDDEN int xmlUCSIsCatPi (int code); +XML_HIDDEN int xmlUCSIsCatPo (int code); +XML_HIDDEN int xmlUCSIsCatPs (int code); +XML_HIDDEN int xmlUCSIsCatS (int code); +XML_HIDDEN int xmlUCSIsCatSc (int code); +XML_HIDDEN int xmlUCSIsCatSk (int code); +XML_HIDDEN int xmlUCSIsCatSm (int code); +XML_HIDDEN int xmlUCSIsCatSo (int code); +XML_HIDDEN int xmlUCSIsCatZ (int code); +XML_HIDDEN int xmlUCSIsCatZl (int code); +XML_HIDDEN int xmlUCSIsCatZp (int code); +XML_HIDDEN int xmlUCSIsCatZs (int code); + +#endif /* XML_UNICODE_H_PRIVATE__ */ diff --git a/parser.c b/parser.c index ef2a4b96..e8340356 100644 --- a/parser.c +++ b/parser.c @@ -631,11 +631,7 @@ xmlHasFeature(xmlFeature feature) return(0); #endif case XML_WITH_UNICODE: -#ifdef LIBXML_UNICODE_ENABLED - return(1); -#else return(0); -#endif case XML_WITH_REGEXP: #ifdef LIBXML_REGEXP_ENABLED return(1); diff --git a/python/libxml_wrap.h b/python/libxml_wrap.h index d3fdb69f..a949b2b5 100644 --- a/python/libxml_wrap.h +++ b/python/libxml_wrap.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/genUnicode.py b/tools/genUnicode.py index cf50bfef..cfcdc885 100755 --- a/tools/genUnicode.py +++ b/tools/genUnicode.py @@ -191,12 +191,6 @@ ckeys = sorted(Categories.keys()) # # Generate the resulting files # -try: - header = open("include/libxml/xmlunicode.h", "w") -except: - print("Failed to open include/libxml/xmlunicode.h") - sys.exit(1) - try: output = open("xmlunicode.c", "w") except: @@ -205,34 +199,6 @@ except: date = time.asctime(time.localtime(time.time())) -header.write( -"""/* - * Summary: Unicode character APIs - * Description: API for the Unicode character APIs - * - * This file is automatically generated from the - * UCS description files of the Unicode Character Database - * %s - * using the genUnicode.py Python script. - * - * Generation date: %s - * Sources: %s - * Author: Daniel Veillard - */ - -#ifndef __XML_UNICODE_H__ -#define __XML_UNICODE_H__ - -#include - -#ifdef LIBXML_UNICODE_ENABLED - -#ifdef __cplusplus -extern "C" { -#endif - -""" % (webpage, date, sources)); - output.write( """/* * xmlunicode.c: this module implements the Unicode character APIs @@ -250,13 +216,14 @@ output.write( #define IN_LIBXML #include "libxml.h" -#ifdef LIBXML_UNICODE_ENABLED +#ifdef LIBXML_REGEXP_ENABLED #include #include -#include #include +#include "private/unicode.h" + typedef int (xmlIntFunc)(int); /* just to keep one's mind untwisted */ typedef struct { @@ -272,29 +239,8 @@ typedef struct { static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname); -static const xmlUnicodeRange xmlUnicodeBlocks[] = { """ % (webpage, date, sources)); -flag = 0 -for block in bkeys: - name = block.replace('-', '') - if flag: - output.write(',\n') - else: - flag = 1 - output.write(' {"%s", xmlUCSIs%s}' % (block, name)) -output.write('};\n\n') - -output.write('static const xmlUnicodeRange xmlUnicodeCats[] = {\n') -flag = 0; -for name in ckeys: - if flag: - output.write(',\n') - else: - flag = 1 - output.write(' {"%s", xmlUCSIsCat%s}' % (name, name)) -output.write('};\n\n') - # # For any categories with more than minTableSize ranges we generate # a range table suitable for xmlCharInRange @@ -335,10 +281,7 @@ for name in ckeys: output.write( -"""static const xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s}; -static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s}; - -/** +"""/** * xmlUnicodeLookup: * @tptr: pointer to the name table * @name: name to be found @@ -359,7 +302,8 @@ static xmlIntFunc sptr = tptr->table; while (low <= high) { mid = (low + high) / 2; - if ((cmp=strcmp(tname, sptr[mid].rangename)) == 0) + cmp = strcmp(tname, sptr[mid].rangename); + if (cmp == 0) return (sptr[mid].func); if (cmp < 0) high = mid - 1; @@ -369,16 +313,15 @@ static xmlIntFunc return (NULL); } -""" % (len(BlockNames), len(Categories)) ) +""") for block in bkeys: name = block.replace('-', '') - header.write("XML_DEPRECATED\nXMLPUBFUN int xmlUCSIs%s\t(int code);\n" % name) output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name)) output.write(" *\n * Check whether the character is part of %s UCS Block\n"% (block)) output.write(" *\n * Returns 1 if true 0 otherwise\n */\n"); - output.write("int\nxmlUCSIs%s(int code) {\n return(" % name) + output.write("static int\nxmlUCSIs%s(int code) {\n return(" % name) flag = 0 for (start, end) in BlockNames[block]: if flag: @@ -388,32 +331,8 @@ for block in bkeys: output.write("((code >= %s) && (code <= %s))" % (start, end)) output.write(");\n}\n\n") -header.write("\nXMLPUBFUN int xmlUCSIsBlock\t(int code, const char *block);\n\n") -output.write( -"""/** - * xmlUCSIsBlock: - * @code: UCS code point - * @block: UCS block name - * - * Check whether the character is part of the UCS Block - * - * Returns 1 if true, 0 if false and -1 on unknown block - */ -int -xmlUCSIsBlock(int code, const char *block) { - xmlIntFunc *func; - - func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block); - if (func == NULL) - return (-1); - return (func(code)); -} - -""") - for name in ckeys: ranges = Categories[name] - header.write("XML_DEPRECATED\nXMLPUBFUN int xmlUCSIsCat%s\t(int code);\n" % name) output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name)) output.write(" *\n * Check whether the character is part of %s UCS Category\n"% (name)) @@ -438,9 +357,54 @@ for name in ckeys: hex(begin), hex(end))) output.write(");\n}\n\n") -header.write("\nXMLPUBFUN int xmlUCSIsCat\t(int code, const char *cat);\n") output.write( -"""/** +"""static const xmlUnicodeRange xmlUnicodeBlocks[] = {""") + +flag = 0 +for block in bkeys: + name = block.replace('-', '') + if flag: + output.write(',\n') + else: + flag = 1 + output.write(' {"%s", xmlUCSIs%s}' % (block, name)) +output.write('};\n\n') + +output.write('static const xmlUnicodeRange xmlUnicodeCats[] = {\n') +flag = 0; +for name in ckeys: + if flag: + output.write(',\n') + else: + flag = 1 + output.write(' {"%s", xmlUCSIsCat%s}' % (name, name)) + +output.write( +"""}; + +static const xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, %s}; +static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, %s}; + +/** + * xmlUCSIsBlock: + * @code: UCS code point + * @block: UCS block name + * + * Check whether the character is part of the UCS Block + * + * Returns 1 if true, 0 if false and -1 on unknown block + */ +int +xmlUCSIsBlock(int code, const char *block) { + xmlIntFunc *func; + + func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block); + if (func == NULL) + return (-1); + return (func(code)); +} + +/** * xmlUCSIsCat: * @code: UCS code point * @cat: UCS Category name @@ -459,18 +423,7 @@ xmlUCSIsCat(int code, const char *cat) { return (func(code)); } -#endif /* LIBXML_UNICODE_ENABLED */ -""") +#endif /* LIBXML_REGEXP_ENABLED */ +""" % (len(BlockNames), len(Categories))) -header.write(""" -#ifdef __cplusplus -} -#endif - -#endif /* LIBXML_UNICODE_ENABLED */ - -#endif /* __XML_UNICODE_H__ */ -"""); - -header.close() output.close() diff --git a/xmlregexp.c b/xmlregexp.c index da826487..9d36c172 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -27,11 +27,11 @@ #include #include #include -#include #include "private/error.h" #include "private/memory.h" #include "private/regexp.h" +#include "private/unicode.h" #ifndef SIZE_MAX #define SIZE_MAX ((size_t) -1) diff --git a/xmlunicode.c b/xmlunicode.c index a385bcbd..0f0dd02d 100644 --- a/xmlunicode.c +++ b/xmlunicode.c @@ -6,7 +6,7 @@ * http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html * using the genUnicode.py Python script. * - * Generation date: Mon Mar 27 11:09:52 2006 + * Generation date: Tue Mar 4 16:29:31 2025 * Sources: Blocks-4.0.1.txt UnicodeData-4.0.1.txt * Daniel Veillard */ @@ -14,13 +14,14 @@ #define IN_LIBXML #include "libxml.h" -#ifdef LIBXML_UNICODE_ENABLED +#ifdef LIBXML_REGEXP_ENABLED #include #include -#include #include +#include "private/unicode.h" + typedef int (xmlIntFunc)(int); /* just to keep one's mind untwisted */ typedef struct { @@ -36,174 +37,6 @@ typedef struct { static xmlIntFunc *xmlUnicodeLookup(const xmlUnicodeNameTable *tptr, const char *tname); -static const xmlUnicodeRange xmlUnicodeBlocks[] = { - {"AegeanNumbers", xmlUCSIsAegeanNumbers}, - {"AlphabeticPresentationForms", xmlUCSIsAlphabeticPresentationForms}, - {"Arabic", xmlUCSIsArabic}, - {"ArabicPresentationForms-A", xmlUCSIsArabicPresentationFormsA}, - {"ArabicPresentationForms-B", xmlUCSIsArabicPresentationFormsB}, - {"Armenian", xmlUCSIsArmenian}, - {"Arrows", xmlUCSIsArrows}, - {"BasicLatin", xmlUCSIsBasicLatin}, - {"Bengali", xmlUCSIsBengali}, - {"BlockElements", xmlUCSIsBlockElements}, - {"Bopomofo", xmlUCSIsBopomofo}, - {"BopomofoExtended", xmlUCSIsBopomofoExtended}, - {"BoxDrawing", xmlUCSIsBoxDrawing}, - {"BraillePatterns", xmlUCSIsBraillePatterns}, - {"Buhid", xmlUCSIsBuhid}, - {"ByzantineMusicalSymbols", xmlUCSIsByzantineMusicalSymbols}, - {"CJKCompatibility", xmlUCSIsCJKCompatibility}, - {"CJKCompatibilityForms", xmlUCSIsCJKCompatibilityForms}, - {"CJKCompatibilityIdeographs", xmlUCSIsCJKCompatibilityIdeographs}, - {"CJKCompatibilityIdeographsSupplement", xmlUCSIsCJKCompatibilityIdeographsSupplement}, - {"CJKRadicalsSupplement", xmlUCSIsCJKRadicalsSupplement}, - {"CJKSymbolsandPunctuation", xmlUCSIsCJKSymbolsandPunctuation}, - {"CJKUnifiedIdeographs", xmlUCSIsCJKUnifiedIdeographs}, - {"CJKUnifiedIdeographsExtensionA", xmlUCSIsCJKUnifiedIdeographsExtensionA}, - {"CJKUnifiedIdeographsExtensionB", xmlUCSIsCJKUnifiedIdeographsExtensionB}, - {"Cherokee", xmlUCSIsCherokee}, - {"CombiningDiacriticalMarks", xmlUCSIsCombiningDiacriticalMarks}, - {"CombiningDiacriticalMarksforSymbols", xmlUCSIsCombiningDiacriticalMarksforSymbols}, - {"CombiningHalfMarks", xmlUCSIsCombiningHalfMarks}, - {"CombiningMarksforSymbols", xmlUCSIsCombiningMarksforSymbols}, - {"ControlPictures", xmlUCSIsControlPictures}, - {"CurrencySymbols", xmlUCSIsCurrencySymbols}, - {"CypriotSyllabary", xmlUCSIsCypriotSyllabary}, - {"Cyrillic", xmlUCSIsCyrillic}, - {"CyrillicSupplement", xmlUCSIsCyrillicSupplement}, - {"Deseret", xmlUCSIsDeseret}, - {"Devanagari", xmlUCSIsDevanagari}, - {"Dingbats", xmlUCSIsDingbats}, - {"EnclosedAlphanumerics", xmlUCSIsEnclosedAlphanumerics}, - {"EnclosedCJKLettersandMonths", xmlUCSIsEnclosedCJKLettersandMonths}, - {"Ethiopic", xmlUCSIsEthiopic}, - {"GeneralPunctuation", xmlUCSIsGeneralPunctuation}, - {"GeometricShapes", xmlUCSIsGeometricShapes}, - {"Georgian", xmlUCSIsGeorgian}, - {"Gothic", xmlUCSIsGothic}, - {"Greek", xmlUCSIsGreek}, - {"GreekExtended", xmlUCSIsGreekExtended}, - {"GreekandCoptic", xmlUCSIsGreekandCoptic}, - {"Gujarati", xmlUCSIsGujarati}, - {"Gurmukhi", xmlUCSIsGurmukhi}, - {"HalfwidthandFullwidthForms", xmlUCSIsHalfwidthandFullwidthForms}, - {"HangulCompatibilityJamo", xmlUCSIsHangulCompatibilityJamo}, - {"HangulJamo", xmlUCSIsHangulJamo}, - {"HangulSyllables", xmlUCSIsHangulSyllables}, - {"Hanunoo", xmlUCSIsHanunoo}, - {"Hebrew", xmlUCSIsHebrew}, - {"HighPrivateUseSurrogates", xmlUCSIsHighPrivateUseSurrogates}, - {"HighSurrogates", xmlUCSIsHighSurrogates}, - {"Hiragana", xmlUCSIsHiragana}, - {"IPAExtensions", xmlUCSIsIPAExtensions}, - {"IdeographicDescriptionCharacters", xmlUCSIsIdeographicDescriptionCharacters}, - {"Kanbun", xmlUCSIsKanbun}, - {"KangxiRadicals", xmlUCSIsKangxiRadicals}, - {"Kannada", xmlUCSIsKannada}, - {"Katakana", xmlUCSIsKatakana}, - {"KatakanaPhoneticExtensions", xmlUCSIsKatakanaPhoneticExtensions}, - {"Khmer", xmlUCSIsKhmer}, - {"KhmerSymbols", xmlUCSIsKhmerSymbols}, - {"Lao", xmlUCSIsLao}, - {"Latin-1Supplement", xmlUCSIsLatin1Supplement}, - {"LatinExtended-A", xmlUCSIsLatinExtendedA}, - {"LatinExtended-B", xmlUCSIsLatinExtendedB}, - {"LatinExtendedAdditional", xmlUCSIsLatinExtendedAdditional}, - {"LetterlikeSymbols", xmlUCSIsLetterlikeSymbols}, - {"Limbu", xmlUCSIsLimbu}, - {"LinearBIdeograms", xmlUCSIsLinearBIdeograms}, - {"LinearBSyllabary", xmlUCSIsLinearBSyllabary}, - {"LowSurrogates", xmlUCSIsLowSurrogates}, - {"Malayalam", xmlUCSIsMalayalam}, - {"MathematicalAlphanumericSymbols", xmlUCSIsMathematicalAlphanumericSymbols}, - {"MathematicalOperators", xmlUCSIsMathematicalOperators}, - {"MiscellaneousMathematicalSymbols-A", xmlUCSIsMiscellaneousMathematicalSymbolsA}, - {"MiscellaneousMathematicalSymbols-B", xmlUCSIsMiscellaneousMathematicalSymbolsB}, - {"MiscellaneousSymbols", xmlUCSIsMiscellaneousSymbols}, - {"MiscellaneousSymbolsandArrows", xmlUCSIsMiscellaneousSymbolsandArrows}, - {"MiscellaneousTechnical", xmlUCSIsMiscellaneousTechnical}, - {"Mongolian", xmlUCSIsMongolian}, - {"MusicalSymbols", xmlUCSIsMusicalSymbols}, - {"Myanmar", xmlUCSIsMyanmar}, - {"NumberForms", xmlUCSIsNumberForms}, - {"Ogham", xmlUCSIsOgham}, - {"OldItalic", xmlUCSIsOldItalic}, - {"OpticalCharacterRecognition", xmlUCSIsOpticalCharacterRecognition}, - {"Oriya", xmlUCSIsOriya}, - {"Osmanya", xmlUCSIsOsmanya}, - {"PhoneticExtensions", xmlUCSIsPhoneticExtensions}, - {"PrivateUse", xmlUCSIsPrivateUse}, - {"PrivateUseArea", xmlUCSIsPrivateUseArea}, - {"Runic", xmlUCSIsRunic}, - {"Shavian", xmlUCSIsShavian}, - {"Sinhala", xmlUCSIsSinhala}, - {"SmallFormVariants", xmlUCSIsSmallFormVariants}, - {"SpacingModifierLetters", xmlUCSIsSpacingModifierLetters}, - {"Specials", xmlUCSIsSpecials}, - {"SuperscriptsandSubscripts", xmlUCSIsSuperscriptsandSubscripts}, - {"SupplementalArrows-A", xmlUCSIsSupplementalArrowsA}, - {"SupplementalArrows-B", xmlUCSIsSupplementalArrowsB}, - {"SupplementalMathematicalOperators", xmlUCSIsSupplementalMathematicalOperators}, - {"SupplementaryPrivateUseArea-A", xmlUCSIsSupplementaryPrivateUseAreaA}, - {"SupplementaryPrivateUseArea-B", xmlUCSIsSupplementaryPrivateUseAreaB}, - {"Syriac", xmlUCSIsSyriac}, - {"Tagalog", xmlUCSIsTagalog}, - {"Tagbanwa", xmlUCSIsTagbanwa}, - {"Tags", xmlUCSIsTags}, - {"TaiLe", xmlUCSIsTaiLe}, - {"TaiXuanJingSymbols", xmlUCSIsTaiXuanJingSymbols}, - {"Tamil", xmlUCSIsTamil}, - {"Telugu", xmlUCSIsTelugu}, - {"Thaana", xmlUCSIsThaana}, - {"Thai", xmlUCSIsThai}, - {"Tibetan", xmlUCSIsTibetan}, - {"Ugaritic", xmlUCSIsUgaritic}, - {"UnifiedCanadianAboriginalSyllabics", xmlUCSIsUnifiedCanadianAboriginalSyllabics}, - {"VariationSelectors", xmlUCSIsVariationSelectors}, - {"VariationSelectorsSupplement", xmlUCSIsVariationSelectorsSupplement}, - {"YiRadicals", xmlUCSIsYiRadicals}, - {"YiSyllables", xmlUCSIsYiSyllables}, - {"YijingHexagramSymbols", xmlUCSIsYijingHexagramSymbols}}; - -static const xmlUnicodeRange xmlUnicodeCats[] = { - {"C", xmlUCSIsCatC}, - {"Cc", xmlUCSIsCatCc}, - {"Cf", xmlUCSIsCatCf}, - {"Co", xmlUCSIsCatCo}, - {"Cs", xmlUCSIsCatCs}, - {"L", xmlUCSIsCatL}, - {"Ll", xmlUCSIsCatLl}, - {"Lm", xmlUCSIsCatLm}, - {"Lo", xmlUCSIsCatLo}, - {"Lt", xmlUCSIsCatLt}, - {"Lu", xmlUCSIsCatLu}, - {"M", xmlUCSIsCatM}, - {"Mc", xmlUCSIsCatMc}, - {"Me", xmlUCSIsCatMe}, - {"Mn", xmlUCSIsCatMn}, - {"N", xmlUCSIsCatN}, - {"Nd", xmlUCSIsCatNd}, - {"Nl", xmlUCSIsCatNl}, - {"No", xmlUCSIsCatNo}, - {"P", xmlUCSIsCatP}, - {"Pc", xmlUCSIsCatPc}, - {"Pd", xmlUCSIsCatPd}, - {"Pe", xmlUCSIsCatPe}, - {"Pf", xmlUCSIsCatPf}, - {"Pi", xmlUCSIsCatPi}, - {"Po", xmlUCSIsCatPo}, - {"Ps", xmlUCSIsCatPs}, - {"S", xmlUCSIsCatS}, - {"Sc", xmlUCSIsCatSc}, - {"Sk", xmlUCSIsCatSk}, - {"Sm", xmlUCSIsCatSm}, - {"So", xmlUCSIsCatSo}, - {"Z", xmlUCSIsCatZ}, - {"Zl", xmlUCSIsCatZl}, - {"Zp", xmlUCSIsCatZp}, - {"Zs", xmlUCSIsCatZs}}; - static const xmlChSRange xmlCS[] = {{0x0, 0x1f}, {0x7f, 0x9f}, {0xad, 0xad}, {0x600, 0x603}, {0x6dd, 0x6dd}, {0x70f, 0x70f}, {0x17b4, 0x17b5}, {0x200b, 0x200f}, {0x202a, 0x202e}, {0x2060, 0x2063}, @@ -930,9 +763,6 @@ static const xmlChSRange xmlZS[] = {{0x20, 0x20}, {0xa0, 0xa0}, {0x202f, 0x202f}, {0x205f, 0x205f}, {0x3000, 0x3000} }; static const xmlChRangeGroup xmlZG = {9,0,xmlZS,NULL}; -static const xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, 128}; -static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, 36}; - /** * xmlUnicodeLookup: * @tptr: pointer to the name table @@ -973,7 +803,7 @@ static xmlIntFunc * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsAegeanNumbers(int code) { return(((code >= 0x10100) && (code <= 0x1013F))); } @@ -986,7 +816,7 @@ xmlUCSIsAegeanNumbers(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsAlphabeticPresentationForms(int code) { return(((code >= 0xFB00) && (code <= 0xFB4F))); } @@ -999,7 +829,7 @@ xmlUCSIsAlphabeticPresentationForms(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsArabic(int code) { return(((code >= 0x0600) && (code <= 0x06FF))); } @@ -1012,7 +842,7 @@ xmlUCSIsArabic(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsArabicPresentationFormsA(int code) { return(((code >= 0xFB50) && (code <= 0xFDFF))); } @@ -1025,7 +855,7 @@ xmlUCSIsArabicPresentationFormsA(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsArabicPresentationFormsB(int code) { return(((code >= 0xFE70) && (code <= 0xFEFF))); } @@ -1038,7 +868,7 @@ xmlUCSIsArabicPresentationFormsB(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsArmenian(int code) { return(((code >= 0x0530) && (code <= 0x058F))); } @@ -1051,7 +881,7 @@ xmlUCSIsArmenian(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsArrows(int code) { return(((code >= 0x2190) && (code <= 0x21FF))); } @@ -1064,7 +894,7 @@ xmlUCSIsArrows(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsBasicLatin(int code) { return(((code >= 0x0000) && (code <= 0x007F))); } @@ -1077,7 +907,7 @@ xmlUCSIsBasicLatin(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsBengali(int code) { return(((code >= 0x0980) && (code <= 0x09FF))); } @@ -1090,7 +920,7 @@ xmlUCSIsBengali(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsBlockElements(int code) { return(((code >= 0x2580) && (code <= 0x259F))); } @@ -1103,7 +933,7 @@ xmlUCSIsBlockElements(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsBopomofo(int code) { return(((code >= 0x3100) && (code <= 0x312F))); } @@ -1116,7 +946,7 @@ xmlUCSIsBopomofo(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsBopomofoExtended(int code) { return(((code >= 0x31A0) && (code <= 0x31BF))); } @@ -1129,7 +959,7 @@ xmlUCSIsBopomofoExtended(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsBoxDrawing(int code) { return(((code >= 0x2500) && (code <= 0x257F))); } @@ -1142,7 +972,7 @@ xmlUCSIsBoxDrawing(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsBraillePatterns(int code) { return(((code >= 0x2800) && (code <= 0x28FF))); } @@ -1155,7 +985,7 @@ xmlUCSIsBraillePatterns(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsBuhid(int code) { return(((code >= 0x1740) && (code <= 0x175F))); } @@ -1168,7 +998,7 @@ xmlUCSIsBuhid(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsByzantineMusicalSymbols(int code) { return(((code >= 0x1D000) && (code <= 0x1D0FF))); } @@ -1181,7 +1011,7 @@ xmlUCSIsByzantineMusicalSymbols(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCJKCompatibility(int code) { return(((code >= 0x3300) && (code <= 0x33FF))); } @@ -1194,7 +1024,7 @@ xmlUCSIsCJKCompatibility(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCJKCompatibilityForms(int code) { return(((code >= 0xFE30) && (code <= 0xFE4F))); } @@ -1207,7 +1037,7 @@ xmlUCSIsCJKCompatibilityForms(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCJKCompatibilityIdeographs(int code) { return(((code >= 0xF900) && (code <= 0xFAFF))); } @@ -1220,7 +1050,7 @@ xmlUCSIsCJKCompatibilityIdeographs(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCJKCompatibilityIdeographsSupplement(int code) { return(((code >= 0x2F800) && (code <= 0x2FA1F))); } @@ -1233,7 +1063,7 @@ xmlUCSIsCJKCompatibilityIdeographsSupplement(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCJKRadicalsSupplement(int code) { return(((code >= 0x2E80) && (code <= 0x2EFF))); } @@ -1246,7 +1076,7 @@ xmlUCSIsCJKRadicalsSupplement(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCJKSymbolsandPunctuation(int code) { return(((code >= 0x3000) && (code <= 0x303F))); } @@ -1259,7 +1089,7 @@ xmlUCSIsCJKSymbolsandPunctuation(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCJKUnifiedIdeographs(int code) { return(((code >= 0x4E00) && (code <= 0x9FFF))); } @@ -1272,7 +1102,7 @@ xmlUCSIsCJKUnifiedIdeographs(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCJKUnifiedIdeographsExtensionA(int code) { return(((code >= 0x3400) && (code <= 0x4DBF))); } @@ -1285,7 +1115,7 @@ xmlUCSIsCJKUnifiedIdeographsExtensionA(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCJKUnifiedIdeographsExtensionB(int code) { return(((code >= 0x20000) && (code <= 0x2A6DF))); } @@ -1298,7 +1128,7 @@ xmlUCSIsCJKUnifiedIdeographsExtensionB(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCherokee(int code) { return(((code >= 0x13A0) && (code <= 0x13FF))); } @@ -1311,7 +1141,7 @@ xmlUCSIsCherokee(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCombiningDiacriticalMarks(int code) { return(((code >= 0x0300) && (code <= 0x036F))); } @@ -1324,7 +1154,7 @@ xmlUCSIsCombiningDiacriticalMarks(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCombiningDiacriticalMarksforSymbols(int code) { return(((code >= 0x20D0) && (code <= 0x20FF))); } @@ -1337,7 +1167,7 @@ xmlUCSIsCombiningDiacriticalMarksforSymbols(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCombiningHalfMarks(int code) { return(((code >= 0xFE20) && (code <= 0xFE2F))); } @@ -1350,7 +1180,7 @@ xmlUCSIsCombiningHalfMarks(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCombiningMarksforSymbols(int code) { return(((code >= 0x20D0) && (code <= 0x20FF))); } @@ -1363,7 +1193,7 @@ xmlUCSIsCombiningMarksforSymbols(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsControlPictures(int code) { return(((code >= 0x2400) && (code <= 0x243F))); } @@ -1376,7 +1206,7 @@ xmlUCSIsControlPictures(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCurrencySymbols(int code) { return(((code >= 0x20A0) && (code <= 0x20CF))); } @@ -1389,7 +1219,7 @@ xmlUCSIsCurrencySymbols(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCypriotSyllabary(int code) { return(((code >= 0x10800) && (code <= 0x1083F))); } @@ -1402,7 +1232,7 @@ xmlUCSIsCypriotSyllabary(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCyrillic(int code) { return(((code >= 0x0400) && (code <= 0x04FF))); } @@ -1415,7 +1245,7 @@ xmlUCSIsCyrillic(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsCyrillicSupplement(int code) { return(((code >= 0x0500) && (code <= 0x052F))); } @@ -1428,7 +1258,7 @@ xmlUCSIsCyrillicSupplement(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsDeseret(int code) { return(((code >= 0x10400) && (code <= 0x1044F))); } @@ -1441,7 +1271,7 @@ xmlUCSIsDeseret(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsDevanagari(int code) { return(((code >= 0x0900) && (code <= 0x097F))); } @@ -1454,7 +1284,7 @@ xmlUCSIsDevanagari(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsDingbats(int code) { return(((code >= 0x2700) && (code <= 0x27BF))); } @@ -1467,7 +1297,7 @@ xmlUCSIsDingbats(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsEnclosedAlphanumerics(int code) { return(((code >= 0x2460) && (code <= 0x24FF))); } @@ -1480,7 +1310,7 @@ xmlUCSIsEnclosedAlphanumerics(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsEnclosedCJKLettersandMonths(int code) { return(((code >= 0x3200) && (code <= 0x32FF))); } @@ -1493,7 +1323,7 @@ xmlUCSIsEnclosedCJKLettersandMonths(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsEthiopic(int code) { return(((code >= 0x1200) && (code <= 0x137F))); } @@ -1506,7 +1336,7 @@ xmlUCSIsEthiopic(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsGeneralPunctuation(int code) { return(((code >= 0x2000) && (code <= 0x206F))); } @@ -1519,7 +1349,7 @@ xmlUCSIsGeneralPunctuation(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsGeometricShapes(int code) { return(((code >= 0x25A0) && (code <= 0x25FF))); } @@ -1532,7 +1362,7 @@ xmlUCSIsGeometricShapes(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsGeorgian(int code) { return(((code >= 0x10A0) && (code <= 0x10FF))); } @@ -1545,7 +1375,7 @@ xmlUCSIsGeorgian(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsGothic(int code) { return(((code >= 0x10330) && (code <= 0x1034F))); } @@ -1558,7 +1388,7 @@ xmlUCSIsGothic(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsGreek(int code) { return(((code >= 0x0370) && (code <= 0x03FF))); } @@ -1571,7 +1401,7 @@ xmlUCSIsGreek(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsGreekExtended(int code) { return(((code >= 0x1F00) && (code <= 0x1FFF))); } @@ -1584,7 +1414,7 @@ xmlUCSIsGreekExtended(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsGreekandCoptic(int code) { return(((code >= 0x0370) && (code <= 0x03FF))); } @@ -1597,7 +1427,7 @@ xmlUCSIsGreekandCoptic(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsGujarati(int code) { return(((code >= 0x0A80) && (code <= 0x0AFF))); } @@ -1610,7 +1440,7 @@ xmlUCSIsGujarati(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsGurmukhi(int code) { return(((code >= 0x0A00) && (code <= 0x0A7F))); } @@ -1623,7 +1453,7 @@ xmlUCSIsGurmukhi(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsHalfwidthandFullwidthForms(int code) { return(((code >= 0xFF00) && (code <= 0xFFEF))); } @@ -1636,7 +1466,7 @@ xmlUCSIsHalfwidthandFullwidthForms(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsHangulCompatibilityJamo(int code) { return(((code >= 0x3130) && (code <= 0x318F))); } @@ -1649,7 +1479,7 @@ xmlUCSIsHangulCompatibilityJamo(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsHangulJamo(int code) { return(((code >= 0x1100) && (code <= 0x11FF))); } @@ -1662,7 +1492,7 @@ xmlUCSIsHangulJamo(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsHangulSyllables(int code) { return(((code >= 0xAC00) && (code <= 0xD7AF))); } @@ -1675,7 +1505,7 @@ xmlUCSIsHangulSyllables(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsHanunoo(int code) { return(((code >= 0x1720) && (code <= 0x173F))); } @@ -1688,7 +1518,7 @@ xmlUCSIsHanunoo(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsHebrew(int code) { return(((code >= 0x0590) && (code <= 0x05FF))); } @@ -1701,7 +1531,7 @@ xmlUCSIsHebrew(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsHighPrivateUseSurrogates(int code) { return(((code >= 0xDB80) && (code <= 0xDBFF))); } @@ -1714,7 +1544,7 @@ xmlUCSIsHighPrivateUseSurrogates(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsHighSurrogates(int code) { return(((code >= 0xD800) && (code <= 0xDB7F))); } @@ -1727,7 +1557,7 @@ xmlUCSIsHighSurrogates(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsHiragana(int code) { return(((code >= 0x3040) && (code <= 0x309F))); } @@ -1740,7 +1570,7 @@ xmlUCSIsHiragana(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsIPAExtensions(int code) { return(((code >= 0x0250) && (code <= 0x02AF))); } @@ -1753,7 +1583,7 @@ xmlUCSIsIPAExtensions(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsIdeographicDescriptionCharacters(int code) { return(((code >= 0x2FF0) && (code <= 0x2FFF))); } @@ -1766,7 +1596,7 @@ xmlUCSIsIdeographicDescriptionCharacters(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsKanbun(int code) { return(((code >= 0x3190) && (code <= 0x319F))); } @@ -1779,7 +1609,7 @@ xmlUCSIsKanbun(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsKangxiRadicals(int code) { return(((code >= 0x2F00) && (code <= 0x2FDF))); } @@ -1792,7 +1622,7 @@ xmlUCSIsKangxiRadicals(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsKannada(int code) { return(((code >= 0x0C80) && (code <= 0x0CFF))); } @@ -1805,7 +1635,7 @@ xmlUCSIsKannada(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsKatakana(int code) { return(((code >= 0x30A0) && (code <= 0x30FF))); } @@ -1818,7 +1648,7 @@ xmlUCSIsKatakana(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsKatakanaPhoneticExtensions(int code) { return(((code >= 0x31F0) && (code <= 0x31FF))); } @@ -1831,7 +1661,7 @@ xmlUCSIsKatakanaPhoneticExtensions(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsKhmer(int code) { return(((code >= 0x1780) && (code <= 0x17FF))); } @@ -1844,7 +1674,7 @@ xmlUCSIsKhmer(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsKhmerSymbols(int code) { return(((code >= 0x19E0) && (code <= 0x19FF))); } @@ -1857,7 +1687,7 @@ xmlUCSIsKhmerSymbols(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsLao(int code) { return(((code >= 0x0E80) && (code <= 0x0EFF))); } @@ -1870,7 +1700,7 @@ xmlUCSIsLao(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsLatin1Supplement(int code) { return(((code >= 0x0080) && (code <= 0x00FF))); } @@ -1883,7 +1713,7 @@ xmlUCSIsLatin1Supplement(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsLatinExtendedA(int code) { return(((code >= 0x0100) && (code <= 0x017F))); } @@ -1896,7 +1726,7 @@ xmlUCSIsLatinExtendedA(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsLatinExtendedB(int code) { return(((code >= 0x0180) && (code <= 0x024F))); } @@ -1909,7 +1739,7 @@ xmlUCSIsLatinExtendedB(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsLatinExtendedAdditional(int code) { return(((code >= 0x1E00) && (code <= 0x1EFF))); } @@ -1922,7 +1752,7 @@ xmlUCSIsLatinExtendedAdditional(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsLetterlikeSymbols(int code) { return(((code >= 0x2100) && (code <= 0x214F))); } @@ -1935,7 +1765,7 @@ xmlUCSIsLetterlikeSymbols(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsLimbu(int code) { return(((code >= 0x1900) && (code <= 0x194F))); } @@ -1948,7 +1778,7 @@ xmlUCSIsLimbu(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsLinearBIdeograms(int code) { return(((code >= 0x10080) && (code <= 0x100FF))); } @@ -1961,7 +1791,7 @@ xmlUCSIsLinearBIdeograms(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsLinearBSyllabary(int code) { return(((code >= 0x10000) && (code <= 0x1007F))); } @@ -1974,7 +1804,7 @@ xmlUCSIsLinearBSyllabary(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsLowSurrogates(int code) { return(((code >= 0xDC00) && (code <= 0xDFFF))); } @@ -1987,7 +1817,7 @@ xmlUCSIsLowSurrogates(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMalayalam(int code) { return(((code >= 0x0D00) && (code <= 0x0D7F))); } @@ -2000,7 +1830,7 @@ xmlUCSIsMalayalam(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMathematicalAlphanumericSymbols(int code) { return(((code >= 0x1D400) && (code <= 0x1D7FF))); } @@ -2013,7 +1843,7 @@ xmlUCSIsMathematicalAlphanumericSymbols(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMathematicalOperators(int code) { return(((code >= 0x2200) && (code <= 0x22FF))); } @@ -2026,7 +1856,7 @@ xmlUCSIsMathematicalOperators(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMiscellaneousMathematicalSymbolsA(int code) { return(((code >= 0x27C0) && (code <= 0x27EF))); } @@ -2039,7 +1869,7 @@ xmlUCSIsMiscellaneousMathematicalSymbolsA(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMiscellaneousMathematicalSymbolsB(int code) { return(((code >= 0x2980) && (code <= 0x29FF))); } @@ -2052,7 +1882,7 @@ xmlUCSIsMiscellaneousMathematicalSymbolsB(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMiscellaneousSymbols(int code) { return(((code >= 0x2600) && (code <= 0x26FF))); } @@ -2065,7 +1895,7 @@ xmlUCSIsMiscellaneousSymbols(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMiscellaneousSymbolsandArrows(int code) { return(((code >= 0x2B00) && (code <= 0x2BFF))); } @@ -2078,7 +1908,7 @@ xmlUCSIsMiscellaneousSymbolsandArrows(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMiscellaneousTechnical(int code) { return(((code >= 0x2300) && (code <= 0x23FF))); } @@ -2091,7 +1921,7 @@ xmlUCSIsMiscellaneousTechnical(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMongolian(int code) { return(((code >= 0x1800) && (code <= 0x18AF))); } @@ -2104,7 +1934,7 @@ xmlUCSIsMongolian(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMusicalSymbols(int code) { return(((code >= 0x1D100) && (code <= 0x1D1FF))); } @@ -2117,7 +1947,7 @@ xmlUCSIsMusicalSymbols(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsMyanmar(int code) { return(((code >= 0x1000) && (code <= 0x109F))); } @@ -2130,7 +1960,7 @@ xmlUCSIsMyanmar(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsNumberForms(int code) { return(((code >= 0x2150) && (code <= 0x218F))); } @@ -2143,7 +1973,7 @@ xmlUCSIsNumberForms(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsOgham(int code) { return(((code >= 0x1680) && (code <= 0x169F))); } @@ -2156,7 +1986,7 @@ xmlUCSIsOgham(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsOldItalic(int code) { return(((code >= 0x10300) && (code <= 0x1032F))); } @@ -2169,7 +1999,7 @@ xmlUCSIsOldItalic(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsOpticalCharacterRecognition(int code) { return(((code >= 0x2440) && (code <= 0x245F))); } @@ -2182,7 +2012,7 @@ xmlUCSIsOpticalCharacterRecognition(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsOriya(int code) { return(((code >= 0x0B00) && (code <= 0x0B7F))); } @@ -2195,7 +2025,7 @@ xmlUCSIsOriya(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsOsmanya(int code) { return(((code >= 0x10480) && (code <= 0x104AF))); } @@ -2208,7 +2038,7 @@ xmlUCSIsOsmanya(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsPhoneticExtensions(int code) { return(((code >= 0x1D00) && (code <= 0x1D7F))); } @@ -2221,7 +2051,7 @@ xmlUCSIsPhoneticExtensions(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsPrivateUse(int code) { return(((code >= 0xE000) && (code <= 0xF8FF)) || ((code >= 0xF0000) && (code <= 0xFFFFF)) || @@ -2236,7 +2066,7 @@ xmlUCSIsPrivateUse(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsPrivateUseArea(int code) { return(((code >= 0xE000) && (code <= 0xF8FF))); } @@ -2249,7 +2079,7 @@ xmlUCSIsPrivateUseArea(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsRunic(int code) { return(((code >= 0x16A0) && (code <= 0x16FF))); } @@ -2262,7 +2092,7 @@ xmlUCSIsRunic(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsShavian(int code) { return(((code >= 0x10450) && (code <= 0x1047F))); } @@ -2275,7 +2105,7 @@ xmlUCSIsShavian(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSinhala(int code) { return(((code >= 0x0D80) && (code <= 0x0DFF))); } @@ -2288,7 +2118,7 @@ xmlUCSIsSinhala(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSmallFormVariants(int code) { return(((code >= 0xFE50) && (code <= 0xFE6F))); } @@ -2301,7 +2131,7 @@ xmlUCSIsSmallFormVariants(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSpacingModifierLetters(int code) { return(((code >= 0x02B0) && (code <= 0x02FF))); } @@ -2314,7 +2144,7 @@ xmlUCSIsSpacingModifierLetters(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSpecials(int code) { return(((code >= 0xFFF0) && (code <= 0xFFFF))); } @@ -2327,7 +2157,7 @@ xmlUCSIsSpecials(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSuperscriptsandSubscripts(int code) { return(((code >= 0x2070) && (code <= 0x209F))); } @@ -2340,7 +2170,7 @@ xmlUCSIsSuperscriptsandSubscripts(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSupplementalArrowsA(int code) { return(((code >= 0x27F0) && (code <= 0x27FF))); } @@ -2353,7 +2183,7 @@ xmlUCSIsSupplementalArrowsA(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSupplementalArrowsB(int code) { return(((code >= 0x2900) && (code <= 0x297F))); } @@ -2366,7 +2196,7 @@ xmlUCSIsSupplementalArrowsB(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSupplementalMathematicalOperators(int code) { return(((code >= 0x2A00) && (code <= 0x2AFF))); } @@ -2379,7 +2209,7 @@ xmlUCSIsSupplementalMathematicalOperators(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSupplementaryPrivateUseAreaA(int code) { return(((code >= 0xF0000) && (code <= 0xFFFFF))); } @@ -2392,7 +2222,7 @@ xmlUCSIsSupplementaryPrivateUseAreaA(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSupplementaryPrivateUseAreaB(int code) { return(((code >= 0x100000) && (code <= 0x10FFFF))); } @@ -2405,7 +2235,7 @@ xmlUCSIsSupplementaryPrivateUseAreaB(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsSyriac(int code) { return(((code >= 0x0700) && (code <= 0x074F))); } @@ -2418,7 +2248,7 @@ xmlUCSIsSyriac(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsTagalog(int code) { return(((code >= 0x1700) && (code <= 0x171F))); } @@ -2431,7 +2261,7 @@ xmlUCSIsTagalog(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsTagbanwa(int code) { return(((code >= 0x1760) && (code <= 0x177F))); } @@ -2444,7 +2274,7 @@ xmlUCSIsTagbanwa(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsTags(int code) { return(((code >= 0xE0000) && (code <= 0xE007F))); } @@ -2457,7 +2287,7 @@ xmlUCSIsTags(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsTaiLe(int code) { return(((code >= 0x1950) && (code <= 0x197F))); } @@ -2470,7 +2300,7 @@ xmlUCSIsTaiLe(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsTaiXuanJingSymbols(int code) { return(((code >= 0x1D300) && (code <= 0x1D35F))); } @@ -2483,7 +2313,7 @@ xmlUCSIsTaiXuanJingSymbols(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsTamil(int code) { return(((code >= 0x0B80) && (code <= 0x0BFF))); } @@ -2496,7 +2326,7 @@ xmlUCSIsTamil(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsTelugu(int code) { return(((code >= 0x0C00) && (code <= 0x0C7F))); } @@ -2509,7 +2339,7 @@ xmlUCSIsTelugu(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsThaana(int code) { return(((code >= 0x0780) && (code <= 0x07BF))); } @@ -2522,7 +2352,7 @@ xmlUCSIsThaana(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsThai(int code) { return(((code >= 0x0E00) && (code <= 0x0E7F))); } @@ -2535,7 +2365,7 @@ xmlUCSIsThai(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsTibetan(int code) { return(((code >= 0x0F00) && (code <= 0x0FFF))); } @@ -2548,7 +2378,7 @@ xmlUCSIsTibetan(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsUgaritic(int code) { return(((code >= 0x10380) && (code <= 0x1039F))); } @@ -2561,7 +2391,7 @@ xmlUCSIsUgaritic(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsUnifiedCanadianAboriginalSyllabics(int code) { return(((code >= 0x1400) && (code <= 0x167F))); } @@ -2574,7 +2404,7 @@ xmlUCSIsUnifiedCanadianAboriginalSyllabics(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsVariationSelectors(int code) { return(((code >= 0xFE00) && (code <= 0xFE0F))); } @@ -2587,7 +2417,7 @@ xmlUCSIsVariationSelectors(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsVariationSelectorsSupplement(int code) { return(((code >= 0xE0100) && (code <= 0xE01EF))); } @@ -2600,7 +2430,7 @@ xmlUCSIsVariationSelectorsSupplement(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsYiRadicals(int code) { return(((code >= 0xA490) && (code <= 0xA4CF))); } @@ -2613,7 +2443,7 @@ xmlUCSIsYiRadicals(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsYiSyllables(int code) { return(((code >= 0xA000) && (code <= 0xA48F))); } @@ -2626,30 +2456,11 @@ xmlUCSIsYiSyllables(int code) { * * Returns 1 if true 0 otherwise */ -int +static int xmlUCSIsYijingHexagramSymbols(int code) { return(((code >= 0x4DC0) && (code <= 0x4DFF))); } -/** - * xmlUCSIsBlock: - * @code: UCS code point - * @block: UCS block name - * - * Check whether the character is part of the UCS Block - * - * Returns 1 if true, 0 if false and -1 on unknown block - */ -int -xmlUCSIsBlock(int code, const char *block) { - xmlIntFunc *func; - - func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block); - if (func == NULL) - return (-1); - return (func(code)); -} - /** * xmlUCSIsCatC: * @code: UCS code point @@ -3156,6 +2967,195 @@ xmlUCSIsCatZs(int code) { (code == 0x3000)); } +static const xmlUnicodeRange xmlUnicodeBlocks[] = { {"AegeanNumbers", xmlUCSIsAegeanNumbers}, + {"AlphabeticPresentationForms", xmlUCSIsAlphabeticPresentationForms}, + {"Arabic", xmlUCSIsArabic}, + {"ArabicPresentationForms-A", xmlUCSIsArabicPresentationFormsA}, + {"ArabicPresentationForms-B", xmlUCSIsArabicPresentationFormsB}, + {"Armenian", xmlUCSIsArmenian}, + {"Arrows", xmlUCSIsArrows}, + {"BasicLatin", xmlUCSIsBasicLatin}, + {"Bengali", xmlUCSIsBengali}, + {"BlockElements", xmlUCSIsBlockElements}, + {"Bopomofo", xmlUCSIsBopomofo}, + {"BopomofoExtended", xmlUCSIsBopomofoExtended}, + {"BoxDrawing", xmlUCSIsBoxDrawing}, + {"BraillePatterns", xmlUCSIsBraillePatterns}, + {"Buhid", xmlUCSIsBuhid}, + {"ByzantineMusicalSymbols", xmlUCSIsByzantineMusicalSymbols}, + {"CJKCompatibility", xmlUCSIsCJKCompatibility}, + {"CJKCompatibilityForms", xmlUCSIsCJKCompatibilityForms}, + {"CJKCompatibilityIdeographs", xmlUCSIsCJKCompatibilityIdeographs}, + {"CJKCompatibilityIdeographsSupplement", xmlUCSIsCJKCompatibilityIdeographsSupplement}, + {"CJKRadicalsSupplement", xmlUCSIsCJKRadicalsSupplement}, + {"CJKSymbolsandPunctuation", xmlUCSIsCJKSymbolsandPunctuation}, + {"CJKUnifiedIdeographs", xmlUCSIsCJKUnifiedIdeographs}, + {"CJKUnifiedIdeographsExtensionA", xmlUCSIsCJKUnifiedIdeographsExtensionA}, + {"CJKUnifiedIdeographsExtensionB", xmlUCSIsCJKUnifiedIdeographsExtensionB}, + {"Cherokee", xmlUCSIsCherokee}, + {"CombiningDiacriticalMarks", xmlUCSIsCombiningDiacriticalMarks}, + {"CombiningDiacriticalMarksforSymbols", xmlUCSIsCombiningDiacriticalMarksforSymbols}, + {"CombiningHalfMarks", xmlUCSIsCombiningHalfMarks}, + {"CombiningMarksforSymbols", xmlUCSIsCombiningMarksforSymbols}, + {"ControlPictures", xmlUCSIsControlPictures}, + {"CurrencySymbols", xmlUCSIsCurrencySymbols}, + {"CypriotSyllabary", xmlUCSIsCypriotSyllabary}, + {"Cyrillic", xmlUCSIsCyrillic}, + {"CyrillicSupplement", xmlUCSIsCyrillicSupplement}, + {"Deseret", xmlUCSIsDeseret}, + {"Devanagari", xmlUCSIsDevanagari}, + {"Dingbats", xmlUCSIsDingbats}, + {"EnclosedAlphanumerics", xmlUCSIsEnclosedAlphanumerics}, + {"EnclosedCJKLettersandMonths", xmlUCSIsEnclosedCJKLettersandMonths}, + {"Ethiopic", xmlUCSIsEthiopic}, + {"GeneralPunctuation", xmlUCSIsGeneralPunctuation}, + {"GeometricShapes", xmlUCSIsGeometricShapes}, + {"Georgian", xmlUCSIsGeorgian}, + {"Gothic", xmlUCSIsGothic}, + {"Greek", xmlUCSIsGreek}, + {"GreekExtended", xmlUCSIsGreekExtended}, + {"GreekandCoptic", xmlUCSIsGreekandCoptic}, + {"Gujarati", xmlUCSIsGujarati}, + {"Gurmukhi", xmlUCSIsGurmukhi}, + {"HalfwidthandFullwidthForms", xmlUCSIsHalfwidthandFullwidthForms}, + {"HangulCompatibilityJamo", xmlUCSIsHangulCompatibilityJamo}, + {"HangulJamo", xmlUCSIsHangulJamo}, + {"HangulSyllables", xmlUCSIsHangulSyllables}, + {"Hanunoo", xmlUCSIsHanunoo}, + {"Hebrew", xmlUCSIsHebrew}, + {"HighPrivateUseSurrogates", xmlUCSIsHighPrivateUseSurrogates}, + {"HighSurrogates", xmlUCSIsHighSurrogates}, + {"Hiragana", xmlUCSIsHiragana}, + {"IPAExtensions", xmlUCSIsIPAExtensions}, + {"IdeographicDescriptionCharacters", xmlUCSIsIdeographicDescriptionCharacters}, + {"Kanbun", xmlUCSIsKanbun}, + {"KangxiRadicals", xmlUCSIsKangxiRadicals}, + {"Kannada", xmlUCSIsKannada}, + {"Katakana", xmlUCSIsKatakana}, + {"KatakanaPhoneticExtensions", xmlUCSIsKatakanaPhoneticExtensions}, + {"Khmer", xmlUCSIsKhmer}, + {"KhmerSymbols", xmlUCSIsKhmerSymbols}, + {"Lao", xmlUCSIsLao}, + {"Latin-1Supplement", xmlUCSIsLatin1Supplement}, + {"LatinExtended-A", xmlUCSIsLatinExtendedA}, + {"LatinExtended-B", xmlUCSIsLatinExtendedB}, + {"LatinExtendedAdditional", xmlUCSIsLatinExtendedAdditional}, + {"LetterlikeSymbols", xmlUCSIsLetterlikeSymbols}, + {"Limbu", xmlUCSIsLimbu}, + {"LinearBIdeograms", xmlUCSIsLinearBIdeograms}, + {"LinearBSyllabary", xmlUCSIsLinearBSyllabary}, + {"LowSurrogates", xmlUCSIsLowSurrogates}, + {"Malayalam", xmlUCSIsMalayalam}, + {"MathematicalAlphanumericSymbols", xmlUCSIsMathematicalAlphanumericSymbols}, + {"MathematicalOperators", xmlUCSIsMathematicalOperators}, + {"MiscellaneousMathematicalSymbols-A", xmlUCSIsMiscellaneousMathematicalSymbolsA}, + {"MiscellaneousMathematicalSymbols-B", xmlUCSIsMiscellaneousMathematicalSymbolsB}, + {"MiscellaneousSymbols", xmlUCSIsMiscellaneousSymbols}, + {"MiscellaneousSymbolsandArrows", xmlUCSIsMiscellaneousSymbolsandArrows}, + {"MiscellaneousTechnical", xmlUCSIsMiscellaneousTechnical}, + {"Mongolian", xmlUCSIsMongolian}, + {"MusicalSymbols", xmlUCSIsMusicalSymbols}, + {"Myanmar", xmlUCSIsMyanmar}, + {"NumberForms", xmlUCSIsNumberForms}, + {"Ogham", xmlUCSIsOgham}, + {"OldItalic", xmlUCSIsOldItalic}, + {"OpticalCharacterRecognition", xmlUCSIsOpticalCharacterRecognition}, + {"Oriya", xmlUCSIsOriya}, + {"Osmanya", xmlUCSIsOsmanya}, + {"PhoneticExtensions", xmlUCSIsPhoneticExtensions}, + {"PrivateUse", xmlUCSIsPrivateUse}, + {"PrivateUseArea", xmlUCSIsPrivateUseArea}, + {"Runic", xmlUCSIsRunic}, + {"Shavian", xmlUCSIsShavian}, + {"Sinhala", xmlUCSIsSinhala}, + {"SmallFormVariants", xmlUCSIsSmallFormVariants}, + {"SpacingModifierLetters", xmlUCSIsSpacingModifierLetters}, + {"Specials", xmlUCSIsSpecials}, + {"SuperscriptsandSubscripts", xmlUCSIsSuperscriptsandSubscripts}, + {"SupplementalArrows-A", xmlUCSIsSupplementalArrowsA}, + {"SupplementalArrows-B", xmlUCSIsSupplementalArrowsB}, + {"SupplementalMathematicalOperators", xmlUCSIsSupplementalMathematicalOperators}, + {"SupplementaryPrivateUseArea-A", xmlUCSIsSupplementaryPrivateUseAreaA}, + {"SupplementaryPrivateUseArea-B", xmlUCSIsSupplementaryPrivateUseAreaB}, + {"Syriac", xmlUCSIsSyriac}, + {"Tagalog", xmlUCSIsTagalog}, + {"Tagbanwa", xmlUCSIsTagbanwa}, + {"Tags", xmlUCSIsTags}, + {"TaiLe", xmlUCSIsTaiLe}, + {"TaiXuanJingSymbols", xmlUCSIsTaiXuanJingSymbols}, + {"Tamil", xmlUCSIsTamil}, + {"Telugu", xmlUCSIsTelugu}, + {"Thaana", xmlUCSIsThaana}, + {"Thai", xmlUCSIsThai}, + {"Tibetan", xmlUCSIsTibetan}, + {"Ugaritic", xmlUCSIsUgaritic}, + {"UnifiedCanadianAboriginalSyllabics", xmlUCSIsUnifiedCanadianAboriginalSyllabics}, + {"VariationSelectors", xmlUCSIsVariationSelectors}, + {"VariationSelectorsSupplement", xmlUCSIsVariationSelectorsSupplement}, + {"YiRadicals", xmlUCSIsYiRadicals}, + {"YiSyllables", xmlUCSIsYiSyllables}, + {"YijingHexagramSymbols", xmlUCSIsYijingHexagramSymbols}}; + +static const xmlUnicodeRange xmlUnicodeCats[] = { + {"C", xmlUCSIsCatC}, + {"Cc", xmlUCSIsCatCc}, + {"Cf", xmlUCSIsCatCf}, + {"Co", xmlUCSIsCatCo}, + {"Cs", xmlUCSIsCatCs}, + {"L", xmlUCSIsCatL}, + {"Ll", xmlUCSIsCatLl}, + {"Lm", xmlUCSIsCatLm}, + {"Lo", xmlUCSIsCatLo}, + {"Lt", xmlUCSIsCatLt}, + {"Lu", xmlUCSIsCatLu}, + {"M", xmlUCSIsCatM}, + {"Mc", xmlUCSIsCatMc}, + {"Me", xmlUCSIsCatMe}, + {"Mn", xmlUCSIsCatMn}, + {"N", xmlUCSIsCatN}, + {"Nd", xmlUCSIsCatNd}, + {"Nl", xmlUCSIsCatNl}, + {"No", xmlUCSIsCatNo}, + {"P", xmlUCSIsCatP}, + {"Pc", xmlUCSIsCatPc}, + {"Pd", xmlUCSIsCatPd}, + {"Pe", xmlUCSIsCatPe}, + {"Pf", xmlUCSIsCatPf}, + {"Pi", xmlUCSIsCatPi}, + {"Po", xmlUCSIsCatPo}, + {"Ps", xmlUCSIsCatPs}, + {"S", xmlUCSIsCatS}, + {"Sc", xmlUCSIsCatSc}, + {"Sk", xmlUCSIsCatSk}, + {"Sm", xmlUCSIsCatSm}, + {"So", xmlUCSIsCatSo}, + {"Z", xmlUCSIsCatZ}, + {"Zl", xmlUCSIsCatZl}, + {"Zp", xmlUCSIsCatZp}, + {"Zs", xmlUCSIsCatZs}}; + +static const xmlUnicodeNameTable xmlUnicodeBlockTbl = {xmlUnicodeBlocks, 128}; +static const xmlUnicodeNameTable xmlUnicodeCatTbl = {xmlUnicodeCats, 36}; + +/** + * xmlUCSIsBlock: + * @code: UCS code point + * @block: UCS block name + * + * Check whether the character is part of the UCS Block + * + * Returns 1 if true, 0 if false and -1 on unknown block + */ +int +xmlUCSIsBlock(int code, const char *block) { + xmlIntFunc *func; + + func = xmlUnicodeLookup(&xmlUnicodeBlockTbl, block); + if (func == NULL) + return (-1); + return (func(code)); +} + /** * xmlUCSIsCat: * @code: UCS code point @@ -3175,4 +3175,4 @@ xmlUCSIsCat(int code, const char *cat) { return (func(code)); } -#endif /* LIBXML_UNICODE_ENABLED */ +#endif /* LIBXML_REGEXP_ENABLED */