2022-12-07 00:40:01 +03:00
#!/usr/bin/env python3
2020-01-01 21:16:10 +03:00
# -*- coding: utf-8 -*-
2002-12-27 22:37:04 +03:00
#
2002-12-29 01:56:33 +03:00
# this tests the DTD validation with the XmlTextReader interface
2002-12-27 22:37:04 +03:00
#
import sys
import glob
2022-03-30 23:00:50 +03:00
import os
2023-08-15 13:49:27 +03:00
import setup_test
2002-12-27 22:37:04 +03:00
import libxml2
2013-03-30 17:38:20 +04:00
# Pick an in-memory text stream class: the Python 2 StringIO module when it
# can be imported, io.StringIO otherwise.  Only ImportError is caught so that
# any other failure during import is not silently hidden.
try:
    import StringIO
    str_io = StringIO.StringIO
except ImportError:
    import io
    str_io = io.StringIO
2002-12-27 22:37:04 +03:00
# Memory debug specific
libxml2.debugMemory(1)

# Text accumulated by the registered error handler.
err = " "

# Directory holding this script, and the resolved path of the directory
# containing the validation test documents (located relative to the script).
basedir = os.path.dirname(os.path.realpath(__file__))
dir_prefix = os.path.realpath(
    os.path.join(basedir, " .. ", " .. ", " test ", " valid ")
)
2020-01-01 21:16:10 +03:00
# This dictionary reflects the contents of the files
# ../../test/valid/*.xml.err that are not empty, except that
# the file paths in the messages are prefixed with dir_prefix
# (the resolved path of ../../test/valid).
expect = {
' 766956 ' :
2022-12-06 18:48:55 +03:00
""" {0} /dtds/766956.dtd:2: parser error : PEReference: expecting ' ; '
2020-01-01 21:16:10 +03:00
% ä % ent ;
^
2022-12-06 18:48:55 +03:00
{ 0 } / dtds / 766956. dtd : 2 : parser error : Content error in the external subset
2020-01-01 21:16:10 +03:00
% ä % ent ;
^
Entity : line 1 :
value
^
2022-12-06 18:48:55 +03:00
""" .format(dir_prefix),
2020-01-01 21:16:10 +03:00
' 781333 ' :
2022-12-06 18:48:55 +03:00
""" {0} /781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got
2020-01-01 21:16:10 +03:00
< a / >
^
2023-04-10 23:06:18 +03:00
{ 0 } / 781333. xml : 5 : element a : validity error : Element a content does not follow the DTD , Expecting more children
2020-01-01 21:16:10 +03:00
^
2022-12-06 18:48:55 +03:00
""" .format(dir_prefix),
2020-01-01 21:16:10 +03:00
' cond_sect2 ' :
2022-12-06 18:48:55 +03:00
""" {0} /dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
2020-01-01 21:16:10 +03:00
% ent ;
^
Entity : line 1 :
] ] >
^
2022-12-06 18:48:55 +03:00
{ 0 } / dtds / cond_sect2 . dtd : 17 : parser error : Content error in the external subset
2020-01-01 21:16:10 +03:00
^
2022-12-06 18:48:55 +03:00
""" .format(dir_prefix),
2020-01-01 21:16:10 +03:00
' rss ' :
2022-12-06 18:48:55 +03:00
""" {0} /rss.xml:177: element rss: validity error : Element rss does not carry attribute version
2003-01-04 22:06:55 +03:00
< / rss >
2003-05-16 02:11:36 +04:00
^
2022-12-06 18:48:55 +03:00
""" .format(dir_prefix),
2020-01-01 21:16:10 +03:00
' t8 ' :
2022-12-06 18:48:55 +03:00
""" {0} /t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
2020-01-01 21:16:10 +03:00
% defroot ; % defmiddle ; % deftest ;
^
Entity : line 1 :
& lt ; ! ELEMENT root ( middle ) >
^
2022-12-06 18:48:55 +03:00
""" .format(dir_prefix),
2020-01-01 21:16:10 +03:00
' t8a ' :
2022-12-06 18:48:55 +03:00
""" {0} /t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
2020-01-01 21:16:10 +03:00
% defroot ; % defmiddle ; % deftest ;
^
Entity : line 1 :
& lt ; ! ELEMENT root ( middle ) >
^
2022-12-06 18:48:55 +03:00
""" .format(dir_prefix),
2020-01-01 21:16:10 +03:00
' xlink ' :
2022-12-06 18:48:55 +03:00
""" {0} /xlink.xml:450: element termdef: validity error : ID dt-arc already defined
2003-04-22 01:36:41 +04:00
< p > < termdef id = " dt-arc " term = " Arc " > An < ter
2003-05-16 02:11:36 +04:00
^
2020-01-01 21:16:10 +03:00
validity error : attribute def line 199 references an unknown ID " dt-xlg "
2022-12-06 18:48:55 +03:00
""" .format(dir_prefix),
2020-01-01 21:16:10 +03:00
}
# Re-key the table by full path: prepend dir_prefix and append the extension
# to every key, so entries can be looked up by the paths returned by glob.
expect = dict(
    (os.path.join(dir_prefix, name + " .xml "), message)
    for name, message in expect.items()
)
2003-01-05 04:27:54 +03:00
2002-12-27 22:37:04 +03:00
def callback(ctx, str):
    """Error handler: append the reported message to the global ``err``.

    ``ctx`` is the value supplied when the handler was registered and is
    not used here; ``str`` is the message text to accumulate.
    """
    global err
    err += " %s " % (str)
# Route libxml2 error messages through callback so they end up in err.
libxml2.registerErrorHandler(callback, " ")

# Documents for which a non-zero final Read() result is not reported.
parsing_error_files = [" 766956 ", " cond_sect2 ", " t8 ", " t8a "]
expect_parsing_error = [
    os.path.join(dir_prefix, name + " .xml ") for name in parsing_error_files
]

# Collect the documents to validate, in sorted order; refuse to continue
# when the pattern matches nothing.
valid_files = glob.glob(os.path.join(dir_prefix, " *.x* "))
assert valid_files, " found no valid files in ' {} ' ".format(dir_prefix)
valid_files = sorted(valid_files)
2022-12-06 18:34:04 +03:00
# Validate every collected document with the text reader and compare the
# error text gathered by the error handler against the expected messages.
failures = 0
for file in valid_files:
    # Start each document with a fresh error buffer.
    err = " "
    reader = libxml2.newTextReaderFilename(file)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

    # Read() yields 1 while there are more nodes; stop on any other value.
    ret = reader.Read()
    while ret == 1:
        ret = reader.Read()

    if ret != 0 and file not in expect_parsing_error:
        # Reported only: this condition does not end the run or count as a
        # failure (the exit call that used to follow is disabled).
        print(" Error parsing and validating %s " % (file))
        #sys.exit(1)

    # A failure is any error text that is not exactly the expected message
    # for this file (including error text for a file with no expectation).
    if err and not (file in expect and err == expect[file]):
        failures += 1
        print(" Error: ", err)
        if file in expect:
            print(" Expected: ", expect[file])

if failures:
    print(" Failed %d tests " % failures)
    sys.exit(1)
2002-12-31 17:45:26 +03:00
#
# Another separate test, based on one by Stephane Bidoul
#
s = """
< ! DOCTYPE test [
< ! ELEMENT test ( x , b ) >
< ! ELEMENT x ( c ) >
< ! ELEMENT b ( #PCDATA)>
< ! ELEMENT c ( #PCDATA)>
< ! ENTITY x " <x><c>xxx</c></x> " >
] >
< test >
& x ;
< b > bbb < / b >
< / test >
"""
2003-01-05 04:27:54 +03:00
expect = """ 10,test
1 , test
2003-08-04 05:06:15 +04:00
14 , #text
2002-12-31 17:45:26 +03:00
1 , x
1 , c
3 , #text
15 , c
15 , x
2003-08-04 05:06:15 +04:00
14 , #text
2002-12-31 17:45:26 +03:00
1 , b
3 , #text
15 , b
2003-08-04 05:06:15 +04:00
14 , #text
2002-12-31 17:45:26 +03:00
15 , test
"""
# Run test2: read the in-memory document s with DTD loading, default
# attributes, entity substitution and validation enabled, recording one
# "type,name" line per node.
res = " "
err = " "
input = libxml2.inputBuffer(str_io(s))
reader = input.newTextReader(" test2 ")
reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

while reader.Read() == 1:
    res += " %s , %s \n " % (reader.NodeType(), reader.Name())

# The node trace must match the expected one ...
if res != expect:
    print(" test2 failed: unexpected output ")
    print(res)
    sys.exit(1)

# ... and the error handler must not have collected anything.
if err != " ":
    print(" test2 failed: validation error found ")
    print(err)
    sys.exit(1)
2003-01-01 17:50:44 +03:00
#
# Another test for external entity parsing and validation
#
s = """ <!DOCTYPE test [
< ! ELEMENT test ( x ) >
< ! ELEMENT x ( #PCDATA)>
< ! ENTITY e SYSTEM " tst.ent " >
] >
< test >
& e ;
< / test >
"""
tst_ent = """ <x>hello</x> """
2003-01-05 04:27:54 +03:00
expect = """ 10 test
1 test
2003-08-04 05:06:15 +04:00
14 #text
2003-01-01 17:50:44 +03:00
1 x
3 #text
15 x
2003-08-04 05:06:15 +04:00
14 #text
2003-01-01 17:50:44 +03:00
15 test
"""
res = " "


def myResolver(URL, ID, ctxt):
    """Entity loader used by test3.

    Returns an in-memory stream over ``tst_ent`` when ``URL`` equals the
    system identifier declared for the external entity in the test
    document, and ``None`` for every other URL.  ``ID`` and ``ctxt`` are
    not used.
    """
    if URL != " tst.ent ":
        return None
    return str_io(tst_ent)
# Run test3: the external entity is served by myResolver; read the document
# with DTD loading, default attributes, entity substitution and validation
# enabled, recording one "type name" line per node.
libxml2.setEntityLoader(myResolver)

# Reset the error buffer as the other tests do, so that the check below
# does not depend on the state left behind by the previous test.
err = " "

input = libxml2.inputBuffer(str_io(s))
reader = input.newTextReader(" test3 ")
reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

while reader.Read() == 1:
    res = res + " %s %s \n " % (reader.NodeType(), reader.Name())

# The node trace must match the expected one ...
if res != expect:
    print(" test3 failed: unexpected output ")
    print(res)
    sys.exit(1)

# ... and the error handler must not have collected anything.
if err != " ":
    print(" test3 failed: validation error found ")
    print(err)
    sys.exit(1)
2003-01-03 04:18:43 +03:00
#
# Another test for recursive entity parsing, validation, and replacement of
# entities, making sure the entity ref node doesn't show up in that case
#
s = """ <!DOCTYPE test [
< ! ELEMENT test ( x , x ) >
< ! ELEMENT x ( y ) >
< ! ELEMENT y ( #PCDATA)>
< ! ENTITY x " <x>&y;</x> " >
< ! ENTITY y " <y>yyy</y> " >
] >
< test >
& x ;
& x ;
< / test > """
2003-01-05 04:27:54 +03:00
expect = """ 10 test 0
1 test 0
2003-08-04 05:06:15 +04:00
14 #text 1
2003-01-03 04:18:43 +03:00
1 x 1
1 y 2
3 #text 3
15 y 2
15 x 1
2003-08-04 05:06:15 +04:00
14 #text 1
2003-01-03 04:18:43 +03:00
1 x 1
1 y 2
3 #text 3
15 y 2
15 x 1
2003-08-04 05:06:15 +04:00
14 #text 1
2003-01-03 04:18:43 +03:00
15 test 0
"""
# Run test4: nested entities with substitution enabled; record one
# "type name depth" line per node.
res = " "
err = " "
input = libxml2.inputBuffer(str_io(s))
reader = input.newTextReader(" test4 ")
reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

while reader.Read() == 1:
    res += " %s %s %d \n " % (
        reader.NodeType(),
        reader.Name(),
        reader.Depth(),
    )

# The node trace must match the expected one ...
if res != expect:
    print(" test4 failed: unexpected output ")
    print(res)
    sys.exit(1)

# ... and the error handler must not have collected anything.
if err != " ":
    print(" test4 failed: validation error found ")
    print(err)
    sys.exit(1)
2003-01-03 15:52:08 +03:00
#
# The same test but without entity substitution this time
#
s = """ <!DOCTYPE test [
< ! ELEMENT test ( x , x ) >
< ! ELEMENT x ( y ) >
< ! ELEMENT y ( #PCDATA)>
< ! ENTITY x " <x>&y;</x> " >
< ! ENTITY y " <y>yyy</y> " >
] >
< test >
& x ;
& x ;
< / test > """
2003-01-05 04:27:54 +03:00
expect = """ 10 test 0
1 test 0
2003-08-04 05:06:15 +04:00
14 #text 1
2003-01-03 15:52:08 +03:00
5 x 1
2003-08-04 05:06:15 +04:00
14 #text 1
2003-01-03 15:52:08 +03:00
5 x 1
2003-08-04 05:06:15 +04:00
14 #text 1
2003-01-03 15:52:08 +03:00
15 test 0
"""
# Run test5: same document as the previous test, but only validation is
# switched on (no entity substitution); record one "type name depth" line
# per node.
res = " "
err = " "
input = libxml2.inputBuffer(str_io(s))
reader = input.newTextReader(" test5 ")
reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

while reader.Read() == 1:
    res += " %s %s %d \n " % (
        reader.NodeType(),
        reader.Name(),
        reader.Depth(),
    )

# The node trace must match the expected one ...
if res != expect:
    print(" test5 failed: unexpected output ")
    print(res)
    sys.exit(1)

# ... and the error handler must not have collected anything.
if err != " ":
    print(" test5 failed: validation error found ")
    print(err)
    sys.exit(1)
2003-01-03 15:52:08 +03:00
2003-01-01 17:50:44 +03:00
#
# cleanup
#
# Drop the last input buffer and reader before the parser is cleaned up.
del input, reader

# Memory debug specific: report whatever debugMemory still counts after
# the parser cleanup.
libxml2.cleanupParser()
if libxml2.debugMemory(1) != 0:
    print(" Memory leak %d bytes " % (libxml2.debugMemory(1)))
else:
    print(" OK ")