2002-02-09 18:03:01 +00:00
import libxml2mod
2003-12-04 14:12:05 +00:00
import types
2006-12-14 15:49:41 +00:00
import sys
2002-01-30 16:37:32 +00:00
2004-09-28 11:08:27 +00:00
# The root of all libxml2 errors.
class libxmlError(Exception):
    """Base class of every exception raised by these libxml2 bindings."""
2013-03-29 15:17:40 +08:00
# Type of the wrapper class for the C objects wrappers
def checkWrapper(obj):
    """Check whether *obj* is usable as the C handle of a wrapper class.

    Returns 0 when *obj* is acceptable and 1 when it is not.  Acceptable
    values are None (the wrapper classes in this module use None as their
    "no handle" default) and opaque C handles, recognised by their type
    name: ``PyCObject`` or ``PyCapsule``.

    The previous implementation read an undefined name inside a bare
    ``except`` and therefore reported 0 for every input.
    """
    if obj is None:
        return 0
    if type(obj).__name__ in ('PyCObject', 'PyCapsule'):
        return 0
    return 1
2006-12-14 15:49:41 +00:00
#
# id() is sometimes negative ...
#
def pos_id(o):
    """Return ``id(o)``, remapped to a positive value when it is negative.

    A negative identity ``i`` is reported as ``sys.maxsize - i``.
    """
    ident = id(o)
    return sys.maxsize - ident if ident < 0 else ident
2002-03-05 15:41:29 +00:00
#
# Errors raised by the wrappers when some tree handling failed.
#
2004-09-28 11:08:27 +00:00
class treeError(libxmlError):
    """Error raised by the wrappers when some tree handling failed."""

    def __init__(self, msg):
        # The message is kept on the instance and is what str() reports.
        self.msg = msg

    def __str__(self):
        return self.msg
2004-09-28 11:08:27 +00:00
class parserError(libxmlError):
    """Parser-related error; carries its message in ``msg``."""

    def __init__(self, msg):
        # The message is kept on the instance and is what str() reports.
        self.msg = msg

    def __str__(self):
        return self.msg
2004-09-28 11:08:27 +00:00
class uriError(libxmlError):
    """URI-related error; carries its message in ``msg``."""

    def __init__(self, msg):
        # The message is kept on the instance and is what str() reports.
        self.msg = msg

    def __str__(self):
        return self.msg
2004-09-28 11:08:27 +00:00
class xpathError(libxmlError):
    """XPath-related error; carries its message in ``msg``."""

    def __init__(self, msg):
        # The message is kept on the instance and is what str() reports.
        self.msg = msg

    def __str__(self):
        return self.msg
2002-09-12 15:00:57 +00:00
class ioWrapper:
    """Adapter exposing a Python file-like object through ``io_*`` methods.

    Every ``io_*`` method returns -1 when no object is attached (never
    attached, already closed, or dropped after a failed read).
    """

    def __init__(self, _obj):
        # Wrapped file-like object; reset to None once it is unusable.
        self.__io = _obj
        # Handle slot filled in by the subclasses.
        self._o = None

    def io_close(self):
        """Close and detach the wrapped object.

        Returns 0 on success, -1 if nothing is attached.
        """
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object.

        Returns 0 on success, -1 if nothing is attached.
        """
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len=-1):
        """Read from the wrapped object.

        A negative *len* reads everything that is available, otherwise at
        most *len* units are requested.  Returns the data read, or -1 if
        nothing is attached or the read raised.  After a failed read the
        wrapped object is dropped, so later calls return -1 as well.
        """
        if self.__io is None:
            return -1
        try:
            if len < 0:
                ret = self.__io.read()
            else:
                ret = self.__io.read(len)
        except Exception as e:
            print("failed to read from Python:", type(e))
            print("on IO:", self.__io)
            # Was "self.__io == None": a comparison with no effect, which
            # left the broken object attached.
            self.__io = None
            return -1
        return ret

    def io_write(self, str, len=-1):
        """Write *str* to the wrapped object and return what write() returns.

        Returns -1 if nothing is attached.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        # NOTE(review): standard file objects take a single argument for
        # write(); this two-argument form is kept unchanged for whatever
        # object types callers rely on -- confirm whether str[:len] was
        # intended.
        return self.__io.write(str, len)
2002-09-12 15:00:57 +00:00
class ioReadWrapper(ioWrapper):
    """Input wrapper: pairs a Python file-like object with an input buffer
    created by ``libxml2mod.xmlCreateInputBuffer``."""

    def __init__(self, _obj, enc=""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def close(self):
        """Close the Python object and free the input buffer, if any."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None

    # Destruction performs the same cleanup as close().  The debug
    # print("__del__") that used to run here has been removed; it wrote to
    # stdout on every collection.
    __del__ = close
class ioWriteWrapper(ioWrapper):
    """Output wrapper around an output-buffer handle.

    When the handle is backed by a Python file (as reported by
    ``libxml2mod.outputBufferGetPythonFile``) that file is the object the
    ``io_*`` methods operate on; otherwise the handle itself is used.
    """

    def __init__(self, _obj, enc=""):
        if isinstance(_obj, str):
            print("write io from a string")
            # Initialise the base state so flush()/close()/__del__ find
            # the attributes they read; previously only the misspelled
            # attribute "o" was set and destruction failed.
            ioWrapper.__init__(self, None)
            self.o = None  # historical attribute, kept for compatibility
        else:
            # The former separate 'PyCapsule' and fallback branches were
            # identical and are merged here.
            file = libxml2mod.outputBufferGetPythonFile(_obj)
            if file is not None:
                ioWrapper.__init__(self, file)
            else:
                ioWrapper.__init__(self, _obj)
            self._o = _obj

    def __del__(self):
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def flush(self):
        """Flush the Python object, then close the output buffer."""
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def close(self):
        """Flush the Python object, then close the output buffer."""
        self.io_flush()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None
2002-03-05 15:41:29 +00:00
#
# Example of a class to handle SAX events
#
class SAXCallback:
    """Base class for SAX handlers.

    Every callback is a no-op except error() and fatalError(), which
    raise parserError.  Subclasses override the events they care about.
    """

    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callback
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when CDATA section have been read, data is the string
           containing the data, multiple consecutive cdataBlock() callback
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This method used to be a second definition of entityDecl, which
    # silently replaced the five-argument ENTITY handler above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicate the associated NOTATION"""
        pass

    def warning(self, msg):
        """called with a warning message; ignored by default"""
        pass

    def error(self, msg):
        """called with an error message; raises parserError"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called with a fatal error message; raises parserError"""
        raise parserError(msg)
2002-01-31 20:29:19 +00:00
#
# This class is the ancestor of all the Node classes. It provides
# the basic functionalities shared by all nodes (and gracefully
# handles the exceptions), like name, navigation in the tree,
2002-03-06 17:35:40 +00:00
# doc reference, content access and serializing to a string or URI
2002-01-31 20:29:19 +00:00
#
2002-01-30 23:49:06 +00:00
class xmlCore:
    """Ancestor of all the node classes.

    Holds the underlying libxml2mod handle in ``self._o`` and provides
    the functionality shared by all nodes: name, navigation in the tree,
    doc reference, content access, serialization, canonicalization,
    XPath evaluation and iteration.
    """

    def __init__(self, _obj=None):
        # Handle handed to every libxml2mod call; None means no object.
        self._o = _obj

    def __eq__(self, other):
        if other is None:
            return False
        try:
            other_o = other._o
        except AttributeError:
            # Not a wrapper: let Python fall back to its default
            # comparison instead of failing with AttributeError.
            return NotImplemented
        ret = libxml2mod.compareNodesEqual(self._o, other_o)
        if ret is None:
            return False
        return ret == True

    def __ne__(self, other):
        if other is None:
            return True
        try:
            other_o = other._o
        except AttributeError:
            return NotImplemented
        ret = libxml2mod.compareNodesEqual(self._o, other_o)
        return not ret

    def __hash__(self):
        ret = libxml2mod.nodeHash(self._o)
        return ret

    def __str__(self):
        return self.serialize()

    def get_parent(self):
        """Return the wrapped parent node, or None."""
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_children(self):
        """Return the wrapped first child node, or None."""
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_last(self):
        """Return the wrapped result of libxml2mod.last(), or None."""
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_next(self):
        """Return the wrapped next sibling node, or None."""
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_properties(self):
        """Return the properties as an xmlAttr, or None."""
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)

    def get_prev(self):
        """Return the wrapped previous sibling node, or None."""
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_content(self):
        """Return the content of this node."""
        return libxml2mod.xmlNodeGetContent(self._o)

    getContent = get_content  # why is this duplicate naming needed ?

    def get_name(self):
        """Return the node name."""
        return libxml2mod.name(self._o)

    def get_type(self):
        """Return the node type name."""
        return libxml2mod.type(self._o)

    def get_doc(self):
        """Return the xmlDoc this node belongs to, or None.

        When no owning document is reported and this node is itself a
        document, a wrapper around this node's own handle is returned.
        """
        ret = libxml2mod.doc(self._o)
        if ret is None:
            if self.type in ("document_xml", "document_html"):
                return xmlDoc(_obj=self._o)
            return None
        return xmlDoc(_obj=ret)

    #
    # Those are common attributes to nearly all type of nodes,
    # defined as properties.  The former __getattr__ fallback for
    # Python < 2.2 was unreachable (this module already needs print()
    # as a function, sys.maxsize and __next__) and has been removed.
    #
    parent = property(get_parent, None, None, "Parent node")
    children = property(get_children, None, None, "First child node")
    last = property(get_last, None, None, "Last sibling node")
    next = property(get_next, None, None, "Next sibling node")
    prev = property(get_prev, None, None, "Previous sibling node")
    properties = property(get_properties, None, None, "List of properties")
    content = property(get_content, None, None, "Content of this node")
    name = property(get_name, None, None, "Node name")
    type = property(get_type, None, None, "Node type")
    doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding=None, format=0):
        return libxml2mod.serializeNode(self._o, encoding, format)

    def saveTo(self, file, encoding=None, format=0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Canonicalization routines:
    #
    #   nodes: the node set (tuple or list) to be included in the
    #     canonized image or None if all document nodes should be
    #     included.
    #   exclusive: the exclusive flag (0 - non-exclusive
    #     canonicalization; otherwise - exclusive canonicalization)
    #   prefixes: the list of inclusive namespace prefixes (strings),
    #     or None if there is no inclusive namespaces (only for
    #     exclusive canonicalization, ignored otherwise)
    #   with_comments: include comments in the result (!=0) or not
    #     (==0)
    def c14nMemory(self,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # The C layer works on raw handles, not on wrappers.
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocDumpMemory(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0)

    def c14nSaveTo(self,
                   file,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocSaveTo(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0,
            file)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate *expr* with this node as the context node.

        Returns None when the node has no document, otherwise whatever
        the context's xpathEval() returns.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            # Free the context even when the evaluation raises.
            ctxt.xpathFreeContext()

    # A variant caching one context per xmlDoc used to live here; it was
    # removed because of memory leaks (c.f. #126735) and now simply
    # delegates to xpathEval().
    def xpathEval2(self, expr):
        return self.xpathEval(expr)

    # Remove namespaces
    def removeNsDef(self, href):
        """
        Remove a namespace definition from a node.  If href is None,
        remove all of the ns definitions on that node.  The removed
        namespaces are returned as a linked list.

        Note: If any child nodes referred to the removed namespaces,
        they will be left with dangling links.  You should call
        reconciliateNs() to fix those pointers.

        Note: This method does not free memory taken by the ns
        definitions.  You will need to free it manually with the
        freeNsList() method on the returned xmlNs object.
        """
        ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
        if ret is None:
            return None
        return xmlNs(_obj=ret)

    # support for iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)

    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)

    __iter__ = walk_depth_first

    def free(self):
        """Free the underlying object with libxml2mod.xmlFreeDoc()."""
        try:
            # Best effort: release an XPath context if one was attached
            # to the document as "_ctxt"; usually there is none.
            self.doc._ctxt.xpathFreeContext()
        except Exception:
            pass
        libxml2mod.xmlFreeDoc(self._o)
2003-01-04 19:42:46 +00:00
#
# implements the depth-first iterator for libxml2 DOM tree
#
class xmlCoreDepthFirstItertor:
    """Depth-first iterator over a tree of nodes.

    Each node is yielded before its ``children`` chain; when a chain is
    exhausted the walk resumes at the ``next`` of the most recently
    yielded node still on the stack.
    """

    def __init__(self, node):
        self.node = node
        self.parents = []

    def __iter__(self):
        return self

    def __next__(self):
        # Unwind the stack until some visited node has a following sibling.
        while not self.node:
            if not self.parents:
                raise StopIteration
            self.node = self.parents.pop().next
        current = self.node
        self.parents.append(current)
        self.node = current.children
        return current

    next = __next__
2003-01-04 19:42:46 +00:00
#
# implements the breadth-first iterator for libxml2 DOM tree
#
class xmlCoreBreadthFirstItertor:
    """Sibling-first iterator over a tree of nodes.

    Each node is yielded before its ``next`` chain; when a chain is
    exhausted the walk resumes at the ``children`` of the most recently
    yielded node still on the stack.
    """

    def __init__(self, node):
        self.node = node
        self.parents = []

    def __iter__(self):
        return self

    def __next__(self):
        # Unwind the stack until some visited node has children.
        while not self.node:
            if not self.parents:
                raise StopIteration
            self.node = self.parents.pop().children
        current = self.node
        self.parents.append(current)
        self.node = current.next
        return current

    next = __next__
2003-01-04 19:42:46 +00:00
2002-01-31 20:29:19 +00:00
#
# converters to present a nicer view of the XPath returns
#
def nodeWrap(o):
    """Wrap the raw node handle *o* in the class matching its type.

    The type name comes from libxml2mod.type(); unrecognised types fall
    back to xmlNode.
    """
    # TODO try to cast to the most appropriate node class
    name = libxml2mod.type(o)
    if name in ("element", "text"):
        return xmlNode(_obj=o)
    if name == "attribute":
        return xmlAttr(_obj=o)
    # Prefix test, so that every "document*" type name is covered.
    if name.startswith("document"):
        return xmlDoc(_obj=o)
    if name == "namespace":
        return xmlNs(_obj=o)
    if name == "elem_decl":
        return xmlElement(_obj=o)
    if name == "attribute_decl":
        return xmlAttribute(_obj=o)
    if name == "entity_decl":
        return xmlEntity(_obj=o)
    if name == "dtd":
        return xmlDtd(_obj=o)
    return xmlNode(_obj=o)
def xpathObjectRet(o):
    """Convert an XPath result into Python objects.

    Lists and tuples are converted element by element (keeping their
    container type), plain str/int/float values are returned unchanged,
    and anything else is passed to nodeWrap().
    """
    kind = type(o)
    if kind is list:
        return [xpathObjectRet(item) for item in o]
    if kind is tuple:
        return tuple(xpathObjectRet(item) for item in o)
    if kind in (str, int, float):
        return o
    return nodeWrap(o)
2002-01-31 20:29:19 +00:00
2002-02-01 17:56:45 +00:00
#
# register an XPath function
#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python callable *f* as an XPath function.

    *name* and *ns_uri* identify the function in the context *ctxt*.
    Returns whatever libxml2mod.xmlRegisterXPathFunction() reports; the
    result used to be computed and then discarded.
    """
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
2002-02-01 17:56:45 +00:00
2002-12-27 15:18:35 +00:00
#
# For the xmlTextReader parser configuration
#
# Numbered consecutively from 1, in this order.
(PARSER_LOADDTD,
 PARSER_DEFAULTATTRS,
 PARSER_VALIDATE,
 PARSER_SUBST_ENTITIES) = range(1, 5)
2002-12-27 15:18:35 +00:00
2003-01-16 22:45:08 +00:00
#
2003-01-20 21:26:34 +00:00
# For the error callback severities
2003-01-16 22:45:08 +00:00
#
2003-01-20 21:26:34 +00:00
# Numbered consecutively from 1, in this order.
(PARSER_SEVERITY_VALIDITY_WARNING,
 PARSER_SEVERITY_VALIDITY_ERROR,
 PARSER_SEVERITY_WARNING,
 PARSER_SEVERITY_ERROR) = range(1, 5)
2003-01-16 22:45:08 +00:00
2002-01-30 23:49:06 +00:00
#
2003-01-10 13:14:40 +00:00
# register the libxml2 error handler
2002-01-30 23:49:06 +00:00
#
2003-01-10 13:14:40 +00:00
def registerErrorHandler(f, ctx):
    """Register a Python written function for error reporting.

    The function is called back as f(ctx, error).  Returns the result of
    the underlying registration call.
    """
    import sys
    if 'libxslt' not in sys.modules:
        # normal behaviour when libxslt is not imported
        return libxml2mod.xmlRegisterErrorHandler(f, ctx)
    # when libxslt is already imported, one must
    # use libxslt's error handler instead
    import libxslt
    return libxslt.registerErrorHandler(f, ctx)
2003-01-14 11:42:39 +00:00
class parserCtxtCore:
    """Core of the parser context wrapper; ``_o`` holds the handle."""

    def __init__(self, _obj=None):
        self._o = _obj if _obj != None else None

    def __del__(self):
        # Free the context held by this wrapper, if any, then forget it.
        handle = self._o
        if handle != None:
            libxml2mod.xmlFreeParserCtxt(handle)
        self._o = None

    def setErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg, msg, severity, reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o, f, arg)

    def getErrorHandler(self):
        """Return (f, arg) as previously registered with setErrorHandler
           or (None, None)."""
        return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        return libxml2mod.addLocalCatalog(self._o, uri)
2005-03-30 22:47:10 +00:00
class ValidCtxtCore:
    """Core of the DTD validation context wrapper.

    The handle ``_o`` is not set here; it has to be provided elsewhere.
    """

    def __init__(self, *args, **kw):
        """Accept and ignore any arguments."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for DTD validation.
        These will be called back as f(msg, arg)
        """
        libxml2mod.xmlSetValidErrors(self._o, err_func, warn_func, arg)
class SchemaValidCtxtCore:
    """Core of the Schema validation context wrapper.

    The handle ``_o`` is not set here; it has to be provided elsewhere.
    """

    def __init__(self, *args, **kw):
        """Accept and ignore any arguments."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for Schema validation.
        These will be called back as f(msg, arg)
        """
        libxml2mod.xmlSchemaSetValidErrors(self._o, err_func, warn_func, arg)
class relaxNgValidCtxtCore:
    """Core of the RelaxNG validation context wrapper.

    The handle ``_o`` is not set here; it has to be provided elsewhere.
    """

    def __init__(self, *args, **kw):
        """Accept and ignore any arguments."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for RelaxNG validation.
        These will be called back as f(msg, arg)
        """
        libxml2mod.xmlRelaxNGSetValidErrors(self._o, err_func, warn_func, arg)
2013-03-27 22:40:54 +08:00
def _xmlTextReaderErrorFunc(xxx_todo_changeme, msg, severity, locator):
    """Intermediate callback to wrap the locator.

    The first argument is the (f, arg) pair stored at registration time;
    the user handler is invoked as f(arg, msg, severity, wrapped_locator).
    """
    handler, user_arg = xxx_todo_changeme
    wrapped_locator = xmlTextReaderLocator(locator)
    return handler(user_arg, msg, severity, wrapped_locator)
2003-01-14 11:42:39 +00:00
2003-01-16 22:45:08 +00:00
class xmlTextReaderCore:
    """Core of the text reader wrapper; ``_o`` holds the reader handle."""

    def __init__(self, _obj=None):
        self.input = None
        self._o = _obj if _obj != None else None

    def __del__(self):
        # Free the reader held by this wrapper, if any, then forget it.
        handle = self._o
        if handle != None:
            libxml2mod.xmlFreeTextReader(handle)
        self._o = None

    def SetErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg, msg, severity, locator)."""
        if f is None:
            callback, payload = None, None
        else:
            # The trampoline unpacks (f, arg) again and wraps the locator.
            callback, payload = _xmlTextReaderErrorFunc, (f, arg)
        libxml2mod.xmlTextReaderSetErrorHandler(self._o, callback, payload)

    def GetErrorHandler(self):
        """Return (f, arg) as previously registered with SetErrorHandler
           or (None, None)."""
        registered, payload = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        # When a handler is registered, the payload is the (f, arg) pair
        # that SetErrorHandler stored.
        return (None, None) if registered is None else payload
2003-01-16 22:45:08 +00:00
2004-07-01 12:56:30 +00:00
#
2013-02-25 15:54:25 +08:00
# The cleanup now goes through a wrapper in libxml.c
2004-07-01 12:56:30 +00:00
#
def cleanupParser():
    """Run the parser cleanup via libxml2mod.xmlPythonCleanupParser()."""
    libxml2mod.xmlPythonCleanupParser()
2003-12-21 13:01:56 +00:00
2013-02-25 15:54:25 +08:00
#
# The interface to xmlRegisterInputCallbacks.
# Since this API does not allow to pass a data object along with
# match/open callbacks, it is necessary to maintain a list of all
# Python callbacks.
#
# Python-level input callbacks, oldest first.
__input_callbacks = []


def registerInputCallback(func):
    """Add *func* to the Python input callbacks.

    A dispatcher is registered with libxml2mod; for a given URI it tries
    the callbacks from the most recently registered to the oldest and
    returns the first result that is not None (None when all decline).
    """
    def _dispatch_open(URI):
        for callback in reversed(__input_callbacks):
            opened = callback(URI)
            if opened is not None:
                return opened

    libxml2mod.xmlRegisterInputCallback(_dispatch_open)
    __input_callbacks.append(func)
def popInputCallbacks():
    """Remove the most recently registered input callback.

    Python-level callbacks are popped first; once none are left, the
    registration in libxml2mod is removed as well.
    """
    if __input_callbacks:
        __input_callbacks.pop()
    if not __input_callbacks:
        libxml2mod.xmlUnregisterInputCallback()
2003-01-10 13:14:40 +00:00
# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
#
# Everything before this line comes from libxml.py
# Everything after this line is automatically generated
#
# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
2002-01-31 20:29:19 +00:00