2002-02-09 21:03:01 +03:00
import libxml2mod
2002-01-30 19:37:32 +03:00
2002-03-05 18:41:29 +03:00
#
# Errors raised by the wrappers when some tree handling failed.
#
class treeError(Exception):
    """Error raised by the wrappers when some tree handling failed.

    The message given to the constructor is stored in ``msg`` and is
    what ``str()`` returns.
    """

    def __init__(self, msg):
        # Hand the message to Exception too so ``args`` is populated.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
class parserError(Exception):
    """Error raised for parser failures.

    SAXCallback.error() and SAXCallback.fatalError() raise it with the
    reported message.  The message is stored in ``msg`` and is what
    ``str()`` returns.
    """

    def __init__(self, msg):
        # Hand the message to Exception too so ``args`` is populated.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
class uriError(Exception):
    """Error type for URI handling (going by the name; the raise sites
    are not in this part of the file).

    The message given to the constructor is stored in ``msg`` and is
    what ``str()`` returns.
    """

    def __init__(self, msg):
        # Hand the message to Exception too so ``args`` is populated.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
class xpathError(Exception):
    """Error type for XPath handling (going by the name; the raise sites
    are not in this part of the file).

    The message given to the constructor is stored in ``msg`` and is
    what ``str()`` returns.
    """

    def __init__(self, msg):
        # Hand the message to Exception too so ``args`` is populated.
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
2002-09-12 19:00:57 +04:00
class ioWrapper:
    """Adapter exposing a Python file-like object through io_* methods.

    Every io_* method returns -1 once the wrapped object has been
    released by io_close().  ``_o`` starts as None; ioReadWrapper and
    ioWriteWrapper store the buffer they create there.
    """

    def __init__(self, _obj):
        self.__io = _obj
        self._o = None

    def io_close(self):
        """Close and drop the wrapped object.

        Returns 0 on success, -1 if there is no wrapped object any more.
        """
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object.

        Returns 0 on success, -1 if there is no wrapped object any more.
        """
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len=-1):
        """Read from the wrapped object.

        A negative len reads everything that is left, otherwise at most
        len units are requested.  Returns whatever read() returned, or
        -1 if there is no wrapped object any more.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.read()
        return self.__io.read(len)

    def io_write(self, str, len=-1):
        """Write str to the wrapped object.

        A negative len writes the whole of str, otherwise only its first
        len items.  Returns whatever write() returned, or -1 if there is
        no wrapped object any more.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        # File-like write() accepts a single argument, so the length
        # limit is applied by slicing instead of a second parameter.
        return self.__io.write(str[:len])
class ioReadWrapper(ioWrapper):
    """ioWrapper bound to an input buffer created by libxml2mod.

    The constructor passes this object and enc to
    libxml2mod.xmlCreateInputBuffer() and keeps the result in ``_o``.
    """

    def __init__(self, _obj, enc=""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # Same teardown as an explicit close(); the former debugging
        # print on stdout has been dropped.
        self.close()

    def close(self):
        """Close the wrapped object and free the input buffer.

        Safe to call more than once: the buffer is freed only while
        ``_o`` is set, and ``_o`` is reset to None afterwards.
        """
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None
class ioWriteWrapper(ioWrapper):
    """ioWrapper bound to an output buffer created by libxml2mod.

    The constructor passes this object and enc to
    libxml2mod.xmlCreateOutputBuffer() and keeps the result in ``_o``.
    """

    def __init__(self, _obj, enc=""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)

    def __del__(self):
        # Same teardown as an explicit close(); the former debugging
        # print on stdout has been dropped.
        self.close()

    def close(self):
        """Close the wrapped object and close the output buffer.

        Safe to call more than once: the buffer is closed only while
        ``_o`` is set, and ``_o`` is reset to None afterwards.
        """
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None
2002-03-05 18:41:29 +03:00
#
# Example of a class to handle SAX events
#
class SAXCallback:
    """Base class for SAX handlers.

    Every event callback is a no-op here; subclasses override the ones
    they care about.  warning() prints the message, error() and
    fatalError() raise parserError.
    """

    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callbacks
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when a CDATA section has been read, data is the string
           containing the data, multiple consecutive cdataBlock() callbacks
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This handler used to be a second "entityDecl" definition, which
    # silently replaced the five-argument entityDecl() above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        pass

    def warning(self, msg):
        """print the warning message on standard output"""
        # Parenthesised form prints the same text under Python 2 and 3.
        print(msg)

    def error(self, msg):
        """raise parserError carrying msg"""
        raise parserError(msg)

    def fatalError(self, msg):
        """raise parserError carrying msg"""
        raise parserError(msg)
2002-01-31 23:29:19 +03:00
#
# This class is the ancestor of all the Node classes. It provides
# the basic functionalities shared by all nodes (and handles
# gracefully the exceptions), like name, navigation in the tree,
2002-03-06 20:35:40 +03:00
# doc reference, content access and serializing to a string or URI
2002-01-31 23:29:19 +03:00
#
2002-01-31 02:49:06 +03:00
class xmlCore:
    """Ancestor of all the node wrapper classes.

    Keeps the low-level object obtained from libxml2mod in ``_o`` and
    provides what all nodes share: name, type and content access,
    navigation in the tree, the owning document, serialization, XPath
    evaluation and iteration.
    """

    def __init__(self, _obj=None):
        self._o = _obj

    def get_parent(self):
        """Return libxml2mod.parent() of this node as an xmlNode, or None."""
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_children(self):
        """Return libxml2mod.children() of this node as an xmlNode, or None."""
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_last(self):
        """Return libxml2mod.last() of this node as an xmlNode, or None."""
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_next(self):
        """Return libxml2mod.next() of this node as an xmlNode, or None."""
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_properties(self):
        """Return libxml2mod.properties() of this node as an xmlAttr, or None."""
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)

    def get_prev(self):
        """Return libxml2mod.prev() of this node as an xmlNode, or None."""
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return xmlNode(_obj=ret)

    def get_content(self):
        """Return the result of libxml2mod.xmlNodeGetContent() for this node."""
        return libxml2mod.xmlNodeGetContent(self._o)

    # Alias of get_content under a camelCase name.
    getContent = get_content

    def get_name(self):
        """Return the result of libxml2mod.name() for this node."""
        return libxml2mod.name(self._o)

    def get_type(self):
        """Return the result of libxml2mod.type() for this node."""
        return libxml2mod.type(self._o)

    def get_doc(self):
        """Return the document this node belongs to as an xmlDoc, or None.

        When libxml2mod.doc() reports nothing and this node's type is
        "document_xml" or "document_html", the node itself is wrapped
        as the xmlDoc.
        """
        ret = libxml2mod.doc(self._o)
        if ret is None:
            if self.type in ("document_xml", "document_html"):
                return xmlDoc(_obj=self._o)
            return None
        return xmlDoc(_obj=ret)

    #
    # Those are common attributes to nearly all type of nodes
    # defined as python2 properties
    #
    import sys
    # Compare the version as a tuple; slicing sys.version as text breaks
    # as soon as a component has more than one digit.
    if sys.version_info < (2, 2):
        def __getattr__(self, attr):
            # No property() before Python 2.2: answer the same attribute
            # names by calling the matching get_* accessor.
            if attr in ("parent", "properties", "children", "last",
                        "next", "prev", "content", "name", "type", "doc"):
                return getattr(self, "get_" + attr)()
            raise AttributeError(attr)
    else:
        parent = property(get_parent, None, None, "Parent node")
        children = property(get_children, None, None, "First child node")
        last = property(get_last, None, None, "Last sibling node")
        next = property(get_next, None, None, "Next sibling node")
        prev = property(get_prev, None, None, "Previous sibling node")
        properties = property(get_properties, None, None, "List of properties")
        content = property(get_content, None, None, "Content of this node")
        name = property(get_name, None, None, "Node name")
        type = property(get_type, None, None, "Node type")
        doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding=None, format=0):
        """Return the result of libxml2mod.serializeNode() for this node."""
        return libxml2mod.serializeNode(self._o, encoding, format)

    def saveTo(self, file, encoding=None, format=0):
        """Pass this node and file to libxml2mod.saveNodeTo() and return
        its result."""
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate expr with this node as the XPath context node.

        Returns None when the node has no document, otherwise whatever
        the context's xpathEval() returns.  A fresh context is created
        for the call and freed afterwards, even if evaluation raises.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            ctxt.xpathFreeContext()

    #
    # Selecting nodes using XPath, reusing the context stored as
    # ``_ctxt`` on the document wrapper when there is one.
    #
    def xpathEval2(self, expr):
        """Evaluate expr with this node as the XPath context node.

        Returns None when the node has no document.  The context is
        read from the ``_ctxt`` attribute of the object returned by
        ``self.doc`` and created (and stored there) when missing.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = getattr(doc, "_ctxt", None)
        if ctxt is None:
            # First use on this document wrapper: create the context.
            ctxt = doc.xpathNewContext()
            doc._ctxt = ctxt
        ctxt.setContextNode(self)
        return ctxt.xpathEval(expr)

    # support for python2 iterators
    def walk_depth_first(self):
        """Return an xmlCoreDepthFirstItertor starting at this node."""
        return xmlCoreDepthFirstItertor(self)

    def walk_breadth_first(self):
        """Return an xmlCoreBreadthFirstItertor starting at this node."""
        return xmlCoreBreadthFirstItertor(self)

    __iter__ = walk_depth_first

    def free(self):
        """Free the XPath context stored on the document wrapper, if any,
        then pass ``_o`` to libxml2mod.freeDoc().

        NOTE(review): freeDoc() is called on this wrapper's own object,
        so this presumably is only meant for document wrappers - confirm.
        """
        try:
            self.doc._ctxt.xpathFreeContext()
        except Exception:
            # Best effort: there may be no document or no stored context.
            pass
        libxml2mod.freeDoc(self._o)
#
# implements the depth-first iterator for libxml2 DOM tree
#
class xmlCoreDepthFirstItertor:
    """Depth-first iterator over a tree of nodes.

    Each node is yielded before its ``children`` chain; once a subtree
    is exhausted the walk resumes at the ``next`` sibling of the most
    recently yielded node that is still pending.  The start node's own
    ``next`` siblings are visited too.
    """

    def __init__(self, node):
        self.node = node
        # Nodes already yielded whose next sibling is still to be visited.
        self.parents = []

    def __iter__(self):
        return self

    def next(self):
        """Return the next node, raising StopIteration when done."""
        while True:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.children
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.next

    # Python 3 spelling of the iterator protocol.
    __next__ = next
#
# implements the breadth-first iterator for libxml2 DOM tree
#
class xmlCoreBreadthFirstItertor:
    """Breadth-first iterator over a tree of nodes.

    A node and the rest of its ``next`` sibling chain are yielded
    first; the ``children`` chains of the yielded nodes follow in the
    order those nodes were yielded.
    """

    def __init__(self, node):
        self.node = node
        # Nodes already yielded whose children are still to be visited,
        # oldest first.
        self.parents = []

    def __iter__(self):
        return self

    def next(self):
        """Return the next node, raising StopIteration when done."""
        while True:
            if self.node:
                ret = self.node
                self.parents.append(self.node)
                self.node = self.node.next
                return ret
            try:
                # Take the oldest pending node (FIFO).  Popping the
                # newest one would descend below later siblings before
                # earlier ones, which is not breadth-first order.
                parent = self.parents.pop(0)
            except IndexError:
                raise StopIteration
            self.node = parent.children

    # Python 3 spelling of the iterator protocol.
    __next__ = next
2002-01-31 23:29:19 +03:00
#
# converters to present a nicer view of the XPath returns
#
def nodeWrap(o):
    """Wrap the low-level node o in the most specific wrapper class.

    The class is chosen from the type string libxml2mod reports for o;
    anything unrecognised is wrapped as a plain xmlNode.
    """
    # TODO try to cast to the most appropriate node class
    # The strings below are node *type* names (xmlCore.get_doc compares
    # the same "document_*" values against libxml2mod.type()), so the
    # dispatch is on the type rather than on the node name.
    kind = libxml2mod.type(o)
    if kind in ("element", "text"):
        return xmlNode(_obj=o)
    if kind == "attribute":
        return xmlAttr(_obj=o)
    if kind.startswith("document"):
        return xmlDoc(_obj=o)
    # Full comparison: an 8-character slice can never equal "namespace".
    if kind == "namespace":
        return xmlNs(_obj=o)
    if kind == "elem_decl":
        return xmlElement(_obj=o)
    if kind == "attribute_decl":
        return xmlAttribute(_obj=o)
    if kind == "entity_decl":
        return xmlEntity(_obj=o)
    if kind == "dtd":
        return xmlDtd(_obj=o)
    return xmlNode(_obj=o)
def xpathObjectRet(o):
    """Present an XPath result in a nicer form.

    A list or tuple is returned as a new list with every item passed
    through nodeWrap(); any other value is returned unchanged.
    """
    if isinstance(o, (list, tuple)):
        # Build a real list: map() is lazy on Python 3.
        return [nodeWrap(item) for item in o]
    return o
2002-02-01 20:56:45 +03:00
#
# register an XPath function
#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python callable f as an XPath function.

    ctxt, name, ns_uri and f are handed unchanged to
    libxml2mod.xmlRegisterXPathFunction(); its result, which used to be
    discarded, is returned to the caller.
    """
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
2002-02-01 20:56:45 +03:00
2002-12-27 18:18:35 +03:00
#
# For the xmlTextReader parser configuration
#
# NOTE(review): the values look like they mirror libxml2's
# xmlParserProperties enum (XML_PARSER_LOADDTD, ...) - confirm.
PARSER_LOADDTD = 1
PARSER_DEFAULTATTRS = 2
PARSER_VALIDATE = 3
2002-12-29 01:56:33 +03:00
# NOTE(review): presumably mirrors XML_PARSER_SUBST_ENTITIES - confirm.
PARSER_SUBST_ENTITIES = 4
2002-12-27 18:18:35 +03:00
2003-01-17 01:45:08 +03:00
#
2003-01-21 00:26:34 +03:00
# For the error callback severities
2003-01-17 01:45:08 +03:00
#
2003-01-21 00:26:34 +03:00
# NOTE(review): presumably these are the "severity" values handed to the
# error handlers registered with setErrorHandler()/SetErrorHandler(),
# mirroring libxml2's xmlParserSeverities enum - confirm.
PARSER_SEVERITY_VALIDITY_WARNING = 1
PARSER_SEVERITY_VALIDITY_ERROR = 2
PARSER_SEVERITY_WARNING = 3
PARSER_SEVERITY_ERROR = 4
2003-01-17 01:45:08 +03:00
2002-01-31 02:49:06 +03:00
#
2003-01-10 16:14:40 +03:00
# register the libxml2 error handler
2002-01-31 02:49:06 +03:00
#
2003-01-10 16:14:40 +03:00
def registerErrorHandler(f, ctx):
    """Register a Python written function for error reporting.
       The function is called back as f(ctx, error).

       Returns the result of the underlying registration call."""
    import sys
    if 'libxslt' not in sys.modules:
        # normal behaviour when libxslt is not imported
        return libxml2mod.xmlRegisterErrorHandler(f, ctx)
    # when libxslt is already imported, one must
    # use libxslt's error handler instead
    import libxslt
    return libxslt.registerErrorHandler(f, ctx)
2003-01-14 14:42:39 +03:00
class parserCtxtCore:
    """Hand-written base for parser context wrappers.

    ``_o`` holds the low-level parser context (or None); it is released
    through libxml2mod.xmlFreeParserCtxt() when the wrapper is deleted.
    """

    def __init__(self, _obj=None):
        self._o = None
        if _obj != None:
            self._o = _obj

    def __del__(self):
        handle = self._o
        if handle != None:
            libxml2mod.xmlFreeParserCtxt(handle)
        self._o = None

    def setErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg, msg, severity, reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o, f, arg)

    def getErrorHandler(self):
        """Return (f, arg) as previously registered with setErrorHandler
           or (None, None)."""
        return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)
def _xmlTextReaderErrorFunc(handler, msg, severity, locator):
    """Intermediate callback to wrap the locator.

    handler is the (f, arg) pair that xmlTextReaderCore.SetErrorHandler()
    registers as callback argument; f is called as
    f(arg, msg, severity, xmlTextReaderLocator(locator)) and its result
    is returned.
    """
    # Unpack in the body: tuple parameters in the signature are not
    # valid syntax on Python 3.
    f, arg = handler
    return f(arg, msg, severity, xmlTextReaderLocator(locator))
2003-01-14 14:42:39 +03:00
2003-01-17 01:45:08 +03:00
class xmlTextReaderCore:
    """Hand-written base for text reader wrappers.

    ``_o`` holds the low-level reader (or None); it is released through
    libxml2mod.xmlFreeTextReader() when the wrapper is deleted.
    ``input`` is initialised to None and not used otherwise in this
    class.
    """

    def __init__(self, _obj=None):
        self.input = None
        self._o = None
        if _obj != None:
            self._o = _obj

    def __del__(self):
        reader = self._o
        if reader != None:
            libxml2mod.xmlFreeTextReader(reader)
        self._o = None

    def SetErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg, msg, severity, locator).

           Passing None as f clears the handler."""
        if f is not None:
            # The module-level trampoline receives (f, arg) and wraps
            # the locator before calling f.
            libxml2mod.xmlTextReaderSetErrorHandler(
                self._o, _xmlTextReaderErrorFunc, (f, arg))
            return
        libxml2mod.xmlTextReaderSetErrorHandler(self._o, None, None)

    def GetErrorHandler(self):
        """Return (f, arg) as previously registered with SetErrorHandler
           or (None, None)."""
        callback, payload = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if callback is not None:
            # assert callback is _xmlTextReaderErrorFunc
            # payload is the (f, arg) pair stored by SetErrorHandler.
            return payload
        return None, None
2003-01-17 01:45:08 +03:00
2003-01-10 16:14:40 +03:00
# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
#
# Everything before this line comes from libxml.py
# Everything after this line is automatically generated
#
# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
2002-01-31 23:29:19 +03:00