1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-01-27 14:03:36 +03:00
libxml2/xmlIO.c
Daniel Veillard dbfd641b78 - Lots of improvements, too long to list here
- Push mode for the XML parser (HTML to come)
- XML shell like interface for debug
- improvements on XPath and validation
Daniel
1999-12-28 16:35:14 +00:00

529 lines
13 KiB
C

/*
* xmlIO.c : implementation of the I/O interfaces used by the parser
*
* See Copyright for the status of this software.
*
* Daniel.Veillard@w3.org
*/
#ifdef WIN32
#include "win32config.h"
#else
#include "config.h"
#endif
#include <stdio.h>
#include <string.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
#include "xmlmemory.h"
#include "parser.h"
#include "xmlIO.h"
#include "nanohttp.h"
/* #define DEBUG_INPUT */
/* #define VERBOSE_FAILURE */
/* #define DEBUG_EXTERNAL_ENTITIES */
#ifdef DEBUG_INPUT
#define MINLEN 40
#else
#define MINLEN 4000
#endif
/**
* xmlAllocParserInputBuffer:
* @enc: the charset encoding if known
*
* Create a buffered parser input for progressive parsing
*
* Returns the new parser input or NULL
*/
xmlParserInputBufferPtr
xmlAllocParserInputBuffer(xmlCharEncoding enc) {
xmlParserInputBufferPtr ret;
ret = (xmlParserInputBufferPtr) xmlMalloc(sizeof(xmlParserInputBuffer));
if (ret == NULL) {
fprintf(stderr, "xmlAllocParserInputBuffer : out of memory!\n");
return(NULL);
}
memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer));
ret->buffer = xmlBufferCreate();
if (ret->buffer == NULL) {
xmlFree(ret);
return(NULL);
}
ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
ret->encoder = xmlGetCharEncodingHandler(enc);
ret->fd = -1;
ret->netIO = NULL;
return(ret);
}
/**
* xmlFreeParserInputBuffer:
* @in: a buffered parser input
*
* Free up the memory used by a buffered parser input
*/
void
xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
if (in->buffer != NULL) {
xmlBufferFree(in->buffer);
in->buffer = NULL;
}
#ifdef HAVE_ZLIB_H
if (in->gzfile != NULL)
gzclose(in->gzfile);
#endif
if (in->netIO != NULL)
xmlNanoHTTPClose(in->netIO);
if (in->fd >= 0)
close(in->fd);
memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
xmlFree(in);
}
/**
* xmlParserInputBufferCreateFilename:
* @filename: a C string containing the filename
* @enc: the charset encoding if known
*
* Create a buffered parser input for the progressive parsing of a file
* If filename is "-' then we use stdin as the input.
* Automatic support for ZLIB/Compress compressed document is provided
* by default if found at compile-time.
*
* Returns the new parser input or NULL
*/
xmlParserInputBufferPtr
xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
xmlParserInputBufferPtr ret;
#ifdef HAVE_ZLIB_H
gzFile input = 0;
#else
int input = -1;
#endif
void *netIO = NULL;
if (filename == NULL) return(NULL);
if (!strncmp(filename, "http://", 7)) {
netIO = xmlNanoHTTPOpen(filename, NULL);
if (netIO == NULL) {
#ifdef VERBOSE_FAILURE
fprintf (stderr, "Cannot read URL %s\n", filename);
perror ("xmlNanoHTTPOpen failed");
#endif
return(NULL);
}
} else if (!strcmp(filename, "-")) {
#ifdef HAVE_ZLIB_H
input = gzdopen (fileno(stdin), "r");
if (input == NULL) {
#ifdef VERBOSE_FAILURE
fprintf (stderr, "Cannot read from stdin\n");
perror ("gzdopen failed");
#endif
return(NULL);
}
#else
#ifdef WIN32
input = -1;
#else
input = fileno(stdin);
#endif
if (input < 0) {
#ifdef VERBOSE_FAILURE
fprintf (stderr, "Cannot read from stdin\n");
perror ("open failed");
#endif
return(NULL);
}
#endif
} else {
#ifdef HAVE_ZLIB_H
input = gzopen (filename, "r");
if (input == NULL) {
#ifdef VERBOSE_FAILURE
fprintf (stderr, "Cannot read file %s :\n", filename);
perror ("gzopen failed");
#endif
return(NULL);
}
#else
#ifdef WIN32
input = _open (filename, O_RDONLY | _O_BINARY);
#else
input = open (filename, O_RDONLY);
#endif
if (input < 0) {
#ifdef VERBOSE_FAILURE
fprintf (stderr, "Cannot read file %s :\n", filename);
perror ("open failed");
#endif
return(NULL);
}
#endif
}
/*
* TODO : get the 4 first bytes and decode the charset
* if enc == XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines here. !!!
* enc = xmlDetectCharEncoding(buffer);
*/
ret = xmlAllocParserInputBuffer(enc);
if (ret != NULL) {
#ifdef HAVE_ZLIB_H
ret->gzfile = input;
#else
ret->fd = input;
#endif
ret->netIO = netIO;
}
xmlParserInputBufferRead(ret, 4);
return(ret);
}
/**
* xmlParserInputBufferCreateFile:
* @file: a FILE*
* @enc: the charset encoding if known
*
* Create a buffered parser input for the progressive parsing of a FILE *
* buffered C I/O
*
* Returns the new parser input or NULL
*/
xmlParserInputBufferPtr
xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) {
xmlParserInputBufferPtr ret;
if (file == NULL) return(NULL);
ret = xmlAllocParserInputBuffer(enc);
if (ret != NULL)
ret->file = file;
return(ret);
}
/**
* xmlParserInputBufferCreateFd:
* @fd: a file descriptor number
* @enc: the charset encoding if known
*
* Create a buffered parser input for the progressive parsing for the input
* from a file descriptor
*
* Returns the new parser input or NULL
*/
xmlParserInputBufferPtr
xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
xmlParserInputBufferPtr ret;
if (fd < 0) return(NULL);
ret = xmlAllocParserInputBuffer(enc);
if (ret != NULL)
ret->fd = fd;
return(ret);
}
/**
* xmlParserInputBufferPush:
* @in: a buffered parser input
* @buf: an char array
* @len: the size in bytes of the array.
*
* Push the content of the arry in the input buffer
* This routine handle the I18N transcoding to internal UTF-8
* This is used when operating the parser in progressive (push) mode.
*
* Returns the number of chars read and stored in the buffer, or -1
* in case of error.
*/
int
xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
int nbchars = 0;
if (len < 0) return(0);
if (in->encoder != NULL) {
xmlChar *buffer;
buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
if (buffer == NULL) {
fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
xmlFree(buffer);
return(-1);
}
nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar),
(xmlChar *) buf, len);
/*
* TODO : we really need to have something atomic or the
* encoder must report the number of bytes read
*/
buffer[nbchars] = 0;
xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
xmlFree(buffer);
} else {
nbchars = len;
xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
}
#ifdef DEBUG_INPUT
fprintf(stderr, "I/O: pushed %d chars, buffer %d/%d\n",
nbchars, in->buffer->use, in->buffer->size);
#endif
return(nbchars);
}
/**
* xmlParserInputBufferGrow:
* @in: a buffered parser input
* @len: indicative value of the amount of chars to read
*
* Grow up the content of the input buffer, the old data are preserved
* This routine handle the I18N transcoding to internal UTF-8
* This routine is used when operating the parser in normal (pull) mode
* TODO: one should be able to remove one extra copy
*
* Returns the number of chars read and stored in the buffer, or -1
* in case of error.
*/
int
xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
char *buffer = NULL;
#ifdef HAVE_ZLIB_H
gzFile input = (gzFile) in->gzfile;
#endif
int res = 0;
int nbchars = 0;
int buffree;
if ((len <= MINLEN) && (len != 4))
len = MINLEN;
buffree = in->buffer->size - in->buffer->use;
if (buffree <= 0) {
fprintf(stderr, "xmlParserInputBufferGrow : buffer full !\n");
return(0);
}
if (len > buffree)
len = buffree;
buffer = xmlMalloc((len + 1) * sizeof(char));
if (buffer == NULL) {
fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
return(-1);
}
if (in->netIO != NULL) {
res = xmlNanoHTTPRead(in->netIO, &buffer[0], len);
} else if (in->file != NULL) {
res = fread(&buffer[0], 1, len, in->file);
#ifdef HAVE_ZLIB_H
} else if (in->gzfile != NULL) {
res = gzread(input, &buffer[0], len);
#endif
} else if (in->fd >= 0) {
res = read(in->fd, &buffer[0], len);
} else {
fprintf(stderr, "xmlParserInputBufferGrow : no input !\n");
xmlFree(buffer);
return(-1);
}
if (res == 0) {
xmlFree(buffer);
return(0);
}
if (res < 0) {
perror ("read error");
xmlFree(buffer);
return(-1);
}
if (in->encoder != NULL) {
xmlChar *buf;
buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
if (buf == NULL) {
fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
xmlFree(buffer);
return(-1);
}
nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
BAD_CAST buffer, res);
buf[nbchars] = 0;
xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
xmlFree(buf);
} else {
nbchars = res;
buffer[nbchars] = 0;
xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
}
#ifdef DEBUG_INPUT
fprintf(stderr, "I/O: read %d chars, buffer %d/%d\n",
nbchars, in->buffer->use, in->buffer->size);
#endif
xmlFree(buffer);
return(nbchars);
}
/**
* xmlParserInputBufferRead:
* @in: a buffered parser input
* @len: indicative value of the amount of chars to read
*
* Refresh the content of the input buffer, the old data are considered
* consumed
* This routine handle the I18N transcoding to internal UTF-8
*
* Returns the number of chars read and stored in the buffer, or -1
* in case of error.
*/
int
xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) {
/* xmlBufferEmpty(in->buffer); */
if ((in->netIO != NULL) || (in->file != NULL) ||
#ifdef HAVE_ZLIB_H
(in->gzfile != NULL) ||
#endif
(in->fd >= 0))
return(xmlParserInputBufferGrow(in, len));
else
return(0);
}
/*
* xmlParserGetDirectory:
* @filename: the path to a file
*
* lookup the directory for that file
*
* Returns a new allocated string containing the directory, or NULL.
*/
char *
xmlParserGetDirectory(const char *filename) {
char *ret = NULL;
char dir[1024];
char *cur;
char sep = '/';
if (filename == NULL) return(NULL);
#ifdef WIN32
sep = '\\';
#endif
strncpy(dir, filename, 1023);
dir[1023] = 0;
cur = &dir[strlen(dir)];
while (cur > dir) {
if (*cur == sep) break;
cur --;
}
if (*cur == sep) {
if (cur == dir) dir[1] = 0;
else *cur = 0;
ret = xmlMemStrdup(dir);
} else {
if (getcwd(dir, 1024) != NULL) {
dir[1023] = 0;
ret = xmlMemStrdup(dir);
}
}
return(ret);
}
/****************************************************************
* *
* External entities loading *
* *
****************************************************************/
/*
* xmlDefaultExternalEntityLoader:
* @URL: the URL for the entity to load
* @ID: the System ID for the entity to load
* @context: the context in which the entity is called or NULL
*
* By default we don't load external entitites, yet.
* TODO: get a sample http implementation and scan for existing one
* at compile time.
*
* Returns a new allocated xmlParserInputPtr, or NULL.
*/
static
xmlParserInputPtr
xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
xmlParserInputPtr context) {
#ifdef DEBUG_EXTERNAL_ENTITIES
fprintf(stderr, "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
#endif
return(NULL);
}
static xmlExternalEntityLoader xmlCurrentExternalEntityLoader =
xmlDefaultExternalEntityLoader;
/*
* xmlSetExternalEntityLoader:
* @f: the new entity resolver function
*
* Changes the defaultexternal entity resolver function for the application
*/
void
xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
xmlCurrentExternalEntityLoader = f;
}
/*
* xmlGetExternalEntityLoader:
*
* Get the default external entity resolver function for the application
*
* Returns the xmlExternalEntityLoader function pointer
*/
xmlExternalEntityLoader
xmlGetExternalEntityLoader(void) {
return(xmlCurrentExternalEntityLoader);
}
/*
* xmlLoadExternalEntity:
* @URL: the URL for the entity to load
* @ID: the System ID for the entity to load
* @context: the context in which the entity is called or NULL
*
* Load an external entity, note that the use of this function for
* unparsed entities may generate problems
* TODO: a more generic External entitiy API must be designed
*
* Returns the xmlParserInputPtr or NULL
*/
xmlParserInputPtr
xmlLoadExternalEntity(const char *URL, const char *ID,
xmlParserInputPtr context) {
return(xmlCurrentExternalEntityLoader(URL, ID, context));
}