samba-mirror/py_tdbpack.c at 24fb0cde2f0b657df1c99474cd694438c94a566e

mirror of https://github.com/samba-team/samba.git synced 2025-02-09 09:57:48 +03:00

Martin Pool 1f7ed8bb86 Import my code to do reasonably fast tdbpack/unpack from Python

663 lines

16 KiB

C

Raw Blame History

 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
    Python wrapper for Samba tdb pack/unpack functions
    Copyright (C) Martin Pool 2002
    NOTE PYTHON STYLE GUIDE
    http://www.python.org/peps/pep-0007.html
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 #include "Python.h"
 #include <limits.h>
 /* Forward declarations for helpers that are used before they are defined. */

 /* Total number of bytes needed to pack val_seq according to format_str, or
    -1 with a Python exception set. */
 static int pytdbpack_calc_reqd_len(char *format_str,
 				   PyObject *val_seq);
 /* Decode one field for the given format character, advancing *pbuf and
    reducing *plen by the amount consumed.  Returns NULL on failure. */
 static PyObject *pytdbpack_unpack_item(char,
 				      char **pbuf,
 				      int *plen);
 /* Number of bytes needed to pack the single value val_obj with format_ch, or
    -1 with a Python exception set. */
 static int
 pytdbpack_calc_item_len(char format_ch,
 			PyObject *val_obj);
 /* Encode every value of val_seq into buf according to format_str.  Returns
    NULL on failure. */
 static PyObject *pytdbpack_pack_data(const char *format_str,
 				     PyObject *val_seq,
 				     unsigned char *buf);
 /* Module-level documentation, handed to Py_InitModule3().

    Written as adjacent string literals with explicit "\n" escapes, because a
    single literal that runs across several physical source lines is not
    valid C.  The pointer code is documented as lower-case 'p' (the code the
    pack and unpack routines actually test for), and 'B' is described as the
    single string value that the code packs and unpacks. */
 static const char * pytdbpack_docstring =
 "Convert between Python values and Samba binary encodings.\n"
 "This module is conceptually similar to the standard 'struct' module, but it\n"
 "uses both a different binary format and a different description string.\n"
 "Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
 "little-endian, unpadded, non-self-describing binary format.  It is intended\n"
 "that these functions be as similar as possible to the routines in Samba's\n"
 "tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
 "Python strings are used to specify the format of data to be packed or\n"
 "unpacked.\n"
 "Strings in TDBs are typically stored in DOS codepages.  The caller of this\n"
 "module must make appropriate translations if necessary, typically to and from\n"
 "Unicode objects.\n"
 "tdbpack format strings:\n"
 "    'f':  NULL-terminated string in DOS codepage\n"
 "    'P':  same as 'f'\n"
 "    'd':  4 byte little-endian number\n"
 "    'w':  2 byte little-endian number\n"
 "    'p': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
 "          really just an \"exists\" or \"does not exist\" flag.  The boolean\n"
 "          value of the Python object is used.\n"
 "    'B': 4-byte LE length, followed by that many bytes of binary data.\n"
 "         Corresponds to a Python byte string of the appropriate length.\n"
 "    '$': Special flag indicating that the preceding format code should be\n"
 "         repeated while data remains.  This is only supported for unpacking.\n"
 "    Every code corresponds to a single Python object, except '$', which\n"
 "    produces however many make sense.\n";
 /* Docstring for tdbpack.pack(), referenced from the method table.

    Built from adjacent string literals with explicit "\n" escapes; a literal
    that spans several physical source lines is not valid C. */
 static char const pytdbpack_pack_doc[] =
 "pack(format, values) -> buffer\n"
 "Pack Python objects into Samba binary format according to format string.\n"
 "arguments:\n"
 "    format -- string of tdbpack format characters\n"
 "    values -- sequence of value objects corresponding 1:1 to format characters\n"
 "returns:\n"
 "    buffer -- string containing packed data\n"
 "raises:\n"
 "    IndexError -- if there are not the same number of format codes as of\n"
 "        values\n"
 "    ValueError -- if any of the format characters is illegal\n"
 "    TypeError  -- if the format is not a string, or values is not a sequence,\n"
 "        or any of the values is of the wrong type for the corresponding\n"
 "        format character\n";
 /* Docstring for tdbpack.unpack(), referenced from the method table.

    Built from adjacent string literals with explicit "\n" escapes; a literal
    that spans several physical source lines is not valid C.  The return
    value is described as a 2-tuple, matching the two-element tuple that
    unpack builds. */
 static char const pytdbpack_unpack_doc[] =
 "unpack(format, buffer) -> (values, rest)\n"
 "Unpack Samba binary data according to format string.\n"
 "arguments:\n"
 "    format -- string of tdbpack characters\n"
 "    buffer -- string of packed binary data\n"
 "returns:\n"
 "    2-tuple of:\n"
 "        values -- sequence of values corresponding 1:1 to format characters\n"
 "        rest -- string containing data that was not decoded, or '' if the\n"
 "            whole string was consumed\n"
 "raises:\n"
 "    IndexError -- if there is insufficient data in the buffer for the\n"
 "        format (or if the data is corrupt and contains a variable-length\n"
 "        field extending past the end)\n"
 "    ValueError -- if any of the format characters is illegal\n"
 "notes:\n"
 "    Because unconsumed data is returned, you can feed it back in to the\n"
 "    unpacker to extract further fields.  Alternatively, if you wish to modify\n"
 "    some fields near the start of the data, you may be able to save time by\n"
 "    only unpacking and repacking the necessary part.\n";
 /*
   pack(format, values) -> string

   Game plan is to first of all walk through the arguments and calculate the
   total length that will be required.  We then allocate a Python string of
   exactly that size and walk through the values a second time, encoding
   them straight into the string's own storage, so no temporary buffer or
   extra copy is needed.

   The argument objects are only borrowed.  The one reference this function
   owns is the fast sequence produced by PySequence_Fast(), which is released
   on every exit path.  Ownership of the result string passes to the caller.

   Returns NULL with a Python exception set if the arguments cannot be
   parsed, if the second argument is not a sequence, if the length
   calculation or the encoding fails, or if the string cannot be allocated.
  */
 static PyObject *
 pytdbpack_pack(PyObject *self,
 	       PyObject *args)
 {
 	char *format_str;
 	PyObject *val_seq, *fast_seq;
 	PyObject *buf_str = NULL;
 	int reqd_len;

 	/* TODO: Test passing wrong types or too many arguments */
 	if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 		return NULL;

 	/* Convert into a list or tuple (if not already one), so that we can
 	 * index more easily.  This yields a new reference. */
 	fast_seq = PySequence_Fast(val_seq,
 				   "pytdbpack_pack: argument 2 must be sequence");
 	if (!fast_seq)
 		return NULL;

 	reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
 	if (reqd_len == -1)	/* exception was thrown */
 		goto out;

 	/* A NULL source pointer asks for an uninitialized string of the
 	   given size, which we are then allowed to fill in.  A zero length
 	   is fine here, unlike malloc(0) which may legitimately return
 	   NULL. */
 	buf_str = PyString_FromStringAndSize(NULL, reqd_len);
 	if (!buf_str)
 		goto out;

 	if (!pytdbpack_pack_data(format_str, fast_seq,
 				 (unsigned char *) PyString_AS_STRING(buf_str))) {
 		Py_DECREF(buf_str);
 		buf_str = NULL;
 	}

   out:
 	Py_DECREF(fast_seq);
 	return buf_str;
 }
 /*
   unpack(format, buffer) -> (values, rest)

   Decodes one value per character of the format string from the packed
   buffer.  The result is a 2-tuple holding a list of the decoded values and
   a string containing whatever bytes were not consumed.

   Returns NULL with a Python exception set if the arguments cannot be
   parsed, if '$' is the first format character, if any field fails to
   decode, or if an allocation fails.

   NOTE(review): a '$' is handled by reusing the preceding format character
   for that one position only; this routine does not keep repeating it until
   the data runs out.
  */
 static PyObject *
 pytdbpack_unpack(PyObject *self,
 		 PyObject *args)
 {
 	char *format_str, *packed_str, *ppacked;
 	PyObject *val_list = NULL, *ret_tuple = NULL;
 	PyObject *rest_string = NULL;
 	int format_len, packed_len;
 	int i;
 	char last_format = '#';

 	/* get arguments */
 	if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 		return NULL;

 	format_len = strlen(format_str);

 	/* allocate list to hold results */
 	val_list = PyList_New(format_len);
 	if (!val_list)
 		goto failed;
 	ret_tuple = PyTuple_New(2);
 	if (!ret_tuple)
 		goto failed;

 	/* For every object, unpack.  */
 	for (ppacked = packed_str, i = 0; i < format_len; i++) {
 		PyObject *val_obj;
 		char format;

 		format = format_str[i];
 		if (format == '$') {
 			if (i == 0) {
 				PyErr_Format(PyExc_ValueError,
 					     "%s: '$' may not be first character in format",
 					     __FUNCTION__);
 				goto failed;
 			}
 			else {
 				format = last_format; /* repeat */
 			}
 		}

 		val_obj = pytdbpack_unpack_item(format,
 						&ppacked,
 						&packed_len);
 		if (!val_obj)
 			goto failed;

 		/* the list takes over our reference to val_obj */
 		PyList_SET_ITEM(val_list, i, val_obj);
 		last_format = format;
 	}

 	/* put leftovers in box for lunch tomorrow */
 	rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 	if (!rest_string)
 		goto failed;

 	/* return (values, rest) tuple; give up references to them */
 	PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 	val_list = NULL;
 	PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 	rest_string = NULL;
 	return ret_tuple;

   failed:
 	/* handle failure: deallocate anything */
 	Py_XDECREF(val_list);
 	Py_XDECREF(ret_tuple);
 	Py_XDECREF(rest_string);
 	return NULL;
 }
 /*
   Internal routine that calculates how many bytes will be required to
   encode the values in the format.

   Also checks that the value list is the right size for the format list.

   Returns number of bytes (may be 0), or -1 if there's something wrong, in
   which case a Python exception has been raised:
     IndexError    -- the value list and the format string differ in length
     OverflowError -- the running total would no longer fit in an int
     anything raised by pytdbpack_calc_item_len() for an individual item

   Arguments:
     format_str: the tdbpack format characters, NUL terminated
     val_seq: a Fast Sequence (list or tuple), being all the values
 */
 static int
 pytdbpack_calc_reqd_len(char *format_str,
 			PyObject *val_seq)
 {
 	int len = 0;
 	char *p;
 	int val_i;
 	int val_len;

 	val_len = PySequence_Fast_GET_SIZE(val_seq);

 	for (p = format_str, val_i = 0; *p; p++, val_i++) {
 		char ch = *p;
 		PyObject *val_obj;
 		int item_len;

 		if (val_i >= val_len) {
 			PyErr_Format(PyExc_IndexError,
 				     "samba.tdbpack.pack: value list is too short for format string");
 			return -1;
 		}

 		/* borrow a reference to the item */
 		val_obj = PySequence_Fast_GET_ITEM(val_seq, val_i);
 		if (!val_obj)
 			return -1;

 		item_len = pytdbpack_calc_item_len(ch, val_obj);
 		if (item_len == -1)
 			return -1;

 		/* The caller sizes its output buffer from this total, so a
 		   wrapped-around sum must never be returned. */
 		if (item_len > INT_MAX - len) {
 			PyErr_Format(PyExc_OverflowError,
 				     "%s: packed data would be larger than %d bytes",
 				     __FUNCTION__, INT_MAX);
 			return -1;
 		}
 		len += item_len;
 	}

 	/* format exhausted; any values left over are an error too */
 	if (val_i != val_len) {
 		PyErr_Format(PyExc_IndexError,
 			     "%s: value list is wrong length for format string",
 			     __FUNCTION__);
 		return -1;
 	}

 	return len;
 }
 /*
   Calculate the number of bytes required to pack a single value.

   Arguments:
     ch: the tdbpack format character for this value
     val_obj: the value (borrowed reference)

   Returns the encoded size in bytes, or -1 with a Python exception set:
     TypeError     -- val_obj is not an Int ('d', 'w') or not a String
                      ('f', 'P', 'B')
     OverflowError -- the string is too long for its size to fit in an int
     ValueError    -- ch is not a supported format character

   No type check is made for 'p'; any object is accepted.
 */
 static int
 pytdbpack_calc_item_len(char ch,
 			PyObject *val_obj)
 {
 	if (ch == 'd' || ch == 'w') {
 		if (!PyInt_Check(val_obj)) {
 			PyErr_Format(PyExc_TypeError,
 				     "tdbpack: format '%c' requires an Int",
 				     ch);
 			return -1;
 		}
 		if (ch == 'w')
 			return 2;
 		else
 			return 4;
 	} else if (ch == 'p') {
 		return 4;
 	}
 	else if (ch == 'f' || ch == 'P' || ch == 'B') {
 		long size;
 		int overhead;

 		if (!PyString_Check(val_obj)) {
 			PyErr_Format(PyExc_TypeError,
 				     "tdbpack: format '%c' requires a String",
 				     ch);
 			return -1;
 		}

 		/* 'B' is a byte buffer preceded by a 4-byte length word;
 		   'f' and 'P' are followed by one nul character. */
 		overhead = (ch == 'B') ? 4 : 1;

 		/* Refuse sizes that cannot be represented in the int we
 		   return, rather than silently truncating them. */
 		size = (long) PyString_GET_SIZE(val_obj);
 		if (size > INT_MAX - overhead) {
 			PyErr_Format(PyExc_OverflowError,
 				     "tdbpack: string is too long for format '%c'",
 				     ch);
 			return -1;
 		}
 		return overhead + (int) size;
 	}
 	else {
 		/* __FUNCTION__ is passed as an argument: it is not a string
 		   literal and cannot be pasted onto one. */
 		PyErr_Format(PyExc_ValueError,
 			     "%s: format character '%c' is not supported",
 			     __FUNCTION__, ch);
 		return -1;
 	}
 }
 /*
   Write the low 32 bits of VAL_LONG to *PBUF, least significant byte first,
   and move *PBUF on by four bytes.

   XXX: glib and Samba both have faster macros for this kind of endianness
   conversion, but plain libc has nothing comparable and it is unlikely to
   matter.  Doing it byte by byte is redundant on x86, but it is correct on
   any host.
 */
 static void pack_int32(unsigned long val_long, unsigned char **pbuf)
 {
 	unsigned char *out = *pbuf;
 	int byte_no;

 	for (byte_no = 0; byte_no < 4; byte_no++) {
 		out[byte_no] = (unsigned char) ((val_long >> (8 * byte_no)) & 0xff);
 	}
 	*pbuf = out + 4;
 }
 /* Copy LEN bytes from FROM to *PBUF, then move *PBUF past the copied
    data. */
 static void pack_bytes(long len, const char *from,
 		       unsigned char **pbuf)
 {
 	unsigned char *dest = *pbuf;

 	memcpy(dest, from, len);
 	*pbuf = dest + len;
 }
 /* Raise IndexError to report that the packed data ended before the field
    being decoded was complete.

    __FUNCTION__ is passed through "%s" because it is not a string literal
    and so cannot be concatenated with one. */
 static void
 unpack_err_too_short(void)
 {
 	PyErr_Format(PyExc_IndexError,
 		     "%s: data too short for unpack format",
 		     __FUNCTION__);
 }
 /* Decode a 4-byte little-endian number from *PBUF.

    On success *PBUF is advanced and *PLEN reduced by four, and a new Python
    int is returned.  If fewer than four bytes remain, IndexError is raised
    and NULL is returned with *PBUF and *PLEN untouched.

    The bytes are combined as unsigned values so that no shift ever reaches
    the sign bit of an int.  The 32-bit pattern is then read as a two's
    complement signed number, so a field with its top bit set comes back as
    a negative int. */
 static PyObject *
 unpack_int32(char **pbuf, int *plen)
 {
 	const unsigned char *b;
 	unsigned long u;
 	long v;

 	if (*plen < 4) {
 		unpack_err_too_short();
 		return NULL;
 	}

 	b = (const unsigned char *) *pbuf;
 	u = (unsigned long) b[0]
 		| ((unsigned long) b[1] << 8)
 		| ((unsigned long) b[2] << 16)
 		| ((unsigned long) b[3] << 24);

 	/* Sign-interpret without depending on the width of long. */
 	if (u & 0x80000000UL)
 		v = -(long) (~u & 0x7fffffffUL) - 1;
 	else
 		v = (long) u;

 	(*pbuf) += 4;
 	(*plen) -= 4;

 	return PyInt_FromLong(v);
 }
 /* Decode a 2-byte little-endian number from *PBUF and return it as a new
    Python int, moving *PBUF forward and shrinking *PLEN by two.  When fewer
    than two bytes remain, IndexError is raised and NULL is returned. */
 static PyObject *unpack_int16(char **pbuf, int *plen)
 {
 	const unsigned char *bytes;
 	long value;

 	if (*plen >= 2) {
 		bytes = (const unsigned char *) *pbuf;
 		value = bytes[0] | (bytes[1] << 8);
 		*pbuf += 2;
 		*plen -= 2;
 		return PyInt_FromLong(value);
 	}

 	unpack_err_too_short();
 	return NULL;
 }
 /* Decode a nul-terminated string from *PBUF.

    The terminator is searched for within the *PLEN bytes that remain.  If
    there is none, IndexError is raised and NULL is returned.  Otherwise the
    bytes before the terminator are returned as a new Python string, and
    *PBUF / *PLEN are moved past the string and its terminator. */
 static PyObject *
 unpack_string(char **pbuf, int *plen)
 {
 	char *begin = *pbuf;
 	char *terminator = memchr(begin, '\0', *plen);
 	int consumed;

 	if (terminator == NULL) {
 		unpack_err_too_short();
 		return NULL;
 	}

 	/* string bytes plus the trailing \0 */
 	consumed = (terminator - begin) + 1;
 	*pbuf = begin + consumed;
 	*plen -= consumed;

 	return PyString_FromStringAndSize(begin, consumed - 1);
 }
 /* Decode a length-prefixed byte buffer from *PBUF: a 4-byte little-endian
    length followed by that many bytes of data.

    Returns the data as a new Python string and moves *PBUF / *PLEN past the
    length word and the data.  Returns NULL with an exception set if:
      IndexError -- fewer than four bytes remain, or the length word claims
                    more data than is left
      ValueError -- the length word has its top bit set, i.e. it would be
                    negative as a signed 32-bit number

    The length is assembled from unsigned values so that no shift reaches
    the sign bit of an int.  __FUNCTION__ is passed through "%s" because it
    is not a string literal and cannot be concatenated with one. */
 static PyObject *
 unpack_buffer(char **pbuf, int *plen)
 {
 	const unsigned char *b;
 	unsigned long ulen;
 	long slen;
 	char *start;

 	/* first get 32-bit len */
 	if (*plen < 4) {
 		unpack_err_too_short();
 		return NULL;
 	}

 	b = (const unsigned char *) *pbuf;
 	ulen = (unsigned long) b[0]
 		| ((unsigned long) b[1] << 8)
 		| ((unsigned long) b[2] << 16)
 		| ((unsigned long) b[3] << 24);

 	if (ulen > 0x7fffffffUL) { /* surely you jest */
 		PyErr_Format(PyExc_ValueError,
 			     "%s: buffer seems to have negative length",
 			     __FUNCTION__);
 		return NULL;
 	}
 	slen = (long) ulen;

 	(*pbuf) += 4;
 	(*plen) -= 4;
 	start = *pbuf;

 	if (*plen < slen) {
 		PyErr_Format(PyExc_IndexError,
 			     "%s: not enough data to unpack buffer: "
 			     "need %d bytes, have %d",
 			     __FUNCTION__, (int) slen, *plen);
 		return NULL;
 	}

 	(*pbuf) += slen;
 	(*plen) -= slen;

 	return PyString_FromStringAndSize(start, slen);
 }
 /* Unpack a single field from packed data, according to format character CH.

    Remaining data is at *PBUF, of *PLEN.

    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
    been consumed.

    Returns a reference to the unpacked Python object, or NULL for failure.
    ValueError is raised here for an unsupported format character; other
    failures are raised by the per-type decoders.
 */
 static PyObject *pytdbpack_unpack_item(char ch,
 				       char **pbuf,
 				       int *plen)
 {
 	switch (ch) {
 	case 'w':		/* 16-bit int */
 		return unpack_int16(pbuf, plen);

 	case 'd':		/* 32-bit int */
 	case 'p':		/* pointers can just come through as integers */
 		return unpack_int32(pbuf, plen);

 	case 'f':		/* nul-term string */
 	case 'P':
 		return unpack_string(pbuf, plen);

 	case 'B':		/* length, buffer */
 		return unpack_buffer(pbuf, plen);

 	default:
 		/* __FUNCTION__ is not a string literal, so it goes through
 		   "%s" rather than being pasted onto the message. */
 		PyErr_Format(PyExc_ValueError,
 			     "%s: format character '%c' is not supported",
 			     __FUNCTION__, ch);
 		return NULL;
 	}
 }
 /*
   Pack a single item VAL_OBJ, encoded using format CH, into a buffer at *PBUF,
   and advance the pointer.  Buffer length has been pre-calculated so we are
   sure that there is enough space.

   Returns NULL with a Python exception set on failure: ValueError for an
   unsupported format character, or whatever exception was raised while
   evaluating the truth value of a 'p' item.  On success Py_None is returned
   purely as a non-NULL success flag; its reference count is not raised, so
   the caller must not release it.
 */
 static PyObject *
 pytdbpack_pack_item(char ch,
 		    PyObject *val_obj,
 		    unsigned char **pbuf)
 {
 	if (ch == 'w') {
 		/* 2-byte LE number */
 		unsigned long val_long = PyInt_AsLong(val_obj);
 		(*pbuf)[0] = val_long & 0xff;
 		(*pbuf)[1] = (val_long >> 8) & 0xff;
 		(*pbuf) += 2;
 	}
 	else if (ch == 'd') {
 		/* 4-byte LE number */
 		pack_int32(PyInt_AsLong(val_obj), pbuf);
 	}
 	else if (ch == 'p') {
 		/* "Pointer" value -- in the subset of DCERPC used by Samba,
 		   this is really just an "exists" or "does not exist"
 		   flag. */
 		int truth = PyObject_IsTrue(val_obj);

 		/* -1 means the truth test itself raised; do not encode the
 		   error code as though it were data. */
 		if (truth == -1)
 			return NULL;
 		pack_int32(truth, pbuf);
 	}
 	else if (ch == 'f' || ch == 'P') {
 		int size;
 		char *sval;

 		size = PyString_GET_SIZE(val_obj);
 		sval = PyString_AS_STRING(val_obj);
 		pack_bytes(size+1, sval, pbuf); /* include nul */
 	}
 	else if (ch == 'B') {
 		int size;
 		char *sval;

 		size = PyString_GET_SIZE(val_obj);
 		pack_int32(size, pbuf);
 		sval = PyString_AS_STRING(val_obj);
 		pack_bytes(size, sval, pbuf); /* do not include nul */
 	}
 	else {
 		/* this ought to be caught while calculating the length, but
 		   just in case. */
 		PyErr_Format(PyExc_ValueError,
 			     "%s: format character '%c' is not supported",
 			     __FUNCTION__, ch);
 		return NULL;
 	}

 	return Py_None;
 }
 /*
   Encode the elements of VAL_SEQ into PACKED_BUF, one element per character
   of FORMAT_STR.

   The caller has already run the length calculation over the same format
   and values, so PACKED_BUF is big enough and VAL_SEQ has an element for
   every format character.  VAL_SEQ must be a Python Fast sequence.

   Returns NULL (exception set) as soon as one item fails to pack, otherwise
   Py_None.
 */
 PyObject *
 pytdbpack_pack_data(const char *format_str,
 		    PyObject *val_seq,
 		    unsigned char *packed_buf)
 {
 	const char *fmt;
 	unsigned char *cursor = packed_buf;
 	int val_i = 0;

 	for (fmt = format_str; *fmt != '\0'; fmt++, val_i++) {
 		/* borrowed reference; nothing to release */
 		PyObject *item = PySequence_Fast_GET_ITEM(val_seq, val_i);

 		if (item == NULL)
 			return NULL;
 		if (pytdbpack_pack_item(*fmt, item, &cursor) == NULL)
 			return NULL;
 	}

 	return Py_None;
 }
 static PyMethodDef pytdbpack_methods[] = {
 	{ "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
 	{ "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 };
 /* Module initialisation entry point: registers the "tdbpack" module with
    its method table and module docstring. */
 DL_EXPORT(void)
 inittdbpack(void)
 {
 	char *module_doc = (char *) pytdbpack_docstring;

 	Py_InitModule3("tdbpack", pytdbpack_methods, module_doc);
 }

663 lines 16 KiB C Raw Blame History

663 lines

16 KiB

C

Raw Blame History