[BACK]Return to varint.c CVS log [TXT][DIR] Up to [pgFoundry] / varint / varint

File: [pgFoundry] / varint / varint / varint.c (download)

Revision 1.1, Sun Sep 21 05:41:01 2008 UTC (23 months, 1 week ago) by jeremyd
Branch: MAIN

Initial commit of an arbitrary precision unsigned integer data type with
bitwise operators.

The following operators are implemented so far (not optimally, but I'm not
worried about that yet):
& (bitwise and)
| (bitwise or)
^ (bitwise xor)
<< (bitwise left shift)
>> (bitwise right shift)
+ (arithmetic add)
- (arithmetic subtract)
* (arithmetic multiply)

I plan to implement comparison operators and division next.

/* this file was originally based on pgsql/src/backend/utils/adt/varbit.c */
#include "postgres.h"
#include <math.h>

#include "access/htup.h"
#include "libpq/pqformat.h"
#include "utils/array.h"
#include "utils/varbit.h"
#include "varint.h"

#define HEXDIG(z)	 ((z)<10 ? ((z)+'0') : ((z)-10+'A'))

PG_MODULE_MAGIC;

/* common code for bittypmodin and varbittypmodin */
static int32
anybit_typmodin(ArrayType *ta, const char *typename)
{
	int32		typmod;
	int32	   *tl;
	int			n;

	tl = ArrayGetIntegerTypmods(ta, &n);

	/*
	 * we're not too tense about good error message here because grammar
	 * shouldn't allow wrong number of modifiers for BIT
	 */
	if (n != 1)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid type modifier")));

	if (*tl < 1)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("length for type %s must be at least 1",
						typename)));
	if (*tl > (MaxAttrSize * BITS_PER_BYTE))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("length for type %s cannot exceed %d",
						typename, MaxAttrSize * BITS_PER_BYTE)));

	typmod = *tl;

	return typmod;
}

/* common code for bittypmodout and varbittypmodout */
static char *
anybit_typmodout(int32 typmod)
{
	char	   *res = (char *) palloc(64);

	if (typmod >= 0)
		snprintf(res, 64, "(%d)", typmod);
	else
		*res = '\0';

	return res;
}


/*----------
 *	attypmod -- contains the length of the bit string in bits, or for
 *			   varying bits the maximum length.
 *
 *	The data structure contains the following elements:
 *	  header  -- length of the whole data structure (incl header)
 *				 in bytes. (as with all varying length datatypes)
 *	  data section -- private data section for the bits data structures
 *		bitlength -- length of the bit string in bits
 *		bitdata   -- bit string, most significant byte first
 *
 *	The length of the bitdata vector should always be exactly as many
 *	bytes as are needed for the given bitlength.  If the bitlength is
 *	not a multiple of 8, the extra low-order padding bits of the last
 *	byte must be zeroes.
 *----------
 */

/*
 * bit_in -
 *	  converts a char string to the internal representation of a bitstring.
 *		  The length is determined by the number of bits required plus
 *		  VARHDRSZ bytes or from atttypmod.
 */
PG_FUNCTION_INFO_V1(varint_in);
Datum
varint_in(PG_FUNCTION_ARGS)
{
	char	   *input_string = PG_GETARG_CSTRING(0);

#ifdef NOT_USED
	Oid			typelem = PG_GETARG_OID(1);
#endif
	int32		atttypmod = PG_GETARG_INT32(2);
	VarInt	   *result;			/* The resulting bit string			  */
	char	   *sp, *ep;				/* pointer into the character string  */
	int			len,			/* Length of the whole data structure */
				bitlen,			/* Number of bits in the bit string   */
				slen;			/* Length of the input string		  */
	uint32 *r;
	uint32 x, bc;
	enum {
		BASE_2,
		BASE_10,
		BASE_16
	} base_of_input;

	/* Check that the first character is a b or an x */
	if (input_string[0] == 'b' || input_string[0] == 'B')
	{
		base_of_input = BASE_2;
		sp = input_string + 1;
	}
	else if (input_string[0] == 'x' || input_string[0] == 'X')
	{
		base_of_input = BASE_16;
		sp = input_string + 1;
	}
	else
	{
		/*
		 * Otherwise it's decimal.  This allows things like cast('1234' as varint)
		 * to work transparently.
		 */
		base_of_input = BASE_10;
		sp = input_string;
	}

	slen = strlen(sp);
	/* Determine bitlength from input string */
	switch (base_of_input)
	{
	case BASE_2:
		bitlen = slen;
		break;
	case BASE_10:
		/* estimate the maximum number of bits needed to represent this
		 * decimal number.  We know how many decimal digits it is by
		 * the length of the string, which is equivilant to
		 * floor(log10(x)).  Add one to account for the fraction part,
		 * and divide by log10(2) to convert the value to log2(x) by
		 * the change of base theorem.  The ceil of that is the maximum
		 * number of bits that a decimal number with slen digits could
		 * take
		 */
		bitlen = ceil((slen+1) / log10(2));
		break;
	case BASE_16:
		bitlen = slen * 4;
		break;
	default:
		elog(ERROR, "ARRGH");
	}

	/*
	 * Sometimes atttypmod is not supplied. If it is supplied we need to make
	 * sure that the bitstring fits.
	 */
	if (atttypmod <= 0)
		atttypmod = bitlen;
	else if (bitlen > atttypmod)
		ereport(ERROR,
				(errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
				 errmsg("bit string length %d does not match type bit(%d)",
						bitlen, atttypmod)));

	len = sizeof(int32) * 2 + ((atttypmod + 31)/32) * 4;
	/* set to 0 so that *r is always initialised and string is zero-padded */
	result = (VarInt *) palloc0(len);
	SET_VARSIZE(result, len);
	result->bit_len = atttypmod;

	r = result->bit_dat;
	ep = sp + slen - 1;
	switch (base_of_input)
	{
	case BASE_2:
		/* Parse the bit representation of the string */
		/* We know it fits, as bitlen was compared to atttypmod */
		x = 1;
		for (; ep >= sp; ep--)
		{
			if (*ep == '1')
				*r |= x;
			else if (*ep != '0')
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
						 errmsg("\"%c\" is not a valid binary digit",
								*ep)));

			x <<= 1;
			if (x == 0)
			{
				x = 1;
				r++;
			}
		}
		break;
	case BASE_10:
		/* Parse the decimal representation of the string */
	case BASE_16:
		/* Parse the hex representation of the string */
		bc = 0;
		for (; ep >= sp; ep--)
		{
			if (*ep >= '0' && *ep <= '9')
				x = (uint32) (*ep - '0');
			else if (*ep >= 'A' && *ep <= 'F')
				x = (uint32) (*ep - 'A') + 10;
			else if (*ep >= 'a' && *ep <= 'f')
				x = (uint32) (*ep - 'a') + 10;
			else
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
						 errmsg("\"%c\" is not a valid hexadecimal digit",
								*ep)));

			*r |= (x << bc);
			bc += 4;
			if (bc >= 32)
			{
				r++;
				bc = 0;
			}
		}
		break;
	}

	PG_RETURN_VARINT_P(result);
}


PG_FUNCTION_INFO_V1(varint_out);
Datum
varint_out(PG_FUNCTION_ARGS)
{
/* This is how one would print a hex string, in case someone wants to
   write a formatting function. */
	VarInt	   *s = PG_GETARG_VARINT_P(0);
	char	   *result,
			   *r;
	uint32	   *sp;
	uint32 x, bc;
	int			i,
				len;

	len = (s->bit_len + 3) / 4;
	result = (char *) palloc(len + 2);
	sp = s->bit_dat;
	r = result + len;
	*result = 'X';

	x = 0xF;
	bc = 0;
	for (i = 0; i < len; i++)
	{
		*r-- = HEXDIG((*sp & x) >> bc);
		x <<= 4;
		if (x != 0)
		{
			bc += 4;
		}
		else
		{
			bc = 0;
			x = 0xF;
			sp++;
		}
	}

	*(result+len+1) = '\0';

	PG_RETURN_CSTRING(result);
}

PG_FUNCTION_INFO_V1(varinttypmodin);
Datum
varinttypmodin(PG_FUNCTION_ARGS)
{
	ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);

	PG_RETURN_INT32(anybit_typmodin(ta, "varint"));
}

PG_FUNCTION_INFO_V1(varinttypmodout);
Datum
varinttypmodout(PG_FUNCTION_ARGS)
{
	int32		typmod = PG_GETARG_INT32(0);

	PG_RETURN_CSTRING(anybit_typmodout(typmod));
}

PG_FUNCTION_INFO_V1(varintand);
Datum
varintand(PG_FUNCTION_ARGS)
{
	VarInt	   *a = PG_GETARG_VARINT_P(0);
	VarInt	   *b = PG_GETARG_VARINT_P(1);

	int4 len, bitlen;
	VarInt *result;
	int4 i, amax, bmax, rmin;

	amax = ((a->bit_len + 31)/32);
	bmax = ((b->bit_len + 31)/32);
	rmin = (amax < bmax) ? amax : bmax;

	bitlen = (a->bit_len > b->bit_len) ? a->bit_len : b->bit_len;
	len = sizeof(int32) * 2 + ((amax > bmax) ? amax : bmax) * 4;
	result = (VarInt *) palloc(len);
	SET_VARSIZE(result, len);
	result->bit_len = bitlen;

	memset(result->bit_dat + rmin, 0, abs(amax-bmax) * 4);

	for (i = 0; i < rmin; i++)
		result->bit_dat[i] = a->bit_dat[i] & b->bit_dat[i];

	PG_RETURN_VARINT_P(result);
}

PG_FUNCTION_INFO_V1(varintor);
Datum
varintor(PG_FUNCTION_ARGS)
{
	VarInt	   *a = PG_GETARG_VARINT_P(0);
	VarInt	   *b = PG_GETARG_VARINT_P(1);

	int4 len, bitlen;
	VarInt *result;
	int4 i, amax, bmax, rmin;

	amax = ((a->bit_len + 31)/32);
	bmax = ((b->bit_len + 31)/32);
	rmin = (amax < bmax) ? amax : bmax;

	bitlen = (a->bit_len > b->bit_len) ? a->bit_len : b->bit_len;
	len = sizeof(int32) * 2 + ((amax > bmax) ? amax : bmax) * 4;
	result = (VarInt *) palloc(len);
	SET_VARSIZE(result, len);
	result->bit_len = bitlen;

	memcpy(result->bit_dat + rmin, (amax > bmax) ? (a->bit_dat + rmin) : (b->bit_dat + rmin), abs(amax-bmax) * 4);

	for (i = 0; i < rmin; i++)
		result->bit_dat[i] = a->bit_dat[i] | b->bit_dat[i];

	PG_RETURN_VARINT_P(result);
}

PG_FUNCTION_INFO_V1(varintxor);
Datum
varintxor(PG_FUNCTION_ARGS)
{
	VarInt	   *a = PG_GETARG_VARINT_P(0);
	VarInt	   *b = PG_GETARG_VARINT_P(1);

	int4 len, bitlen;
	VarInt *result;
	int4 i, amax, bmax, rmin;

	amax = ((a->bit_len + 31)/32);
	bmax = ((b->bit_len + 31)/32);
	rmin = (amax < bmax) ? amax : bmax;

	bitlen = (a->bit_len > b->bit_len) ? a->bit_len : b->bit_len;
	len = sizeof(int32) * 2 + ((amax > bmax) ? amax : bmax) * 4;
	result = (VarInt *) palloc(len);
	SET_VARSIZE(result, len);
	result->bit_len = bitlen;

	memcpy(result->bit_dat + rmin, (amax > bmax) ? (a->bit_dat + rmin) : (b->bit_dat + rmin), abs(amax-bmax) * 4);

	for (i = 0; i < rmin; i++)
		result->bit_dat[i] = a->bit_dat[i] ^ b->bit_dat[i];

	PG_RETURN_VARINT_P(result);
}

PG_FUNCTION_INFO_V1(varintadd);
Datum
varintadd(PG_FUNCTION_ARGS)
{
	VarInt	   *a = PG_GETARG_VARINT_P(0);
	VarInt	   *b = PG_GETARG_VARINT_P(1);

	int4 len, bitlen;
	VarInt *result;
	int4 i, amax, bmax, rmin, rmax, carry;
	uint32 *maxdat;
	uint64 tmp;

	amax = ((a->bit_len + 31)/32);
	bmax = ((b->bit_len + 31)/32);
	rmin = (amax < bmax) ? amax : bmax;
	rmax = (amax > bmax) ? amax : bmax;
	maxdat = (amax > bmax) ? a->bit_dat : b->bit_dat;

	// got to have room for the carry
	bitlen = ((a->bit_len > b->bit_len) ? a->bit_len : b->bit_len) + 1;
	len = sizeof(int32) * 2 + ((bitlen + 31) / 32) * 4;
	result = (VarInt *) palloc(len);
	SET_VARSIZE(result, len);
	result->bit_len = bitlen;

	carry = 0;
	for (i = 0; i < rmin; i++)
	{
		tmp = a->bit_dat[i];
		tmp += b->bit_dat[i];
		tmp += carry;
		carry = ((tmp & (((uint64)1) << 32)) != 0) ? 1 : 0;
		result->bit_dat[i] = (uint32)tmp;
	}

	for (; i < rmax; i++)
	{
		tmp = maxdat[i];
		tmp += carry;
		carry = ((tmp & (((uint64)1) << 32)) != 0) ? 1 : 0;
		result->bit_dat[i] = (uint32)tmp;
	}

	if (((bitlen+31)/32) > i)
	{
		if (carry != 0)
		{
			result->bit_dat[i] = 1;
		}
		else
		{
			result->bit_len--;
			SET_VARSIZE(result, VARSIZE(result) - 4);
		}
	}
	else if ((result->bit_dat[i-1] & (1 << ((result->bit_len & 31) - 1))) == 0)
	{
		result->bit_len--;
	}

	PG_RETURN_VARINT_P(result);
}

PG_FUNCTION_INFO_V1(varintsub);
Datum
varintsub(PG_FUNCTION_ARGS)
{
	VarInt	   *a = PG_GETARG_VARINT_P(0);
	VarInt	   *b = PG_GETARG_VARINT_P(1);

	VarInt *negb, *tmpb;
	VarInt *result;
	VarInt vione = {0, 1, {1}};
	tmpb = palloc0(VARSIZE(b) + 4);
	tmpb->bit_len = ((a->bit_len > b->bit_len) ? a->bit_len : b->bit_len) + 1;
	SET_VARSIZE(tmpb, VARSIZE(b) + 4);
	memcpy (tmpb->bit_dat, b->bit_dat, (b->bit_len + 31)/32 * 4);

	negb = DatumGetVarIntP(DirectFunctionCall1(varintnot, VarIntPGetDatum(tmpb)));
	pfree(tmpb);
	tmpb = negb;
	negb = DatumGetVarIntP(DirectFunctionCall2(varintadd, VarIntPGetDatum(tmpb), VarIntPGetDatum(&vione)));
	pfree(tmpb);

	result = DatumGetVarIntP(DirectFunctionCall2(varintadd, VarIntPGetDatum(a), VarIntPGetDatum(negb)));
	result->bit_len--;
	result->bit_dat[(result->bit_len + 31)/32-1] &= (1 << (result->bit_len & 31)) - 1;

	PG_RETURN_VARINT_P(result);
}

PG_FUNCTION_INFO_V1(varintmul);
Datum
varintmul(PG_FUNCTION_ARGS)
{
	VarInt	   *a = PG_GETARG_VARINT_P(0);
	VarInt	   *b = PG_GETARG_VARINT_P(1);
	VarInt     *result = (VarInt*)palloc0(sizeof(VarInt));

	int4 i, x;

	if (a->bit_len > b->bit_len)
	{
		VarInt *c = a;
		a = b;
		b = c;
	}

	x = ((a->bit_len + 31)/32);

	for (i = 0; i < x-1; i++)
	{
		int j;
		for (j = 0; j < 32; ++j)
		{
			VarInt * tmp;
			if (a->bit_dat[i] & (1 << j))
			{
				tmp = DatumGetVarIntP(DirectFunctionCall2(varintadd, VarIntPGetDatum(result), VarIntPGetDatum(b)));
				pfree(result);
				result = tmp;
			}

			tmp = DatumGetVarIntP(DirectFunctionCall2(varintshl, VarIntPGetDatum(b), Int32GetDatum(1)));
			if ((Pointer)b != PG_GETARG_POINTER(0) && (Pointer)b != PG_GETARG_POINTER(1))
				pfree(b);
			b = tmp;
		}
	}

	if (a->bit_len & 31)
	{
		int j;
		for (j = 0; j < (a->bit_len & 31); ++j)
		{
			VarInt * tmp;
			if (a->bit_dat[i] & (1 << j))
			{
				tmp = DatumGetVarIntP(DirectFunctionCall2(varintadd, VarIntPGetDatum(result), VarIntPGetDatum(b)));
				pfree(result);
				result = tmp;
			}

			tmp = DatumGetVarIntP(DirectFunctionCall2(varintshl, VarIntPGetDatum(b), Int32GetDatum(1)));
			if ((Pointer)b != PG_GETARG_POINTER(0) && (Pointer)b != PG_GETARG_POINTER(1))
				pfree(b);
			b = tmp;
		}
	}

	if ((Pointer)b != PG_GETARG_POINTER(0) && (Pointer)b != PG_GETARG_POINTER(1))
		pfree(b);

	PG_RETURN_VARINT_P(result);
}

PG_FUNCTION_INFO_V1(varintnot);
Datum
varintnot(PG_FUNCTION_ARGS)
{
	VarInt	   *a = PG_GETARG_VARINT_P(0);

	int4 len;
	VarInt *result;
	int4 i, x;

	x = ((a->bit_len + 31)/32);

	len = sizeof(int32) * 2 + x * 4;
	result = (VarInt *) palloc(len);
	SET_VARSIZE(result, len);
	result->bit_len = a->bit_len;

	for (i = 0; i < x; i++)
		result->bit_dat[i] = ~a->bit_dat[i];

	if (a->bit_len & 31)
		result->bit_dat[x-1] &= (1 << (a->bit_len & 31)) - 1; 

	PG_RETURN_VARINT_P(result);
}


PG_FUNCTION_INFO_V1(varintshl);
Datum
varintshl(PG_FUNCTION_ARGS)
{
	VarInt	   *s = PG_GETARG_VARINT_P(0);
	int32 n = PG_GETARG_INT32(1);
	int4 len, bitlen;
	VarInt *result;

	bitlen = s->bit_len + n;
	len = sizeof(int32) * 2 + ((bitlen + 31)/32) * 4;
	result = (VarInt *) palloc(len);
	SET_VARSIZE(result, len);
	result->bit_len = bitlen;

	if (n >> 3)
		memset(result->bit_dat, 0, n >> 3);

	if ((n & 31) == 0)
	{
		/* nice and easy */
		memcpy(result->bit_dat + (n >> 5), s->bit_dat, ((s->bit_len + 31) >> 3) & ~2);
	}
	else
	{
		/* slightly more complex */
		int32 rem = (n & 31);
		int32 x = (s->bit_len + 31) >> 5;
		int32 carry = 0;
		int32 i;

		for (i = 0; i < x; i++)
		{
			result->bit_dat[i + (n >> 5)] = (s->bit_dat[i] << rem) | carry;
			carry = s->bit_dat[i] >> (32 - rem);
		}
	}

	PG_RETURN_VARINT_P(result);
}

PG_FUNCTION_INFO_V1(varintshr);
Datum
varintshr(PG_FUNCTION_ARGS)
{
	VarInt	   *s = PG_GETARG_VARINT_P(0);
	int32 n = PG_GETARG_INT32(1);
	int4 len, bitlen;
	VarInt *result;

	if (s->bit_len < n)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid length in external bit string")));

	bitlen = s->bit_len - n;
	len = sizeof(int32) * 2 + ((bitlen + 31)/32) * 4;
	result = (VarInt *) palloc(len);
	SET_VARSIZE(result, len);
	result->bit_len = bitlen;

	if ((n & 31) == 0)
	{
		/* nice and easy */
		memcpy(result->bit_dat, s->bit_dat + (n >> 5), ((bitlen + 31) >> 5) << 2);
	}
	else
	{
		/* slightly more complex */
		int32 rem = (n & 31);
		int32 x = (s->bit_len + 31) >> 5;
		int32 carry = 0;
		int32 i;

		for (i = x-1; i >= 0; i--)
		{
			if (i < ((result->bit_len + 31) >> 5))
				result->bit_dat[i] = (s->bit_dat[i+(n>>5)] >> rem) | carry;
			carry = s->bit_dat[i+(n>>5)] << (32-rem);
		}
	}

	PG_RETURN_VARINT_P(result);
}