File: [pgFoundry] / varint / varint / varint.c (download)
Revision 1.1, Sun Sep 21 05:41:01 2008 UTC (23 months, 1 week ago) by jeremyd
Branch: MAIN
Initial commit of an arbitrary precision unsigned integer data type with
bitwise operators.
The following operators are implemented so far (not optimally, but I'm not
worried about that yet):
& (bitwise and)
| (bitwise or)
^ (bitwise xor)
<< (bitwise left shift)
>> (bitwise right shift)
+ (arithmetic add)
- (arithmetic subtract)
* (arithmetic multiply)
I plan to implement comparison operators and division next.
|
/* this file was originally based on pgsql/src/backend/utils/adt/varbit.c */
#include "postgres.h"
#include <math.h>
#include "access/htup.h"
#include "libpq/pqformat.h"
#include "utils/array.h"
#include "utils/varbit.h"
#include "varint.h"
#define HEXDIG(z) ((z)<10 ? ((z)+'0') : ((z)-10+'A'))
PG_MODULE_MAGIC;
/* common code for bittypmodin and varbittypmodin */
static int32
anybit_typmodin(ArrayType *ta, const char *typename)
{
int32 typmod;
int32 *tl;
int n;
tl = ArrayGetIntegerTypmods(ta, &n);
/*
* we're not too tense about good error message here because grammar
* shouldn't allow wrong number of modifiers for BIT
*/
if (n != 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid type modifier")));
if (*tl < 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("length for type %s must be at least 1",
typename)));
if (*tl > (MaxAttrSize * BITS_PER_BYTE))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("length for type %s cannot exceed %d",
typename, MaxAttrSize * BITS_PER_BYTE)));
typmod = *tl;
return typmod;
}
/* common code for bittypmodout and varbittypmodout */
static char *
anybit_typmodout(int32 typmod)
{
char *res = (char *) palloc(64);
if (typmod >= 0)
snprintf(res, 64, "(%d)", typmod);
else
*res = '\0';
return res;
}
/*----------
* attypmod -- contains the length of the bit string in bits, or for
* varying bits the maximum length.
*
* The data structure contains the following elements:
* header -- length of the whole data structure (incl header)
* in bytes. (as with all varying length datatypes)
* data section -- private data section for the bits data structures
* bitlength -- length of the bit string in bits
* bitdata -- bit string, most significant byte first
*
* The length of the bitdata vector should always be exactly as many
* bytes as are needed for the given bitlength. If the bitlength is
* not a multiple of 8, the extra low-order padding bits of the last
* byte must be zeroes.
*----------
*/
/*
* bit_in -
* converts a char string to the internal representation of a bitstring.
* The length is determined by the number of bits required plus
* VARHDRSZ bytes or from atttypmod.
*/
PG_FUNCTION_INFO_V1(varint_in);
Datum
varint_in(PG_FUNCTION_ARGS)
{
char *input_string = PG_GETARG_CSTRING(0);
#ifdef NOT_USED
Oid typelem = PG_GETARG_OID(1);
#endif
int32 atttypmod = PG_GETARG_INT32(2);
VarInt *result; /* The resulting bit string */
char *sp, *ep; /* pointer into the character string */
int len, /* Length of the whole data structure */
bitlen, /* Number of bits in the bit string */
slen; /* Length of the input string */
uint32 *r;
uint32 x, bc;
enum {
BASE_2,
BASE_10,
BASE_16
} base_of_input;
/* Check that the first character is a b or an x */
if (input_string[0] == 'b' || input_string[0] == 'B')
{
base_of_input = BASE_2;
sp = input_string + 1;
}
else if (input_string[0] == 'x' || input_string[0] == 'X')
{
base_of_input = BASE_16;
sp = input_string + 1;
}
else
{
/*
* Otherwise it's decimal. This allows things like cast('1234' as varint)
* to work transparently.
*/
base_of_input = BASE_10;
sp = input_string;
}
slen = strlen(sp);
/* Determine bitlength from input string */
switch (base_of_input)
{
case BASE_2:
bitlen = slen;
break;
case BASE_10:
/* estimate the maximum number of bits needed to represent this
* decimal number. We know how many decimal digits it is by
* the length of the string, which is equivilant to
* floor(log10(x)). Add one to account for the fraction part,
* and divide by log10(2) to convert the value to log2(x) by
* the change of base theorem. The ceil of that is the maximum
* number of bits that a decimal number with slen digits could
* take
*/
bitlen = ceil((slen+1) / log10(2));
break;
case BASE_16:
bitlen = slen * 4;
break;
default:
elog(ERROR, "ARRGH");
}
/*
* Sometimes atttypmod is not supplied. If it is supplied we need to make
* sure that the bitstring fits.
*/
if (atttypmod <= 0)
atttypmod = bitlen;
else if (bitlen > atttypmod)
ereport(ERROR,
(errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
errmsg("bit string length %d does not match type bit(%d)",
bitlen, atttypmod)));
len = sizeof(int32) * 2 + ((atttypmod + 31)/32) * 4;
/* set to 0 so that *r is always initialised and string is zero-padded */
result = (VarInt *) palloc0(len);
SET_VARSIZE(result, len);
result->bit_len = atttypmod;
r = result->bit_dat;
ep = sp + slen - 1;
switch (base_of_input)
{
case BASE_2:
/* Parse the bit representation of the string */
/* We know it fits, as bitlen was compared to atttypmod */
x = 1;
for (; ep >= sp; ep--)
{
if (*ep == '1')
*r |= x;
else if (*ep != '0')
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%c\" is not a valid binary digit",
*ep)));
x <<= 1;
if (x == 0)
{
x = 1;
r++;
}
}
break;
case BASE_10:
/* Parse the decimal representation of the string */
case BASE_16:
/* Parse the hex representation of the string */
bc = 0;
for (; ep >= sp; ep--)
{
if (*ep >= '0' && *ep <= '9')
x = (uint32) (*ep - '0');
else if (*ep >= 'A' && *ep <= 'F')
x = (uint32) (*ep - 'A') + 10;
else if (*ep >= 'a' && *ep <= 'f')
x = (uint32) (*ep - 'a') + 10;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%c\" is not a valid hexadecimal digit",
*ep)));
*r |= (x << bc);
bc += 4;
if (bc >= 32)
{
r++;
bc = 0;
}
}
break;
}
PG_RETURN_VARINT_P(result);
}
PG_FUNCTION_INFO_V1(varint_out);
Datum
varint_out(PG_FUNCTION_ARGS)
{
/* This is how one would print a hex string, in case someone wants to
write a formatting function. */
VarInt *s = PG_GETARG_VARINT_P(0);
char *result,
*r;
uint32 *sp;
uint32 x, bc;
int i,
len;
len = (s->bit_len + 3) / 4;
result = (char *) palloc(len + 2);
sp = s->bit_dat;
r = result + len;
*result = 'X';
x = 0xF;
bc = 0;
for (i = 0; i < len; i++)
{
*r-- = HEXDIG((*sp & x) >> bc);
x <<= 4;
if (x != 0)
{
bc += 4;
}
else
{
bc = 0;
x = 0xF;
sp++;
}
}
*(result+len+1) = '\0';
PG_RETURN_CSTRING(result);
}
PG_FUNCTION_INFO_V1(varinttypmodin);
Datum
varinttypmodin(PG_FUNCTION_ARGS)
{
ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
PG_RETURN_INT32(anybit_typmodin(ta, "varint"));
}
PG_FUNCTION_INFO_V1(varinttypmodout);
Datum
varinttypmodout(PG_FUNCTION_ARGS)
{
int32 typmod = PG_GETARG_INT32(0);
PG_RETURN_CSTRING(anybit_typmodout(typmod));
}
PG_FUNCTION_INFO_V1(varintand);
Datum
varintand(PG_FUNCTION_ARGS)
{
VarInt *a = PG_GETARG_VARINT_P(0);
VarInt *b = PG_GETARG_VARINT_P(1);
int4 len, bitlen;
VarInt *result;
int4 i, amax, bmax, rmin;
amax = ((a->bit_len + 31)/32);
bmax = ((b->bit_len + 31)/32);
rmin = (amax < bmax) ? amax : bmax;
bitlen = (a->bit_len > b->bit_len) ? a->bit_len : b->bit_len;
len = sizeof(int32) * 2 + ((amax > bmax) ? amax : bmax) * 4;
result = (VarInt *) palloc(len);
SET_VARSIZE(result, len);
result->bit_len = bitlen;
memset(result->bit_dat + rmin, 0, abs(amax-bmax) * 4);
for (i = 0; i < rmin; i++)
result->bit_dat[i] = a->bit_dat[i] & b->bit_dat[i];
PG_RETURN_VARINT_P(result);
}
PG_FUNCTION_INFO_V1(varintor);
Datum
varintor(PG_FUNCTION_ARGS)
{
VarInt *a = PG_GETARG_VARINT_P(0);
VarInt *b = PG_GETARG_VARINT_P(1);
int4 len, bitlen;
VarInt *result;
int4 i, amax, bmax, rmin;
amax = ((a->bit_len + 31)/32);
bmax = ((b->bit_len + 31)/32);
rmin = (amax < bmax) ? amax : bmax;
bitlen = (a->bit_len > b->bit_len) ? a->bit_len : b->bit_len;
len = sizeof(int32) * 2 + ((amax > bmax) ? amax : bmax) * 4;
result = (VarInt *) palloc(len);
SET_VARSIZE(result, len);
result->bit_len = bitlen;
memcpy(result->bit_dat + rmin, (amax > bmax) ? (a->bit_dat + rmin) : (b->bit_dat + rmin), abs(amax-bmax) * 4);
for (i = 0; i < rmin; i++)
result->bit_dat[i] = a->bit_dat[i] | b->bit_dat[i];
PG_RETURN_VARINT_P(result);
}
PG_FUNCTION_INFO_V1(varintxor);
Datum
varintxor(PG_FUNCTION_ARGS)
{
VarInt *a = PG_GETARG_VARINT_P(0);
VarInt *b = PG_GETARG_VARINT_P(1);
int4 len, bitlen;
VarInt *result;
int4 i, amax, bmax, rmin;
amax = ((a->bit_len + 31)/32);
bmax = ((b->bit_len + 31)/32);
rmin = (amax < bmax) ? amax : bmax;
bitlen = (a->bit_len > b->bit_len) ? a->bit_len : b->bit_len;
len = sizeof(int32) * 2 + ((amax > bmax) ? amax : bmax) * 4;
result = (VarInt *) palloc(len);
SET_VARSIZE(result, len);
result->bit_len = bitlen;
memcpy(result->bit_dat + rmin, (amax > bmax) ? (a->bit_dat + rmin) : (b->bit_dat + rmin), abs(amax-bmax) * 4);
for (i = 0; i < rmin; i++)
result->bit_dat[i] = a->bit_dat[i] ^ b->bit_dat[i];
PG_RETURN_VARINT_P(result);
}
PG_FUNCTION_INFO_V1(varintadd);
Datum
varintadd(PG_FUNCTION_ARGS)
{
VarInt *a = PG_GETARG_VARINT_P(0);
VarInt *b = PG_GETARG_VARINT_P(1);
int4 len, bitlen;
VarInt *result;
int4 i, amax, bmax, rmin, rmax, carry;
uint32 *maxdat;
uint64 tmp;
amax = ((a->bit_len + 31)/32);
bmax = ((b->bit_len + 31)/32);
rmin = (amax < bmax) ? amax : bmax;
rmax = (amax > bmax) ? amax : bmax;
maxdat = (amax > bmax) ? a->bit_dat : b->bit_dat;
// got to have room for the carry
bitlen = ((a->bit_len > b->bit_len) ? a->bit_len : b->bit_len) + 1;
len = sizeof(int32) * 2 + ((bitlen + 31) / 32) * 4;
result = (VarInt *) palloc(len);
SET_VARSIZE(result, len);
result->bit_len = bitlen;
carry = 0;
for (i = 0; i < rmin; i++)
{
tmp = a->bit_dat[i];
tmp += b->bit_dat[i];
tmp += carry;
carry = ((tmp & (((uint64)1) << 32)) != 0) ? 1 : 0;
result->bit_dat[i] = (uint32)tmp;
}
for (; i < rmax; i++)
{
tmp = maxdat[i];
tmp += carry;
carry = ((tmp & (((uint64)1) << 32)) != 0) ? 1 : 0;
result->bit_dat[i] = (uint32)tmp;
}
if (((bitlen+31)/32) > i)
{
if (carry != 0)
{
result->bit_dat[i] = 1;
}
else
{
result->bit_len--;
SET_VARSIZE(result, VARSIZE(result) - 4);
}
}
else if ((result->bit_dat[i-1] & (1 << ((result->bit_len & 31) - 1))) == 0)
{
result->bit_len--;
}
PG_RETURN_VARINT_P(result);
}
PG_FUNCTION_INFO_V1(varintsub);
Datum
varintsub(PG_FUNCTION_ARGS)
{
VarInt *a = PG_GETARG_VARINT_P(0);
VarInt *b = PG_GETARG_VARINT_P(1);
VarInt *negb, *tmpb;
VarInt *result;
VarInt vione = {0, 1, {1}};
tmpb = palloc0(VARSIZE(b) + 4);
tmpb->bit_len = ((a->bit_len > b->bit_len) ? a->bit_len : b->bit_len) + 1;
SET_VARSIZE(tmpb, VARSIZE(b) + 4);
memcpy (tmpb->bit_dat, b->bit_dat, (b->bit_len + 31)/32 * 4);
negb = DatumGetVarIntP(DirectFunctionCall1(varintnot, VarIntPGetDatum(tmpb)));
pfree(tmpb);
tmpb = negb;
negb = DatumGetVarIntP(DirectFunctionCall2(varintadd, VarIntPGetDatum(tmpb), VarIntPGetDatum(&vione)));
pfree(tmpb);
result = DatumGetVarIntP(DirectFunctionCall2(varintadd, VarIntPGetDatum(a), VarIntPGetDatum(negb)));
result->bit_len--;
result->bit_dat[(result->bit_len + 31)/32-1] &= (1 << (result->bit_len & 31)) - 1;
PG_RETURN_VARINT_P(result);
}
PG_FUNCTION_INFO_V1(varintmul);
Datum
varintmul(PG_FUNCTION_ARGS)
{
VarInt *a = PG_GETARG_VARINT_P(0);
VarInt *b = PG_GETARG_VARINT_P(1);
VarInt *result = (VarInt*)palloc0(sizeof(VarInt));
int4 i, x;
if (a->bit_len > b->bit_len)
{
VarInt *c = a;
a = b;
b = c;
}
x = ((a->bit_len + 31)/32);
for (i = 0; i < x-1; i++)
{
int j;
for (j = 0; j < 32; ++j)
{
VarInt * tmp;
if (a->bit_dat[i] & (1 << j))
{
tmp = DatumGetVarIntP(DirectFunctionCall2(varintadd, VarIntPGetDatum(result), VarIntPGetDatum(b)));
pfree(result);
result = tmp;
}
tmp = DatumGetVarIntP(DirectFunctionCall2(varintshl, VarIntPGetDatum(b), Int32GetDatum(1)));
if ((Pointer)b != PG_GETARG_POINTER(0) && (Pointer)b != PG_GETARG_POINTER(1))
pfree(b);
b = tmp;
}
}
if (a->bit_len & 31)
{
int j;
for (j = 0; j < (a->bit_len & 31); ++j)
{
VarInt * tmp;
if (a->bit_dat[i] & (1 << j))
{
tmp = DatumGetVarIntP(DirectFunctionCall2(varintadd, VarIntPGetDatum(result), VarIntPGetDatum(b)));
pfree(result);
result = tmp;
}
tmp = DatumGetVarIntP(DirectFunctionCall2(varintshl, VarIntPGetDatum(b), Int32GetDatum(1)));
if ((Pointer)b != PG_GETARG_POINTER(0) && (Pointer)b != PG_GETARG_POINTER(1))
pfree(b);
b = tmp;
}
}
if ((Pointer)b != PG_GETARG_POINTER(0) && (Pointer)b != PG_GETARG_POINTER(1))
pfree(b);
PG_RETURN_VARINT_P(result);
}
PG_FUNCTION_INFO_V1(varintnot);
Datum
varintnot(PG_FUNCTION_ARGS)
{
VarInt *a = PG_GETARG_VARINT_P(0);
int4 len;
VarInt *result;
int4 i, x;
x = ((a->bit_len + 31)/32);
len = sizeof(int32) * 2 + x * 4;
result = (VarInt *) palloc(len);
SET_VARSIZE(result, len);
result->bit_len = a->bit_len;
for (i = 0; i < x; i++)
result->bit_dat[i] = ~a->bit_dat[i];
if (a->bit_len & 31)
result->bit_dat[x-1] &= (1 << (a->bit_len & 31)) - 1;
PG_RETURN_VARINT_P(result);
}
PG_FUNCTION_INFO_V1(varintshl);
Datum
varintshl(PG_FUNCTION_ARGS)
{
VarInt *s = PG_GETARG_VARINT_P(0);
int32 n = PG_GETARG_INT32(1);
int4 len, bitlen;
VarInt *result;
bitlen = s->bit_len + n;
len = sizeof(int32) * 2 + ((bitlen + 31)/32) * 4;
result = (VarInt *) palloc(len);
SET_VARSIZE(result, len);
result->bit_len = bitlen;
if (n >> 3)
memset(result->bit_dat, 0, n >> 3);
if ((n & 31) == 0)
{
/* nice and easy */
memcpy(result->bit_dat + (n >> 5), s->bit_dat, ((s->bit_len + 31) >> 3) & ~2);
}
else
{
/* slightly more complex */
int32 rem = (n & 31);
int32 x = (s->bit_len + 31) >> 5;
int32 carry = 0;
int32 i;
for (i = 0; i < x; i++)
{
result->bit_dat[i + (n >> 5)] = (s->bit_dat[i] << rem) | carry;
carry = s->bit_dat[i] >> (32 - rem);
}
}
PG_RETURN_VARINT_P(result);
}
PG_FUNCTION_INFO_V1(varintshr);
Datum
varintshr(PG_FUNCTION_ARGS)
{
VarInt *s = PG_GETARG_VARINT_P(0);
int32 n = PG_GETARG_INT32(1);
int4 len, bitlen;
VarInt *result;
if (s->bit_len < n)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid length in external bit string")));
bitlen = s->bit_len - n;
len = sizeof(int32) * 2 + ((bitlen + 31)/32) * 4;
result = (VarInt *) palloc(len);
SET_VARSIZE(result, len);
result->bit_len = bitlen;
if ((n & 31) == 0)
{
/* nice and easy */
memcpy(result->bit_dat, s->bit_dat + (n >> 5), ((bitlen + 31) >> 5) << 2);
}
else
{
/* slightly more complex */
int32 rem = (n & 31);
int32 x = (s->bit_len + 31) >> 5;
int32 carry = 0;
int32 i;
for (i = x-1; i >= 0; i--)
{
if (i < ((result->bit_len + 31) >> 5))
result->bit_dat[i] = (s->bit_dat[i+(n>>5)] >> rem) | carry;
carry = s->bit_dat[i+(n>>5)] << (32-rem);
}
}
PG_RETURN_VARINT_P(result);
}