// Copyright 2010 The Code Cavern

// digit array format  call it da
// binary digit array format , call it bda
// so we can have other bases and or nail
// interleaved array format  i2 is 2 streams
//jay_i2da_add_n(....)
// i4 is 4 streams
//jay_i4da_add_n(....)


typedef unsigned int bda_uint;
typedef unsigned long bda_digit;
typedef bda_digit* bda_digit_ptr;
typedef const bda_digit_ptr bda_digit_srcptr;
typedef unsigned long bda_bit;
typedef signed long bda_len;

#define BDA_DIGIT_BITS	(sizeof((bda_digit)0)*8)
#define BDA_DIGIT_MAX	((bda_digit)(-1))
#define BDA_HDIGIT_MAX	(BDA_DIGIT_MAX>>(BDA_DIGIT_BITS/2))
#define BDA_DIGIT_TOP_BIT       (((bda_digit)1)<<(BDA_DIGIT_BITS-1))


// x=y+z+ci return carry co
#define bda_digit_adc(co,x,y,z,ci)	do{	\
bda_bit		_co=0;				\
bda_digit	_x;				\
bda_digit	_y=(y);				\
bda_bit		_ci=(ci);			\
_x=_y+z;					\
if(_x<_y)_co=1;					\
_x=_x+_ci;					\
if(_x<_ci)_co=1;				\
(co)=_co;					\
(x)=_x;						\
}while(0)

#define bda_digit_add(co,x,y,z)	bda_digit_adc(co,x,y,z,0)

#define bda_2digit_add(h1,l1,h2,l2,h3,l3)	do{	\
bda_digit	_h1;					\
bda_digit	_l1;					\
bda_digit	_c;					\
bda_digit_add(_c,_l1,l2,l3);				\
bda_digit_adc(_c,_h1,h2,h3,_c);				\
(h1)=_h1;						\
(l1)=_l1;						\
}while(0);

// x=y-z-ci return borrow co
#define bda_digit_sbb(co,x,y,z,ci)	do{	\
bda_bit	_co=0;				\
bda_digit	_x;				\
bda_digit	_y=(y);				\
bda_digit	_z=(z);				\
bda_bit	_ci=(ci);			\
if(_z>_y)_co=1;					\
_x=_y-_z;					\
if(_ci>_x)_co=1;				\
_x=_x-_ci;					\
(co)=_co;					\
(x)=_x;						\
}while(0)

#define bda_digit_sub(co,x,y,z) bda_digit_sbb(co,x,y,z,0)

#define bda_2digit_sub(h1,l1,h2,l2,h3,l3)	do{	\
bda_digit	_h1;					\
bda_digit	_l1;					\
bda_digit	_c;					\
bda_digit_sub(_c,_l1,l2,l3);				\
bda_digit_sbb(_c,_h1,h2,h3,_c);				\
(h1)=_h1;						\
(l1)=_l1;						\
}while(0);

// ret c=pop(x)
#define bda_digit_popcount(c,x)	do{	\
bda_uint	_c=0;			\
bda_digit	_x=(x);			\
while(_x!=0){_c+=_x%2;_x/=2;}		\
(c)=_c;					\
}while(0)

// ret c where 2^c | x
#define bda_digit_ctz(c,x)	do{	\
bda_uint	_c=0;			\
bda_digit	_x=(x);			\
if(_x==0)				\
  {_c=BDA_DIGIT_BITS;}			\
else					\
  {while(_x%2==0){_x/=2;_c++;}}		\
(c)=_c;					\
}while(0)

// ret c where x*2^c is normalized
#define bda_digit_clz(c,x)	do{			\
bda_uint	_c=0;					\
bda_digit	_x=(x);					\
if(_x==0)						\
  {_c=BDA_DIGIT_BITS;}					\
else							\
  {while((_x&BDA_DIGIT_TOP_BIT)==0){_x*=2;_c++;}}	\
(c)=_c;							\
}while(0)

// x=normalize(y)
#define bda_digit_norm(x,y)	do{	\
bda_digit	_x;			\
bda_digit	_y=(y);			\
bda_uint	_c1;			\
bda_digit_clz(_c1,_y);			\
_x=_y<<_c1;				\
(x)=_x;					\
}while(0)

// x=rev(y)
#define bda_digit_rev(x,y)	do{	\
bda_digit	_x=0;			\
bda_digit	_y=(y);			\
bda_uint	_i;			\
for(_i=0;_i<BDA_DIGIT_BITS;_i++)	\
   {_x|=(_y%2);_y/=2;_x*=2;}		\
(x)=_x;					\
}while(0)

//c=flg(x)
#define bda_digit_flg(c,x)	do{	\
bda_uint	_c1;			\
bda_digit	_x=(x);			\
bda_digit_clz(_c1,_x);			\
_c1=BDA_DIGIT_BITS-1-_c1;		\
(c)=_c1;				\
}while(0)

//c=clg(x)
#define bda_digit_clg(c,x)	do{	\
bda_uint	_c1;			\
bda_digit	_x=(x);			\
bda_digit_clz(_c1,_x-1);		\
_c1=BDA_DIGIT_BITS-_c1;			\
(c)=_c1;				\
}while(0)

// ret i such that i*x==1 mod 2^64
#define bda_digit_binv(i,x)	do{	\
bda_digit	_i;			\
bda_digit	_x=(x);			\
bda_uint	_c=3;			\
_i=_x;					\
while(_c<BDA_DIGIT_BITS)		\
     {_i=_i*(2-_x*_i);_c*=2;}		\
(i)=_i;					\
}while(0)

// x=gcd(y,z)
#define bda_digit_gcd(x,y,z)	do{	\
bda_digit	_y=(y);			\
bda_digit	_z=(z);			\
bda_digit	_r;			\
while(_z!=0)				\
     {_r=_y%_z;				\
      _y=_z;				\
      _z=_r;}				\
(x)=_y;					\
}while(0)

// x=(y+z)/2 with no overflow
#define bda_digit_avg(x,y,z)	do{	\
bda_digit	_x;			\
bda_digit	_y=(y);			\
bda_digit	_z=(z);			\
_x=(_y&_z)+((_y^_z)/2);			\
(x)=_x;					\
}while(0)

//r=sqrt(y)
#define bda_digit_sqrt(r,y)	do{	\
bda_digit	_r;			\
bda_digit	_y=(y);			\
bda_digit	_t;			\
_t=_y;					\
do{_r=_t;				\
   _t=_y/_r;				\
   bda_digit_avg(_t,_t,_r);		\
  }while(_t<_r);			\
(r)=_r;					\
}while(0)

//  (h,l)=(x,y)<<c
#define bda_digit_shld(h,l,x,y,c)	do{	\
bda_digit	_h;				\
bda_digit	_l;				\
bda_digit	_x=(x);				\
bda_digit	_y=(y);				\
bda_uint	_c=(c);				\
_h=(_x<<_c)|(_y>>(BDA_DIGIT_BITS-1-_c)>>1);	\
_l=_y<<_c;					\
(h)=_h;						\
(l)=_l;						\
}while(0)

//  (h,l)=(x,y)>>c
#define bda_digit_shrd(h,l,x,y,c)	do{	\
bda_digit	_h;				\
bda_digit	_l;				\
bda_digit	_x=(x);				\
bda_digit	_y=(y);				\
bda_uint	_c=(c);				\
_l=(_y>>_c)|(_x<<(BDA_DIGIT_BITS-1-_c)<<1);	\
_h=_x>>_c;					\
(h)=_h;						\
(l)=_l;						\
}while(0)

// (h,l)=x*y
#define bda_digit_mul(h,l,x,y)	do{				\
bda_digit	_hi;						\
bda_digit	_lo;						\
bda_digit	_x=(x);						\
bda_digit	_y=(y);						\
bda_digit	_xl=_x&BDA_HDIGIT_MAX;				\
bda_digit	_yl=_y&BDA_HDIGIT_MAX;				\
bda_digit	_xh=_x>>(BDA_DIGIT_BITS/2);			\
bda_digit	_yh=_y>>(BDA_DIGIT_BITS/2);			\
bda_digit	_t0=_xl*_yl;					\
bda_digit	_t1=_xl*_yh;					\
bda_digit	_t2=_xh*_yl;					\
bda_digit	_t3=_xh*_yh;					\
bda_digit_shld(_hi,_lo,0,_t1,BDA_DIGIT_BITS/2);			\
bda_2digit_add(_t3,_t0,_t3,_t0,_hi,_lo);			\
bda_digit_shld(_hi,_lo,0,_t2,BDA_DIGIT_BITS/2);			\
bda_2digit_add(_hi,_lo,_hi,_lo,_t3,_t0);			\
(l)=_lo;							\
(h)=_hi;							\
}while(0)



// div and inverse etc
// cbrt root pow isprime issqr isperfpow
