Ver código fonte

It works now. 1000 times faster than python

master
Pete Shadbolt 10 anos atrás
pai
commit
2b0119e50d
2 arquivos alterados com 54 adições e 35 exclusões
  1. +28
    -6
      run-tests.py
  2. +26
    -29
      src/permanent.c

+ 28
- 6
run-tests.py Ver arquivo

@@ -16,10 +16,32 @@ def perm_ryser(a):
terms=map(get_term, indeces)
return np.sum(terms)*((-1)**n)

dimension=2
real=np.ones((dimension, dimension))
imag=np.ones((dimension, dimension))*0
def explain_ryser(a):
''' the permanent calculated using the ryser formula. much faster than the naive approach '''
n,n2=a.shape
z=np.arange(n)
irange=xrange(2**n)
get_index=lambda i: (i & (1 << z)) != 0
for q in irange:
print get_index(q)
#get_term=lambda index: ((-1)**np.sum(index))*np.prod(np.sum(a[index,:], 0))
#indeces=map(get_index, irange)
#terms=map(get_term, indeces)
#return np.sum(terms)*((-1)**n)



dimension=5
real=np.random.uniform(-1, 1, dimension*dimension).reshape((dimension, dimension))
imag=np.random.uniform(-1, 1, dimension*dimension).reshape((dimension, dimension))
submatrix=real+1j*imag
submatrix[0,0]*=2
print lib.permanent(submatrix)
print perm_ryser(submatrix)

t=time.clock()
for i in range(1000):
perm_ryser(submatrix)
print time.clock()-t

t=time.clock()
for i in range(1000):
lib.permanent(submatrix)
print time.clock()-t

+ 26
- 29
src/permanent.c Ver arquivo

@@ -9,6 +9,21 @@
(x1) * PyArray_STRIDES(submatrix)[1])))
#define SM_shape(x0) (int) PyArray_DIM(submatrix, x0)

int countbits(unsigned int n)
{
int q=n;
q = (q & 0x5555555555555555) + ((q & 0xAAAAAAAAAAAAAAAA) >> 1);
q = (q & 0x3333333333333333) + ((q & 0xCCCCCCCCCCCCCCCC) >> 2);
q = (q & 0x0F0F0F0F0F0F0F0F) + ((q & 0xF0F0F0F0F0F0F0F0) >> 4);
q = (q & 0x00FF00FF00FF00FF) + ((q & 0xFF00FF00FF00FF00) >> 8);
q = (q & 0x0000FFFF0000FFFF) + ((q & 0xFFFF0000FFFF0000) >> 16);
q = (q & 0x00000000FFFFFFFF) + ((q & 0xFFFFFFFF00000000) >> 32); // This last & isq't strictly qecessary.
return q;
}

int bitparity (unsigned int n) { return 1 - (countbits(n) & 1)*2; }


// Complex numbers
static const npy_complex128 complex_one = {.real=1, .imag=0};
static const npy_complex128 complex_zero = {.real=0, .imag=0};
@@ -20,8 +35,8 @@ static npy_complex128 complex_add(npy_complex128 a, npy_complex128 b) {
}
static npy_complex128 complex_prod(npy_complex128 a, npy_complex128 b) {
npy_complex128 x;
x.real = a.real*b.real+a.imag*b.imag;
x.imag = a.real*b.imag+a.imag*b.real;
x.real = a.real*b.real - a.imag*b.imag;
x.imag = a.imag*b.real + a.real*b.imag;
return x;
}

@@ -36,46 +51,28 @@ PyMODINIT_FUNC initpermanent(void) { // Module initia
import_array();
}

inline int* dec2binarr(long n, int dim)
{
// note: res[dim] will save the sum res[0]+...+res[dim-1]
int* res = (int*)calloc(dim + 1, sizeof(int));
int pos = dim - 1;
// note: this will crash if dim < log_2(n)...
while (n > 0)
{
res[pos] = n % 2;
res[dim] += res[pos];
n = n / 2; // integer division
pos--;
}
return res;
}

// Ryser's algorithm
static npy_complex128 perm_ryser(PyArrayObject *submatrix) {
int n = (int) PyArray_DIM(submatrix, 0);
int i = 0; int z = 0; int y = 0;
npy_complex128 sum, prod;
npy_complex128 rowsum, rowsumprod;
npy_complex128 perm = complex_zero;
int exp = 1 << n;
int i, y, z;

// Iterate over exponentially many index strings
for (i=0; i<exp; ++i) {
prod = complex_one;
rowsumprod = complex_one;
for (y=0; y<n; ++y) { // Rows
sum = complex_zero;
rowsum = complex_zero;
for (z=0; z<n; ++z) { // Columns
if ((i && (1 << z)) != 0) {
sum = complex_add(sum, SM(z,y));
}
if ((i & (1 << z)) != 0) { rowsum = complex_add(rowsum, SM(z,y)); }
}
prod = complex_prod(prod, sum);
rowsumprod = complex_prod(rowsumprod, rowsum);
}
if (i%2 == 1) {prod.real*=-1; prod.imag*=-1;}
perm = complex_add(perm, prod);
int sign = bitparity(i);
perm.real+=sign*rowsumprod.real; perm.imag+=sign*rowsumprod.imag;
}
if (i%2 == 1) {perm.real*=-1; perm.imag*=-1;}
if (n%2 == 1) {perm.real*=-1; perm.imag*=-1;}
return perm;
}



Carregando…
Cancelar
Salvar