It works now. 1000 times faster than python

10 anni fa · 2b0119e50d
--- a/run-tests.py
+++ b/run-tests.py
@@ -16,10 +16,32 @@ def perm_ryser(a):
    terms=map(get_term, indeces) 
    return np.sum(terms)*((-1)**n)

 dimension=2
 real=np.ones((dimension, dimension))
 imag=np.ones((dimension, dimension))*0
 def explain_ryser(a):
    ''' the permanent calculated using the ryser formula. much faster than the naive approach '''
    n,n2=a.shape
    z=np.arange(n)
    irange=xrange(2**n)
    get_index=lambda i: (i & (1 << z)) != 0 
    for q in irange:
        print get_index(q)
    #get_term=lambda index: ((-1)**np.sum(index))*np.prod(np.sum(a[index,:], 0))
    #indeces=map(get_index, irange)
    #terms=map(get_term, indeces) 
    #return np.sum(terms)*((-1)**n)



 dimension=5
 real=np.random.uniform(-1, 1, dimension*dimension).reshape((dimension, dimension))
 imag=np.random.uniform(-1, 1, dimension*dimension).reshape((dimension, dimension))
 submatrix=real+1j*imag
 submatrix[0,0]*=2
 print lib.permanent(submatrix)
 print perm_ryser(submatrix)

 t=time.clock()
 for i in range(1000):
    perm_ryser(submatrix)
 print time.clock()-t

 t=time.clock()
 for i in range(1000):
    lib.permanent(submatrix)
 print time.clock()-t
--- a/src/permanent.c
+++ b/src/permanent.c
@@ -9,6 +9,21 @@
                    (x1) * PyArray_STRIDES(submatrix)[1])))
 #define SM_shape(x0) (int) PyArray_DIM(submatrix, x0)

 int countbits(unsigned int n) 
 {
    int q=n;
    q = (q & 0x5555555555555555) + ((q & 0xAAAAAAAAAAAAAAAA) >> 1);
    q = (q & 0x3333333333333333) + ((q & 0xCCCCCCCCCCCCCCCC) >> 2);
    q = (q & 0x0F0F0F0F0F0F0F0F) + ((q & 0xF0F0F0F0F0F0F0F0) >> 4);
    q = (q & 0x00FF00FF00FF00FF) + ((q & 0xFF00FF00FF00FF00) >> 8);
    q = (q & 0x0000FFFF0000FFFF) + ((q & 0xFFFF0000FFFF0000) >> 16);
    q = (q & 0x00000000FFFFFFFF) + ((q & 0xFFFFFFFF00000000) >> 32); // This last & isq't strictly qecessary.
    return q;
 }

 int bitparity (unsigned int n) { return 1 - (countbits(n) & 1)*2; }


 // Complex numbers
 static const npy_complex128 complex_one = {.real=1, .imag=0};
 static const npy_complex128 complex_zero = {.real=0, .imag=0};
@@ -20,8 +35,8 @@ static npy_complex128 complex_add(npy_complex128 a, npy_complex128 b) {
 }
 static npy_complex128 complex_prod(npy_complex128 a, npy_complex128 b) { 
    npy_complex128 x;
    x.real = a.real*b.real+a.imag*b.imag;
    x.imag = a.real*b.imag+a.imag*b.real;
    x.real = a.real*b.real - a.imag*b.imag;
    x.imag = a.imag*b.real + a.real*b.imag;
    return x;
 }

@@ -36,46 +51,28 @@ PyMODINIT_FUNC initpermanent(void) {                            // Module initia
  import_array();
 }

 inline int* dec2binarr(long n, int dim)
 {
    // note: res[dim] will save the sum res[0]+...+res[dim-1]
    int* res = (int*)calloc(dim + 1, sizeof(int));   
    int pos = dim - 1;
    // note: this will crash if dim < log_2(n)...
    while (n > 0)
    {
        res[pos] = n % 2;
        res[dim] += res[pos];
        n = n / 2; // integer division        
        pos--;
    }
    return res;
 }

 // Ryser's algorithm 
 static npy_complex128 perm_ryser(PyArrayObject *submatrix) {
    int n = (int) PyArray_DIM(submatrix, 0);
    int i = 0; int z = 0; int y = 0;
    npy_complex128 sum, prod;
    npy_complex128 rowsum, rowsumprod;
    npy_complex128 perm = complex_zero;
    int exp = 1 << n; 
    int i, y, z;

    // Iterate over exponentially many index strings
    for (i=0; i<exp; ++i) {
        prod = complex_one;
        rowsumprod = complex_one;
        for (y=0; y<n; ++y) {               // Rows
            sum = complex_zero;
            rowsum = complex_zero;
            for (z=0; z<n; ++z) {           // Columns
                if ((i && (1 << z)) != 0) { 
                    sum = complex_add(sum, SM(z,y)); 
                }
                if ((i & (1 << z)) != 0) { rowsum = complex_add(rowsum, SM(z,y)); }
            }
            prod = complex_prod(prod, sum);
            rowsumprod = complex_prod(rowsumprod, rowsum);
        }
        if (i%2 == 1) {prod.real*=-1; prod.imag*=-1;}
        perm = complex_add(perm, prod);
        int sign = bitparity(i); 
        perm.real+=sign*rowsumprod.real; perm.imag+=sign*rowsumprod.imag;
    }
    if (i%2 == 1) {perm.real*=-1; perm.imag*=-1;}
    if (n%2 == 1) {perm.real*=-1; perm.imag*=-1;}
    return perm;
 }