33 #include "cstripack.h" 
   46 double nfft_elapsed_seconds(ticks t1, ticks t0)
 
   50   return elapsed(t1,t0) / TICKS_PER_SECOND;
 
   86   for (t = 1; t < d; t++)
 
   87     sum = sum * N[t] + idx[t];
 
  106 static void bspline_help(
int k, 
double x, 
double *scratch, 
int j, 
int ug,
 
  114   for (i = og + r - k + 1, idx = og; idx >= ug; i--, idx--)
 
  116     a = ((R)(x - i)) / ((R)(k - j));
 
  117     scratch[idx] = (1 - a) * scratch[idx-1] + a * scratch[idx];
 
  138       r=(int)(ceil(x)-1.0);
 
  140       for(idx=0; idx<k; idx++)
 
  152       for(j=1, og=g2+1; j<=g1; j++, og++)
 
  154     a = (x - r + k - 1 - og)/(k - j);
 
  155     scratch[og] = (1 - a) * scratch[og-1];
 
  156     bspline_help(k,x,scratch,j,ug+1,og-1,r);
 
  157     a = (x - r + k - 1 - ug)/(k - j);
 
  158     scratch[ug] = a * scratch[ug];
 
  160       for(og-- ; j<=g2; j++)
 
  162     bspline_help(k,x,scratch,j,ug+1,og,r);
 
  163     a = (x - r + k - 1 - ug)/(k - j);
 
  164     scratch[ug] = a * scratch[ug];
 
  169     bspline_help(k,x,scratch,j,ug,og,r);
 
  171       result_value = scratch[k-1];
 
  173   return(result_value);
 
  183   for(k=0,dot=0; k<n; k++)
 
  184     dot+=conj(x[k])*x[k];
 
  196   for(k=0,dot=0; k<n; k++)
 
  210   for(k=0,dot=0.0; k<n; k++)
 
  211     dot+=w[k]*conj(x[k])*x[k];
 
  223   for(k=0,dot=0.0; k<n; k++)
 
  238   for(k=0,dot=0.0; k<n; k++)
 
  239     dot+=w[k]*w2[k]*w2[k]*conj(x[k])*x[k];
 
  252   for(k=0,dot=0.0; k<n; k++)
 
  253     dot+=w2[k]*w2[k]*conj(x[k])*x[k];
 
  414     x[k]=a*x[k]+w[k]*y[k];
 
  424     x[k]=a*x[k]+w[k]*y[k];
 
  430   int d_pre, d_act, d_post;
 
  431   int N_pre, N_act, N_post;
 
  432   int k_pre, k_act, k_post;
 
  435   double _Complex x_swap;
 
  437   for(d_act=0;d_act<d;d_act++)
 
  439       for(d_pre=0, N_pre=1;d_pre<d_act;d_pre++)
 
  444       for(d_post=d_act+1, N_post=1;d_post<d;d_post++)
 
  447       for(k_pre=0;k_pre<N_pre;k_pre++)
 
  448   for(k_act=0;k_act<N_act/2;k_act++)
 
  449     for(k_post=0;k_post<N_post;k_post++)
 
  451         k=(k_pre*N_act+k_act)*N_post+k_post;
 
  452         k_swap=(k_pre*N_act+k_act+N_act/2)*N_post+k_post;
 
  469       printf (
"\n %s, adr=%p\n", text, (
void*)x);
 
  483     printf(
"%d,\n", x[k]);
 
  488 void X(vpr_double)(R *x, 
const int n, 
const char *text)
 
  494     printf(
"null pointer\n");
 
  501     printf (
"\n %s, adr=%p\n", text, (
void*)x);
 
  503     for (k = 0; k < n; k++)
 
  508       printf(
"%+.1" FE 
",", x[k]);
 
  518     for (k = 0; k < n; k++)
 
  519       printf(
"%+" FE 
",\n", x[k]);
 
  525 void X(vpr_complex)(C *x, 
const int n, 
const char *text)
 
  531     printf(
"\n %s, adr=%p\n", text, (
void*)x);
 
  532     for (k = 0; k < n; k++)
 
  537       printf(
"%+.1" FE 
"%+.1" FE 
"i,", CREAL(x[k]), CIMAG(x[k]));
 
  546     for (k = 0; k < n; k++)
 
  547       printf(
"%+" FE 
"%+" FE 
"i,\n", CREAL(x[k]), CIMAG(x[k]));
 
  552 void X(vrand_unit_complex)(C *x, 
const int n)
 
  556   for (k = 0; k < n; k++)
 
  557     x[k] = nfft_drand48() + II*nfft_drand48();
 
  560 void X(vrand_shifted_unit_double)(R *x, 
const int n)
 
  564   for (k = 0; k < n; k++)
 
  565     x[k] = nfft_drand48() - K(0.5);
 
  569 void X(voronoi_weights_1d)(R *w, R *x, 
const int M)
 
  573   w[0] = (x[1]-x[0])/K(2.0);
 
  575   for(j = 1; j < M-1; j++)
 
  576     w[j] = (x[j+1]-x[j-1])/K(2.0);
 
  578   w[M-1] = (x[M-1]-x[M-2])/K(2.0);
 
  624   xc = (
double*)
nfft_malloc((2*M-4+1)*
sizeof(double));
 
  625   yc = (
double*)
nfft_malloc((2*M-4+1)*
sizeof(double));
 
  626   zc = (
double*)
nfft_malloc((2*M-4+1)*
sizeof(double));
 
  627   rc = (
double*)
nfft_malloc((2*M-4+1)*
sizeof(double));
 
  628   vr = (
double*)
nfft_malloc(3*(2*M-4+1)*
sizeof(double));
 
  632   for (k = 0; k < M; k++)
 
  634     x[k] = sin(2.0*
PI*xi[2*k+1])*cos(2.0*
PI*xi[2*k]);
 
  635     y[k] = sin(2.0*
PI*xi[2*k+1])*sin(2.0*
PI*xi[2*k]);
 
  636     z[k] = cos(2.0*
PI*xi[2*k+1]);
 
  640   trmesh_(&Mlocal, x, y, z, list, lptr, lend, &lnew, near, next, dist, &ier);
 
  646     crlist_(&Mlocal, &Mlocal, x, y, z, list, lend, lptr, &lnew, ltri, listc, &nb, xc,
 
  652       for (k = 0; k < M; k++)
 
  670           vr[3*j+1] = yc[kv-1];
 
  671           vr[3*j+2] = zc[kv-1];
 
  676         for (el = 0; el < j; el++)
 
  678           a += areas_(vr, &vr[3*(el+1)],&vr[3*(((el+1)%j)+1)]);
 
  710 R X(modified_fejer)(
const int N, 
const int kk)
 
  712   return (K(2.0)/((R)(N*N))*(K(1.0)-FABS(K(2.0)*kk+K(1.0))/((R)N)));
 
  716 R X(modified_jackson2)(
const int N, 
const int kk)
 
  719   const R n=(N/K(2.0)+K(1.0))/K(2.0);
 
  722   for (result = K(0.0), kj = kk; kj <= kk+1; kj++)
 
  727       result += K(1.0) - (K(3.0)*k + K(6.0)*n*POW(k,K(2.0))
 
  728         - K(3.0)*POW(k,K(3.0)))/(K(2.0)*n*(K(2.0)*POW(n,K(2.0))+K(1.0)));
 
  730       result+= (K(2.0)*n-k)*(POW(2*n-k,K(2.0))-K(1.0))/(K(2.0)
 
  731         *n*(K(2.0)*POW(n,K(2.0))+K(1.0)));
 
  738 R X(modified_jackson4)(
const int N, 
const int kk)
 
  741   const R n = (N/K(2.0)+K(3.0))/K(4.0), normalisation = (K(2416.0)*POW(n,K(7.0))
 
  742     + K(1120.0)*POW(n,K(5.0)) + K(784.0)*POW(n,K(3.0)) + K(720.0)*n);
 
  745   for (result = K(0.0), kj = kk; kj <= kk + 1; kj++)
 
  750       result += K(1.0) - (K(1260.0)*k + (K(1680.0)*POW(n, K(5.0))
 
  751         + K(2240.0)*POW(n, K(3.0)) + K(2940.0)*n)*POW(k, K(2.0))
 
  752         - K(1715.0)*POW(k, K(3.0)) - (K(560.0)*POW(n, K(3.0))
 
  753         + K(1400.0)*n)*POW(k, K(4.0)) + K(490.0)*POW(k, K(5.0))
 
  754         + K(140.0)*n*POW(k, K(6.0)) - K(35.0)*POW(k,K(7.0)))/normalisation;
 
  756     if ((K(1.0) <= k/n) && (k/n < K(2.0)))
 
  757       result += ((K(2472.0)*POW(n, K(7.0)) + K(336.0)*POW(n, K(5.0))
 
  758         + K(3528.0)*POW(n, K(3.0)) - K(1296.0)*n) - (K(392.0)*POW(n, K(6.0))
 
  759         - K(3920.0)*POW(n, K(4.0)) + K(8232.0)*POW(n, K(2.0)) - K(756.0))*k
 
  760         - (K(504.0)*POW(n, K(5.0)) + K(10080.0)*POW(n, K(3.0))
 
  761         - K(5292.0)*n)*POW(k, K(2.0)) - (K(1960.0)*POW(n, K(4.0))
 
  762         - K(7840.0)*POW(n, K(2.0)) + K(1029.0))*POW(k, K(3.0))
 
  763         + (K(2520.0)*POW(n, K(3.0)) - K(2520.0)*n) * POW(k, K(4.0))
 
  764         - (K(1176.0)*POW(n, K(2.0)) - K(294.0)) * POW(k, K(5.0))
 
  765         + K(252.0)*n*POW(k, K(6.0)) - K(21.0)*POW(k, K(7.0)))/normalisation;
 
  767     if ((K(2.0) <= k/n) && (k/n < K(3.0)))
 
  768       result += (-(K(1112.0)*POW(n, K(7.0)) - K(12880.0)*POW(n, K(5.0))
 
  769         + K(7448.0)*POW(n, K(3.0)) - K(720.0)*n) + (K(12152.0)*POW(n, K(6.0))
 
  770         - K(27440.0)*POW(n, K(4.0)) + K(8232.0)*POW(n, K(2.0)) - K(252.0))*k
 
  771         - (K(19320.0)*POW(n, K(5.0)) - K(21280.0)*POW(n, K(3.0))
 
  772         + K(2940.0)*n)*POW(k, K(2.0)) + (K(13720.0)*POW(n, K(4.0))
 
  773         - K(7840.0)*POW(n, K(2.0)) + K(343.0))*POW(k, K(3.0))
 
  774         - (K(5320.0)*POW(n, K(3.0)) - K(1400.0)*n)*POW(k, K(4.0))
 
  775         + (K(1176.0)*POW(n, K(2.0)) - K(98.0))*POW(k, K(5.0))
 
  776         - K(140.0)*n*POW(k, K(6.0)) + K(7.0) * POW(k, K(7.0)))/normalisation;
 
  778     if ((K(3.0) <= k/n) && (k/n < K(4.0)))
 
  779       result += ((4*n-k)*(POW(4*n-k, K(2.0)) - K(1.0))*(POW(4*n-k, K(2.0))
 
  780         - K(4.0))*(POW(4*n-k, K(2.0)) - K(9.0)))/normalisation;
 
  787 R X(modified_sobolev)(
const R mu, 
const int kk)
 
  792   for (result = K(0.0), kj = kk; kj <= kk+1; kj++)
 
  798       result += POW((
double)k,-K(2.0)*mu);
 
  805 R X(modified_multiquadric)(
const R mu, 
const R c, 
const int kk)
 
  810   for (result = K(0.0), kj = kk; kj <= kk+1; kj++)
 
  813       result += POW((
double)(k*k + c*c), -mu);
 
  819 static inline int scaled_modified_bessel_i_series(
const R x, 
const R 
alpha,
 
  820   const int nb, 
const int ize, R *b)
 
  822   const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
 
  823   R tempa = K(1.0), empal = K(1.0) + 
alpha, halfx = K(0.0), tempb = K(0.0);
 
  830     tempa = POW(halfx, alpha)/TGAMMA(empal);
 
  835   if (K(1.0) < x + K(1.0))
 
  838   b[0] = tempa + tempa*tempb/empal;
 
  840   if (x != K(0.0) && b[0] == K(0.0))
 
  848     R tempc = halfx, tover = (enmten + enmten)/x;
 
  851       tover = enmten/tempb;
 
  853     for (n = 1; n < nb; n++)
 
  859       if (tempa <= tover*empal)
 
  862       b[n] = tempa + tempa*tempb/empal;
 
  864       if (b[n] == K(0.0) && n < ncalc)
 
  869     for (n = 1; n < nb; n++)
 
  875 static inline void scaled_modified_bessel_i_normalize(
const R x,
 
  876   const R alpha, 
const int nb, 
const int ize, R *b, 
const R sum_)
 
  878   const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
 
  884     sum = sum * TGAMMA(K(1.0) + alpha) * POW(x/K(2.0), -alpha);
 
  894   for (n = 1; n <= nb; n++)
 
  950 int nfft_smbi(
const R x, 
const R alpha, 
const int nb, 
const int ize, R *b)
 
  972   const int nsig = MANT_DIG + 2;
 
  973   const R enten = nfft_float_property(NFFT_R_MAX);
 
  974   const R ensig = POW(K(10.0),(R)nsig);
 
  975   const R rtnsig = POW(K(10.0),-CEIL((R)nsig/K(4.0)));
 
  976   const R xlarge = K(1E4);
 
  977   const R exparg = FLOOR(LOG(POW(K(R_RADIX),K(DBL_MAX_EXP-1))));
 
  980   int l, n, nend, magx, nbmx, ncalc, nstart;
 
  981   R p, em, en, sum, pold, test, empal, tempa, tempb, tempc, psave, plast, tover,
 
  984   magx = LRINT(FLOOR(x));
 
  987   if (   nb <= 0 || x < K(0.0) || alpha < K(0.0) || K(1.0) <= alpha
 
  988       || ((ize != 1 || exparg < x) && (ize != 2 || xlarge < x)))
 
  989     return (MIN(nb,0) - 1);
 
  993     return scaled_modified_bessel_i_series(x,alpha,nb,ize,b);
 
 1001   en = (R) (n+n) + (alpha+
alpha);
 
 1006   test = ensig + ensig;
 
 1008   if ((5*nsig) < (magx << 1))
 
 1009     test = SQRT(test*p);
 
 1011     test /= POW(K(1.585),(R)magx);
 
 1016     tover = enten/ensig;
 
 1020     for (n = nstart; n <= nend; n++)
 
 1025       p = en*plast/x + pold;
 
 1043           p = en*plast/x + pold;
 
 1044         } 
while (p <= K(1.0));
 
 1049         test = pold*plast*(K(0.5) - K(0.5)/(tempb * tempb))/ensig;
 
 1055         for (ncalc = nstart; ncalc <= nend; ncalc++)
 
 1059           psave = en*psavel/x + pold;
 
 1060           if (test < psave * psavel)
 
 1070     en = (R) (n+n) + (alpha+
alpha);
 
 1073     test = FMAX(test,SQRT(plast*ensig)*SQRT(p+p));
 
 1083     p = en*plast/x + pold;
 
 1094   emp2al = em - K(1.0) + (alpha+
alpha);
 
 1095   sum = tempa*empal*emp2al/em;
 
 1103     for (l = 1; l <= nend; ++l)
 
 1104       b[n-1 + l] = K(0.0);
 
 1111       for (l = 1; l <= nend; ++l)
 
 1117         tempa = en*tempb/x + tempc;
 
 1128         sum = (sum + tempa*empal)*emp2al/em;
 
 1137       sum = sum + sum + tempa;
 
 1138       scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
 
 1145     b[n-1] = en*tempa/x + tempb;
 
 1149       sum = sum + sum + b[0];
 
 1150       scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
 
 1161     sum = (sum + b[n-1]*empal)*emp2al/em;
 
 1169     for (l = 1; l <= nend; ++l)
 
 1173       b[n-1] = en*b[n]/x + b[n+1];
 
 1181       sum = (sum + b[n-1]*empal)*emp2al/em;
 
 1186   b[0] = K(2.0)*empal*b[1]/x + b[2];
 
 1187   sum = sum + sum + b[0];
 
 1189   scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
 
 1197 void nfft_assertion_failed(
const char *s, 
int line, 
const char *file)
 
 1200   fprintf(stderr, 
"nfft: %s:%d: assertion failed: %s\n", file, line, s);
 
 1213 #if HAVE_DECL_DRAND48 == 0 
 1214   extern double drand48(
void);
 
 1216 #if HAVE_DECL_SRAND48 == 0 
 1217   extern void srand48(
long int);
 
 1220 double nfft_drand48(
void)
 
 1225   return ((R)rand())/((R)RAND_MAX);
 
 1229 void nfft_srand48(
long int seed)
 
 1234   srand((
unsigned int)seed);
 
 1239 #define z_swap(_a_, _b_, _t_)     do { (_t_) = (_a_); (_a_) = (_b_); (_b_) = (_t_); } while (0) 
 1246 static void nfft_sort_node_indices_sort_bubble(
int n, 
int *keys)
 
 1250   for (i = 0; i < n; ++i)
 
 1253     while (j > 0 && keys[2 * j + 0] < keys[2 * (j - 1) + 0])
 
 1255       z_swap(keys[2 * j + 0], keys[2 * (j - 1) + 0], ti);
 
 1256       z_swap(keys[2 * j + 1], keys[2 * (j - 1) + 1], ti);
 
 1267 static void nfft_sort_node_indices_radix_count(
int n, 
int *keys, 
int shift, 
int mask, 
int *counts)
 
 1271   for (i = 0; i < n; ++i)
 
 1273     k = (keys[2 * i + 0] >> shift) & mask;
 
 1283 static void nfft_sort_node_indices_radix_rearrange(
int n, 
int *keys_in, 
int *keys_out, 
int shift, 
int mask, 
int *displs)
 
 1287   for (i = 0; i < n; ++i)
 
 1289     k = (keys_in[2 * i + 0] >> shift) & mask;
 
 1290     keys_out[2 * displs[k] + 0] = keys_in[2 * i + 0];
 
 1291     keys_out[2 * displs[k] + 1] = keys_in[2 * i + 1];
 
 1303   const int rwidth = 9;
 
 1304   const int radix_n = 1 << rwidth;
 
 1305   const int radix_mask = radix_n - 1;
 
 1306   const int rhigh_in = rhigh;
 
 1310     omp_get_max_threads();
 
 1315   int *from, *to, *tmp;
 
 1318   int lcounts[tmax * radix_n];
 
 1320   int tid = 0, tnum = 1;
 
 1329     #pragma omp parallel private(tid, tnum, i, l, h) 
 1331       tid = omp_get_thread_num();
 
 1332       tnum = omp_get_num_threads();
 
 1335       for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
 
 1337       l = (tid * n) / tnum;
 
 1338       h = ((tid + 1) * n) / tnum;
 
 1340       nfft_sort_node_indices_radix_count(h - l, from + (2 * l), rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
 
 1346     for (i = 0; i < radix_n; ++i)
 
 1348       for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
 
 1352     #pragma omp parallel private(tid, tnum, i, l, h) 
 1354       tid = omp_get_thread_num();
 
 1355       tnum = omp_get_num_threads();
 
 1358       l = (tid * n) / tnum;
 
 1359       h = ((tid + 1) * n) / tnum;
 
 1361       nfft_sort_node_indices_radix_rearrange(h - l, from + (2 * l), to, rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
 
 1375   if (to == keys0) memcpy(to, from, n * 2 * 
sizeof(
int));
 
 1385   const int rwidth = 9;
 
 1386   const int radix_n = 1 << rwidth;
 
 1387   const int radix_mask = radix_n - 1;
 
 1391     omp_get_max_threads();
 
 1397   int lcounts[tmax * radix_n];
 
 1399   int counts[radix_n], displs[radix_n];
 
 1401   int tid = 0, tnum = 1;
 
 1407   #pragma omp parallel private(tid, tnum, i, l, h) 
 1409     tid = omp_get_thread_num();
 
 1410     tnum = omp_get_num_threads();
 
 1413     for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
 
 1415     l = (tid * n) / tnum;
 
 1416     h = ((tid + 1) * n) / tnum;
 
 1418     nfft_sort_node_indices_radix_count(h - l, keys0 + (2 * l), rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
 
 1424   for (i = 0; i < radix_n; ++i)
 
 1426     for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
 
 1428     displs[i] = lcounts[0 * radix_n + i];
 
 1429     if (i > 0) counts[i - 1] = displs[i] - displs[i - 1];
 
 1431   counts[radix_n - 1] = n - displs[radix_n - 1];
 
 1434   #pragma omp parallel private(tid, tnum, i, l, h) 
 1436     tid = omp_get_thread_num();
 
 1437     tnum = omp_get_num_threads();
 
 1440     l = (tid * n) / tnum;
 
 1441     h = ((tid + 1) * n) / tnum;
 
 1443     nfft_sort_node_indices_radix_rearrange(h - l, keys0 + (2 * l), keys1, rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
 
 1448   memcpy(keys0, keys1, n * 2 * 
sizeof(
int));
 
 1452     for (i = 0; i < radix_n; ++i)
 
 1456         if (counts[i] > 256)
 
 1459           nfft_sort_node_indices_sort_bubble(counts[i], keys0 + 2 * displs[i]);
 
 1465 int nfft_get_num_threads(
void)
 
 1468   return nfft_get_omp_num_threads();
 
 1475 int nfft_get_omp_num_threads(
void)
 
 1478   #pragma omp parallel default(shared) 
 1480     int n = omp_get_num_threads();