61 static void nfft_sort_nodes_for_better_cache_handle(
int d,
 
   62     const int *n, 
int m, 
int local_x_num, 
const R *local_x, 
int *ar_x)
 
   64   int u_j[d], i, j, help, rhigh;
 
   68   for(i = 0; i < local_x_num; i++) {
 
   71     for(j = 0; j < d; j++) {
 
   72       help = (int) floor( n[j]*local_x[d*i+j] - m);
 
   73       u_j[j] = (help%n[j]+n[j])%n[j];
 
   81   for (j = 0, nprod = 1; j < d; j++)
 
   84   rhigh = (int) ceil(log2(nprod)) - 1;
 
   86   ar_x_temp = (
int *) 
nfft_malloc(2*local_x_num*
sizeof(
int));
 
   89   for (i = 1; i < local_x_num; i++)
 
   90     assert(ar_x[2*(i-1)] <= ar_x[2*i]);
 
  103 static void nfft_sort_nodes(
const nfft_plan *ths)
 
  106     nfft_sort_nodes_for_better_cache_handle(ths->
d, ths->
n, ths->
m, ths->
M_total, ths->
x, ths->
index_x);
 
  131 #define MACRO_ndft_init_result_trafo memset(f,0,ths->M_total*sizeof(C)); 
  132 #define MACRO_ndft_init_result_conjugated MACRO_ndft_init_result_trafo 
  133 #define MACRO_ndft_init_result_adjoint memset(f_hat,0,ths->N_total*sizeof(C)); 
  134 #define MACRO_ndft_init_result_transposed MACRO_ndft_init_result_adjoint 
  137 #define MACRO_ndft_sign_trafo K2PI*ths->x[j*ths->d+t] 
  138 #define MACRO_ndft_sign_conjugated -K2PI*ths->x[j*ths->d+t] 
  139 #define MACRO_ndft_sign_adjoint K2PI*ths->x[j*ths->d+t] 
  140 #define MACRO_ndft_sign_transposed -K2PI*ths->x[j*ths->d+t] 
  142 #define MACRO_init_k_N_Omega_x(which_one)                                     \ 
  144   for (t = 0; t < ths->d; t++)                                                \ 
  146     k[t] = -ths->N[t]/2;                                                      \ 
  147     x[t] = MACRO_ndft_sign_ ## which_one;                                     \ 
  148     Omega[t+1] = k[t]*x[t] + Omega[t];                                        \ 
  150   omega = Omega[ths->d];                                                      \ 
  153 #define MACRO_count_k_N_Omega                                                 \ 
  155   for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t]/2-1); t--)                \ 
  156     k[t]-= ths->N[t]-1;                                                       \ 
  160   for (t2 = t; t2 < ths->d; t2++)                                             \ 
  161     Omega[t2+1] = k[t2]*x[t2] + Omega[t2];                                    \ 
  163   omega = Omega[ths->d];                                                      \ 
  166 #define MACRO_ndft_compute_trafo f[j] += f_hat[k_L]*CEXP(-II*omega); 
  167 #define MACRO_ndft_compute_conjugated MACRO_ndft_compute_trafo 
  168 #define MACRO_ndft_compute_adjoint f_hat[k_L] += f[j]*CEXP(+II*omega); 
  169 #define MACRO_ndft_compute_transposed MACRO_ndft_compute_adjoint 
  171 #define MACRO_ndft(which_one)                                                 \ 
  172 void nfft_ ## which_one ## _direct (nfft_plan *ths)                           \ 
  174   C *f_hat = (C*)ths->f_hat, *f = (C*)ths->f;                                 \ 
  176   MACRO_ndft_init_result_ ## which_one                                        \ 
  183     for (j = 0; j < ths->M_total; j++)                                        \ 
  186       for(k_L = 0; k_L < ths->N_total; k_L++)                                 \ 
  188         R omega = (k_L - (ths->N_total/2)) * MACRO_ndft_sign_ ## which_one;   \ 
  189         MACRO_ndft_compute_ ## which_one;                                     \ 
  197     for (j = 0; j < ths->M_total; j++)                                        \ 
  199       R x[ths->d], omega, Omega[ths->d+1];                                    \ 
  200       int t, t2, k_L, k[ths->d];                                              \ 
  202       MACRO_init_k_N_Omega_x(which_one);                                      \ 
  203       for(k_L = 0; k_L < ths->N_total; k_L++)                                 \ 
  205         MACRO_ndft_compute_ ## which_one;                                     \ 
  206         MACRO_count_k_N_Omega;                                                \ 
  216   C *f_hat = (C*)ths->
f_hat, *f = (C*)ths->
f;
 
  218   memset(f,0,ths->
M_total*
sizeof(C));
 
  224     #pragma omp parallel for default(shared) private(j) 
  225     for (j = 0; j < ths->
M_total; j++)
 
  228       for(k_L = 0; k_L < ths->
N_total; k_L++)
 
  230         R omega = (k_L - ths->
N_total/2) * K2PI * ths->
x[j];
 
  231         f[j] += f_hat[k_L]*cexp(-( 1.0iF)*omega);
 
  239     #pragma omp parallel for default(shared) private(j) 
  240     for (j = 0; j < ths->
M_total; j++)
 
  242       R x[ths->
d], omega, Omega[ths->
d+1];
 
  243       int t, t2, k_L, k[ths->
d];
 
  244       Omega[0] = ((R) 0.0);
 
  245       for (t = 0; t < ths->
d; t++)
 
  248         x[t] = K2PI*ths->
x[j*ths->
d+t];
 
  249         Omega[t+1] = k[t]*x[t] + Omega[t];
 
  251       omega = Omega[ths->
d];
 
  253       for(k_L = 0; k_L < ths->
N_total; k_L++)
 
  255         f[j] += f_hat[k_L]*cexp(-( 1.0iF)*omega);
 
  257           for (t = ths->
d-1; (t >= 1) && (k[t] == ths->
N[t]/2-1); t--)
 
  262           for (t2 = t; t2 < ths->
d; t2++)
 
  263             Omega[t2+1] = k[t2]*x[t2] + Omega[t2];
 
  265           omega = Omega[ths->
d];
 
  275 void nfft_adjoint_direct (
nfft_plan *ths)
 
  277   C *f_hat = (C*)ths->
f_hat, *f = (C*)ths->
f;
 
  279   memset(f_hat,0,ths->
N_total*
sizeof(C));
 
  286       #pragma omp parallel for default(shared) private(k_L) 
  287       for(k_L = 0; k_L < ths->
N_total; k_L++)
 
  290         for (j = 0; j < ths->
M_total; j++)
 
  292           R omega = (k_L - (ths->
N_total/2)) * K2PI * ths->
x[j];
 
  293           f_hat[k_L] += f[j]*cexp(+( 1.0iF)*omega);
 
  298       for (j = 0; j < ths->
M_total; j++)
 
  301         for(k_L = 0; k_L < ths->
N_total; k_L++)
 
  303           R omega = (k_L - (ths->
N_total/2)) * K2PI * ths->
x[j];
 
  304           f_hat[k_L] += f[j]*cexp(+( 1.0iF)*omega);
 
  314     #pragma omp parallel for default(shared) private(j, k_L) 
  315     for(k_L = 0; k_L < ths->
N_total; k_L++)
 
  317       int k[ths->
d], k_temp, t;
 
  321       for (t = ths->
d-1; t >= 0; t--)
 
  323         k[t] = k_temp % ths->
N[t] - ths->
N[t]/2;
 
  327       for (j = 0; j < ths->
M_total; j++)
 
  330         for (t = 0; t < ths->
d; t++)
 
  331           omega += k[t] * K2PI * ths->
x[j*ths->
d+t];
 
  332         f_hat[k_L] += f[j]*cexp(+( 1.0iF)*omega);
 
  336     for (j = 0; j < ths->
M_total; j++)
 
  338       R x[ths->
d], omega, Omega[ths->
d+1];
 
  339       int t, t2, k[ths->
d];
 
  340       Omega[0] = ((R) 0.0);
 
  341       for (t = 0; t < ths->
d; t++)
 
  344         x[t] = K2PI * ths->
x[j*ths->
d+t];
 
  345         Omega[t+1] = k[t]*x[t] + Omega[t];
 
  347       omega = Omega[ths->
d];
 
  348       for(k_L = 0; k_L < ths->
N_total; k_L++)
 
  350         f_hat[k_L] += f[j]*cexp(+( 1.0iF)*omega);
 
  352         for (t = ths->
d-1; (t >= 1) && (k[t] == ths->
N[t]/2-1); t--)
 
  357         for (t2 = t; t2 < ths->
d; t2++)
 
  358           Omega[t2+1] = k[t2]*x[t2] + Omega[t2];
 
  360         omega = Omega[ths->
d];
 
  392 static inline void nfft_uo(
const nfft_plan *ths, 
const int j, 
int *up, 
int *op,
 
  395   const R xj = ths->
x[j * ths->
d + act_dim];
 
  396   int c = LRINT(FLOOR(xj * ths->
n[act_dim]));
 
  398   (*up) = c - (ths->
m);
 
  399   (*op) = c + 1 + (ths->
m);
 
  402 static inline void nfft_uo2(
int *u, 
int *o, 
const R x, 
const int n, 
const int m)
 
  404   int c = LRINT(FLOOR(x * n));
 
  406   *u = (c - m + n) % n;
 
  407   *o = (c + 1 + m + n) % n;
 
  410 #define MACRO_nfft_D_compute_A                                                \ 
  412   g_hat[k_plain[ths->d]] = f_hat[ks_plain[ths->d]] * c_phi_inv_k[ths->d];     \ 
  415 #define MACRO_nfft_D_compute_T                                                \ 
  417   f_hat[ks_plain[ths->d]] = g_hat[k_plain[ths->d]] * c_phi_inv_k[ths->d];     \ 
  420 #define MACRO_nfft_D_init_result_A memset(g_hat,0,ths->n_total*sizeof(C)); 
  422 #define MACRO_nfft_D_init_result_T memset(f_hat,0,ths->N_total*sizeof(C)); 
  424 #define MACRO_with_PRE_PHI_HUT * ths->c_phi_inv[t2][ks[t2]]; 
  426 #define MACRO_without_PRE_PHI_HUT / (PHI_HUT(ks[t2]-(ths->N[t2]/2),t2)); 
  428 #define MACRO_init_k_ks                                                       \ 
  430   for (t = ths->d-1; 0 <= t; t--)                                             \ 
  433     ks[t] = ths->N[t]/2;                                                      \ 
  438 #define MACRO_update_c_phi_inv_k(which_one) \ 
  440   for (t2 = t; t2 < ths->d; t2++)                                             \ 
  442     c_phi_inv_k[t2+1] = c_phi_inv_k[t2] MACRO_ ##which_one;                   \ 
  443     ks_plain[t2+1] = ks_plain[t2]*ths->N[t2] + ks[t2];                        \ 
  444     k_plain[t2+1] = k_plain[t2]*ths->n[t2] + k[t2];                           \ 
  448 #define MACRO_count_k_ks                                                      \ 
  450   for (t = ths->d-1; (t > 0) && (kp[t] == ths->N[t]-1); t--)                  \ 
  453     ks[t]= ths->N[t]/2;                                                       \ 
  456   kp[t]++; k[t]++; ks[t]++;                                                   \ 
  457   if(kp[t] == ths->N[t]/2)                                                    \ 
  459     k[t] = ths->n[t] - ths->N[t]/2;                                           \ 
  468 #define MACRO_nfft_D(which_one)                                               \ 
  469 static inline void nfft_D_serial_ ## which_one (nfft_plan *ths)               \ 
  472   R c_phi_inv_k[ths->d+1];              \ 
  478   int k_plain[ths->d+1];                \ 
  479   int ks_plain[ths->d+1];               \ 
  481   f_hat = (C*)ths->f_hat; g_hat = (C*)ths->g_hat;                             \ 
  482   MACRO_nfft_D_init_result_ ## which_one;                                     \ 
  484   c_phi_inv_k[0] = K(1.0);                                                    \ 
  488   if (ths->nfft_flags & PRE_PHI_HUT)                                          \ 
  492     for (k_L = 0; k_L < ths->N_total; k_L++)                                  \ 
  494       MACRO_update_c_phi_inv_k(with_PRE_PHI_HUT);                             \ 
  495       MACRO_nfft_D_compute_ ## which_one;                                     \ 
  502     for (k_L = 0; k_L < ths->N_total; k_L++)                                  \ 
  504       MACRO_update_c_phi_inv_k(without_PRE_PHI_HUT);                          \ 
  505       MACRO_nfft_D_compute_ ## which_one;                                     \ 
  512 static void nfft_D_openmp_A(
nfft_plan *ths)
 
  517   f_hat = (C*)ths->
f_hat; g_hat = (C*)ths->
g_hat;
 
  518   memset(g_hat,0,ths->
n_total*
sizeof(C));
 
  522     #pragma omp parallel for default(shared) private(k_L) 
  523     for (k_L = 0; k_L < ths->
N_total; k_L++)
 
  528       R c_phi_inv_k_val = K(1.0);
 
  530       int ks_plain_val = 0;
 
  534       for (t = ths->
d-1; t >= 0; t--)
 
  536         kp[t] = k_temp % ths->
N[t];
 
  537         if (kp[t] >= ths->
N[t]/2)
 
  538           k[t] = ths->
n[t] - ths->
N[t] + kp[t];
 
  541         ks[t] = (kp[t] + ths->
N[t]/2) % ths->
N[t];
 
  545       for (t = 0; t < ths->
d; t++)
 
  547         c_phi_inv_k_val *= ths->
c_phi_inv[t][ks[t]];
 
  548         ks_plain_val = ks_plain_val*ths->
N[t] + ks[t];
 
  549         k_plain_val = k_plain_val*ths->
n[t] + k[t];
 
  552       g_hat[k_plain_val] = f_hat[ks_plain_val] * c_phi_inv_k_val;
 
  557     #pragma omp parallel for default(shared) private(k_L) 
  558     for (k_L = 0; k_L < ths->
N_total; k_L++)
 
  563       R c_phi_inv_k_val = K(1.0);
 
  565       int ks_plain_val = 0;
 
  569       for (t = ths->
d-1; t >= 0; t--)
 
  571         kp[t] = k_temp % ths->
N[t];
 
  572         if (kp[t] >= ths->
N[t]/2)
 
  573           k[t] = ths->
n[t] - ths->
N[t] + kp[t];
 
  576         ks[t] = (kp[t] + ths->
N[t]/2) % ths->
N[t];
 
  580       for (t = 0; t < ths->
d; t++)
 
  582         c_phi_inv_k_val /= (PHI_HUT(ks[t]-(ths->
N[t]/2),t));
 
  583         ks_plain_val = ks_plain_val*ths->
N[t] + ks[t];
 
  584         k_plain_val = k_plain_val*ths->
n[t] + k[t];
 
  587       g_hat[k_plain_val] = f_hat[ks_plain_val] * c_phi_inv_k_val;
 
  600   nfft_D_openmp_A(ths);
 
  602   nfft_D_serial_A(ths);
 
  607 static void nfft_D_openmp_T(
nfft_plan *ths)
 
  612   f_hat = (C*)ths->
f_hat; g_hat = (C*)ths->
g_hat;
 
  613   memset(f_hat,0,ths->
N_total*
sizeof(C));
 
  617     #pragma omp parallel for default(shared) private(k_L) 
  618     for (k_L = 0; k_L < ths->
N_total; k_L++)
 
  623       R c_phi_inv_k_val = K(1.0);
 
  625       int ks_plain_val = 0;
 
  629       for (t = ths->
d-1; t >= 0; t--)
 
  631         kp[t] = k_temp % ths->
N[t];
 
  632         if (kp[t] >= ths->
N[t]/2)
 
  633           k[t] = ths->
n[t] - ths->
N[t] + kp[t];
 
  636         ks[t] = (kp[t] + ths->
N[t]/2) % ths->
N[t];
 
  640       for (t = 0; t < ths->
d; t++)
 
  642         c_phi_inv_k_val *= ths->
c_phi_inv[t][ks[t]];
 
  643         ks_plain_val = ks_plain_val*ths->
N[t] + ks[t];
 
  644         k_plain_val = k_plain_val*ths->
n[t] + k[t];
 
  647       f_hat[ks_plain_val] = g_hat[k_plain_val] * c_phi_inv_k_val;
 
  652     #pragma omp parallel for default(shared) private(k_L) 
  653     for (k_L = 0; k_L < ths->
N_total; k_L++)
 
  658       R c_phi_inv_k_val = K(1.0);
 
  660       int ks_plain_val = 0;
 
  664       for (t = ths->
d-1; t >= 0; t--)
 
  666         kp[t] = k_temp % ths->
N[t];
 
  667         if (kp[t] >= ths->
N[t]/2)
 
  668           k[t] = ths->
n[t] - ths->
N[t] + kp[t];
 
  671         ks[t] = (kp[t] + ths->
N[t]/2) % ths->
N[t];
 
  675       for (t = 0; t < ths->
d; t++)
 
  677         c_phi_inv_k_val /= (PHI_HUT(ks[t]-(ths->
N[t]/2),t));
 
  678         ks_plain_val = ks_plain_val*ths->
N[t] + ks[t];
 
  679         k_plain_val = k_plain_val*ths->
n[t] + k[t];
 
  682       f_hat[ks_plain_val] = g_hat[k_plain_val] * c_phi_inv_k_val;
 
  695   nfft_D_openmp_T(ths);
 
  697   nfft_D_serial_T(ths);
 
  704 #define MACRO_nfft_B_init_result_A  memset(f,0,ths->M_total*sizeof(C)); 
  705 #define MACRO_nfft_B_init_result_T memset(g,0,ths->n_total*sizeof(C)); 
  707 #define MACRO_nfft_B_PRE_FULL_PSI_compute_A                                   \ 
  709   (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]];                            \ 
  712 #define MACRO_nfft_B_PRE_FULL_PSI_compute_T                                   \ 
  714   g[ths->psi_index_g[ix]] += ths->psi[ix] * (*fj);                            \ 
  717 #define MACRO_nfft_B_compute_A                                                \ 
  719   (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]];                            \ 
  722 #define MACRO_nfft_B_compute_T                                                \ 
  724   g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj);                            \ 
  727 #define MACRO_with_FG_PSI fg_psi[t2][lj[t2]] 
  729 #define MACRO_with_PRE_PSI ths->psi[(j*ths->d+t2) * (2*ths->m+2)+lj[t2]] 
  731 #define MACRO_without_PRE_PSI  PHI(ths->x[j*ths->d+t2]                        \ 
  732   - ((R)l[t2])/((R)ths->n[t2]), t2) 
  734 #define MACRO_init_uo_l_lj_t                                                  \ 
  736   for (t = ths->d-1; t >= 0; t--)                                             \ 
  738     nfft_uo(ths,j,&u[t],&o[t],t);                                             \ 
  745 #define MACRO_update_phi_prod_ll_plain(which_one) {                           \ 
  746   for(t2=t; t2<ths->d; t2++)                                                  \ 
  748       phi_prod[t2+1] = phi_prod[t2]* MACRO_ ## which_one;                     \ 
  749       ll_plain[t2+1] = ll_plain[t2]*ths->n[t2] +(l[t2]+ths->n[t2])%ths->n[t2];\ 
  753 #define MACRO_count_uo_l_lj_t {                                               \ 
  754   for(t = ths->d-1; (t > 0) && (l[t] == o[t]); t--)                           \ 
  764 #define MACRO_nfft_B(which_one)                                               \ 
  765 static inline void nfft_B_serial_ ## which_one (nfft_plan *ths)               \ 
  768   int u[ths->d], o[ths->d];               \ 
  774   int ll_plain[ths->d+1];                       \ 
  775   R phi_prod[ths->d+1];                           \ 
  779   R fg_psi[ths->d][2*ths->m+2];                                               \ 
  780   R fg_exp_l[ths->d][2*ths->m+2];                                             \ 
  782   R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3;                            \ 
  785   int ip_s = ths->K/(ths->m+2);                                               \ 
  787   f = (C*)ths->f; g = (C*)ths->g;                                             \ 
  789   MACRO_nfft_B_init_result_ ## which_one;                                     \ 
  791   if (ths->nfft_flags & PRE_FULL_PSI)                                         \ 
  793     for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++)                  \ 
  794       for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++)                   \ 
  795         MACRO_nfft_B_PRE_FULL_PSI_compute_ ## which_one;                      \ 
  799   phi_prod[0] = K(1.0);                                                       \ 
  802   for (t = 0, lprod = 1; t < ths->d; t++)                                     \ 
  803     lprod *= (2*ths->m+2);                                                    \ 
  805   if (ths->nfft_flags & PRE_PSI)                                              \ 
  807     for (j = 0, fj = f; j < ths->M_total; j++, fj++)                          \ 
  809       MACRO_init_uo_l_lj_t;                                                   \ 
  811       for (l_L = 0; l_L < lprod; l_L++)                                       \ 
  813         MACRO_update_phi_prod_ll_plain(with_PRE_PSI);                         \ 
  815         MACRO_nfft_B_compute_ ## which_one;                                   \ 
  817         MACRO_count_uo_l_lj_t;                                                \ 
  823   if (ths->nfft_flags & PRE_FG_PSI)                                           \ 
  825     for(t2 = 0; t2 < ths->d; t2++)                                            \ 
  827       tmpEXP2 = EXP(K(-1.0)/ths->b[t2]);                                      \ 
  828       tmpEXP2sq = tmpEXP2*tmpEXP2;                                            \ 
  831       fg_exp_l[t2][0] = K(1.0);                                               \ 
  832       for(lj_fg = 1; lj_fg <= (2*ths->m+2); lj_fg++)                          \ 
  834         tmp3 = tmp2*tmpEXP2;                                                  \ 
  836         fg_exp_l[t2][lj_fg] = fg_exp_l[t2][lj_fg-1]*tmp3;                     \ 
  839     for (j = 0, fj = f; j < ths->M_total; j++, fj++)                          \ 
  841       MACRO_init_uo_l_lj_t;                                                   \ 
  843       for (t2 = 0; t2 < ths->d; t2++)                                         \ 
  845         fg_psi[t2][0] = ths->psi[2*(j*ths->d+t2)];                            \ 
  846         tmpEXP1 = ths->psi[2*(j*ths->d+t2)+1];                                \ 
  848         for (l_fg = u[t2]+1, lj_fg = 1; l_fg <= o[t2]; l_fg++, lj_fg++)       \ 
  851           fg_psi[t2][lj_fg] = fg_psi[t2][0]*tmp1*fg_exp_l[t2][lj_fg];         \ 
  855       for (l_L= 0; l_L < lprod; l_L++)                                        \ 
  857         MACRO_update_phi_prod_ll_plain(with_FG_PSI);                          \ 
  859         MACRO_nfft_B_compute_ ## which_one;                                   \ 
  861         MACRO_count_uo_l_lj_t;                                                \ 
  867   if (ths->nfft_flags & FG_PSI)                                               \ 
  869     for (t2 = 0; t2 < ths->d; t2++)                                           \ 
  871       tmpEXP2 = EXP(K(-1.0)/ths->b[t2]);                                      \ 
  872       tmpEXP2sq = tmpEXP2*tmpEXP2;                                            \ 
  875       fg_exp_l[t2][0] = K(1.0);                                               \ 
  876       for (lj_fg = 1; lj_fg <= (2*ths->m+2); lj_fg++)                         \ 
  878         tmp3 = tmp2*tmpEXP2;                                                  \ 
  880         fg_exp_l[t2][lj_fg] = fg_exp_l[t2][lj_fg-1]*tmp3;                     \ 
  883     for (j = 0, fj = f; j < ths->M_total; j++, fj++)                          \ 
  885       MACRO_init_uo_l_lj_t;                                                   \ 
  887       for (t2 = 0; t2 < ths->d; t2++)                                         \ 
  889         fg_psi[t2][0] = (PHI((ths->x[j*ths->d+t2]-((R)u[t2])/ths->n[t2]),t2));\ 
  891         tmpEXP1 = EXP(K(2.0)*(ths->n[t2]*ths->x[j*ths->d+t2] - u[t2])         \ 
  894         for (l_fg = u[t2] + 1, lj_fg = 1; l_fg <= o[t2]; l_fg++, lj_fg++)     \ 
  897           fg_psi[t2][lj_fg] = fg_psi[t2][0]*tmp1*fg_exp_l[t2][lj_fg];         \ 
  901       for (l_L = 0; l_L < lprod; l_L++)                                       \ 
  903         MACRO_update_phi_prod_ll_plain(with_FG_PSI);                          \ 
  905         MACRO_nfft_B_compute_ ## which_one;                                   \ 
  907         MACRO_count_uo_l_lj_t;                                                \ 
  913   if (ths->nfft_flags & PRE_LIN_PSI)                                          \ 
  915     for (j = 0, fj=f; j<ths->M_total; j++, fj++)                              \ 
  917       MACRO_init_uo_l_lj_t;                                                   \ 
  919       for (t2 = 0; t2 < ths->d; t2++)                                         \ 
  921         y[t2] = ((ths->n[t2]*ths->x[j*ths->d+t2]-(R)u[t2])                    \ 
  922           * ((R)ths->K))/(ths->m+2);                                          \ 
  923         ip_u  = LRINT(FLOOR(y[t2]));                                          \ 
  925         for (l_fg = u[t2], lj_fg = 0; l_fg <= o[t2]; l_fg++, lj_fg++)         \ 
  927           fg_psi[t2][lj_fg] = ths->psi[(ths->K+1)*t2 + ABS(ip_u-lj_fg*ip_s)]  \ 
  928             * (1-ip_w) + ths->psi[(ths->K+1)*t2 + ABS(ip_u-lj_fg*ip_s+1)]     \ 
  933       for (l_L = 0; l_L < lprod; l_L++)                                       \ 
  935         MACRO_update_phi_prod_ll_plain(with_FG_PSI);                          \ 
  937         MACRO_nfft_B_compute_ ## which_one;                                   \ 
  939         MACRO_count_uo_l_lj_t;                                                \ 
  946   for (j = 0, fj = f; j < ths->M_total; j++, fj++)                            \ 
  948     MACRO_init_uo_l_lj_t;                                                     \ 
  950     for (l_L = 0; l_L < lprod; l_L++)                                         \ 
  952       MACRO_update_phi_prod_ll_plain(without_PRE_PSI);                        \ 
  954       MACRO_nfft_B_compute_ ## which_one;                                     \ 
  956       MACRO_count_uo_l_lj_t;                                                  \ 
  966 static inline void nfft_B_openmp_A (
nfft_plan *ths)
 
  971   memset(ths->
f,0,ths->
M_total*
sizeof(C));
 
  973   for (k = 0, lprod = 1; k < ths->
d; k++)
 
  974     lprod *= (2*ths->
m+2);
 
  978     #pragma omp parallel for default(shared) private(k) 
  979     for (k = 0; k < ths->
M_total; k++)
 
  984       for (l = 0; l < lprod; l++)
 
  992     #pragma omp parallel for default(shared) private(k) 
  993     for (k = 0; k < ths->
M_total; k++)
 
  995       int u[ths->
d], o[ths->
d]; 
 
 1000       int ll_plain[ths->
d+1]; 
 
 1001       R phi_prod[ths->
d+1]; 
 
 1004       phi_prod[0] = K(1.0);
 
 1007       MACRO_init_uo_l_lj_t;
 
 1009       for (l_L = 0; l_L < lprod; l_L++)
 
 1011         MACRO_update_phi_prod_ll_plain(with_PRE_PSI);
 
 1013         ths->
f[j] += phi_prod[ths->
d] * ths->
g[ll_plain[ths->
d]];
 
 1015         MACRO_count_uo_l_lj_t;
 
 1024     R fg_exp_l[ths->
d][2*ths->
m+2];
 
 1026     for(t2 = 0; t2 < ths->
d; t2++)
 
 1029       R tmpEXP2 = EXP(K(-1.0)/ths->
b[t2]);
 
 1030       R tmpEXP2sq = tmpEXP2*tmpEXP2;
 
 1033       fg_exp_l[t2][0] = K(1.0);
 
 1034       for(lj_fg = 1; lj_fg <= (2*ths->
m+2); lj_fg++)
 
 1036         tmp3 = tmp2*tmpEXP2;
 
 1038         fg_exp_l[t2][lj_fg] = fg_exp_l[t2][lj_fg-1]*tmp3;
 
 1042     #pragma omp parallel for default(shared) private(k,t,t2) 
 1043     for (k = 0; k < ths->
M_total; k++)
 
 1045       int ll_plain[ths->
d+1]; 
 
 1046       R phi_prod[ths->
d+1]; 
 
 1047       int u[ths->
d], o[ths->
d]; 
 
 1050       R fg_psi[ths->
d][2*ths->
m+2];
 
 1056       phi_prod[0] = K(1.0);
 
 1059       MACRO_init_uo_l_lj_t;
 
 1061       for (t2 = 0; t2 < ths->
d; t2++)
 
 1063         fg_psi[t2][0] = ths->
psi[2*(j*ths->
d+t2)];
 
 1064         tmpEXP1 = ths->
psi[2*(j*ths->
d+t2)+1];
 
 1066         for (l_fg = u[t2]+1, lj_fg = 1; l_fg <= o[t2]; l_fg++, lj_fg++)
 
 1069           fg_psi[t2][lj_fg] = fg_psi[t2][0]*tmp1*fg_exp_l[t2][lj_fg];
 
 1073       for (l_L= 0; l_L < lprod; l_L++)
 
 1075         MACRO_update_phi_prod_ll_plain(with_FG_PSI);
 
 1077         ths->
f[j] += phi_prod[ths->
d] * ths->
g[ll_plain[ths->
d]];
 
 1079         MACRO_count_uo_l_lj_t;
 
 1088     R fg_exp_l[ths->
d][2*ths->
m+2];
 
 1090     nfft_sort_nodes(ths);
 
 1092     for (t2 = 0; t2 < ths->
d; t2++)
 
 1095       R tmpEXP2 = EXP(K(-1.0)/ths->
b[t2]);
 
 1096       R tmpEXP2sq = tmpEXP2*tmpEXP2;
 
 1099       fg_exp_l[t2][0] = K(1.0);
 
 1100       for (lj_fg = 1; lj_fg <= (2*ths->
m+2); lj_fg++)
 
 1102         tmp3 = tmp2*tmpEXP2;
 
 1104         fg_exp_l[t2][lj_fg] = fg_exp_l[t2][lj_fg-1]*tmp3;
 
 1108     #pragma omp parallel for default(shared) private(k,t,t2) 
 1109     for (k = 0; k < ths->
M_total; k++)
 
 1111       int ll_plain[ths->
d+1]; 
 
 1112       R phi_prod[ths->
d+1]; 
 
 1113       int u[ths->
d], o[ths->
d]; 
 
 1116       R fg_psi[ths->
d][2*ths->
m+2];
 
 1122       phi_prod[0] = K(1.0);
 
 1125       MACRO_init_uo_l_lj_t;
 
 1127       for (t2 = 0; t2 < ths->
d; t2++)
 
 1129         fg_psi[t2][0] = (PHI((ths->
x[j*ths->
d+t2]-((R)u[t2])/ths->
n[t2]),t2));
 
 1131         tmpEXP1 = EXP(K(2.0)*(ths->
n[t2]*ths->
x[j*ths->
d+t2] - u[t2])
 
 1134         for (l_fg = u[t2] + 1, lj_fg = 1; l_fg <= o[t2]; l_fg++, lj_fg++)
 
 1137           fg_psi[t2][lj_fg] = fg_psi[t2][0]*tmp1*fg_exp_l[t2][lj_fg];
 
 1141       for (l_L = 0; l_L < lprod; l_L++)
 
 1143         MACRO_update_phi_prod_ll_plain(with_FG_PSI);
 
 1145         ths->
f[j] += phi_prod[ths->
d] * ths->
g[ll_plain[ths->
d]];
 
 1147         MACRO_count_uo_l_lj_t;
 
 1155     nfft_sort_nodes(ths);
 
 1157     #pragma omp parallel for default(shared) private(k) 
 1158     for (k = 0; k<ths->
M_total; k++)
 
 1160       int u[ths->
d], o[ths->
d]; 
 
 1165       int ll_plain[ths->
d+1]; 
 
 1166       R phi_prod[ths->
d+1]; 
 
 1168       R fg_psi[ths->
d][2*ths->
m+2];
 
 1172       int ip_s = ths->
K/(ths->
m+2);
 
 1175       phi_prod[0] = K(1.0);
 
 1178       MACRO_init_uo_l_lj_t;
 
 1180       for (t2 = 0; t2 < ths->
d; t2++)
 
 1182         y[t2] = ((ths->
n[t2]*ths->
x[j*ths->
d+t2]-(R)u[t2])
 
 1183           * ((R)ths->
K))/(ths->
m+2);
 
 1184         ip_u  = LRINT(FLOOR(y[t2]));
 
 1186         for (l_fg = u[t2], lj_fg = 0; l_fg <= o[t2]; l_fg++, lj_fg++)
 
 1188           fg_psi[t2][lj_fg] = ths->
psi[(ths->
K+1)*t2 + ABS(ip_u-lj_fg*ip_s)]
 
 1189             * (1-ip_w) + ths->
psi[(ths->
K+1)*t2 + ABS(ip_u-lj_fg*ip_s+1)]
 
 1194       for (l_L = 0; l_L < lprod; l_L++)
 
 1196         MACRO_update_phi_prod_ll_plain(with_FG_PSI);
 
 1198         ths->
f[j] += phi_prod[ths->
d] * ths->
g[ll_plain[ths->
d]];
 
 1200         MACRO_count_uo_l_lj_t;
 
 1207   nfft_sort_nodes(ths);
 
 1209   #pragma omp parallel for default(shared) private(k) 
 1210   for (k = 0; k < ths->
M_total; k++)
 
 1212     int u[ths->
d], o[ths->
d]; 
 
 1217     int ll_plain[ths->
d+1]; 
 
 1218     R phi_prod[ths->
d+1]; 
 
 1221     phi_prod[0] = K(1.0);
 
 1224     MACRO_init_uo_l_lj_t;
 
 1226     for (l_L = 0; l_L < lprod; l_L++)
 
 1228       MACRO_update_phi_prod_ll_plain(without_PRE_PSI);
 
 1230       ths->
f[j] += phi_prod[ths->
d] * ths->
g[ll_plain[ths->
d]];
 
 1232       MACRO_count_uo_l_lj_t;
 
 1241   nfft_B_openmp_A(ths);
 
 1243   nfft_B_serial_A(ths);
 
 1263 static inline int index_x_binary_search(
const int *ar_x, 
const int len, 
const int key)
 
 1265   int left = 0, right = len - 1;
 
 1270   while (left < right - 1)
 
 1272     int i = (left + right) / 2;
 
 1273     if (ar_x[2*i] >= key)
 
 1275     else if (ar_x[2*i] < key)
 
 1279   if (ar_x[2*left] < key && left != len-1)
 
 1302 static void nfft_adjoint_B_omp_blockwise_init(
int *my_u0, 
int *my_o0, 
int *min_u_a, 
int *max_u_a, 
int *min_u_b, 
int *max_u_b, 
const int d, 
const int *n, 
const int m)
 
 1304   const int n0 = n[0];
 
 1306   int nthreads = omp_get_num_threads();
 
 1307   int nthreads_used = MIN(nthreads, n0);
 
 1308   int size_per_thread = n0 / nthreads_used;
 
 1309   int size_left = n0 - size_per_thread * nthreads_used;
 
 1310   int size_g[nthreads_used];
 
 1311   int offset_g[nthreads_used];
 
 1312   int my_id = omp_get_thread_num();
 
 1313   int n_prod_rest = 1;
 
 1315   for (k = 1; k < d; k++)
 
 1316     n_prod_rest *= n[k];
 
 1325   if (my_id < nthreads_used)
 
 1327     const int m22 = 2 * m + 2;
 
 1330     for (k = 0; k < nthreads_used; k++)
 
 1333         offset_g[k] = offset_g[k-1] + size_g[k-1];
 
 1334       size_g[k] = size_per_thread;
 
 1342     *my_u0 = offset_g[my_id];
 
 1343     *my_o0 = offset_g[my_id] + size_g[my_id] - 1;
 
 1345     if (nthreads_used > 1)
 
 1347       *max_u_a = n_prod_rest*(offset_g[my_id] + size_g[my_id]) - 1;
 
 1348       *min_u_a = n_prod_rest*(offset_g[my_id] - m22 + 1);
 
 1353       *max_u_a = n_prod_rest * n0 - 1;
 
 1358       *min_u_b = n_prod_rest * (offset_g[my_id] - m22 + 1 + n0);
 
 1359       *max_u_b = n_prod_rest * n0 - 1;
 
 1363     if (*min_u_b != -1 && *min_u_b <= *max_u_a)
 
 1365       *max_u_a = *max_u_b;
 
 1370     assert(*min_u_a <= *max_u_a);
 
 1371     assert(*min_u_b <= *max_u_b);
 
 1372     assert(*min_u_b == -1 || *max_u_a < *min_u_b);
 
 1386 static void nfft_adjoint_B_compute_full_psi(
 
 1387     C *g, 
const int *psi_index_g, 
const R *psi, 
const C *f,
 
 1388     const int M, 
const int d, 
const int *n, 
const int m, 
const int nfft_flags, 
const int *index_x)
 
 1391   int lprod, lprod_m1;
 
 1394     for(t = 0, lprod = 1; t < d; t++)
 
 1397   lprod_m1 = lprod / (2 * m + 2);
 
 1400   if (nfft_flags & NFFT_OMP_BLOCKWISE_ADJOINT)
 
 1402     #pragma omp parallel private(k) 
 1404       int my_u0, my_o0, min_u_a, max_u_a, min_u_b, max_u_b;
 
 1405       const int *ar_x = index_x;
 
 1406       int n_prod_rest = 1;
 
 1408       for (k = 1; k < d; k++)
 
 1409         n_prod_rest *= n[k];
 
 1411       nfft_adjoint_B_omp_blockwise_init(&my_u0, &my_o0, &min_u_a, &max_u_a, &min_u_b, &max_u_b, d, n, m);
 
 1415         k = index_x_binary_search(ar_x, M, min_u_a);
 
 1417         assert(ar_x[2*k] >= min_u_a || k == M-1);
 
 1419           assert(ar_x[2*k-2] < min_u_a);
 
 1424           int u_prod = ar_x[2*k];
 
 1425           int j = ar_x[2*k+1];
 
 1427           if (u_prod < min_u_a || u_prod > max_u_a)
 
 1430           for (l0 = 0; l0 < 2 * m + 2; l0++)
 
 1432             const int start_index = psi_index_g[j * lprod + l0 * lprod_m1];
 
 1434             if (start_index < my_u0 * n_prod_rest || start_index > (my_o0+1) * n_prod_rest - 1)
 
 1437             for (lrest = 0; lrest < lprod_m1; lrest++)
 
 1439               const int l = l0 * lprod_m1 + lrest;
 
 1440               g[psi_index_g[j * lprod + l]] += psi[j * lprod + l] * f[j];
 
 1450         k = index_x_binary_search(ar_x, M, min_u_b);
 
 1452         assert(ar_x[2*k] >= min_u_b || k == M-1);
 
 1454           assert(ar_x[2*k-2] < min_u_b);
 
 1459           int u_prod = ar_x[2*k];
 
 1460           int j = ar_x[2*k+1];
 
 1462           if (u_prod < min_u_b || u_prod > max_u_b)
 
 1465           for (l0 = 0; l0 < 2 * m + 2; l0++)
 
 1467             const int start_index = psi_index_g[j * lprod + l0 * lprod_m1];
 
 1469             if (start_index < my_u0 * n_prod_rest || start_index > (my_o0+1) * n_prod_rest - 1)
 
 1471             for (lrest = 0; lrest < lprod_m1; lrest++)
 
 1473               const int l = l0 * lprod_m1 + lrest;
 
 1474               g[psi_index_g[j * lprod + l]] += psi[j * lprod + l] * f[j];
 
 1486   #pragma omp parallel for default(shared) private(k) 
 1487   for (k = 0; k < M; k++)
 
 1490     int j = (nfft_flags & NFFT_SORT_NODES) ? index_x[2*k+1] : k;
 
 1492     for (l = 0; l < lprod; l++)
 
 1495       C val = psi[j * lprod + l] * f[j];
 
 1496       C *gref = g + psi_index_g[j * lprod + l];
 
 1497       R *gref_real = (R*) gref;
 
 1500       gref_real[0] += creal(val);
 
 1503       gref_real[1] += cimag(val);
 
 1505       g[psi_index_g[j * lprod + l]] += psi[j * lprod + l] * f[j];
 
 1516 static inline void nfft_B_openmp_T(
nfft_plan *ths)
 
 1521   memset(ths->
g,0,ths->
n_total*
sizeof(C));
 
 1523   for (k = 0, lprod = 1; k < ths->
d; k++)
 
 1524     lprod *= (2*ths->
m+2);
 
 1535     #pragma omp parallel for default(shared) private(k) 
 1536     for (k = 0; k < ths->
M_total; k++)
 
 1538       int u[ths->
d], o[ths->
d]; 
 
 1543       int ll_plain[ths->
d+1]; 
 
 1544       R phi_prod[ths->
d+1]; 
 
 1547       phi_prod[0] = K(1.0);
 
 1550       MACRO_init_uo_l_lj_t;
 
 1552       for (l_L = 0; l_L < lprod; l_L++)
 
 1558         MACRO_update_phi_prod_ll_plain(with_PRE_PSI);
 
 1560         lhs = ths->
g + ll_plain[ths->
d];
 
 1562         val = phi_prod[ths->
d] * ths->
f[j];
 
 1565         lhs_real[0] += creal(val);
 
 1568         lhs_real[1] += cimag(val);
 
 1570         MACRO_count_uo_l_lj_t;
 
 1579     R fg_exp_l[ths->
d][2*ths->
m+2];
 
 1580     for(t2 = 0; t2 < ths->
d; t2++)
 
 1583       R tmpEXP2 = EXP(K(-1.0)/ths->
b[t2]);
 
 1584       R tmpEXP2sq = tmpEXP2*tmpEXP2;
 
 1587       fg_exp_l[t2][0] = K(1.0);
 
 1588       for(lj_fg = 1; lj_fg <= (2*ths->
m+2); lj_fg++)
 
 1590         tmp3 = tmp2*tmpEXP2;
 
 1592         fg_exp_l[t2][lj_fg] = fg_exp_l[t2][lj_fg-1]*tmp3;
 
 1596     #pragma omp parallel for default(shared) private(k,t,t2) 
 1597     for (k = 0; k < ths->
M_total; k++)
 
 1599       int ll_plain[ths->
d+1]; 
 
 1600       R phi_prod[ths->
d+1]; 
 
 1601       int u[ths->
d], o[ths->
d]; 
 
 1604       R fg_psi[ths->
d][2*ths->
m+2];
 
 1610       phi_prod[0] = K(1.0);
 
 1613       MACRO_init_uo_l_lj_t;
 
 1615       for (t2 = 0; t2 < ths->
d; t2++)
 
 1617         fg_psi[t2][0] = ths->
psi[2*(j*ths->
d+t2)];
 
 1618         tmpEXP1 = ths->
psi[2*(j*ths->
d+t2)+1];
 
 1620         for (l_fg = u[t2]+1, lj_fg = 1; l_fg <= o[t2]; l_fg++, lj_fg++)
 
 1623           fg_psi[t2][lj_fg] = fg_psi[t2][0]*tmp1*fg_exp_l[t2][lj_fg];
 
 1627       for (l_L= 0; l_L < lprod; l_L++)
 
 1633         MACRO_update_phi_prod_ll_plain(with_FG_PSI);
 
 1635         lhs = ths->
g + ll_plain[ths->
d];
 
 1637         val = phi_prod[ths->
d] * ths->
f[j];
 
 1640         lhs_real[0] += creal(val);
 
 1643         lhs_real[1] += cimag(val);
 
 1645         MACRO_count_uo_l_lj_t;
 
 1654     R fg_exp_l[ths->
d][2*ths->
m+2];
 
 1656     nfft_sort_nodes(ths);
 
 1658     for (t2 = 0; t2 < ths->
d; t2++)
 
 1661       R tmpEXP2 = EXP(K(-1.0)/ths->
b[t2]);
 
 1662       R tmpEXP2sq = tmpEXP2*tmpEXP2;
 
 1665       fg_exp_l[t2][0] = K(1.0);
 
 1666       for (lj_fg = 1; lj_fg <= (2*ths->
m+2); lj_fg++)
 
 1668         tmp3 = tmp2*tmpEXP2;
 
 1670         fg_exp_l[t2][lj_fg] = fg_exp_l[t2][lj_fg-1]*tmp3;
 
 1674     #pragma omp parallel for default(shared) private(k,t,t2) 
 1675     for (k = 0; k < ths->
M_total; k++)
 
 1677       int ll_plain[ths->
d+1]; 
 
 1678       R phi_prod[ths->
d+1]; 
 
 1679       int u[ths->
d], o[ths->
d]; 
 
 1682       R fg_psi[ths->
d][2*ths->
m+2];
 
 1688       phi_prod[0] = K(1.0);
 
 1691       MACRO_init_uo_l_lj_t;
 
 1693       for (t2 = 0; t2 < ths->
d; t2++)
 
 1695         fg_psi[t2][0] = (PHI((ths->
x[j*ths->
d+t2]-((R)u[t2])/ths->
n[t2]),t2));
 
 1697         tmpEXP1 = EXP(K(2.0)*(ths->
n[t2]*ths->
x[j*ths->
d+t2] - u[t2])
 
 1700         for (l_fg = u[t2] + 1, lj_fg = 1; l_fg <= o[t2]; l_fg++, lj_fg++)
 
 1703           fg_psi[t2][lj_fg] = fg_psi[t2][0]*tmp1*fg_exp_l[t2][lj_fg];
 
 1707       for (l_L = 0; l_L < lprod; l_L++)
 
 1713         MACRO_update_phi_prod_ll_plain(with_FG_PSI);
 
 1715         lhs = ths->
g + ll_plain[ths->
d];
 
 1717         val = phi_prod[ths->
d] * ths->
f[j];
 
 1720         lhs_real[0] += creal(val);
 
 1723         lhs_real[1] += cimag(val);
 
 1725         MACRO_count_uo_l_lj_t;
 
 1733     nfft_sort_nodes(ths);
 
 1735     #pragma omp parallel for default(shared) private(k) 
 1736     for (k = 0; k<ths->
M_total; k++)
 
 1738       int u[ths->
d], o[ths->
d]; 
 
 1743       int ll_plain[ths->
d+1]; 
 
 1744       R phi_prod[ths->
d+1]; 
 
 1746       R fg_psi[ths->
d][2*ths->
m+2];
 
 1750       int ip_s = ths->
K/(ths->
m+2);
 
 1753       phi_prod[0] = K(1.0);
 
 1756       MACRO_init_uo_l_lj_t;
 
 1758       for (t2 = 0; t2 < ths->
d; t2++)
 
 1760         y[t2] = ((ths->
n[t2]*ths->
x[j*ths->
d+t2]-(R)u[t2])
 
 1761           * ((R)ths->
K))/(ths->
m+2);
 
 1762         ip_u  = LRINT(FLOOR(y[t2]));
 
 1764         for (l_fg = u[t2], lj_fg = 0; l_fg <= o[t2]; l_fg++, lj_fg++)
 
 1766           fg_psi[t2][lj_fg] = ths->
psi[(ths->
K+1)*t2 + ABS(ip_u-lj_fg*ip_s)]
 
 1767             * (1-ip_w) + ths->
psi[(ths->
K+1)*t2 + ABS(ip_u-lj_fg*ip_s+1)]
 
 1772       for (l_L = 0; l_L < lprod; l_L++)
 
 1778         MACRO_update_phi_prod_ll_plain(with_FG_PSI);
 
 1780         lhs = ths->
g + ll_plain[ths->
d];
 
 1782         val = phi_prod[ths->
d] * ths->
f[j];
 
 1785         lhs_real[0] += creal(val);
 
 1788         lhs_real[1] += cimag(val);
 
 1790         MACRO_count_uo_l_lj_t;
 
 1797   nfft_sort_nodes(ths);
 
 1799   #pragma omp parallel for default(shared) private(k) 
 1800   for (k = 0; k < ths->
M_total; k++)
 
 1802     int u[ths->
d], o[ths->
d]; 
 
 1807     int ll_plain[ths->
d+1]; 
 
 1808     R phi_prod[ths->
d+1]; 
 
 1811     phi_prod[0] = K(1.0);
 
 1814     MACRO_init_uo_l_lj_t;
 
 1816     for (l_L = 0; l_L < lprod; l_L++)
 
 1822       MACRO_update_phi_prod_ll_plain(without_PRE_PSI);
 
 1824       lhs = ths->
g + ll_plain[ths->
d];
 
 1826       val = phi_prod[ths->
d] * ths->
f[j];
 
 1829       lhs_real[0] += creal(val);
 
 1832       lhs_real[1] += cimag(val);
 
 1834       MACRO_count_uo_l_lj_t;
 
 1843   nfft_B_openmp_T(ths);
 
 1845   nfft_B_serial_T(ths);
 
 1851 static void nfft_1d_init_fg_exp_l(R *fg_exp_l, 
const int m, 
const R b)
 
 1853   const int tmp2 = 2*m+2;
 
 1855   R fg_exp_b0, fg_exp_b1, fg_exp_b2, fg_exp_b0_sq;
 
 1857   fg_exp_b0 = EXP(K(-1.0)/b);
 
 1858   fg_exp_b0_sq = fg_exp_b0*fg_exp_b0;
 
 1859   fg_exp_b1 = fg_exp_b2 =fg_exp_l[0] = K(1.0);
 
 1861   for (l = 1; l < tmp2; l++)
 
 1863     fg_exp_b2 = fg_exp_b1*fg_exp_b0;
 
 1864     fg_exp_b1 *= fg_exp_b0_sq;
 
 1865     fg_exp_l[l] = fg_exp_l[l-1]*fg_exp_b2;
 
 1870 static void nfft_trafo_1d_compute(C *fj, 
const C *g,
const R *psij_const,
 
 1871   const R *xj, 
const int n, 
const int m)
 
 1878   nfft_uo2(&u, &o, *xj, n, m);
 
 1882     for (l = 1, gj = g + u, (*fj) = (*psij++) * (*gj++); l <= 2*m+1; l++)
 
 1883       (*fj) += (*psij++) * (*gj++);
 
 1887     for (l = 1, gj = g + u, (*fj) = (*psij++) * (*gj++); l < 2*m+1 - o; l++)
 
 1888       (*fj) += (*psij++) * (*gj++);
 
 1889     for (l = 0, gj = g; l <= o; l++)
 
 1890       (*fj) += (*psij++) * (*gj++);
 
 1895 static void nfft_adjoint_1d_compute_serial(
const C *fj, C *g,
const R *psij_const,
 
 1896   const R *xj, 
const int n, 
const int m)
 
 1903   nfft_uo2(&u,&o,*xj, n, m);
 
 1907     for (l = 0, gj = g+u; l <= 2*m+1; l++)
 
 1908       (*gj++) += (*psij++) * (*fj);
 
 1912     for (l = 0, gj = g+u; l < 2*m+1-o; l++)
 
 1913       (*gj++) += (*psij++) * (*fj);
 
 1914     for (l = 0, gj = g; l <= o; l++)
 
 1915       (*gj++) += (*psij++) * (*fj);
 
 1922 static void nfft_adjoint_1d_compute_omp_atomic(
const C f, C *g,
const R *psij_const,
 
 1923   const R *xj, 
const int n, 
const int m)
 
 1927   int index_temp[2*m+2];
 
 1929   nfft_uo2(&u,&o,*xj, n, m);
 
 1931   for (l=0; l<=2*m+1; l++)
 
 1932     index_temp[l] = (l+u)%n;
 
 1934   for (l = 0, gj = g+u; l <= 2*m+1; l++)
 
 1936     int i = index_temp[l];
 
 1938     R *lhs_real = (R*)lhs;
 
 1939     C val = psij_const[l] * f;
 
 1941     lhs_real[0] += creal(val);
 
 1944     lhs_real[1] += cimag(val);
 
 1965 static void nfft_adjoint_1d_compute_omp_blockwise(
const C f, C *g,
const R *psij_const,
 
 1966   const R *xj, 
const int n, 
const int m, 
const int my_u0, 
const int my_o0)
 
 1970   nfft_uo2(&ar_u,&ar_o,*xj, n, m);
 
 1974     int u = MAX(my_u0,ar_u);
 
 1975     int o = MIN(my_o0,ar_o);
 
 1976     int offset_psij = u-ar_u;
 
 1978     assert(offset_psij >= 0);
 
 1979     assert(o-u <= 2*m+1);
 
 1980     assert(offset_psij+o-u <= 2*m+1);
 
 1983     for (l = 0; l <= o-u; l++)
 
 1984       g[u+l] += psij_const[offset_psij+l] * f;
 
 1988     int u = MAX(my_u0,ar_u);
 
 1990     int offset_psij = u-ar_u;
 
 1992     assert(offset_psij >= 0);
 
 1993     assert(o-u <= 2*m+1);
 
 1994     assert(offset_psij+o-u <= 2*m+1);
 
 1997     for (l = 0; l <= o-u; l++)
 
 1998       g[u+l] += psij_const[offset_psij+l] * f;
 
 2001     o = MIN(my_o0,ar_o);
 
 2002     offset_psij += my_u0-ar_u+n;
 
 2007       assert(o-u <= 2*m+1);
 
 2008       if (offset_psij+o-u > 2*m+1)
 
 2010         fprintf(stderr, 
"ERR: %d %d %d %d %d %d %d\n", ar_u, ar_o, my_u0, my_o0, u, o, offset_psij);
 
 2012       assert(offset_psij+o-u <= 2*m+1);
 
 2015     for (l = 0; l <= o-u; l++)
 
 2016       g[u+l] += psij_const[offset_psij+l] * f;
 
 2021 static void nfft_trafo_1d_B(
nfft_plan *ths)
 
 2023   const int n = ths->
n[0], M = ths->
M_total, m = ths->
m, m2p2 = 2*m+2;
 
 2024   const C *g = (C*)ths->
g;
 
 2029     #pragma omp parallel for default(shared) private(k) 
 2030     for (k = 0; k < M; k++)
 
 2035       for (l = 0; l < m2p2; l++)
 
 2044     #pragma omp parallel for default(shared) private(k) 
 2045     for (k = 0; k < M; k++)
 
 2048       nfft_trafo_1d_compute(&ths->
f[j], g, ths->
psi + j * (2 * m + 2),
 
 2059     nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 2061     #pragma omp parallel for default(shared) private(k) 
 2062     for (k = 0; k < M; k++)
 
 2065       const R fg_psij0 = ths->
psi[2 * j], fg_psij1 = ths->
psi[2 * j + 1];
 
 2066       R fg_psij2 = K(1.0);
 
 2070       psij_const[0] = fg_psij0;
 
 2072       for (l = 1; l < m2p2; l++)
 
 2074         fg_psij2 *= fg_psij1;
 
 2075         psij_const[l] = fg_psij0 * fg_psij2 * fg_exp_l[l];
 
 2078       nfft_trafo_1d_compute(&ths->
f[j], g, psij_const, &ths->
x[j], n, m);
 
 2089     nfft_sort_nodes(ths);
 
 2091     nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 2093     #pragma omp parallel for default(shared) private(k) 
 2094     for (k = 0; k < M; k++)
 
 2098       R fg_psij0, fg_psij1, fg_psij2;
 
 2101       nfft_uo(ths, (
int)j, &u, &o, 0);
 
 2102       fg_psij0 = (PHI(ths->
x[j]-((R)u)/n,0));
 
 2103       fg_psij1 = EXP(K(2.0) * (n * ths->
x[j] - u) / ths->
b[0]);
 
 2106       psij_const[0] = fg_psij0;
 
 2108       for (l = 1; l < m2p2; l++)
 
 2110         fg_psij2 *= fg_psij1;
 
 2111         psij_const[l] = fg_psij0 * fg_psij2 * fg_exp_l[l];
 
 2114       nfft_trafo_1d_compute(&ths->
f[j], g, psij_const, &ths->
x[j], n, m);
 
 2121     const int K = ths->
K, ip_s = K / (m + 2);
 
 2124     nfft_sort_nodes(ths);
 
 2126     #pragma omp parallel for default(shared) private(k) 
 2127     for (k = 0; k < M; k++)
 
 2135       nfft_uo(ths, (
int)j, &u, &o, 0);
 
 2137       ip_y = FABS(n * ths->
x[j] - u) * ((R)ip_s);
 
 2138       ip_u = LRINT(FLOOR(ip_y));
 
 2141       for (l = 0; l < m2p2; l++)
 
 2142         psij_const[l] = ths->
psi[ABS(ip_u-l*ip_s)] * (K(1.0) - ip_w)
 
 2143           + ths->
psi[ABS(ip_u-l*ip_s+1)] * (ip_w);
 
 2145       nfft_trafo_1d_compute(&ths->
f[j], g, psij_const, &ths->
x[j], n, m);
 
 2154     nfft_sort_nodes(ths);
 
 2156     #pragma omp parallel for default(shared) private(k) 
 2157     for (k = 0; k < M; k++)
 
 2163       nfft_uo(ths, (
int)j, &u, &o, 0);
 
 2165       for (l = 0; l < m2p2; l++)
 
 2166         psij_const[l] = (PHI(ths->
x[j]-((R)((u+l)))/n,0));
 
 2168       nfft_trafo_1d_compute(&ths->
f[j], g, psij_const, &ths->
x[j], n, m);
 
 2175 #define MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_ASSERT_A                        \ 
 2177           assert(ar_x[2*k] >= min_u_a || k == M-1);                           \ 
 2179             assert(ar_x[2*k-2] < min_u_a);                                    \ 
 2182 #define MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_ASSERT_A 
 2186 #define MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_ASSERT_B                        \ 
 2188           assert(ar_x[2*k] >= min_u_b || k == M-1);                           \ 
 2190             assert(ar_x[2*k-2] < min_u_b);                                    \ 
 2193 #define MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_ASSERT_B 
 2196 #define MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_COMPUTE_PRE_PSI                 \ 
 2198             nfft_adjoint_1d_compute_omp_blockwise(ths->f[j], g,               \ 
 2199                 ths->psi + j * (2 * m + 2), ths->x + j, n, m, my_u0, my_o0);  \ 
 2202 #define MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_COMPUTE_PRE_FG_PSI              \ 
 2204             R psij_const[2 * m + 2];                                          \ 
 2206             R fg_psij0 = ths->psi[2 * j];                                     \ 
 2207             R fg_psij1 = ths->psi[2 * j + 1];                                 \ 
 2208             R fg_psij2 = K(1.0);                                              \ 
 2210             psij_const[0] = fg_psij0;                                         \ 
 2211             for (l = 1; l <= 2 * m + 1; l++)                                  \ 
 2213               fg_psij2 *= fg_psij1;                                           \ 
 2214               psij_const[l] = fg_psij0 * fg_psij2 * fg_exp_l[l];              \ 
 2217             nfft_adjoint_1d_compute_omp_blockwise(ths->f[j], g, psij_const,   \ 
 2218                 ths->x + j, n, m, my_u0, my_o0);                              \ 
 2221 #define MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_COMPUTE_FG_PSI                  \ 
 2223             R psij_const[2 * m + 2];                                          \ 
 2224             R fg_psij0, fg_psij1, fg_psij2;                                   \ 
 2227             nfft_uo(ths, j, &u, &o, 0);                                       \ 
 2228             fg_psij0 = (PHI(ths->x[j]-((R)u)/n,0));                           \ 
 2229             fg_psij1 = EXP(K(2.0) * (n * (ths->x[j]) - u) / ths->b[0]);       \ 
 2230             fg_psij2 = K(1.0);                                                \ 
 2231             psij_const[0] = fg_psij0;                                         \ 
 2232             for (l = 1; l <= 2 * m + 1; l++)                                  \ 
 2234               fg_psij2 *= fg_psij1;                                           \ 
 2235               psij_const[l] = fg_psij0 * fg_psij2 * fg_exp_l[l];              \ 
 2238             nfft_adjoint_1d_compute_omp_blockwise(ths->f[j], g, psij_const,   \ 
 2239                 ths->x + j, n, m, my_u0, my_o0);                              \ 
 2242 #define MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_COMPUTE_PRE_LIN_PSI             \ 
 2244             R psij_const[2 * m + 2];                                          \ 
 2249             nfft_uo(ths, j, &u, &o, 0);                                       \ 
 2251             ip_y = FABS(n * ths->x[j] - u) * ((R)ip_s);                       \ 
 2252             ip_u = LRINT(FLOOR(ip_y));                                        \ 
 2253             ip_w = ip_y - ip_u;                                               \ 
 2254             for (l = 0; l < 2 * m + 2; l++)                                   \ 
 2256                   = ths->psi[ABS(ip_u-l*ip_s)] * (K(1.0) - ip_w)              \ 
 2257                       + ths->psi[ABS(ip_u-l*ip_s+1)] * (ip_w);                \ 
 2259             nfft_adjoint_1d_compute_omp_blockwise(ths->f[j], g, psij_const,   \ 
 2260                 ths->x + j, n, m, my_u0, my_o0);                              \ 
 2263 #define MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_COMPUTE_NO_PSI                  \ 
 2265             R psij_const[2 * m + 2];                                          \ 
 2268             nfft_uo(ths, j, &u, &o, 0);                                       \ 
 2270             for (l = 0; l <= 2 * m + 1; l++)                                  \ 
 2271               psij_const[l] = (PHI(ths->x[j]-((R)((u+l)))/n,0));              \ 
 2273             nfft_adjoint_1d_compute_omp_blockwise(ths->f[j], g, psij_const,   \ 
 2274                 ths->x + j, n, m, my_u0, my_o0);                              \ 
 2277 #define MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE(whichone)                       \ 
 2279     if (ths->nfft_flags & NFFT_OMP_BLOCKWISE_ADJOINT)                         \ 
 2281       _Pragma("omp parallel private(k)")                                      \ 
 2283         int my_u0, my_o0, min_u_a, max_u_a, min_u_b, max_u_b;                 \ 
 2284         int *ar_x = ths->index_x;                                             \ 
 2286         nfft_adjoint_B_omp_blockwise_init(&my_u0, &my_o0, &min_u_a, &max_u_a, \ 
 2287                                       &min_u_b, &max_u_b, 1, &n, m);      \ 
 2289         if (min_u_a != -1)                                                    \ 
 2291           k = index_x_binary_search(ar_x, M, min_u_a);                        \ 
 2293           MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_ASSERT_A                      \ 
 2297             int u_prod = ar_x[2*k];                                           \ 
 2298             int j = ar_x[2*k+1];                                              \ 
 2300             if (u_prod < min_u_a || u_prod > max_u_a)                         \ 
 2303             MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_COMPUTE_ ##whichone         \ 
 2309         if (min_u_b != -1)                                                    \ 
 2311           k = index_x_binary_search(ar_x, M, min_u_b);                        \ 
 2313           MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_ASSERT_B                      \ 
 2317             int u_prod = ar_x[2*k];                                           \ 
 2318             int j = ar_x[2*k+1];                                              \ 
 2320             if (u_prod < min_u_b || u_prod > max_u_b)                         \ 
 2323             MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE_COMPUTE_ ##whichone         \ 
 2333 static void nfft_adjoint_1d_B(
nfft_plan *ths)
 
 2335   const int n = ths->
n[0], M = ths->
M_total, m = ths->
m;
 
 2339   memset(g,0,ths->
n_total*
sizeof(C));
 
 2343     nfft_adjoint_B_compute_full_psi(g, ths->
psi_index_g, ths->
psi, ths->
f, M,
 
 2351     MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE(PRE_PSI)
 
 2354     #pragma omp parallel for default(shared) private(k) 
 2355     for (k = 0; k < M; k++)
 
 2359       nfft_adjoint_1d_compute_omp_atomic(ths->
f[j], g, ths->
psi + j * (2 * m + 2), ths->
x + j, n, m);
 
 2361       nfft_adjoint_1d_compute_serial(ths->
f + j, g, ths->
psi + j * (2 * m + 2), ths->
x + j, n, m);
 
 2370     R fg_exp_l[2 * m + 2];
 
 2372     nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 2375     MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE(PRE_FG_PSI)
 
 2379     #pragma omp parallel for default(shared) private(k) 
 2380     for (k = 0; k < M; k++)
 
 2382       R psij_const[2 * m + 2];
 
 2385       R fg_psij0 = ths->
psi[2 * j];
 
 2386       R fg_psij1 = ths->
psi[2 * j + 1];
 
 2387       R fg_psij2 = K(1.0);
 
 2389       psij_const[0] = fg_psij0;
 
 2390       for (l = 1; l <= 2 * m + 1; l++)
 
 2392         fg_psij2 *= fg_psij1;
 
 2393         psij_const[l] = fg_psij0 * fg_psij2 * fg_exp_l[l];
 
 2397       nfft_adjoint_1d_compute_omp_atomic(ths->
f[j], g, psij_const, ths->
x + j, n, m);
 
 2399       nfft_adjoint_1d_compute_serial(ths->
f + j, g, psij_const, ths->
x + j, n, m);
 
 2408     R fg_exp_l[2 * m + 2];
 
 2410     nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 2412     nfft_sort_nodes(ths);
 
 2415     MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE(FG_PSI)
 
 2418     #pragma omp parallel for default(shared) private(k) 
 2419     for (k = 0; k < M; k++)
 
 2422       R psij_const[2 * m + 2];
 
 2423       R fg_psij0, fg_psij1, fg_psij2;
 
 2426       nfft_uo(ths, j, &u, &o, 0);
 
 2427       fg_psij0 = (PHI(ths->
x[j]-((R)u)/n,0));
 
 2428       fg_psij1 = EXP(K(2.0) * (n * (ths->
x[j]) - u) / ths->
b[0]);
 
 2430       psij_const[0] = fg_psij0;
 
 2431       for (l = 1; l <= 2 * m + 1; l++)
 
 2433         fg_psij2 *= fg_psij1;
 
 2434         psij_const[l] = fg_psij0 * fg_psij2 * fg_exp_l[l];
 
 2438       nfft_adjoint_1d_compute_omp_atomic(ths->
f[j], g, psij_const, ths->
x + j, n, m);
 
 2440       nfft_adjoint_1d_compute_serial(ths->
f + j, g, psij_const, ths->
x + j, n, m);
 
 2449     const int K = ths->
K;
 
 2450     const int ip_s = K / (m + 2);
 
 2452     nfft_sort_nodes(ths);
 
 2455     MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE(PRE_LIN_PSI)
 
 2458     #pragma openmp parallel for default(shared) private(k) 
 2459     for (k = 0; k < M; k++)
 
 2465       R psij_const[2 * m + 2];
 
 2467       nfft_uo(ths, j, &u, &o, 0);
 
 2469       ip_y = FABS(n * ths->
x[j] - u) * ((R)ip_s);
 
 2470       ip_u = LRINT(FLOOR(ip_y));
 
 2472       for (l = 0; l < 2 * m + 2; l++)
 
 2474             = ths->
psi[ABS(ip_u-l*ip_s)] * (K(1.0) - ip_w)
 
 2475                 + ths->
psi[ABS(ip_u-l*ip_s+1)] * (ip_w);
 
 2478       nfft_adjoint_1d_compute_omp_atomic(ths->
f[j], g, psij_const, ths->
x + j, n, m);
 
 2480       nfft_adjoint_1d_compute_serial(ths->
f + j, g, psij_const, ths->
x + j, n, m);
 
 2487   nfft_sort_nodes(ths);
 
 2490   MACRO_nfft_adjoint_1d_B_OMP_BLOCKWISE(NO_PSI)
 
 2493   #pragma omp parallel for default(shared) private(k) 
 2494   for (k = 0; k < M; k++)
 
 2497     R psij_const[2 * m + 2];
 
 2500     nfft_uo(ths, j, &u, &o, 0);
 
 2502     for (l = 0; l <= 2 * m + 1; l++)
 
 2503       psij_const[l] = (PHI(ths->
x[j]-((R)((u+l)))/n,0));
 
 2506     nfft_adjoint_1d_compute_omp_atomic(ths->
f[j], g, psij_const, ths->
x + j, n, m);
 
 2508     nfft_adjoint_1d_compute_serial(ths->
f + j, g, psij_const, ths->
x + j, n, m);
 
 2515   const int N = ths->
N[0], N2 = N/2, n = ths->
n[0];
 
 2516   C *f_hat1 = (C*)ths->
f_hat, *f_hat2 = (C*)&ths->
f_hat[N2];
 
 2522     C *g_hat1 = (C*)&ths->
g_hat[n-N/2], *g_hat2 = (C*)ths->
g_hat;
 
 2523     R *c_phi_inv1, *c_phi_inv2;
 
 2529       #pragma omp parallel for default(shared) private(k) 
 2530       for (k = 0; k < ths->
n_total; k++)
 
 2531         ths->
g_hat[k] = 0.0;
 
 2542       #pragma omp parallel for default(shared) private(k) 
 2543       for (k = 0; k < N2; k++)
 
 2545         g_hat1[k] = f_hat1[k] * c_phi_inv1[k];
 
 2546         g_hat2[k] = f_hat2[k] * c_phi_inv2[k];
 
 2552       #pragma omp parallel for default(shared) private(k) 
 2553       for (k = 0; k < N2; k++)
 
 2555         g_hat1[k] = f_hat1[k] / (PHI_HUT(k-N2,0));
 
 2556         g_hat2[k] = f_hat2[k] / (PHI_HUT(k,0));
 
 2562     fftw_execute(ths->my_fftw_plan1);
 
 2566     nfft_trafo_1d_B(ths);
 
 2574   C *g_hat1,*g_hat2,*f_hat1,*f_hat2;
 
 2575   R *c_phi_inv1, *c_phi_inv2;
 
 2583   f_hat1=(C*)ths->f_hat;
 
 2584   f_hat2=(C*)&ths->f_hat[N/2];
 
 2585   g_hat1=(C*)&ths->g_hat[n-N/2];
 
 2586   g_hat2=(C*)ths->g_hat;
 
 2589   nfft_adjoint_1d_B(ths);
 
 2593   fftw_execute(ths->my_fftw_plan2);
 
 2597   if(ths->nfft_flags & PRE_PHI_HUT)
 
 2600     c_phi_inv1=ths->c_phi_inv[0];
 
 2601     c_phi_inv2=&ths->c_phi_inv[0][N/2];
 
 2603     #pragma omp parallel for default(shared) private(k) 
 2604     for (k = 0; k < N/2; k++)
 
 2606       f_hat1[k] = g_hat1[k] * c_phi_inv1[k];
 
 2607       f_hat2[k] = g_hat2[k] * c_phi_inv2[k];
 
 2614     #pragma omp parallel for default(shared) private(k) 
 2615     for (k = 0; k < N/2; k++)
 
 2617       f_hat1[k] = g_hat1[k] / (PHI_HUT(k-N/2,0));
 
 2618       f_hat2[k] = g_hat2[k] / (PHI_HUT(k,0));
 
 2627 static void nfft_2d_init_fg_exp_l(R *fg_exp_l, 
const int m, 
const R b)
 
 2630   R fg_exp_b0, fg_exp_b1, fg_exp_b2, fg_exp_b0_sq;
 
 2632   fg_exp_b0 = EXP(K(-1.0)/b);
 
 2633   fg_exp_b0_sq = fg_exp_b0*fg_exp_b0;
 
 2636   fg_exp_l[0] = K(1.0);
 
 2637   for(l=1; l <= 2*m+1; l++)
 
 2639       fg_exp_b2 = fg_exp_b1*fg_exp_b0;
 
 2640       fg_exp_b1 *= fg_exp_b0_sq;
 
 2641       fg_exp_l[l] = fg_exp_l[l-1]*fg_exp_b2;
 
 2645 static void nfft_trafo_2d_compute(C *fj, 
const C *g,
 
 2646           const R *psij_const0, 
const R *psij_const1,
 
 2647           const R *xj0, 
const R *xj1,
 
 2648           const int n0, 
const int n1, 
const int m)
 
 2650   int u0,o0,l0,u1,o1,l1;
 
 2652   const R *psij0,*psij1;
 
 2657   nfft_uo2(&u0,&o0,*xj0, n0, m);
 
 2658   nfft_uo2(&u1,&o1,*xj1, n1, m);
 
 2664     for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 2668         for(l1=0; l1<=2*m+1; l1++)
 
 2669       (*fj) += (*psij0) * (*psij1++) * (*gj++);
 
 2672     for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 2676         for(l1=0; l1<2*m+1-o1; l1++)
 
 2677       (*fj) += (*psij0) * (*psij1++) * (*gj++);
 
 2679         for(l1=0; l1<=o1; l1++)
 
 2680       (*fj) += (*psij0) * (*psij1++) * (*gj++);
 
 2685     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 2689         for(l1=0; l1<=2*m+1; l1++)
 
 2690       (*fj) += (*psij0) * (*psij1++) * (*gj++);
 
 2692     for(l0=0; l0<=o0; l0++,psij0++)
 
 2696         for(l1=0; l1<=2*m+1; l1++)
 
 2697       (*fj) += (*psij0) * (*psij1++) * (*gj++);
 
 2702     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 2706         for(l1=0; l1<2*m+1-o1; l1++)
 
 2707       (*fj) += (*psij0) * (*psij1++) * (*gj++);
 
 2709         for(l1=0; l1<=o1; l1++)
 
 2710       (*fj) += (*psij0) * (*psij1++) * (*gj++);
 
 2712     for(l0=0; l0<=o0; l0++,psij0++)
 
 2716         for(l1=0; l1<2*m+1-o1; l1++)
 
 2717       (*fj) += (*psij0) * (*psij1++) * (*gj++);
 
 2719         for(l1=0; l1<=o1; l1++)
 
 2720       (*fj) += (*psij0) * (*psij1++) * (*gj++);
 
 2727 static void nfft_adjoint_2d_compute_omp_atomic(
const C f, C *g,
 
 2728             const R *psij_const0, 
const R *psij_const1,
 
 2729             const R *xj0, 
const R *xj1,
 
 2730             const int n0, 
const int n1, 
const int m)
 
 2732   int u0,o0,l0,u1,o1,l1;
 
 2733   const int lprod = (2*m+2) * (2*m+2); 
 
 2735   unsigned long int index_temp0[2*m+2];
 
 2736   unsigned long int index_temp1[2*m+2];
 
 2738   nfft_uo2(&u0,&o0,*xj0, n0, m);
 
 2739   nfft_uo2(&u1,&o1,*xj1, n1, m);
 
 2741   for (l0=0; l0<=2*m+1; l0++)
 
 2742     index_temp0[l0] = (u0+l0)%n0;
 
 2744   for (l1=0; l1<=2*m+1; l1++)
 
 2745     index_temp1[l1] = (u1+l1)%n1;
 
 2747   for(l0=0; l0<=2*m+1; l0++)
 
 2749     for(l1=0; l1<=2*m+1; l1++)
 
 2751       unsigned long int i = index_temp0[l0] * n1 + index_temp1[l1];
 
 2753       R *lhs_real = (R*)lhs;
 
 2754       C val = psij_const0[l0] * psij_const1[l1] * f;
 
 2757       lhs_real[0] += creal(val);
 
 2760       lhs_real[1] += cimag(val);
 
 2785 static void nfft_adjoint_2d_compute_omp_blockwise(
const C f, C *g,
 
 2786             const R *psij_const0, 
const R *psij_const1,
 
 2787             const R *xj0, 
const R *xj1,
 
 2788             const int n0, 
const int n1, 
const int m,
 
 2789       const int my_u0, 
const int my_o0)
 
 2791   int ar_u0,ar_o0,l0,u1,o1,l1;
 
 2792   const int lprod = (2*m+2) * (2*m+2); 
 
 2793   unsigned long int index_temp1[2*m+2];
 
 2795   nfft_uo2(&ar_u0,&ar_o0,*xj0, n0, m);
 
 2796   nfft_uo2(&u1,&o1,*xj1, n1, m);
 
 2798   for (l1=0; l1<=2*m+1; l1++)
 
 2799     index_temp1[l1] = (u1+l1)%n1;
 
 2803     int u0 = MAX(my_u0,ar_u0);
 
 2804     int o0 = MIN(my_o0,ar_o0);
 
 2805     int offset_psij = u0-ar_u0;
 
 2807     assert(offset_psij >= 0);
 
 2808     assert(o0-u0 <= 2*m+1);
 
 2809     assert(offset_psij+o0-u0 <= 2*m+1);
 
 2812     for (l0 = 0; l0 <= o0-u0; l0++)
 
 2814       unsigned long int i0 = (u0+l0) * n1;
 
 2815       const C val0 = psij_const0[offset_psij+l0];
 
 2817       for(l1=0; l1<=2*m+1; l1++)
 
 2818         g[i0 + index_temp1[l1]] += val0 * psij_const1[l1] * f;
 
 2823     int u0 = MAX(my_u0,ar_u0);
 
 2825     int offset_psij = u0-ar_u0;
 
 2827     assert(offset_psij >= 0);
 
 2828     assert(o0-u0 <= 2*m+1);
 
 2829     assert(offset_psij+o0-u0 <= 2*m+1);
 
 2832     for (l0 = 0; l0 <= o0-u0; l0++)
 
 2834       unsigned long int i0 = (u0+l0) * n1;
 
 2835       const C val0 = psij_const0[offset_psij+l0];
 
 2837       for(l1=0; l1<=2*m+1; l1++)
 
 2838         g[i0 + index_temp1[l1]] += val0 * psij_const1[l1] * f;
 
 2842     o0 = MIN(my_o0,ar_o0);
 
 2843     offset_psij += my_u0-ar_u0+n0;
 
 2848       assert(o0-u0 <= 2*m+1);
 
 2849       assert(offset_psij+o0-u0 <= 2*m+1);
 
 2853     for (l0 = 0; l0 <= o0-u0; l0++)
 
 2855       unsigned long int i0 = (u0+l0) * n1;
 
 2856       const C val0 = psij_const0[offset_psij+l0];
 
 2858       for(l1=0; l1<=2*m+1; l1++)
 
 2859         g[i0 + index_temp1[l1]] += val0 * psij_const1[l1] * f;
 
 2866 static void nfft_adjoint_2d_compute_serial(
const C *fj, C *g,
 
 2867             const R *psij_const0, 
const R *psij_const1,
 
 2868             const R *xj0, 
const R *xj1,
 
 2869             const int n0, 
const int n1, 
const int m)
 
 2871   int u0,o0,l0,u1,o1,l1;
 
 2873   const R *psij0,*psij1;
 
 2878   nfft_uo2(&u0,&o0,*xj0, n0, m);
 
 2879   nfft_uo2(&u1,&o1,*xj1, n1, m);
 
 2883     for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 2887         for(l1=0; l1<=2*m+1; l1++)
 
 2888     (*gj++) += (*psij0) * (*psij1++) * (*fj);
 
 2891     for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 2895         for(l1=0; l1<2*m+1-o1; l1++)
 
 2896       (*gj++) += (*psij0) * (*psij1++) * (*fj);
 
 2898         for(l1=0; l1<=o1; l1++)
 
 2899       (*gj++) += (*psij0) * (*psij1++) * (*fj);
 
 2904     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 2908         for(l1=0; l1<=2*m+1; l1++)
 
 2909       (*gj++) += (*psij0) * (*psij1++) * (*fj);
 
 2911     for(l0=0; l0<=o0; l0++,psij0++)
 
 2915         for(l1=0; l1<=2*m+1; l1++)
 
 2916       (*gj++) += (*psij0) * (*psij1++) * (*fj);
 
 2921     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 2925         for(l1=0; l1<2*m+1-o1; l1++)
 
 2926       (*gj++) += (*psij0) * (*psij1++) * (*fj);
 
 2928         for(l1=0; l1<=o1; l1++)
 
 2929       (*gj++) += (*psij0) * (*psij1++) * (*fj);
 
 2931     for(l0=0; l0<=o0; l0++,psij0++)
 
 2935         for(l1=0; l1<2*m+1-o1; l1++)
 
 2936       (*gj++) += (*psij0) * (*psij1++) * (*fj);
 
 2938         for(l1=0; l1<=o1; l1++)
 
 2939       (*gj++) += (*psij0) * (*psij1++) * (*fj);
 
 2945 static void nfft_trafo_2d_B(
nfft_plan *ths)
 
 2947   const C *g = (C*)ths->
g;
 
 2948   const int N0 = ths->
N[0];
 
 2949   const int n0 = ths->
n[0];
 
 2950   const int N1 = ths->
N[1];
 
 2951   const int n1 = ths->
n[1];
 
 2953   const int m = ths->
m;
 
 2959     const int lprod = (2*m+2) * (2*m+2);
 
 2960     #pragma omp parallel for default(shared) private(k) 
 2961     for (k = 0; k < M; k++)
 
 2966       for (l = 0; l < lprod; l++)
 
 2974     #pragma omp parallel for default(shared) private(k) 
 2975     for (k = 0; k < M; k++)
 
 2978       nfft_trafo_2d_compute(ths->
f+j, g, ths->
psi+j*2*(2*m+2), ths->
psi+(j*2+1)*(2*m+2), ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 2986     R fg_exp_l[2*(2*m+2)];
 
 2988     nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 2989     nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->
b[1]);
 
 2991     #pragma omp parallel for default(shared) private(k) 
 2992     for (k = 0; k < M; k++)
 
 2994       R psij_const[2*(2*m+2)];
 
 2997       R fg_psij0 = ths->
psi[2*j*2];
 
 2998       R fg_psij1 = ths->
psi[2*j*2+1];
 
 2999       R fg_psij2 = K(1.0);
 
 3001       psij_const[0] = fg_psij0;
 
 3002       for(l=1; l<=2*m+1; l++)
 
 3004         fg_psij2 *= fg_psij1;
 
 3005         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
 
 3008       fg_psij0 = ths->
psi[2*(j*2+1)];
 
 3009       fg_psij1 = ths->
psi[2*(j*2+1)+1];
 
 3011       psij_const[2*m+2] = fg_psij0;
 
 3012       for(l=1; l<=2*m+1; l++)
 
 3014         fg_psij2 *= fg_psij1;
 
 3015         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
 
 3018       nfft_trafo_2d_compute(ths->
f+j, g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3026     R fg_exp_l[2*(2*m+2)];
 
 3028     nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 3029     nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->
b[1]);
 
 3031     nfft_sort_nodes(ths);
 
 3033     #pragma omp parallel for default(shared) private(k) 
 3034     for (k = 0; k < M; k++)
 
 3037       R fg_psij0, fg_psij1, fg_psij2;
 
 3038       R psij_const[2*(2*m+2)];
 
 3041       nfft_uo(ths,j,&u,&o,0);
 
 3042       fg_psij0 = (PHI(ths->
x[2*j]-((R)u)/n0,0));
 
 3043       fg_psij1 = EXP(K(2.0)*(n0*(ths->
x[2*j]) - u)/ths->
b[0]);
 
 3045       psij_const[0] = fg_psij0;
 
 3046       for(l=1; l<=2*m+1; l++)
 
 3048         fg_psij2 *= fg_psij1;
 
 3049         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
 
 3052       nfft_uo(ths,j,&u,&o,1);
 
 3053       fg_psij0 = (PHI(ths->
x[2*j+1]-((R)u)/n1,1));
 
 3054       fg_psij1 = EXP(K(2.0)*(n1*(ths->
x[2*j+1]) - u)/ths->
b[1]);
 
 3056       psij_const[2*m+2] = fg_psij0;
 
 3057       for(l=1; l<=2*m+1; l++)
 
 3059         fg_psij2 *= fg_psij1;
 
 3060         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
 
 3063       nfft_trafo_2d_compute(ths->
f+j, g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3071     const int K = ths->
K, ip_s = K / (m + 2);
 
 3073     nfft_sort_nodes(ths);
 
 3075     #pragma omp parallel for default(shared) private(k) 
 3076     for (k = 0; k < M; k++)
 
 3081       R psij_const[2*(2*m+2)];
 
 3084       nfft_uo(ths,j,&u,&o,0);
 
 3085       ip_y = FABS(n0*ths->
x[2*j] - u)*((R)ip_s);
 
 3086       ip_u = LRINT(FLOOR(ip_y));
 
 3088       for(l=0; l < 2*m+2; l++)
 
 3089         psij_const[l] = ths->
psi[ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) + ths->
psi[ABS(ip_u-l*ip_s+1)]*(ip_w);
 
 3091       nfft_uo(ths,j,&u,&o,1);
 
 3092       ip_y = FABS(n1*ths->
x[2*j+1] - u)*((R)ip_s);
 
 3093       ip_u = LRINT(FLOOR(ip_y));
 
 3095       for(l=0; l < 2*m+2; l++)
 
 3096         psij_const[2*m+2+l] = ths->
psi[(K+1)+ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) + ths->
psi[(K+1)+ABS(ip_u-l*ip_s+1)]*(ip_w);
 
 3098       nfft_trafo_2d_compute(ths->
f+j, g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3105   nfft_sort_nodes(ths);
 
 3107   #pragma omp parallel for default(shared) private(k) 
 3108   for (k = 0; k < M; k++)
 
 3110     R psij_const[2*(2*m+2)];
 
 3114     nfft_uo(ths,j,&u,&o,0);
 
 3115     for(l=0;l<=2*m+1;l++)
 
 3116       psij_const[l]=(PHI(ths->
x[2*j]-((R)((u+l)))/n0,0));
 
 3118     nfft_uo(ths,j,&u,&o,1);
 
 3119     for(l=0;l<=2*m+1;l++)
 
 3120       psij_const[2*m+2+l]=(PHI(ths->
x[2*j+1]-((R)((u+l)))/n1,1));
 
 3122     nfft_trafo_2d_compute(ths->
f+j, g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3127 #define MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_ASSERT_A                        \ 
 3129           assert(ar_x[2*k] >= min_u_a || k == M-1);                           \ 
 3131             assert(ar_x[2*k-2] < min_u_a);                                    \ 
 3134 #define MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_ASSERT_A 
 3138 #define MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_ASSERT_B                        \ 
 3140           assert(ar_x[2*k] >= min_u_b || k == M-1);                           \ 
 3142             assert(ar_x[2*k-2] < min_u_b);                                    \ 
 3145 #define MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_ASSERT_B 
 3148 #define MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_COMPUTE_PRE_PSI                 \ 
 3149             nfft_adjoint_2d_compute_omp_blockwise(ths->f[j], g,               \ 
 3150                 ths->psi+j*2*(2*m+2), ths->psi+(j*2+1)*(2*m+2),               \ 
 3151                 ths->x+2*j, ths->x+2*j+1, n0, n1, m, my_u0, my_o0); 
 3153 #define MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_COMPUTE_PRE_FG_PSI              \ 
 3155             R psij_const[2*(2*m+2)];                                          \ 
 3157             R fg_psij0 = ths->psi[2*j*2];                                     \ 
 3158             R fg_psij1 = ths->psi[2*j*2+1];                                   \ 
 3159             R fg_psij2 = K(1.0);                                              \ 
 3161             psij_const[0] = fg_psij0;                                         \ 
 3162             for(l=1; l<=2*m+1; l++)                                           \ 
 3164               fg_psij2 *= fg_psij1;                                           \ 
 3165               psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];                  \ 
 3168             fg_psij0 = ths->psi[2*(j*2+1)];                                   \ 
 3169             fg_psij1 = ths->psi[2*(j*2+1)+1];                                 \ 
 3170             fg_psij2 = K(1.0);                                                \ 
 3171             psij_const[2*m+2] = fg_psij0;                                     \ 
 3172             for(l=1; l<=2*m+1; l++)                                           \ 
 3174               fg_psij2 *= fg_psij1;                                           \ 
 3175               psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];      \ 
 3178             nfft_adjoint_2d_compute_omp_blockwise(ths->f[j], g,               \ 
 3179                 psij_const, psij_const+2*m+2, ths->x+2*j, ths->x+2*j+1,       \ 
 3180                 n0, n1, m, my_u0, my_o0);                                     \ 
 3183 #define MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_COMPUTE_FG_PSI                  \ 
 3185             R psij_const[2*(2*m+2)];                                          \ 
 3186             R fg_psij0, fg_psij1, fg_psij2;                                   \ 
 3189             nfft_uo(ths,j,&u,&o,0);                                           \ 
 3190             fg_psij0 = (PHI(ths->x[2*j]-((R)u)/n0,0));                        \ 
 3191             fg_psij1 = EXP(K(2.0)*(n0*(ths->x[2*j]) - u)/ths->b[0]);          \ 
 3192             fg_psij2 = K(1.0);                                                \ 
 3193             psij_const[0] = fg_psij0;                                         \ 
 3194             for(l=1; l<=2*m+1; l++)                                           \ 
 3196               fg_psij2 *= fg_psij1;                                           \ 
 3197               psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];                  \ 
 3200             nfft_uo(ths,j,&u,&o,1);                                           \ 
 3201             fg_psij0 = (PHI(ths->x[2*j+1]-((R)u)/n1,1));                      \ 
 3202             fg_psij1 = EXP(K(2.0)*(n1*(ths->x[2*j+1]) - u)/ths->b[1]);        \ 
 3203             fg_psij2 = K(1.0);                                                \ 
 3204             psij_const[2*m+2] = fg_psij0;                                     \ 
 3205             for(l=1; l<=2*m+1; l++)                                           \ 
 3207               fg_psij2 *= fg_psij1;                                           \ 
 3208               psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];      \ 
 3211             nfft_adjoint_2d_compute_omp_blockwise(ths->f[j], g,               \ 
 3212                 psij_const, psij_const+2*m+2, ths->x+2*j, ths->x+2*j+1,       \ 
 3213                 n0, n1, m, my_u0, my_o0);                                     \ 
 3216 #define MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_COMPUTE_PRE_LIN_PSI             \ 
 3218             R psij_const[2*(2*m+2)];                                          \ 
 3223             nfft_uo(ths,j,&u,&o,0);                                           \ 
 3224             ip_y = FABS(n0*(ths->x[2*j]) - u)*((R)ip_s);                      \ 
 3225             ip_u = LRINT(FLOOR(ip_y));                                        \ 
 3227             for(l=0; l < 2*m+2; l++)                                          \ 
 3228               psij_const[l] = ths->psi[ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +      \ 
 3229                 ths->psi[ABS(ip_u-l*ip_s+1)]*(ip_w);                          \ 
 3231             nfft_uo(ths,j,&u,&o,1);                                           \ 
 3232             ip_y = FABS(n1*(ths->x[2*j+1]) - u)*((R)ip_s);                    \ 
 3233             ip_u = LRINT(FLOOR(ip_y));                                        \ 
 3235             for(l=0; l < 2*m+2; l++)                                          \ 
 3236               psij_const[2*m+2+l] = ths->psi[(K+1)+ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +  \ 
 3237                 ths->psi[(K+1)+ABS(ip_u-l*ip_s+1)]*(ip_w);                    \ 
 3239             nfft_adjoint_2d_compute_omp_blockwise(ths->f[j], g,               \ 
 3240                 psij_const, psij_const+2*m+2, ths->x+2*j, ths->x+2*j+1,       \ 
 3241                 n0, n1, m, my_u0, my_o0);                                     \ 
 3244 #define MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_COMPUTE_NO_PSI                  \ 
 3246             R psij_const[2*(2*m+2)];                                          \ 
 3249             nfft_uo(ths,j,&u,&o,0);                                           \ 
 3250             for(l=0;l<=2*m+1;l++)                                             \ 
 3251               psij_const[l]=(PHI(ths->x[2*j]-((R)((u+l)))/n0,0));             \ 
 3253             nfft_uo(ths,j,&u,&o,1);                                           \ 
 3254             for(l=0;l<=2*m+1;l++)                                             \ 
 3255               psij_const[2*m+2+l]=(PHI(ths->x[2*j+1]-((R)((u+l)))/n1,1));     \ 
 3257             nfft_adjoint_2d_compute_omp_blockwise(ths->f[j], g,               \ 
 3258                 psij_const, psij_const+2*m+2, ths->x+2*j, ths->x+2*j+1,       \ 
 3259                 n0, n1, m, my_u0, my_o0);                                     \ 
 3262 #define MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE(whichone)                       \ 
 3264     if (ths->nfft_flags & NFFT_OMP_BLOCKWISE_ADJOINT)                         \ 
 3266       _Pragma("omp parallel private(k)")                                      \ 
 3268         int my_u0, my_o0, min_u_a, max_u_a, min_u_b, max_u_b;                 \ 
 3269         int *ar_x = ths->index_x;                                             \ 
 3271         nfft_adjoint_B_omp_blockwise_init(&my_u0, &my_o0, &min_u_a, &max_u_a, \ 
 3272             &min_u_b, &max_u_b, 2, ths->n, m);                                \ 
 3274         if (min_u_a != -1)                                                    \ 
 3276           k = index_x_binary_search(ar_x, M, min_u_a);                        \ 
 3278           MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_ASSERT_A                      \ 
 3282             int u_prod = ar_x[2*k];                                           \ 
 3283             int j = ar_x[2*k+1];                                              \ 
 3285             if (u_prod < min_u_a || u_prod > max_u_a)                         \ 
 3288             MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_COMPUTE_ ##whichone         \ 
 3294         if (min_u_b != -1)                                                    \ 
 3296           int k = index_x_binary_search(ar_x, M, min_u_b);                    \ 
 3298           MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_ASSERT_B                      \ 
 3302             int u_prod = ar_x[2*k];                                           \ 
 3303             int j = ar_x[2*k+1];                                              \ 
 3305             if (u_prod < min_u_b || u_prod > max_u_b)                         \ 
 3308             MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE_COMPUTE_ ##whichone         \ 
 3319 static void nfft_adjoint_2d_B(
nfft_plan *ths)
 
 3321   const int N0 = ths->
N[0];
 
 3322   const int n0 = ths->
n[0];
 
 3323   const int N1 = ths->
N[1];
 
 3324   const int n1 = ths->
n[1];
 
 3326   const int m = ths->
m;
 
 3330   memset(g,0,ths->
n_total*
sizeof(C));
 
 3334     nfft_adjoint_B_compute_full_psi(g, ths->
psi_index_g, ths->
psi, ths->
f, M,
 
 3342     MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE(PRE_PSI)
 
 3345     #pragma omp parallel for default(shared) private(k) 
 3346     for (k = 0; k < M; k++)
 
 3350       nfft_adjoint_2d_compute_omp_atomic(ths->
f[j], g, ths->
psi+j*2*(2*m+2), ths->
psi+(j*2+1)*(2*m+2), ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3352       nfft_adjoint_2d_compute_serial(ths->
f+j, g, ths->
psi+j*2*(2*m+2), ths->
psi+(j*2+1)*(2*m+2), ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3360     R fg_exp_l[2*(2*m+2)];
 
 3362     nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 3363     nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->
b[1]);
 
 3366     MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE(PRE_FG_PSI)
 
 3370     #pragma omp parallel for default(shared) private(k) 
 3371     for (k = 0; k < M; k++)
 
 3373       R psij_const[2*(2*m+2)];
 
 3376       R fg_psij0 = ths->
psi[2*j*2];
 
 3377       R fg_psij1 = ths->
psi[2*j*2+1];
 
 3378       R fg_psij2 = K(1.0);
 
 3380       psij_const[0] = fg_psij0;
 
 3381       for(l=1; l<=2*m+1; l++)
 
 3383         fg_psij2 *= fg_psij1;
 
 3384         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
 
 3387       fg_psij0 = ths->
psi[2*(j*2+1)];
 
 3388       fg_psij1 = ths->
psi[2*(j*2+1)+1];
 
 3390       psij_const[2*m+2] = fg_psij0;
 
 3391       for(l=1; l<=2*m+1; l++)
 
 3393         fg_psij2 *= fg_psij1;
 
 3394         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
 
 3398       nfft_adjoint_2d_compute_omp_atomic(ths->
f[j], g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3400       nfft_adjoint_2d_compute_serial(ths->
f+j, g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3409     R fg_exp_l[2*(2*m+2)];
 
 3411     nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 3412     nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->
b[1]);
 
 3414     nfft_sort_nodes(ths);
 
 3417     MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE(FG_PSI)
 
 3420     #pragma omp parallel for default(shared) private(k) 
 3421     for (k = 0; k < M; k++)
 
 3424       R fg_psij0, fg_psij1, fg_psij2;
 
 3425       R psij_const[2*(2*m+2)];
 
 3428       nfft_uo(ths,j,&u,&o,0);
 
 3429       fg_psij0 = (PHI(ths->
x[2*j]-((R)u)/n0,0));
 
 3430       fg_psij1 = EXP(K(2.0)*(n0*(ths->
x[2*j]) - u)/ths->
b[0]);
 
 3432       psij_const[0] = fg_psij0;
 
 3433       for(l=1; l<=2*m+1; l++)
 
 3435         fg_psij2 *= fg_psij1;
 
 3436         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
 
 3439       nfft_uo(ths,j,&u,&o,1);
 
 3440       fg_psij0 = (PHI(ths->
x[2*j+1]-((R)u)/n1,1));
 
 3441       fg_psij1 = EXP(K(2.0)*(n1*(ths->
x[2*j+1]) - u)/ths->
b[1]);
 
 3443       psij_const[2*m+2] = fg_psij0;
 
 3444       for(l=1; l<=2*m+1; l++)
 
 3446         fg_psij2 *= fg_psij1;
 
 3447         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
 
 3451       nfft_adjoint_2d_compute_omp_atomic(ths->
f[j], g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3453       nfft_adjoint_2d_compute_serial(ths->
f+j, g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3462     const int K = ths->
K;
 
 3463     const int ip_s = K / (m + 2);
 
 3465     nfft_sort_nodes(ths);
 
 3468     MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE(PRE_LIN_PSI)
 
 3471     #pragma openmp parallel for default(shared) private(k) 
 3472     for (k = 0; k < M; k++)
 
 3478       R psij_const[2*(2*m+2)];
 
 3480       nfft_uo(ths,j,&u,&o,0);
 
 3481       ip_y = FABS(n0*(ths->
x[2*j]) - u)*((R)ip_s);
 
 3482       ip_u = LRINT(FLOOR(ip_y));
 
 3484       for(l=0; l < 2*m+2; l++)
 
 3485         psij_const[l] = ths->
psi[ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +
 
 3486           ths->
psi[ABS(ip_u-l*ip_s+1)]*(ip_w);
 
 3488       nfft_uo(ths,j,&u,&o,1);
 
 3489       ip_y = FABS(n1*(ths->
x[2*j+1]) - u)*((R)ip_s);
 
 3490       ip_u = LRINT(FLOOR(ip_y));
 
 3492       for(l=0; l < 2*m+2; l++)
 
 3493         psij_const[2*m+2+l] = ths->
psi[(K+1)+ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +
 
 3494           ths->
psi[(K+1)+ABS(ip_u-l*ip_s+1)]*(ip_w);
 
 3497       nfft_adjoint_2d_compute_omp_atomic(ths->
f[j], g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3499       nfft_adjoint_2d_compute_serial(ths->
f+j, g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3506   nfft_sort_nodes(ths);
 
 3509   MACRO_nfft_adjoint_2d_B_OMP_BLOCKWISE(NO_PSI)
 
 3512   #pragma omp parallel for default(shared) private(k) 
 3513   for (k = 0; k < M; k++)
 
 3516     R psij_const[2*(2*m+2)];
 
 3519     nfft_uo(ths,j,&u,&o,0);
 
 3520     for(l=0;l<=2*m+1;l++)
 
 3521       psij_const[l]=(PHI(ths->
x[2*j]-((R)((u+l)))/n0,0));
 
 3523     nfft_uo(ths,j,&u,&o,1);
 
 3524     for(l=0;l<=2*m+1;l++)
 
 3525       psij_const[2*m+2+l]=(PHI(ths->
x[2*j+1]-((R)((u+l)))/n1,1));
 
 3528     nfft_adjoint_2d_compute_omp_atomic(ths->
f[j], g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3530     nfft_adjoint_2d_compute_serial(ths->
f+j, g, psij_const, psij_const+2*m+2, ths->
x+2*j, ths->
x+2*j+1, n0, n1, m);
 
 3538   int k0,k1,n0,n1,N0,N1;
 
 3540   R *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12;
 
 3541   R ck01, ck02, ck11, ck12;
 
 3542   C *g_hat11,*f_hat11,*g_hat21,*f_hat21,*g_hat12,*f_hat12,*g_hat22,*f_hat22;
 
 3552   f_hat=(C*)ths->
f_hat;
 
 3553   g_hat=(C*)ths->
g_hat;
 
 3557   #pragma omp parallel for default(shared) private(k0) 
 3558   for (k0 = 0; k0 < ths->
n_total; k0++)
 
 3559     ths->
g_hat[k0] = 0.0;
 
 3568       #pragma omp parallel for default(shared) private(k0,k1,ck01,ck02,c_phi_inv11,c_phi_inv12,g_hat11,f_hat11,g_hat21,f_hat21,g_hat12,f_hat12,g_hat22,f_hat22,ck11,ck12) 
 3569       for(k0=0;k0<N0/2;k0++)
 
 3571         ck01=c_phi_inv01[k0];
 
 3572         ck02=c_phi_inv02[k0];
 
 3577         g_hat11=g_hat + (n0-(N0/2)+k0)*n1+n1-(N1/2);
 
 3578         f_hat11=f_hat + k0*N1;
 
 3579         g_hat21=g_hat + k0*n1+n1-(N1/2);
 
 3580         f_hat21=f_hat + ((N0/2)+k0)*N1;
 
 3581         g_hat12=g_hat + (n0-(N0/2)+k0)*n1;
 
 3582         f_hat12=f_hat + k0*N1+(N1/2);
 
 3583         g_hat22=g_hat + k0*n1;
 
 3584         f_hat22=f_hat + ((N0/2)+k0)*N1+(N1/2);
 
 3586         for(k1=0;k1<N1/2;k1++)
 
 3588           ck11=c_phi_inv11[k1];
 
 3589           ck12=c_phi_inv12[k1];
 
 3591           g_hat11[k1] = f_hat11[k1] * ck01 * ck11;
 
 3592           g_hat21[k1] = f_hat21[k1] * ck02 * ck11;
 
 3593           g_hat12[k1] = f_hat12[k1] * ck01 * ck12;
 
 3594           g_hat22[k1] = f_hat22[k1] * ck02 * ck12;
 
 3599     #pragma omp parallel for default(shared) private(k0,k1,ck01,ck02,ck11,ck12) 
 3600     for(k0=0;k0<N0/2;k0++)
 
 3602   ck01=K(1.0)/(PHI_HUT(k0-N0/2,0));
 
 3603   ck02=K(1.0)/(PHI_HUT(k0,0));
 
 3604   for(k1=0;k1<N1/2;k1++)
 
 3606       ck11=K(1.0)/(PHI_HUT(k1-N1/2,1));
 
 3607       ck12=K(1.0)/(PHI_HUT(k1,1));
 
 3608       g_hat[(n0-N0/2+k0)*n1+n1-N1/2+k1] = f_hat[k0*N1+k1]             * ck01 * ck11;
 
 3609       g_hat[k0*n1+n1-N1/2+k1]           = f_hat[(N0/2+k0)*N1+k1]      * ck02 * ck11;
 
 3610       g_hat[(n0-N0/2+k0)*n1+k1]         = f_hat[k0*N1+N1/2+k1]        * ck01 * ck12;
 
 3611       g_hat[k0*n1+k1]                   = f_hat[(N0/2+k0)*N1+N1/2+k1] * ck02 * ck12;
 
 3618   fftw_execute(ths->my_fftw_plan1);
 
 3622   nfft_trafo_2d_B(ths);
 
 3628   int k0,k1,n0,n1,N0,N1;
 
 3630   R *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12;
 
 3631   R ck01, ck02, ck11, ck12;
 
 3632   C *g_hat11,*f_hat11,*g_hat21,*f_hat21,*g_hat12,*f_hat12,*g_hat22,*f_hat22;
 
 3642   f_hat=(C*)ths->f_hat;
 
 3643   g_hat=(C*)ths->g_hat;
 
 3646   nfft_adjoint_2d_B(ths);
 
 3650   fftw_execute(ths->my_fftw_plan2);
 
 3654   if(ths->nfft_flags & PRE_PHI_HUT)
 
 3656       c_phi_inv01=ths->c_phi_inv[0];
 
 3657       c_phi_inv02=&ths->c_phi_inv[0][N0/2];
 
 3659       #pragma omp parallel for default(shared) private(k0,k1,ck01,ck02,c_phi_inv11,c_phi_inv12,g_hat11,f_hat11,g_hat21,f_hat21,g_hat12,f_hat12,g_hat22,f_hat22,ck11,ck12) 
 3660       for(k0=0;k0<N0/2;k0++)
 
 3662         ck01=c_phi_inv01[k0];
 
 3663         ck02=c_phi_inv02[k0];
 
 3665         c_phi_inv11=ths->c_phi_inv[1];
 
 3666         c_phi_inv12=&ths->c_phi_inv[1][N1/2];
 
 3668         g_hat11=g_hat + (n0-(N0/2)+k0)*n1+n1-(N1/2);
 
 3669         f_hat11=f_hat + k0*N1;
 
 3670         g_hat21=g_hat + k0*n1+n1-(N1/2);
 
 3671         f_hat21=f_hat + ((N0/2)+k0)*N1;
 
 3672         g_hat12=g_hat + (n0-(N0/2)+k0)*n1;
 
 3673         f_hat12=f_hat + k0*N1+(N1/2);
 
 3674         g_hat22=g_hat + k0*n1;
 
 3675         f_hat22=f_hat + ((N0/2)+k0)*N1+(N1/2);
 
 3677         for(k1=0;k1<N1/2;k1++)
 
 3679           ck11=c_phi_inv11[k1];
 
 3680           ck12=c_phi_inv12[k1];
 
 3682           f_hat11[k1] = g_hat11[k1] * ck01 * ck11;
 
 3683           f_hat21[k1] = g_hat21[k1] * ck02 * ck11;
 
 3684           f_hat12[k1] = g_hat12[k1] * ck01 * ck12;
 
 3685           f_hat22[k1] = g_hat22[k1] * ck02 * ck12;
 
 3690     #pragma omp parallel for default(shared) private(k0,k1,ck01,ck02,ck11,ck12) 
 3691     for(k0=0;k0<N0/2;k0++)
 
 3693   ck01=K(1.0)/(PHI_HUT(k0-N0/2,0));
 
 3694   ck02=K(1.0)/(PHI_HUT(k0,0));
 
 3695   for(k1=0;k1<N1/2;k1++)
 
 3697       ck11=K(1.0)/(PHI_HUT(k1-N1/2,1));
 
 3698       ck12=K(1.0)/(PHI_HUT(k1,1));
 
 3699       f_hat[k0*N1+k1]             = g_hat[(n0-N0/2+k0)*n1+n1-N1/2+k1] * ck01 * ck11;
 
 3700       f_hat[(N0/2+k0)*N1+k1]      = g_hat[k0*n1+n1-N1/2+k1]           * ck02 * ck11;
 
 3701       f_hat[k0*N1+N1/2+k1]        = g_hat[(n0-N0/2+k0)*n1+k1]         * ck01 * ck12;
 
 3702       f_hat[(N0/2+k0)*N1+N1/2+k1] = g_hat[k0*n1+k1]                   * ck02 * ck12;
 
 3710 static void nfft_3d_init_fg_exp_l(R *fg_exp_l, 
const int m, 
const R b)
 
 3713   R fg_exp_b0, fg_exp_b1, fg_exp_b2, fg_exp_b0_sq;
 
 3715   fg_exp_b0 = EXP(-1.0/b);
 
 3716   fg_exp_b0_sq = fg_exp_b0*fg_exp_b0;
 
 3719   fg_exp_l[0] = K(1.0);
 
 3720   for(l=1; l <= 2*m+1; l++)
 
 3722       fg_exp_b2 = fg_exp_b1*fg_exp_b0;
 
 3723       fg_exp_b1 *= fg_exp_b0_sq;
 
 3724       fg_exp_l[l] = fg_exp_l[l-1]*fg_exp_b2;
 
 3728 static void nfft_trafo_3d_compute(C *fj, 
const C *g,
 
 3729           const R *psij_const0, 
const R *psij_const1, 
const R *psij_const2,
 
 3730           const R *xj0, 
const R *xj1, 
const R *xj2,
 
 3731           const int n0, 
const int n1, 
const int n2, 
const int m)
 
 3733   int u0,o0,l0,u1,o1,l1,u2,o2,l2;
 
 3735   const R *psij0,*psij1,*psij2;
 
 3741   nfft_uo2(&u0,&o0,*xj0, n0, m);
 
 3742   nfft_uo2(&u1,&o1,*xj1, n1, m);
 
 3743   nfft_uo2(&u2,&o2,*xj2, n2, m);
 
 3750   for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 3753       for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 3756     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 3757     for(l2=0; l2<=2*m+1; l2++)
 
 3758       (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3762   for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 3765       for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 3768     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 3769     for(l2=0; l2<2*m+1-o2; l2++)
 
 3770       (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3771     gj=g+((u0+l0)*n1+(u1+l1))*n2;
 
 3772     for(l2=0; l2<=o2; l2++)
 
 3773       (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3778   for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 3781       for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 3784     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 3785     for(l2=0; l2<=2*m+1; l2++)
 
 3786       (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3788       for(l1=0; l1<=o1; l1++,psij1++)
 
 3791     gj=g+((u0+l0)*n1+l1)*n2+u2;
 
 3792     for(l2=0; l2<=2*m+1; l2++)
 
 3793       (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3798     for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 3801         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 3804       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 3805       for(l2=0; l2<2*m+1-o2; l2++)
 
 3806         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3807       gj=g+((u0+l0)*n1+(u1+l1))*n2;
 
 3808       for(l2=0; l2<=o2; l2++)
 
 3809         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3811         for(l1=0; l1<=o1; l1++,psij1++)
 
 3814       gj=g+((u0+l0)*n1+l1)*n2+u2;
 
 3815       for(l2=0; l2<2*m+1-o2; l2++)
 
 3816         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3817       gj=g+((u0+l0)*n1+l1)*n2;
 
 3818       for(l2=0; l2<=o2; l2++)
 
 3819         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3827     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 3830         for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 3833       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 3834       for(l2=0; l2<=2*m+1; l2++)
 
 3835         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3839     for(l0=0; l0<=o0; l0++,psij0++)
 
 3842         for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 3845       gj=g+(l0*n1+(u1+l1))*n2+u2;
 
 3846       for(l2=0; l2<=2*m+1; l2++)
 
 3847         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3853     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 3856         for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 3859       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 3860       for(l2=0; l2<2*m+1-o2; l2++)
 
 3861         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3862       gj=g+((u0+l0)*n1+(u1+l1))*n2;
 
 3863       for(l2=0; l2<=o2; l2++)
 
 3864         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3868     for(l0=0; l0<=o0; l0++,psij0++)
 
 3871         for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 3874       gj=g+(l0*n1+(u1+l1))*n2+u2;
 
 3875       for(l2=0; l2<2*m+1-o2; l2++)
 
 3876         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3877       gj=g+(l0*n1+(u1+l1))*n2;
 
 3878       for(l2=0; l2<=o2; l2++)
 
 3879         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3886     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 3889         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 3892       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 3893       for(l2=0; l2<=2*m+1; l2++)
 
 3894         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3896         for(l1=0; l1<=o1; l1++,psij1++)
 
 3899       gj=g+((u0+l0)*n1+l1)*n2+u2;
 
 3900       for(l2=0; l2<=2*m+1; l2++)
 
 3901         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3904     for(l0=0; l0<=o0; l0++,psij0++)
 
 3907         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 3910       gj=g+(l0*n1+(u1+l1))*n2+u2;
 
 3911       for(l2=0; l2<=2*m+1; l2++)
 
 3912         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3914         for(l1=0; l1<=o1; l1++,psij1++)
 
 3917       gj=g+(l0*n1+l1)*n2+u2;
 
 3918       for(l2=0; l2<=2*m+1; l2++)
 
 3919         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3925     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 3928         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 3931       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 3932       for(l2=0; l2<2*m+1-o2; l2++)
 
 3933         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3934       gj=g+((u0+l0)*n1+(u1+l1))*n2;
 
 3935       for(l2=0; l2<=o2; l2++)
 
 3936         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3938         for(l1=0; l1<=o1; l1++,psij1++)
 
 3941       gj=g+((u0+l0)*n1+l1)*n2+u2;
 
 3942       for(l2=0; l2<2*m+1-o2; l2++)
 
 3943         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3944       gj=g+((u0+l0)*n1+l1)*n2;
 
 3945       for(l2=0; l2<=o2; l2++)
 
 3946         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3950     for(l0=0; l0<=o0; l0++,psij0++)
 
 3953         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 3956       gj=g+(l0*n1+(u1+l1))*n2+u2;
 
 3957       for(l2=0; l2<2*m+1-o2; l2++)
 
 3958         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3959       gj=g+(l0*n1+(u1+l1))*n2;
 
 3960       for(l2=0; l2<=o2; l2++)
 
 3961         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3963         for(l1=0; l1<=o1; l1++,psij1++)
 
 3966       gj=g+(l0*n1+l1)*n2+u2;
 
 3967       for(l2=0; l2<2*m+1-o2; l2++)
 
 3968         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3970       for(l2=0; l2<=o2; l2++)
 
 3971         (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
 
 3999 static void nfft_adjoint_3d_compute_omp_blockwise(
const C f, C *g,
 
 4000             const R *psij_const0, 
const R *psij_const1, 
const R *psij_const2,
 
 4001             const R *xj0, 
const R *xj1, 
const R *xj2,
 
 4002             const int n0, 
const int n1, 
const int n2, 
const int m,
 
 4003       const int my_u0, 
const int my_o0)
 
 4005   int ar_u0,ar_o0,l0,u1,o1,l1,u2,o2,l2;
 
 4006   const int lprod = (2*m+2) * (2*m+2) * (2*m+2);
 
 4008   unsigned long int index_temp1[2*m+2];
 
 4009   unsigned long int index_temp2[2*m+2];
 
 4011   nfft_uo2(&ar_u0,&ar_o0,*xj0, n0, m);
 
 4012   nfft_uo2(&u1,&o1,*xj1, n1, m);
 
 4013   nfft_uo2(&u2,&o2,*xj2, n2, m);
 
 4015   for (l1=0; l1<=2*m+1; l1++)
 
 4016     index_temp1[l1] = (u1+l1)%n1;
 
 4018   for (l2=0; l2<=2*m+1; l2++)
 
 4019     index_temp2[l2] = (u2+l2)%n2;
 
 4023     int u0 = MAX(my_u0,ar_u0);
 
 4024     int o0 = MIN(my_o0,ar_o0);
 
 4025     int offset_psij = u0-ar_u0;
 
 4027     assert(offset_psij >= 0);
 
 4028     assert(o0-u0 <= 2*m+1);
 
 4029     assert(offset_psij+o0-u0 <= 2*m+1);
 
 4032     for (l0 = 0; l0 <= o0-u0; l0++)
 
 4034       const unsigned long int i0 = (u0+l0) * n1;
 
 4035       const C val0 = psij_const0[offset_psij+l0];
 
 4037       for(l1=0; l1<=2*m+1; l1++)
 
 4039         const unsigned long int i1 = (i0 + index_temp1[l1]) * n2;
 
 4040         const C val1 = psij_const1[l1];
 
 4042         for(l2=0; l2<=2*m+1; l2++)
 
 4043           g[i1 + index_temp2[l2]] += val0 * val1 * psij_const2[l2] * f;
 
 4049     int u0 = MAX(my_u0,ar_u0);
 
 4051     int offset_psij = u0-ar_u0;
 
 4053     assert(offset_psij >= 0);
 
 4054     assert(o0-u0 <= 2*m+1);
 
 4055     assert(offset_psij+o0-u0 <= 2*m+1);
 
 4058     for (l0 = 0; l0 <= o0-u0; l0++)
 
 4060       unsigned long int i0 = (u0+l0) * n1;
 
 4061       const C val0 = psij_const0[offset_psij+l0];
 
 4063       for(l1=0; l1<=2*m+1; l1++)
 
 4065         const unsigned long int i1 = (i0 + index_temp1[l1]) * n2;
 
 4066         const C val1 = psij_const1[l1];
 
 4068         for(l2=0; l2<=2*m+1; l2++)
 
 4069           g[i1 + index_temp2[l2]] += val0 * val1 * psij_const2[l2] * f;
 
 4074     o0 = MIN(my_o0,ar_o0);
 
 4075     offset_psij += my_u0-ar_u0+n0;
 
 4080       assert(o0-u0 <= 2*m+1);
 
 4081       assert(offset_psij+o0-u0 <= 2*m+1);
 
 4084     for (l0 = 0; l0 <= o0-u0; l0++)
 
 4086       unsigned long int i0 = (u0+l0) * n1;
 
 4087       const C val0 = psij_const0[offset_psij+l0];
 
 4089       for(l1=0; l1<=2*m+1; l1++)
 
 4091         const unsigned long int i1 = (i0 + index_temp1[l1]) * n2;
 
 4092         const C val1 = psij_const1[l1];
 
 4094         for(l2=0; l2<=2*m+1; l2++)
 
 4095           g[i1 + index_temp2[l2]] += val0 * val1 * psij_const2[l2] * f;
 
 4104 static void nfft_adjoint_3d_compute_omp_atomic(
const C f, C *g,
 
 4105             const R *psij_const0, 
const R *psij_const1, 
const R *psij_const2,
 
 4106             const R *xj0, 
const R *xj1, 
const R *xj2,
 
 4107             const int n0, 
const int n1, 
const int n2, 
const int m)
 
 4109   int u0,o0,l0,u1,o1,l1,u2,o2,l2;
 
 4110   const int lprod = (2*m+2) * (2*m+2) * (2*m+2);
 
 4112   unsigned long int index_temp0[2*m+2];
 
 4113   unsigned long int index_temp1[2*m+2];
 
 4114   unsigned long int index_temp2[2*m+2];
 
 4116   nfft_uo2(&u0,&o0,*xj0, n0, m);
 
 4117   nfft_uo2(&u1,&o1,*xj1, n1, m);
 
 4118   nfft_uo2(&u2,&o2,*xj2, n2, m);
 
 4120   for (l0=0; l0<=2*m+1; l0++)
 
 4121     index_temp0[l0] = (u0+l0)%n0;
 
 4123   for (l1=0; l1<=2*m+1; l1++)
 
 4124     index_temp1[l1] = (u1+l1)%n1;
 
 4126   for (l2=0; l2<=2*m+1; l2++)
 
 4127     index_temp2[l2] = (u2+l2)%n2;
 
 4129   for(l0=0; l0<=2*m+1; l0++)
 
 4131     for(l1=0; l1<=2*m+1; l1++)
 
 4133       for(l2=0; l2<=2*m+1; l2++)
 
 4135         unsigned long int i = (index_temp0[l0] * n1 + index_temp1[l1]) * n2 + index_temp2[l2];
 
 4137         R *lhs_real = (R*)lhs;
 
 4138         C val = psij_const0[l0] * psij_const1[l1] * psij_const2[l2] * f;
 
 4141         lhs_real[0] += creal(val);
 
 4144         lhs_real[1] += cimag(val);
 
 4152 static void nfft_adjoint_3d_compute_serial(
const C *fj, C *g,
 
 4153             const R *psij_const0, 
const R *psij_const1, 
const R *psij_const2,
 
 4154             const R *xj0, 
const R *xj1, 
const R *xj2,
 
 4155             const int n0, 
const int n1, 
const int n2, 
const int m)
 
 4157   int u0,o0,l0,u1,o1,l1,u2,o2,l2;
 
 4159   const R *psij0,*psij1,*psij2;
 
 4165   nfft_uo2(&u0,&o0,*xj0, n0, m);
 
 4166   nfft_uo2(&u1,&o1,*xj1, n1, m);
 
 4167   nfft_uo2(&u2,&o2,*xj2, n2, m);
 
 4172   for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 4175       for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 4178     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 4179     for(l2=0; l2<=2*m+1; l2++)
 
 4180       (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4184   for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 4187       for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 4190     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 4191     for(l2=0; l2<2*m+1-o2; l2++)
 
 4192       (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4193     gj=g+((u0+l0)*n1+(u1+l1))*n2;
 
 4194     for(l2=0; l2<=o2; l2++)
 
 4195       (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4200   for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 4203       for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 4206     gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 4207     for(l2=0; l2<=2*m+1; l2++)
 
 4208       (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4210       for(l1=0; l1<=o1; l1++,psij1++)
 
 4213     gj=g+((u0+l0)*n1+l1)*n2+u2;
 
 4214     for(l2=0; l2<=2*m+1; l2++)
 
 4215       (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4220     for(l0=0; l0<=2*m+1; l0++,psij0++)
 
 4223         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 4226       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 4227       for(l2=0; l2<2*m+1-o2; l2++)
 
 4228         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4229       gj=g+((u0+l0)*n1+(u1+l1))*n2;
 
 4230       for(l2=0; l2<=o2; l2++)
 
 4231         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4233         for(l1=0; l1<=o1; l1++,psij1++)
 
 4236       gj=g+((u0+l0)*n1+l1)*n2+u2;
 
 4237       for(l2=0; l2<2*m+1-o2; l2++)
 
 4238         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4239       gj=g+((u0+l0)*n1+l1)*n2;
 
 4240       for(l2=0; l2<=o2; l2++)
 
 4241         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4249     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 4252         for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 4255       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 4256       for(l2=0; l2<=2*m+1; l2++)
 
 4257         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4261     for(l0=0; l0<=o0; l0++,psij0++)
 
 4264         for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 4267       gj=g+(l0*n1+(u1+l1))*n2+u2;
 
 4268       for(l2=0; l2<=2*m+1; l2++)
 
 4269         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4275     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 4278         for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 4281       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 4282       for(l2=0; l2<2*m+1-o2; l2++)
 
 4283         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4284       gj=g+((u0+l0)*n1+(u1+l1))*n2;
 
 4285       for(l2=0; l2<=o2; l2++)
 
 4286         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4290     for(l0=0; l0<=o0; l0++,psij0++)
 
 4293         for(l1=0; l1<=2*m+1; l1++,psij1++)
 
 4296       gj=g+(l0*n1+(u1+l1))*n2+u2;
 
 4297       for(l2=0; l2<2*m+1-o2; l2++)
 
 4298         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4299       gj=g+(l0*n1+(u1+l1))*n2;
 
 4300       for(l2=0; l2<=o2; l2++)
 
 4301         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4308     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 4311         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 4314       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 4315       for(l2=0; l2<=2*m+1; l2++)
 
 4316         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4318         for(l1=0; l1<=o1; l1++,psij1++)
 
 4321       gj=g+((u0+l0)*n1+l1)*n2+u2;
 
 4322       for(l2=0; l2<=2*m+1; l2++)
 
 4323         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4326     for(l0=0; l0<=o0; l0++,psij0++)
 
 4329         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 4332       gj=g+(l0*n1+(u1+l1))*n2+u2;
 
 4333       for(l2=0; l2<=2*m+1; l2++)
 
 4334         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4336         for(l1=0; l1<=o1; l1++,psij1++)
 
 4339       gj=g+(l0*n1+l1)*n2+u2;
 
 4340       for(l2=0; l2<=2*m+1; l2++)
 
 4341         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4347     for(l0=0; l0<2*m+1-o0; l0++,psij0++)
 
 4350         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 4353       gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
 
 4354       for(l2=0; l2<2*m+1-o2; l2++)
 
 4355         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4356       gj=g+((u0+l0)*n1+(u1+l1))*n2;
 
 4357       for(l2=0; l2<=o2; l2++)
 
 4358         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4360         for(l1=0; l1<=o1; l1++,psij1++)
 
 4363       gj=g+((u0+l0)*n1+l1)*n2+u2;
 
 4364       for(l2=0; l2<2*m+1-o2; l2++)
 
 4365         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4366       gj=g+((u0+l0)*n1+l1)*n2;
 
 4367       for(l2=0; l2<=o2; l2++)
 
 4368         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4372     for(l0=0; l0<=o0; l0++,psij0++)
 
 4375         for(l1=0; l1<2*m+1-o1; l1++,psij1++)
 
 4378       gj=g+(l0*n1+(u1+l1))*n2+u2;
 
 4379       for(l2=0; l2<2*m+1-o2; l2++)
 
 4380         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4381       gj=g+(l0*n1+(u1+l1))*n2;
 
 4382       for(l2=0; l2<=o2; l2++)
 
 4383         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4385         for(l1=0; l1<=o1; l1++,psij1++)
 
 4388       gj=g+(l0*n1+l1)*n2+u2;
 
 4389       for(l2=0; l2<2*m+1-o2; l2++)
 
 4390         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4392       for(l2=0; l2<=o2; l2++)
 
 4393         (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
 
 4400 static void nfft_trafo_3d_B(
nfft_plan *ths)
 
 4402   const int N0 = ths->
N[0];
 
 4403   const int n0 = ths->
n[0];
 
 4404   const int N1 = ths->
N[1];
 
 4405   const int n1 = ths->
n[1];
 
 4406   const int N2 = ths->
N[2];
 
 4407   const int n2 = ths->
n[2];
 
 4409   const int m = ths->
m;
 
 4411   const C* g = (C*) ths->
g;
 
 4417     const int lprod = (2*m+2) * (2*m+2) * (2*m+2);
 
 4418     #pragma omp parallel for default(shared) private(k) 
 4419     for (k = 0; k < M; k++)
 
 4424       for (l = 0; l < lprod; l++)
 
 4432     #pragma omp parallel for default(shared) private(k) 
 4433     for (k = 0; k < M; k++)
 
 4436       nfft_trafo_3d_compute(ths->
f+j, g, ths->
psi+j*3*(2*m+2), ths->
psi+(j*3+1)*(2*m+2), ths->
psi+(j*3+2)*(2*m+2), ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 4443     R fg_exp_l[3*(2*m+2)];
 
 4445     nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 4446     nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->
b[1]);
 
 4447     nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->
b[2]);
 
 4449     #pragma omp parallel for default(shared) private(k) 
 4450     for (k = 0; k < M; k++)
 
 4454       R psij_const[3*(2*m+2)];
 
 4455       R fg_psij0 = ths->
psi[2*j*3];
 
 4456       R fg_psij1 = ths->
psi[2*j*3+1];
 
 4457       R fg_psij2 = K(1.0);
 
 4459       psij_const[0] = fg_psij0;
 
 4460       for(l=1; l<=2*m+1; l++)
 
 4462         fg_psij2 *= fg_psij1;
 
 4463         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
 
 4466       fg_psij0 = ths->
psi[2*(j*3+1)];
 
 4467       fg_psij1 = ths->
psi[2*(j*3+1)+1];
 
 4469       psij_const[2*m+2] = fg_psij0;
 
 4470       for(l=1; l<=2*m+1; l++)
 
 4472         fg_psij2 *= fg_psij1;
 
 4473         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
 
 4476       fg_psij0 = ths->
psi[2*(j*3+2)];
 
 4477       fg_psij1 = ths->
psi[2*(j*3+2)+1];
 
 4479       psij_const[2*(2*m+2)] = fg_psij0;
 
 4480       for(l=1; l<=2*m+1; l++)
 
 4482         fg_psij2 *= fg_psij1;
 
 4483         psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
 
 4486       nfft_trafo_3d_compute(ths->
f+j, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 4494     R fg_exp_l[3*(2*m+2)];
 
 4496     nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 4497     nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->
b[1]);
 
 4498     nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->
b[2]);
 
 4500     nfft_sort_nodes(ths);
 
 4502     #pragma omp parallel for default(shared) private(k) 
 4503     for (k = 0; k < M; k++)
 
 4507       R psij_const[3*(2*m+2)];
 
 4508       R fg_psij0, fg_psij1, fg_psij2;
 
 4510       nfft_uo(ths,j,&u,&o,0);
 
 4511       fg_psij0 = (PHI(ths->
x[3*j]-((R)u)/n0,0));
 
 4512       fg_psij1 = EXP(K(2.0)*(n0*(ths->
x[3*j]) - u)/ths->
b[0]);
 
 4514       psij_const[0] = fg_psij0;
 
 4515       for(l=1; l<=2*m+1; l++)
 
 4517         fg_psij2 *= fg_psij1;
 
 4518         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
 
 4521       nfft_uo(ths,j,&u,&o,1);
 
 4522       fg_psij0 = (PHI(ths->
x[3*j+1]-((R)u)/n1,1));
 
 4523       fg_psij1 = EXP(K(2.0)*(n1*(ths->
x[3*j+1]) - u)/ths->
b[1]);
 
 4525       psij_const[2*m+2] = fg_psij0;
 
 4526       for(l=1; l<=2*m+1; l++)
 
 4528         fg_psij2 *= fg_psij1;
 
 4529         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
 
 4532       nfft_uo(ths,j,&u,&o,2);
 
 4533       fg_psij0 = (PHI(ths->
x[3*j+2]-((R)u)/n2,2));
 
 4534       fg_psij1 = EXP(K(2.0)*(n2*(ths->
x[3*j+2]) - u)/ths->
b[2]);
 
 4536       psij_const[2*(2*m+2)] = fg_psij0;
 
 4537       for(l=1; l<=2*m+1; l++)
 
 4539         fg_psij2 *= fg_psij1;
 
 4540         psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
 
 4543       nfft_trafo_3d_compute(ths->
f+j, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 4551     const int K = ths->
K, ip_s = K / (m + 2);
 
 4553     nfft_sort_nodes(ths);
 
 4555     #pragma omp parallel for default(shared) private(k) 
 4556     for (k = 0; k < M; k++)
 
 4561       R psij_const[3*(2*m+2)];
 
 4564       nfft_uo(ths,j,&u,&o,0);
 
 4565       ip_y = FABS(n0*ths->
x[3*j+0] - u)*((R)ip_s);
 
 4566       ip_u = LRINT(FLOOR(ip_y));
 
 4568       for(l=0; l < 2*m+2; l++)
 
 4569         psij_const[l] = ths->
psi[ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +
 
 4570           ths->
psi[ABS(ip_u-l*ip_s+1)]*(ip_w);
 
 4572       nfft_uo(ths,j,&u,&o,1);
 
 4573       ip_y = FABS(n1*ths->
x[3*j+1] - u)*((R)ip_s);
 
 4574       ip_u = LRINT(FLOOR(ip_y));
 
 4576       for(l=0; l < 2*m+2; l++)
 
 4577         psij_const[2*m+2+l] = ths->
psi[(K+1)+ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +
 
 4578           ths->
psi[(K+1)+ABS(ip_u-l*ip_s+1)]*(ip_w);
 
 4580       nfft_uo(ths,j,&u,&o,2);
 
 4581       ip_y = FABS(n2*ths->
x[3*j+2] - u)*((R)ip_s);
 
 4582       ip_u = LRINT(FLOOR(ip_y));
 
 4584       for(l=0; l < 2*m+2; l++)
 
 4585         psij_const[2*(2*m+2)+l] = ths->
psi[2*(K+1)+ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +
 
 4586           ths->
psi[2*(K+1)+ABS(ip_u-l*ip_s+1)]*(ip_w);
 
 4588       nfft_trafo_3d_compute(ths->
f+j, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 4595   nfft_sort_nodes(ths);
 
 4597   #pragma omp parallel for default(shared) private(k) 
 4598   for (k = 0; k < M; k++)
 
 4600     R psij_const[3*(2*m+2)];
 
 4604     nfft_uo(ths,j,&u,&o,0);
 
 4605     for(l=0;l<=2*m+1;l++)
 
 4606       psij_const[l]=(PHI(ths->
x[3*j]-((R)((u+l)))/n0,0));
 
 4608     nfft_uo(ths,j,&u,&o,1);
 
 4609     for(l=0;l<=2*m+1;l++)
 
 4610       psij_const[2*m+2+l]=(PHI(ths->
x[3*j+1]-((R)((u+l)))/n1,1));
 
 4612     nfft_uo(ths,j,&u,&o,2);
 
 4613     for(l=0;l<=2*m+1;l++)
 
 4614       psij_const[2*(2*m+2)+l]=(PHI(ths->
x[3*j+2]-((R)((u+l)))/n2,2));
 
 4616     nfft_trafo_3d_compute(ths->
f+j, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 4621 #define MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_ASSERT_A                        \ 
 4623           assert(ar_x[2*k] >= min_u_a || k == M-1);                           \ 
 4625             assert(ar_x[2*k-2] < min_u_a);                                    \ 
 4628 #define MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_ASSERT_A 
 4632 #define MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_ASSERT_B                        \ 
 4634           assert(ar_x[2*k] >= min_u_b || k == M-1);                           \ 
 4636             assert(ar_x[2*k-2] < min_u_b);                                    \ 
 4639 #define MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_ASSERT_B 
 4642 #define MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_COMPUTE_PRE_PSI                 \ 
 4643             nfft_adjoint_3d_compute_omp_blockwise(ths->f[j], g,               \ 
 4644                 ths->psi+j*3*(2*m+2),                                         \ 
 4645                 ths->psi+(j*3+1)*(2*m+2),                                     \ 
 4646                 ths->psi+(j*3+2)*(2*m+2),                                     \ 
 4647                 ths->x+3*j, ths->x+3*j+1, ths->x+3*j+2,                       \ 
 4648                 n0, n1, n2, m, my_u0, my_o0); 
 4650 #define MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_COMPUTE_PRE_FG_PSI              \ 
 4653             R psij_const[3*(2*m+2)];                                          \ 
 4654             R fg_psij0 = ths->psi[2*j*3];                                     \ 
 4655             R fg_psij1 = ths->psi[2*j*3+1];                                   \ 
 4656             R fg_psij2 = K(1.0);                                              \ 
 4658             psij_const[0] = fg_psij0;                                         \ 
 4659             for(l=1; l<=2*m+1; l++)                                           \ 
 4661               fg_psij2 *= fg_psij1;                                           \ 
 4662               psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];                  \ 
 4665             fg_psij0 = ths->psi[2*(j*3+1)];                                   \ 
 4666             fg_psij1 = ths->psi[2*(j*3+1)+1];                                 \ 
 4667             fg_psij2 = K(1.0);                                                \ 
 4668             psij_const[2*m+2] = fg_psij0;                                     \ 
 4669             for(l=1; l<=2*m+1; l++)                                           \ 
 4671               fg_psij2 *= fg_psij1;                                           \ 
 4672               psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];      \ 
 4675             fg_psij0 = ths->psi[2*(j*3+2)];                                   \ 
 4676             fg_psij1 = ths->psi[2*(j*3+2)+1];                                 \ 
 4677             fg_psij2 = K(1.0);                                                \ 
 4678             psij_const[2*(2*m+2)] = fg_psij0;                                 \ 
 4679             for(l=1; l<=2*m+1; l++)                                           \ 
 4681               fg_psij2 *= fg_psij1;                                           \ 
 4682               psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l]; \ 
 4685             nfft_adjoint_3d_compute_omp_blockwise(ths->f[j], g,               \ 
 4686                 psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2,           \ 
 4687                 ths->x+3*j, ths->x+3*j+1, ths->x+3*j+2,                       \ 
 4688                 n0, n1, n2, m, my_u0, my_o0);                                 \ 
 4691 #define MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_COMPUTE_FG_PSI                  \ 
 4694             R psij_const[3*(2*m+2)];                                          \ 
 4695             R fg_psij0, fg_psij1, fg_psij2;                                   \ 
 4697             nfft_uo(ths,j,&u,&o,0);                                           \ 
 4698             fg_psij0 = (PHI(ths->x[3*j]-((R)u)/n0,0));                        \ 
 4699             fg_psij1 = EXP(K(2.0)*(n0*(ths->x[3*j]) - u)/ths->b[0]);          \ 
 4700             fg_psij2 = K(1.0);                                                \ 
 4701             psij_const[0] = fg_psij0;                                         \ 
 4702             for(l=1; l<=2*m+1; l++)                                           \ 
 4704               fg_psij2 *= fg_psij1;                                           \ 
 4705               psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];                  \ 
 4708             nfft_uo(ths,j,&u,&o,1);                                           \ 
 4709             fg_psij0 = (PHI(ths->x[3*j+1]-((R)u)/n1,1));                      \ 
 4710             fg_psij1 = EXP(K(2.0)*(n1*(ths->x[3*j+1]) - u)/ths->b[1]);        \ 
 4711             fg_psij2 = K(1.0);                                                \ 
 4712             psij_const[2*m+2] = fg_psij0;                                     \ 
 4713             for(l=1; l<=2*m+1; l++)                                           \ 
 4715               fg_psij2 *= fg_psij1;                                           \ 
 4716               psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];      \ 
 4719             nfft_uo(ths,j,&u,&o,2);                                           \ 
 4720             fg_psij0 = (PHI(ths->x[3*j+2]-((R)u)/n2,2));                      \ 
 4721             fg_psij1 = EXP(K(2.0)*(n2*(ths->x[3*j+2]) - u)/ths->b[2]);        \ 
 4722             fg_psij2 = K(1.0);                                                \ 
 4723             psij_const[2*(2*m+2)] = fg_psij0;                                 \ 
 4724             for(l=1; l<=2*m+1; l++)                                           \ 
 4726               fg_psij2 *= fg_psij1;                                           \ 
 4727               psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l]; \ 
 4730             nfft_adjoint_3d_compute_omp_blockwise(ths->f[j], g,               \ 
 4731                 psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2,           \ 
 4732                 ths->x+3*j, ths->x+3*j+1, ths->x+3*j+2,                       \ 
 4733                 n0, n1, n2, m, my_u0, my_o0);                                 \ 
 4736 #define MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_COMPUTE_PRE_LIN_PSI             \ 
 4739             R psij_const[3*(2*m+2)];                                          \ 
 4743             nfft_uo(ths,j,&u,&o,0);                                           \ 
 4744             ip_y = FABS(n0*ths->x[3*j+0] - u)*((R)ip_s);                      \ 
 4745             ip_u = LRINT(FLOOR(ip_y));                                        \ 
 4747             for(l=0; l < 2*m+2; l++)                                          \ 
 4748               psij_const[l] = ths->psi[ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +      \ 
 4749                 ths->psi[ABS(ip_u-l*ip_s+1)]*(ip_w);                          \ 
 4751             nfft_uo(ths,j,&u,&o,1);                                           \ 
 4752             ip_y = FABS(n1*ths->x[3*j+1] - u)*((R)ip_s);                      \ 
 4753             ip_u = LRINT(FLOOR(ip_y));                                        \ 
 4755             for(l=0; l < 2*m+2; l++)                                          \ 
 4756               psij_const[2*m+2+l] = ths->psi[(K+1)+ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) + \ 
 4757                 ths->psi[(K+1)+ABS(ip_u-l*ip_s+1)]*(ip_w);                    \ 
 4759             nfft_uo(ths,j,&u,&o,2);                                           \ 
 4760             ip_y = FABS(n2*ths->x[3*j+2] - u)*((R)ip_s);                      \ 
 4761             ip_u = LRINT(FLOOR(ip_y));                                        \ 
 4763             for(l=0; l < 2*m+2; l++)                                          \ 
 4764               psij_const[2*(2*m+2)+l] = ths->psi[2*(K+1)+ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) + \ 
 4765                 ths->psi[2*(K+1)+ABS(ip_u-l*ip_s+1)]*(ip_w);                  \ 
 4767             nfft_adjoint_3d_compute_omp_blockwise(ths->f[j], g,               \ 
 4768                 psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2,           \ 
 4769                 ths->x+3*j, ths->x+3*j+1, ths->x+3*j+2,                       \ 
 4770                 n0, n1, n2, m, my_u0, my_o0);                                 \ 
 4773 #define MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_COMPUTE_NO_PSI                  \ 
 4776             R psij_const[3*(2*m+2)];                                          \ 
 4778             nfft_uo(ths,j,&u,&o,0);                                           \ 
 4779             for(l=0;l<=2*m+1;l++)                                             \ 
 4780               psij_const[l]=(PHI(ths->x[3*j]-((R)((u+l)))/n0,0));             \ 
 4782             nfft_uo(ths,j,&u,&o,1);                                           \ 
 4783             for(l=0;l<=2*m+1;l++)                                             \ 
 4784               psij_const[2*m+2+l]=(PHI(ths->x[3*j+1]-((R)((u+l)))/n1,1));     \ 
 4786             nfft_uo(ths,j,&u,&o,2);                                           \ 
 4787             for(l=0;l<=2*m+1;l++)                                             \ 
 4788               psij_const[2*(2*m+2)+l]=(PHI(ths->x[3*j+2]-((R)((u+l)))/n2,2)); \ 
 4790             nfft_adjoint_3d_compute_omp_blockwise(ths->f[j], g,               \ 
 4791                 psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2,           \ 
 4792                 ths->x+3*j, ths->x+3*j+1, ths->x+3*j+2,                       \ 
 4793                 n0, n1, n2, m, my_u0, my_o0);                                 \ 
 4796 #define MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE(whichone)                       \ 
 4798     if (ths->nfft_flags & NFFT_OMP_BLOCKWISE_ADJOINT)                         \ 
 4800       _Pragma("omp parallel private(k)")                                      \ 
 4802         int my_u0, my_o0, min_u_a, max_u_a, min_u_b, max_u_b;                 \ 
 4803         int *ar_x = ths->index_x;                                             \ 
 4805         nfft_adjoint_B_omp_blockwise_init(&my_u0, &my_o0, &min_u_a, &max_u_a, \ 
 4806             &min_u_b, &max_u_b, 3, ths->n, m);                                \ 
 4808         if (min_u_a != -1)                                                    \ 
 4810           k = index_x_binary_search(ar_x, M, min_u_a);                        \ 
 4812           MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_ASSERT_A                      \ 
 4816             int u_prod = ar_x[2*k];                                           \ 
 4817             int j = ar_x[2*k+1];                                              \ 
 4819             if (u_prod < min_u_a || u_prod > max_u_a)                         \ 
 4822             MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_COMPUTE_ ##whichone         \ 
 4828         if (min_u_b != -1)                                                    \ 
 4830           int k = index_x_binary_search(ar_x, M, min_u_b);                    \ 
 4832           MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_ASSERT_B                      \ 
 4836             int u_prod = ar_x[2*k];                                           \ 
 4837             int j = ar_x[2*k+1];                                              \ 
 4839             if (u_prod < min_u_b || u_prod > max_u_b)                         \ 
 4842             MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE_COMPUTE_ ##whichone         \ 
 4852 static void nfft_adjoint_3d_B(
nfft_plan *ths)
 
 4855   const int N0 = ths->
N[0];
 
 4856   const int n0 = ths->
n[0];
 
 4857   const int N1 = ths->
N[1];
 
 4858   const int n1 = ths->
n[1];
 
 4859   const int N2 = ths->
N[2];
 
 4860   const int n2 = ths->
n[2];
 
 4862   const int m = ths->
m;
 
 4866   memset(g,0,ths->
n_total*
sizeof(C));
 
 4870     nfft_adjoint_B_compute_full_psi(g, ths->
psi_index_g, ths->
psi, ths->
f, M,
 
 4878     MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE(PRE_PSI)
 
 4881     #pragma omp parallel for default(shared) private(k) 
 4882     for (k = 0; k < M; k++)
 
 4886       nfft_adjoint_3d_compute_omp_atomic(ths->
f[j], g, ths->
psi+j*3*(2*m+2), ths->
psi+(j*3+1)*(2*m+2), ths->
psi+(j*3+2)*(2*m+2), ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 4888       nfft_adjoint_3d_compute_serial(ths->
f+j, g, ths->
psi+j*3*(2*m+2), ths->
psi+(j*3+1)*(2*m+2), ths->
psi+(j*3+2)*(2*m+2), ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 4896     R fg_exp_l[3*(2*m+2)];
 
 4898     nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 4899     nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->
b[1]);
 
 4900     nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->
b[2]);
 
 4903     MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE(PRE_FG_PSI)
 
 4906     #pragma omp parallel for default(shared) private(k) 
 4907     for (k = 0; k < M; k++)
 
 4909       R psij_const[3*(2*m+2)];
 
 4912       R fg_psij0 = ths->
psi[2*j*3];
 
 4913       R fg_psij1 = ths->
psi[2*j*3+1];
 
 4914       R fg_psij2 = K(1.0);
 
 4916       psij_const[0] = fg_psij0;
 
 4917       for(l=1; l<=2*m+1; l++)
 
 4919         fg_psij2 *= fg_psij1;
 
 4920         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
 
 4923       fg_psij0 = ths->
psi[2*(j*3+1)];
 
 4924       fg_psij1 = ths->
psi[2*(j*3+1)+1];
 
 4926       psij_const[2*m+2] = fg_psij0;
 
 4927       for(l=1; l<=2*m+1; l++)
 
 4929         fg_psij2 *= fg_psij1;
 
 4930         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
 
 4933       fg_psij0 = ths->
psi[2*(j*3+2)];
 
 4934       fg_psij1 = ths->
psi[2*(j*3+2)+1];
 
 4936       psij_const[2*(2*m+2)] = fg_psij0;
 
 4937       for(l=1; l<=2*m+1; l++)
 
 4939         fg_psij2 *= fg_psij1;
 
 4940         psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
 
 4944       nfft_adjoint_3d_compute_omp_atomic(ths->
f[j], g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 4946       nfft_adjoint_3d_compute_serial(ths->
f+j, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 4955     R fg_exp_l[3*(2*m+2)];
 
 4957     nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->
b[0]);
 
 4958     nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->
b[1]);
 
 4959     nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->
b[2]);
 
 4961     nfft_sort_nodes(ths);
 
 4964     MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE(FG_PSI)
 
 4967     #pragma openmp parallel for default(shared) private(k) 
 4968     for (k = 0; k < M; k++)
 
 4972       R psij_const[3*(2*m+2)];
 
 4973       R fg_psij0, fg_psij1, fg_psij2;
 
 4975       nfft_uo(ths,j,&u,&o,0);
 
 4976       fg_psij0 = (PHI(ths->
x[3*j]-((R)u)/n0,0));
 
 4977       fg_psij1 = EXP(K(2.0)*(n0*(ths->
x[3*j]) - u)/ths->
b[0]);
 
 4979       psij_const[0] = fg_psij0;
 
 4980       for(l=1; l<=2*m+1; l++)
 
 4982         fg_psij2 *= fg_psij1;
 
 4983         psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
 
 4986       nfft_uo(ths,j,&u,&o,1);
 
 4987       fg_psij0 = (PHI(ths->
x[3*j+1]-((R)u)/n1,1));
 
 4988       fg_psij1 = EXP(K(2.0)*(n1*(ths->
x[3*j+1]) - u)/ths->
b[1]);
 
 4990       psij_const[2*m+2] = fg_psij0;
 
 4991       for(l=1; l<=2*m+1; l++)
 
 4993         fg_psij2 *= fg_psij1;
 
 4994         psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
 
 4997       nfft_uo(ths,j,&u,&o,2);
 
 4998       fg_psij0 = (PHI(ths->
x[3*j+2]-((R)u)/n2,2));
 
 4999       fg_psij1 = EXP(K(2.0)*(n2*(ths->
x[3*j+2]) - u)/ths->
b[2]);
 
 5001       psij_const[2*(2*m+2)] = fg_psij0;
 
 5002       for(l=1; l<=2*m+1; l++)
 
 5004         fg_psij2 *= fg_psij1;
 
 5005         psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
 
 5009       nfft_adjoint_3d_compute_omp_atomic(ths->
f[j], g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 5011       nfft_adjoint_3d_compute_serial(ths->
f+j, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 5020     const int K = ths->
K;
 
 5021     const int ip_s = K / (m + 2);
 
 5023     nfft_sort_nodes(ths);
 
 5026     MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE(PRE_LIN_PSI)
 
 5029     #pragma openmp parallel for default(shared) private(k) 
 5030     for (k = 0; k < M; k++)
 
 5036       R psij_const[3*(2*m+2)];
 
 5038       nfft_uo(ths,j,&u,&o,0);
 
 5039       ip_y = FABS(n0*ths->
x[3*j+0] - u)*((R)ip_s);
 
 5040       ip_u = LRINT(FLOOR(ip_y));
 
 5042       for(l=0; l < 2*m+2; l++)
 
 5043         psij_const[l] = ths->
psi[ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +
 
 5044           ths->
psi[ABS(ip_u-l*ip_s+1)]*(ip_w);
 
 5046       nfft_uo(ths,j,&u,&o,1);
 
 5047       ip_y = FABS(n1*ths->
x[3*j+1] - u)*((R)ip_s);
 
 5048       ip_u = LRINT(FLOOR(ip_y));
 
 5050       for(l=0; l < 2*m+2; l++)
 
 5051         psij_const[2*m+2+l] = ths->
psi[(K+1)+ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +
 
 5052           ths->
psi[(K+1)+ABS(ip_u-l*ip_s+1)]*(ip_w);
 
 5054       nfft_uo(ths,j,&u,&o,2);
 
 5055       ip_y = FABS(n2*ths->
x[3*j+2] - u)*((R)ip_s);
 
 5056       ip_u = LRINT(FLOOR(ip_y));
 
 5058       for(l=0; l < 2*m+2; l++)
 
 5059         psij_const[2*(2*m+2)+l] = ths->
psi[2*(K+1)+ABS(ip_u-l*ip_s)]*(K(1.0)-ip_w) +
 
 5060           ths->
psi[2*(K+1)+ABS(ip_u-l*ip_s+1)]*(ip_w);
 
 5063       nfft_adjoint_3d_compute_omp_atomic(ths->
f[j], g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 5065       nfft_adjoint_3d_compute_serial(ths->
f+j, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 5072   nfft_sort_nodes(ths);
 
 5075   MACRO_nfft_adjoint_3d_B_OMP_BLOCKWISE(NO_PSI)
 
 5078   #pragma omp parallel for default(shared) private(k) 
 5079   for (k = 0; k < M; k++)
 
 5082     R psij_const[3*(2*m+2)];
 
 5085     nfft_uo(ths,j,&u,&o,0);
 
 5086     for(l=0;l<=2*m+1;l++)
 
 5087       psij_const[l]=(PHI(ths->
x[3*j]-((R)((u+l)))/n0,0));
 
 5089     nfft_uo(ths,j,&u,&o,1);
 
 5090     for(l=0;l<=2*m+1;l++)
 
 5091       psij_const[2*m+2+l]=(PHI(ths->
x[3*j+1]-((R)((u+l)))/n1,1));
 
 5093     nfft_uo(ths,j,&u,&o,2);
 
 5094     for(l=0;l<=2*m+1;l++)
 
 5095       psij_const[2*(2*m+2)+l]=(PHI(ths->
x[3*j+2]-((R)((u+l)))/n2,2));
 
 5098     nfft_adjoint_3d_compute_omp_atomic(ths->
f[j], g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 5100     nfft_adjoint_3d_compute_serial(ths->
f+j, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, ths->
x+3*j, ths->
x+3*j+1, ths->
x+3*j+2, n0, n1, n2, m);
 
 5108   int k0,k1,k2,n0,n1,n2,N0,N1,N2;
 
 5110   R *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12, *c_phi_inv21, *c_phi_inv22;
 
 5111   R ck01, ck02, ck11, ck12, ck21, ck22;
 
 5112   C *g_hat111,*f_hat111,*g_hat211,*f_hat211,*g_hat121,*f_hat121,*g_hat221,*f_hat221;
 
 5113   C *g_hat112,*f_hat112,*g_hat212,*f_hat212,*g_hat122,*f_hat122,*g_hat222,*f_hat222;
 
 5125   f_hat=(C*)ths->
f_hat;
 
 5126   g_hat=(C*)ths->
g_hat;
 
 5130   #pragma omp parallel for default(shared) private(k0) 
 5131   for (k0 = 0; k0 < ths->
n_total; k0++)
 
 5132     ths->
g_hat[k0] = 0.0;
 
 5142       #pragma omp parallel for default(shared) private(k0,k1,k2,ck01,ck02,c_phi_inv11,c_phi_inv12,ck11,ck12,c_phi_inv21,c_phi_inv22,g_hat111,f_hat111,g_hat211,f_hat211,g_hat121,f_hat121,g_hat221,f_hat221,g_hat112,f_hat112,g_hat212,f_hat212,g_hat122,f_hat122,g_hat222,f_hat222,ck21,ck22) 
 5143       for(k0=0;k0<N0/2;k0++)
 
 5145     ck01=c_phi_inv01[k0];
 
 5146     ck02=c_phi_inv02[k0];
 
 5150     for(k1=0;k1<N1/2;k1++)
 
 5152         ck11=c_phi_inv11[k1];
 
 5153         ck12=c_phi_inv12[k1];
 
 5157         g_hat111=g_hat + ((n0-(N0/2)+k0)*n1+n1-(N1/2)+k1)*n2+n2-(N2/2);
 
 5158         f_hat111=f_hat + (k0*N1+k1)*N2;
 
 5159         g_hat211=g_hat + (k0*n1+n1-(N1/2)+k1)*n2+n2-(N2/2);
 
 5160         f_hat211=f_hat + (((N0/2)+k0)*N1+k1)*N2;
 
 5161         g_hat121=g_hat + ((n0-(N0/2)+k0)*n1+k1)*n2+n2-(N2/2);
 
 5162         f_hat121=f_hat + (k0*N1+(N1/2)+k1)*N2;
 
 5163         g_hat221=g_hat + (k0*n1+k1)*n2+n2-(N2/2);
 
 5164         f_hat221=f_hat + (((N0/2)+k0)*N1+(N1/2)+k1)*N2;
 
 5166         g_hat112=g_hat + ((n0-(N0/2)+k0)*n1+n1-(N1/2)+k1)*n2;
 
 5167         f_hat112=f_hat + (k0*N1+k1)*N2+(N2/2);
 
 5168         g_hat212=g_hat + (k0*n1+n1-(N1/2)+k1)*n2;
 
 5169         f_hat212=f_hat + (((N0/2)+k0)*N1+k1)*N2+(N2/2);
 
 5170         g_hat122=g_hat + ((n0-(N0/2)+k0)*n1+k1)*n2;
 
 5171         f_hat122=f_hat + (k0*N1+N1/2+k1)*N2+(N2/2);
 
 5172         g_hat222=g_hat + (k0*n1+k1)*n2;
 
 5173         f_hat222=f_hat + (((N0/2)+k0)*N1+(N1/2)+k1)*N2+(N2/2);
 
 5175         for(k2=0;k2<N2/2;k2++)
 
 5177       ck21=c_phi_inv21[k2];
 
 5178       ck22=c_phi_inv22[k2];
 
 5180       g_hat111[k2] = f_hat111[k2] * ck01 * ck11 * ck21;
 
 5181       g_hat211[k2] = f_hat211[k2] * ck02 * ck11 * ck21;
 
 5182       g_hat121[k2] = f_hat121[k2] * ck01 * ck12 * ck21;
 
 5183       g_hat221[k2] = f_hat221[k2] * ck02 * ck12 * ck21;
 
 5185       g_hat112[k2] = f_hat112[k2] * ck01 * ck11 * ck22;
 
 5186       g_hat212[k2] = f_hat212[k2] * ck02 * ck11 * ck22;
 
 5187       g_hat122[k2] = f_hat122[k2] * ck01 * ck12 * ck22;
 
 5188       g_hat222[k2] = f_hat222[k2] * ck02 * ck12 * ck22;
 
 5194     #pragma omp parallel for default(shared) private(k0,k1,k2,ck01,ck02,ck11,ck12,ck21,ck22) 
 5195     for(k0=0;k0<N0/2;k0++)
 
 5197   ck01=K(1.0)/(PHI_HUT(k0-N0/2,0));
 
 5198   ck02=K(1.0)/(PHI_HUT(k0,0));
 
 5199   for(k1=0;k1<N1/2;k1++)
 
 5201       ck11=K(1.0)/(PHI_HUT(k1-N1/2,1));
 
 5202       ck12=K(1.0)/(PHI_HUT(k1,1));
 
 5204       for(k2=0;k2<N2/2;k2++)
 
 5206     ck21=K(1.0)/(PHI_HUT(k2-N2/2,2));
 
 5207     ck22=K(1.0)/(PHI_HUT(k2,2));
 
 5209     g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+n2-N2/2+k2] = f_hat[(k0*N1+k1)*N2+k2]                  * ck01 * ck11 * ck21;
 
 5210     g_hat[(k0*n1+n1-N1/2+k1)*n2+n2-N2/2+k2]           = f_hat[((N0/2+k0)*N1+k1)*N2+k2]           * ck02 * ck11 * ck21;
 
 5211     g_hat[((n0-N0/2+k0)*n1+k1)*n2+n2-N2/2+k2]         = f_hat[(k0*N1+N1/2+k1)*N2+k2]             * ck01 * ck12 * ck21;
 
 5212     g_hat[(k0*n1+k1)*n2+n2-N2/2+k2]                   = f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+k2]      * ck02 * ck12 * ck21;
 
 5214     g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+k2]         = f_hat[(k0*N1+k1)*N2+N2/2+k2]             * ck01 * ck11 * ck22;
 
 5215     g_hat[(k0*n1+n1-N1/2+k1)*n2+k2]                   = f_hat[((N0/2+k0)*N1+k1)*N2+N2/2+k2]      * ck02 * ck11 * ck22;
 
 5216     g_hat[((n0-N0/2+k0)*n1+k1)*n2+k2]                 = f_hat[(k0*N1+N1/2+k1)*N2+N2/2+k2]        * ck01 * ck12 * ck22;
 
 5217     g_hat[(k0*n1+k1)*n2+k2]                           = f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+N2/2+k2] * ck02 * ck12 * ck22;
 
 5225   fftw_execute(ths->my_fftw_plan1);
 
 5229   nfft_trafo_3d_B(ths);
 
 5235   int k0,k1,k2,n0,n1,n2,N0,N1,N2;
 
 5237   R *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12, *c_phi_inv21, *c_phi_inv22;
 
 5238   R ck01, ck02, ck11, ck12, ck21, ck22;
 
 5239   C *g_hat111,*f_hat111,*g_hat211,*f_hat211,*g_hat121,*f_hat121,*g_hat221,*f_hat221;
 
 5240   C *g_hat112,*f_hat112,*g_hat212,*f_hat212,*g_hat122,*f_hat122,*g_hat222,*f_hat222;
 
 5252   f_hat=(C*)ths->f_hat;
 
 5253   g_hat=(C*)ths->g_hat;
 
 5256   nfft_adjoint_3d_B(ths);
 
 5260   fftw_execute(ths->my_fftw_plan2);
 
 5264   if(ths->nfft_flags & PRE_PHI_HUT)
 
 5266       c_phi_inv01=ths->c_phi_inv[0];
 
 5267       c_phi_inv02=&ths->c_phi_inv[0][N0/2];
 
 5269       #pragma omp parallel for default(shared) private(k0,k1,k2,ck01,ck02,c_phi_inv11,c_phi_inv12,ck11,ck12,c_phi_inv21,c_phi_inv22,g_hat111,f_hat111,g_hat211,f_hat211,g_hat121,f_hat121,g_hat221,f_hat221,g_hat112,f_hat112,g_hat212,f_hat212,g_hat122,f_hat122,g_hat222,f_hat222,ck21,ck22) 
 5270       for(k0=0;k0<N0/2;k0++)
 
 5272     ck01=c_phi_inv01[k0];
 
 5273     ck02=c_phi_inv02[k0];
 
 5274     c_phi_inv11=ths->c_phi_inv[1];
 
 5275     c_phi_inv12=&ths->c_phi_inv[1][N1/2];
 
 5277     for(k1=0;k1<N1/2;k1++)
 
 5279         ck11=c_phi_inv11[k1];
 
 5280         ck12=c_phi_inv12[k1];
 
 5281         c_phi_inv21=ths->c_phi_inv[2];
 
 5282         c_phi_inv22=&ths->c_phi_inv[2][N2/2];
 
 5284         g_hat111=g_hat + ((n0-(N0/2)+k0)*n1+n1-(N1/2)+k1)*n2+n2-(N2/2);
 
 5285         f_hat111=f_hat + (k0*N1+k1)*N2;
 
 5286         g_hat211=g_hat + (k0*n1+n1-(N1/2)+k1)*n2+n2-(N2/2);
 
 5287         f_hat211=f_hat + (((N0/2)+k0)*N1+k1)*N2;
 
 5288         g_hat121=g_hat + ((n0-(N0/2)+k0)*n1+k1)*n2+n2-(N2/2);
 
 5289         f_hat121=f_hat + (k0*N1+(N1/2)+k1)*N2;
 
 5290         g_hat221=g_hat + (k0*n1+k1)*n2+n2-(N2/2);
 
 5291         f_hat221=f_hat + (((N0/2)+k0)*N1+(N1/2)+k1)*N2;
 
 5293         g_hat112=g_hat + ((n0-(N0/2)+k0)*n1+n1-(N1/2)+k1)*n2;
 
 5294         f_hat112=f_hat + (k0*N1+k1)*N2+(N2/2);
 
 5295         g_hat212=g_hat + (k0*n1+n1-(N1/2)+k1)*n2;
 
 5296         f_hat212=f_hat + (((N0/2)+k0)*N1+k1)*N2+(N2/2);
 
 5297         g_hat122=g_hat + ((n0-(N0/2)+k0)*n1+k1)*n2;
 
 5298         f_hat122=f_hat + (k0*N1+(N1/2)+k1)*N2+(N2/2);
 
 5299         g_hat222=g_hat + (k0*n1+k1)*n2;
 
 5300         f_hat222=f_hat + (((N0/2)+k0)*N1+(N1/2)+k1)*N2+(N2/2);
 
 5302         for(k2=0;k2<N2/2;k2++)
 
 5304       ck21=c_phi_inv21[k2];
 
 5305       ck22=c_phi_inv22[k2];
 
 5307       f_hat111[k2] = g_hat111[k2] * ck01 * ck11 * ck21;
 
 5308       f_hat211[k2] = g_hat211[k2] * ck02 * ck11 * ck21;
 
 5309       f_hat121[k2] = g_hat121[k2] * ck01 * ck12 * ck21;
 
 5310       f_hat221[k2] = g_hat221[k2] * ck02 * ck12 * ck21;
 
 5312       f_hat112[k2] = g_hat112[k2] * ck01 * ck11 * ck22;
 
 5313       f_hat212[k2] = g_hat212[k2] * ck02 * ck11 * ck22;
 
 5314       f_hat122[k2] = g_hat122[k2] * ck01 * ck12 * ck22;
 
 5315       f_hat222[k2] = g_hat222[k2] * ck02 * ck12 * ck22;
 
 5321     #pragma omp parallel for default(shared) private(k0,k1,k2,ck01,ck02,ck11,ck12,ck21,ck22) 
 5322     for(k0=0;k0<N0/2;k0++)
 
 5324   ck01=K(1.0)/(PHI_HUT(k0-N0/2,0));
 
 5325   ck02=K(1.0)/(PHI_HUT(k0,0));
 
 5326   for(k1=0;k1<N1/2;k1++)
 
 5328       ck11=K(1.0)/(PHI_HUT(k1-N1/2,1));
 
 5329       ck12=K(1.0)/(PHI_HUT(k1,1));
 
 5331       for(k2=0;k2<N2/2;k2++)
 
 5333     ck21=K(1.0)/(PHI_HUT(k2-N2/2,2));
 
 5334     ck22=K(1.0)/(PHI_HUT(k2,2));
 
 5336     f_hat[(k0*N1+k1)*N2+k2]                  = g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+n2-N2/2+k2] * ck01 * ck11 * ck21;
 
 5337     f_hat[((N0/2+k0)*N1+k1)*N2+k2]           = g_hat[(k0*n1+n1-N1/2+k1)*n2+n2-N2/2+k2]           * ck02 * ck11 * ck21;
 
 5338     f_hat[(k0*N1+N1/2+k1)*N2+k2]             = g_hat[((n0-N0/2+k0)*n1+k1)*n2+n2-N2/2+k2]         * ck01 * ck12 * ck21;
 
 5339     f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+k2]      = g_hat[(k0*n1+k1)*n2+n2-N2/2+k2]                   * ck02 * ck12 * ck21;
 
 5341     f_hat[(k0*N1+k1)*N2+N2/2+k2]             = g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+k2]         * ck01 * ck11 * ck22;
 
 5342     f_hat[((N0/2+k0)*N1+k1)*N2+N2/2+k2]      = g_hat[(k0*n1+n1-N1/2+k1)*n2+k2]                   * ck02 * ck11 * ck22;
 
 5343     f_hat[(k0*N1+N1/2+k1)*N2+N2/2+k2]        = g_hat[((n0-N0/2+k0)*n1+k1)*n2+k2]                 * ck01 * ck12 * ck22;
 
 5344     f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+N2/2+k2] = g_hat[(k0*n1+k1)*n2+k2]                           * ck02 * ck12 * ck22;
 
 5358     case 1: nfft_trafo_1d(ths); 
break;
 
 5359     case 2: nfft_trafo_2d(ths); 
break;
 
 5360     case 3: nfft_trafo_3d(ths); 
break;
 
 5378     fftw_execute(ths->my_fftw_plan1);
 
 5394     case 1: nfft_adjoint_1d(ths); 
break;
 
 5395     case 2: nfft_adjoint_2d(ths); 
break;
 
 5396     case 3: nfft_adjoint_3d(ths); 
break;
 
 5414       fftw_execute(ths->my_fftw_plan2);
 
 5429 static 
void nfft_precompute_phi_hut(
nfft_plan *ths)
 
 5434   ths->c_phi_inv = (R**) 
nfft_malloc(ths->d*
sizeof(R*));
 
 5436   for(t=0; t<ths->d; t++)
 
 5438       ths->c_phi_inv[t]= (R*)
nfft_malloc(ths->N[t]*
sizeof(R));
 
 5439       for(ks[t]=0; ks[t]<ths->N[t]; ks[t]++)
 
 5440   ths->c_phi_inv[t][ks[t]]= K(1.0)/(PHI_HUT(ks[t]-ths->N[t]/2,t));
 
 5455   for (t=0; t<ths->
d; t++)
 
 5457       step=((R)(ths->
m+2))/(((R)ths->
K)*ths->
n[t]);
 
 5458       for(j=0;j<=ths->
K;j++)
 
 5460     ths->
psi[(ths->
K+1)*t + j] = PHI(j*step,t);
 
 5465 static void nfft_precompute_fg_psi(
nfft_plan *ths)
 
 5470   nfft_sort_nodes(ths);
 
 5472   for (t=0; t<ths->
d; t++)
 
 5475     #pragma omp parallel for default(shared) private(j,u,o) 
 5476     for (j = 0; j < ths->
M_total; j++)
 
 5478   nfft_uo(ths,j,&u,&o,t);
 
 5480         ths->
psi[2*(j*ths->
d+t)]=
 
 5481             (PHI((ths->
x[j*ths->
d+t]-((R)u)/ths->
n[t]),t));
 
 5483         ths->
psi[2*(j*ths->
d+t)+1]=
 
 5484             EXP(K(2.0)*(ths->
n[t]*ths->
x[j*ths->
d+t] - u) / ths->
b[t]);
 
 5490 void nfft_precompute_psi(
nfft_plan *ths)
 
 5497   nfft_sort_nodes(ths);
 
 5499   for (t=0; t<ths->
d; t++)
 
 5502     #pragma omp parallel for default(shared) private(j,l,lj,u,o) 
 5503     for (j = 0; j < ths->
M_total; j++)
 
 5505   nfft_uo(ths,j,&u,&o,t);
 
 5507   for(l=u, lj=0; l <= o; l++, lj++)
 
 5508     ths->
psi[(j*ths->
d+t)*(2*ths->
m+2)+lj]=
 
 5509       (PHI((ths->
x[j*ths->
d+t]-((R)l)/ths->
n[t]),t));
 
 5516 static void nfft_precompute_full_psi_omp(
nfft_plan *ths)
 
 5523     for(t=0,lprod = 1; t<ths->
d; t++)
 
 5524         lprod *= 2*ths->
m+2;
 
 5527   #pragma omp parallel for default(shared) private(j) 
 5534       int ll_plain[ths->
d+1];               
 
 5536       int u[ths->
d], o[ths->
d];             
 
 5538       R phi_prod[ths->
d+1];
 
 5544       MACRO_init_uo_l_lj_t;
 
 5546       for(l_L=0; l_L<lprod; l_L++, ix++)
 
 5548         MACRO_update_phi_prod_ll_plain(without_PRE_PSI);
 
 5551         ths->
psi[ix]=phi_prod[ths->
d];
 
 5553         MACRO_count_uo_l_lj_t;
 
 5561 void nfft_precompute_full_psi(
nfft_plan *ths)
 
 5564   nfft_sort_nodes(ths);
 
 5566   nfft_precompute_full_psi_omp(ths);
 
 5573   int ll_plain[ths->
d+1];               
 
 5575   int u[ths->
d], o[ths->
d];             
 
 5577   R phi_prod[ths->
d+1];
 
 5581   nfft_sort_nodes(ths);
 
 5586   for(t=0,lprod = 1; t<ths->
d; t++)
 
 5587       lprod *= 2*ths->
m+2;
 
 5589   for(j=0,ix=0,ix_old=0; j<ths->M_total; j++)
 
 5591       MACRO_init_uo_l_lj_t;
 
 5593       for(l_L=0; l_L<lprod; l_L++, ix++)
 
 5595     MACRO_update_phi_prod_ll_plain(without_PRE_PSI);
 
 5598     ths->
psi[ix]=phi_prod[ths->
d];
 
 5600     MACRO_count_uo_l_lj_t;
 
 5610 void nfft_precompute_one_psi(
nfft_plan *ths)
 
 5615     nfft_precompute_fg_psi(ths);
 
 5617     nfft_precompute_psi(ths);
 
 5619     nfft_precompute_full_psi(ths);
 
 5623 static void nfft_init_help(
nfft_plan *ths)
 
 5628   if (ths->
nfft_flags & NFFT_OMP_BLOCKWISE_ADJOINT)
 
 5635   for(t = 0;t < ths->
d; t++)
 
 5636     ths->
sigma[t] = ((R)ths->
n[t])/ths->
N[t];
 
 5650     nfft_precompute_phi_hut(ths);
 
 5654       ths->
K=(1U<< 10)*(ths->
m+2);
 
 5663              (2*ths->
m+2)*
sizeof(R));
 
 5667       for(t=0,lprod = 1; t<ths->
d; t++)
 
 5668     lprod *= 2*ths->
m+2;
 
 5679     int nthreads = nfft_get_omp_num_threads();
 
 5690 #pragma omp critical (nfft_omp_critical_fftw_plan) 
 5692     fftw_plan_with_nthreads(nthreads);
 
 5694     ths->my_fftw_plan1 = fftw_plan_dft(ths->
d, ths->
n, ths->
g1, ths->
g2, FFTW_FORWARD, ths->
fftw_flags);
 
 5695     ths->my_fftw_plan2 = fftw_plan_dft(ths->
d, ths->
n, ths->
g2, ths->
g1,
 
 5708   ths->
mv_adjoint = (void (*) (
void* ))nfft_adjoint;
 
 5711 void nfft_init(
nfft_plan *ths, 
int d, 
int *N, 
int M_total)
 
 5719   for (t = 0;t < d; t++)
 
 5725   for (t = 0;t < d; t++)
 
 5726     ths->
n[t] = 2*X(next_power_of_2)(ths->
N[t]);
 
 5728   ths->
m = WINDOW_HELP_ESTIMATE_m;
 
 5733     ths->
nfft_flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
 
 5734                       FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES |
 
 5735           NFFT_OMP_BLOCKWISE_ADJOINT;
 
 5737     ths->
nfft_flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
 
 5738                       FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES;
 
 5742     ths->
nfft_flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
 
 5743                       FFTW_INIT | FFT_OUT_OF_PLACE;
 
 5745   ths->
fftw_flags= FFTW_ESTIMATE| FFTW_DESTROY_INPUT;
 
 5747   nfft_init_help(ths);
 
 5750 void nfft_init_guru(
nfft_plan *ths, 
int d, 
int *N, 
int M_total, 
int *n,
 
 5751       int m, 
unsigned nfft_flags, 
unsigned fftw_flags)
 
 5767   nfft_init_help(ths);
 
 5770 void nfft_init_1d(
nfft_plan *ths, 
int N1, 
int M_total)
 
 5776   nfft_init(ths, 1, N, M_total);
 
 5779 void nfft_init_2d(
nfft_plan *ths, 
int N1, 
int N2, 
int M_total)
 
 5785   nfft_init(ths,2,N,M_total);
 
 5788 void nfft_init_3d(
nfft_plan *ths, 
int N1, 
int N2, 
int N3, 
int M_total)
 
 5795   nfft_init(ths,3,N,M_total);
 
 5803     if((ths->
x[j]<-K(0.5)) || (ths->
x[j]>= K(0.5)))
 
 5804       return "ths->x out of range [-0.5,0.5)";
 
 5806   for(j=0;j<ths->
d;j++)
 
 5808     if(ths->
sigma[j]<=1)
 
 5809       return "nfft_check: oversampling factor too small";
 
 5811     if(ths->
N[j]<=ths->
m)
 
 5812       return "Polynomial degree N is smaller than cut-off m";
 
 5815       return "polynomial degree N has to be even";
 
 5829 #pragma omp critical (nfft_omp_critical_fftw_plan) 
 5831     fftw_destroy_plan(ths->my_fftw_plan2);
 
 5832     fftw_destroy_plan(ths->my_fftw_plan1);
 
 5859       for(t=0; t<ths->
d; t++)
 
 5873   WINDOW_HELP_FINALIZE;