44 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0)) 
   47 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1) 
   50 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y))) 
   53 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1) 
   55 #define N_TILDE(y) (y-1) 
   57 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z)) 
   59 #define FPT_BREAK_EVEN 4 
   71   double **a11,**a12,**a21,**
a22;         
 
  109   double _Complex *temp;                          
 
  110   double _Complex *work;                          
 
  111   double _Complex *result;                        
 
  112   double _Complex *vec3;
 
  113   double _Complex *vec4;
 
  130 static inline void abuvxpwy(
double a, 
double b, 
double _Complex* u, 
double _Complex* x,
 
  131   double* v, 
double _Complex* y, 
double* w, 
int n)
 
  133   int l; 
double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
 
  134   double *v_ptr = v, *w_ptr = w;
 
  135   for (l = 0; l < n; l++)
 
  136     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
 
  139 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \ 
  140 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \ 
  141   double* v, double _Complex* y, double* w, int n) \ 
  143   const int n2 = n>>1; \ 
  144   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \ 
  145   double *v_ptr = v, *w_ptr = w; \ 
  146   for (l = 0; l < n2; l++) \ 
  147     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \ 
  149   for (l = 0; l < n2; l++) \ 
  150     *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \ 
  153 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
 
  154 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
 
  156 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \ 
  157 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \ 
  158   double* v, double _Complex* y, int n, double *xx) \ 
  160   const int n2 = n>>1; \ 
  161   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \ 
  162   double *v_ptr = v, *xx_ptr = xx; \ 
  163   for (l = 0; l < n2; l++, v_ptr++) \ 
  164     *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \ 
  166   for (l = 0; l < n2; l++, v_ptr--) \ 
  167     *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \ 
  170 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
 
  171 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
 
  173 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \ 
  174 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \ 
  175   double _Complex* y, double* w, int n, double *xx) \ 
  177   const int n2 = n>>1; \ 
  178   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \ 
  179   double *w_ptr = w, *xx_ptr = xx; \ 
  180   for (l = 0; l < n2; l++, w_ptr++) \ 
  181     *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \ 
  183   for (l = 0; l < n2; l++, w_ptr--) \ 
  184     *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \ 
  187 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
 
  188 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
 
  190 static inline 
void auvxpwy(
double a, 
double _Complex* u, 
double _Complex* x, 
double* v,
 
  191   double _Complex* y, 
double* w, 
int n)
 
  194   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
 
  195   double *v_ptr = v, *w_ptr = w;
 
  196   for (l = n; l > 0; l--)
 
  197     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
 
  200 static inline void auvxpwy_symmetric(
double a, 
double _Complex* u, 
double _Complex* x,
 
  201   double* v, 
double _Complex* y, 
double* w, 
int n)
 
  203   const int n2 = n>>1; \
 
  205   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
 
  206   double *v_ptr = v, *w_ptr = w;
 
  207   for (l = n2; l > 0; l--)
 
  208     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
 
  210   for (l = n2; l > 0; l--)
 
  211     *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
 
  214 static inline void auvxpwy_symmetric_1(
double a, 
double _Complex* u, 
double _Complex* x,
 
  215   double* v, 
double _Complex* y, 
double* w, 
int n, 
double *xx)
 
  217   const int n2 = n>>1; \
 
  219   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
 
  220   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
 
  221   for (l = n2; l > 0; l--, xx_ptr++)
 
  222     *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
 
  224   for (l = n2; l > 0; l--, xx_ptr++)
 
  225     *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
 
  228 static inline void auvxpwy_symmetric_2(
double a, 
double _Complex* u, 
double _Complex* x,
 
  229   double* v, 
double _Complex* y, 
double* w, 
int n, 
double *xx)
 
  231   const int n2 = n>>1; \
 
  233   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
 
  234   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
 
  235   for (l = n2; l > 0; l--, xx_ptr++)
 
  236     *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
 
  238   for (l = n2; l > 0; l--, xx_ptr++)
 
  239     *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
 
  242 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \ 
  243 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, double *a12, \ 
  244   double *a21, double *a22, double g, int tau, fpt_set set) \ 
  247   int length = 1<<(tau+1); \ 
  249   double norm = 1.0/(length<<1); \ 
  256   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \ 
  257   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \ 
  270     M2_FUNCTION(norm,b,b,a22,a,a21,length); \ 
  276     M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \ 
  277     M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \ 
  278     memcpy(b,set->z,length*sizeof(double _Complex)); \ 
  280     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \ 
  286   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \ 
  291 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
 
  292 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
 
  296 static inline 
void fpt_do_step_symmetric_u(
double _Complex *a, 
double _Complex *b,
 
  297   double *a11, 
double *a12, 
double *a21, 
double *a22, 
double *x,
 
  298   double gam, 
int tau, 
fpt_set set)
 
  301   int length = 1<<(tau+1);
 
  303   double norm = 1.0/(length<<1);
 
  305   UNUSED(a21); UNUSED(a22);
 
  312   fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
 
  313   fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
 
  326     auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
 
  332     auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
 
  333     auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
 
  334     memcpy(b,set->z,length*
sizeof(
double _Complex));
 
  336     fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
 
  342   fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
 
  347 static inline void fpt_do_step_symmetric_l(
double _Complex  *a, 
double _Complex *b,
 
  348   double *a11, 
double *a12, 
double *a21, 
double *a22, 
double *x, 
double gam, 
int tau, 
fpt_set set)
 
  351   int length = 1<<(tau+1);
 
  353   double norm = 1.0/(length<<1);
 
  359   UNUSED(a11); UNUSED(a12);
 
  362   fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
 
  363   fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
 
  376     auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
 
  382     auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
 
  383     auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
 
  384     memcpy(b,set->z,length*
sizeof(
double _Complex));
 
  386     fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
 
  392   fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
 
  397 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \ 
  398 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, \ 
  399   double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \ 
  402   int length = 1<<(tau+1); \ 
  404   double norm = 1.0/(length<<1); \ 
  407   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \ 
  408   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \ 
  411   M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \ 
  412   M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \ 
  413   memcpy(a,set->z,length*sizeof(double _Complex)); \ 
  416   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \ 
  417   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \ 
  420 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
 
  421 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
 
  426 static inline 
void fpt_do_step_t_symmetric_u(
double _Complex  *a,
 
  427   double _Complex *b, 
double *a11, 
double *a12, 
double *x,
 
  428   double gam, 
int tau, 
fpt_set set)
 
  431   int length = 1<<(tau+1);
 
  433   double norm = 1.0/(length<<1);
 
  436   fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
 
  437   fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
 
  440   abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
 
  441   abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
 
  442   memcpy(a,set->z,length*
sizeof(
double _Complex));
 
  445   fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
 
  446   fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
 
  449 static inline void fpt_do_step_t_symmetric_l(
double _Complex  *a,
 
  450   double _Complex *b, 
double *a21, 
double *a22,
 
  451   double *x, 
double gam, 
int tau, 
fpt_set set)
 
  454   int length = 1<<(tau+1);
 
  456   double norm = 1.0/(length<<1);
 
  459   fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
 
  460   fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
 
  463   abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
 
  464   abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
 
  465   memcpy(a,set->z,length*
sizeof(
double _Complex));
 
  468   fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
 
  469   fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
 
  473 static void eval_clenshaw(
const double *x, 
double *y, 
int size, 
int k, 
const double *alpha,
 
  474   const double *beta, 
const double *gam)
 
  480   double a,b,x_val_act,a_old;
 
  483   const double *alpha_act, *beta_act, *gamma_act;
 
  488   for (i = 0; i < size; i++)
 
  500       alpha_act = &(alpha[k]);
 
  501       beta_act = &(beta[k]);
 
  502       gamma_act = &(gam[k]);
 
  503       for (j = k; j > 1; j--)
 
  506         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
 
  507          b = a_old*(*gamma_act);
 
  512       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
 
  519 static void eval_clenshaw2(
const double *x, 
double *z, 
double *y, 
int size1, 
int size, 
int k, 
const double *alpha,
 
  520   const double *beta, 
const double *gam)
 
  526   double a,b,x_val_act,a_old;
 
  528   double *y_act, *z_act;
 
  529   const double *alpha_act, *beta_act, *gamma_act;
 
  535   for (i = 0; i < size; i++)
 
  548       alpha_act = &(alpha[k]);
 
  549       beta_act = &(beta[k]);
 
  550       gamma_act = &(gam[k]);
 
  551       for (j = k; j > 1; j--)
 
  554         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
 
  555         b = a_old*(*gamma_act);
 
  562       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
 
  575 static int eval_clenshaw_thresh2(
const double *x, 
double *z, 
double *y, 
int size, 
int k,
 
  576   const double *alpha, 
const double *beta, 
const double *gam, 
const 
  583   double a,b,x_val_act,a_old;
 
  585   double *y_act, *z_act;
 
  586   const double *alpha_act, *beta_act, *gamma_act;
 
  587   R max = -nfft_float_property(NFFT_R_MAX);
 
  588   const R t = LOG10(FABS(threshold));
 
  594   for (i = 0; i < size; i++)
 
  607       alpha_act = &(alpha[k]);
 
  608       beta_act = &(beta[k]);
 
  609       gamma_act = &(gam[k]);
 
  610       for (j = k; j > 1; j--)
 
  613         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
 
  614          b = a_old*(*gamma_act);
 
  620       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
 
  621       max = FMAX(max,LOG10(FABS(*y_act)));
 
  632 static inline void eval_sum_clenshaw_fast(
const int N, 
const int M,
 
  633   const double _Complex *a, 
const double *x, 
double _Complex *y,
 
  634   const double *alpha, 
const double *beta, 
const double *gam,
 
  638   double _Complex tmp1, tmp2, tmp3;
 
  649     for (j = 0; j <= M; j++)
 
  653     for (j = 0; j <= M; j++)
 
  658       tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
 
  659       for (k = N-1; k > 0; k--)
 
  662                + (alpha[k-1] * xc + beta[k-1]) * tmp1
 
  667       y[j] = lambda * tmp1;
 
  672       for (k = N-1; k > 0; k--)
 
  674         tmp3 = a[k-1] + tmp2 * gam[k];
 
  675         tmp2 *= (alpha[k] * xc + beta[k]);
 
  683       tmp2 *= (alpha[0] * xc + beta[0]);
 
  685       y[j] = lambda * (tmp2 + tmp1);
 
  716   double _Complex *y, 
double _Complex *temp, 
double *alpha, 
double *beta, 
double *gam,
 
  720   double _Complex* it1 = temp;
 
  721   double _Complex* it2 = y;
 
  726   for (j = 0; j <= M; j++)
 
  728     it2[j] = lambda * y[j];
 
  736     for (j = 0; j <= M; j++)
 
  739       it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
 
  743     for (k = 2; k <= N; k++)
 
  746       for (j = 0; j <= M; j++)
 
  750         it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
 
  758 fpt_set fpt_init(
const int M, 
const int t, 
const unsigned int flags)
 
  768   int nthreads = nfft_get_omp_num_threads();
 
  785   for (m = 0; m < 
set->M; m++)
 
  786     set->dpt[m].steps = 0;
 
  796   set->xcvecs = (
double**) 
nfft_malloc((set->t)*
sizeof(
double*));
 
  800   for (tau = 1; tau < t+1; tau++)
 
  803     set->xcvecs[tau-1] = (
double*) 
nfft_malloc(plength*
sizeof(
double));
 
  804     for (k = 0; k < plength; k++)
 
  806       set->xcvecs[tau-1][k] = cos(((k+0.5)*
PI)/plength);
 
  808     plength = plength << 1;
 
  812   set->work = (
double _Complex*) 
nfft_malloc((2*set->N)*
sizeof(
double _Complex));
 
  813   set->result = (
double _Complex*) 
nfft_malloc((2*set->N)*
sizeof(
double _Complex));
 
  815   set->lengths = (
int*) 
nfft_malloc((set->t)*
sizeof(int));
 
  816   set->plans_dct2 = (fftw_plan*) 
nfft_malloc(
sizeof(fftw_plan)*(
set->t));
 
  817   set->kindsr     = (fftw_r2r_kind*) 
nfft_malloc(2*
sizeof(fftw_r2r_kind));
 
  818   set->kindsr[0]  = FFTW_REDFT10;
 
  819   set->kindsr[1]  = FFTW_REDFT10;
 
  820   for (tau = 0, plength = 4; tau < 
set->t; tau++, plength<<=1)
 
  822     set->lengths[tau] = plength;
 
  824 #pragma omp critical (nfft_omp_critical_fftw_plan) 
  826     fftw_plan_with_nthreads(nthreads);
 
  828     set->plans_dct2[tau] =
 
  829       fftw_plan_many_r2r(1, &set->lengths[tau], 2, (
double*)set->work, NULL,
 
  830                          2, 1, (
double*)set->result, NULL, 2, 1,set->kindsr,
 
  838   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
 
  841     set->vec3 = (
double _Complex*) 
nfft_malloc(set->N*
sizeof(
double _Complex));
 
  842     set->vec4 = (
double _Complex*) 
nfft_malloc(set->N*
sizeof(
double _Complex));
 
  843     set->z = (
double _Complex*) 
nfft_malloc(set->N*
sizeof(
double _Complex));
 
  846     set->plans_dct3 = (fftw_plan*) 
nfft_malloc(
sizeof(fftw_plan)*(
set->t));
 
  847     set->kinds      = (fftw_r2r_kind*) 
nfft_malloc(2*
sizeof(fftw_r2r_kind));
 
  848     set->kinds[0]   = FFTW_REDFT01;
 
  849     set->kinds[1]   = FFTW_REDFT01;
 
  850     for (tau = 0, plength = 4; tau < 
set->t; tau++, plength<<=1)
 
  852       set->lengths[tau] = plength;
 
  854 #pragma omp critical (nfft_omp_critical_fftw_plan) 
  856     fftw_plan_with_nthreads(nthreads);
 
  858       set->plans_dct3[tau] =
 
  859         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (
double*)set->work, NULL,
 
  860                            2, 1, (
double*)set->result, NULL, 2, 1, set->kinds,
 
  874   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
 
  876     set->xc_slow = (
double*) 
nfft_malloc((set->N+1)*
sizeof(double));
 
  877     set->temp = (
double _Complex*) 
nfft_malloc((set->N+1)*
sizeof(
double _Complex));
 
  884 void fpt_precompute(
fpt_set set, 
const int m, 
double *alpha, 
double *beta,
 
  885   double *gam, 
int k_start, 
const double threshold)
 
  908   const double *calpha;
 
  910   const double *cgamma;
 
  921   data = &(
set->dpt[m]);
 
  924   if (data->
steps != NULL)
 
  933   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
 
  940     for (tau = 2; tau <= 
set->t; tau++)
 
  943       data->
alphaN[tau-2] = alpha[1<<tau];
 
  944       data->
betaN[tau-2] = beta[1<<tau];
 
  945       data->
gammaN[tau-2] = gam[1<<tau];
 
  953     N_tilde = N_TILDE(set->N);
 
  960     for (tau = 1; tau < 
set->t; tau++)
 
  965       firstl = 
FIRST_L(k_start_tilde,plength);
 
  967       lastl = 
LAST_L(N_tilde,plength);
 
  975       for (l = firstl; l <= lastl; l++)
 
  977         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
 
  989         a11 = (
double*) 
nfft_malloc(
sizeof(
double)*clength);
 
  990         a12 = (
double*) 
nfft_malloc(
sizeof(
double)*clength);
 
  991         a21 = (
double*) 
nfft_malloc(
sizeof(
double)*clength);
 
  992         a22 = (
double*) 
nfft_malloc(
sizeof(
double)*clength);
 
  998         calpha = &(alpha[plength*l+1+1]);
 
  999         cbeta = &(beta[plength*l+1+1]);
 
 1000         cgamma = &(gam[plength*l+1+1]);
 
 1002         if (set->flags & FPT_NO_STABILIZATION)
 
 1008           eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
 
 1010           eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
 
 1020           needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
 
 1021             degree-1, calpha, cbeta, cgamma, threshold);
 
 1025             needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
 
 1026               degree, calpha, cbeta, cgamma, threshold);
 
 1040           data->
steps[tau][l].a11[0] = a11;
 
 1041           data->
steps[tau][l].a12[0] = a12;
 
 1042           data->
steps[tau][l].a21[0] = a21;
 
 1044           data->
steps[tau][l].
g[0] = gam[plength*l+1+1];
 
 1050           degree_stab = degree*(2*l+1);
 
 1051           X(next_power_of_2_exp)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
 
 1065           plength_stab = N_stab;
 
 1067           if (set->flags & FPT_AL_SYMMETRY)
 
 1072               clength_1 = plength_stab;
 
 1073               clength_2 = plength_stab;
 
 1075               a11 = (
double*) 
nfft_malloc(
sizeof(
double)*clength_1);
 
 1076               a12 = (
double*) 
nfft_malloc(
sizeof(
double)*clength_1);
 
 1077               a21 = (
double*) 
nfft_malloc(
sizeof(
double)*clength_2);
 
 1078               a22 = (
double*) 
nfft_malloc(
sizeof(
double)*clength_2);
 
 1080               calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
 
 1081               eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
 
 1082                 clength_2, degree_stab-1, calpha, cbeta, cgamma);
 
 1083               eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
 
 1084                 clength_2, degree_stab+0, calpha, cbeta, cgamma);
 
 1088               clength = plength_stab/2;
 
 1091                 a11 = (
double*) 
nfft_malloc(
sizeof(
double)*clength);
 
 1092                 a12 = (
double*) 
nfft_malloc(
sizeof(
double)*clength);
 
 1095                 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
 
 1096                 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
 
 1097                   degree_stab-2, calpha, cbeta, cgamma);
 
 1098                 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
 
 1099                   degree_stab-1, calpha, cbeta, cgamma);
 
 1105                 a21 = (
double*) 
nfft_malloc(
sizeof(
double)*clength);
 
 1106                 a22 = (
double*) 
nfft_malloc(
sizeof(
double)*clength);
 
 1107                 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
 
 1108                 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
 
 1109                   degree_stab-1,calpha, cbeta, cgamma);
 
 1110                 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
 
 1111                   degree_stab+0, calpha, cbeta, cgamma);
 
 1117             clength_1 = plength_stab;
 
 1118             clength_2 = plength_stab;
 
 1119             a11 = (
double*) 
nfft_malloc(
sizeof(
double)*clength_1);
 
 1120             a12 = (
double*) 
nfft_malloc(
sizeof(
double)*clength_1);
 
 1121             a21 = (
double*) 
nfft_malloc(
sizeof(
double)*clength_2);
 
 1122             a22 = (
double*) 
nfft_malloc(
sizeof(
double)*clength_2);
 
 1123             calpha = &(alpha[2]);
 
 1129             eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
 
 1130               calpha, cbeta, cgamma);
 
 1131             eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
 
 1132               calpha, cbeta, cgamma);
 
 1135           data->
steps[tau][l].a11[0] = a11;
 
 1136           data->
steps[tau][l].a12[0] = a12;
 
 1137           data->
steps[tau][l].a21[0] = a21;
 
 1140           data->
steps[tau][l].
g[0] =  gam[1+1];
 
 1142           data->
steps[tau][l].
ts = t_stab;
 
 1143           data->
steps[tau][l].
Ns = N_stab;
 
 1147       plength = plength << 1;
 
 1151   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
 
 1154     if (set->flags & FPT_PERSISTENT_DATA)
 
 1156       data->
_alpha = (
double*) alpha;
 
 1157       data->
_beta = (
double*) beta;
 
 1158       data->
_gamma = (
double*) gam;
 
 1165       memcpy(data->
_alpha,alpha,(set->N+1)*
sizeof(
double));
 
 1166       memcpy(data->
_beta,beta,(set->N+1)*
sizeof(
double));
 
 1167       memcpy(data->
_gamma,gam,(set->N+1)*
sizeof(
double));
 
 1172 void fpt_trafo_direct(
fpt_set set, 
const int m, 
const double _Complex *x, 
double _Complex *y,
 
 1173   const int k_end, 
const unsigned int flags)
 
 1183   X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
 
 1188   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
 
 1193   if (flags & FPT_FUNCTION_VALUES)
 
 1196     for (j = 0; j <= k_end; j++)
 
 1198       set->xc_slow[j] = cos((
PI*(j+0.5))/(k_end+1));
 
 1202     memset(set->result,0U,data->
k_start*
sizeof(
double _Complex));
 
 1203     memcpy(&set->result[data->
k_start],x,(k_end-data->
k_start+1)*
sizeof(
double _Complex));
 
 1208     eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
 
 1213     memset(set->temp,0U,data->
k_start*
sizeof(
double _Complex));
 
 1214     memcpy(&set->temp[data->
k_start],x,(k_end-data->
k_start+1)*
sizeof(
double _Complex));
 
 1216     eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
 
 1220     fftw_execute_r2r(set->plans_dct2[tk-2],(
double*)set->result,
 
 1221       (
double*)set->result);
 
 1223     set->result[0] *= 0.5;
 
 1224     for (j = 0; j < Nk; j++)
 
 1226       set->result[j] *= norm;
 
 1229     memcpy(y,set->result,(k_end+1)*
sizeof(
double _Complex));
 
 1233 void fpt_trafo(
fpt_set set, 
const int m, 
const double _Complex *x, 
double _Complex *y,
 
 1234   const int k_end, 
const unsigned int flags)
 
 1264   int length = k_end+1;
 
 1265   fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
 
 1270   double _Complex *work_ptr;
 
 1271   const double _Complex *x_ptr;
 
 1274   if (k_end < FPT_BREAK_EVEN)
 
 1277     fpt_trafo_direct(
set, m, x, y, k_end, flags);
 
 1281   X(next_power_of_2_exp)(k_end,&Nk,&tk);
 
 1286   if (set->flags & FPT_NO_FAST_ALGORITHM)
 
 1289   if (flags & FPT_FUNCTION_VALUES)
 
 1292     int nthreads = nfft_get_omp_num_threads();
 
 1293 #pragma omp critical (nfft_omp_critical_fftw_plan) 
 1295     fftw_plan_with_nthreads(nthreads);
 
 1297     plan = fftw_plan_many_r2r(1, &length, 2, (
double*)set->work, NULL, 2, 1,
 
 1298       (
double*)set->work, NULL, 2, 1, kinds, 0U);
 
 1305   memset(set->result,0U,2*Nk*
sizeof(
double _Complex));
 
 1310   memset(set->work,0U,2*data->
k_start*
sizeof(
double _Complex));
 
 1312   work_ptr = &
set->work[2*data->
k_start];
 
 1315   for (k = 0; k <= k_end_tilde-data->
k_start; k++)
 
 1317     *work_ptr++ = *x_ptr++;
 
 1318     *work_ptr++ = K(0.0);
 
 1322   memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*
sizeof(
double _Complex));
 
 1328     set->work[2*(Nk-2)] += data->
gammaN[tk-2]*x[Nk-data->
k_start];
 
 1329     set->work[2*(Nk-1)] += data->
betaN[tk-2]*x[Nk-data->
k_start];
 
 1330     set->work[2*(Nk-1)+1] = data->
alphaN[tk-2]*x[Nk-data->
k_start];
 
 1335   for (tau = 1; tau < tk; tau++)
 
 1338     firstl = 
FIRST_L(k_start_tilde,plength);
 
 1340     lastl = 
LAST_L(k_end_tilde,plength);
 
 1343     for (l = firstl; l <= lastl; l++)
 
 1346       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*
sizeof(
double _Complex));
 
 1347       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*
sizeof(
double _Complex));
 
 1348       memset(&set->vec3[plength/2],0U,(plength/2)*
sizeof(
double _Complex));
 
 1349       memset(&set->vec4[plength/2],0U,(plength/2)*
sizeof(
double _Complex));
 
 1352       memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*
sizeof(
double _Complex));
 
 1353       memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*
sizeof(
double _Complex));
 
 1354       memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*
sizeof(
double _Complex));
 
 1357       step = &(data->
steps[tau][l]);
 
 1363         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
 
 1372           fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
 
 1373             step->a12[0], step->a21[0], step->
a22[0], step->
g[0], tau, 
set);
 
 1378           fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
 
 1379             step->a21[0], step->
a22[0], step->
g[0], tau, 
set);
 
 1382         if (step->
g[0] != 0.0)
 
 1384           for (k = 0; k < plength; k++)
 
 1386             set->work[plength*2*l+k] += 
set->vec3[k];
 
 1389         for (k = 0; k < plength; k++)
 
 1391           set->work[plength*(2*l+1)+k] += set->vec4[k];
 
 1399         plength_stab = step->
Ns;
 
 1412         memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*
sizeof(
double _Complex));
 
 1413         memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*
sizeof(
double _Complex));
 
 1417         if (set->flags & FPT_AL_SYMMETRY)
 
 1421             fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
 
 1422               step->a21[0], step->
a22[0], step->
g[0], t_stab-1, 
set);
 
 1428             fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
 
 1429               step->a21[0], step->
a22[0],
 
 1430               set->xcvecs[t_stab-2], step->
g[0], t_stab-1, 
set);
 
 1438             fpt_do_step_symmetric_l(set->vec3, set->vec4,
 
 1439               step->a11[0], step->a12[0],
 
 1441               step->
a22[0], set->xcvecs[t_stab-2], step->
g[0], t_stab-1, 
set);
 
 1448             fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
 
 1449               step->a21[0], step->
a22[0], step->
g[0], t_stab-1, 
set);
 
 1452         if (step->
g[0] != 0.0)
 
 1454           for (k = 0; k < plength_stab; k++)
 
 1456             set->result[k] += 
set->vec3[k];
 
 1460         for (k = 0; k < plength_stab; k++)
 
 1462           set->result[Nk+k] += 
set->vec4[k];
 
 1467     plength = plength<<1;
 
 1481   for (k = 0; k < 2*Nk; k++)
 
 1483     set->result[k] += 
set->work[k];
 
 1488   y[0] = data->
gamma_m1*(
set->result[0] + data->
beta_0*
set->result[Nk] +
 
 1489     data->
alpha_0*
set->result[Nk+1]*0.5);
 
 1490   y[1] = data->
gamma_m1*(
set->result[1] + data->
beta_0*
set->result[Nk+1]+
 
 1491     data->
alpha_0*(
set->result[Nk]+
set->result[Nk+2]*0.5));
 
 1492   y[k_end-1] = data->
gamma_m1*(
set->result[k_end-1] +
 
 1493     data->
beta_0*
set->result[Nk+k_end-1] +
 
 1494     data->
alpha_0*
set->result[Nk+k_end-2]*0.5);
 
 1495   y[k_end] = data->
gamma_m1*(0.5*data->
alpha_0*
set->result[Nk+k_end-1]);
 
 1496   for (k = 2; k <= k_end-2; k++)
 
 1498     y[k] = data->
gamma_m1*(
set->result[k] + data->
beta_0*
set->result[Nk+k] +
 
 1499       data->
alpha_0*0.5*(
set->result[Nk+k-1]+
set->result[Nk+k+1]));
 
 1502   if (flags & FPT_FUNCTION_VALUES)
 
 1505     fftw_execute_r2r(plan,(
double*)y,(
double*)y);
 
 1506 #pragma omp critical (nfft_omp_critical_fftw_plan) 
 1507     fftw_destroy_plan(plan);
 
 1508     for (k = 0; k <= k_end; k++)
 
 1515 void fpt_transposed_direct(
fpt_set set, 
const int m, 
double _Complex *x,
 
 1516   double _Complex *y, 
const int k_end, 
const unsigned int flags)
 
 1524   X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
 
 1527   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
 
 1532   if (flags & FPT_FUNCTION_VALUES)
 
 1534     for (j = 0; j <= k_end; j++)
 
 1536       set->xc_slow[j] = cos((
PI*(j+0.5))/(k_end+1));
 
 1544       sizeof(
double _Complex));
 
 1548     memcpy(set->result,y,(k_end+1)*
sizeof(
double _Complex));
 
 1549     memset(&set->result[k_end+1],0U,(Nk-k_end-1)*
sizeof(
double _Complex));
 
 1551     for (j = 0; j < Nk; j++)
 
 1553       set->result[j] *= norm;
 
 1556     fftw_execute_r2r(set->plans_dct3[tk-2],(
double*)set->result,
 
 1557       (
double*)set->result);
 
 1563     memcpy(x,&set->temp[data->
k_start],(k_end-data->
k_start+1)*
sizeof(
double _Complex));
 
 1567 void fpt_transposed(
fpt_set set, 
const int m, 
double _Complex *x,
 
 1568   double _Complex *y, 
const int k_end, 
const unsigned int flags)
 
 1597   int length = k_end+1;
 
 1598   fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
 
 1604   if (k_end < FPT_BREAK_EVEN)
 
 1607     fpt_transposed_direct(
set, m, x, y, k_end, flags);
 
 1611   X(next_power_of_2_exp)(k_end,&Nk,&tk);
 
 1616   if (set->flags & FPT_NO_FAST_ALGORITHM)
 
 1621   if (flags & FPT_FUNCTION_VALUES)
 
 1624     int nthreads = nfft_get_omp_num_threads();
 
 1625 #pragma omp critical (nfft_omp_critical_fftw_plan) 
 1627     fftw_plan_with_nthreads(nthreads);
 
 1629     plan = fftw_plan_many_r2r(1, &length, 2, (
double*)set->work, NULL, 2, 1,
 
 1630       (
double*)set->work, NULL, 2, 1, kinds, 0U);
 
 1634     fftw_execute_r2r(plan,(
double*)y,(
double*)set->result);
 
 1635 #pragma omp critical (nfft_omp_critical_fftw_plan) 
 1636     fftw_destroy_plan(plan);
 
 1637     for (k = 0; k <= k_end; k++)
 
 1639       set->result[k] *= 0.5;
 
 1644     memcpy(set->result,y,(k_end+1)*
sizeof(
double _Complex));
 
 1648   memset(set->work,0U,2*Nk*
sizeof(
double _Complex));
 
 1651   for (k = 0; k <= k_end; k++)
 
 1653     set->work[k] = data->
gamma_m1*
set->result[k];
 
 1658     data->
alpha_0*
set->result[1]);
 
 1659   for (k = 1; k < k_end; k++)
 
 1662       data->
alpha_0*0.5*(
set->result[k-1]+
set->result[k+1]));
 
 1666     memset(&set->work[k_end],0U,(Nk-k_end)*
sizeof(
double _Complex));
 
 1670   memcpy(set->result,set->work,2*Nk*
sizeof(
double _Complex));
 
 1674   for (tau = tk-1; tau >= 1; tau--)
 
 1677     firstl = 
FIRST_L(k_start_tilde,plength);
 
 1679     lastl = 
LAST_L(k_end_tilde,plength);
 
 1682     for (l = firstl; l <= lastl; l++)
 
 1685       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*
sizeof(
double _Complex));
 
 1686       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*
sizeof(
double _Complex));
 
 1688       memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
 
 1689         (plength/2)*
sizeof(
double _Complex));
 
 1692       step = &(data->
steps[tau][l]);
 
 1697         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
 
 1700           fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
 
 1701             step->a21[0], step->
a22[0], step->
g[0], tau, 
set);
 
 1706           fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
 
 1707             step->a21[0], step->
a22[0], step->
g[0], tau, 
set);
 
 1709         memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*
sizeof(
double _Complex));
 
 1711         for (k = 0; k < plength; k++)
 
 1713           set->work[plength*(4*l+2)/2+k] = set->vec3[k];
 
 1719         plength_stab = step->
Ns;
 
 1722         memcpy(set->vec3,set->result,plength_stab*
sizeof(
double _Complex));
 
 1723         memcpy(set->vec4,&(set->result[Nk]),plength_stab*
sizeof(
double _Complex));
 
 1726         if (set->flags & FPT_AL_SYMMETRY)
 
 1730             fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
 
 1731               step->a21[0], step->
a22[0], step->
g[0], t_stab-1, 
set);
 
 1735             fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
 
 1736               set->xcvecs[t_stab-2], step->
g[0], t_stab-1, 
set);
 
 1740             fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
 
 1741               step->a21[0], step->
a22[0], set->xcvecs[t_stab-2], step->
g[0], t_stab-1, 
set);
 
 1746             fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
 
 1747               step->a21[0], step->
a22[0], step->
g[0], t_stab-1, 
set);
 
 1750         memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*
sizeof(
double _Complex));
 
 1752         for (k = 0; k < plength; k++)
 
 1754           set->work[(plength/2)*(4*l+2)+k] = 
set->vec3[k];
 
 1759     plength = plength>>1;
 
 1763   for (k = 0; k <= k_end_tilde-data->
k_start; k++)
 
 1765     x[k] = 
set->work[2*(data->
k_start+k)];
 
 1770         data->
gammaN[tk-2]*
set->work[2*(Nk-2)]
 
 1771       + data->
betaN[tk-2] *set->work[2*(Nk-1)]
 
 1772       + data->
alphaN[tk-2]*
set->work[2*(Nk-1)+1];
 
 1776 void fpt_finalize(
fpt_set set)
 
 1786   const int M = 
set->M;
 
 1789   for (m = 0; m < M; m++)
 
 1792     data = &
set->dpt[m];
 
 1805       N_tilde = N_TILDE(set->N);
 
 1807       for (tau = 1; tau < 
set->t; tau++)
 
 1810         firstl = 
FIRST_L(k_start_tilde,plength);
 
 1812         lastl = 
LAST_L(N_tilde,plength);
 
 1815         for (l = firstl; l <= lastl; l++)
 
 1822           data->
steps[tau][l].a11[0] = NULL;
 
 1823           data->
steps[tau][l].a12[0] = NULL;
 
 1824           data->
steps[tau][l].a21[0] = NULL;
 
 1832           data->
steps[tau][l].a11 = NULL;
 
 1833           data->
steps[tau][l].a12 = NULL;
 
 1834           data->
steps[tau][l].a21 = NULL;
 
 1836           data->
steps[tau][l].
g = NULL;
 
 1840         data->
steps[tau] = NULL;
 
 1842         plength = plength<<1;
 
 1849     if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
 
 1853       if (!(set->flags & FPT_PERSISTENT_DATA))
 
 1869   for (tau = 1; tau < 
set->t+1; tau++)
 
 1872     set->xcvecs[tau-1] = NULL;
 
 1882   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
 
 1895     for(tau = 0; tau < 
set->t; tau++)
 
 1897 #pragma omp critical (nfft_omp_critical_fftw_plan) 
 1899       fftw_destroy_plan(set->plans_dct3[tau]);
 
 1900       fftw_destroy_plan(set->plans_dct2[tau]);
 
 1902       set->plans_dct3[tau] = NULL;
 
 1903       set->plans_dct2[tau] = NULL;
 
 1908     set->plans_dct3 = NULL;
 
 1909     set->plans_dct2 = NULL;
 
 1912   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
 
 1916     set->xc_slow = NULL;