/* LaTeX output stream shared by the plot writers: main() opens it on
 * "nfsft_benchomp_results_plots.tex" and test1() hands it to the print_output_* routines. */
30 static FILE* file_out_tex = NULL;
 
   /* Stores in *arr a malloc'd list of thread counts derived from nfft_get_omp_num_threads().
    * arr: out-parameter receiving the allocated int array.
    * main() uses the return value as the number of entries in *arr; the return statement
    * itself is not visible in this listing. */
   32 int get_nthreads_array(
int **arr)
 
   /* Largest thread count considered. */
   34   int max_threads = nfft_get_omp_num_threads();
 
   /* NOTE(review): despite the "pw2" name, this flag is 1 exactly when max_threads is even. */
   38   int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
 
   /* Alternative path with one slot per thread count; the condition selecting it is not
    * visible in this listing. */
   42     *arr = (
int*) malloc(max_threads*
sizeof(
int));
 
   43     for (k = 0; k < max_threads; k++)
 
   /* Empty-bodied loop: bumps alloc_num once for every power of two k <= max_threads. */
   48   for (k = 1; k <= max_threads; k*=2, alloc_num++);
 
   /* NOTE(review): the fill loop below may store up to two extra values (max_threads/2 and
    * max_threads) besides the powers of two; alloc_num's starting value is not visible here
    * and must leave room for them -- confirm. */
   50   *arr = (
int*) malloc(alloc_num*
sizeof(
int));
 
   52   for (k = 1; k <= max_threads; k*=2)
 
   /* k is the last power of two below an even, non-power-of-two max_threads. */
   54     if (k != max_threads && 2*k > max_threads && max_threads_pw2)
 
   56       *(*arr + ret_number) = max_threads/2;
 
   /* Every power of two k is stored. */
   60     *(*arr + ret_number) = k;
 
   /* max_threads is not a power of two: store it after the last power of two. */
   63     if (k != max_threads && 2*k > max_threads)
 
   65       *(*arr + ret_number) = max_threads;
 
   /* Reports on stderr that the observed value `val` differs from the expected value `ok`,
    * labelled with `msg`.
    * The guarding condition and any follow-up action are not visible in this listing
    * (presumably the message is printed only when val != ok -- confirm). */
   75 void check_result_value(
const int val, 
const int ok, 
const char *msg)
 
   79     fprintf(stderr, 
"ERROR %s: %d not %d\n", msg, val, ok);
 
   /* Generates the benchmark input file by running the external program
    * ./nfsft_benchomp_createdataset with (trafo_adjoint, N, M) and redirecting its stdout
    * to nfsft_benchomp_test.data. */
   85 void run_test_create(
int trafo_adjoint, 
int N, 
int M)
 
   /* Command line is formatted into cmd with a 1024-byte bound (cmd's declaration is not
    * visible in this listing). */
   89   snprintf(cmd, 1024, 
"./nfsft_benchomp_createdataset %d %d %d > nfsft_benchomp_test.data", trafo_adjoint, N, M);
 
   /* Echo the command before running it. */
   90   fprintf(stderr, 
"%s\n", cmd);
 
   /* The system() status is compared against 0 by check_result_value. */
   91   check_result_value(system(cmd), 0, 
"createdataset");
 
   /* Opens nfsft_benchomp_test.result in "w" mode, which truncates or creates the file.
    * What is written and whether the stream is closed is not visible in this listing. */
   94 void run_test_init_output()
 
   96   FILE *f = fopen(
"nfsft_benchomp_test.result", 
"w");
 
  /* Runs one benchmark configuration as an external process and collects statistics over
   * its six reported values per repetition.
   * res:         array of six s_resval entries; avg/min/max are written here.
   * nrepeat:     number of repetitions requested from the benchmark and rows read back.
   * m, nfsft_flags, psi_flags: passed through on the benchmark command line.
   * nthreads:    passed only to the _threads benchmark binary. */
  131 void run_test(
s_resval *res, 
int nrepeat, 
int m, 
int nfsft_flags, 
int psi_flags, 
int nthreads)
 
  /* Reset the statistics for all six measured quantities. */
  137   for (t = 0; t < 6; t++)
 
  /* min starts at 1.0/0.0, relying on floating-point division by zero yielding +infinity. */
  139     res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
 
  /* Two alternative command lines follow; the condition choosing between them is not
   * visible in this listing. This one omits the thread count. */
  143     snprintf(cmd, 1024, 
"./nfsft_benchomp_detail_single %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", m, nfsft_flags, psi_flags, nrepeat);
 
  /* Variant that additionally passes nthreads. */
  145     snprintf(cmd, 1024, 
"./nfsft_benchomp_detail_threads %d %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", m, nfsft_flags, psi_flags, nrepeat, nthreads);
 
  146   fprintf(stderr, 
"%s\n", cmd);
 
  /* Run the benchmark; its exit status is checked against 0, using the command as label. */
  148   check_result_value(system(cmd), 0, cmd);
 
  /* Read back the benchmark output.
   * NOTE(review): no NULL check on f is visible before it is passed to fscanf. */
  150   f = fopen(
"nfsft_benchomp_test.out", 
"r");
 
  /* One row of six values per repetition. */
  151   for (r = 0; r < nrepeat; r++)
 
  /* %lg expects double*; v is presumably a double[6] (declaration not visible). */
  158     retval = fscanf(f, 
"%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
 
  /* All six conversions must have succeeded. */
  159     check_result_value(retval, 6, 
"read nfsft_benchomp_test.out");
 
  162     for (t = 0; t < 6; t++)
 
  /* Track the running minimum and maximum (the assignments are not visible in this listing). */
  165       if (res[t].min > v[t])
 
  167       if (res[t].max < v[t])
 
  /* Divide by the repetition count; the accumulation into avg is not visible in this
   * listing (presumably a sum of v[t] -- confirm). */
  173   for (t = 0; t < 6; t++)
 
  174     res[t].avg /= nrepeat;
 
  /* Summary line on stderr: thread count, repetitions, then avg/min/max per quantity. */
  176   fprintf(stderr, 
"%d %d: ", nthreads, nrepeat);
 
  177   for (t = 0; t < 6; t++)
 
  178     fprintf(stderr, 
"%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
 
  179   fprintf(stderr, 
"\n");
 
  /* Maps `flags` to a constant label string; get_plot_title() and the plot writers call it
   * with param.psi_flags. Only the PRE_ONE_PSI test is visible in this listing; the other
   * branches and the returned strings are not. */
  182 const char *get_psi_string(
int flags)
 
  186   else if (flags & PRE_ONE_PSI)
 
  /* Maps `flags` to a constant label string depending on whether NFFT_SORT_NODES is set.
   * The returned strings are not visible in this listing. */
  191 const char *get_sort_string(
int flags)
 
  193   if (flags & NFFT_SORT_NODES)
 
  /* Maps `flags` to a constant label string depending on whether
   * NFFT_OMP_BLOCKWISE_ADJOINT is set. get_plot_title() checks the result with strlen(), so
   * an empty string is presumably one of the outcomes; the returned strings are not visible
   * in this listing. */
  199 const char *get_adjoint_omp_string(
int flags)
 
  201   if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
 
  /* One bit per benchmark parameter. determine_different_parameters() sets bits in its
   * result mask and get_plot_title() tests them to decide which fields to print.
   * Bit positions 0 and 3 are not used by these definitions. */
  207 #define MASK_TA (1U<<1) 
  208 #define MASK_N (1U<<2) 
  209 #define MASK_M (1U<<4) 
  210 #define MASK_WINM (1U<<5) 
  211 #define MASK_FLAGS_PSI (1U<<6) 
  212 #define MASK_FLAGS_SORT (1U<<7) 
  213 #define MASK_FLAGS_BW (1U<<8) 
  214 #define MASK_FLAGS_FPT (1U<<9) 
  /* Compares each test set with its predecessor and builds a bit mask of the parameters that
   * differ somewhere in the sequence.
   * testsets:  array of test sets to compare.
   * ntestsets: number of entries; with fewer than two entries the loop body never runs.
   * The return statement is not visible in this listing (presumably returns mask). */
  216 unsigned int determine_different_parameters(
s_testset *testsets, 
int ntestsets)
 
  219   unsigned int mask = 0;
 
  /* Start at 1 so that t-1 is always a valid predecessor index. */
  224   for (t = 1; t < ntestsets; t++)
 
  /* The statements executed for the next four comparisons are not visible in this listing
   * (presumably they set MASK_TA, MASK_N, MASK_M and MASK_WINM -- confirm). */
  226     if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
 
  228     if (testsets[t-1].param.N != testsets[t].param.N)
 
  230     if (testsets[t-1].param.M != testsets[t].param.M)
 
  232     if (testsets[t-1].param.m != testsets[t].param.m)
 
  /* Flag words are compared one bit at a time so each flag gets its own mask bit. */
  234     if ((testsets[t-1].param.psi_flags & PRE_ONE_PSI) != (testsets[t].param.psi_flags & PRE_ONE_PSI))
 
  235       mask |= MASK_FLAGS_PSI;
 
  236     if ((testsets[t-1].param.psi_flags & NFFT_SORT_NODES) != (testsets[t].param.psi_flags & NFFT_SORT_NODES))
 
  237       mask |= MASK_FLAGS_SORT;
 
  238     if ((testsets[t-1].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT))
 
  239       mask |= MASK_FLAGS_BW;
 
  /* This one is read from nfsft_flags, not psi_flags. */
  240     if ((testsets[t-1].param.nfsft_flags & NFSFT_USE_DPT) != (testsets[t].param.nfsft_flags & NFSFT_USE_DPT))
 
  241       mask |= MASK_FLAGS_FPT;
 
  /* Assembles a title string in outstr, piece by piece, from the host name and selected
   * fields of `param`.
   * outstr:    destination buffer.
   * maxlen:    size bound passed to snprintf.
   * hostname:  text written first; callers also pass "" to omit it.
   * param:     parameter set whose values are printed.
   * diff_mask: MASK_* bits to suppress; a field is printed only when its bit is clear here.
   * Returns early, keeping what was written so far, as soon as snprintf fails or the text
   * comes within one character of filling the buffer. */
  247 void get_plot_title(
char *outstr, 
int maxlen, 
char *hostname, 
s_param param, 
unsigned int diff_mask)
 
  /* Inverted so that the tests below read "print this field". */
  249   unsigned int mask = ~diff_mask;
 
  253   len = snprintf(outstr, maxlen, 
"%s", hostname);
 
  /* offset's declaration and its updates after each piece are not visible in this listing
   * (presumably offset += len -- confirm). */
  254   if (len < 0 || len+offset >= maxlen-1) 
return;
 
  /* The conditions guarding the next three pieces are not visible in this listing
   * (presumably mask & MASK_TA, MASK_N and MASK_M -- confirm). */
  259     len = snprintf(outstr+offset, maxlen-offset, 
" $\\mathrm{NFSFT}%s$", param.trafo_adjoint==0?
"":
"^\\top");
 
  260     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  266     len = snprintf(outstr+offset, maxlen-offset, 
" N=%d", param.N);
 
  267     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  273     len = snprintf(outstr+offset, maxlen-offset, 
" M=%d", param.M);
 
  274     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  278   if (mask & MASK_WINM)
 
  280     len = snprintf(outstr+offset, maxlen-offset, 
" m=%d", param.m);
 
  281     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  285   if (mask & MASK_FLAGS_PSI)
 
  287     len = snprintf(outstr+offset, maxlen-offset, 
" %s", get_psi_string(param.psi_flags));
 
  288     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  292   if (mask & MASK_FLAGS_SORT)
 
  294     len = snprintf(outstr+offset, maxlen-offset, 
" %s", get_sort_string(param.psi_flags));
 
  295     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  /* Skipped when the label is empty, so no lone separator space is appended. */
  299   if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.psi_flags)) > 0)
 
  301     len = snprintf(outstr+offset, maxlen-offset, 
" %s", get_adjoint_omp_string(param.psi_flags));
 
  302     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  306   if (mask & MASK_FLAGS_FPT)
 
  /* The format argument is one of two literals without conversion specifications. */
  308     len = snprintf(outstr+offset, maxlen-offset, param.nfsft_flags & NFSFT_USE_DPT ? 
" DPT" : 
"");
 
  309     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  /* Writes one pgfplots "tikzpicture" with a speedup curve per test set to `out`, and echoes
   * the same numbers to stderr.
   * out:       destination stream for the LaTeX code.
   * testsets:  test sets to plot; testsets[0].param supplies the common title fields.
   * ntestsets: number of test sets.
   * use_tref, tref: two speedup formulas appear below, one relative to tref and one relative
   *            to the set's own first result; the condition choosing between them is not
   *            visible in this listing (presumably use_tref -- confirm).
   * Speedups are ratios of resval[5].avg values. */
  315 void print_output_speedup_total_tref(FILE *out, 
s_testset *testsets, 
int ntestsets, 
int use_tref, 
double tref)
 
  319   char plottitle[1025];
 
  /* Bits for the parameters that vary between the test sets. */
  320   unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
 
  /* Fall back to a fixed name when the host name cannot be obtained. hostname's declaration
   * is not visible in this listing. */
  322   if (gethostname(hostname, 1024) != 0)
 
  323     strncpy(hostname, 
"unnamed", 1024);
 
  /* Passing diff_mask suppresses the varying fields, so the title shows the fields common
   * to all test sets. */
  325   get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask);
 
  327   fprintf(out, 
"\\begin{tikzpicture}\n");
 
  328   fprintf(out, 
"\\begin{axis}[");
 
  329   fprintf(out, 
"width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
 
  330   fprintf(out, 
" title={%s}", plottitle);
 
  331   fprintf(out, 
" ]\n");
 
  /* One \addplot per test set; `testset` is presumably a copy of testsets[t] (the
   * assignment is not visible in this listing). */
  333   for (t = 0; t < ntestsets; t++)
 
  /* NOTE(review): the trailing '}' in this stderr format looks like a leftover from the
   * LaTeX title string used in print_output_histo_PENRT. */
  336     fprintf(stderr, 
"%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?
"":
"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
 
  337     fprintf(stderr, 
"\n");
 
  339     fprintf(out, 
"\\addplot coordinates {");
 
  340     for (i = 0; i < testset.nresults; i++)
 
  /* Speedup relative to the reference time tref. */
  342         fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
 
  /* Speedup relative to this test set's first result. */
  344         fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
 
  345     fprintf(out, 
"};\n");
 
  /* Same values again, in compact form, on stderr. */
  347     for (i = 0; i < testset.nresults; i++)
 
  349         fprintf(stderr, 
"%d:%.3f  ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
 
  351         fprintf(stderr, 
"%d:%.3f  ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
 
  352     fprintf(stderr, 
"\n\n");
 
  355   fprintf(out, 
"\\legend{{");
 
  356   for (t = 0; t < ntestsets; t++)
 
  /* With the complemented mask and an empty host name, each legend entry shows only the
   * fields that vary. title's declaration and any separator output between entries are not
   * visible in this listing. */
  361     get_plot_title(title, 255, 
"", testsets[t].param, ~(diff_mask));
 
  362     fprintf(out, 
"%s", title);
 
  364   fprintf(out, 
"}}\n");
 
  365   fprintf(out, 
"\\end{axis}\n");
 
  366   fprintf(out, 
"\\end{tikzpicture}\n");
 
  367   fprintf(out, 
"\n\n");
 
  /* Determines a reference time and delegates to print_output_speedup_total_tref().
   * out, testsets, ntestsets, use_tref: forwarded unchanged.
   * The reference time is the smallest resval[5].avg among all results measured with one
   * thread. The search loops are indented as if guarded; the guard is not visible in this
   * listing (presumably use_tref -- confirm). */
  372 void print_output_speedup_total(FILE *out, 
s_testset *testsets, 
int ntestsets, 
int use_tref)
 
  /* Starts at 1.0/0.0 (relying on this yielding +infinity) so any measured time is smaller. */
  374   double tref = 1.0/0.0;
 
  378     for (t = 0; t < ntestsets; t++)
 
  379       for (k = 0; k < testsets[t].nresults; k++)
 
  /* Only single-threaded runs qualify as reference. */
  380         if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
 
  381           tref = testsets[t].results[k].resval[5].avg;
 
  383   print_output_speedup_total_tref(out, testsets, ntestsets, use_tref, tref);
 
  /* Writes one pgfplots bar chart ("ybar") for a single test set to `out`: for every thread
   * count, five bars with average times.
   * out:     destination stream for the LaTeX code.
   * testset: test set to plot (passed by value).
   * Bar series, in the order of the \legend entries: resval[1] (DPT or FPT), resval[2]
   * (c2e), resval[3] (NFFT), resval[0]+resval[4] (rest), resval[5] (total). */
  386 void print_output_histo_PENRT(FILE *out, 
s_testset testset)
 
  388   int i, size = testset.nresults;
 
  /* Fall back to a fixed name when the host name cannot be obtained. hostname's declaration
   * is not visible in this listing. */
  391   if (gethostname(hostname, 1024) != 0)
 
  392     strncpy(hostname, 
"unnamed", 1024);
 
  394   fprintf(out, 
"\\begin{tikzpicture}\n");
 
  395   fprintf(out, 
"\\begin{axis}[");
 
  396   fprintf(out, 
"width=0.9\\textwidth, height=0.6\\textwidth, ");
 
  /* The thread counts become the symbolic x coordinates, comma-separated. */
  397   fprintf(out, 
"symbolic x coords={");
 
  398   for (i = 0; i < size; i++)
 
  /* Two variants, with and without a leading comma; the condition choosing between them is
   * not visible in this listing (presumably i > 0 -- confirm). */
  400       fprintf(out, 
",%d", testset.results[i].nthreads);
 
  402       fprintf(out, 
"%d", testset.results[i].nthreads);
 
  404   fprintf(out, 
"}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
 
  /* The title lists every parameter of the test set. */
  405   fprintf(out, 
" title={%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?
"":
"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
 
  406   fprintf(out, 
" ]\n");
 
  /* Series 1: resval[1]. */
  407   fprintf(out, 
"\\addplot coordinates {");
 
  408   for (i = 0; i < size; i++)
 
  409     fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
 
  410   fprintf(out, 
"};\n");
 
  /* Series 2: resval[2]. */
  412   fprintf(out, 
"\\addplot coordinates {");
 
  413   for (i = 0; i < size; i++)
 
  414     fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
 
  415   fprintf(out, 
"};\n");
 
  /* Series 3: resval[3]. */
  417   fprintf(out, 
"\\addplot coordinates {");
 
  418   for (i = 0; i < size; i++)
 
  419     fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
 
  420   fprintf(out, 
"};\n");
 
  /* Series 4: sum of resval[0] and resval[4]. */
  422   fprintf(out, 
"\\addplot coordinates {");
 
  423   for (i = 0; i < size; i++)
 
  424     fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
 
  425   fprintf(out, 
"};\n");
 
  /* Series 5: resval[5]. */
  427   fprintf(out, 
"\\addplot coordinates {");
 
  428   for (i = 0; i < size; i++)
 
  429     fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
 
  430   fprintf(out, 
"};\n");
 
  /* Legend labels depend on NFSFT_USE_DPT and on whether the adjoint transform was run. */
  431   fprintf(out, 
"\\legend{%s,%s,$\\mathrm{NFFT}%s$,rest,total}\n", testset.param.nfsft_flags & NFSFT_USE_DPT ? 
"DPT" : 
"FPT", testset.param.trafo_adjoint==0?
"c2e":
"$\\mathrm{c2e}^\\top$", testset.param.trafo_adjoint==0?
"":
"^\\top");
 
  432   fprintf(out, 
"\\end{axis}\n");
 
  433   fprintf(out, 
"\\end{tikzpicture}\n");
 
  434   fprintf(out, 
"\n\n");
 
  /* Fills *testset with the given parameters, generates the input data once, and runs the
   * benchmark for every entry of nthreads_array.
   * testset:              receives parameters and a freshly malloc'd results array.
   * trafo_adjoint, N, M:  passed to the data set generator (run_test_create).
   * m, nfsft_flags, psi_flags: passed to each benchmark run (run_test).
   * nthreads_array, n_threads_array_size: thread counts to run and their number. */
  439 void run_testset(
s_testset *testset, 
int trafo_adjoint, 
int N, 
int M, 
int m, 
int nfsft_flags, 
int psi_flags, 
int *nthreads_array, 
int n_threads_array_size)
 
  442   testset->param.trafo_adjoint = trafo_adjoint;
 
  443   testset->param.N = N;
 
  444   testset->param.M = M;
 
  445   testset->param.m = m;
 
  446   testset->param.nfsft_flags = nfsft_flags;
 
  447   testset->param.psi_flags = psi_flags;
 
  /* One result slot per thread count.
   * NOTE(review): no NULL check on the malloc result is visible in this listing. */
  449   testset->results = (
s_result*) malloc(n_threads_array_size*
sizeof(
s_result));
 
  450   testset->nresults = n_threads_array_size;
 
  /* The input data depends only on (trafo_adjoint, N, M), so it is generated once here. */
  452   run_test_create(testset->param.trafo_adjoint, testset->param.N, testset->param.M);
 
  453   for (i = 0; i < n_threads_array_size; i++)
 
  455     testset->results[i].nthreads = nthreads_array[i];
 
  /* NOTE(review): the assignment inside the last argument repeats the statement above with
   * the same value. */
  456     run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.nfsft_flags, testset->param.psi_flags, testset->results[i].nthreads = nthreads_array[i]);
 
  /* Runs four test sets with N=1024 and M=1000000 for the given m and writes their plots to
   * file_out_tex:
   *   [0] trafo_adjoint=0, nfsft_flags=0              [1] trafo_adjoint=1, nfsft_flags=0
   *   [2] trafo_adjoint=0, nfsft_flags=NFSFT_USE_DPT  [3] trafo_adjoint=1, nfsft_flags=NFSFT_USE_DPT
   * The trafo_adjoint=1 sets additionally pass NFFT_OMP_BLOCKWISE_ADJOINT in psi_flags.
   * Histograms are printed only when MEASURE_TIME and MEASURE_TIME_FFTW are both defined
   * (the matching #endif lines are not visible in this listing). One speedup plot is
   * written for sets 0-1 and one for sets 2-3.
   * nthreads_array, n_threads_array_size: thread counts to benchmark.
   * m: forwarded to run_testset.
   * testsets' declaration is not visible in this listing. */
  461 void test1(
int *nthreads_array, 
int n_threads_array_size, 
int m)
 
  465   run_testset(&testsets[0], 0, 1024, 1000000, m, 0, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
 
  466 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  467   print_output_histo_PENRT(file_out_tex, testsets[0]);
 
  470   run_testset(&testsets[1], 1, 1024, 1000000, m, 0, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
 
  471 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  472   print_output_histo_PENRT(file_out_tex, testsets[1]);
 
  /* Speedup plot for test sets 0 and 1; use_tref is 0. */
  475   print_output_speedup_total(file_out_tex, testsets, 2, 0);
 
  477   run_testset(&testsets[2], 0, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
 
  478 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  479   print_output_histo_PENRT(file_out_tex, testsets[2]);
 
  482   run_testset(&testsets[3], 1, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
 
  483 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  484   print_output_histo_PENRT(file_out_tex, testsets[3]);
 
  /* Speedup plot for test sets 2 and 3. */
  487   print_output_speedup_total(file_out_tex, testsets+2, 2, 0);
 
  /* Program entry: determines the thread counts to benchmark, lists them on stderr, opens
   * the LaTeX output file and runs test1() for m = 2, 4, 6 and 8.
   * argc and argv are not used in the lines visible in this listing. */
  490 int main(
int argc, 
char** argv)
 
  /* nthreads_array's declaration is not visible in this listing. */
  493   int n_threads_array_size = get_nthreads_array(&nthreads_array);
 
  /* Warn when the build lacks the detailed time measurements (the matching #endif is not
   * visible in this listing). */
  496 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW) 
  497   fprintf(stderr, 
"WARNING: Detailed time measurements for NFSFT are not activated.\n");
 
  498   fprintf(stderr, 
"For more detailed plots, please re-run the configure script with options\n");
 
  499   fprintf(stderr, 
"--enable-measure-time --enable-measure-time-fftw --enable-nfsft --enable-openmp\n");
 
  500   fprintf(stderr, 
"and run \"make clean all\"\n\n");
 
  /* List the thread counts that will be benchmarked. */
  503   for (k = 0; k < n_threads_array_size; k++)
 
  504     fprintf(stderr, 
"%d ", nthreads_array[k]);
 
  505   fprintf(stderr, 
"\n");
 
  /* NOTE(review): no NULL check on the fopen result is visible before test1() writes
   * through file_out_tex. */
  507   file_out_tex = fopen(
"nfsft_benchomp_results_plots.tex", 
"w");
 
  509   test1(nthreads_array, n_threads_array_size, 2);
 
  510   test1(nthreads_array, n_threads_array_size, 4);
 
  511   test1(nthreads_array, n_threads_array_size, 6);
 
  512   test1(nthreads_array, n_threads_array_size, 8);
 
  514   fclose(file_out_tex);