/* Output stream for the generated plot source. main() opens
 * "nfft_benchomp_results_plots.tex" into it, test1() passes it to the
 * print_output_* functions, and main() closes it at the end. */
30 static FILE* file_out_tex = NULL;
 
   /*
    * Fills *arr with a malloc'ed list of thread counts; main() treats the
    * return value as the number of entries and prints them.
    *
    * NOTE(review): only part of the body is visible in this listing (the
    * declarations of k/alloc_num/ret_number, the branch that separates the two
    * allocations, the ret_number updates and the return statements are
    * missing), so the comments below describe only the visible statements.
    */
   32 int get_nthreads_array(
int **arr)
 
   /* Upper bound for the thread counts placed in the list. */
   34   int max_threads = nfft_get_omp_num_threads();
 
   /* NOTE(review): despite the "_pw2" name this is 1 exactly when max_threads
    * is even (integer halve, then double), not when it is a power of two --
    * confirm the intent. */
   38   int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
 
   /* First allocation: one slot per thread count up to max_threads. The
    * condition selecting this path is not visible here.
    * NOTE(review): no check of the malloc result is visible. */
   42     *arr = (
int*) malloc(max_threads*
sizeof(
int));
 
   43     for (k = 0; k < max_threads; k++)
 
   /* Empty-bodied loop (note the trailing ';'): bumps alloc_num once for every
    * power of two that does not exceed max_threads. */
   48   for (k = 1; k <= max_threads; k*=2, alloc_num++);
 
   /* Second allocation, sized by the count computed above.
    * NOTE(review): no check of the malloc result is visible. */
   50   *arr = (
int*) malloc(alloc_num*
sizeof(
int));
 
   /* Walk the powers of two up to max_threads. */
   52   for (k = 1; k <= max_threads; k*=2)
 
   /* k is the last power of two below max_threads and the flag is set:
    * store max_threads/2 before k. */
   54     if (k != max_threads && 2*k > max_threads && max_threads_pw2)
 
   56       *(*arr + ret_number) = max_threads/2;
 
   /* Store the power of two itself. */
   60     *(*arr + ret_number) = k;
 
   /* k is the last power of two below max_threads: store max_threads too. */
   63     if (k != max_threads && 2*k > max_threads)
 
   65       *(*arr + ret_number) = max_threads;
 
   /*
    * Reports a mismatch between an obtained value and the expected one.
    *
    * val: value obtained (callers pass system() and fscanf() results).
    * ok:  expected value.
    * msg: context text placed in the error line.
    *
    * NOTE(review): the condition guarding the message and whatever follows it
    * are not visible in this listing -- presumably `val != ok`; verify.
    */
   75 void check_result_value(
const int val, 
const int ok, 
const char *msg)
 
   79     fprintf(stderr, 
"ERROR %s: %d not %d\n", msg, val, ok);
 
   /*
    * Runs the external ./nfft_benchomp_createdataset program and redirects
    * its output to nfft_benchomp_test.data.
    *
    * The four snprintf variants differ only in how many times N is repeated
    * on the command line (1 to 4 times). The branch choosing between them is
    * not visible here -- presumably it depends on d; verify.
    */
   85 void run_test_create(
int d, 
int trafo_adjoint, 
int N, 
int M, 
double sigma)
 
   /* Variant with N passed once. */
   90     snprintf(cmd, 1024, 
"./nfft_benchomp_createdataset %d %d %d %d %lg > nfft_benchomp_test.data", d, trafo_adjoint, N, M, sigma);
 
   /* Variant with N passed twice. */
   92     snprintf(cmd, 1024, 
"./nfft_benchomp_createdataset %d %d %d %d %d %lg > nfft_benchomp_test.data", d, trafo_adjoint, N, N, M, sigma);
 
   /* Variant with N passed three times. */
   94     snprintf(cmd, 1024, 
"./nfft_benchomp_createdataset %d %d %d %d %d %d %lg > nfft_benchomp_test.data", d, trafo_adjoint, N, N, N, M, sigma);
 
   /* Variant with N passed four times. */
   96     snprintf(cmd, 1024, 
"./nfft_benchomp_createdataset %d %d %d %d %d %d %d %lg > nfft_benchomp_test.data", d, trafo_adjoint, N, N, N, N, M, sigma);
 
   /* Echo the command, then run it through the shell and expect status 0. */
   99   fprintf(stderr, 
"%s\n", cmd);
 
  100   check_result_value(system(cmd), 0, 
"createdataset");
 
  /*
   * Opens "nfft_benchomp_test.result" in write mode, which truncates or
   * creates the file. What is written to it, and whether it is closed, is
   * not visible in this listing.
   */
  103 void run_test_init_output()
 
  105   FILE *f = fopen(
"nfft_benchomp_test.result", 
"w");
 
  /*
   * Runs one benchmark configuration nrepeat times through an external
   * program and accumulates avg/min/max for six timing values.
   *
   * res:      array of at least six entries (indices 0..5 are written).
   * nrepeat:  number of measured repetitions; also the divisor for avg.
   * m, flags: forwarded on the benchmark command line.
   * nthreads: forwarded to the "_threads" variant and echoed in the summary.
   *
   * NOTE(review): several interior lines (local declarations, the choice
   * between the two commands, the avg/min/max updates, fclose) are not
   * visible in this listing.
   */
  141 void run_test(
s_resval *res, 
int nrepeat, 
int m, 
int flags, 
int nthreads)
 
  /* Reset the accumulators. 1.0/0.0 serves as +infinity for the running
   * minimum (relies on IEEE-754 floating-point division). */
  146   for (t = 0; t < 6; t++)
 
  148     res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
 
  /* Two command variants: "_single" takes no thread count, "_threads" takes
   * nthreads. The selecting condition is not visible here. */
  152     snprintf(cmd, 1024, 
"./nfft_benchomp_detail_single %d %d < nfft_benchomp_test.data > nfft_benchomp_test.out", m, flags);
 
  154     snprintf(cmd, 1024, 
"./nfft_benchomp_detail_threads %d %d %d < nfft_benchomp_test.data > nfft_benchomp_test.out", m, flags, nthreads);
 
  155   fprintf(stderr, 
"%s\n", cmd);
 
  /* One run before the measured loop; its output file is not read here. */
  156   check_result_value(system(cmd), 0, cmd);
 
  158   for (r = 0; r < nrepeat; r++)
 
  163     check_result_value(system(cmd), 0, cmd);
 
  /* Read the six timing values the benchmark wrote.
   * NOTE(review): no NULL check on f is visible before fscanf uses it. */
  164     f = fopen(
"nfft_benchomp_test.out", 
"r");
 
  165     retval = fscanf(f, 
"%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
 
  166     check_result_value(retval, 6, 
"read nfft_benchomp_test.out");
 
  /* Fold this repetition into the running statistics. */
  169     for (t = 0; t < 6; t++)
 
  172       if (res[t].min > v[t])
 
  174       if (res[t].max < v[t])
 
  /* Turn the accumulated value into a mean over the repetitions. */
  179   for (t = 0; t < 6; t++)
 
  180     res[t].avg /= nrepeat;
 
  /* Summary line on stderr: thread count, repetitions, then avg/min/max for
   * each of the six values. */
  182   fprintf(stderr, 
"%d %d: ", nthreads, nrepeat);
 
  183   for (t = 0; t < 6; t++)
 
  184     fprintf(stderr, 
"%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
 
  185   fprintf(stderr, 
"\n");
 
  /*
   * Returns a text label chosen by whether the PRE_ONE_PSI bit is set in
   * flags; callers print it with "%s" in plot titles. The returned strings
   * themselves are not visible in this listing.
   */
  188 const char *get_psi_string(
int flags)
 
  190   if (flags & PRE_ONE_PSI)
 
  /*
   * Returns a text label chosen by whether the NFFT_SORT_NODES bit is set in
   * flags; callers print it with "%s" in plot titles. The returned strings
   * themselves are not visible in this listing.
   */
  195 const char *get_sort_string(
int flags)
 
  197   if (flags & NFFT_SORT_NODES)
 
  /*
   * Returns a text label chosen by whether the NFFT_OMP_BLOCKWISE_ADJOINT bit
   * is set in flags. get_plot_title() checks the result with strlen() > 0
   * before printing it, so one of the returned strings is presumably empty --
   * the strings themselves are not visible in this listing.
   */
  203 const char *get_adjoint_omp_string(
int flags)
 
  205   if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
 
  /* One bit per test parameter. determine_different_parameters() builds a
   * mask from these bits and get_plot_title() tests them to decide which
   * parameters to print. */
  211 #define MASK_D (1U<<0) 
  212 #define MASK_TA (1U<<1) 
  213 #define MASK_N (1U<<2) 
  214 #define MASK_SIGMA (1U<<3) 
  215 #define MASK_M (1U<<4) 
  216 #define MASK_WINM (1U<<5) 
  217 #define MASK_FLAGS_PSI (1U<<6) 
  218 #define MASK_FLAGS_SORT (1U<<7) 
  219 #define MASK_FLAGS_BW (1U<<8) 
  /*
   * Compares each test set with its predecessor and collects, as MASK_* bits,
   * which parameters differ between neighbours.
   *
   * The loop starts at t = 1, so for ntestsets <= 1 no comparison is made and
   * mask keeps its initial value 0.
   *
   * NOTE(review): the statements executed for the d, trafo_adjoint, N, sigma,
   * M and m comparisons, and the return, are not visible in this listing --
   * presumably they OR in the matching MASK_* bit; verify.
   */
  221 unsigned int determine_different_parameters(
s_testset *testsets, 
int ntestsets)
 
  224   unsigned int mask = 0;
 
  229   for (t = 1; t < ntestsets; t++)
 
  231     if (testsets[t-1].param.d != testsets[t].param.d)
 
  233     if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
 
  235     if (testsets[t-1].param.N != testsets[t].param.N)
 
  237     if (testsets[t-1].param.sigma != testsets[t].param.sigma)
 
  239     if (testsets[t-1].param.M != testsets[t].param.M)
 
  241     if (testsets[t-1].param.m != testsets[t].param.m)
 
  /* For the flag word only the individual bit is compared, so each flag is
   * tracked independently of the others. */
  243     if ((testsets[t-1].param.flags & PRE_ONE_PSI) != (testsets[t].param.flags & PRE_ONE_PSI))
 
  244       mask |= MASK_FLAGS_PSI;
 
  245     if ((testsets[t-1].param.flags & NFFT_SORT_NODES) != (testsets[t].param.flags & NFFT_SORT_NODES))
 
  246       mask |= MASK_FLAGS_SORT;
 
  247     if ((testsets[t-1].param.flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.flags & NFFT_OMP_BLOCKWISE_ADJOINT))
 
  248       mask |= MASK_FLAGS_BW;
 
  /*
   * Writes a title string into outstr: the hostname followed by one piece per
   * selected parameter.
   *
   * outstr/maxlen: destination buffer and its size as passed to snprintf.
   * hostname:      leading text (a caller in this file passes "").
   * param:         parameter values to print.
   * diff_mask:     MASK_* bits of the parameters to LEAVE OUT; the function
   *                inverts it, so a piece is printed when its bit is clear.
   *
   * After every piece the function returns early when snprintf reported an
   * error or len+offset reaches maxlen-1.
   *
   * NOTE(review): the declarations of len/offset, the offset updates and some
   * of the `if (mask & ...)` headers are not visible in this listing.
   */
  254 void get_plot_title(
char *outstr, 
int maxlen, 
char *hostname, 
s_param param, 
unsigned int diff_mask)
 
  256   unsigned int mask = ~diff_mask;
 
  260   len = snprintf(outstr, maxlen, 
"%s", hostname);
 
  261   if (len < 0 || len+offset >= maxlen-1) 
return;
 
  /* Dimension piece. */
  266     len = snprintf(outstr+offset, maxlen-offset, 
" %dd", param.d);
 
  267     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  /* Transform piece: "^\top" is appended when trafo_adjoint is non-zero. */
  273     len = snprintf(outstr+offset, maxlen-offset, 
" $\\mathrm{NFFT}%s$", param.trafo_adjoint==0?
"":
"^\\top");
 
  274     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  280     len = snprintf(outstr+offset, maxlen-offset, 
" N=%d", param.N);
 
  281     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  285   if (mask & MASK_SIGMA)
 
  /* NOTE(review): this prints param.sigma under the label "N=". The other
   * title formats in this file label sigma as "$\\sigma$=%g", so this looks
   * like a copy-paste slip from the N piece above -- confirm and fix. */
  287     len = snprintf(outstr+offset, maxlen-offset, 
" N=%g", param.sigma);
 
  288     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  294     len = snprintf(outstr+offset, maxlen-offset, 
" M=%d", param.M);
 
  295     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  299   if (mask & MASK_WINM)
 
  301     len = snprintf(outstr+offset, maxlen-offset, 
" m=%d", param.m);
 
  302     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  306   if (mask & MASK_FLAGS_PSI)
 
  308     len = snprintf(outstr+offset, maxlen-offset, 
" %s", get_psi_string(param.flags));
 
  309     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  313   if (mask & MASK_FLAGS_SORT)
 
  315     len = snprintf(outstr+offset, maxlen-offset, 
" %s", get_sort_string(param.flags));
 
  316     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  /* Skipped when the label is empty, so no lone separator space is added. */
  320   if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.flags)) > 0)
 
  322     len = snprintf(outstr+offset, maxlen-offset, 
" %s", get_adjoint_omp_string(param.flags));
 
  323     if (len < 0 || len+offset >= maxlen-1) 
return;
 
  /*
   * Writes one pgfplots/TikZ speedup plot for a group of test sets to out
   * and logs the same numbers to stderr.
   *
   * out:       destination stream for the TikZ source.
   * testsets:  group of test sets, one \addplot each.
   * ntestsets: number of entries in testsets.
   * tref:      reference time; every plotted value is tref / resval[5].avg.
   *
   * NOTE(review): the declarations of hostname, title, t, i and the loop-local
   * `testset` are not visible in this listing.
   */
  328 void print_output_speedup_total_tref(FILE *out, 
s_testset *testsets, 
int ntestsets, 
double tref)
 
  332   char plottitle[1025];
 
  333   unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
 
  /* Fall back to a fixed name when the hostname cannot be obtained. */
  335   if (gethostname(hostname, 1024) != 0)
 
  336     strncpy(hostname, 
"unnamed", 1024);
 
  /* get_plot_title() omits every parameter whose bit is set in its mask
   * argument, so the plot title carries only the parameters that are the same
   * across the group, and never the sort flag. */
  338   get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask | MASK_FLAGS_SORT);
 
  340   fprintf(out, 
"\\begin{tikzpicture}\n");
 
  341   fprintf(out, 
"\\begin{axis}[");
 
  342   fprintf(out, 
"width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
 
  343   fprintf(out, 
" title={%s}", plottitle);
 
  344   fprintf(out, 
" ]\n");
 
  /* One data series per test set. */
  346   for (t = 0; t < ntestsets; t++)
 
  /* Log the full parameter set of this series to stderr.
   * NOTE(review): the format string ends with a stray '}'; it matches the
   * " title={...}" format used in print_output_histo_DFBRT() but has no
   * opening brace here. */
  349     fprintf(stderr, 
"%s %dd $\\mathrm{NFFT}%s$ N=%d $\\sigma$=%g M=%d m=%d %s %s %s}", hostname, testset.param.d, testset.param.trafo_adjoint==0?
"":
"^\\top", testset.param.N, testset.param.sigma, testset.param.M, testset.param.m, get_psi_string(testset.param.flags), get_sort_string(testset.param.flags), get_adjoint_omp_string(testset.param.flags));
 
  350     fprintf(stderr, 
"\n");
 
  352     fprintf(out, 
"\\addplot coordinates {");
 
  /* (threads, speedup) pairs for the plot. */
  353     for (i = 0; i < testset.nresults; i++)
 
  354       fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
 
  355     fprintf(out, 
"};\n");
 
  /* Same pairs, shorter format, on stderr. */
  357     for (i = 0; i < testset.nresults; i++)
 
  359       fprintf(stderr, 
"%d:%.3f  ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
 
  361     fprintf(stderr, 
"\n\n");
 
  364   fprintf(out, 
"\\legend{{");
 
  365   for (t = 0; t < ntestsets; t++)
 
  /* Complement of the title mask: the legend entry shows the parameters that
   * differ within the group, plus the sort flag. The hostname is empty. */
  370     get_plot_title(title, 255, 
"", testsets[t].param, ~(diff_mask | MASK_FLAGS_SORT));
 
  371     fprintf(out, 
"%s", title);
 
  373   fprintf(out, 
"}}\n");
 
  374   fprintf(out, 
"\\end{axis}\n");
 
  375   fprintf(out, 
"\\end{tikzpicture}\n");
 
  376   fprintf(out, 
"\n\n");
 
  /*
   * Picks the reference time for a group of test sets and writes the speedup
   * plot with it.
   *
   * The reference is the smallest resval[5].avg among all results measured
   * with nthreads == 1, across every test set in the group.
   *
   * NOTE(review): if no result has nthreads == 1, tref stays at its initial
   * value 1.0/0.0 (used as +infinity) and is passed on unchanged.
   */
  381 void print_output_speedup_total(FILE *out, 
s_testset *testsets, 
int ntestsets)
 
  383   double tref = 1.0/0.0;
 
  386   for (t = 0; t < ntestsets; t++)
 
  387     for (k = 0; k < testsets[t].nresults; k++)
 
  388       if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
 
  389         tref = testsets[t].results[k].resval[5].avg;
 
  391   print_output_speedup_total_tref(out, testsets, ntestsets, tref);
 
  /*
   * Writes one pgfplots/TikZ bar chart ("ybar") for a single test set to out.
   *
   * Five series are emitted, in the order of the legend "D,F,B,rest,total":
   * resval[1], resval[2], resval[3], resval[0]+resval[4] and resval[5]
   * (the .avg field of each), one bar group per thread count.
   *
   * testset is passed by value.
   *
   * NOTE(review): the hostname declaration and the condition selecting between
   * the two "symbolic x coords" fprintf variants are not visible here.
   */
  394 void print_output_histo_DFBRT(FILE *out, 
s_testset testset)
 
  396   int i, size = testset.nresults;
 
  /* Fall back to a fixed name when the hostname cannot be obtained. */
  399   if (gethostname(hostname, 1024) != 0)
 
  400     strncpy(hostname, 
"unnamed", 1024);
 
  402   fprintf(out, 
"\\begin{tikzpicture}\n");
 
  403   fprintf(out, 
"\\begin{axis}[");
 
  404   fprintf(out, 
"width=0.9\\textwidth, height=0.6\\textwidth, ");
 
  /* Thread counts become the symbolic x coordinates. One variant prefixes a
   * comma, the other does not. */
  405   fprintf(out, 
"symbolic x coords={");
 
  406   for (i = 0; i < size; i++)
 
  408       fprintf(out, 
",%d", testset.results[i].nthreads);
 
  410       fprintf(out, 
"%d", testset.results[i].nthreads);
 
  /* NOTE(review): diagnostic print to stderr; unlike its neighbours it is not
   * indented -- looks like leftover debug output. */
  411 fprintf(stderr, 
"FLAGS: %d\n", testset.param.flags);
 
  413   fprintf(out, 
"}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
 
  /* The title always lists every parameter of the test set. */
  414   fprintf(out, 
" title={%s %dd $\\mathrm{NFFT}%s$ N=%d $\\sigma$=%g M=%d m=%d %s %s %s}", hostname, testset.param.d, testset.param.trafo_adjoint==0?
"":
"^\\top", testset.param.N, testset.param.sigma, testset.param.M, testset.param.m, get_psi_string(testset.param.flags), get_sort_string(testset.param.flags), get_adjoint_omp_string(testset.param.flags));
 
  415   fprintf(out, 
" ]\n");
 
  /* Series 1 (legend "D"): resval[1]. */
  416   fprintf(out, 
"\\addplot coordinates {");
 
  417   for (i = 0; i < size; i++)
 
  418     fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
 
  419   fprintf(out, 
"};\n");
 
  /* Series 2 (legend "F"): resval[2]. */
  421   fprintf(out, 
"\\addplot coordinates {");
 
  422   for (i = 0; i < size; i++)
 
  423     fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
 
  424   fprintf(out, 
"};\n");
 
  /* Series 3 (legend "B"): resval[3]. */
  426   fprintf(out, 
"\\addplot coordinates {");
 
  427   for (i = 0; i < size; i++)
 
  428     fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
 
  429   fprintf(out, 
"};\n");
 
  /* Series 4 (legend "rest"): resval[0] + resval[4]. */
  431   fprintf(out, 
"\\addplot coordinates {");
 
  432   for (i = 0; i < size; i++)
 
  433     fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
 
  434   fprintf(out, 
"};\n");
 
  /* Series 5 (legend "total"): resval[5]. */
  436   fprintf(out, 
"\\addplot coordinates {");
 
  437   for (i = 0; i < size; i++)
 
  438     fprintf(out, 
"(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
 
  439   fprintf(out, 
"};\n");
 
  440   fprintf(out, 
"\\legend{D,F,B,rest,total}\n");
 
  441   fprintf(out, 
"\\end{axis}\n");
 
  442   fprintf(out, 
"\\end{tikzpicture}\n");
 
  443   fprintf(out, 
"\n\n");
 
  /*
   * Records the parameters of one test set, generates its input data once and
   * measures it for every thread count in nthreads_array.
   *
   * testset:              destination; param, results and nresults are
   *                       written. results is allocated here with malloc and
   *                       not freed in the visible code.
   * d..flags:             stored in testset->param and forwarded to
   *                       run_test_create() / run_test().
   * nthreads_array:       thread counts to measure.
   * n_threads_array_size: number of entries in nthreads_array.
   */
  448 void run_testset(
s_testset *testset, 
int d, 
int trafo_adjoint, 
int N, 
int M, 
double sigma, 
int m, 
int flags, 
int *nthreads_array, 
int n_threads_array_size)
 
  451   testset->param.d = d;
 
  452   testset->param.trafo_adjoint = trafo_adjoint;
 
  453   testset->param.N = N;
 
  454   testset->param.M = M;
 
  455   testset->param.sigma = sigma;
 
  456   testset->param.m = m;
 
  457   testset->param.flags = flags;
 
  /* One result slot per thread count.
   * NOTE(review): no check of the malloc result is visible. */
  459   testset->results = (
s_result*) malloc(n_threads_array_size*
sizeof(
s_result));
 
  460   testset->nresults = n_threads_array_size;
 
  /* The input data set is created once and reused for all thread counts. */
  462   run_test_create(testset->param.d, testset->param.trafo_adjoint, testset->param.N, testset->param.M, testset->param.sigma);
 
  463   for (i = 0; i < n_threads_array_size; i++)
 
  465     testset->results[i].nthreads = nthreads_array[i];
 
  /* NOTE(review): the last argument is an assignment expression that stores
   * the same value already stored on the previous statement; passing
   * testset->results[i].nthreads would be equivalent and clearer. */
  466     run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.flags, testset->results[i].nthreads = nthreads_array[i]);
 
  /*
   * Runs the fixed series of 15 test sets (testsets[0..14]) for one value of
   * m and writes the plots to file_out_tex.
   *
   * Each test set gets a bar chart only when both MEASURE_TIME and
   * MEASURE_TIME_FFTW are defined; speedup plots are written per group
   * regardless. Groups: sets 0-1, 2-4, 5-6, 7-9, 10-11 and 12-14, i.e. for
   * each d in {1, 2, 3} one group with trafo_adjoint = 0 and one with
   * trafo_adjoint = 1.
   *
   * NOTE(review): the declaration of testsets and the #endif lines are not
   * visible in this listing.
   */
  471 void test1(
int *nthreads_array, 
int n_threads_array_size, 
int m)
 
  /* d = 1, trafo_adjoint = 0: without and with NFFT_SORT_NODES. */
  475   run_testset(&testsets[0], 1, 0, 2097152, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
 
  476 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  477   print_output_histo_DFBRT(file_out_tex, testsets[0]);
 
  480   run_testset(&testsets[1], 1, 0, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
 
  481 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  482   print_output_histo_DFBRT(file_out_tex, testsets[1]);
 
  485   print_output_speedup_total(file_out_tex, testsets, 2);
 
  /* d = 1, trafo_adjoint = 1: plain, sorted, sorted + blockwise adjoint. */
  487   run_testset(&testsets[2], 1, 1, 2097152, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
 
  488 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  489   print_output_histo_DFBRT(file_out_tex, testsets[2]);
 
  492   run_testset(&testsets[3], 1, 1, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
 
  493 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  494   print_output_histo_DFBRT(file_out_tex, testsets[3]);
 
  497   run_testset(&testsets[4], 1, 1, 2097152, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
 
  498 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  499   print_output_histo_DFBRT(file_out_tex, testsets[4]);
 
  502   print_output_speedup_total(file_out_tex, testsets+2, 3);
 
  /* d = 2, trafo_adjoint = 0. */
  504   run_testset(&testsets[5], 2, 0, 1024, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
 
  505 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  506   print_output_histo_DFBRT(file_out_tex, testsets[5]);
 
  509   run_testset(&testsets[6], 2, 0, 1024, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
 
  510 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  511   print_output_histo_DFBRT(file_out_tex, testsets[6]);
 
  514   print_output_speedup_total(file_out_tex, testsets+5, 2);
 
  /* d = 2, trafo_adjoint = 1. */
  516   run_testset(&testsets[7], 2, 1, 1024, 1048576, 2.0, m, 0, nthreads_array, n_threads_array_size);
 
  517 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  518   print_output_histo_DFBRT(file_out_tex, testsets[7]);
 
  521   run_testset(&testsets[8], 2, 1, 1024, 1048576, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
 
  522 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  523   print_output_histo_DFBRT(file_out_tex, testsets[8]);
 
  526   run_testset(&testsets[9], 2, 1, 1024, 1048576, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
 
  527 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  528   print_output_histo_DFBRT(file_out_tex, testsets[9]);
 
  531   print_output_speedup_total(file_out_tex, testsets+7, 3);
 
  /* d = 3, trafo_adjoint = 0. */
  533   run_testset(&testsets[10], 3, 0, 128, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
 
  534 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  535   print_output_histo_DFBRT(file_out_tex, testsets[10]);
 
  538   run_testset(&testsets[11], 3, 0, 128, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
 
  539 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  540   print_output_histo_DFBRT(file_out_tex, testsets[11]);
 
  543   print_output_speedup_total(file_out_tex, testsets+10, 2);
 
  /* d = 3, trafo_adjoint = 1. */
  545   run_testset(&testsets[12], 3, 1, 128, 2097152, 2.0, m, 0, nthreads_array, n_threads_array_size);
 
  546 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  547   print_output_histo_DFBRT(file_out_tex, testsets[12]);
 
  550   run_testset(&testsets[13], 3, 1, 128, 2097152, 2.0, m, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
 
  551 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  552   print_output_histo_DFBRT(file_out_tex, testsets[13]);
 
  555   run_testset(&testsets[14], 3, 1, 128, 2097152, 2.0, m, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
 
  556 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW 
  557   print_output_histo_DFBRT(file_out_tex, testsets[14]);
 
  560   print_output_speedup_total(file_out_tex, testsets+12, 3);
 
  /*
   * Program entry: determines the thread counts to benchmark, prints them,
   * opens the plot output file and runs test1() for m = 2, 4 and 6.
   *
   * NOTE(review): the declarations of nthreads_array and k, the #endif, the
   * return statement and any freeing of nthreads_array are not visible in
   * this listing.
   */
  564 int main(
int argc, 
char** argv)
 
  567   int n_threads_array_size = get_nthreads_array(&nthreads_array);
 
  /* Without both timing macros only the speedup plots are produced, so tell
   * the user how to get the detailed ones. */
  570 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW) 
  571   fprintf(stderr, 
"WARNING: Detailed time measurements for NFFT are not activated.\n");
 
  572   fprintf(stderr, 
"For more detailed plots, please re-run the configure script with options\n");
 
  573   fprintf(stderr, 
"--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
 
  574   fprintf(stderr, 
"and run \"make clean all\"\n\n");
 
  /* Echo the thread counts that will be measured. */
  577   for (k = 0; k < n_threads_array_size; k++)
 
  578     fprintf(stderr, 
"%d ", nthreads_array[k]);
 
  579   fprintf(stderr, 
"\n");
 
  /* NOTE(review): no check of the fopen result is visible; the print_output_*
   * functions and fclose below use file_out_tex unconditionally. */
  581   file_out_tex = fopen(
"nfft_benchomp_results_plots.tex", 
"w");
 
  583   test1(nthreads_array, n_threads_array_size, 2);
 
  584   test1(nthreads_array, n_threads_array_size, 4);
 
  585   test1(nthreads_array, n_threads_array_size, 6);
 
  587   fclose(file_out_tex);