OpenMPI - Pi approximation via arctan()

Collective communication — MPI_Bcast(), MPI_Reduce()

/*
| Pi approximation, based on the derivative of arctan(x) evaluated over [0..1]
| ( a.k.a. integral of 1/(1+x*x) dx )
| 2019-11-17
*/
#include <stdio.h>
#include <stdlib.h> // strtod()
#include <time.h>   // clock_gettime(), clock()
#include <math.h>   // log10()
#include <mpi.h>

// long double only suppports 35 digits
#define PI35 3.1415926535897932384626433832795029L
//           -.---|----|----|----|----|----|----|

// Block-oriented terms:
long double integral(long double start, long double stop, long double dx)
{
    long double sum = 0.0L;
    for (long double x = start; x <= stop; x += dx) {
        long double term = dx / (1.0L + x*x);
        sum += term;
    }
    return 4.0L * sum;
}

const int root_proc = 0;

int main(int argc, char **argv)
{
    // printf("Starting...\n");

    //------------------------------------------------------------------
    // Get the starting time with sub-second precision...
    struct timespec walltime[2];
    clock_gettime(CLOCK_REALTIME, (walltime+0));
    //------------------------------------------------------------------

    MPI_Init(&argc, &argv); // args not needed, but supplied anyway.
    int name_len;
    char my_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Get_processor_name(my_name, &name_len);
    int world_size, my_rank;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    long double nterms;
    if (my_rank == root_proc) {
        fprintf(stderr, "How many terms? (Exponential notation okay) ");
        scanf(" %Lf", &nterms);
    }   // else "nterms" isn't initialized

    // share "nterms"
    MPI_Bcast(&nterms, 1, MPI_LONG_DOUBLE, root_proc, MPI_COMM_WORLD);

    //------------------------------------------------------------------
    // Sum up the differentials, and calculate pi:

    long double dx = 1.0L / nterms;

    long double range = 1.0L / world_size;
    long double start = my_rank * range;
    long double stop = (my_rank + 1) * range;

    long double pi_piece = integral(start, stop, dx);

    // Collect all the results, and sum them up.
    //  This simple approach leaves all the summing on the root process:
    long double pi_approx;
    MPI_Reduce(&pi_piece, &pi_approx, 1, MPI_LONG_DOUBLE, MPI_SUM, root_proc, MPI_COMM_WORLD);

    //------------------------------------------------------------------
    // Report the calculation results:
    if (my_rank == root_proc) {  // root process
        long double error = pi_approx - PI35;
        if (error < 0.0L)
            error = -error;
        int width = abs(log10(error)) + 2;  // show this many digits...
        printf("#----\npi35bits: %.*Lf\n  approx: %.*Lf\nerror=%.3Le\n#----\n",
                width, PI35, width, pi_approx, error);
    }

    MPI_Finalize();

    //------------------------------------------------------------------
    // Report the running time with sub-second precision:
    clock_gettime(CLOCK_REALTIME, (walltime+1));
    struct timespec deltat;
    deltat.tv_sec = walltime[1].tv_sec - walltime[0].tv_sec;
    deltat.tv_nsec = walltime[1].tv_nsec - walltime[0].tv_nsec;
    if (deltat.tv_nsec < 0) {
        deltat.tv_sec--;
        deltat.tv_nsec += 1000000000L ;
    }
    struct timespec rest;
    clock_getres(CLOCK_REALTIME, &rest);
    printf("process %s:%02i wallclock: %lu.%09ld seconds\t(resolution: %ld ns)\n",
            my_name, my_rank, deltat.tv_sec, deltat.tv_nsec, rest.tv_nsec);
    //------------------------------------------------------------------

    return 0;
}