#pragma clang diagnostic push #pragma ide diagnostic ignored "openmp-use-default-none" // par20_ex5_t21.c SJ #include #include #include #include #include #include // default input size #define defN 1000000 void initInput(int *A, int n, int max); void prefixSeq(int *A, int n); void prefixParallel(int *A, int n); double ltime(); int main(int argc, char *argv[]){ int N = defN; int *input = NULL; double starttime, endtime, seqtime = 0, partime; // take input size as first parameter if (argc >= 2) N = atoi(argv[1]); input = (int*)malloc(N*sizeof(int)); if (!input) exit(1); // warm up processor initInput(input, N, 1); prefixSeq(input, N); // generate input starttime = ltime(); initInput(input, N, 1); endtime = ltime(); seqtime = endtime-starttime; printf("Generate input %.6lf s, %.2lf Melem/s, input[%d] = %d. \n", seqtime, ((double)N)/(1000000*seqtime), N-1, input[N-1]); // sequential test starttime = ltime(); prefixSeq(input, N); endtime = ltime(); seqtime = endtime-starttime; printf("Sequential prefix %.6lf s, %.2lf Melem/s, input[%d] = %d. \n", seqtime, ((double)N)/(1000000*seqtime), N-1, input[N-1]); // parallel test initInput(input, N, 1); starttime = ltime(); prefixParallel(input, N); endtime = ltime(); partime = endtime-starttime; printf("Parallel prefix %.6f s, %.2lf Melem/s, speedup=%.2f, input[%d] = %d. \n", partime, ((double)N)/(1000000*partime), seqtime/partime, N-1, input[N-1]); free(input); return 0; } // main() /* make random or 1 input to array A */ void initInput(int *A, int n, int max) { int i; if (max == 1) { #pragma omp parallel for for (i = 0; i < n; i++) A[i] = 1; } else { #pragma omp parallel for for (i = 0; i < n; i++) A[i] = rand() % max; } } /* makes prefix sum of A in place */ void prefixSeq(int *A, int n) { int i; for (i = 1; i < n; i++) A[i] += A[i-1]; } /* makes prefix sum of A in place */ void prefixParallel(int *A, int n) { int *B = NULL; #pragma omp parallel shared(B) { // set up blocks int P = omp_get_num_threads(); int PID = omp_get_thread_num(); int blockSize = ceil((double)n/P); int start = PID*blockSize; int end = (PID+1)*blockSize-1; int i; // fix possible overflow if (end >= n) end = n-1; // single thread initializes B #pragma omp single { B = (int*)malloc(P*sizeof(int)); } // local prefix sum for (i = start+1; i <= end; i++) A[i] += A[i-1]; B[PID] = A[end]; // wait all processors to finish #pragma omp barrier #pragma omp single { for (i = 1; i < P; i++) B[i] += B[i-1]; } // all (except 0) update local values if (PID != 0) { int prevS = B[PID-1]; for (i = start; i <= end; i++) A[i] += prevS; } } // omp parallel } // return a real time stamp // cpu time does not show speedup double ltime() { struct timeval tv; struct timezone tz; gettimeofday(&tv, &tz); return tv.tv_sec + (double)tv.tv_usec/1000000; } #pragma clang diagnostic pop