/* par20_ex7_x3.cu SJ */ #include #include #include #include #include #include double ltime(); char *randomString(int n, int nchar); void printsubstring(char *A, int start, int count); int longestRepeatingSubstring(const char *A, int len, int *Rpos1, int *Rpos2); int longestRepeatingSubstring_cuda(const char *A, int len, int *Rpos1, int *Rpos2); __global__ void lrss_kernel(const char *A, int len, int *result); int main(int argc, char *argv[]) { char *input; int N = 50; // 10000; // string length, 10,000 or 100,000 are good examples int maxchar = 2; // character set size int pos1, pos2, len; double starttime, endtime, seqtime, partime; if (argc > 1) N = atoi(argv[1]); if (argc > 2) maxchar = atoi(argv[2]); input = randomString(N, maxchar); if (N <= 80) printf("Input: %s\n", input); starttime = ltime(); len = longestRepeatingSubstring(input, N, &pos1, &pos2); endtime = ltime(); seqtime = endtime - starttime; printf("\nLongest repeating (seq): len=%d, ", len); printf("pos1=:%d (", pos1); if (len < 80) printsubstring(input, pos1, len); printf(") pos2=%d: (", pos2); if (len < 80) printsubstring(input, pos2, len); printf(")\n"); printf("Sequential %.6lf s\n", seqtime); starttime = ltime(); len = longestRepeatingSubstring_cuda(input, N, &pos1, &pos2); endtime = ltime(); partime = endtime - starttime; printf("\nLongest repeating (par): len=%d, ", len); printf("pos1=:%d (", pos1); if (len < 80) printsubstring(input, pos1, len); printf(") pos2=%d: (", pos2); if (len < 80) printsubstring(input, pos2, len); printf(")\n"); printf("Parallel %.6lf s\n", partime); printf("Speedup: %.2f\n", seqtime / partime); // run the code twice as first run of a cuda code takes a while to // initialize the device starttime = ltime(); len = longestRepeatingSubstring_cuda(input, N, &pos1, &pos2); endtime = ltime(); partime = endtime - starttime; printf("\nLongest repeating (par): len=%d, ", len); printf("pos1=:%d (", pos1); if (len < 80) printsubstring(input, pos1, len); printf(") pos2=%d: (", pos2); if (len < 80) printsubstring(input, pos2, len); printf(")\n"); printf("Parallel %.6lf s\n", partime); printf("Speedup: %.2f\n", seqtime / partime); free(input); return 0; } void printsubstring(char *A, int start, int count) { int i; for (i = start; i < start + count; i++) putchar(A[i]); } /** * Generate random string of length n * @param n string length * @param nchar how many different characters to use * @return the new string, of NULL of n<0 */ char *randomString(int n, int nchar) { int i; char *s; char base = '0'; if (n < 0) return NULL; if (nchar > 26) nchar = 26; if (nchar > 10) base = 'a'; s = (char *) malloc(n + 1); for (i = 0; i < n; i++) s[i] = base + rand() % nchar; s[n] = '\0'; return s; } /** * Longest repeating substring of string A. * The substrings may overlap partially. * @param A input string * @param len length of A (redundant) * @param Rpos1 pointer to which the first starting position is stored (or -1 if no duplicate char) * @param Rpos2 pointer to which the second starting position is stored (or -1 if no duplicate char) * @return the length of the longest repeating substring, or 0 if no character occurs twice */ int longestRepeatingSubstring(const char *A, int len, int *Rpos1, int *Rpos2) { int maxlen = 0; // longest repeating found this far int maxpos1 = -1; int maxpos2 = -1; int offset = 1; // difference of starting positions while (offset < len - 1) { int pos1 = 0; // start from beginning int pos2 = offset; int samelen = 0; // how many same characters encountered int samestart1 = -1; // start index of current streak of same charcters int samestart2 = -1; while (pos2 < len) { // pos2 > pos1, iterate to the end if (A[pos1] == A[pos2]) { // same character if (samestart1 == -1) { // start of new common sequence samestart1 = pos1; samestart2 = pos2; } samelen++; if (samelen > maxlen) { // new maximum found maxpos1 = samestart1; maxpos2 = samestart2; maxlen = samelen; } } else { // different chacter, reset counters samelen = 0; samestart1 = -1; samestart2 = -1; } pos1++; pos2++; } offset++; } // store results and return *Rpos1 = maxpos1; *Rpos2 = maxpos2; return maxlen; } #define THREADSPERBLOCK 128 /** * Longest repeating substring of string A. * The substrings may overlap partially. * @param A input string * @param len length of A * @param Rpos1 pointer to which the first starting position is stored (or -1 if no duplicate char) * @param Rpos2 pointer to which the second starting position is stored (or -1 if no duplicate char) * @return the length of the longest repeating substring, or 0 if no character occurs twice */ // keep this header int longestRepeatingSubstring_cuda(const char *A, int len, int *Rpos1, int *Rpos2) { int maxlen = 0; int index1 = -1; int index2 = -1; int blocks = 0; // TODO int i; char *d_A; // device copy of input int *d_results; // device copy of (sub)results int *h_results; // host copy of (sub)results // allocate space for input and results cudaMalloc((void **)&d_A, len+1); cudaMemcpy(d_A, A, len+1, cudaMemcpyHostToDevice); // TODO h_results = (int*)malloc(0 /* TODO */ ); // call the kernel lrss_kernel<<>>(d_A, len, d_results); // copy (sub)results from device // TODO // free device allocated memory cudaFree(d_results); cudaFree(d_A); // TODO possibly combine the results free(h_results); *Rpos1 = index1; *Rpos2 = index2; return maxlen; } /** * Kernel to execute by each thread. * E.g., each kernel will do one offset, or some other dividing the work * @param A input * @param len input len * @param result pointer to which result is stored. */ // you may change this header for your own plan __global__ void lrss_kernel(const char *A, int len, int *result) { int tindex threadIdx.x + blockIdx.x * blockDim.x; // TODO // find local result // store result to global memory according to _your_ plan } double ltime() { return omp_get_wtime(); }