/******************************************************** * Kernels to be optimized for the CS:APP Performance Lab ********************************************************/ #include #include #include "defs.h" /* * Please fill in the following team struct */ team_t team = { "bovik", /* Team name */ "Harry Q. Bovik", /* First member full name */ "bovik@nowhere.edu", /* First member email address */ "", /* Second member full name (leave blank if none) */ "" /* Second member email addr (leave blank if none) */ }; /*************** * ROTATE KERNEL ***************/ /****************************************************** * Your different versions of the rotate kernel go here ******************************************************/ /* * naive_rotate - The naive baseline version of rotate */ char naive_rotate_descr[] = "naive_rotate: Naive baseline implementation"; void naive_rotate(int dim, pixel *src, pixel *dst) { int i, j; for (i = 0; i < dim; i++) for (j = 0; j < dim; j++) dst[RIDX(dim-1-j, i, dim)] = src[RIDX(i, j, dim)]; } /* * rotate - Your current working version of rotate * IMPORTANT: This is the version you will be graded on */ char rotate_descr[] = "rotate: Current working version"; void rotate(int dim, pixel *src, pixel *dst) { for (int i = 0; i < dim; i += 16) { for (int j = 0; j < dim; j++) { dst[RIDX(dim - 1 - j, i, dim)] = src[RIDX(i, j, dim)]; dst[RIDX(dim - 1 - j, i + 1, dim)] = src[RIDX(i + 1, j, dim)]; dst[RIDX(dim - 1 - j, i + 2, dim)] = src[RIDX(i + 2, j, dim)]; dst[RIDX(dim - 1 - j, i + 3, dim)] = src[RIDX(i + 3, j, dim)]; dst[RIDX(dim - 1 - j, i + 4, dim)] = src[RIDX(i + 4, j, dim)]; dst[RIDX(dim - 1 - j, i + 5, dim)] = src[RIDX(i + 5, j, dim)]; dst[RIDX(dim - 1 - j, i + 6, dim)] = src[RIDX(i + 6, j, dim)]; dst[RIDX(dim - 1 - j, i + 7, dim)] = src[RIDX(i + 7, j, dim)]; dst[RIDX(dim - 1 - j, i + 8, dim)] = src[RIDX(i + 8, j, dim)]; dst[RIDX(dim - 1 - j, i + 9, dim)] = src[RIDX(i + 9, j, dim)]; dst[RIDX(dim - 1 - j, i + 10, dim)] = src[RIDX(i + 10, j, dim)]; dst[RIDX(dim - 1 - j, i + 11, dim)] = src[RIDX(i + 11, j, dim)]; dst[RIDX(dim - 1 - j, i + 12, dim)] = src[RIDX(i + 12, j, dim)]; dst[RIDX(dim - 1 - j, i + 13, dim)] = src[RIDX(i + 13, j, dim)]; dst[RIDX(dim - 1 - j, i + 14, dim)] = src[RIDX(i + 14, j, dim)]; dst[RIDX(dim - 1 - j, i + 15, dim)] = src[RIDX(i + 15, j, dim)]; } } } /********************************************************************* * register_rotate_functions - Register all of your different versions * of the rotate kernel with the driver by calling the * add_rotate_function() for each test function. When you run the * driver program, it will test and report the performance of each * registered test function. *********************************************************************/ void register_rotate_functions() { add_rotate_function(&naive_rotate, naive_rotate_descr); add_rotate_function(&rotate, rotate_descr); /* ... Register additional test functions here */ } /*************** * SMOOTH KERNEL **************/ /*************************************************************** * Various typedefs and helper functions for the smooth function * You may modify these any way you like. **************************************************************/ /* A struct used to compute averaged pixel value */ typedef struct { int red; int green; int blue; int num; } pixel_sum; /* Compute min and max of two integers, respectively */ static int min(int a, int b) { return (a < b ? a : b); } static int max(int a, int b) { return (a > b ? a : b); } /* * initialize_pixel_sum - Initializes all fields of sum to 0 */ static void initialize_pixel_sum(pixel_sum *sum) { sum->red = sum->green = sum->blue = 0; sum->num = 0; return; } /* * accumulate_sum - Accumulates field values of p in corresponding * fields of sum */ static void accumulate_sum(pixel_sum *sum, pixel p) { sum->red += (int) p.red; sum->green += (int) p.green; sum->blue += (int) p.blue; sum->num++; return; } /* * assign_sum_to_pixel - Computes averaged pixel value in current_pixel */ static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum) { current_pixel->red = (unsigned short) (sum.red/sum.num); current_pixel->green = (unsigned short) (sum.green/sum.num); current_pixel->blue = (unsigned short) (sum.blue/sum.num); return; } /* * avg - Returns averaged pixel value at (i,j) */ static pixel avg(int dim, int i, int j, pixel *src) { int ii, jj; pixel_sum sum; pixel current_pixel; initialize_pixel_sum(&sum); for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++) for(jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++) accumulate_sum(&sum, src[RIDX(ii, jj, dim)]); assign_sum_to_pixel(¤t_pixel, sum); return current_pixel; } /****************************************************** * Your different versions of the smooth kernel go here ******************************************************/ /* * naive_smooth - The naive baseline version of smooth */ char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation"; void naive_smooth(int dim, pixel *src, pixel *dst) { int i, j; for (i = 0; i < dim; i++) for (j = 0; j < dim; j++) { dst[RIDX(i, j, dim)] = avg(dim, i, j, src); } } /* * smooth - Your current working version of smooth. * IMPORTANT: This is the version you will be graded on */ char smooth_descr[] = "smooth: Current working version"; void smooth(int dim, pixel *src, pixel *dst) { int r = 0, g = 0, b = 0; #define set_pixel(i, j, div) dst[RIDX(i, j, dim)].red = r / div, dst[RIDX(i, j, dim)].green = g / div, dst[RIDX(i, j, dim)].blue = b / div #define add_pixel(i, j) r += src[RIDX(i, j, dim)].red, g += src[RIDX(i, j, dim)].green, b += src[RIDX(i, j, dim)].blue #define add_pixel2(i, j) add_pixel(i, j), add_pixel(i + 1, j) #define add_pixel3(i, j) add_pixel(i - 1, j), add_pixel2(i, j) #define sub_pixel(i, j) r -= src[RIDX(i, j, dim)].red, g -= src[RIDX(i, j, dim)].green, b -= src[RIDX(i, j, dim)].blue #define sub_pixel2(i, j) sub_pixel(i, j), sub_pixel(i + 1, j) #define sub_pixel3(i, j) sub_pixel(i - 1, j), sub_pixel2(i, j) add_pixel2(0, 0); add_pixel2(0, 1); set_pixel(0, 0, 4); for (int i = 1; i < 7; i += 2) { add_pixel2(0, i + 1); set_pixel(0, i, 6); sub_pixel2(0, i - 1); add_pixel2(0, i + 2); set_pixel(0, i + 1, 6); sub_pixel2(0, i); } for (int i = 7; i < dim - 1; i += 8) { add_pixel2(0, i + 1); set_pixel(0, i, 6); sub_pixel2(0, i - 1); add_pixel2(0, i + 2); set_pixel(0, i + 1, 6); sub_pixel2(0, i); add_pixel2(0, i + 3); set_pixel(0, i + 2, 6); sub_pixel2(0, i + 1); add_pixel2(0, i + 4); set_pixel(0, i + 3, 6); sub_pixel2(0, i + 2); add_pixel2(0, i + 5); set_pixel(0, i + 4, 6); sub_pixel2(0, i + 3); add_pixel2(0, i + 6); set_pixel(0, i + 5, 6); sub_pixel2(0, i + 4); add_pixel2(0, i + 7); set_pixel(0, i + 6, 6); sub_pixel2(0, i + 5); add_pixel2(0, i + 8); set_pixel(0, i + 7, 6); sub_pixel2(0, i + 6); } set_pixel(0, dim - 1, 4); for (int i = 1; i < dim - 1; i++) { r = g = b = 0; add_pixel3(i, 0); add_pixel3(i, 1); set_pixel(i, 0, 6); for (int j = 1; j < 7; j += 2) { add_pixel3(i, j + 1); set_pixel(i, j, 9); sub_pixel3(i, j - 1); add_pixel3(i, j + 2); set_pixel(i, j + 1, 9); sub_pixel3(i, j); } for (int j = 7; j < dim - 1; j += 8) { add_pixel3(i, j + 1); set_pixel(i, j, 9); sub_pixel3(i, j - 1); add_pixel3(i, j + 2); set_pixel(i, j + 1, 9); sub_pixel3(i, j); add_pixel3(i, j + 3); set_pixel(i, j + 2, 9); sub_pixel3(i, j + 1); add_pixel3(i, j + 4); set_pixel(i, j + 3, 9); sub_pixel3(i, j + 2); add_pixel3(i, j + 5); set_pixel(i, j + 4, 9); sub_pixel3(i, j + 3); add_pixel3(i, j + 6); set_pixel(i, j + 5, 9); sub_pixel3(i, j + 4); add_pixel3(i, j + 7); set_pixel(i, j + 6, 9); sub_pixel3(i, j + 5); add_pixel3(i, j + 8); set_pixel(i, j + 7, 9); sub_pixel3(i, j + 6); } set_pixel(i, dim - 1, 6); } r = g = b = 0; add_pixel2(dim - 2, 0); add_pixel2(dim - 2, 1); set_pixel(dim - 1, 0, 4); for (int i = 1; i < 7; i += 2) { add_pixel2(dim - 2, i + 1); set_pixel(dim - 1, i, 6); sub_pixel2(dim - 2, i - 1); add_pixel2(dim - 2, i + 2); set_pixel(dim - 1, i + 1, 6); sub_pixel2(dim - 2, i); } for (int i = 7; i < dim - 1; i += 8) { add_pixel2(dim - 2, i + 1); set_pixel(dim - 1, i, 6); sub_pixel2(dim - 2, i - 1); add_pixel2(dim - 2, i + 2); set_pixel(dim - 1, i + 1, 6); sub_pixel2(dim - 2, i); add_pixel2(dim - 2, i + 3); set_pixel(dim - 1, i + 2, 6); sub_pixel2(dim - 2, i + 1); add_pixel2(dim - 2, i + 4); set_pixel(dim - 1, i + 3, 6); sub_pixel2(dim - 2, i + 2); add_pixel2(dim - 2, i + 5); set_pixel(dim - 1, i + 4, 6); sub_pixel2(dim - 2, i + 3); add_pixel2(dim - 2, i + 6); set_pixel(dim - 1, i + 5, 6); sub_pixel2(dim - 2, i + 4); add_pixel2(dim - 2, i + 7); set_pixel(dim - 1, i + 6, 6); sub_pixel2(dim - 2, i + 5); add_pixel2(dim - 2, i + 8); set_pixel(dim - 1, i + 7, 6); sub_pixel2(dim - 2, i + 6); } set_pixel(dim - 1, dim - 1, 4); #undef set_pixel #undef add_pixel #undef add_pixel2 #undef add_pixel3 #undef sub_pixel #undef sub_pixel2 #undef sub_pixel3 } /********************************************************************* * register_smooth_functions - Register all of your different versions * of the smooth kernel with the driver by calling the * add_smooth_function() for each test function. When you run the * driver program, it will test and report the performance of each * registered test function. *********************************************************************/ void register_smooth_functions() { add_smooth_function(&smooth, smooth_descr); add_smooth_function(&naive_smooth, naive_smooth_descr); /* ... Register additional test functions here */ }