/********************************************************
 * Kernels to be optimized for the CS:APP Performance Lab
 ********************************************************/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include "defs.h"
|
|
|
|
/*
|
|
* Please fill in the following team struct
|
|
*/
|
|
team_t team = {
|
|
"bovik", /* Team name */
|
|
|
|
"Harry Q. Bovik", /* First member full name */
|
|
"bovik@nowhere.edu", /* First member email address */
|
|
|
|
"", /* Second member full name (leave blank if none) */
|
|
"" /* Second member email addr (leave blank if none) */
|
|
};
|
|
|
|
/***************
 * ROTATE KERNEL
 ***************/

/******************************************************
 * Your different versions of the rotate kernel go here
 ******************************************************/
|
|
|
|
/*
|
|
* naive_rotate - The naive baseline version of rotate
|
|
*/
|
|
char naive_rotate_descr[] = "naive_rotate: Naive baseline implementation";
|
|
void naive_rotate(int dim, pixel *src, pixel *dst)
|
|
{
|
|
int i, j;
|
|
|
|
for (i = 0; i < dim; i++)
|
|
for (j = 0; j < dim; j++)
|
|
dst[RIDX(dim-1-j, i, dim)] = src[RIDX(i, j, dim)];
|
|
}
|
|
|
|
/*
|
|
* rotate - Your current working version of rotate
|
|
* IMPORTANT: This is the version you will be graded on
|
|
*/
|
|
char rotate_descr[] = "rotate: Current working version";
|
|
void rotate(int dim, pixel *src, pixel *dst) {
|
|
for (int i = 0; i < dim; i += 16) {
|
|
for (int j = 0; j < dim; j++) {
|
|
dst[RIDX(dim - 1 - j, i, dim)] = src[RIDX(i, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 1, dim)] = src[RIDX(i + 1, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 2, dim)] = src[RIDX(i + 2, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 3, dim)] = src[RIDX(i + 3, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 4, dim)] = src[RIDX(i + 4, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 5, dim)] = src[RIDX(i + 5, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 6, dim)] = src[RIDX(i + 6, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 7, dim)] = src[RIDX(i + 7, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 8, dim)] = src[RIDX(i + 8, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 9, dim)] = src[RIDX(i + 9, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 10, dim)] = src[RIDX(i + 10, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 11, dim)] = src[RIDX(i + 11, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 12, dim)] = src[RIDX(i + 12, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 13, dim)] = src[RIDX(i + 13, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 14, dim)] = src[RIDX(i + 14, j, dim)];
|
|
dst[RIDX(dim - 1 - j, i + 15, dim)] = src[RIDX(i + 15, j, dim)];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*********************************************************************
|
|
* register_rotate_functions - Register all of your different versions
|
|
* of the rotate kernel with the driver by calling the
|
|
* add_rotate_function() for each test function. When you run the
|
|
* driver program, it will test and report the performance of each
|
|
* registered test function.
|
|
*********************************************************************/
|
|
|
|
void register_rotate_functions()
|
|
{
|
|
add_rotate_function(&naive_rotate, naive_rotate_descr);
|
|
add_rotate_function(&rotate, rotate_descr);
|
|
/* ... Register additional test functions here */
|
|
}
|
|
|
|
|
|
/***************
 * SMOOTH KERNEL
 **************/

/***************************************************************
 * Various typedefs and helper functions for the smooth function
 * You may modify these any way you like.
 **************************************************************/
|
|
|
|
/*
 * A struct used to compute averaged pixel value: per-channel running
 * totals plus the number of pixels that have been added so far.
 */
typedef struct {
    int red;    /* sum of the red components accumulated so far */
    int green;  /* sum of the green components accumulated so far */
    int blue;   /* sum of the blue components accumulated so far */
    int num;    /* how many pixels have been accumulated */
} pixel_sum;
|
|
|
|
/* Compute min and max of two integers, respectively */
|
|
/* min - Returns the smaller of a and b (either one when they are equal). */
static int min(int a, int b)
{
    if (a < b) {
        return a;
    }
    return b;
}
|
|
/* max - Returns the larger of a and b (either one when they are equal). */
static int max(int a, int b)
{
    if (a > b) {
        return a;
    }
    return b;
}
|
|
|
|
/*
|
|
* initialize_pixel_sum - Initializes all fields of sum to 0
|
|
*/
|
|
static void initialize_pixel_sum(pixel_sum *sum)
|
|
{
|
|
sum->red = sum->green = sum->blue = 0;
|
|
sum->num = 0;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* accumulate_sum - Accumulates field values of p in corresponding
|
|
* fields of sum
|
|
*/
|
|
static void accumulate_sum(pixel_sum *sum, pixel p)
|
|
{
|
|
sum->red += (int) p.red;
|
|
sum->green += (int) p.green;
|
|
sum->blue += (int) p.blue;
|
|
sum->num++;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* assign_sum_to_pixel - Computes averaged pixel value in current_pixel
|
|
*/
|
|
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum)
|
|
{
|
|
current_pixel->red = (unsigned short) (sum.red/sum.num);
|
|
current_pixel->green = (unsigned short) (sum.green/sum.num);
|
|
current_pixel->blue = (unsigned short) (sum.blue/sum.num);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* avg - Returns averaged pixel value at (i,j)
|
|
*/
|
|
static pixel avg(int dim, int i, int j, pixel *src)
|
|
{
|
|
int ii, jj;
|
|
pixel_sum sum;
|
|
pixel current_pixel;
|
|
|
|
initialize_pixel_sum(&sum);
|
|
for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++)
|
|
for(jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++)
|
|
accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);
|
|
|
|
assign_sum_to_pixel(¤t_pixel, sum);
|
|
return current_pixel;
|
|
}
|
|
|
|
/******************************************************
 * Your different versions of the smooth kernel go here
 ******************************************************/
|
|
|
|
/*
|
|
* naive_smooth - The naive baseline version of smooth
|
|
*/
|
|
char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation";
|
|
void naive_smooth(int dim, pixel *src, pixel *dst)
|
|
{
|
|
int i, j;
|
|
|
|
for (i = 0; i < dim; i++)
|
|
for (j = 0; j < dim; j++)
|
|
{
|
|
dst[RIDX(i, j, dim)] = avg(dim, i, j, src);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* smooth - Your current working version of smooth.
|
|
* IMPORTANT: This is the version you will be graded on
|
|
*/
|
|
char smooth_descr[] = "smooth: Current working version";
|
|
void smooth(int dim, pixel *src, pixel *dst) {
|
|
int r = 0, g = 0, b = 0;
|
|
#define set_pixel(i, j, div) dst[RIDX(i, j, dim)].red = r / div, dst[RIDX(i, j, dim)].green = g / div, dst[RIDX(i, j, dim)].blue = b / div
|
|
#define add_pixel(i, j) r += src[RIDX(i, j, dim)].red, g += src[RIDX(i, j, dim)].green, b += src[RIDX(i, j, dim)].blue
|
|
#define add_pixel2(i, j) add_pixel(i, j), add_pixel(i + 1, j)
|
|
#define add_pixel3(i, j) add_pixel(i - 1, j), add_pixel2(i, j)
|
|
#define sub_pixel(i, j) r -= src[RIDX(i, j, dim)].red, g -= src[RIDX(i, j, dim)].green, b -= src[RIDX(i, j, dim)].blue
|
|
#define sub_pixel2(i, j) sub_pixel(i, j), sub_pixel(i + 1, j)
|
|
#define sub_pixel3(i, j) sub_pixel(i - 1, j), sub_pixel2(i, j)
|
|
add_pixel2(0, 0);
|
|
add_pixel2(0, 1);
|
|
set_pixel(0, 0, 4);
|
|
for (int i = 1; i < 7; i += 2) {
|
|
add_pixel2(0, i + 1);
|
|
set_pixel(0, i, 6);
|
|
sub_pixel2(0, i - 1);
|
|
add_pixel2(0, i + 2);
|
|
set_pixel(0, i + 1, 6);
|
|
sub_pixel2(0, i);
|
|
}
|
|
for (int i = 7; i < dim - 1; i += 8) {
|
|
add_pixel2(0, i + 1);
|
|
set_pixel(0, i, 6);
|
|
sub_pixel2(0, i - 1);
|
|
add_pixel2(0, i + 2);
|
|
set_pixel(0, i + 1, 6);
|
|
sub_pixel2(0, i);
|
|
add_pixel2(0, i + 3);
|
|
set_pixel(0, i + 2, 6);
|
|
sub_pixel2(0, i + 1);
|
|
add_pixel2(0, i + 4);
|
|
set_pixel(0, i + 3, 6);
|
|
sub_pixel2(0, i + 2);
|
|
add_pixel2(0, i + 5);
|
|
set_pixel(0, i + 4, 6);
|
|
sub_pixel2(0, i + 3);
|
|
add_pixel2(0, i + 6);
|
|
set_pixel(0, i + 5, 6);
|
|
sub_pixel2(0, i + 4);
|
|
add_pixel2(0, i + 7);
|
|
set_pixel(0, i + 6, 6);
|
|
sub_pixel2(0, i + 5);
|
|
add_pixel2(0, i + 8);
|
|
set_pixel(0, i + 7, 6);
|
|
sub_pixel2(0, i + 6);
|
|
}
|
|
set_pixel(0, dim - 1, 4);
|
|
|
|
for (int i = 1; i < dim - 1; i++) {
|
|
r = g = b = 0;
|
|
add_pixel3(i, 0);
|
|
add_pixel3(i, 1);
|
|
set_pixel(i, 0, 6);
|
|
for (int j = 1; j < 7; j += 2) {
|
|
add_pixel3(i, j + 1);
|
|
set_pixel(i, j, 9);
|
|
sub_pixel3(i, j - 1);
|
|
add_pixel3(i, j + 2);
|
|
set_pixel(i, j + 1, 9);
|
|
sub_pixel3(i, j);
|
|
}
|
|
for (int j = 7; j < dim - 1; j += 8) {
|
|
add_pixel3(i, j + 1);
|
|
set_pixel(i, j, 9);
|
|
sub_pixel3(i, j - 1);
|
|
add_pixel3(i, j + 2);
|
|
set_pixel(i, j + 1, 9);
|
|
sub_pixel3(i, j);
|
|
add_pixel3(i, j + 3);
|
|
set_pixel(i, j + 2, 9);
|
|
sub_pixel3(i, j + 1);
|
|
add_pixel3(i, j + 4);
|
|
set_pixel(i, j + 3, 9);
|
|
sub_pixel3(i, j + 2);
|
|
add_pixel3(i, j + 5);
|
|
set_pixel(i, j + 4, 9);
|
|
sub_pixel3(i, j + 3);
|
|
add_pixel3(i, j + 6);
|
|
set_pixel(i, j + 5, 9);
|
|
sub_pixel3(i, j + 4);
|
|
add_pixel3(i, j + 7);
|
|
set_pixel(i, j + 6, 9);
|
|
sub_pixel3(i, j + 5);
|
|
add_pixel3(i, j + 8);
|
|
set_pixel(i, j + 7, 9);
|
|
sub_pixel3(i, j + 6);
|
|
}
|
|
set_pixel(i, dim - 1, 6);
|
|
}
|
|
|
|
r = g = b = 0;
|
|
add_pixel2(dim - 2, 0);
|
|
add_pixel2(dim - 2, 1);
|
|
set_pixel(dim - 1, 0, 4);
|
|
for (int i = 1; i < 7; i += 2) {
|
|
add_pixel2(dim - 2, i + 1);
|
|
set_pixel(dim - 1, i, 6);
|
|
sub_pixel2(dim - 2, i - 1);
|
|
add_pixel2(dim - 2, i + 2);
|
|
set_pixel(dim - 1, i + 1, 6);
|
|
sub_pixel2(dim - 2, i);
|
|
}
|
|
for (int i = 7; i < dim - 1; i += 8) {
|
|
add_pixel2(dim - 2, i + 1);
|
|
set_pixel(dim - 1, i, 6);
|
|
sub_pixel2(dim - 2, i - 1);
|
|
add_pixel2(dim - 2, i + 2);
|
|
set_pixel(dim - 1, i + 1, 6);
|
|
sub_pixel2(dim - 2, i);
|
|
add_pixel2(dim - 2, i + 3);
|
|
set_pixel(dim - 1, i + 2, 6);
|
|
sub_pixel2(dim - 2, i + 1);
|
|
add_pixel2(dim - 2, i + 4);
|
|
set_pixel(dim - 1, i + 3, 6);
|
|
sub_pixel2(dim - 2, i + 2);
|
|
add_pixel2(dim - 2, i + 5);
|
|
set_pixel(dim - 1, i + 4, 6);
|
|
sub_pixel2(dim - 2, i + 3);
|
|
add_pixel2(dim - 2, i + 6);
|
|
set_pixel(dim - 1, i + 5, 6);
|
|
sub_pixel2(dim - 2, i + 4);
|
|
add_pixel2(dim - 2, i + 7);
|
|
set_pixel(dim - 1, i + 6, 6);
|
|
sub_pixel2(dim - 2, i + 5);
|
|
add_pixel2(dim - 2, i + 8);
|
|
set_pixel(dim - 1, i + 7, 6);
|
|
sub_pixel2(dim - 2, i + 6);
|
|
}
|
|
set_pixel(dim - 1, dim - 1, 4);
|
|
#undef set_pixel
|
|
#undef add_pixel
|
|
#undef add_pixel2
|
|
#undef add_pixel3
|
|
#undef sub_pixel
|
|
#undef sub_pixel2
|
|
#undef sub_pixel3
|
|
}
|
|
|
|
|
|
/*********************************************************************
|
|
* register_smooth_functions - Register all of your different versions
|
|
* of the smooth kernel with the driver by calling the
|
|
* add_smooth_function() for each test function. When you run the
|
|
* driver program, it will test and report the performance of each
|
|
* registered test function.
|
|
*********************************************************************/
|
|
|
|
void register_smooth_functions() {
|
|
add_smooth_function(&smooth, smooth_descr);
|
|
add_smooth_function(&naive_smooth, naive_smooth_descr);
|
|
/* ... Register additional test functions here */
|
|
}
|
|
|