From 1b66891883aa07c6875f99a2866d25a172bd603e Mon Sep 17 00:00:00 2001 From: "18218461270@163.com" <18218461270@163.com> Date: Thu, 28 Aug 2025 13:13:50 +0800 Subject: [PATCH] init perflab --- perf/Makefile | 23 ++ perf/README | 38 +++ perf/clock.c | 242 ++++++++++++++++ perf/clock.h | 22 ++ perf/config.h | 32 +++ perf/defs.h | 38 +++ perf/driver.c | 752 +++++++++++++++++++++++++++++++++++++++++++++++++ perf/fcyc.c | 270 ++++++++++++++++++ perf/fcyc.h | 55 ++++ perf/kernels.c | 183 ++++++++++++ 10 files changed, 1655 insertions(+) create mode 100644 perf/Makefile create mode 100644 perf/README create mode 100644 perf/clock.c create mode 100644 perf/clock.h create mode 100644 perf/config.h create mode 100644 perf/defs.h create mode 100644 perf/driver.c create mode 100644 perf/fcyc.c create mode 100644 perf/fcyc.h create mode 100644 perf/kernels.c diff --git a/perf/Makefile b/perf/Makefile new file mode 100644 index 0000000..30e5534 --- /dev/null +++ b/perf/Makefile @@ -0,0 +1,23 @@ +# Student's Makefile for the CS:APP Performance Lab +TEAM = bovik +VERSION = 1 +HANDINDIR = + +CC = gcc +CFLAGS = -Wall -O2 -m32 +LIBS = -lm + +OBJS = driver.o kernels.o fcyc.o clock.o + +all: driver + +driver: $(OBJS) fcyc.h clock.h defs.h config.h + $(CC) $(CFLAGS) $(OBJS) $(LIBS) -o driver + +handin: + cp kernels.c $(HANDINDIR)/$(TEAM)-$(VERSION)-kernels.c + +clean: + -rm -f $(OBJS) driver core *~ *.o + + diff --git a/perf/README b/perf/README new file mode 100644 index 0000000..9e7a608 --- /dev/null +++ b/perf/README @@ -0,0 +1,38 @@ +##################################################################### +# CS:APP Performance Lab +# +# Student's Source Files +# +# Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. +# May not be used, modified, or copied without permission. +# +###################################################################### + +This directory contains the files you will need for the CS:APP +Performance Lab. + +kernels.c + This is the file you will be modifying and handing in. + +######################################### +# You shouldn't modify any of these files +######################################### +driver.c + This is the driver that tests the performance of all + of the versions of the rotate and smooth kernels + in your kernels.c file. + +config.h + This is a site-specific configuration file that was created by + your instructor for your system. + +defs.h + Various definitions needed by kernels.c and driver.c + +clock.{c,h} +fcyc.{c,h} + These contain timing routines that measure the performance of your + code with our k-best measurement scheme using IA32 cycle counters. + +Makefile: + This is the makefile that builds the driver program. diff --git a/perf/clock.c b/perf/clock.c new file mode 100644 index 0000000..ce4335a --- /dev/null +++ b/perf/clock.c @@ -0,0 +1,242 @@ +#include +#include +#include +#include +#include "clock.h" + +/* + * Routines for using the cycle counter + */ + +/* Detect whether running on Alpha */ +#ifdef __alpha +#define IS_ALPHA 1 +#else +#define IS_ALPHA 0 +#endif + +/* Detect whether running on x86 */ +#ifdef __i386__ +#define IS_x86 1 +#else +#define IS_x86 0 +#endif + +#if IS_ALPHA +/* Initialize the cycle counter */ +static unsigned cyc_hi = 0; +static unsigned cyc_lo = 0; + + +/* Use Alpha cycle timer to compute cycles. Then use + measured clock speed to compute seconds +*/ + +/* + * counterRoutine is an array of Alpha instructions to access + * the Alpha's processor cycle counter. It uses the rpcc + * instruction to access the counter. This 64 bit register is + * divided into two parts. The lower 32 bits are the cycles + * used by the current process. The upper 32 bits are wall + * clock cycles. These instructions read the counter, and + * convert the lower 32 bits into an unsigned int - this is the + * user space counter value. + * NOTE: The counter has a very limited time span. With a + * 450MhZ clock the counter can time things for about 9 + * seconds. */ +static unsigned int counterRoutine[] = +{ + 0x601fc000u, + 0x401f0000u, + 0x6bfa8001u +}; + +/* Cast the above instructions into a function. */ +static unsigned int (*counter)(void)= (void *)counterRoutine; + + +void start_counter() +{ + /* Get cycle counter */ + cyc_hi = 0; + cyc_lo = counter(); +} + +double get_counter() +{ + unsigned ncyc_hi, ncyc_lo; + unsigned hi, lo, borrow; + double result; + ncyc_lo = counter(); + ncyc_hi = 0; + lo = ncyc_lo - cyc_lo; + borrow = lo > ncyc_lo; + hi = ncyc_hi - cyc_hi - borrow; + result = (double) hi * (1 << 30) * 4 + lo; + if (result < 0) { + fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result); + } + return result; +} +#endif /* Alpha */ + +#if IS_x86 +/* $begin x86cyclecounter */ +/* Initialize the cycle counter */ +static unsigned cyc_hi = 0; +static unsigned cyc_lo = 0; + + +/* Set *hi and *lo to the high and low order bits of the cycle counter. + Implementation requires assembly code to use the rdtsc instruction. */ +void access_counter(unsigned *hi, unsigned *lo) +{ + asm("rdtsc; movl %%edx,%0; movl %%eax,%1" /* Read cycle counter */ + : "=r" (*hi), "=r" (*lo) /* and move results to */ + : /* No input */ /* the two outputs */ + : "%edx", "%eax"); +} + +/* Record the current value of the cycle counter. */ +void start_counter() +{ + access_counter(&cyc_hi, &cyc_lo); +} + +/* Return the number of cycles since the last call to start_counter. */ +double get_counter() +{ + unsigned ncyc_hi, ncyc_lo; + unsigned hi, lo, borrow; + double result; + + /* Get cycle counter */ + access_counter(&ncyc_hi, &ncyc_lo); + + /* Do double precision subtraction */ + lo = ncyc_lo - cyc_lo; + borrow = lo > ncyc_lo; + hi = ncyc_hi - cyc_hi - borrow; + result = (double) hi * (1 << 30) * 4 + lo; + if (result < 0) { + fprintf(stderr, "Error: counter returns neg value: %.0f\n", result); + } + return result; +} +/* $end x86cyclecounter */ +#endif /* x86 */ + +double ovhd() +{ + /* Do it twice to eliminate cache effects */ + int i; + double result; + + for (i = 0; i < 2; i++) { + start_counter(); + result = get_counter(); + } + return result; +} + +/* $begin mhz */ +/* Estimate the clock rate by measuring the cycles that elapse */ +/* while sleeping for sleeptime seconds */ +double mhz_full(int verbose, int sleeptime) +{ + double rate; + + start_counter(); + sleep(sleeptime); + rate = get_counter() / (1e6*sleeptime); + if (verbose) + printf("Processor clock rate ~= %.1f MHz\n", rate); + return rate; +} +/* $end mhz */ + +/* Version using a default sleeptime */ +double mhz(int verbose) +{ + return mhz_full(verbose, 2); +} + +/** Special counters that compensate for timer interrupt overhead */ + +static double cyc_per_tick = 0.0; + +#define NEVENT 100 +#define THRESHOLD 1000 +#define RECORDTHRESH 3000 + +/* Attempt to see how much time is used by timer interrupt */ +static void callibrate(int verbose) +{ + double oldt; + struct tms t; + clock_t oldc; + int e = 0; + + times(&t); + oldc = t.tms_utime; + start_counter(); + oldt = get_counter(); + while (e = THRESHOLD) { + clock_t newc; + times(&t); + newc = t.tms_utime; + if (newc > oldc) { + double cpt = (newt-oldt)/(newc-oldc); + if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH) + cyc_per_tick = cpt; + /* + if (verbose) + printf("Saw event lasting %.0f cycles and %d ticks. Ratio = %f\n", + newt-oldt, (int) (newc-oldc), cpt); + */ + e++; + oldc = newc; + } + oldt = newt; + } + } + /* ifdef added by Sanjit - 10/2001 */ +#ifdef DEBUG + if (verbose) + printf("Setting cyc_per_tick to %f\n", cyc_per_tick); +#endif +} + +static clock_t start_tick = 0; + +void start_comp_counter() +{ + struct tms t; + + if (cyc_per_tick == 0.0) + callibrate(1); + times(&t); + start_tick = t.tms_utime; + start_counter(); +} + +double get_comp_counter() +{ + double time = get_counter(); + double ctime; + struct tms t; + clock_t ticks; + + times(&t); + ticks = t.tms_utime - start_tick; + ctime = time - ticks*cyc_per_tick; + /* + printf("Measured %.0f cycles. Ticks = %d. Corrected %.0f cycles\n", + time, (int) ticks, ctime); + */ + return ctime; +} + diff --git a/perf/clock.h b/perf/clock.h new file mode 100644 index 0000000..54d26ac --- /dev/null +++ b/perf/clock.h @@ -0,0 +1,22 @@ +/* Routines for using cycle counter */ + +/* Start the counter */ +void start_counter(); + +/* Get # cycles since counter started */ +double get_counter(); + +/* Measure overhead for counter */ +double ovhd(); + +/* Determine clock rate of processor (using a default sleeptime) */ +double mhz(int verbose); + +/* Determine clock rate of processor, having more control over accuracy */ +double mhz_full(int verbose, int sleeptime); + +/** Special counters that compensate for timer interrupt overhead */ + +void start_comp_counter(); + +double get_comp_counter(); diff --git a/perf/config.h b/perf/config.h new file mode 100644 index 0000000..bf57a64 --- /dev/null +++ b/perf/config.h @@ -0,0 +1,32 @@ +/********************************************************* + * config.h - Configuration data for the driver.c program. + *********************************************************/ +#ifndef _CONFIG_H_ +#define _CONFIG_H_ + +/* + * CPEs for the baseline (naive) version of the rotate function that + * was handed out to the students. Rd is the measured CPE for a dxd + * image. Run the driver.c program on your system to get these + * numbers. + */ +#define R64 14.7 +#define R128 40.1 +#define R256 46.4 +#define R512 65.9 +#define R1024 94.5 + +/* + * CPEs for the baseline (naive) version of the smooth function that + * was handed out to the students. Sd is the measure CPE for a dxd + * image. Run the driver.c program on your system to get these + * numbers. + */ +#define S32 695 +#define S64 698 +#define S128 702 +#define S256 717 +#define S512 722 + + +#endif /* _CONFIG_H_ */ diff --git a/perf/defs.h b/perf/defs.h new file mode 100644 index 0000000..6a2c2cf --- /dev/null +++ b/perf/defs.h @@ -0,0 +1,38 @@ +/* + * driver.h - Various definitions for the Performance Lab. + * + * DO NOT MODIFY ANYTHING IN THIS FILE + */ +#ifndef _DEFS_H_ +#define _DEFS_H_ + +#include + +#define RIDX(i,j,n) ((i)*(n)+(j)) + +typedef struct { + char *team; + char *name1, *email1; + char *name2, *email2; +} team_t; + +extern team_t team; + +typedef struct { + unsigned short red; + unsigned short green; + unsigned short blue; +} pixel; + +typedef void (*lab_test_func) (int, pixel*, pixel*); + +void smooth(int, pixel *, pixel *); +void rotate(int, pixel *, pixel *); + +void register_rotate_functions(void); +void register_smooth_functions(void); +void add_smooth_function(lab_test_func, char*); +void add_rotate_function(lab_test_func, char*); + +#endif /* _DEFS_H_ */ + diff --git a/perf/driver.c b/perf/driver.c new file mode 100644 index 0000000..8b4d1aa --- /dev/null +++ b/perf/driver.c @@ -0,0 +1,752 @@ +/******************************************************************* + * + * driver.c - Driver program for CS:APP Performance Lab + * + * In kernels.c, students generate an arbitrary number of rotate and + * smooth test functions, which they then register with the driver + * program using the add_rotate_function() and add_smooth_function() + * functions. + * + * The driver program runs and measures the registered test functions + * and reports their performance. + * + * Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights + * reserved. May not be used, modified, or copied without permission. + * + ********************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "fcyc.h" +#include "defs.h" +#include "config.h" + +/* Team structure that identifies the students */ +extern team_t team; + +/* Keep track of a number of different test functions */ +#define MAX_BENCHMARKS 100 +#define DIM_CNT 5 + +/* Misc constants */ +#define BSIZE 32 /* cache block size in bytes */ +#define MAX_DIM 1280 /* 1024 + 256 */ +#define ODD_DIM 96 /* not a power of 2 */ + +/* fast versions of min and max */ +#define min(a,b) (a < b ? a : b) +#define max(a,b) (a > b ? a : b) + +/* This struct characterizes the results for one benchmark test */ +typedef struct { + lab_test_func tfunct; /* The test function */ + double cpes[DIM_CNT]; /* One CPE result for each dimension */ + char *description; /* ASCII description of the test function */ + unsigned short valid; /* The function is tested if this is non zero */ +} bench_t; + +/* The range of image dimensions that we will be testing */ +static int test_dim_rotate[] = {64, 128, 256, 512, 1024}; +static int test_dim_smooth[] = {32, 64, 128, 256, 512}; + +/* Baseline CPEs (see config.h) */ +static double rotate_baseline_cpes[] = {R64, R128, R256, R512, R1024}; +static double smooth_baseline_cpes[] = {S32, S64, S128, S256, S512}; + +/* These hold the results for all benchmarks */ +static bench_t benchmarks_rotate[MAX_BENCHMARKS]; +static bench_t benchmarks_smooth[MAX_BENCHMARKS]; + +/* These give the sizes of the above lists */ +static int rotate_benchmark_count = 0; +static int smooth_benchmark_count = 0; + +/* + * An image is a dimxdim matrix of pixels stored in a 1D array. The + * data array holds three images (the input original, a copy of the original, + * and the output result array. There is also an additional BSIZE bytes + * of padding for alignment to cache block boundaries. + */ +static pixel data[(3*MAX_DIM*MAX_DIM) + (BSIZE/sizeof(pixel))]; + +/* Various image pointers */ +static pixel *orig = NULL; /* original image */ +static pixel *copy_of_orig = NULL; /* copy of original for checking result */ +static pixel *result = NULL; /* result image */ + +/* Keep track of the best rotate and smooth score for grading */ +double rotate_maxmean = 0.0; +char *rotate_maxmean_desc = NULL; + +double smooth_maxmean = 0.0; +char *smooth_maxmean_desc = NULL; + + +/******************** Functions begin *************************/ + +void add_smooth_function(lab_test_func f, char *description) +{ + benchmarks_smooth[smooth_benchmark_count].tfunct = f; + benchmarks_smooth[smooth_benchmark_count].description = description; + benchmarks_smooth[smooth_benchmark_count].valid = 0; + smooth_benchmark_count++; +} + + +void add_rotate_function(lab_test_func f, char *description) +{ + benchmarks_rotate[rotate_benchmark_count].tfunct = f; + benchmarks_rotate[rotate_benchmark_count].description = description; + benchmarks_rotate[rotate_benchmark_count].valid = 0; + rotate_benchmark_count++; +} + +/* + * random_in_interval - Returns random integer in interval [low, high) + */ +static int random_in_interval(int low, int high) +{ + int size = high - low; + return (rand()% size) + low; +} + +/* + * create - creates a dimxdim image aligned to a BSIZE byte boundary + */ +static void create(int dim) +{ + int i, j; + + /* Align the images to BSIZE byte boundaries */ + orig = data; + while ((unsigned)orig % BSIZE) + orig = (pixel *)((char *)orig) + 1; + result = orig + dim*dim; + copy_of_orig = result + dim*dim; + + for (i = 0; i < dim; i++) { + for (j = 0; j < dim; j++) { + /* Original image initialized to random colors */ + orig[RIDX(i,j,dim)].red = random_in_interval(0, 65536); + orig[RIDX(i,j,dim)].green = random_in_interval(0, 65536); + orig[RIDX(i,j,dim)].blue = random_in_interval(0, 65536); + + /* Copy of original image for checking result */ + copy_of_orig[RIDX(i,j,dim)].red = orig[RIDX(i,j,dim)].red; + copy_of_orig[RIDX(i,j,dim)].green = orig[RIDX(i,j,dim)].green; + copy_of_orig[RIDX(i,j,dim)].blue = orig[RIDX(i,j,dim)].blue; + + /* Result image initialized to all black */ + result[RIDX(i,j,dim)].red = 0; + result[RIDX(i,j,dim)].green = 0; + result[RIDX(i,j,dim)].blue = 0; + } + } + + return; +} + + +/* + * compare_pixels - Returns 1 if the two arguments don't have same RGB + * values, 0 o.w. + */ +static int compare_pixels(pixel p1, pixel p2) +{ + return + (p1.red != p2.red) || + (p1.green != p2.green) || + (p1.blue != p2.blue); +} + + +/* Make sure the orig array is unchanged */ +static int check_orig(int dim) +{ + int i, j; + + for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + if (compare_pixels(orig[RIDX(i,j,dim)], copy_of_orig[RIDX(i,j,dim)])) { + printf("\n"); + printf("Error: Original image has been changed!\n"); + return 1; + } + + return 0; +} + +/* + * check_rotate - Make sure the rotate actually works. + * The orig array should not have been tampered with! + */ +static int check_rotate(int dim) +{ + int err = 0; + int i, j; + int badi = 0; + int badj = 0; + pixel orig_bad, res_bad; + + /* return 1 if the original image has been changed */ + if (check_orig(dim)) + return 1; + + for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + if (compare_pixels(orig[RIDX(i,j,dim)], + result[RIDX(dim-1-j,i,dim)])) { + err++; + badi = i; + badj = j; + orig_bad = orig[RIDX(i,j,dim)]; + res_bad = result[RIDX(dim-1-j,i,dim)]; + } + + if (err) { + printf("\n"); + printf("ERROR: Dimension=%d, %d errors\n", dim, err); + printf("E.g., The following two pixels should have equal value:\n"); + printf("src[%d][%d].{red,green,blue} = {%d,%d,%d}\n", + badi, badj, orig_bad.red, orig_bad.green, orig_bad.blue); + printf("dst[%d][%d].{red,green,blue} = {%d,%d,%d}\n", + (dim-1-badj), badi, res_bad.red, res_bad.green, res_bad.blue); + } + + return err; +} + +static pixel check_average(int dim, int i, int j, pixel *src) { + pixel result; + int num = 0; + int ii, jj; + int sum0, sum1, sum2; + int top_left_i, top_left_j; + int bottom_right_i, bottom_right_j; + + top_left_i = max(i-1, 0); + top_left_j = max(j-1, 0); + bottom_right_i = min(i+1, dim-1); + bottom_right_j = min(j+1, dim-1); + + sum0 = sum1 = sum2 = 0; + for(ii=top_left_i; ii <= bottom_right_i; ii++) { + for(jj=top_left_j; jj <= bottom_right_j; jj++) { + num++; + sum0 += (int) src[RIDX(ii,jj,dim)].red; + sum1 += (int) src[RIDX(ii,jj,dim)].green; + sum2 += (int) src[RIDX(ii,jj,dim)].blue; + } + } + result.red = (unsigned short) (sum0/num); + result.green = (unsigned short) (sum1/num); + result.blue = (unsigned short) (sum2/num); + + return result; +} + + +/* + * check_smooth - Make sure the smooth function actually works. The + * orig array should not have been tampered with! + */ +static int check_smooth(int dim) { + int err = 0; + int i, j; + int badi = 0; + int badj = 0; + pixel right, wrong; + + /* return 1 if original image has been changed */ + if (check_orig(dim)) + return 1; + + for (i = 0; i < dim; i++) { + for (j = 0; j < dim; j++) { + pixel smoothed = check_average(dim, i, j, orig); + if (compare_pixels(result[RIDX(i,j,dim)], smoothed)) { + err++; + badi = i; + badj = j; + wrong = result[RIDX(i,j,dim)]; + right = smoothed; + } + } + } + + if (err) { + printf("\n"); + printf("ERROR: Dimension=%d, %d errors\n", dim, err); + printf("E.g., \n"); + printf("You have dst[%d][%d].{red,green,blue} = {%d,%d,%d}\n", + badi, badj, wrong.red, wrong.green, wrong.blue); + printf("It should be dst[%d][%d].{red,green,blue} = {%d,%d,%d}\n", + badi, badj, right.red, right.green, right.blue); + } + + return err; +} + + +void func_wrapper(void *arglist[]) +{ + pixel *src, *dst; + int mydim; + lab_test_func f; + + f = (lab_test_func) arglist[0]; + mydim = *((int *) arglist[1]); + src = (pixel *) arglist[2]; + dst = (pixel *) arglist[3]; + + (*f)(mydim, src, dst); + + return; +} + +void run_rotate_benchmark(int idx, int dim) +{ + benchmarks_rotate[idx].tfunct(dim, orig, result); +} + +void test_rotate(int bench_index) +{ + int i; + int test_num; + char *description = benchmarks_rotate[bench_index].description; + + for (test_num = 0; test_num < DIM_CNT; test_num++) { + int dim; + + /* Check for odd dimension */ + create(ODD_DIM); + run_rotate_benchmark(bench_index, ODD_DIM); + if (check_rotate(ODD_DIM)) { + printf("Benchmark \"%s\" failed correctness check for dimension %d.\n", + benchmarks_rotate[bench_index].description, ODD_DIM); + return; + } + + /* Create a test image of the required dimension */ + dim = test_dim_rotate[test_num]; + create(dim); +#ifdef DEBUG + printf("DEBUG: Running benchmark \"%s\"\n", benchmarks_rotate[bench_index].description); +#endif + + /* Check that the code works */ + run_rotate_benchmark(bench_index, dim); + if (check_rotate(dim)) { + printf("Benchmark \"%s\" failed correctness check for dimension %d.\n", + benchmarks_rotate[bench_index].description, dim); + return; + } + + /* Measure CPE */ + { + double num_cycles, cpe; + int tmpdim = dim; + void *arglist[4]; + double dimension = (double) dim; + double work = dimension*dimension; +#ifdef DEBUG + printf("DEBUG: dimension=%.1f\n",dimension); + printf("DEBUG: work=%.1f\n",work); +#endif + arglist[0] = (void *) benchmarks_rotate[bench_index].tfunct; + arglist[1] = (void *) &tmpdim; + arglist[2] = (void *) orig; + arglist[3] = (void *) result; + + create(dim); + num_cycles = fcyc_v((test_funct_v)&func_wrapper, arglist); + cpe = num_cycles/work; + benchmarks_rotate[bench_index].cpes[test_num] = cpe; + } + } + + /* + * Print results as a table + */ + printf("Rotate: Version = %s:\n", description); + printf("Dim\t"); + for (i = 0; i < DIM_CNT; i++) + printf("\t%d", test_dim_rotate[i]); + printf("\tMean\n"); + + printf("Your CPEs"); + for (i = 0; i < DIM_CNT; i++) { + printf("\t%.1f", benchmarks_rotate[bench_index].cpes[i]); + } + printf("\n"); + + printf("Baseline CPEs"); + for (i = 0; i < DIM_CNT; i++) { + printf("\t%.1f", rotate_baseline_cpes[i]); + } + printf("\n"); + + /* Compute Speedup */ + { + double prod, ratio, mean; + prod = 1.0; /* Geometric mean */ + printf("Speedup\t"); + for (i = 0; i < DIM_CNT; i++) { + if (benchmarks_rotate[bench_index].cpes[i] > 0.0) { + ratio = rotate_baseline_cpes[i]/ + benchmarks_rotate[bench_index].cpes[i]; + } + else { + printf("Fatal Error: Non-positive CPE value...\n"); + exit(EXIT_FAILURE); + } + prod *= ratio; + printf("\t%.1f", ratio); + } + + /* Geometric mean */ + mean = pow(prod, 1.0/(double) DIM_CNT); + printf("\t%.1f", mean); + printf("\n\n"); + if (mean > rotate_maxmean) { + rotate_maxmean = mean; + rotate_maxmean_desc = benchmarks_rotate[bench_index].description; + } + } + + +#ifdef DEBUG + fflush(stdout); +#endif + return; +} + +void run_smooth_benchmark(int idx, int dim) +{ + benchmarks_smooth[idx].tfunct(dim, orig, result); +} + +void test_smooth(int bench_index) +{ + int i; + int test_num; + char *description = benchmarks_smooth[bench_index].description; + + for(test_num=0; test_num < DIM_CNT; test_num++) { + int dim; + + /* Check correctness for odd (non power of two dimensions */ + create(ODD_DIM); + run_smooth_benchmark(bench_index, ODD_DIM); + if (check_smooth(ODD_DIM)) { + printf("Benchmark \"%s\" failed correctness check for dimension %d.\n", + benchmarks_smooth[bench_index].description, ODD_DIM); + return; + } + + /* Create a test image of the required dimension */ + dim = test_dim_smooth[test_num]; + create(dim); + +#ifdef DEBUG + printf("DEBUG: Running benchmark \"%s\"\n", benchmarks_smooth[bench_index].description); +#endif + /* Check that the code works */ + run_smooth_benchmark(bench_index, dim); + if (check_smooth(dim)) { + printf("Benchmark \"%s\" failed correctness check for dimension %d.\n", + benchmarks_smooth[bench_index].description, dim); + return; + } + + /* Measure CPE */ + { + double num_cycles, cpe; + int tmpdim = dim; + void *arglist[4]; + double dimension = (double) dim; + double work = dimension*dimension; +#ifdef DEBUG + printf("DEBUG: dimension=%.1f\n",dimension); + printf("DEBUG: work=%.1f\n",work); +#endif + arglist[0] = (void *) benchmarks_smooth[bench_index].tfunct; + arglist[1] = (void *) &tmpdim; + arglist[2] = (void *) orig; + arglist[3] = (void *) result; + + create(dim); + num_cycles = fcyc_v((test_funct_v)&func_wrapper, arglist); + cpe = num_cycles/work; + benchmarks_smooth[bench_index].cpes[test_num] = cpe; + } + } + + /* Print results as a table */ + printf("Smooth: Version = %s:\n", description); + printf("Dim\t"); + for (i = 0; i < DIM_CNT; i++) + printf("\t%d", test_dim_smooth[i]); + printf("\tMean\n"); + + printf("Your CPEs"); + for (i = 0; i < DIM_CNT; i++) { + printf("\t%.1f", benchmarks_smooth[bench_index].cpes[i]); + } + printf("\n"); + + printf("Baseline CPEs"); + for (i = 0; i < DIM_CNT; i++) { + printf("\t%.1f", smooth_baseline_cpes[i]); + } + printf("\n"); + + /* Compute speedup */ + { + double prod, ratio, mean; + prod = 1.0; /* Geometric mean */ + printf("Speedup\t"); + for (i = 0; i < DIM_CNT; i++) { + if (benchmarks_smooth[bench_index].cpes[i] > 0.0) { + ratio = smooth_baseline_cpes[i]/ + benchmarks_smooth[bench_index].cpes[i]; + } + else { + printf("Fatal Error: Non-positive CPE value...\n"); + exit(EXIT_FAILURE); + } + prod *= ratio; + printf("\t%.1f", ratio); + } + /* Geometric mean */ + mean = pow(prod, 1.0/(double) DIM_CNT); + printf("\t%.1f", mean); + printf("\n\n"); + if (mean > smooth_maxmean) { + smooth_maxmean = mean; + smooth_maxmean_desc = benchmarks_smooth[bench_index].description; + } + } + + return; +} + + +void usage(char *progname) +{ + fprintf(stderr, "Usage: %s [-hqg] [-f ] [-d ]\n", progname); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -h Print this message\n"); + fprintf(stderr, " -q Quit after dumping (use with -d )\n"); + fprintf(stderr, " -g Autograder mode: checks only rotate() and smooth()\n"); + fprintf(stderr, " -f Get test function names from dump file \n"); + fprintf(stderr, " -d Emit a dump file for later use with -f\n"); + exit(EXIT_FAILURE); +} + + + +int main(int argc, char *argv[]) +{ + int i; + int quit_after_dump = 0; + int skip_teamname_check = 0; + int autograder = 0; + int seed = 1729; + char c = '0'; + char *bench_func_file = NULL; + char *func_dump_file = NULL; + + /* register all the defined functions */ + register_rotate_functions(); + register_smooth_functions(); + + /* parse command line args */ + while ((c = getopt(argc, argv, "tgqf:d:s:h")) != -1) + switch (c) { + + case 't': /* skip team name check (hidden flag) */ + skip_teamname_check = 1; + break; + + case 's': /* seed for random number generator (hidden flag) */ + seed = atoi(optarg); + break; + + case 'g': /* autograder mode (checks only rotate() and smooth()) */ + autograder = 1; + break; + + case 'q': + quit_after_dump = 1; + break; + + case 'f': /* get names of benchmark functions from this file */ + bench_func_file = strdup(optarg); + break; + + case 'd': /* dump names of benchmark functions to this file */ + func_dump_file = strdup(optarg); + { + int i; + FILE *fp = fopen(func_dump_file, "w"); + + if (fp == NULL) { + printf("Can't open file %s\n",func_dump_file); + exit(-5); + } + + for(i = 0; i < rotate_benchmark_count; i++) { + fprintf(fp, "R:%s\n", benchmarks_rotate[i].description); + } + for(i = 0; i < smooth_benchmark_count; i++) { + fprintf(fp, "S:%s\n", benchmarks_smooth[i].description); + } + fclose(fp); + } + break; + + case 'h': /* print help message */ + usage(argv[0]); + + default: /* unrecognized argument */ + usage(argv[0]); + } + + if (quit_after_dump) + exit(EXIT_SUCCESS); + + + /* Print team info */ + if (!skip_teamname_check) { + if (strcmp("bovik", team.team) == 0) { + printf("%s: Please fill in the team struct in kernels.c.\n", argv[0]); + exit(1); + } + printf("Teamname: %s\n", team.team); + printf("Member 1: %s\n", team.name1); + printf("Email 1: %s\n", team.email1); + if (*team.name2 || *team.email2) { + printf("Member 2: %s\n", team.name2); + printf("Email 2: %s\n", team.email2); + } + printf("\n"); + } + + srand(seed); + + /* + * If we are running in autograder mode, we will only test + * the rotate() and bench() functions. + */ + if (autograder) { + rotate_benchmark_count = 1; + smooth_benchmark_count = 1; + + benchmarks_rotate[0].tfunct = rotate; + benchmarks_rotate[0].description = "rotate() function"; + benchmarks_rotate[0].valid = 1; + + benchmarks_smooth[0].tfunct = smooth; + benchmarks_smooth[0].description = "smooth() function"; + benchmarks_smooth[0].valid = 1; + } + + /* + * If the user specified a file name using -f, then use + * the file to determine the versions of rotate and smooth to test + */ + else if (bench_func_file != NULL) { + char flag; + char func_line[256]; + FILE *fp = fopen(bench_func_file, "r"); + + if (fp == NULL) { + printf("Can't open file %s\n",bench_func_file); + exit(-5); + } + + while(func_line == fgets(func_line, 256, fp)) { + char *func_name = func_line; + char **strptr = &func_name; + char *token = strsep(strptr, ":"); + flag = token[0]; + func_name = strsep(strptr, "\n"); +#ifdef DEBUG + printf("Function Description is %s\n",func_name); +#endif + + if (flag == 'R') { + for(i=0; i +#include +#include + +#include "clock.h" +#include "fcyc.h" + +#define K 3 +#define MAXSAMPLES 20 +#define EPSILON 0.01 +#define COMPENSATE 0 +#define CLEAR_CACHE 0 +#define CACHE_BYTES (1<<19) +#define CACHE_BLOCK 32 + +static int kbest = K; +static int compensate = COMPENSATE; +static int clear_cache = CLEAR_CACHE; +static int maxsamples = MAXSAMPLES; +static double epsilon = EPSILON; +static int cache_bytes = CACHE_BYTES; +static int cache_block = CACHE_BLOCK; + +static int *cache_buf = NULL; + +static double *values = NULL; +static int samplecount = 0; + +#define KEEP_VALS 0 +#define KEEP_SAMPLES 0 + +#if KEEP_SAMPLES +static double *samples = NULL; +#endif + +/* Start new sampling process */ +static void init_sampler() +{ + if (values) + free(values); + values = calloc(kbest, sizeof(double)); +#if KEEP_SAMPLES + if (samples) + free(samples); + /* Allocate extra for wraparound analysis */ + samples = calloc(maxsamples+kbest, sizeof(double)); +#endif + samplecount = 0; +} + +/* Add new sample. */ +static void add_sample(double val) +{ + int pos = 0; + if (samplecount < kbest) { + pos = samplecount; + values[pos] = val; + } else if (val < values[kbest-1]) { + pos = kbest-1; + values[pos] = val; + } +#if KEEP_SAMPLES + samples[samplecount] = val; +#endif + samplecount++; + /* Insertion sort */ + while (pos > 0 && values[pos-1] > values[pos]) { + double temp = values[pos-1]; + values[pos-1] = values[pos]; + values[pos] = temp; + pos--; + } +} + +/* Have kbest minimum measurements converged within epsilon? */ +static int has_converged() +{ + return + (samplecount >= kbest) && + ((1 + epsilon)*values[0] >= values[kbest-1]); +} + +/* Code to clear cache */ + + +static volatile int sink = 0; + +static void clear() +{ + int x = sink; + int *cptr, *cend; + int incr = cache_block/sizeof(int); + if (!cache_buf) { + cache_buf = malloc(cache_bytes); + if (!cache_buf) { + fprintf(stderr, "Fatal error. Malloc returned null when trying to clear cache\n"); + exit(1); + } + } + cptr = (int *) cache_buf; + cend = cptr + cache_bytes/sizeof(int); + while (cptr < cend) { + x += *cptr; + cptr += incr; + } + sink = x; +} + +double fcyc(test_funct f, int *params) +{ + double result; + init_sampler(); + if (compensate) { + do { + double cyc; + if (clear_cache) + clear(); + start_comp_counter(); + f(params); + cyc = get_comp_counter(); + add_sample(cyc); + } while (!has_converged() && samplecount < maxsamples); + } else { + do { + double cyc; + if (clear_cache) + clear(); + start_counter(); + f(params); + cyc = get_counter(); + add_sample(cyc); + } while (!has_converged() && samplecount < maxsamples); + } +#ifdef DEBUG + { + int i; + printf(" %d smallest values: [", kbest); + for (i = 0; i < kbest; i++) + printf("%.0f%s", values[i], i==kbest-1 ? "]\n" : ", "); + } +#endif + result = values[0]; +#if !KEEP_VALS + free(values); + values = NULL; +#endif + return result; +} + + +/* A version of the above function added so as to pass arguments of + any type to the function + Added by Sanjit, Fall 2001 +*/ +double fcyc_v(test_funct_v f, void *params[]) +{ + double result; + init_sampler(); + if (compensate) { + do { + double cyc; + if (clear_cache) + clear(); + start_comp_counter(); + f(params); + cyc = get_comp_counter(); + add_sample(cyc); + } while (!has_converged() && samplecount < maxsamples); + } else { + do { + double cyc; + if (clear_cache) + clear(); + start_counter(); + f(params); + cyc = get_counter(); + add_sample(cyc); + } while (!has_converged() && samplecount < maxsamples); + } +#ifdef DEBUG + { + int i; + printf(" %d smallest values: [", kbest); + for (i = 0; i < kbest; i++) + printf("%.0f%s", values[i], i==kbest-1 ? "]\n" : ", "); + } +#endif + result = values[0]; +#if !KEEP_VALS + free(values); + values = NULL; +#endif + return result; +} + + + + +/***********************************************************/ +/* Set the various parameters used by measurement routines */ + + +/* When set, will run code to clear cache before each measurement + Default = 0 +*/ +void set_fcyc_clear_cache(int clear) +{ + clear_cache = clear; +} + +/* Set size of cache to use when clearing cache + Default = 1<<19 (512KB) +*/ +void set_fcyc_cache_size(int bytes) +{ + if (bytes != cache_bytes) { + cache_bytes = bytes; + if (cache_buf) { + free(cache_buf); + cache_buf = NULL; + } + } +} + +/* Set size of cache block + Default = 32 +*/ +void set_fcyc_cache_block(int bytes) { + cache_block = bytes; +} + + +/* When set, will attempt to compensate for timer interrupt overhead + Default = 0 +*/ +void set_fcyc_compensate(int compensate_arg) +{ + compensate = compensate_arg; +} + +/* Value of K in K-best + Default = 3 +*/ +void set_fcyc_k(int k) +{ + kbest = k; +} + +/* Maximum number of samples attempting to find K-best within some tolerance. + When exceeded, just return best sample found. + Default = 20 +*/ +void set_fcyc_maxsamples(int maxsamples_arg) +{ + maxsamples = maxsamples_arg; +} + +/* Tolerance required for K-best + Default = 0.01 +*/ +void set_fcyc_epsilon(double epsilon_arg) +{ + epsilon = epsilon_arg; +} + + + + + diff --git a/perf/fcyc.h b/perf/fcyc.h new file mode 100644 index 0000000..5da0bfb --- /dev/null +++ b/perf/fcyc.h @@ -0,0 +1,55 @@ + +/* Fcyc measures the speed of any "test function." Such a function + is passed a list of integer parameters, which it may interpret + in any way it chooses. +*/ + +typedef void (*test_funct)(int *); +typedef void (*test_funct_v)(void *); + +/* Compute number of cycles used by function f on given set of parameters */ +double fcyc(test_funct f, int* params); +double fcyc_v(test_funct_v f, void* params[]); + +/***********************************************************/ +/* Set the various parameters used by measurement routines */ + + +/* When set, will run code to clear cache before each measurement + Default = 0 +*/ +void set_fcyc_clear_cache(int clear); + +/* Set size of cache to use when clearing cache + Default = 1<<19 (512KB) +*/ +void set_fcyc_cache_size(int bytes); + +/* Set size of cache block + Default = 32 +*/ +void set_fcyc_cache_block(int bytes); + +/* When set, will attempt to compensate for timer interrupt overhead + Default = 0 +*/ +void set_fcyc_compensate(int compensate); + +/* Value of K in K-best + Default = 3 +*/ +void set_fcyc_k(int k); + +/* Maximum number of samples attempting to find K-best within some tolerance. + When exceeded, just return best sample found. + Default = 20 +*/ +void set_fcyc_maxsamples(int maxsamples); + +/* Tolerance required for K-best + Default = 0.01 +*/ +void set_fcyc_epsilon(double epsilon); + + + diff --git a/perf/kernels.c b/perf/kernels.c new file mode 100644 index 0000000..9af7842 --- /dev/null +++ b/perf/kernels.c @@ -0,0 +1,183 @@ +/******************************************************** + * Kernels to be optimized for the CS:APP Performance Lab + ********************************************************/ + +#include +#include +#include "defs.h" + +/* + * Please fill in the following team struct + */ +team_t team = { + "bovik", /* Team name */ + + "Harry Q. Bovik", /* First member full name */ + "bovik@nowhere.edu", /* First member email address */ + + "", /* Second member full name (leave blank if none) */ + "" /* Second member email addr (leave blank if none) */ +}; + +/*************** + * ROTATE KERNEL + ***************/ + +/****************************************************** + * Your different versions of the rotate kernel go here + ******************************************************/ + +/* + * naive_rotate - The naive baseline version of rotate + */ +char naive_rotate_descr[] = "naive_rotate: Naive baseline implementation"; +void naive_rotate(int dim, pixel *src, pixel *dst) +{ + int i, j; + + for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + dst[RIDX(dim-1-j, i, dim)] = src[RIDX(i, j, dim)]; +} + +/* + * rotate - Your current working version of rotate + * IMPORTANT: This is the version you will be graded on + */ +char rotate_descr[] = "rotate: Current working version"; +void rotate(int dim, pixel *src, pixel *dst) +{ + naive_rotate(dim, src, dst); +} + +/********************************************************************* + * register_rotate_functions - Register all of your different versions + * of the rotate kernel with the driver by calling the + * add_rotate_function() for each test function. When you run the + * driver program, it will test and report the performance of each + * registered test function. + *********************************************************************/ + +void register_rotate_functions() +{ + add_rotate_function(&naive_rotate, naive_rotate_descr); + add_rotate_function(&rotate, rotate_descr); + /* ... Register additional test functions here */ +} + + +/*************** + * SMOOTH KERNEL + **************/ + +/*************************************************************** + * Various typedefs and helper functions for the smooth function + * You may modify these any way you like. + **************************************************************/ + +/* A struct used to compute averaged pixel value */ +typedef struct { + int red; + int green; + int blue; + int num; +} pixel_sum; + +/* Compute min and max of two integers, respectively */ +static int min(int a, int b) { return (a < b ? a : b); } +static int max(int a, int b) { return (a > b ? a : b); } + +/* + * initialize_pixel_sum - Initializes all fields of sum to 0 + */ +static void initialize_pixel_sum(pixel_sum *sum) +{ + sum->red = sum->green = sum->blue = 0; + sum->num = 0; + return; +} + +/* + * accumulate_sum - Accumulates field values of p in corresponding + * fields of sum + */ +static void accumulate_sum(pixel_sum *sum, pixel p) +{ + sum->red += (int) p.red; + sum->green += (int) p.green; + sum->blue += (int) p.blue; + sum->num++; + return; +} + +/* + * assign_sum_to_pixel - Computes averaged pixel value in current_pixel + */ +static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum) +{ + current_pixel->red = (unsigned short) (sum.red/sum.num); + current_pixel->green = (unsigned short) (sum.green/sum.num); + current_pixel->blue = (unsigned short) (sum.blue/sum.num); + return; +} + +/* + * avg - Returns averaged pixel value at (i,j) + */ +static pixel avg(int dim, int i, int j, pixel *src) +{ + int ii, jj; + pixel_sum sum; + pixel current_pixel; + + initialize_pixel_sum(&sum); + for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++) + for(jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++) + accumulate_sum(&sum, src[RIDX(ii, jj, dim)]); + + assign_sum_to_pixel(¤t_pixel, sum); + return current_pixel; +} + +/****************************************************** + * Your different versions of the smooth kernel go here + ******************************************************/ + +/* + * naive_smooth - The naive baseline version of smooth + */ +char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation"; +void naive_smooth(int dim, pixel *src, pixel *dst) +{ + int i, j; + + for (i = 0; i < dim; i++) + for (j = 0; j < dim; j++) + dst[RIDX(i, j, dim)] = avg(dim, i, j, src); +} + +/* + * smooth - Your current working version of smooth. + * IMPORTANT: This is the version you will be graded on + */ +char smooth_descr[] = "smooth: Current working version"; +void smooth(int dim, pixel *src, pixel *dst) +{ + naive_smooth(dim, src, dst); +} + + +/********************************************************************* + * register_smooth_functions - Register all of your different versions + * of the smooth kernel with the driver by calling the + * add_smooth_function() for each test function. When you run the + * driver program, it will test and report the performance of each + * registered test function. + *********************************************************************/ + +void register_smooth_functions() { + add_smooth_function(&smooth, smooth_descr); + add_smooth_function(&naive_smooth, naive_smooth_descr); + /* ... Register additional test functions here */ +} +