init perflab

This commit is contained in:
18218461270@163.com 2025-08-28 13:13:50 +08:00
parent 55ddbc6b41
commit 1b66891883
10 changed files with 1655 additions and 0 deletions

23
perf/Makefile Normal file
View File

@ -0,0 +1,23 @@
# Student's Makefile for the CS:APP Performance Lab
TEAM = bovik
VERSION = 1
HANDINDIR =
CC = gcc
CFLAGS = -Wall -O2 -m32
LIBS = -lm
OBJS = driver.o kernels.o fcyc.o clock.o

# None of these targets names a file produced by its rule.
.PHONY: all handin clean

all: driver

# Recipe lines must begin with a tab character.
driver: $(OBJS) fcyc.h clock.h defs.h config.h
	$(CC) $(CFLAGS) $(OBJS) $(LIBS) -o driver

# Copy kernels.c into HANDINDIR under a team/version-specific name.
handin:
	cp kernels.c $(HANDINDIR)/$(TEAM)-$(VERSION)-kernels.c

# The leading '-' lets make continue even if rm reports an error.
clean:
	-rm -f $(OBJS) driver core *~ *.o

38
perf/README Normal file
View File

@ -0,0 +1,38 @@
#####################################################################
# CS:APP Performance Lab
#
# Student's Source Files
#
# Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved.
# May not be used, modified, or copied without permission.
#
######################################################################
This directory contains the files you will need for the CS:APP
Performance Lab.
kernels.c
This is the file you will be modifying and handing in.
#########################################
# You shouldn't modify any of these files
#########################################
driver.c
This is the driver that tests the performance of all
of the versions of the rotate and smooth kernels
in your kernels.c file.
config.h
This is a site-specific configuration file that was created by
your instructor for your system.
defs.h
Various definitions needed by kernels.c and driver.c
clock.{c,h}
fcyc.{c,h}
These contain timing routines that measure the performance of your
code with our k-best measurement scheme using IA32 cycle counters.
Makefile:
This is the makefile that builds the driver program.

242
perf/clock.c Normal file
View File

@ -0,0 +1,242 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/times.h>
#include "clock.h"
/*
* Routines for using the cycle counter
*/
/* Detect whether running on Alpha */
#ifdef __alpha
#define IS_ALPHA 1
#else
#define IS_ALPHA 0
#endif
/*
 * Detect whether running on x86.  The rdtsc-based counter code is
 * selected for both 32-bit (__i386__) and 64-bit (__x86_64__) targets,
 * so a build without -m32 still gets start_counter()/get_counter().
 */
#if defined(__i386__) || defined(__x86_64__)
#define IS_x86 1
#else
#define IS_x86 0
#endif
#if IS_ALPHA
/* Counter value saved by start_counter(); cyc_hi is always 0 in this
* Alpha variant, only cyc_lo carries information. */
static unsigned cyc_hi = 0;
static unsigned cyc_lo = 0;
/* Use Alpha cycle timer to compute cycles. Then use
measured clock speed to compute seconds
*/
/*
* counterRoutine is an array of Alpha instructions to access
* the Alpha's processor cycle counter. It uses the rpcc
* instruction to access the counter. This 64 bit register is
* divided into two parts. The lower 32 bits are the cycles
* used by the current process. The upper 32 bits are wall
* clock cycles. These instructions read the counter, and
* convert the lower 32 bits into an unsigned int - this is the
* user space counter value.
* NOTE: The counter has a very limited time span. With a
* 450 MHz clock the counter can time things for about 9
* seconds. */
static unsigned int counterRoutine[] =
{
0x601fc000u,
0x401f0000u,
0x6bfa8001u
};
/* Cast the above instructions into a function. */
static unsigned int (*counter)(void)= (void *)counterRoutine;
/* Record the current value of the cycle counter in cyc_lo. */
void start_counter()
{
/* Get cycle counter */
cyc_hi = 0;
cyc_lo = counter();
}
/*
* Return the number of cycles since the last call to start_counter,
* computed as a two-word (hi:lo) subtraction and converted to double.
*/
double get_counter()
{
unsigned ncyc_hi, ncyc_lo;
unsigned hi, lo, borrow;
double result;
ncyc_lo = counter();
ncyc_hi = 0;
lo = ncyc_lo - cyc_lo;
/* The unsigned subtraction wrapped if the difference exceeds the minuend */
borrow = lo > ncyc_lo;
hi = ncyc_hi - cyc_hi - borrow;
/* hi * 2^32 + lo; written as (1 << 30) * 4 so no int shift overflows */
result = (double) hi * (1 << 30) * 4 + lo;
/* NOTE(review): hi and lo are unsigned, so this branch looks unreachable - confirm */
if (result < 0) {
fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result);
}
return result;
}
#endif /* Alpha */
#if IS_x86
/* $begin x86cyclecounter */
/* Counter value (high and low 32-bit words) saved by start_counter() */
static unsigned cyc_hi = 0;
static unsigned cyc_lo = 0;
/*
 * access_counter - Read the processor's time-stamp counter.
 *
 * Sets *hi and *lo to the high- and low-order 32 bits of the counter,
 * which the rdtsc instruction returns in %edx and %eax respectively.
 *
 * The asm is marked volatile: it has outputs but no inputs, so without
 * the qualifier the compiler is free to merge two reads into one or to
 * move a read out of the region being timed.  The "=d"/"=a" constraints
 * bind the outputs directly to the registers rdtsc writes, so no extra
 * moves or clobber list are needed.  __asm__ is used instead of asm so
 * the file also compiles in strict ISO C modes.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    __asm__ __volatile__("rdtsc"          /* Read cycle counter */
                         : "=d" (*hi), "=a" (*lo)
                         : /* No input */);
}
/*
* Record the current value of the cycle counter.
* The value is stored in the file-scope cyc_hi/cyc_lo pair, which
* get_counter() later subtracts from a fresh reading.
*/
void start_counter()
{
access_counter(&cyc_hi, &cyc_lo);
}
/* Return the number of cycles since the last call to start_counter. */
double get_counter()
{
unsigned ncyc_hi, ncyc_lo;
unsigned hi, lo, borrow;
double result;
/* Get cycle counter */
access_counter(&ncyc_hi, &ncyc_lo);
/* Do double precision subtraction */
lo = ncyc_lo - cyc_lo;
borrow = lo > ncyc_lo;
hi = ncyc_hi - cyc_hi - borrow;
result = (double) hi * (1 << 30) * 4 + lo;
if (result < 0) {
fprintf(stderr, "Error: counter returns neg value: %.0f\n", result);
}
return result;
}
/* $end x86cyclecounter */
#endif /* x86 */
/*
 * ovhd - Measure the cost, in cycles, of starting and immediately
 * reading the counter.  The measurement is taken twice and only the
 * second reading is returned, to eliminate cache effects.
 */
double ovhd()
{
    double cycles;
    int pass = 0;

    do {
        start_counter();
        cycles = get_counter();
    } while (++pass < 2);
    return cycles;
}
/* $begin mhz */
/*
 * mhz_full - Estimate the clock rate in MHz by counting the cycles
 * that elapse while sleeping for sleeptime seconds.
 * Prints the estimate when verbose is non-zero.
 */
double mhz_full(int verbose, int sleeptime)
{
    double elapsed_cycles;
    double clock_mhz;

    start_counter();
    sleep(sleeptime);
    elapsed_cycles = get_counter();

    /* cycles / microseconds slept = MHz */
    clock_mhz = elapsed_cycles / (1e6*sleeptime);
    if (verbose) {
        printf("Processor clock rate ~= %.1f MHz\n", clock_mhz);
    }
    return clock_mhz;
}
/* $end mhz */
/*
* Version using a default sleeptime.
* Equivalent to mhz_full(verbose, 2), i.e. a two-second measurement.
*/
double mhz(int verbose)
{
return mhz_full(verbose, 2);
}
/** Special counters that compensate for timer interrupt overhead */
/* Estimated cycles charged per clock tick; 0.0 means "not yet calibrated" */
static double cyc_per_tick = 0.0;
/* Number of tick-advance events to observe during calibration */
#define NEVENT 100
/* Minimum gap, in cycles, between two counter readings before times() is consulted */
#define THRESHOLD 1000
/* A cycles-per-tick estimate must exceed this to be recorded */
#define RECORDTHRESH 3000
/*
* Attempt to see how much time is used by timer interrupt.
*
* Spins reading the cycle counter. Whenever two consecutive readings
* are at least THRESHOLD cycles apart, the process user time
* (tms_utime) is re-read; if it advanced, the gap divided by the
* number of ticks is a candidate cycles-per-tick value. The smallest
* candidate above RECORDTHRESH is kept in cyc_per_tick. The loop ends
* after NEVENT tick advances have been seen.
*
* verbose is only consulted when DEBUG is defined.
*/
static void callibrate(int verbose)
{
double oldt;
struct tms t;
clock_t oldc;
int e = 0;
times(&t);
oldc = t.tms_utime;
start_counter();
oldt = get_counter();
while (e <NEVENT) {
double newt = get_counter();
if (newt-oldt >= THRESHOLD) {
clock_t newc;
times(&t);
newc = t.tms_utime;
if (newc > oldc) {
double cpt = (newt-oldt)/(newc-oldc);
/* Keep the minimum estimate seen so far, ignoring implausibly small ones */
if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH)
cyc_per_tick = cpt;
/*
if (verbose)
printf("Saw event lasting %.0f cycles and %d ticks. Ratio = %f\n",
newt-oldt, (int) (newc-oldc), cpt);
*/
e++;
oldc = newc;
}
/* Restart the gap measurement from this reading */
oldt = newt;
}
}
/* ifdef added by Sanjit - 10/2001 */
#ifdef DEBUG
if (verbose)
printf("Setting cyc_per_tick to %f\n", cyc_per_tick);
#endif
}
/* User-time tick count (tms_utime) captured by start_comp_counter() */
static clock_t start_tick = 0;
/*
* Start a compensated measurement: calibrate cyc_per_tick on first use,
* remember the current user-time tick count, then start the cycle
* counter. The counter is started last so the bookkeeping above is not
* part of the timed interval.
*/
void start_comp_counter()
{
struct tms t;
if (cyc_per_tick == 0.0)
callibrate(1);
times(&t);
start_tick = t.tms_utime;
start_counter();
}
/*
* Return the cycles elapsed since start_comp_counter(), minus
* cyc_per_tick for every user-time tick that elapsed in between.
* The cycle counter is read first, before the times() call.
*/
double get_comp_counter()
{
double time = get_counter();
double ctime;
struct tms t;
clock_t ticks;
times(&t);
ticks = t.tms_utime - start_tick;
/* Subtract the calibrated per-tick overhead from the raw cycle count */
ctime = time - ticks*cyc_per_tick;
/*
printf("Measured %.0f cycles. Ticks = %d. Corrected %.0f cycles\n",
time, (int) ticks, ctime);
*/
return ctime;
}

22
perf/clock.h Normal file
View File

@ -0,0 +1,22 @@
/* Routines for using cycle counter */
#ifndef CLOCK_H_
#define CLOCK_H_

/*
 * All functions are declared with (void) or explicit parameter lists so
 * that the compiler checks the arguments at every call site; an empty
 * parameter list "()" would leave them unchecked.
 */

/* Start the counter */
void start_counter(void);
/* Get # cycles since counter started */
double get_counter(void);
/* Measure overhead for counter */
double ovhd(void);
/* Determine clock rate of processor (using a default sleeptime) */
double mhz(int verbose);
/* Determine clock rate of processor, having more control over accuracy */
double mhz_full(int verbose, int sleeptime);
/** Special counters that compensate for timer interrupt overhead */
void start_comp_counter(void);
double get_comp_counter(void);

#endif /* CLOCK_H_ */

32
perf/config.h Normal file
View File

@ -0,0 +1,32 @@
/*********************************************************
* config.h - Configuration data for the driver.c program.
*
* Each macro is a baseline cycles-per-element (CPE) value; driver.c
* divides these by the measured CPEs to report speedups.
*********************************************************/
#ifndef _CONFIG_H_
#define _CONFIG_H_
/*
* CPEs for the baseline (naive) version of the rotate function that
* was handed out to the students. Rd is the measured CPE for a dxd
* image. Run the driver.c program on your system to get these
* numbers.
*/
#define R64 14.7
#define R128 40.1
#define R256 46.4
#define R512 65.9
#define R1024 94.5
/*
* CPEs for the baseline (naive) version of the smooth function that
* was handed out to the students. Sd is the measured CPE for a dxd
* image. Run the driver.c program on your system to get these
* numbers.
*/
#define S32 695
#define S64 698
#define S128 702
#define S256 717
#define S512 722
#endif /* _CONFIG_H_ */

38
perf/defs.h Normal file
View File

@ -0,0 +1,38 @@
/*
* defs.h - Various definitions for the Performance Lab.
*
* DO NOT MODIFY ANYTHING IN THIS FILE
*/
#ifndef _DEFS_H_
#define _DEFS_H_
#include <stdlib.h>
/* Index of element (i,j) in an n-wide matrix stored row by row in a 1D array */
#define RIDX(i,j,n) ((i)*(n)+(j))
/* Identifies the team: a team name plus name/email for up to two members */
typedef struct {
char *team;
char *name1, *email1;
char *name2, *email2;
} team_t;
/* The team structure itself is defined in kernels.c */
extern team_t team;
/* One image pixel: three unsigned 16-bit color channels */
typedef struct {
unsigned short red;
unsigned short green;
unsigned short blue;
} pixel;
/* Signature shared by every kernel; driver.c calls it as (dim, src, dst) */
typedef void (*lab_test_func) (int, pixel*, pixel*);
void smooth(int, pixel *, pixel *);
void rotate(int, pixel *, pixel *);
/* Called once by the driver at startup, before command-line parsing */
void register_rotate_functions(void);
void register_smooth_functions(void);
/* Register a kernel together with its ASCII description */
void add_smooth_function(lab_test_func, char*);
void add_rotate_function(lab_test_func, char*);
#endif /* _DEFS_H_ */

752
perf/driver.c Normal file
View File

@ -0,0 +1,752 @@
/*******************************************************************
*
* driver.c - Driver program for CS:APP Performance Lab
*
* In kernels.c, students generate an arbitrary number of rotate and
* smooth test functions, which they then register with the driver
* program using the add_rotate_function() and add_smooth_function()
* functions.
*
* The driver program runs and measures the registered test functions
* and reports their performance.
*
* Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights
* reserved. May not be used, modified, or copied without permission.
*
********************************************************************/
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <assert.h>
#include <math.h>
#include "fcyc.h"
#include "defs.h"
#include "config.h"
/* Team structure that identifies the students */
extern team_t team;
/* Keep track of a number of different test functions */
#define MAX_BENCHMARKS 100
#define DIM_CNT 5
/* Misc constants */
#define BSIZE 32 /* cache block size in bytes */
#define MAX_DIM 1280 /* 1024 + 256 */
#define ODD_DIM 96 /* not a power of 2 */
/*
 * fast versions of min and max
 *
 * Every argument use is parenthesized so that an argument containing a
 * low-precedence operator (e.g. min(5, 1 | 2)) is compared as a whole.
 * Each argument is still evaluated twice, so do not pass expressions
 * with side effects.
 */
#define min(a,b) ((a) < (b) ? (a) : (b))
#define max(a,b) ((a) > (b) ? (a) : (b))
/*
* This struct characterizes the results for one benchmark test.
* One entry exists per registered kernel; cpes[k] corresponds to the
* k-th entry of the test-dimension table for that kernel type.
*/
typedef struct {
lab_test_func tfunct; /* The test function */
double cpes[DIM_CNT]; /* One CPE result for each dimension */
char *description; /* ASCII description of the test function */
unsigned short valid; /* The function is tested if this is non zero */
} bench_t;
/* The range of image dimensions that we will be testing */
static int test_dim_rotate[] = {64, 128, 256, 512, 1024};
static int test_dim_smooth[] = {32, 64, 128, 256, 512};
/* Baseline CPEs (see config.h) */
static double rotate_baseline_cpes[] = {R64, R128, R256, R512, R1024};
static double smooth_baseline_cpes[] = {S32, S64, S128, S256, S512};
/* These hold the results for all benchmarks */
static bench_t benchmarks_rotate[MAX_BENCHMARKS];
static bench_t benchmarks_smooth[MAX_BENCHMARKS];
/* These give the sizes of the above lists */
static int rotate_benchmark_count = 0;
static int smooth_benchmark_count = 0;
/*
* An image is a dimxdim matrix of pixels stored in a 1D array. The
* data array holds three images (the input original, the output result
* array, and a copy of the original). There is also additional padding
* of BSIZE/sizeof(pixel) pixels for alignment to cache block boundaries.
*/
static pixel data[(3*MAX_DIM*MAX_DIM) + (BSIZE/sizeof(pixel))];
/* Various image pointers; all three point into data[] and are set by create() */
static pixel *orig = NULL; /* original image */
static pixel *copy_of_orig = NULL; /* copy of original for checking result */
static pixel *result = NULL; /* result image */
/* Keep track of the best rotate and smooth score for grading */
double rotate_maxmean = 0.0;
char *rotate_maxmean_desc = NULL; /* stays NULL until a rotate benchmark is scored */
double smooth_maxmean = 0.0;
char *smooth_maxmean_desc = NULL; /* stays NULL until a smooth benchmark is scored */
/******************** Functions begin *************************/
/*
 * add_smooth_function - Register a smooth kernel with the driver.
 *
 * f           - the kernel to benchmark
 * description - ASCII description printed with its results; the pointer
 *               is stored, not copied
 *
 * The entry starts out with valid == 0; main() decides later which
 * entries are actually run.  At most MAX_BENCHMARKS kernels can be
 * registered; further registrations are reported on stderr and ignored
 * rather than written past the end of the table.
 */
void add_smooth_function(lab_test_func f, char *description)
{
    bench_t *slot;

    if (smooth_benchmark_count >= MAX_BENCHMARKS) {
        fprintf(stderr,
                "Error: more than %d smooth functions registered; ignoring the extra one\n",
                MAX_BENCHMARKS);
        return;
    }
    slot = &benchmarks_smooth[smooth_benchmark_count];
    slot->tfunct = f;
    slot->description = description;
    slot->valid = 0;
    smooth_benchmark_count++;
}
/*
 * add_rotate_function - Register a rotate kernel with the driver.
 *
 * f           - the kernel to benchmark
 * description - ASCII description printed with its results; the pointer
 *               is stored, not copied
 *
 * The entry starts out with valid == 0; main() decides later which
 * entries are actually run.  At most MAX_BENCHMARKS kernels can be
 * registered; further registrations are reported on stderr and ignored
 * rather than written past the end of the table.
 */
void add_rotate_function(lab_test_func f, char *description)
{
    bench_t *slot;

    if (rotate_benchmark_count >= MAX_BENCHMARKS) {
        fprintf(stderr,
                "Error: more than %d rotate functions registered; ignoring the extra one\n",
                MAX_BENCHMARKS);
        return;
    }
    slot = &benchmarks_rotate[rotate_benchmark_count];
    slot->tfunct = f;
    slot->description = description;
    slot->valid = 0;
    rotate_benchmark_count++;
}
/*
 * random_in_interval - Draws one value from rand() and maps it into
 * the half-open interval [low, high).
 */
static int random_in_interval(int low, int high)
{
    const int span = high - low;
    const int offset = rand() % span;

    return low + offset;
}
/*
 * create - creates a dimxdim image aligned to a BSIZE byte boundary
 *
 * Lays out three dim*dim images back to back inside data[]: orig first,
 * then result, then copy_of_orig.  orig is filled with random colors,
 * copy_of_orig receives the same values, and result is cleared to black.
 */
static void create(int dim)
{
    int i, j;
    char *aligned = (char *) data;

    /*
     * Align the images to BSIZE byte boundaries.  The pointer is
     * advanced one BYTE at a time (the cast to pixel * happens only
     * after the loop), so at most BSIZE-1 bytes are skipped and the
     * padding at the end of data[] is never exceeded.  The address is
     * inspected as a size_t so that it is not truncated on targets
     * where pointers are wider than unsigned int.
     */
    while ((size_t) aligned % BSIZE)
        aligned++;
    orig = (pixel *) aligned;
    result = orig + dim*dim;
    copy_of_orig = result + dim*dim;

    for (i = 0; i < dim; i++) {
        for (j = 0; j < dim; j++) {
            /* Original image initialized to random colors */
            orig[RIDX(i,j,dim)].red = random_in_interval(0, 65536);
            orig[RIDX(i,j,dim)].green = random_in_interval(0, 65536);
            orig[RIDX(i,j,dim)].blue = random_in_interval(0, 65536);

            /* Copy of original image for checking result */
            copy_of_orig[RIDX(i,j,dim)].red = orig[RIDX(i,j,dim)].red;
            copy_of_orig[RIDX(i,j,dim)].green = orig[RIDX(i,j,dim)].green;
            copy_of_orig[RIDX(i,j,dim)].blue = orig[RIDX(i,j,dim)].blue;

            /* Result image initialized to all black */
            result[RIDX(i,j,dim)].red = 0;
            result[RIDX(i,j,dim)].green = 0;
            result[RIDX(i,j,dim)].blue = 0;
        }
    }
    return;
}
/*
 * compare_pixels - Reports whether two pixels differ.
 * Returns 1 as soon as any of the red, green or blue channels is
 * unequal, and 0 when all three match.
 */
static int compare_pixels(pixel p1, pixel p2)
{
    if (p1.red != p2.red)
        return 1;
    if (p1.green != p2.green)
        return 1;
    return p1.blue != p2.blue;
}
/*
 * check_orig - Verify that the kernel under test left the source image
 * alone by comparing orig against copy_of_orig pixel by pixel.
 * Prints an error and returns 1 at the first difference; returns 0 if
 * the two images are identical.
 */
static int check_orig(int dim)
{
    int row, col;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            int idx = RIDX(row, col, dim);

            if (!compare_pixels(orig[idx], copy_of_orig[idx]))
                continue;
            printf("\n");
            printf("Error: Original image has been changed!\n");
            return 1;
        }
    }
    return 0;
}
/*
 * check_rotate - Validate the output of a rotate kernel.
 *
 * Returns 1 immediately if the source image was modified.  Otherwise
 * every source pixel (i,j) is compared with result pixel (dim-1-j, i);
 * the number of mismatches is returned, and when it is non-zero the
 * last mismatching pair is printed as an example.
 */
static int check_rotate(int dim)
{
    int mismatches = 0;
    int row, col;
    int last_row = 0;
    int last_col = 0;
    pixel src_px, dst_px;

    /* The source image must be untouched */
    if (check_orig(dim))
        return 1;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            int src_idx = RIDX(row, col, dim);
            int dst_idx = RIDX(dim-1-col, row, dim);

            if (compare_pixels(orig[src_idx], result[dst_idx])) {
                mismatches++;
                last_row = row;
                last_col = col;
                src_px = orig[src_idx];
                dst_px = result[dst_idx];
            }
        }
    }

    if (mismatches) {
        printf("\n");
        printf("ERROR: Dimension=%d, %d errors\n", dim, mismatches);
        printf("E.g., The following two pixels should have equal value:\n");
        printf("src[%d][%d].{red,green,blue} = {%d,%d,%d}\n",
               last_row, last_col, src_px.red, src_px.green, src_px.blue);
        printf("dst[%d][%d].{red,green,blue} = {%d,%d,%d}\n",
               (dim-1-last_col), last_row, dst_px.red, dst_px.green, dst_px.blue);
    }
    return mismatches;
}
/*
 * check_average - Reference computation for one smoothed pixel.
 *
 * Averages each color channel of src over the window of rows i-1..i+1
 * and columns j-1..j+1, clipped to the bounds of the dim x dim image,
 * using integer division by the number of pixels in the window.
 */
static pixel check_average(int dim, int i, int j, pixel *src) {
    pixel avg;
    int count = 0;
    int red_sum = 0, green_sum = 0, blue_sum = 0;
    int row, col;
    /* Window bounds, clipped to the image */
    int row_lo = (i-1 > 0) ? i-1 : 0;
    int col_lo = (j-1 > 0) ? j-1 : 0;
    int row_hi = (i+1 < dim-1) ? i+1 : dim-1;
    int col_hi = (j+1 < dim-1) ? j+1 : dim-1;

    for (row = row_lo; row <= row_hi; row++) {
        for (col = col_lo; col <= col_hi; col++) {
            const pixel *p = &src[RIDX(row, col, dim)];

            count++;
            red_sum += (int) p->red;
            green_sum += (int) p->green;
            blue_sum += (int) p->blue;
        }
    }

    avg.red = (unsigned short) (red_sum/count);
    avg.green = (unsigned short) (green_sum/count);
    avg.blue = (unsigned short) (blue_sum/count);
    return avg;
}
/*
 * check_smooth - Validate the output of a smooth kernel.
 *
 * Returns 1 immediately if the source image was modified.  Otherwise
 * each result pixel is compared with the reference average computed by
 * check_average(); the number of mismatches is returned, and when it is
 * non-zero the last mismatch is printed with its expected value.
 */
static int check_smooth(int dim) {
    int mismatches = 0;
    int row, col;
    int last_row = 0;
    int last_col = 0;
    pixel expected, actual;

    /* The source image must be untouched */
    if (check_orig(dim))
        return 1;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            pixel reference = check_average(dim, row, col, orig);

            if (!compare_pixels(result[RIDX(row, col, dim)], reference))
                continue;
            mismatches++;
            last_row = row;
            last_col = col;
            actual = result[RIDX(row, col, dim)];
            expected = reference;
        }
    }

    if (mismatches) {
        printf("\n");
        printf("ERROR: Dimension=%d, %d errors\n", dim, mismatches);
        printf("E.g., \n");
        printf("You have dst[%d][%d].{red,green,blue} = {%d,%d,%d}\n",
               last_row, last_col, actual.red, actual.green, actual.blue);
        printf("It should be dst[%d][%d].{red,green,blue} = {%d,%d,%d}\n",
               last_row, last_col, expected.red, expected.green, expected.blue);
    }
    return mismatches;
}
void func_wrapper(void *arglist[])
{
pixel *src, *dst;
int mydim;
lab_test_func f;
f = (lab_test_func) arglist[0];
mydim = *((int *) arglist[1]);
src = (pixel *) arglist[2];
dst = (pixel *) arglist[3];
(*f)(mydim, src, dst);
return;
}
/*
* Run registered rotate kernel number idx once on the current images
* (orig as source, result as destination) with dimension dim.
*/
void run_rotate_benchmark(int idx, int dim)
{
benchmarks_rotate[idx].tfunct(dim, orig, result);
}
/*
* test_rotate - Check and time one registered rotate kernel.
*
* For each of the DIM_CNT test dimensions the kernel is first checked
* for correctness on an ODD_DIM image and on an image of the test
* dimension; any failure prints a message and abandons the benchmark
* (no table is printed and the best score is left unchanged). The
* kernel is then timed with fcyc_v() and its cycles-per-element stored.
* Finally a table of CPEs, baseline CPEs and speedups is printed and
* rotate_maxmean / rotate_maxmean_desc are updated if the geometric-mean
* speedup is the best seen so far.
*/
void test_rotate(int bench_index)
{
int i;
int test_num;
char *description = benchmarks_rotate[bench_index].description;
for (test_num = 0; test_num < DIM_CNT; test_num++) {
int dim;
/* Check correctness for a dimension that is not a power of two */
create(ODD_DIM);
run_rotate_benchmark(bench_index, ODD_DIM);
if (check_rotate(ODD_DIM)) {
printf("Benchmark \"%s\" failed correctness check for dimension %d.\n",
benchmarks_rotate[bench_index].description, ODD_DIM);
return;
}
/* Create a test image of the required dimension */
dim = test_dim_rotate[test_num];
create(dim);
#ifdef DEBUG
printf("DEBUG: Running benchmark \"%s\"\n", benchmarks_rotate[bench_index].description);
#endif
/* Check that the code works */
run_rotate_benchmark(bench_index, dim);
if (check_rotate(dim)) {
printf("Benchmark \"%s\" failed correctness check for dimension %d.\n",
benchmarks_rotate[bench_index].description, dim);
return;
}
/* Measure CPE */
{
double num_cycles, cpe;
int tmpdim = dim;
void *arglist[4];
double dimension = (double) dim;
/* One unit of work per pixel */
double work = dimension*dimension;
#ifdef DEBUG
printf("DEBUG: dimension=%.1f\n",dimension);
printf("DEBUG: work=%.1f\n",work);
#endif
/* Argument order must match what func_wrapper() unpacks */
arglist[0] = (void *) benchmarks_rotate[bench_index].tfunct;
arglist[1] = (void *) &tmpdim;
arglist[2] = (void *) orig;
arglist[3] = (void *) result;
/* Fresh images for the timed runs */
create(dim);
num_cycles = fcyc_v((test_funct_v)&func_wrapper, arglist);
cpe = num_cycles/work;
benchmarks_rotate[bench_index].cpes[test_num] = cpe;
}
}
/*
* Print results as a table
*/
printf("Rotate: Version = %s:\n", description);
printf("Dim\t");
for (i = 0; i < DIM_CNT; i++)
printf("\t%d", test_dim_rotate[i]);
printf("\tMean\n");
printf("Your CPEs");
for (i = 0; i < DIM_CNT; i++) {
printf("\t%.1f", benchmarks_rotate[bench_index].cpes[i]);
}
printf("\n");
printf("Baseline CPEs");
for (i = 0; i < DIM_CNT; i++) {
printf("\t%.1f", rotate_baseline_cpes[i]);
}
printf("\n");
/* Compute Speedup */
{
double prod, ratio, mean;
prod = 1.0; /* Geometric mean */
printf("Speedup\t");
for (i = 0; i < DIM_CNT; i++) {
/* Speedup = baseline CPE / measured CPE; a non-positive measurement is fatal */
if (benchmarks_rotate[bench_index].cpes[i] > 0.0) {
ratio = rotate_baseline_cpes[i]/
benchmarks_rotate[bench_index].cpes[i];
}
else {
printf("Fatal Error: Non-positive CPE value...\n");
exit(EXIT_FAILURE);
}
prod *= ratio;
printf("\t%.1f", ratio);
}
/* Geometric mean */
mean = pow(prod, 1.0/(double) DIM_CNT);
printf("\t%.1f", mean);
printf("\n\n");
/* Remember the best-scoring version for the final summary */
if (mean > rotate_maxmean) {
rotate_maxmean = mean;
rotate_maxmean_desc = benchmarks_rotate[bench_index].description;
}
}
#ifdef DEBUG
fflush(stdout);
#endif
return;
}
/*
* Run registered smooth kernel number idx once on the current images
* (orig as source, result as destination) with dimension dim.
*/
void run_smooth_benchmark(int idx, int dim)
{
benchmarks_smooth[idx].tfunct(dim, orig, result);
}
/*
* test_smooth - Check and time one registered smooth kernel.
*
* For each of the DIM_CNT test dimensions the kernel is first checked
* for correctness on an ODD_DIM image and on an image of the test
* dimension; any failure prints a message and abandons the benchmark
* (no table is printed and the best score is left unchanged). The
* kernel is then timed with fcyc_v() and its cycles-per-element stored.
* Finally a table of CPEs, baseline CPEs and speedups is printed and
* smooth_maxmean / smooth_maxmean_desc are updated if the geometric-mean
* speedup is the best seen so far.
*/
void test_smooth(int bench_index)
{
int i;
int test_num;
char *description = benchmarks_smooth[bench_index].description;
for(test_num=0; test_num < DIM_CNT; test_num++) {
int dim;
/* Check correctness for a dimension that is not a power of two */
create(ODD_DIM);
run_smooth_benchmark(bench_index, ODD_DIM);
if (check_smooth(ODD_DIM)) {
printf("Benchmark \"%s\" failed correctness check for dimension %d.\n",
benchmarks_smooth[bench_index].description, ODD_DIM);
return;
}
/* Create a test image of the required dimension */
dim = test_dim_smooth[test_num];
create(dim);
#ifdef DEBUG
printf("DEBUG: Running benchmark \"%s\"\n", benchmarks_smooth[bench_index].description);
#endif
/* Check that the code works */
run_smooth_benchmark(bench_index, dim);
if (check_smooth(dim)) {
printf("Benchmark \"%s\" failed correctness check for dimension %d.\n",
benchmarks_smooth[bench_index].description, dim);
return;
}
/* Measure CPE */
{
double num_cycles, cpe;
int tmpdim = dim;
void *arglist[4];
double dimension = (double) dim;
/* One unit of work per pixel */
double work = dimension*dimension;
#ifdef DEBUG
printf("DEBUG: dimension=%.1f\n",dimension);
printf("DEBUG: work=%.1f\n",work);
#endif
/* Argument order must match what func_wrapper() unpacks */
arglist[0] = (void *) benchmarks_smooth[bench_index].tfunct;
arglist[1] = (void *) &tmpdim;
arglist[2] = (void *) orig;
arglist[3] = (void *) result;
/* Fresh images for the timed runs */
create(dim);
num_cycles = fcyc_v((test_funct_v)&func_wrapper, arglist);
cpe = num_cycles/work;
benchmarks_smooth[bench_index].cpes[test_num] = cpe;
}
}
/* Print results as a table */
printf("Smooth: Version = %s:\n", description);
printf("Dim\t");
for (i = 0; i < DIM_CNT; i++)
printf("\t%d", test_dim_smooth[i]);
printf("\tMean\n");
printf("Your CPEs");
for (i = 0; i < DIM_CNT; i++) {
printf("\t%.1f", benchmarks_smooth[bench_index].cpes[i]);
}
printf("\n");
printf("Baseline CPEs");
for (i = 0; i < DIM_CNT; i++) {
printf("\t%.1f", smooth_baseline_cpes[i]);
}
printf("\n");
/* Compute speedup */
{
double prod, ratio, mean;
prod = 1.0; /* Geometric mean */
printf("Speedup\t");
for (i = 0; i < DIM_CNT; i++) {
/* Speedup = baseline CPE / measured CPE; a non-positive measurement is fatal */
if (benchmarks_smooth[bench_index].cpes[i] > 0.0) {
ratio = smooth_baseline_cpes[i]/
benchmarks_smooth[bench_index].cpes[i];
}
else {
printf("Fatal Error: Non-positive CPE value...\n");
exit(EXIT_FAILURE);
}
prod *= ratio;
printf("\t%.1f", ratio);
}
/* Geometric mean */
mean = pow(prod, 1.0/(double) DIM_CNT);
printf("\t%.1f", mean);
printf("\n\n");
/* Remember the best-scoring version for the final summary */
if (mean > smooth_maxmean) {
smooth_maxmean = mean;
smooth_maxmean_desc = benchmarks_smooth[bench_index].description;
}
}
return;
}
/*
 * usage - Write the command-line synopsis and option summary to stderr,
 * then terminate the program with EXIT_FAILURE.  Does not return.
 */
void usage(char *progname)
{
    static const char *const option_help[] = {
        "Options:\n",
        " -h Print this message\n",
        " -q Quit after dumping (use with -d )\n",
        " -g Autograder mode: checks only rotate() and smooth()\n",
        " -f <file> Get test function names from dump file <file>\n",
        " -d <file> Emit a dump file <file> for later use with -f\n",
    };
    size_t line;

    fprintf(stderr, "Usage: %s [-hqg] [-f <func_file>] [-d <dump_file>]\n", progname);
    for (line = 0; line < sizeof option_help / sizeof option_help[0]; line++)
        fputs(option_help[line], stderr);
    exit(EXIT_FAILURE);
}
/*
 * main - Entry point of the Performance Lab driver.
 *
 * Registers every kernel defined in kernels.c, parses the command line,
 * optionally dumps the registered descriptions to a file (-d), prints
 * the team information, selects which kernels to run (autograder mode,
 * a function list given with -f, or all of them), configures the fcyc
 * timing package, runs the selected rotate and smooth benchmarks, and
 * prints the best scores.
 *
 * Returns 0 on normal completion; exits with a non-zero status on usage
 * errors, unreadable/unwritable files, or an unfilled team struct.
 */
int main(int argc, char *argv[])
{
    int i;
    int quit_after_dump = 0;
    int skip_teamname_check = 0;
    int autograder = 0;
    int seed = 1729;
    /*
     * getopt() returns an int and signals the end of the options with
     * -1.  Storing the result in a plain char would make the loop below
     * never terminate on platforms where char is unsigned.
     */
    int c;
    char *bench_func_file = NULL;
    char *func_dump_file = NULL;

    /* register all the defined functions */
    register_rotate_functions();
    register_smooth_functions();

    /* parse command line args */
    while ((c = getopt(argc, argv, "tgqf:d:s:h")) != -1) {
        switch (c) {
        case 't': /* skip team name check (hidden flag) */
            skip_teamname_check = 1;
            break;
        case 's': /* seed for random number generator (hidden flag) */
            seed = atoi(optarg);
            break;
        case 'g': /* autograder mode (checks only rotate() and smooth()) */
            autograder = 1;
            break;
        case 'q':
            quit_after_dump = 1;
            break;
        case 'f': /* get names of benchmark functions from this file */
            /* optarg points into argv, which outlives main; no copy needed */
            bench_func_file = optarg;
            break;
        case 'd': /* dump names of benchmark functions to this file */
            func_dump_file = optarg;
            {
                int k;
                FILE *fp = fopen(func_dump_file, "w");

                if (fp == NULL) {
                    printf("Can't open file %s\n",func_dump_file);
                    exit(-5);
                }
                for(k = 0; k < rotate_benchmark_count; k++) {
                    fprintf(fp, "R:%s\n", benchmarks_rotate[k].description);
                }
                for(k = 0; k < smooth_benchmark_count; k++) {
                    fprintf(fp, "S:%s\n", benchmarks_smooth[k].description);
                }
                fclose(fp);
            }
            break;
        case 'h': /* print help message */
            usage(argv[0]); /* does not return */
            break;
        default: /* unrecognized argument */
            usage(argv[0]); /* does not return */
            break;
        }
    }

    if (quit_after_dump)
        exit(EXIT_SUCCESS);

    /* Print team info */
    if (!skip_teamname_check) {
        if (strcmp("bovik", team.team) == 0) {
            printf("%s: Please fill in the team struct in kernels.c.\n", argv[0]);
            exit(1);
        }
        printf("Teamname: %s\n", team.team);
        printf("Member 1: %s\n", team.name1);
        printf("Email 1: %s\n", team.email1);
        if (*team.name2 || *team.email2) {
            printf("Member 2: %s\n", team.name2);
            printf("Email 2: %s\n", team.email2);
        }
        printf("\n");
    }

    srand(seed);

    /*
     * If we are running in autograder mode, we will only test
     * the rotate() and smooth() functions.
     */
    if (autograder) {
        rotate_benchmark_count = 1;
        smooth_benchmark_count = 1;

        benchmarks_rotate[0].tfunct = rotate;
        benchmarks_rotate[0].description = "rotate() function";
        benchmarks_rotate[0].valid = 1;

        benchmarks_smooth[0].tfunct = smooth;
        benchmarks_smooth[0].description = "smooth() function";
        benchmarks_smooth[0].valid = 1;
    }
    /*
     * If the user specified a file name using -f, then use
     * the file to determine the versions of rotate and smooth to test
     */
    else if (bench_func_file != NULL) {
        char flag;
        char func_line[256];
        FILE *fp = fopen(bench_func_file, "r");

        if (fp == NULL) {
            printf("Can't open file %s\n",bench_func_file);
            exit(-5);
        }

        /* Each line has the form "R:<description>" or "S:<description>" */
        while (fgets(func_line, sizeof func_line, fp) != NULL) {
            char *rest = func_line;
            char *token = strsep(&rest, ":");
            char *func_name;

            flag = token[0];
            /*
             * strsep() sets rest to NULL when the line holds no ':'.
             * Such a line carries no description to match, so skip it
             * instead of handing a NULL pointer to strcmp() below.
             */
            if (rest == NULL)
                continue;
            func_name = strsep(&rest, "\n");
#ifdef DEBUG
            printf("Function Description is %s\n",func_name);
#endif

            if (flag == 'R') {
                for(i=0; i<rotate_benchmark_count; i++) {
                    if (strcmp(benchmarks_rotate[i].description, func_name) == 0)
                        benchmarks_rotate[i].valid = 1;
                }
            }
            else if (flag == 'S') {
                for(i=0; i<smooth_benchmark_count; i++) {
                    if (strcmp(benchmarks_smooth[i].description, func_name) == 0)
                        benchmarks_smooth[i].valid = 1;
                }
            }
        }

        fclose(fp);
    }
    /*
     * If the user didn't specify a dump file using -f, then
     * test all of the functions
     */
    else { /* set all valid flags to 1 */
        for (i = 0; i < rotate_benchmark_count; i++)
            benchmarks_rotate[i].valid = 1;
        for (i = 0; i < smooth_benchmark_count; i++)
            benchmarks_smooth[i].valid = 1;
    }

    /* Set measurement (fcyc) parameters */
    set_fcyc_cache_size(1 << 14); /* 16 KB cache size */
    set_fcyc_clear_cache(1); /* clear the cache before each measurement */
    set_fcyc_compensate(1); /* try to compensate for timer overhead */

    for (i = 0; i < rotate_benchmark_count; i++) {
        if (benchmarks_rotate[i].valid)
            test_rotate(i);
    }
    for (i = 0; i < smooth_benchmark_count; i++) {
        if (benchmarks_smooth[i].valid)
            test_smooth(i);
    }

    if (autograder) {
        printf("\nbestscores:%.1f:%.1f:\n", rotate_maxmean, smooth_maxmean);
    }
    else {
        /*
         * The description pointers stay NULL when no benchmark of that
         * kind produced a score; passing NULL to %s is undefined, so a
         * placeholder is printed instead.
         */
        printf("Summary of Your Best Scores:\n");
        printf(" Rotate: %3.1f (%s)\n", rotate_maxmean,
               rotate_maxmean_desc ? rotate_maxmean_desc : "none");
        printf(" Smooth: %3.1f (%s)\n", smooth_maxmean,
               smooth_maxmean_desc ? smooth_maxmean_desc : "none");
    }

    return 0;
}

270
perf/fcyc.c Normal file
View File

@ -0,0 +1,270 @@
/* Compute time used by function f */
#include <stdlib.h>
#include <sys/times.h>
#include <stdio.h>
#include "clock.h"
#include "fcyc.h"
/* Compile-time defaults for the measurement parameters below */
#define K 3
#define MAXSAMPLES 20
#define EPSILON 0.01
#define COMPENSATE 0
#define CLEAR_CACHE 0
#define CACHE_BYTES (1<<19)
#define CACHE_BLOCK 32
/* Current parameter values; each can be changed through a set_fcyc_* function */
static int kbest = K;
static int compensate = COMPENSATE;
static int clear_cache = CLEAR_CACHE;
static int maxsamples = MAXSAMPLES;
static double epsilon = EPSILON;
static int cache_bytes = CACHE_BYTES;
static int cache_block = CACHE_BLOCK;
/* Buffer read by clear(); allocated on first use */
static int *cache_buf = NULL;
/* The kbest smallest samples seen so far, kept in ascending order */
static double *values = NULL;
/* Number of samples added since the last init_sampler() */
static int samplecount = 0;
/* When 0, fcyc()/fcyc_v() free the values array before returning */
#define KEEP_VALS 0
/* When non-zero, every sample is additionally recorded in samples[] */
#define KEEP_SAMPLES 0
#if KEEP_SAMPLES
static double *samples = NULL;
#endif
/*
 * init_sampler - Start a new sampling process.
 *
 * Releases any previous sample storage, allocates a zeroed array for
 * the kbest smallest values (and, when KEEP_SAMPLES is enabled, for the
 * full sample history), and resets the sample count.  Terminates the
 * program if the storage cannot be allocated, since add_sample() would
 * otherwise write through a null pointer.
 */
static void init_sampler()
{
    /* free(NULL) is a no-op, so no guard is needed */
    free(values);
    values = calloc(kbest, sizeof(double));
    if (!values) {
        fprintf(stderr, "Fatal error. Calloc returned null when trying to allocate %d values\n", kbest);
        exit(1);
    }
#if KEEP_SAMPLES
    free(samples);
    /* Allocate extra for wraparound analysis */
    samples = calloc(maxsamples+kbest, sizeof(double));
    if (!samples) {
        fprintf(stderr, "Fatal error. Calloc returned null when trying to allocate %d samples\n", maxsamples+kbest);
        exit(1);
    }
#endif
    samplecount = 0;
}
/*
 * add_sample - Record one measurement.
 *
 * While fewer than kbest samples have been seen the value is appended;
 * afterwards it replaces the current largest kept value only if it is
 * smaller.  The kept values are then restored to ascending order by
 * moving the new entry towards the front.
 */
static void add_sample(double val)
{
    int slot = 0;

    if (samplecount < kbest) {
        /* Still filling the array */
        slot = samplecount;
        values[slot] = val;
    } else if (val < values[kbest-1]) {
        /* Better than the worst kept value: take its place */
        slot = kbest-1;
        values[slot] = val;
    }
#if KEEP_SAMPLES
    samples[samplecount] = val;
#endif
    samplecount++;

    /* Insertion sort: bubble the new entry down to its position */
    for (; slot > 0 && values[slot-1] > values[slot]; slot--) {
        double swapped = values[slot-1];

        values[slot-1] = values[slot];
        values[slot] = swapped;
    }
}
/*
 * has_converged - Non-zero once at least kbest samples have been taken
 * and the largest of the kbest kept values is within a factor of
 * (1 + epsilon) of the smallest.
 */
static int has_converged()
{
    if (samplecount < kbest)
        return 0;
    return (1 + epsilon)*values[0] >= values[kbest-1];
}
/* Code to clear cache */
/* Volatile so the reads in clear() cannot be optimized away */
static volatile int sink = 0;
/*
 * clear - Read through a buffer of cache_bytes bytes, touching one int
 * per cache block, so that previously cached data is displaced.
 *
 * The buffer is allocated on first use (or after the size changed) and
 * zero-filled, so the reads below never see uninitialized memory.  The
 * stride is clamped to at least one int: a cache_block smaller than
 * sizeof(int) would otherwise give a stride of 0 and loop forever.
 */
static void clear()
{
    int x = sink;
    int *cptr, *cend;
    int incr = cache_block/sizeof(int);

    if (incr < 1)
        incr = 1;

    if (!cache_buf) {
        cache_buf = malloc(cache_bytes);
        if (!cache_buf) {
            fprintf(stderr, "Fatal error. Malloc returned null when trying to clear cache\n");
            exit(1);
        }
        /* Give every element a defined value before it is read */
        cend = cache_buf + cache_bytes/sizeof(int);
        for (cptr = cache_buf; cptr < cend; cptr++)
            *cptr = 0;
    }

    cptr = (int *) cache_buf;
    cend = cptr + cache_bytes/sizeof(int);
    while (cptr < cend) {
        x += *cptr;
        cptr += incr;
    }
    sink = x;
}
/*
* fcyc - Measure the number of cycles used by f(params).
*
* Calls f repeatedly, recording each run's cycle count, until the kbest
* smallest measurements agree to within epsilon or maxsamples runs have
* been made, and returns the smallest measurement. When clear_cache is
* set the cache is cleared before every run; when compensate is set the
* timer-interrupt-compensating counters are used instead of the raw ones.
*/
double fcyc(test_funct f, int *params)
{
double result;
init_sampler();
/* The two branches differ only in which counter pair brackets the call */
if (compensate) {
do {
double cyc;
if (clear_cache)
clear();
start_comp_counter();
f(params);
cyc = get_comp_counter();
add_sample(cyc);
} while (!has_converged() && samplecount < maxsamples);
} else {
do {
double cyc;
if (clear_cache)
clear();
start_counter();
f(params);
cyc = get_counter();
add_sample(cyc);
} while (!has_converged() && samplecount < maxsamples);
}
#ifdef DEBUG
{
int i;
printf(" %d smallest values: [", kbest);
for (i = 0; i < kbest; i++)
printf("%.0f%s", values[i], i==kbest-1 ? "]\n" : ", ");
}
#endif
/* values[] is sorted ascending, so element 0 is the best sample */
result = values[0];
#if !KEEP_VALS
free(values);
values = NULL;
#endif
return result;
}
/* A version of the above function added so as to pass arguments of
any type to the function
Added by Sanjit, Fall 2001

Calls f(params) repeatedly, recording each run's cycle count, until
the kbest smallest measurements agree to within epsilon or maxsamples
runs have been made, and returns the smallest measurement. When
clear_cache is set the cache is cleared before every run; when
compensate is set the timer-interrupt-compensating counters are used.
*/
double fcyc_v(test_funct_v f, void *params[])
{
double result;
init_sampler();
/* The two branches differ only in which counter pair brackets the call */
if (compensate) {
do {
double cyc;
if (clear_cache)
clear();
start_comp_counter();
f(params);
cyc = get_comp_counter();
add_sample(cyc);
} while (!has_converged() && samplecount < maxsamples);
} else {
do {
double cyc;
if (clear_cache)
clear();
start_counter();
f(params);
cyc = get_counter();
add_sample(cyc);
} while (!has_converged() && samplecount < maxsamples);
}
#ifdef DEBUG
{
int i;
printf(" %d smallest values: [", kbest);
for (i = 0; i < kbest; i++)
printf("%.0f%s", values[i], i==kbest-1 ? "]\n" : ", ");
}
#endif
/* values[] is sorted ascending, so element 0 is the best sample */
result = values[0];
#if !KEEP_VALS
free(values);
values = NULL;
#endif
return result;
}
/***********************************************************/
/* Set the various parameters used by measurement routines */
/* When set, will run code to clear cache before each measurement
Default = 0
The flag is read by fcyc()/fcyc_v() before every timed call.
*/
void set_fcyc_clear_cache(int clear)
{
clear_cache = clear;
}
/* Set size of cache to use when clearing cache
Default = 1<<19 (512KB)
A change of size discards the current buffer, so that the next cache
clear allocates one of the new size.
*/
void set_fcyc_cache_size(int bytes)
{
    if (bytes == cache_bytes)
        return;
    cache_bytes = bytes;
    /* free(NULL) is a no-op, so the buffer can be released unconditionally */
    free(cache_buf);
    cache_buf = NULL;
}
/* Set size of cache block
Default = 32
clear() divides this by sizeof(int) to get the stride, in ints, with
which it walks the buffer.
*/
void set_fcyc_cache_block(int bytes) {
cache_block = bytes;
}
/* When set, will attempt to compensate for timer interrupt overhead
Default = 0
Selects start_comp_counter()/get_comp_counter() instead of
start_counter()/get_counter() in fcyc() and fcyc_v().
*/
void set_fcyc_compensate(int compensate_arg)
{
compensate = compensate_arg;
}
/* Value of K in K-best
Default = 3
Values below 1 are ignored and the previous setting is kept: the
sampler indexes its array of best values with kbest-1, so K must be
at least 1.
*/
void set_fcyc_k(int k)
{
    if (k < 1)
        return;
    kbest = k;
}
/* Maximum number of samples attempting to find K-best within some tolerance.
When exceeded, just return best sample found.
Default = 20
The measurement loops in fcyc()/fcyc_v() stop once samplecount reaches
this value, even if the samples have not converged.
*/
void set_fcyc_maxsamples(int maxsamples_arg)
{
maxsamples = maxsamples_arg;
}
/* Tolerance required for K-best
Default = 0.01
has_converged() accepts the samples when the K-th smallest value is
at most (1 + epsilon) times the smallest.
*/
void set_fcyc_epsilon(double epsilon_arg)
{
epsilon = epsilon_arg;
}

55
perf/fcyc.h Normal file
View File

@ -0,0 +1,55 @@
/* Fcyc measures the speed of any "test function." Such a function
is passed a list of integer parameters, which it may interpret
in any way it chooses.
*/
#ifndef FCYC_H_
#define FCYC_H_

/* Test function taking an array of int parameters */
typedef void (*test_funct)(int *);
/* Test function taking an opaque pointer (used with fcyc_v) */
typedef void (*test_funct_v)(void *);

/* Compute number of cycles used by function f on given set of parameters */
double fcyc(test_funct f, int* params);
double fcyc_v(test_funct_v f, void* params[]);

/***********************************************************/
/* Set the various parameters used by measurement routines */

/* When set, will run code to clear cache before each measurement
Default = 0
*/
void set_fcyc_clear_cache(int clear);

/* Set size of cache to use when clearing cache
Default = 1<<19 (512KB)
*/
void set_fcyc_cache_size(int bytes);

/* Set size of cache block
Default = 32
*/
void set_fcyc_cache_block(int bytes);

/* When set, will attempt to compensate for timer interrupt overhead
Default = 0
*/
void set_fcyc_compensate(int compensate);

/* Value of K in K-best
Default = 3
*/
void set_fcyc_k(int k);

/* Maximum number of samples attempting to find K-best within some tolerance.
When exceeded, just return best sample found.
Default = 20
*/
void set_fcyc_maxsamples(int maxsamples);

/* Tolerance required for K-best
Default = 0.01
*/
void set_fcyc_epsilon(double epsilon);

#endif /* FCYC_H_ */

183
perf/kernels.c Normal file
View File

@ -0,0 +1,183 @@
/********************************************************
* Kernels to be optimized for the CS:APP Performance Lab
********************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "defs.h"
/*
 * Team identification record.
 * Please fill in the following team struct: a team name, then the full
 * name and email address of the first member, then the same two fields
 * for the second member (leave both as empty strings if there is none).
 * NOTE(review): team_t is presumably declared in defs.h -- confirm.
 */
team_t team = {
"bovik", /* Team name */
"Harry Q. Bovik", /* First member full name */
"bovik@nowhere.edu", /* First member email address */
"", /* Second member full name (leave blank if none) */
"" /* Second member email addr (leave blank if none) */
};
/***************
* ROTATE KERNEL
***************/
/******************************************************
* Your different versions of the rotate kernel go here
******************************************************/
/*
 * naive_rotate - Baseline (unoptimized) rotate kernel.
 *
 * dim: image dimension; both loops run over 0..dim-1.
 * src: input pixels, read at RIDX(row, col, dim).
 * dst: output pixels, written at RIDX(dim-1-col, row, dim).
 */
char naive_rotate_descr[] = "naive_rotate: Naive baseline implementation";
void naive_rotate(int dim, pixel *src, pixel *dst)
{
    int row, col;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            /* Source element (row, col) lands at (dim-1-col, row). */
            dst[RIDX(dim-1-col, row, dim)] = src[RIDX(row, col, dim)];
        }
    }
}
/*
 * rotate - The current working version of the rotate kernel.
 * IMPORTANT: This is the version you will be graded on.
 *
 * For now it simply delegates to naive_rotate with the same arguments.
 */
char rotate_descr[] = "rotate: Current working version";
void rotate(int dim, pixel *src, pixel *dst)
{
    /* No optimized variant yet: fall back to the baseline. */
    naive_rotate(dim, src, dst);
}
/*********************************************************************
 * register_rotate_functions - Register all of your different versions
 *     of the rotate kernel with the driver by calling
 *     add_rotate_function() once for each test function.  When you run
 *     the driver program, it will test and report the performance of
 *     each registered test function.
 *
 *     Takes no arguments and returns nothing.  The parameter list is
 *     spelled (void) so the definition is a real prototype rather than
 *     an old-style declarator with unspecified parameters.
 *********************************************************************/
void register_rotate_functions(void)
{
    add_rotate_function(&naive_rotate, naive_rotate_descr);
    add_rotate_function(&rotate, rotate_descr);
    /* ... Register additional test functions here */
}
/***************
* SMOOTH KERNEL
**************/
/***************************************************************
* Various typedefs and helper functions for the smooth function
* You may modify these any way you like.
**************************************************************/
/*
 * pixel_sum - Running totals used to compute an averaged pixel value.
 */
typedef struct {
    int red, green, blue;   /* per-channel sums of the accumulated pixels */
    int num;                /* how many pixels have been accumulated */
} pixel_sum;
/* min - Return the smaller of two integers. */
static int min(int a, int b)
{
    if (a < b) {
        return a;
    }
    return b;
}

/* max - Return the larger of two integers. */
static int max(int a, int b)
{
    if (a > b) {
        return a;
    }
    return b;
}
/*
 * initialize_pixel_sum - Reset an accumulator: the three channel sums
 *     and the pixel count are all set to 0.
 */
static void initialize_pixel_sum(pixel_sum *sum)
{
    sum->red = 0;
    sum->green = 0;
    sum->blue = 0;
    sum->num = 0;
}
/*
 * accumulate_sum - Add one pixel into an accumulator.
 *     Each channel of p is added to the matching field of sum, and the
 *     pixel count is incremented by one.
 */
static void accumulate_sum(pixel_sum *sum, pixel p)
{
    const int r = (int) p.red;
    const int g = (int) p.green;
    const int b = (int) p.blue;

    sum->red += r;
    sum->green += g;
    sum->blue += b;
    sum->num += 1;
}
/*
 * assign_sum_to_pixel - Store the averaged pixel value in current_pixel.
 *     Each channel is its sum divided by sum.num using integer division,
 *     then cast to unsigned short.  sum.num must be nonzero, since it is
 *     used as the divisor.
 */
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum)
{
    const int count = sum.num;

    current_pixel->red = (unsigned short) (sum.red / count);
    current_pixel->green = (unsigned short) (sum.green / count);
    current_pixel->blue = (unsigned short) (sum.blue / count);
}
/*
 * avg - Returns the averaged pixel value at (i,j).
 *     Averages src over the window of rows i-1..i+1 and columns
 *     j-1..j+1, with both ranges clamped to 0..dim-1.
 */
static pixel avg(int dim, int i, int j, pixel *src)
{
    /* Window bounds, clamped to the valid index range. */
    const int row_lo = max(i-1, 0);
    const int row_hi = min(i+1, dim-1);
    const int col_lo = max(j-1, 0);
    const int col_hi = min(j+1, dim-1);
    int row, col;
    pixel_sum window;
    pixel result;

    initialize_pixel_sum(&window);
    for (row = row_lo; row <= row_hi; row++) {
        for (col = col_lo; col <= col_hi; col++) {
            accumulate_sum(&window, src[RIDX(row, col, dim)]);
        }
    }
    assign_sum_to_pixel(&result, window);
    return result;
}
/******************************************************
* Your different versions of the smooth kernel go here
******************************************************/
/*
 * naive_smooth - Baseline (unoptimized) smooth kernel.
 *
 * dim: image dimension; both loops run over 0..dim-1.
 * src: input pixels, read through avg().
 * dst: output pixels; element RIDX(row, col, dim) receives
 *      avg(dim, row, col, src).
 */
char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation";
void naive_smooth(int dim, pixel *src, pixel *dst)
{
    int row, col;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            dst[RIDX(row, col, dim)] = avg(dim, row, col, src);
        }
    }
}
/*
 * smooth - The current working version of the smooth kernel.
 * IMPORTANT: This is the version you will be graded on.
 *
 * For now it simply delegates to naive_smooth with the same arguments.
 */
char smooth_descr[] = "smooth: Current working version";
void smooth(int dim, pixel *src, pixel *dst)
{
    /* No optimized variant yet: fall back to the baseline. */
    naive_smooth(dim, src, dst);
}
/*********************************************************************
 * register_smooth_functions - Register all of your different versions
 *     of the smooth kernel with the driver by calling
 *     add_smooth_function() once for each test function.  When you run
 *     the driver program, it will test and report the performance of
 *     each registered test function.
 *
 *     Takes no arguments and returns nothing.  The parameter list is
 *     spelled (void) so the definition is a real prototype rather than
 *     an old-style declarator with unspecified parameters.
 *********************************************************************/
void register_smooth_functions(void)
{
    add_smooth_function(&smooth, smooth_descr);
    add_smooth_function(&naive_smooth, naive_smooth_descr);
    /* ... Register additional test functions here */
}