Compare commits

..

2 Commits

Author SHA1 Message Date
8ff13361f5 init cachelab 2025-08-28 13:14:03 +08:00
1b66891883 init perflab 2025-08-28 13:13:50 +08:00
26 changed files with 271056 additions and 0 deletions

33
cache/Makefile vendored Normal file
View File

@ -0,0 +1,33 @@
#
# Student makefile for Cache Lab
# Note: requires a 64-bit x86-64 system
#
# Toolchain settings: debug info, all warnings promoted to errors, C99, 64-bit.
CC = gcc
CFLAGS = -g -Wall -Werror -std=c99 -m64
# Default target: build the simulator and both transpose tools.
all: csim test-trans tracegen
# Generate a handin tar file each time you compile
# (the leading '-' makes make ignore a failure of the tar command)
-tar -cvf ${USER}-handin.tar csim.c trans.c
# Cache simulator, linked with the cachelab helper functions.
csim: csim.c cachelab.c cachelab.h
$(CC) $(CFLAGS) -o csim csim.c cachelab.c -lm
# Transpose test harness; links the separately compiled trans.o.
test-trans: test-trans.c trans.o cachelab.c cachelab.h
$(CC) $(CFLAGS) -o test-trans test-trans.c cachelab.c trans.o
# Trace generator; compiled with -O0 (optimization disabled).
tracegen: tracegen.c trans.o cachelab.c
$(CC) $(CFLAGS) -O0 -o tracegen tracegen.c trans.o cachelab.c
# Transpose functions, also compiled with -O0.
trans.o: trans.c
$(CC) $(CFLAGS) -O0 -c trans.c
#
# Clean the src directory: objects, tarballs, binaries, generated traces
# and the hidden result/marker files written by the tools.
#
clean:
rm -rf *.o
rm -f *.tar
rm -f csim
rm -f test-trans tracegen
rm -f trace.all trace.f*
rm -f .csim_results .marker

39
cache/README vendored Normal file
View File

@ -0,0 +1,39 @@
This is the handout directory for the CS:APP Cache Lab.
************************
Running the autograders:
************************
Before running the autograders, compile your code:
linux> make
Check the correctness of your simulator:
linux> ./test-csim
Check the correctness and performance of your transpose functions:
linux> ./test-trans -M 32 -N 32
linux> ./test-trans -M 64 -N 64
linux> ./test-trans -M 61 -N 67
Check everything at once (this is the program that your instructor runs):
linux> ./driver.py
******
Files:
******
# You will be modifying and handing in these two files
csim.c Your cache simulator
trans.c Your transpose function
# Tools for evaluating your simulator and transpose function
Makefile Builds the simulator and tools
README This file
driver.py* The driver program, runs test-csim and test-trans
cachelab.c Required helper functions
cachelab.h Required header file
csim-ref* The executable reference cache simulator
test-csim* Tests your cache simulator
test-trans.c Tests your transpose function
tracegen.c Helper program used by test-trans
traces/ Trace files used by test-csim.c

83
cache/cachelab.c vendored Normal file
View File

@ -0,0 +1,83 @@
/*
* cachelab.c - Cache Lab helper functions
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "cachelab.h"
#include <time.h>
trans_func_t func_list[MAX_TRANS_FUNCS];
int func_counter = 0;
/*
 * printSummary - Report the final simulation counters.
 *
 * Prints "hits:H misses:M evictions:E" on stdout and also stores the three
 * numbers, space separated, in the file ".csim_results" in the current
 * directory. Student cache simulators must call this function in order to
 * be properly autograded.
 *
 * hits, misses, evictions: the counters to report.
 * Aborts via assert() when ".csim_results" cannot be opened for writing.
 */
void printSummary(int hits, int misses, int evictions)
{
    FILE *results_file;

    /* Human-readable summary. */
    printf("hits:%d misses:%d evictions:%d\n", hits, misses, evictions);

    /* Machine-readable copy of the same numbers. */
    results_file = fopen(".csim_results", "w");
    assert(results_file);
    fprintf(results_file, "%d %d %d\n", hits, misses, evictions);
    fclose(results_file);
}
/*
 * initMatrix - Fill both matrices with pseudo-random values.
 *
 * M, N: dimensions; A has N rows of M ints, B has M rows of N ints.
 * The generator is reseeded from the current time on every call.
 * Random values are used instead of i+j because an i+j fill would
 * produce a symmetric matrix.
 */
void initMatrix(int M, int N, int A[N][M], int B[M][N])
{
    srand(time(NULL));

    for (int row = 0; row < N; row++) {
        for (int col = 0; col < M; col++) {
            /* Draw order (A first, then B) is kept per element. */
            A[row][col] = rand();
            B[col][row] = rand();
        }
    }
}
/*
 * randMatrix - Fill the N-row, M-column matrix A with pseudo-random values.
 *
 * The generator is reseeded from the current time on every call.
 * Random values are used instead of i+j because an i+j fill would
 * produce a symmetric matrix.
 */
void randMatrix(int M, int N, int A[N][M]) {
    srand(time(NULL));

    for (int row = 0; row < N; row++) {
        for (int col = 0; col < M; col++) {
            A[row][col] = rand();
        }
    }
}
/*
 * correctTrans - Reference transpose used to evaluate correctness.
 *
 * Writes B = A^T, where A has N rows of M ints and B has M rows of N ints.
 * A is only read.
 */
void correctTrans(int M, int N, int A[N][M], int B[M][N])
{
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < M; col++) {
            B[col][row] = A[row][col];
        }
    }
}
/*
* registerTransFunction - Add the given trans function into your list
* of functions to be tested
*/
void registerTransFunction(void (*trans)(int M, int N, int[N][M], int[M][N]),
char* desc)
{
func_list[func_counter].func_ptr = trans;
func_list[func_counter].description = desc;
func_list[func_counter].correct = 0;
func_list[func_counter].num_hits = 0;
func_list[func_counter].num_misses = 0;
func_list[func_counter].num_evictions =0;
func_counter++;
}

37
cache/cachelab.h vendored Normal file
View File

@ -0,0 +1,37 @@
/*
* cachelab.h - Prototypes for Cache Lab helper functions
*/
#ifndef CACHELAB_TOOLS_H
#define CACHELAB_TOOLS_H
#define MAX_TRANS_FUNCS 100
/* One registered transpose function together with its evaluation results. */
typedef struct trans_func{
void (*func_ptr)(int M,int N,int[N][M],int[M][N]); /* the transpose function itself */
char* description; /* description string supplied at registration */
char correct; /* 0 at registration; test-trans sets it to 1 after a successful tracegen run */
unsigned int num_hits; /* hit count reported by the reference simulator */
unsigned int num_misses; /* miss count reported by the reference simulator */
unsigned int num_evictions; /* eviction count reported by the reference simulator */
} trans_func_t;
/*
* printSummary - This function provides a standard way for your cache
* simulator to display its final hit and miss statistics
*/
void printSummary(int hits, /* number of hits */
int misses, /* number of misses */
int evictions); /* number of evictions */
/* Fill the matrix with data */
void initMatrix(int M, int N, int A[N][M], int B[M][N]);
/* The baseline trans function that produces correct results. */
void correctTrans(int M, int N, int A[N][M], int B[M][N]);
/* Add the given function to the function list */
void registerTransFunction(
void (*trans)(int M,int N,int[N][M],int[M][N]), char* desc);
#endif /* CACHELAB_TOOLS_H */

BIN
cache/csim-ref vendored Executable file

Binary file not shown.

7
cache/csim.c vendored Normal file
View File

@ -0,0 +1,7 @@
#include "cachelab.h"
/*
 * main - Placeholder cache simulator: reports zero hits, misses and
 * evictions through printSummary() and exits successfully.
 */
int main()
{
    const int hits = 0;
    const int misses = 0;
    const int evictions = 0;

    printSummary(hits, misses, evictions);
    return 0;
}

138
cache/driver.py vendored Executable file
View File

@ -0,0 +1,138 @@
#!/usr//bin/python
#
# driver.py - The driver tests the correctness of the student's cache
# simulator and the correctness and performance of their transpose
# function. It uses ./test-csim to check the correctness of the
# simulator and it runs ./test-trans on three different sized
# matrices (32x32, 64x64, and 61x67) to test the correctness and
# performance of the transpose function.
#
import subprocess;
import re;
import os;
import sys;
import optparse;
#
# computeMissScore - compute the score depending on the number of
#     cache misses
#
#   miss       - measured number of misses
#   lower      - at or below this many misses the full score is awarded
#   upper      - at or above this many misses the score is 0
#   full_score - maximum number of points
#
# Between the two bounds the score falls off linearly and is rounded to
# one decimal place.
#
def computeMissScore(miss, lower, upper, full_score):
    if miss <= lower:
        return full_score
    if miss >= upper:
        return 0

    # Local names chosen so that the builtin range() is not shadowed.
    excess = (miss - lower) * 1.0
    span = (upper - lower) * 1.0
    return round((1 - excess / span) * full_score, 1)
#
# main - Main function
#
# Runs ./test-csim once and ./test-trans for the 32x32, 64x64 and 61x67
# cases, converts the reported numbers into points and prints a summary
# table. With -A an AUTORESULT_STRING line is printed as well.
# The print statements below use Python 2 syntax.
#
def main():
# Configure maxscores here
maxscore= {};
maxscore['csim'] = 27
# NOTE(review): 'transc' is assigned here but not read anywhere in this function.
maxscore['transc'] = 1
maxscore['trans32'] = 8
maxscore['trans64'] = 8
maxscore['trans61'] = 10
# Parse the command line arguments
p = optparse.OptionParser()
p.add_option("-A", action="store_true", dest="autograde",
help="emit autoresult string for Autolab");
opts, args = p.parse_args()
autograde = opts.autograde
# Check the correctness of the cache simulator
print "Part A: Testing cache simulator"
print "Running ./test-csim"
p = subprocess.Popen("./test-csim",
shell=True, stdout=subprocess.PIPE)
stdout_data = p.communicate()[0]
# Emit the output from test-csim
# Every line is echoed except the TEST_CSIM_RESULTS line, whose integers
# are captured instead.
# NOTE(review): resultsim is only bound when such a line is present; if
# test-csim prints none, the later use of resultsim raises NameError.
stdout_data = re.split('\n', stdout_data)
for line in stdout_data:
if re.match("TEST_CSIM_RESULTS", line):
resultsim = re.findall(r'(\d+)', line)
else:
print "%s" % (line)
# Check the correctness and performance of the transpose function
# For each size only the TEST_TRANS_RESULTS line is kept (via grep) and its
# integers are extracted: element 0 is used as the correctness flag and
# element 1 as the miss count below.
# 32x32 transpose
print "Part B: Testing transpose function"
print "Running ./test-trans -M 32 -N 32"
p = subprocess.Popen("./test-trans -M 32 -N 32 | grep TEST_TRANS_RESULTS",
shell=True, stdout=subprocess.PIPE)
stdout_data = p.communicate()[0]
result32 = re.findall(r'(\d+)', stdout_data)
# 64x64 transpose
print "Running ./test-trans -M 64 -N 64"
p = subprocess.Popen("./test-trans -M 64 -N 64 | grep TEST_TRANS_RESULTS",
shell=True, stdout=subprocess.PIPE)
stdout_data = p.communicate()[0]
result64 = re.findall(r'(\d+)', stdout_data)
# 61x67 transpose
print "Running ./test-trans -M 61 -N 67"
p = subprocess.Popen("./test-trans -M 61 -N 67 | grep TEST_TRANS_RESULTS",
shell=True, stdout=subprocess.PIPE)
stdout_data = p.communicate()[0]
result61 = re.findall(r'(\d+)', stdout_data)
# Compute the scores for each step
# csim_cscore is indexed with [0] below, which relies on map() returning a
# list (Python 2 behaviour).
csim_cscore = map(int, resultsim[0:1])
# NOTE(review): trans_cscore is computed but not used afterwards in this function.
trans_cscore = int(result32[0]) * int(result64[0]) * int(result61[0]);
miss32 = int(result32[1])
miss64 = int(result64[1])
miss61 = int(result61[1])
# Each performance score is multiplied by the correctness flag, so an
# incorrect transpose earns 0 points regardless of its miss count.
trans32_score = computeMissScore(miss32, 300, 600, maxscore['trans32']) * int(result32[0])
trans64_score = computeMissScore(miss64, 1300, 2000, maxscore['trans64']) * int(result64[0])
trans61_score = computeMissScore(miss61, 2000, 3000, maxscore['trans61']) * int(result61[0])
total_score = csim_cscore[0] + trans32_score + trans64_score + trans61_score
# Summarize the results
print "\nCache Lab summary:"
print "%-22s%8s%10s%12s" % ("", "Points", "Max pts", "Misses")
print "%-22s%8.1f%10d" % ("Csim correctness", csim_cscore[0],
maxscore['csim'])
# A miss count of 2**31-1 is shown as "invalid"
# (presumably the INT_MAX placeholder emitted by test-trans - confirm).
misses = str(miss32)
if miss32 == 2**31-1 :
misses = "invalid"
print "%-22s%8.1f%10d%12s" % ("Trans perf 32x32", trans32_score,
maxscore['trans32'], misses)
misses = str(miss64)
if miss64 == 2**31-1 :
misses = "invalid"
print "%-22s%8.1f%10d%12s" % ("Trans perf 64x64", trans64_score,
maxscore['trans64'], misses)
misses = str(miss61)
if miss61 == 2**31-1 :
misses = "invalid"
print "%-22s%8.1f%10d%12s" % ("Trans perf 61x67", trans61_score,
maxscore['trans61'], misses)
print "%22s%8.1f%10d" % ("Total points", total_score,
maxscore['csim'] +
maxscore['trans32'] +
maxscore['trans64'] +
maxscore['trans61'])
# Emit autoresult string for Autolab if called with -A option
if autograde:
autoresult="%.1f:%d:%d:%d" % (total_score, miss32, miss64, miss61)
print "\nAUTORESULT_STRING=%s" % autoresult
# execute main only if called as a script
if __name__ == "__main__":
main()

BIN
cache/test-csim vendored Executable file

Binary file not shown.

261
cache/test-trans.c vendored Normal file
View File

@ -0,0 +1,261 @@
/*
* test-trans.c - Checks the correctness and performance of all of the
* student's transpose functions and records the results for their
* official submitted version as well.
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <string.h>
#include <signal.h>
#include <getopt.h>
#include <sys/types.h>
#include "cachelab.h"
#include <sys/wait.h> // for WEXITSTATUS
#include <limits.h> // for INT_MAX
/* Maximum array dimension */
#define MAXN 256
/* The description string for the transpose_submit() function that the
student submits for credit */
#define SUBMIT_DESCRIPTION "Transpose submission"
/* External function defined in trans.c */
extern void registerFunctions();
/* External variables defined in cachelab.c */
extern trans_func_t func_list[MAX_TRANS_FUNCS];
extern int func_counter;
/* Globals set on the command line */
static int M = 0;
static int N = 0;
/* The correctness and performance for the submitted transpose function */
struct results {
int funcid; /* index in func_list of the submission; -1 until it is found */
int correct; /* set to 1 by eval_perf once tracegen exits with status 0 for it */
int misses; /* miss count read from .csim_results; INT_MAX until measured */
};
/* Initial state: submission not found, not correct, no miss count yet. */
static struct results results = {-1, 0, INT_MAX};
/*
 * eval_perf - Evaluate the performance of the registered transpose functions
 *
 * s, E, b: cache parameters passed to ./csim-ref as -s, -E and -b.
 *
 * For every registered function this
 *   1. runs ./tracegen under valgrind (lackey tool) and stores the full
 *      memory trace in trace.tmp; a non-zero exit status is reported as a
 *      validation error and the function is skipped,
 *   2. reads the two marker addresses from .marker and copies the accesses
 *      between the markers into trace.f<i>,
 *   3. runs ./csim-ref on trace.f<i> and reads hits/misses/evictions from
 *      .csim_results into func_list[i].
 * The function whose description equals SUBMIT_DESCRIPTION is also
 * recorded in the global `results`.
 *
 * Aborts via assert() when one of the intermediate files cannot be opened.
 */
void eval_perf(unsigned int s, unsigned int E, unsigned int b)
{
    int i, flag;
    unsigned int len, hits, misses, evictions;
    unsigned long long int marker_start, marker_end, addr;
    char buf[1000], cmd[255];
    char filename[128];
    FILE* full_trace_fp;
    FILE* part_trace_fp;
    FILE* marker_fp;
    FILE* in_fp;
    int nread;

    registerFunctions();

    /* Evaluate the performance of each registered transpose function */
    for (i=0; i<func_counter; i++) {
        if (strcmp(func_list[i].description, SUBMIT_DESCRIPTION) == 0 )
            results.funcid = i; /* remember which function is the submission */

        printf("\nFunction %d (%d total)\nStep 1: Validating and generating memory traces\n",i,func_counter);

        /* Use valgrind to generate the trace */
        snprintf(cmd, sizeof(cmd), "valgrind --tool=lackey --trace-mem=yes --log-fd=1 -v ./tracegen -M %d -N %d -F %d > trace.tmp", M, N,i);
        flag=WEXITSTATUS(system(cmd));
        if (0!=flag) {
            printf("Validation error at function %d! Run ./tracegen -M %d -N %d -F %d for details.\nSkipping performance evaluation for this function.\n",flag-1,M,N,i);
            continue;
        }

        /* Get the start and end marker addresses */
        marker_fp = fopen(".marker", "r");
        assert(marker_fp);
        nread = fscanf(marker_fp, "%llx %llx", &marker_start, &marker_end);
        fclose(marker_fp);
        if (nread != 2) {
            /* Without both markers the trace cannot be delimited. */
            printf("Error: could not read marker addresses from .marker.\nSkipping performance evaluation for this function.\n");
            continue;
        }

        func_list[i].correct=1;

        /* Save the correctness of the transpose submission */
        if (results.funcid == i ) {
            results.correct = 1;
        }

        full_trace_fp = fopen("trace.tmp", "r");
        assert(full_trace_fp);

        /* Filtered trace for each transpose function goes in a separate file */
        snprintf(filename, sizeof(filename), "trace.f%d", i);
        part_trace_fp = fopen(filename, "w");
        assert(part_trace_fp);

        /* Locate trace corresponding to the trans function */
        flag = 0;
        while (fgets(buf, sizeof(buf), full_trace_fp) != NULL) {

            /* We are only interested in memory access instructions.
               buf[1] is tested before buf[2] so that a short line never
               causes a read past its terminator. */
            if (buf[0]==' ' &&
                (buf[1]=='S' || buf[1]=='M' || buf[1]=='L' ) &&
                buf[2]==' ') {

                /* Ignore lines that do not carry an address and a length */
                if (sscanf(buf+3, "%llx,%u", &addr, &len) != 2)
                    continue;

                /* If start marker found, set flag */
                if (addr == marker_start)
                    flag = 1;

                /* Valgrind creates many spurious accesses to the
                   stack that have nothing to do with the students
                   code. At the moment, we are ignoring all stack
                   accesses by using the simple filter of recording
                   accesses to only the low 32-bit portion of the
                   address space. At some point it would be nice to
                   try to do more informed filtering so that would
                   eliminate the valgrind stack references while
                   include the student stack references. */
                if (flag && addr < 0xffffffff) {
                    fputs(buf, part_trace_fp);
                }

                /* If end marker found, stop copying */
                if (addr == marker_end) {
                    flag = 0;
                    break;
                }
            }
        }

        /* Close (and thereby flush) the filtered trace on every path, not
           only when the end marker was seen, so that csim-ref reads a
           complete file and no stream is leaked. */
        fclose(part_trace_fp);
        fclose(full_trace_fp);

        /* Run the reference simulator */
        printf("Step 2: Evaluating performance (s=%u, E=%u, b=%u)\n", s, E, b);
        snprintf(cmd, sizeof(cmd), "./csim-ref -s %u -E %u -b %u -t trace.f%d > /dev/null",
                 s, E, b, i);
        system(cmd);

        /* Collect results from the reference simulator */
        in_fp = fopen(".csim_results","r");
        assert(in_fp);
        nread = fscanf(in_fp, "%u %u %u", &hits, &misses, &evictions);
        fclose(in_fp);
        if (nread != 3) {
            /* Do not record stale or uninitialized counters. */
            printf("Error: could not read simulator results from .csim_results.\nSkipping performance evaluation for this function.\n");
            continue;
        }

        func_list[i].num_hits = hits;
        func_list[i].num_misses = misses;
        func_list[i].num_evictions = evictions;
        printf("func %d (%s): hits:%u, misses:%u, evictions:%u\n",
               i, func_list[i].description, hits, misses, evictions);

        /* If it is transpose_submit(), record number of misses */
        if (results.funcid == i) {
            results.misses = misses;
        }
    }
}
/*
 * usage - Print the command-line help text on stdout.
 *
 * argv: the program's argument vector; only argv[0] (the program name)
 *       is used.
 */
void usage(char *argv[]){
    const char *prog = argv[0];

    printf("Usage: %s [-h] -M <rows> -N <cols>\n", prog);
    printf("Options:\n");
    printf(" -h Print this help message.\n");
    printf(" -M <rows> Number of matrix rows (max %d)\n", MAXN);
    printf(" -N <cols> Number of matrix columns (max %d)\n", MAXN);
    printf("Example: %s -M 8 -N 8\n", prog);
}
/*
 * sigsegv_handler - SIGSEGV handler
 *
 * Reports the fault, emits a failing TEST_TRANS_RESULTS line, flushes
 * stdout and terminates the process with exit status 1.
 * signum is not used.
 */
void sigsegv_handler(int signum){
    (void) signum;

    fputs("Error: Segmentation Fault.\n", stdout);
    fputs("TEST_TRANS_RESULTS=0:0\n", stdout);
    fflush(stdout);
    exit(1);
}
/*
 * sigalrm_handler - SIGALRM handler
 *
 * Reports the timeout, emits a failing TEST_TRANS_RESULTS line, flushes
 * stdout and terminates the process with exit status 1.
 * signum is not used.
 */
void sigalrm_handler(int signum){
    (void) signum;

    fputs("Error: Program timed out.\n", stdout);
    fputs("TEST_TRANS_RESULTS=0:0\n", stdout);
    fflush(stdout);
    exit(1);
}
/*
 * main - Main routine
 *
 * Parses -M <rows> and -N <cols> (-h prints the usage text), installs the
 * SIGSEGV and SIGALRM handlers, arms a 120 second alarm, evaluates every
 * registered transpose function and finally prints the
 * TEST_TRANS_RESULTS=<correct>:<misses> line for the official submission.
 *
 * Exits with status 1 on invalid arguments or when a handler cannot be
 * installed; returns 0 otherwise.
 */
int main(int argc, char* argv[])
{
    /* getopt() returns an int and signals the end of the options with -1.
       Storing it in a plain char never compares equal to -1 on platforms
       where char is unsigned, so the loop would not terminate. */
    int c;

    while ((c = getopt(argc,argv,"M:N:h")) != -1) {
        switch(c) {
        case 'M':
            M = atoi(optarg);
            break;
        case 'N':
            N = atoi(optarg);
            break;
        case 'h':
            usage(argv);
            exit(0);
        default:
            usage(argv);
            exit(1);
        }
    }

    if (M == 0 || N == 0) {
        printf("Error: Missing required argument\n");
        usage(argv);
        exit(1);
    }

    /* Negative sizes would otherwise be passed on to tracegen. */
    if (M < 0 || N < 0) {
        printf("Error: M and N must be positive\n");
        usage(argv);
        exit(1);
    }

    if (M > MAXN || N > MAXN) {
        printf("Error: M or N exceeds %d\n", MAXN);
        usage(argv);
        exit(1);
    }

    /* Install SIGSEGV and SIGALRM handlers */
    if (signal(SIGSEGV, sigsegv_handler) == SIG_ERR) {
        fprintf(stderr, "Unable to install SIGSEGV handler\n");
        exit(1);
    }

    if (signal(SIGALRM, sigalrm_handler) == SIG_ERR) {
        fprintf(stderr, "Unable to install SIGALRM handler\n");
        exit(1);
    }

    /* Time out and give up after a while */
    alarm(120);

    /* Check the performance of the student's transpose function */
    eval_perf(5, 1, 5);

    /* Emit the results for this particular test */
    if (results.funcid == -1) {
        printf("\nError: We could not find your transpose_submit() function\n");
        printf("Error: Please ensure that description field is exactly \"%s\"\n",
               SUBMIT_DESCRIPTION);
        printf("\nTEST_TRANS_RESULTS=0:0\n");
    }
    else {
        printf("\nSummary for official submission (func %d): correctness=%d misses=%d\n",
               results.funcid, results.correct, results.misses);
        printf("\nTEST_TRANS_RESULTS=%d:%d\n", results.correct, results.misses);
    }
    return 0;
}

107
cache/tracegen.c vendored Normal file
View File

@ -0,0 +1,107 @@
/*
* tracegen.c - Running the binary tracegen with valgrind produces
* a memory trace of all of the registered transpose functions.
*
* The beginning and end of each registered transpose function's trace
* is indicated by reading from "marker" addresses. These two marker
* addresses are recorded in file for later use.
*/
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <unistd.h>
#include <getopt.h>
#include "cachelab.h"
#include <string.h>
/* External variables declared in cachelab.c */
extern trans_func_t func_list[MAX_TRANS_FUNCS];
extern int func_counter;
/* External function from trans.c */
extern void registerFunctions();
/* Markers used to bound trace regions of interest */
volatile char MARKER_START, MARKER_END;
static int A[256][256];
static int B[256][256];
static int M;
static int N;
/*
 * validate - Check B against the reference transpose of A.
 *
 * fn:   index of the function under test; only used in the message.
 * M, N: dimensions; A has N rows of M ints, B has M rows of N ints.
 *
 * Computes the expected result with correctTrans() into a local matrix
 * and compares it with B element by element. The first mismatch is
 * printed on stdout.
 *
 * Returns 1 when B matches, 0 otherwise.
 */
int validate(int fn,int M, int N, int A[N][M], int B[M][N]) {
    int expected[M][N];
    int row, col;

    memset(expected, 0, sizeof(expected));
    correctTrans(M, N, A, expected);

    for (row = 0; row < M; row++) {
        const int *want = expected[row];
        const int *got = B[row];

        for (col = 0; col < N; col++) {
            if (got[col] == want[col])
                continue;
            printf("Validation failed on function %d! Expected %d but got %d at B[%d][%d]\n",fn,want[col],got[col],row,col);
            return 0;
        }
    }
    return 1;
}
/*
 * main - Run the registered transpose functions between trace markers.
 *
 * Options: -M <cols of A>, -N <rows of A>, -F <index of the single
 * function to run>; without -F every registered function is run.
 *
 * The addresses of MARKER_START and MARKER_END are written to ".marker".
 * Each function call is bracketed by a store to MARKER_START and a store
 * to MARKER_END, and its result is checked with validate().
 *
 * Exit status: 0 on success, <index>+1 when the function with that index
 * fails validation, 1 when the arguments are unusable.
 */
int main(int argc, char* argv[]){
    int i;
    /* getopt() returns an int and ends with -1; a plain char never
       compares equal to -1 where char is unsigned. */
    int c;
    int selectedFunc=-1;
    /* Largest dimension the static matrices A and B can hold. */
    const int max_dim = (int)(sizeof(A) / sizeof(A[0]));

    while( (c=getopt(argc,argv,"M:N:F:")) != -1){
        switch(c){
        case 'M':
            M = atoi(optarg);
            break;
        case 'N':
            N = atoi(optarg);
            break;
        case 'F':
            selectedFunc = atoi(optarg);
            break;
        case '?':
        default:
            printf("./tracegen failed to parse its options.\n");
            exit(1);
        }
    }

    /* The transpose functions index A and B with M and N, so both must
       fit inside the static arrays. */
    if (M < 1 || N < 1 || M > max_dim || N > max_dim) {
        printf("./tracegen requires -M and -N between 1 and %d.\n", max_dim);
        exit(1);
    }

    /* Register transpose functions */
    registerFunctions();

    /* An explicit -F value must name a registered function; otherwise
       func_list would be read out of range or an unset pointer called. */
    if (selectedFunc != -1 &&
        (selectedFunc < 0 || selectedFunc >= func_counter)) {
        printf("./tracegen: no registered function with index %d.\n", selectedFunc);
        exit(1);
    }

    /* Fill A with data */
    initMatrix(M,N, A, B);

    /* Record marker addresses */
    FILE* marker_fp = fopen(".marker","w");
    assert(marker_fp);
    fprintf(marker_fp, "%llx %llx",
            (unsigned long long int) &MARKER_START,
            (unsigned long long int) &MARKER_END );
    fclose(marker_fp);

    if (-1==selectedFunc) {
        /* Invoke registered transpose functions */
        for (i=0; i < func_counter; i++) {
            MARKER_START = 33;
            (*func_list[i].func_ptr)(M, N, A, B);
            MARKER_END = 34;
            if (!validate(i,M,N,A,B))
                return i+1;
        }
    } else {
        MARKER_START = 33;
        (*func_list[selectedFunc].func_ptr)(M, N, A, B);
        MARKER_END = 34;
        if (!validate(selectedFunc,M,N,A,B))
            return selectedFunc+1;
    }
    return 0;
}

5
cache/traces/dave.trace vendored Normal file
View File

@ -0,0 +1,5 @@
L 10,4
S 18,4
L 20,4
S 28,4
S 50,4

267988
cache/traces/long.trace vendored Normal file

File diff suppressed because it is too large Load Diff

596
cache/traces/trans.trace vendored Normal file
View File

@ -0,0 +1,596 @@
S 00600aa0,1
I 004005b6,5
I 004005bb,5
I 004005c0,5
S 7ff000398,8
I 0040051e,1
S 7ff000390,8
I 0040051f,3
I 00400522,4
S 7ff000378,8
I 00400526,4
S 7ff000370,8
I 0040052a,7
S 7ff000384,4
I 00400531,2
I 00400581,4
L 7ff000384,4
I 00400585,2
I 00400533,7
S 7ff000388,4
I 0040053a,2
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a20,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a60,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a24,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a70,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a28,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a80,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a2c,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a90,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040057d,4
M 7ff000384,4
I 00400581,4
L 7ff000384,4
I 00400585,2
I 00400533,7
S 7ff000388,4
I 0040053a,2
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a30,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a64,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a34,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a74,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a38,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a84,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a3c,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a94,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040057d,4
M 7ff000384,4
I 00400581,4
L 7ff000384,4
I 00400585,2
I 00400533,7
S 7ff000388,4
I 0040053a,2
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a40,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a68,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a44,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a78,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a48,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a88,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a4c,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a98,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040057d,4
M 7ff000384,4
I 00400581,4
L 7ff000384,4
I 00400585,2
I 00400533,7
S 7ff000388,4
I 0040053a,2
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a50,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a6c,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a54,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a7c,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a58,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a8c,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040053c,3
L 7ff000384,4
I 0040053f,2
I 00400541,4
I 00400545,3
I 00400548,4
L 7ff000378,8
I 0040054c,3
L 7ff000388,4
I 0040054f,2
I 00400551,3
L 00600a5c,4
I 00400554,3
S 7ff00038c,4
I 00400557,3
L 7ff000388,4
I 0040055a,2
I 0040055c,4
I 00400560,3
I 00400563,4
L 7ff000370,8
I 00400567,3
L 7ff000384,4
I 0040056a,3
I 0040056d,3
L 7ff00038c,4
I 00400570,3
S 00600a9c,4
I 00400573,4
M 7ff000388,4
I 00400577,4
L 7ff000388,4
I 0040057b,2
I 0040057d,4
M 7ff000384,4
I 00400581,4
L 7ff000384,4
I 00400585,2
I 00400587,1
L 7ff000390,8
I 00400588,1
L 7ff000398,8
I 004005c5,7
L 00600aa0,1

7
cache/traces/yi.trace vendored Normal file
View File

@ -0,0 +1,7 @@
L 10,1
M 20,1
L 22,1
S 18,1
L 110,1
L 210,1
M 12,1

16
cache/traces/yi2.trace vendored Normal file
View File

@ -0,0 +1,16 @@
L 0,1
L 1,1
L 2,1
L 3,1
S 4,1
L 5,1
S 6,1
L 7,1
S 8,1
L 9,1
S a,1
L b,1
S c,1
L d,1
S e,1
M f,1

84
cache/trans.c vendored Normal file
View File

@ -0,0 +1,84 @@
/*
* trans.c - Matrix transpose B = A^T
*
* Each transpose function must have a prototype of the form:
* void trans(int M, int N, int A[N][M], int B[M][N]);
*
* A transpose function is evaluated by counting the number of misses
* on a 1KB direct mapped cache with a block size of 32 bytes.
*/
#include <stdio.h>
#include "cachelab.h"
int is_transpose(int M, int N, int A[N][M], int B[M][N]);
/*
* transpose_submit - This is the solution transpose function that you
* will be graded on for Part B of the assignment. Do not change
* the description string "Transpose submission", as the driver
* searches for that string to identify the transpose function to
* be graded.
*
* M, N: dimensions; A has N rows of M ints, B has M rows of N ints.
*/
char transpose_submit_desc[] = "Transpose submission";
void transpose_submit(int M, int N, int A[N][M], int B[M][N])
{
/* Handout stub: the body is empty, so B is left unmodified. */
}
/*
* You can define additional transpose functions below. We've defined
* a simple one below to help you get started.
*/
/*
 * trans - Baseline transpose: walks A row by row and stores every
 * element at the mirrored position in B. Not optimized for the cache.
 *
 * M, N: dimensions; A has N rows of M ints, B has M rows of N ints.
 */
char trans_desc[] = "Simple row-wise scan transpose";
void trans(int M, int N, int A[N][M], int B[M][N])
{
    int row, col;

    for (row = 0; row < N; row++) {
        for (col = 0; col < M; col++) {
            B[col][row] = A[row][col];
        }
    }
}
/*
 * registerFunctions - Hand every transpose function in this file to the
 * driver, together with its description string. The driver evaluates
 * each registered function at runtime and summarizes its performance,
 * which makes it easy to compare different transpose strategies.
 *
 * The solution function is registered first, the baseline second.
 */
void registerFunctions()
{
    char *submission_label = transpose_submit_desc;
    char *baseline_label = trans_desc;

    /* The graded solution */
    registerTransFunction(transpose_submit, submission_label);

    /* Additional transpose functions */
    registerTransFunction(trans, baseline_label);
}
/*
 * is_transpose - Check whether B is the transpose of A. Useful for
 * verifying a transpose before returning from the transpose function.
 *
 * M, N: dimensions; A has N rows of M ints, B has M rows of N ints.
 *
 * Returns 1 when B[j][i] equals A[i][j] for every element, 0 otherwise.
 */
int is_transpose(int M, int N, int A[N][M], int B[M][N])
{
    int row = 0;

    while (row < N) {
        int col = 0;

        while (col < M) {
            if (A[row][col] != B[col][row])
                return 0;
            col++;
        }
        row++;
    }
    return 1;
}

23
perf/Makefile Normal file
View File

@ -0,0 +1,23 @@
# Student's Makefile for the CS:APP Performance Lab
# TEAM and VERSION form the name of the handed-in file; HANDINDIR is its
# destination directory (empty here, so it must be set before `make handin`).
TEAM = bovik
VERSION = 1
HANDINDIR =
# Toolchain settings: all warnings, -O2, 32-bit code.
CC = gcc
CFLAGS = -Wall -O2 -m32
LIBS = -lm
OBJS = driver.o kernels.o fcyc.o clock.o
# Default target: the driver program.
all: driver
driver: $(OBJS) fcyc.h clock.h defs.h config.h
$(CC) $(CFLAGS) $(OBJS) $(LIBS) -o driver
# Copy kernels.c to $(HANDINDIR)/<TEAM>-<VERSION>-kernels.c
handin:
cp kernels.c $(HANDINDIR)/$(TEAM)-$(VERSION)-kernels.c
# Remove build products; the leading '-' makes make ignore a failure of rm.
clean:
-rm -f $(OBJS) driver core *~ *.o

38
perf/README Normal file
View File

@ -0,0 +1,38 @@
#####################################################################
# CS:APP Performance Lab
#
# Student's Source Files
#
# Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved.
# May not be used, modified, or copied without permission.
#
######################################################################
This directory contains the files you will need for the CS:APP
Performance Lab.
kernels.c
This is the file you will be modifying and handing in.
#########################################
# You shouldn't modify any of these files
#########################################
driver.c
This is the driver that tests the performance of all
of the versions of the rotate and smooth kernels
in your kernels.c file.
config.h
This is a site-specific configuration file that was created by
your instructor for your system.
defs.h
Various definitions needed by kernels.c and driver.c
clock.{c,h}
fcyc.{c,h}
These contain timing routines that measure the performance of your
code with our k-best measurement scheme using IA32 cycle counters.
Makefile:
This is the makefile that builds the driver program.

242
perf/clock.c Normal file
View File

@ -0,0 +1,242 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/times.h>
#include "clock.h"
/*
* Routines for using the cycle counter
*/
/* Detect whether running on Alpha */
#ifdef __alpha
#define IS_ALPHA 1
#else
#define IS_ALPHA 0
#endif
/* Detect whether running on x86 */
#ifdef __i386__
#define IS_x86 1
#else
#define IS_x86 0
#endif
#if IS_ALPHA
/* Baseline cycle count recorded by start_counter() (high and low words) */
static unsigned cyc_hi = 0;
static unsigned cyc_lo = 0;
/* Use Alpha cycle timer to compute cycles. Then use
measured clock speed to compute seconds
*/
/*
* counterRoutine is an array of Alpha instructions to access
* the Alpha's processor cycle counter. It uses the rpcc
* instruction to access the counter. This 64 bit register is
* divided into two parts. The lower 32 bits are the cycles
* used by the current process. The upper 32 bits are wall
* clock cycles. These instructions read the counter, and
* convert the lower 32 bits into an unsigned int - this is the
* user space counter value.
* NOTE: The counter has a very limited time span. With a
* 450MHz clock the counter can time things for about 9
* seconds.
* NOTE(review): the array is plain static data that is later called as
* code, so this presumably relies on the data segment being executable -
* confirm on the target system. */
static unsigned int counterRoutine[] =
{
0x601fc000u,
0x401f0000u,
0x6bfa8001u
};
/* Cast the above instructions into a function returning the counter value. */
static unsigned int (*counter)(void)= (void *)counterRoutine;
/*
* start_counter (Alpha) - Record the current counter value as the baseline
* that get_counter() measures from.
*/
void start_counter()
{
/* Only the word returned by counter() is sampled; the high word is kept at 0 */
cyc_hi = 0;
cyc_lo = counter();
}
double get_counter()
{
unsigned ncyc_hi, ncyc_lo;
unsigned hi, lo, borrow;
double result;
ncyc_lo = counter();
ncyc_hi = 0;
lo = ncyc_lo - cyc_lo;
borrow = lo > ncyc_lo;
hi = ncyc_hi - cyc_hi - borrow;
result = (double) hi * (1 << 30) * 4 + lo;
if (result < 0) {
fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result);
}
return result;
}
#endif /* Alpha */
#if IS_x86
/* $begin x86cyclecounter */
/* Baseline cycle count recorded by start_counter() (high and low words) */
static unsigned cyc_hi = 0;
static unsigned cyc_lo = 0;
/*
 * access_counter - Set *hi and *lo to the high and low order 32 bits of
 * the cycle counter, read with the rdtsc instruction.
 *
 * The asm statement is volatile: it has outputs but no inputs, so without
 * volatile the compiler is allowed to assume two executions yield the same
 * value and merge or hoist them, which would break interval timing.
 * rdtsc leaves the low half in EAX and the high half in EDX, so the outputs
 * are bound to those registers directly ("=a"/"=d").  __asm__ is used rather
 * than asm so the file also builds in strict ISO C modes.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    unsigned high_bits, low_bits;

    __asm__ __volatile__("rdtsc" : "=a" (low_bits), "=d" (high_bits));
    *hi = high_bits;
    *lo = low_bits;
}
/*
* start_counter (x86) - Record the current value of the cycle counter in
* cyc_hi/cyc_lo; get_counter() reports cycles elapsed since this call.
*/
void start_counter()
{
access_counter(&cyc_hi, &cyc_lo);
}
/* Return the number of cycles since the last call to start_counter. */
double get_counter()
{
unsigned ncyc_hi, ncyc_lo;
unsigned hi, lo, borrow;
double result;
/* Get cycle counter */
access_counter(&ncyc_hi, &ncyc_lo);
/* Do double precision subtraction */
lo = ncyc_lo - cyc_lo;
borrow = lo > ncyc_lo;
hi = ncyc_hi - cyc_hi - borrow;
result = (double) hi * (1 << 30) * 4 + lo;
if (result < 0) {
fprintf(stderr, "Error: counter returns neg value: %.0f\n", result);
}
return result;
}
/* $end x86cyclecounter */
#endif /* x86 */
/*
 * ovhd - Measure the cost, in cycles, of a back-to-back
 * start_counter()/get_counter() pair.
 */
double ovhd()
{
    double cycles;
    int pass = 0;

    /* Measure twice to eliminate cache effects; only the second reading is returned */
    do {
        start_counter();
        cycles = get_counter();
    } while (++pass < 2);
    return cycles;
}
/* $begin mhz */
/*
 * mhz_full - Estimate the clock rate in MHz by counting the cycles that
 * elapse while sleeping for sleeptime seconds.  When verbose is nonzero
 * the estimate is also printed to stdout.
 */
double mhz_full(int verbose, int sleeptime)
{
    double cycles;
    double mhz_rate;

    start_counter();
    sleep(sleeptime);
    cycles = get_counter();
    /* cycles per second, scaled down by 1e6 to give MHz */
    mhz_rate = cycles / (1e6*sleeptime);
    if (verbose)
        printf("Processor clock rate ~= %.1f MHz\n", mhz_rate);
    return mhz_rate;
}
/* $end mhz */
/*
* mhz - Estimate the clock rate in MHz using a default sleep time of
* 2 seconds. verbose is passed through to mhz_full().
*/
double mhz(int verbose)
{
return mhz_full(verbose, 2);
}
/** Special counters that compensate for timer interrupt overhead */
/* Estimated cycles per user-time tick; 0.0 means "not yet calibrated" */
static double cyc_per_tick = 0.0;
#define NEVENT 100 /* number of tick-advance events to observe */
#define THRESHOLD 1000 /* minimum cycle gap between polls before times() is consulted */
#define RECORDTHRESH 3000 /* cycles-per-tick estimates at or below this are ignored */
/*
* callibrate - Attempt to see how much time is used by timer interrupt.
*
* Polls the cycle counter; whenever at least THRESHOLD cycles separate two
* polls, the process user time is re-read with times(). Each time the user
* time has advanced counts as one event, and the smallest cycles-per-tick
* ratio above RECORDTHRESH seen over NEVENT events is kept in cyc_per_tick.
* verbose only has an effect when compiled with DEBUG.
*/
static void callibrate(int verbose)
{
double oldt;
struct tms t;
clock_t oldc;
int e = 0; /* events observed so far */
times(&t);
oldc = t.tms_utime;
start_counter();
oldt = get_counter();
while (e <NEVENT) {
double newt = get_counter();
if (newt-oldt >= THRESHOLD) {
clock_t newc;
times(&t);
newc = t.tms_utime;
if (newc > oldc) {
/* cycles in this gap divided by the user-time ticks that elapsed */
double cpt = (newt-oldt)/(newc-oldc);
/* keep the minimum ratio seen so far, ignoring implausibly small ones */
if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH)
cyc_per_tick = cpt;
/*
if (verbose)
printf("Saw event lasting %.0f cycles and %d ticks. Ratio = %f\n",
newt-oldt, (int) (newc-oldc), cpt);
*/
e++;
oldc = newc;
}
oldt = newt;
}
}
/* ifdef added by Sanjit - 10/2001 */
#ifdef DEBUG
if (verbose)
printf("Setting cyc_per_tick to %f\n", cyc_per_tick);
#endif
}
/* User-time tick count sampled when the compensated counter was started */
static clock_t start_tick = 0;
/*
 * start_comp_counter - Start a measurement whose result will be corrected
 * by get_comp_counter().  Runs the calibration the first time it is needed.
 */
void start_comp_counter()
{
    struct tms now;

    /* cyc_per_tick is still 0.0 until callibrate() has stored an estimate */
    if (cyc_per_tick == 0.0)
        callibrate(1);

    times(&now);
    start_tick = now.tms_utime;
    start_counter();
}
/*
 * get_comp_counter - Return the cycles elapsed since start_comp_counter(),
 * minus cyc_per_tick cycles for every user-time tick that elapsed.
 */
double get_comp_counter()
{
    struct tms now;
    clock_t elapsed_ticks;
    double raw_cycles = get_counter();

    times(&now);
    elapsed_ticks = now.tms_utime - start_tick;
    return raw_cycles - elapsed_ticks*cyc_per_tick;
}

22
perf/clock.h Normal file
View File

@ -0,0 +1,22 @@
/* Routines for using cycle counter */
#ifndef CLOCK_H
#define CLOCK_H

/* Start the counter */
void start_counter(void);
/* Get # cycles since counter started */
double get_counter(void);
/* Measure overhead for counter */
double ovhd(void);
/* Determine clock rate of processor (using a default sleeptime) */
double mhz(int verbose);
/* Determine clock rate of processor, having more control over accuracy */
double mhz_full(int verbose, int sleeptime);
/** Special counters that compensate for timer interrupt overhead */
void start_comp_counter(void);
double get_comp_counter(void);

#endif /* CLOCK_H */

32
perf/config.h Normal file
View File

@ -0,0 +1,32 @@
/*********************************************************
* config.h - Configuration data for the driver.c program.
*
* Each macro is a baseline CPE (cycles per element) that the driver
* divides by the measured CPE to compute a speedup.
*********************************************************/
#ifndef _CONFIG_H_
#define _CONFIG_H_
/*
* CPEs for the baseline (naive) version of the rotate function that
* was handed out to the students. Rd is the measured CPE for a dxd
* image. Run the driver.c program on your system to get these
* numbers.
*/
#define R64 14.7
#define R128 40.1
#define R256 46.4
#define R512 65.9
#define R1024 94.5
/*
* CPEs for the baseline (naive) version of the smooth function that
* was handed out to the students. Sd is the measured CPE for a dxd
* image. Run the driver.c program on your system to get these
* numbers.
*/
#define S32 695
#define S64 698
#define S128 702
#define S256 717
#define S512 722
#endif /* _CONFIG_H_ */

38
perf/defs.h Normal file
View File

@ -0,0 +1,38 @@
/*
* defs.h - Various definitions for the Performance Lab.
*
* DO NOT MODIFY ANYTHING IN THIS FILE
*/
#ifndef _DEFS_H_
#define _DEFS_H_
#include <stdlib.h>
/* Index of element (i,j) in an n-column matrix stored row by row in a 1D array */
#define RIDX(i,j,n) ((i)*(n)+(j))
/* Identifies the students submitting the lab */
typedef struct {
char *team;
char *name1, *email1;
char *name2, *email2;
} team_t;
extern team_t team;
/* One image pixel: three 16-bit color channels */
typedef struct {
unsigned short red;
unsigned short green;
unsigned short blue;
} pixel;
/* Signature shared by every rotate/smooth kernel: (dim, src image, dst image) */
typedef void (*lab_test_func) (int, pixel*, pixel*);
/* The graded kernels */
void smooth(int, pixel *, pixel *);
void rotate(int, pixel *, pixel *);
/* Called once by the driver so kernels.c can register its test versions */
void register_rotate_functions(void);
void register_smooth_functions(void);
/* Register one kernel version together with its description string */
void add_smooth_function(lab_test_func, char*);
void add_rotate_function(lab_test_func, char*);
#endif /* _DEFS_H_ */

752
perf/driver.c Normal file
View File

@ -0,0 +1,752 @@
/*******************************************************************
*
* driver.c - Driver program for CS:APP Performance Lab
*
* In kernels.c, students generate an arbitrary number of rotate and
* smooth test functions, which they then register with the driver
* program using the add_rotate_function() and add_smooth_function()
* functions.
*
* The driver program runs and measures the registered test functions
* and reports their performance.
*
* Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights
* reserved. May not be used, modified, or copied without permission.
*
********************************************************************/
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <assert.h>
#include <math.h>
#include "fcyc.h"
#include "defs.h"
#include "config.h"
/* Team structure that identifies the students */
extern team_t team;
/* Keep track of a number of different test functions */
#define MAX_BENCHMARKS 100
#define DIM_CNT 5
/* Misc constants */
#define BSIZE 32 /* cache block size in bytes */
#define MAX_DIM 1280 /* 1024 + 256 */
#define ODD_DIM 96 /* not a power of 2 */
/*
 * fast versions of min and max
 *
 * Every argument is parenthesized so that operands containing operators
 * of lower precedence than the comparison (e.g. |, &, ?:) expand
 * correctly.  Each argument may still be evaluated twice, so do not pass
 * expressions with side effects.
 */
#define min(a,b) ((a) < (b) ? (a) : (b))
#define max(a,b) ((a) > (b) ? (a) : (b))
/* This struct characterizes the results for one benchmark test */
typedef struct {
lab_test_func tfunct; /* The test function */
double cpes[DIM_CNT]; /* One CPE result for each dimension */
char *description; /* ASCII description of the test function */
unsigned short valid; /* The function is tested if this is non zero */
} bench_t;
/* The range of image dimensions that we will be testing */
static int test_dim_rotate[] = {64, 128, 256, 512, 1024};
static int test_dim_smooth[] = {32, 64, 128, 256, 512};
/* Baseline CPEs (see config.h), one per entry of the dimension tables above */
static double rotate_baseline_cpes[] = {R64, R128, R256, R512, R1024};
static double smooth_baseline_cpes[] = {S32, S64, S128, S256, S512};
/* These hold the results for all benchmarks */
static bench_t benchmarks_rotate[MAX_BENCHMARKS];
static bench_t benchmarks_smooth[MAX_BENCHMARKS];
/* These give the sizes of the above lists */
static int rotate_benchmark_count = 0;
static int smooth_benchmark_count = 0;
/*
* An image is a dimxdim matrix of pixels stored in a 1D array. The
* data array holds three images (the input original, a copy of the original,
* and the output result array). There are also BSIZE/sizeof(pixel) extra
* pixels of padding for alignment to cache block boundaries.
*/
static pixel data[(3*MAX_DIM*MAX_DIM) + (BSIZE/sizeof(pixel))];
/* Various image pointers (set by create()) */
static pixel *orig = NULL; /* original image */
static pixel *copy_of_orig = NULL; /* copy of original for checking result */
static pixel *result = NULL; /* result image */
/* Keep track of the best rotate and smooth score for grading */
double rotate_maxmean = 0.0;
char *rotate_maxmean_desc = NULL; /* stays NULL until some version is scored */
double smooth_maxmean = 0.0;
char *smooth_maxmean_desc = NULL; /* stays NULL until some version is scored */
/******************** Functions begin *************************/
/*
 * add_smooth_function - Register a smooth implementation f with the
 * driver under the given description.  The entry starts out not selected
 * for testing (valid == 0).
 *
 * Registrations beyond MAX_BENCHMARKS are rejected with a message on
 * stderr instead of writing past the end of benchmarks_smooth[].
 */
void add_smooth_function(lab_test_func f, char *description)
{
    bench_t *slot;

    if (smooth_benchmark_count >= MAX_BENCHMARKS) {
        fprintf(stderr, "Error: cannot register more than %d smooth functions\n",
                MAX_BENCHMARKS);
        return;
    }
    slot = &benchmarks_smooth[smooth_benchmark_count];
    slot->tfunct = f;
    slot->description = description;
    slot->valid = 0;
    smooth_benchmark_count++;
}
/*
 * add_rotate_function - Register a rotate implementation f with the
 * driver under the given description.  The entry starts out not selected
 * for testing (valid == 0).
 *
 * Registrations beyond MAX_BENCHMARKS are rejected with a message on
 * stderr instead of writing past the end of benchmarks_rotate[].
 */
void add_rotate_function(lab_test_func f, char *description)
{
    bench_t *slot;

    if (rotate_benchmark_count >= MAX_BENCHMARKS) {
        fprintf(stderr, "Error: cannot register more than %d rotate functions\n",
                MAX_BENCHMARKS);
        return;
    }
    slot = &benchmarks_rotate[rotate_benchmark_count];
    slot->tfunct = f;
    slot->description = description;
    slot->valid = 0;
    rotate_benchmark_count++;
}
/*
 * random_in_interval - Draw one value from rand() and map it into the
 * half-open interval [low, high).
 */
static int random_in_interval(int low, int high)
{
    return low + rand() % (high - low);
}
/*
 * create - creates a dimxdim image aligned to a BSIZE byte boundary
 *
 * Sets orig to the first BSIZE-aligned address inside data[], then places
 * result and copy_of_orig directly after it.  orig gets random colors,
 * copy_of_orig an identical copy, and result is cleared to black.
 */
static void create(int dim)
{
    int i, j;
    size_t misalign;

    /*
     * Align the images to BSIZE byte boundaries.  The offset is computed in
     * bytes and applied once: at most BSIZE-2 bytes are skipped (data[] is
     * an array of 2-byte-aligned pixels), which is exactly what the padding
     * at the end of data[] provides.
     */
    orig = data;
    misalign = (size_t) orig % BSIZE;
    if (misalign != 0)
        orig = (pixel *) ((char *) orig + (BSIZE - misalign));

    result = orig + dim*dim;
    copy_of_orig = result + dim*dim;

    for (i = 0; i < dim; i++) {
        for (j = 0; j < dim; j++) {
            int idx = RIDX(i,j,dim);

            /* Original image initialized to random colors */
            orig[idx].red = random_in_interval(0, 65536);
            orig[idx].green = random_in_interval(0, 65536);
            orig[idx].blue = random_in_interval(0, 65536);

            /* Copy of original image for checking result */
            copy_of_orig[idx].red = orig[idx].red;
            copy_of_orig[idx].green = orig[idx].green;
            copy_of_orig[idx].blue = orig[idx].blue;

            /* Result image initialized to all black */
            result[idx].red = 0;
            result[idx].green = 0;
            result[idx].blue = 0;
        }
    }
}
/*
 * compare_pixels - Returns 1 if any of the red, green or blue channels of
 * the two pixels differ, 0 if all three match.
 */
static int compare_pixels(pixel p1, pixel p2)
{
    if (p1.red != p2.red)
        return 1;
    if (p1.green != p2.green)
        return 1;
    return p1.blue != p2.blue;
}
/*
 * check_orig - Make sure the orig array still matches copy_of_orig.
 * Prints an error and returns 1 at the first differing pixel, else returns 0.
 */
static int check_orig(int dim)
{
    int row, col;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            int idx = RIDX(row,col,dim);

            if (!compare_pixels(orig[idx], copy_of_orig[idx]))
                continue;
            printf("\n");
            printf("Error: Original image has been changed!\n");
            return 1;
        }
    }
    return 0;
}
/*
 * check_rotate - Verify that result holds orig rotated so that
 * src[i][j] ends up at dst[dim-1-j][i].
 *
 * Returns 1 if orig itself was modified, otherwise the number of
 * mismatching pixels (0 on success).  On mismatch, the last offending
 * pixel pair is printed as an example.
 */
static int check_rotate(int dim)
{
    int mismatches = 0;
    int row, col;
    int last_row = 0;
    int last_col = 0;
    pixel src_px, dst_px;

    /* The input image must be untouched */
    if (check_orig(dim))
        return 1;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            int src_idx = RIDX(row,col,dim);
            int dst_idx = RIDX(dim-1-col,row,dim);

            if (!compare_pixels(orig[src_idx], result[dst_idx]))
                continue;
            mismatches++;
            last_row = row;
            last_col = col;
            src_px = orig[src_idx];
            dst_px = result[dst_idx];
        }
    }

    if (mismatches == 0)
        return 0;

    printf("\n");
    printf("ERROR: Dimension=%d, %d errors\n", dim, mismatches);
    printf("E.g., The following two pixels should have equal value:\n");
    printf("src[%d][%d].{red,green,blue} = {%d,%d,%d}\n",
           last_row, last_col, src_px.red, src_px.green, src_px.blue);
    printf("dst[%d][%d].{red,green,blue} = {%d,%d,%d}\n",
           (dim-1-last_col), last_row, dst_px.red, dst_px.green, dst_px.blue);
    return mismatches;
}
/*
 * check_average - Reference computation for smooth: returns the per-channel
 * integer average of src over the window of pixels within one row and one
 * column of (i,j), clipped to the image bounds.
 */
static pixel check_average(int dim, int i, int j, pixel *src) {
    pixel averaged;
    int count = 0;
    int red_total = 0, green_total = 0, blue_total = 0;
    int row, col;
    /* Window corners, clamped to [0, dim-1] */
    int row_first = (i-1 > 0) ? i-1 : 0;
    int col_first = (j-1 > 0) ? j-1 : 0;
    int row_last = (i+1 < dim-1) ? i+1 : dim-1;
    int col_last = (j+1 < dim-1) ? j+1 : dim-1;

    for (row = row_first; row <= row_last; row++) {
        for (col = col_first; col <= col_last; col++) {
            const pixel *p = &src[RIDX(row,col,dim)];

            count++;
            red_total += (int) p->red;
            green_total += (int) p->green;
            blue_total += (int) p->blue;
        }
    }

    averaged.red = (unsigned short) (red_total/count);
    averaged.green = (unsigned short) (green_total/count);
    averaged.blue = (unsigned short) (blue_total/count);
    return averaged;
}
/*
 * check_smooth - Verify that every pixel of result equals the reference
 * average computed by check_average() from orig.
 *
 * Returns 1 if orig itself was modified, otherwise the number of
 * mismatching pixels (0 on success).  On mismatch, the last offending
 * pixel is printed next to its expected value.
 */
static int check_smooth(int dim) {
    int mismatches = 0;
    int row, col;
    int last_row = 0;
    int last_col = 0;
    pixel expected, actual;

    /* The input image must be untouched */
    if (check_orig(dim))
        return 1;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            pixel reference = check_average(dim, row, col, orig);

            if (!compare_pixels(result[RIDX(row,col,dim)], reference))
                continue;
            mismatches++;
            last_row = row;
            last_col = col;
            actual = result[RIDX(row,col,dim)];
            expected = reference;
        }
    }

    if (mismatches == 0)
        return 0;

    printf("\n");
    printf("ERROR: Dimension=%d, %d errors\n", dim, mismatches);
    printf("E.g., \n");
    printf("You have dst[%d][%d].{red,green,blue} = {%d,%d,%d}\n",
           last_row, last_col, actual.red, actual.green, actual.blue);
    printf("It should be dst[%d][%d].{red,green,blue} = {%d,%d,%d}\n",
           last_row, last_col, expected.red, expected.green, expected.blue);
    return mismatches;
}
void func_wrapper(void *arglist[])
{
pixel *src, *dst;
int mydim;
lab_test_func f;
f = (lab_test_func) arglist[0];
mydim = *((int *) arglist[1]);
src = (pixel *) arglist[2];
dst = (pixel *) arglist[3];
(*f)(mydim, src, dst);
return;
}
/*
* run_rotate_benchmark - Run rotate version number idx once on the current
* orig image of size dim x dim, writing into result.
*/
void run_rotate_benchmark(int idx, int dim)
{
benchmarks_rotate[idx].tfunct(dim, orig, result);
}
/*
* test_rotate - Check and time one registered rotate version.
*
* For each of the DIM_CNT dimensions in test_dim_rotate: verify the version
* on an ODD_DIM image and on an image of that dimension, then measure its
* cycles per element with fcyc_v(). If any check fails, a message is printed
* and the function returns without printing results. Otherwise a results
* table and the geometric-mean speedup over the baseline are printed, and
* rotate_maxmean/rotate_maxmean_desc are updated if this version is the best
* seen so far.
*/
void test_rotate(int bench_index)
{
int i;
int test_num;
char *description = benchmarks_rotate[bench_index].description;
for (test_num = 0; test_num < DIM_CNT; test_num++) {
int dim;
/* Check correctness for a dimension that is not a power of 2 */
create(ODD_DIM);
run_rotate_benchmark(bench_index, ODD_DIM);
if (check_rotate(ODD_DIM)) {
printf("Benchmark \"%s\" failed correctness check for dimension %d.\n",
benchmarks_rotate[bench_index].description, ODD_DIM);
return;
}
/* Create a test image of the required dimension */
dim = test_dim_rotate[test_num];
create(dim);
#ifdef DEBUG
printf("DEBUG: Running benchmark \"%s\"\n", benchmarks_rotate[bench_index].description);
#endif
/* Check that the code works */
run_rotate_benchmark(bench_index, dim);
if (check_rotate(dim)) {
printf("Benchmark \"%s\" failed correctness check for dimension %d.\n",
benchmarks_rotate[bench_index].description, dim);
return;
}
/* Measure CPE */
{
double num_cycles, cpe;
int tmpdim = dim;
void *arglist[4];
double dimension = (double) dim;
double work = dimension*dimension; /* one unit of work per pixel */
#ifdef DEBUG
printf("DEBUG: dimension=%.1f\n",dimension);
printf("DEBUG: work=%.1f\n",work);
#endif
/* Argument layout expected by func_wrapper() */
arglist[0] = (void *) benchmarks_rotate[bench_index].tfunct;
arglist[1] = (void *) &tmpdim;
arglist[2] = (void *) orig;
arglist[3] = (void *) result;
/* Regenerate the images so the timed run starts from a fresh state */
create(dim);
num_cycles = fcyc_v((test_funct_v)&func_wrapper, arglist);
cpe = num_cycles/work;
benchmarks_rotate[bench_index].cpes[test_num] = cpe;
}
}
/*
* Print results as a table
*/
printf("Rotate: Version = %s:\n", description);
printf("Dim\t");
for (i = 0; i < DIM_CNT; i++)
printf("\t%d", test_dim_rotate[i]);
printf("\tMean\n");
printf("Your CPEs");
for (i = 0; i < DIM_CNT; i++) {
printf("\t%.1f", benchmarks_rotate[bench_index].cpes[i]);
}
printf("\n");
printf("Baseline CPEs");
for (i = 0; i < DIM_CNT; i++) {
printf("\t%.1f", rotate_baseline_cpes[i]);
}
printf("\n");
/* Compute Speedup */
{
double prod, ratio, mean;
prod = 1.0; /* running product of the per-dimension speedups */
printf("Speedup\t");
for (i = 0; i < DIM_CNT; i++) {
if (benchmarks_rotate[bench_index].cpes[i] > 0.0) {
ratio = rotate_baseline_cpes[i]/
benchmarks_rotate[bench_index].cpes[i];
}
else {
printf("Fatal Error: Non-positive CPE value...\n");
exit(EXIT_FAILURE);
}
prod *= ratio;
printf("\t%.1f", ratio);
}
/* Geometric mean */
mean = pow(prod, 1.0/(double) DIM_CNT);
printf("\t%.1f", mean);
printf("\n\n");
/* Remember the best-scoring version for the final summary */
if (mean > rotate_maxmean) {
rotate_maxmean = mean;
rotate_maxmean_desc = benchmarks_rotate[bench_index].description;
}
}
#ifdef DEBUG
fflush(stdout);
#endif
return;
}
/*
* run_smooth_benchmark - Run smooth version number idx once on the current
* orig image of size dim x dim, writing into result.
*/
void run_smooth_benchmark(int idx, int dim)
{
benchmarks_smooth[idx].tfunct(dim, orig, result);
}
/*
* test_smooth - Check and time one registered smooth version.
*
* For each of the DIM_CNT dimensions in test_dim_smooth: verify the version
* on an ODD_DIM image and on an image of that dimension, then measure its
* cycles per element with fcyc_v(). If any check fails, a message is printed
* and the function returns without printing results. Otherwise a results
* table and the geometric-mean speedup over the baseline are printed, and
* smooth_maxmean/smooth_maxmean_desc are updated if this version is the best
* seen so far.
*/
void test_smooth(int bench_index)
{
int i;
int test_num;
char *description = benchmarks_smooth[bench_index].description;
for(test_num=0; test_num < DIM_CNT; test_num++) {
int dim;
/* Check correctness for a dimension that is not a power of 2 */
create(ODD_DIM);
run_smooth_benchmark(bench_index, ODD_DIM);
if (check_smooth(ODD_DIM)) {
printf("Benchmark \"%s\" failed correctness check for dimension %d.\n",
benchmarks_smooth[bench_index].description, ODD_DIM);
return;
}
/* Create a test image of the required dimension */
dim = test_dim_smooth[test_num];
create(dim);
#ifdef DEBUG
printf("DEBUG: Running benchmark \"%s\"\n", benchmarks_smooth[bench_index].description);
#endif
/* Check that the code works */
run_smooth_benchmark(bench_index, dim);
if (check_smooth(dim)) {
printf("Benchmark \"%s\" failed correctness check for dimension %d.\n",
benchmarks_smooth[bench_index].description, dim);
return;
}
/* Measure CPE */
{
double num_cycles, cpe;
int tmpdim = dim;
void *arglist[4];
double dimension = (double) dim;
double work = dimension*dimension; /* one unit of work per pixel */
#ifdef DEBUG
printf("DEBUG: dimension=%.1f\n",dimension);
printf("DEBUG: work=%.1f\n",work);
#endif
/* Argument layout expected by func_wrapper() */
arglist[0] = (void *) benchmarks_smooth[bench_index].tfunct;
arglist[1] = (void *) &tmpdim;
arglist[2] = (void *) orig;
arglist[3] = (void *) result;
/* Regenerate the images so the timed run starts from a fresh state */
create(dim);
num_cycles = fcyc_v((test_funct_v)&func_wrapper, arglist);
cpe = num_cycles/work;
benchmarks_smooth[bench_index].cpes[test_num] = cpe;
}
}
/* Print results as a table */
printf("Smooth: Version = %s:\n", description);
printf("Dim\t");
for (i = 0; i < DIM_CNT; i++)
printf("\t%d", test_dim_smooth[i]);
printf("\tMean\n");
printf("Your CPEs");
for (i = 0; i < DIM_CNT; i++) {
printf("\t%.1f", benchmarks_smooth[bench_index].cpes[i]);
}
printf("\n");
printf("Baseline CPEs");
for (i = 0; i < DIM_CNT; i++) {
printf("\t%.1f", smooth_baseline_cpes[i]);
}
printf("\n");
/* Compute speedup */
{
double prod, ratio, mean;
prod = 1.0; /* running product of the per-dimension speedups */
printf("Speedup\t");
for (i = 0; i < DIM_CNT; i++) {
if (benchmarks_smooth[bench_index].cpes[i] > 0.0) {
ratio = smooth_baseline_cpes[i]/
benchmarks_smooth[bench_index].cpes[i];
}
else {
printf("Fatal Error: Non-positive CPE value...\n");
exit(EXIT_FAILURE);
}
prod *= ratio;
printf("\t%.1f", ratio);
}
/* Geometric mean */
mean = pow(prod, 1.0/(double) DIM_CNT);
printf("\t%.1f", mean);
printf("\n\n");
/* Remember the best-scoring version for the final summary */
if (mean > smooth_maxmean) {
smooth_maxmean = mean;
smooth_maxmean_desc = benchmarks_smooth[bench_index].description;
}
}
return;
}
/*
 * usage - Print the command line synopsis and option list to stderr,
 * then terminate the program with EXIT_FAILURE.
 */
void usage(char *progname)
{
    static const char *const option_help[] = {
        "Options:\n",
        " -h Print this message\n",
        " -q Quit after dumping (use with -d )\n",
        " -g Autograder mode: checks only rotate() and smooth()\n",
        " -f <file> Get test function names from dump file <file>\n",
        " -d <file> Emit a dump file <file> for later use with -f\n",
    };
    size_t line;

    fprintf(stderr, "Usage: %s [-hqg] [-f <func_file>] [-d <dump_file>]\n", progname);
    for (line = 0; line < sizeof option_help / sizeof option_help[0]; line++)
        fputs(option_help[line], stderr);
    exit(EXIT_FAILURE);
}
/*
 * main - Driver entry point.
 *
 * Registers the kernels from kernels.c, parses the command line, selects
 * which registered versions to run (autograder mode, a -f selection file,
 * or everything), runs the rotate and smooth tests, and prints the best
 * scores.  Returns 0 on normal completion; exits early on usage errors,
 * unreadable files, or an unfilled team struct.
 */
int main(int argc, char *argv[])
{
    int i;
    int quit_after_dump = 0;
    int skip_teamname_check = 0;
    int autograder = 0;
    int seed = 1729;
    /* getopt() returns int; storing it in a char never compares equal to -1
       on platforms where plain char is unsigned, so the loop would not end */
    int c;
    char *bench_func_file = NULL;
    char *func_dump_file = NULL;

    /* register all the defined functions */
    register_rotate_functions();
    register_smooth_functions();

    /* parse command line args */
    while ((c = getopt(argc, argv, "tgqf:d:s:h")) != -1)
        switch (c) {
        case 't': /* skip team name check (hidden flag) */
            skip_teamname_check = 1;
            break;
        case 's': /* seed for random number generator (hidden flag) */
            seed = atoi(optarg);
            break;
        case 'g': /* autograder mode (checks only rotate() and smooth()) */
            autograder = 1;
            break;
        case 'q':
            quit_after_dump = 1;
            break;
        case 'f': /* get names of benchmark functions from this file */
            bench_func_file = strdup(optarg);
            break;
        case 'd': /* dump names of benchmark functions to this file */
            func_dump_file = strdup(optarg);
            {
                int k;
                FILE *fp = fopen(func_dump_file, "w");
                if (fp == NULL) {
                    printf("Can't open file %s\n",func_dump_file);
                    exit(-5);
                }
                for(k = 0; k < rotate_benchmark_count; k++) {
                    fprintf(fp, "R:%s\n", benchmarks_rotate[k].description);
                }
                for(k = 0; k < smooth_benchmark_count; k++) {
                    fprintf(fp, "S:%s\n", benchmarks_smooth[k].description);
                }
                fclose(fp);
            }
            break;
        case 'h': /* print help message */
            usage(argv[0]); /* does not return */
            break;
        default: /* unrecognized argument */
            usage(argv[0]); /* does not return */
            break;
        }

    if (quit_after_dump)
        exit(EXIT_SUCCESS);

    /* Print team info */
    if (!skip_teamname_check) {
        if (strcmp("bovik", team.team) == 0) {
            printf("%s: Please fill in the team struct in kernels.c.\n", argv[0]);
            exit(1);
        }
        printf("Teamname: %s\n", team.team);
        printf("Member 1: %s\n", team.name1);
        printf("Email 1: %s\n", team.email1);
        if (*team.name2 || *team.email2) {
            printf("Member 2: %s\n", team.name2);
            printf("Email 2: %s\n", team.email2);
        }
        printf("\n");
    }

    srand(seed);

    /*
     * If we are running in autograder mode, we will only test
     * the rotate() and smooth() functions.
     */
    if (autograder) {
        rotate_benchmark_count = 1;
        smooth_benchmark_count = 1;
        benchmarks_rotate[0].tfunct = rotate;
        benchmarks_rotate[0].description = "rotate() function";
        benchmarks_rotate[0].valid = 1;
        benchmarks_smooth[0].tfunct = smooth;
        benchmarks_smooth[0].description = "smooth() function";
        benchmarks_smooth[0].valid = 1;
    }
    /*
     * If the user specified a file name using -f, then use
     * the file to determine the versions of rotate and smooth to test
     */
    else if (bench_func_file != NULL) {
        char flag;
        char func_line[256];
        FILE *fp = fopen(bench_func_file, "r");
        if (fp == NULL) {
            printf("Can't open file %s\n",bench_func_file);
            exit(-5);
        }
        /* Each line has the form "<R|S>:<description>" as written by -d */
        while (fgets(func_line, sizeof func_line, fp) != NULL) {
            char *cursor = func_line;
            char *token = strsep(&cursor, ":");
            char *func_name;

            flag = token[0];
            /* strsep leaves cursor NULL when the line has no ':'; skip such
               lines instead of passing a NULL name to strcmp() */
            if (cursor == NULL)
                continue;
            func_name = strsep(&cursor, "\n");
#ifdef DEBUG
            printf("Function Description is %s\n",func_name);
#endif
            if (flag == 'R') {
                for(i=0; i<rotate_benchmark_count; i++) {
                    if (strcmp(benchmarks_rotate[i].description, func_name) == 0)
                        benchmarks_rotate[i].valid = 1;
                }
            }
            else if (flag == 'S') {
                for(i=0; i<smooth_benchmark_count; i++) {
                    if (strcmp(benchmarks_smooth[i].description, func_name) == 0)
                        benchmarks_smooth[i].valid = 1;
                }
            }
        }
        fclose(fp);
    }
    /*
     * If the user didn't specify a function file using -f, then
     * test all of the functions
     */
    else { /* set all valid flags to 1 */
        for (i = 0; i < rotate_benchmark_count; i++)
            benchmarks_rotate[i].valid = 1;
        for (i = 0; i < smooth_benchmark_count; i++)
            benchmarks_smooth[i].valid = 1;
    }

    /* Set measurement (fcyc) parameters */
    set_fcyc_cache_size(1 << 14); /* 16 KB cache size */
    set_fcyc_clear_cache(1); /* clear the cache before each measurement */
    set_fcyc_compensate(1); /* try to compensate for timer overhead */

    for (i = 0; i < rotate_benchmark_count; i++) {
        if (benchmarks_rotate[i].valid)
            test_rotate(i);
    }
    for (i = 0; i < smooth_benchmark_count; i++) {
        if (benchmarks_smooth[i].valid)
            test_smooth(i);
    }

    if (autograder) {
        printf("\nbestscores:%.1f:%.1f:\n", rotate_maxmean, smooth_maxmean);
    }
    else {
        /* The description pointers stay NULL when no version produced a
           score; never hand a NULL pointer to %s */
        printf("Summary of Your Best Scores:\n");
        printf(" Rotate: %3.1f (%s)\n", rotate_maxmean,
               rotate_maxmean_desc ? rotate_maxmean_desc : "(null)");
        printf(" Smooth: %3.1f (%s)\n", smooth_maxmean,
               smooth_maxmean_desc ? smooth_maxmean_desc : "(null)");
    }
    return 0;
}

270
perf/fcyc.c Normal file
View File

@ -0,0 +1,270 @@
/* Compute time used by function f */
#include <stdlib.h>
#include <sys/times.h>
#include <stdio.h>
#include "clock.h"
#include "fcyc.h"
/* Default values for the tunable measurement parameters (see the set_fcyc_* functions) */
#define K 3 /* number of best (smallest) samples that must agree */
#define MAXSAMPLES 20 /* give up looking for convergence after this many samples */
#define EPSILON 0.01 /* relative tolerance between the best and K-th best sample */
#define COMPENSATE 0 /* nonzero: use the compensated counter */
#define CLEAR_CACHE 0 /* nonzero: run clear() before each sample */
#define CACHE_BYTES (1<<19) /* size of the buffer read by clear() */
#define CACHE_BLOCK 32 /* stride in bytes used by clear() */
/* Current parameter values */
static int kbest = K;
static int compensate = COMPENSATE;
static int clear_cache = CLEAR_CACHE;
static int maxsamples = MAXSAMPLES;
static double epsilon = EPSILON;
static int cache_bytes = CACHE_BYTES;
static int cache_block = CACHE_BLOCK;
/* Buffer read by clear(); allocated on first use */
static int *cache_buf = NULL;
/* The kbest smallest samples seen so far, kept in ascending order */
static double *values = NULL;
/* Number of samples taken in the current measurement */
static int samplecount = 0;
#define KEEP_VALS 0 /* nonzero: do not free values[] after a measurement */
#define KEEP_SAMPLES 0 /* nonzero: additionally record every sample */
#if KEEP_SAMPLES
static double *samples = NULL;
#endif
/*
 * init_sampler - Start a new sampling process: discard any previous
 * results, allocate zeroed storage for the kbest smallest samples and
 * reset the sample count.
 *
 * Exits with an error message if the allocation fails, matching what
 * clear() does for its buffer; add_sample() would otherwise write through
 * a null pointer.
 */
static void init_sampler()
{
    free(values);
    values = calloc(kbest, sizeof(double));
    if (!values) {
        fprintf(stderr, "Fatal error. Calloc returned null when trying to allocate sample values\n");
        exit(1);
    }
#if KEEP_SAMPLES
    free(samples);
    /* Allocate extra for wraparound analysis */
    samples = calloc(maxsamples+kbest, sizeof(double));
    if (!samples) {
        fprintf(stderr, "Fatal error. Calloc returned null when trying to allocate samples\n");
        exit(1);
    }
#endif
    samplecount = 0;
}
/*
 * add_sample - Record one measurement.  values[] keeps the kbest smallest
 * samples in ascending order: the first kbest samples are always stored,
 * later ones only when they beat the current largest entry.
 */
static void add_sample(double val)
{
    int slot;

    if (samplecount < kbest)
        slot = samplecount;         /* still filling the table */
    else if (val < values[kbest-1])
        slot = kbest-1;             /* replaces the current largest entry */
    else
        slot = -1;                  /* not among the kbest smallest */

#if KEEP_SAMPLES
    samples[samplecount] = val;
#endif
    samplecount++;

    if (slot < 0)
        return;

    values[slot] = val;
    /* Bubble the new entry down to its sorted position */
    for (; slot > 0 && values[slot-1] > values[slot]; slot--) {
        double swapped = values[slot-1];
        values[slot-1] = values[slot];
        values[slot] = swapped;
    }
}
/*
 * has_converged - Returns nonzero once at least kbest samples exist and
 * the kbest-th smallest is within a factor (1 + epsilon) of the smallest.
 */
static int has_converged()
{
    if (samplecount < kbest)
        return 0;
    return (1 + epsilon)*values[0] >= values[kbest-1];
}
/* Code to clear cache */
/* Volatile so the reads performed by clear() cannot be optimized away */
static volatile int sink = 0;
/*
 * clear - Read one int out of every cache_block bytes of a cache_bytes
 * sized buffer, with the intent of evicting the measured code's data
 * from the cache.  The buffer is allocated on first use.
 *
 * The buffer is written once after allocation so that the reads below use
 * defined values (summing indeterminate ints could also overflow), and the
 * stride is forced to at least one element so that a cache_block smaller
 * than sizeof(int) cannot make the loop spin forever.
 */
static void clear()
{
    int x = sink;
    size_t word;
    size_t nwords = cache_bytes/sizeof(int);
    size_t incr = cache_block/sizeof(int);

    if (incr < 1)
        incr = 1;
    if (!cache_buf) {
        cache_buf = malloc(cache_bytes);
        if (!cache_buf) {
            fprintf(stderr, "Fatal error. Malloc returned null when trying to clear cache\n");
            exit(1);
        }
        for (word = 0; word < nwords; word++)
            cache_buf[word] = 0;
    }
    for (word = 0; word < nwords; word += incr)
        x += cache_buf[word];
    sink = x;
}
/*
 * fcyc - Measure the number of cycles used by f(params).
 *
 * Samples f repeatedly until the kbest smallest measurements have
 * converged or maxsamples samples were taken, and returns the smallest
 * measurement.
 */
double fcyc(test_funct f, int *params)
{
    /* The counter flavor is chosen once, before the first sample */
    const int use_comp = compensate;
    double best;

    init_sampler();
    do {
        double cyc;

        if (clear_cache)
            clear();
        if (use_comp) {
            start_comp_counter();
            f(params);
            cyc = get_comp_counter();
        } else {
            start_counter();
            f(params);
            cyc = get_counter();
        }
        add_sample(cyc);
    } while (!has_converged() && samplecount < maxsamples);
#ifdef DEBUG
    {
        int i;
        printf(" %d smallest values: [", kbest);
        for (i = 0; i < kbest; i++)
            printf("%.0f%s", values[i], i==kbest-1 ? "]\n" : ", ");
    }
#endif
    best = values[0];
#if !KEEP_VALS
    free(values);
    values = NULL;
#endif
    return best;
}
/*
 * fcyc_v - Same measurement as fcyc(), but for test functions that take
 * an array of untyped pointers so arguments of any type can be passed.
 * Added by Sanjit, Fall 2001
 */
double fcyc_v(test_funct_v f, void *params[])
{
    /* The counter flavor is chosen once, before the first sample */
    const int use_comp = compensate;
    double best;

    init_sampler();
    do {
        double cyc;

        if (clear_cache)
            clear();
        if (use_comp) {
            start_comp_counter();
            f(params);
            cyc = get_comp_counter();
        } else {
            start_counter();
            f(params);
            cyc = get_counter();
        }
        add_sample(cyc);
    } while (!has_converged() && samplecount < maxsamples);
#ifdef DEBUG
    {
        int i;
        printf(" %d smallest values: [", kbest);
        for (i = 0; i < kbest; i++)
            printf("%.0f%s", values[i], i==kbest-1 ? "]\n" : ", ");
    }
#endif
    best = values[0];
#if !KEEP_VALS
    free(values);
    values = NULL;
#endif
    return best;
}
/***********************************************************/
/* Set the various parameters used by measurement routines */
/* When set (nonzero), fcyc()/fcyc_v() run the cache-clearing code before
each measurement.
Default = 0
*/
void set_fcyc_clear_cache(int clear)
{
clear_cache = clear;
}
/* Set size of cache to use when clearing cache
   Default = 1<<19 (512KB)
   Changing the size drops the current buffer so that the next clear()
   allocates one of the new size.
*/
void set_fcyc_cache_size(int bytes)
{
    if (bytes == cache_bytes)
        return;
    cache_bytes = bytes;
    free(cache_buf);
    cache_buf = NULL;
}
/* Set size of cache block in bytes; clear() uses it as the distance
between the addresses it reads.
Default = 32
*/
void set_fcyc_cache_block(int bytes) {
cache_block = bytes;
}
/* When set (nonzero), measurements use start_comp_counter()/get_comp_counter(),
which attempt to compensate for timer interrupt overhead.
Default = 0
*/
void set_fcyc_compensate(int compensate_arg)
{
compensate = compensate_arg;
}
/* Value of K in K-best: how many of the smallest samples are kept and
compared for convergence.
Default = 3
NOTE(review): no range check here; the sampler indexes values[kbest-1],
so k is presumably expected to be at least 1 - confirm with callers.
*/
void set_fcyc_k(int k)
{
kbest = k;
}
/* Maximum number of samples taken while attempting to find K-best within
some tolerance.
When exceeded, just return best sample found.
Default = 20
*/
void set_fcyc_maxsamples(int maxsamples_arg)
{
maxsamples = maxsamples_arg;
}
/* Tolerance required for K-best: the K-th smallest sample must be within
a factor (1 + epsilon) of the smallest.
Default = 0.01
*/
void set_fcyc_epsilon(double epsilon_arg)
{
epsilon = epsilon_arg;
}

55
perf/fcyc.h Normal file
View File

@ -0,0 +1,55 @@
/* Fcyc measures the speed of any "test function." Such a function
   is passed a list of integer parameters, which it may interpret
   in any way it chooses.
*/
#ifndef FCYC_H
#define FCYC_H

/* Test function taking a list of integer parameters */
typedef void (*test_funct)(int *);
/* Test function taking untyped parameters (used with fcyc_v) */
typedef void (*test_funct_v)(void *);

/* Compute number of cycles used by function f on given set of parameters */
double fcyc(test_funct f, int* params);
double fcyc_v(test_funct_v f, void* params[]);

/***********************************************************/
/* Set the various parameters used by measurement routines */

/* When set, will run code to clear cache before each measurement
   Default = 0
*/
void set_fcyc_clear_cache(int clear);

/* Set size of cache to use when clearing cache
   Default = 1<<19 (512KB)
*/
void set_fcyc_cache_size(int bytes);

/* Set size of cache block
   Default = 32
*/
void set_fcyc_cache_block(int bytes);

/* When set, will attempt to compensate for timer interrupt overhead
   Default = 0
*/
void set_fcyc_compensate(int compensate);

/* Value of K in K-best
   Default = 3
*/
void set_fcyc_k(int k);

/* Maximum number of samples attempting to find K-best within some tolerance.
   When exceeded, just return best sample found.
   Default = 20
*/
void set_fcyc_maxsamples(int maxsamples);

/* Tolerance required for K-best
   Default = 0.01
*/
void set_fcyc_epsilon(double epsilon);

#endif /* FCYC_H */

183
perf/kernels.c Normal file
View File

@ -0,0 +1,183 @@
/********************************************************
* Kernels to be optimized for the CS:APP Performance Lab
********************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "defs.h"
/*
* Please fill in the following team struct
*/
team_t team = {
"bovik", /* Team name */
"Harry Q. Bovik", /* First member full name */
"bovik@nowhere.edu", /* First member email address */
"", /* Second member full name (leave blank if none) */
"" /* Second member email addr (leave blank if none) */
};
/***************
* ROTATE KERNEL
***************/
/******************************************************
* Your different versions of the rotate kernel go here
******************************************************/
/*
 * naive_rotate - The naive baseline version of rotate: copies each source
 * pixel (row, col) to destination position (dim-1-col, row).
 */
char naive_rotate_descr[] = "naive_rotate: Naive baseline implementation";
void naive_rotate(int dim, pixel *src, pixel *dst)
{
    int row, col;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            int from = RIDX(row, col, dim);
            int to = RIDX(dim-1-col, row, dim);

            dst[to] = src[from];
        }
    }
}
/*
* rotate - Your current working version of rotate
* IMPORTANT: This is the version you will be graded on
*
* As handed out it simply delegates to naive_rotate().
*/
char rotate_descr[] = "rotate: Current working version";
void rotate(int dim, pixel *src, pixel *dst)
{
naive_rotate(dim, src, dst);
}
/*********************************************************************
* register_rotate_functions - Register all of your different versions
* of the rotate kernel with the driver by calling the
* add_rotate_function() for each test function. When you run the
* driver program, it will test and report the performance of each
* registered test function.
*
* Each call pairs a kernel with the description string defined next to it.
*********************************************************************/
void register_rotate_functions()
{
add_rotate_function(&naive_rotate, naive_rotate_descr);
add_rotate_function(&rotate, rotate_descr);
/* ... Register additional test functions here */
}
/***************
* SMOOTH KERNEL
**************/
/***************************************************************
* Various typedefs and helper functions for the smooth function
* You may modify these any way you like.
**************************************************************/
/*
 * A struct used to compute averaged pixel value.
 * It holds running per-channel totals plus the number of pixels that
 * contributed to them; the average is total / num for each channel.
 */
typedef struct {
int red; /* running total of the red values added so far */
int green; /* running total of the green values added so far */
int blue; /* running total of the blue values added so far */
int num; /* number of pixels accumulated into the totals */
} pixel_sum;
/* min - Return the smaller of two integers (b when they are equal). */
static int min(int a, int b)
{
    if (a < b) {
        return a;
    }
    return b;
}

/* max - Return the larger of two integers (b when they are equal). */
static int max(int a, int b)
{
    if (a > b) {
        return a;
    }
    return b;
}
/*
 * initialize_pixel_sum - Reset an accumulator before use.
 *
 * Sets the three channel totals and the pixel count of *sum to 0.
 * sum must point to a valid pixel_sum.
 */
static void initialize_pixel_sum(pixel_sum *sum)
{
    sum->num = 0;
    sum->red = 0;
    sum->green = 0;
    sum->blue = 0;
}
/*
 * accumulate_sum - Fold one pixel into a running sum.
 *
 * Adds p's red, green and blue values (each converted to int) to the
 * matching totals in *sum and increments sum->num by one.
 */
static void accumulate_sum(pixel_sum *sum, pixel p)
{
    const int r = (int) p.red;
    const int g = (int) p.green;
    const int b = (int) p.blue;

    sum->red = sum->red + r;
    sum->green = sum->green + g;
    sum->blue = sum->blue + b;
    sum->num = sum->num + 1;
}
/*
 * assign_sum_to_pixel - Store the per-channel average into a pixel.
 *
 * Each channel of *current_pixel receives the integer quotient of the
 * corresponding total in sum divided by sum.num, converted to
 * unsigned short. sum.num must be nonzero, since it is used as the
 * divisor.
 */
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum)
{
    const int count = sum.num;

    current_pixel->red = (unsigned short) (sum.red / count);
    current_pixel->green = (unsigned short) (sum.green / count);
    current_pixel->blue = (unsigned short) (sum.blue / count);
}
/*
 * avg - Returns averaged pixel value at (i,j)
 *
 * Averages the src pixels whose row lies in [i-1, i+1] and whose column
 * lies in [j-1, j+1], with both ranges clamped to [0, dim-1]. Positions
 * outside that range are left out of both the totals and the count.
 * Pixels are read through RIDX(row, col, dim).
 */
static pixel avg(int dim, int i, int j, pixel *src)
{
    const int first_row = max(i-1, 0);
    const int last_row = min(i+1, dim-1);
    pixel_sum window;
    pixel result;
    int row, col;

    initialize_pixel_sum(&window);

    for (row = first_row; row <= last_row; row++) {
        /* Column limits are the same for every row of the window. */
        const int first_col = max(j-1, 0);
        const int last_col = min(j+1, dim-1);

        for (col = first_col; col <= last_col; col++) {
            accumulate_sum(&window, src[RIDX(row, col, dim)]);
        }
    }

    assign_sum_to_pixel(&result, window);
    return result;
}
/******************************************************
* Your different versions of the smooth kernel go here
******************************************************/
/*
 * naive_smooth - The naive baseline version of smooth
 *
 * For every index pair (row, col) with 0 <= row, col < dim, stores the
 * result of avg(dim, row, col, src) into dst at RIDX(row, col, dim).
 * The pairs are visited with col varying fastest; nothing is done when
 * dim <= 0.
 */
char naive_smooth_descr[] = "naive_smooth: Naive baseline implementation";
void naive_smooth(int dim, pixel *src, pixel *dst)
{
    int row, col;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            pixel *target = &dst[RIDX(row, col, dim)];

            *target = avg(dim, row, col, src);
        }
    }
}
/*
 * smooth - Your current working version of smooth.
 * IMPORTANT: This is the version you will be graded on
 *
 * As written, this simply forwards dim, src and dst unchanged to
 * naive_smooth(), so it produces exactly the baseline result.
 * smooth_descr is the description string passed along with this function
 * in register_smooth_functions().
 */
char smooth_descr[] = "smooth: Current working version";
void smooth(int dim, pixel *src, pixel *dst)
{
naive_smooth(dim, src, dst);
}
/*********************************************************************
 * register_smooth_functions - Register all of your different versions
 *     of the smooth kernel with the driver by calling the
 *     add_smooth_function() for each test function. When you run the
 *     driver program, it will test and report the performance of each
 *     registered test function.
 *
 *     Takes no arguments and returns nothing. The parameter list is
 *     spelled (void) so the definition is a real prototype rather than
 *     an old-style definition with unspecified parameters.
 *     Functions are registered in the order of the calls below:
 *     smooth first, then naive_smooth.
 *********************************************************************/
void register_smooth_functions(void)
{
    add_smooth_function(&smooth, smooth_descr);
    add_smooth_function(&naive_smooth, naive_smooth_descr);
    /* ... Register additional test functions here */
}