CSAPP-sol/cache/trans.c

229 lines
6.2 KiB
C

/*
* trans.c - Matrix transpose B = A^T
*
* Each transpose function must have a prototype of the form:
* void trans(int M, int N, int A[N][M], int B[M][N]);
*
* A transpose function is evaluated by counting the number of misses
* on a 1KB direct mapped cache with a block size of 32 bytes.
*/
#include <stdio.h>
#include "cachelab.h"
/* Forward declaration of the correctness checker defined further down in this file. */
int is_transpose(int M, int N, int A[N][M], int B[M][N]);
/*
* transpose_submit - This is the solution transpose function that you
* will be graded on for Part B of the assignment. Do not change
* the description string "Transpose submission", as the driver
* searches for that string to identify the transpose function to
* be graded.
*/
/* Description string passed together with transpose_submit to
 * registerTransFunction() in registerFunctions(). */
char transpose_submit_desc[] = "Transpose submission";
/*
 * transpose_sub - Transpose, in place, the size x size square of B whose
 * top-left corner is B[t][l]. N is the row length of B.
 *
 * Every pair of cells mirrored across the square's diagonal is exchanged
 * with a three-step XOR swap, so no temporary int is needed. The XOR swap
 * is only valid for two distinct objects; that holds here because the
 * inner index always starts one past the outer index.
 */
void transpose_sub(int N, int t, int l, int size, int B[][N]) {
    for (int row = 0; row < size; row++) {
        for (int col = row + 1; col < size; col++) {
            int *upper = &B[t + row][l + col]; /* cell above the diagonal */
            int *lower = &B[t + col][l + row]; /* its mirror below the diagonal */

            *upper ^= *lower;
            *lower ^= *upper;
            *upper ^= *lower;
        }
    }
}
/*
 * transpose_submit - Write the transpose of A into B (B = A^T).
 *
 * M is the number of columns of A (and rows of B); N is the number of
 * rows of A (and columns of B).
 *
 * Three access patterns are used, selected by shape:
 *   - M == 32: 8-column strips, one row of A at a time.
 *   - M == 64 with N a multiple of 8: 8x8 blocks handled as 4x4 quadrants.
 *     The N % 8 check keeps the 8-row stepping inside the matrices.
 *   - anything else: 17-column strips. The strip width matches what was
 *     used for M == 61; the loop itself is bounds-checked against M and N,
 *     so it is a correct transpose for every shape and guarantees B is
 *     always fully written.
 */
void transpose_submit(int M, int N, int A[N][M], int B[M][N]) {
    int v0, v1, v2, v3, v4, v5, v6, v7;
    if (M == 32) {
        for (int j = 0; j < M; j += 8) {
            for (int i = 0; i < N; i++) {
                /* Read all 8 values of this row segment before storing
                 * anything to B (presumably so stores to B cannot evict
                 * the A line while it is still being read). */
                v0 = A[i][j];
                v1 = A[i][j + 1];
                v2 = A[i][j + 2];
                v3 = A[i][j + 3];
                v4 = A[i][j + 4];
                v5 = A[i][j + 5];
                v6 = A[i][j + 6];
                v7 = A[i][j + 7];
                B[j][i] = v0;
                B[j + 1][i] = v1;
                B[j + 2][i] = v2;
                B[j + 3][i] = v3;
                B[j + 4][i] = v4;
                B[j + 5][i] = v5;
                B[j + 6][i] = v6;
                B[j + 7][i] = v7;
            }
        }
    } else if (M == 64 && N % 8 == 0) {
        for (int i = 0; i < N; i += 8) {
            for (int j = 0; j < M; j += 8) {
                if (i == j) {
                    /* Diagonal block: source and destination blocks have
                     * the same indices. Step 1: copy the top four rows of
                     * the A block straight into the same rows of B. */
                    for (int ii = i; ii < i + 4; ii++) {
                        v0 = A[ii][j];
                        v1 = A[ii][j + 1];
                        v2 = A[ii][j + 2];
                        v3 = A[ii][j + 3];
                        v4 = A[ii][j + 4];
                        v5 = A[ii][j + 5];
                        v6 = A[ii][j + 6];
                        v7 = A[ii][j + 7];
                        B[ii][j] = v0;
                        B[ii][j + 1] = v1;
                        B[ii][j + 2] = v2;
                        B[ii][j + 3] = v3;
                        B[ii][j + 4] = v4;
                        B[ii][j + 5] = v5;
                        B[ii][j + 6] = v6;
                        B[ii][j + 7] = v7;
                    }
                    /* Step 2: transpose both top 4x4 quadrants inside B. */
                    transpose_sub(N, i, j, 4, B);
                    transpose_sub(N, i, j + 4, 4, B);
                    /* Step 3: same copy for the bottom four rows. */
                    for (int ii = i + 4; ii < i + 8; ii++) {
                        v0 = A[ii][j];
                        v1 = A[ii][j + 1];
                        v2 = A[ii][j + 2];
                        v3 = A[ii][j + 3];
                        v4 = A[ii][j + 4];
                        v5 = A[ii][j + 5];
                        v6 = A[ii][j + 6];
                        v7 = A[ii][j + 7];
                        B[ii][j] = v0;
                        B[ii][j + 1] = v1;
                        B[ii][j + 2] = v2;
                        B[ii][j + 3] = v3;
                        B[ii][j + 4] = v4;
                        B[ii][j + 5] = v5;
                        B[ii][j + 6] = v6;
                        B[ii][j + 7] = v7;
                    }
                    /* Step 4: transpose both bottom 4x4 quadrants. */
                    transpose_sub(N, i + 4, j, 4, B);
                    transpose_sub(N, i + 4, j + 4, 4, B);
                    /* Step 5: every quadrant is now transposed in place;
                     * exchanging the top-right and bottom-left quadrants
                     * row by row completes the 8x8 transpose. */
                    for (int ii = i; ii < i + 4; ii++) {
                        v0 = B[ii + 4][j];
                        v1 = B[ii + 4][j + 1];
                        v2 = B[ii + 4][j + 2];
                        v3 = B[ii + 4][j + 3];
                        v4 = B[ii][j + 4];
                        v5 = B[ii][j + 5];
                        v6 = B[ii][j + 6];
                        v7 = B[ii][j + 7];
                        B[ii][j + 4] = v0;
                        B[ii][j + 5] = v1;
                        B[ii][j + 6] = v2;
                        B[ii][j + 7] = v3;
                        B[ii + 4][j] = v4;
                        B[ii + 4][j + 1] = v5;
                        B[ii + 4][j + 2] = v6;
                        B[ii + 4][j + 3] = v7;
                    }
                } else {
                    /* Off-diagonal block. Top four rows of the A block:
                     * the left half goes, transposed, to its final place
                     * in the top-left of the B block; the right half is
                     * parked, transposed, in the top-right of the B block. */
                    for (int ii = i; ii < i + 4; ii++) {
                        for (int jj = j; jj < j + 4; jj++) {
                            B[jj][ii] = A[ii][jj];
                        }
                        for (int jj = j + 4; jj < j + 8; jj++) {
                            B[jj - 4][ii + 4] = A[ii][jj];
                        }
                    }
                    /* For each of the top four B rows: save the parked
                     * values, overwrite them with the bottom-left quadrant
                     * of A (transposed), then move the saved values to
                     * their final place in the bottom-left of the B block. */
                    for (int jj = j; jj < j + 4; jj++) {
                        v0 = B[jj][i + 4];
                        v1 = B[jj][i + 5];
                        v2 = B[jj][i + 6];
                        v3 = B[jj][i + 7];
                        B[jj][i + 4] = A[i + 4][jj];
                        B[jj][i + 5] = A[i + 5][jj];
                        B[jj][i + 6] = A[i + 6][jj];
                        B[jj][i + 7] = A[i + 7][jj];
                        B[jj + 4][i] = v0;
                        B[jj + 4][i + 1] = v1;
                        B[jj + 4][i + 2] = v2;
                        B[jj + 4][i + 3] = v3;
                    }
                    /* Bottom-right quadrant is transposed directly. */
                    for (int ii = i + 4; ii < i + 8; ii++) {
                        for (int jj = j + 4; jj < j + 8; jj++) {
                            B[jj][ii] = A[ii][jj];
                        }
                    }
                }
            }
        }
    } else {
        /* General path: 17-column strips, clipped at M. Used for M == 61
         * and for every shape the tuned branches above do not accept. */
        for (int j = 0; j < M; j += 17) {
            for (int i = 0; i < N; i++) {
                for (int jj = j; jj < j + 17 && jj < M; jj++) {
                    B[jj][i] = A[i][jj];
                }
            }
        }
    }
}
/*
* You can define additional transpose functions below. We've defined
* a simple one below to help you get started.
*/
/*
* trans - A simple baseline transpose function, not optimized for the cache.
*/
/* Description string passed together with trans to registerTransFunction()
 * in registerFunctions(). */
char trans_desc[] = "Simple row-wise scan transpose";
/*
 * trans - Baseline transpose: visits A one row at a time, left to right,
 * and stores each element at the mirrored position in B.
 *
 * M is the column count of A (row count of B); N is the row count of A
 * (column count of B).
 */
void trans(int M, int N, int A[N][M], int B[M][N])
{
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < M; col++) {
            B[col][row] = A[row][col];
        }
    }
}
/*
* registerFunctions - This function registers your transpose
* functions with the driver. At runtime, the driver will
* evaluate each of the registered functions and summarize their
* performance. This is a handy way to experiment with different
* transpose strategies.
*/
void registerFunctions()
{
/* Register your solution function: transpose_submit, paired with its
 * description string transpose_submit_desc. */
registerTransFunction(transpose_submit, transpose_submit_desc);
/* Register any additional transpose functions: here the baseline trans,
 * paired with trans_desc.
 * NOTE(review): registerTransFunction is not defined in this file;
 * presumably it is declared in cachelab.h - confirm. */
registerTransFunction(trans, trans_desc);
}
/*
* is_transpose - This helper function checks if B is the transpose of
* A. You can check the correctness of your transpose by calling
* it before returning from the transpose function.
*/
/*
 * is_transpose - Report whether B equals the transpose of A.
 *
 * M is the column count of A (row count of B); N is the row count of A
 * (column count of B). Returns 1 when every A[row][col] equals
 * B[col][row], and 0 as soon as the first mismatch is found.
 */
int is_transpose(int M, int N, int A[N][M], int B[M][N])
{
    int row = 0;

    while (row < N) {
        int col = 0;

        while (col < M) {
            if (A[row][col] != B[col][row]) {
                return 0;
            }
            ++col;
        }
        ++row;
    }
    return 1;
}