/* * trans.c - Matrix transpose B = A^T * * Each transpose function must have a prototype of the form: * void trans(int M, int N, int A[N][M], int B[M][N]); * * A transpose function is evaluated by counting the number of misses * on a 1KB direct mapped cache with a block size of 32 bytes. */ #include #include "cachelab.h" int is_transpose(int M, int N, int A[N][M], int B[M][N]); /* * transpose_submit - This is the solution transpose function that you * will be graded on for Part B of the assignment. Do not change * the description string "Transpose submission", as the driver * searches for that string to identify the transpose function to * be graded. */ char transpose_submit_desc[] = "Transpose submission"; void block_8x8_32(int t, int l, int A[32][32], int B[32][32]) { for (int i = t; i < t + 8; i++) { for (int j = l; j < l + 8; j++) { B[j][i] = A[i][j]; } } } void block_8x8_64(int t, int l, int A[64][64], int B[64][64]) { int v0, v1, v2, v3; for (int i = t; i < t + 4; i++) { for (int j = l; j < l + 4; j++) { B[j][i] = A[i][j]; } for (int j = l + 4; j < l + 8; j++) { B[j - 4][i + 4] = A[i][j]; } } for (int j = l; j < l + 4; j++) { v0 = B[j][t + 4]; v1 = B[j][t + 5]; v2 = B[j][t + 6]; v3 = B[j][t + 7]; B[j][t + 4] = A[t + 4][j]; B[j][t + 5] = A[t + 5][j]; B[j][t + 6] = A[t + 6][j]; B[j][t + 7] = A[t + 7][j]; B[j + 4][t] = v0; B[j + 4][t + 1] = v1; B[j + 4][t + 2] = v2; B[j + 4][t + 3] = v3; } for (int i = t + 4; i < t + 8; i++) { for (int j = l + 4; j < l + 8; j++) { B[j][i] = A[i][j]; } } } void transpose_submit(int M, int N, int A[N][M], int B[M][N]) { if (M == 32) { for (int i = 0; i < N; i += 8) { for (int ii = i; ii < i + 8; ii++) { for (int jj = i; jj < i + 8; jj++) { B[ii][(jj + 8) % N] = A[ii][jj]; } } for (int ii = i; ii < i + 8; ii++) { for (int jj = i; jj < i + 8; jj++) { B[jj][ii] = B[ii][(jj + 8) % N]; } } block_8x8_32((i + 8) % N, i, A, B); } for (int i = 0; i < N; i += 8) { for (int j = 0; j < M; j += 8) { if (i != j && (j + 8) % N != i) { block_8x8_32(i, j, A, B); } } } } else if (M == 64) { for (int i = 0; i < N; i += 8) { int v0, v1, v2, v3; for (int ii = i; ii < i + 4; ii++) { for (int jj = i; jj < i + 8; jj++) { B[ii][(jj + 8) % N] = A[ii][jj]; } } for (int ii = i + 4; ii < i + 8; ii++) { for (int jj = i; jj < i + 8; jj++) { B[ii - 4][(jj + 16) % N] = A[ii][jj]; } } for (int ii = i; ii < i + 4; ii++) { for (int jj = i; jj < i + 4; jj++) { B[jj][ii] = B[ii][(jj + 8) % N]; } for (int jj = i + 4; jj < i + 8; jj++) { B[jj - 4][ii + 4] = B[ii][(jj + 8) % N]; } } for (int jj = i; jj < i + 4; jj++) { v0 = B[jj][i + 4]; v1 = B[jj][i + 5]; v2 = B[jj][i + 6]; v3 = B[jj][i + 7]; B[jj][i + 4] = B[i][(jj + 16) % N]; B[jj][i + 5] = B[i + 1][(jj + 16) % N]; B[jj][i + 6] = B[i + 2][(jj + 16) % N]; B[jj][i + 7] = B[i + 3][(jj + 16) % N]; B[jj + 4][i] = v0; B[jj + 4][i + 1] = v1; B[jj + 4][i + 2] = v2; B[jj + 4][i + 3] = v3; } for (int ii = i + 4; ii < i + 8; ii++) { for (int jj = i + 4; jj < i + 8; jj++) { B[jj][ii] = B[ii - 4][(jj + 16) % N]; } } block_8x8_64((i + 8) % N, i, A, B); block_8x8_64((i + 16) % N, i, A, B); } for (int i = 0; i < N; i += 8) { for (int j = 0; j < M; j += 8) { if (i != j && (j + 8) % N != i && (j + 16) % N != i) { block_8x8_64(i, j, A, B); } } } } else if (M == 61) { for (int j = 0; j < M; j += 17) { for (int i = 0; i < N; i++) { for (int jj = j; jj < j + 17 && jj < M; jj++) { B[jj][i] = A[i][jj]; } } } } } /* * You can define additional transpose functions below. We've defined * a simple one below to help you get started. */ /* * trans - A simple baseline transpose function, not optimized for the cache. */ char trans_desc[] = "Simple row-wise scan transpose"; void trans(int M, int N, int A[N][M], int B[M][N]) { int i, j, tmp; for (i = 0; i < N; i++) { for (j = 0; j < M; j++) { tmp = A[i][j]; B[j][i] = tmp; } } } /* * registerFunctions - This function registers your transpose * functions with the driver. At runtime, the driver will * evaluate each of the registered functions and summarize their * performance. This is a handy way to experiment with different * transpose strategies. */ void registerFunctions() { /* Register your solution function */ registerTransFunction(transpose_submit, transpose_submit_desc); /* Register any additional transpose functions */ registerTransFunction(trans, trans_desc); } /* * is_transpose - This helper function checks if B is the transpose of * A. You can check the correctness of your transpose by calling * it before returning from the transpose function. */ int is_transpose(int M, int N, int A[N][M], int B[M][N]) { int i, j; for (i = 0; i < N; i++) { for (j = 0; j < M; ++j) { if (A[i][j] != B[j][i]) { return 0; } } } return 1; }