/* * trans.c - Matrix transpose B = A^T * * Each transpose function must have a prototype of the form: * void trans(int M, int N, int A[N][M], int B[M][N]); * * A transpose function is evaluated by counting the number of misses * on a 1KB direct mapped cache with a block size of 32 bytes. */ #include #include "cachelab.h" int is_transpose(int M, int N, int A[N][M], int B[M][N]); /* * transpose_submit - This is the solution transpose function that you * will be graded on for Part B of the assignment. Do not change * the description string "Transpose submission", as the driver * searches for that string to identify the transpose function to * be graded. */ char transpose_submit_desc[] = "Transpose submission"; void transpose_sub(int N, int t, int l, int size, int B[][N]) { for (int i = 0; i < size; i++) { for (int j = i + 1; j < size; j++) { B[t + i][l + j] ^= B[t + j][l + i]; B[t + j][l + i] ^= B[t + i][l + j]; B[t + i][l + j] ^= B[t + j][l + i]; } } } void transpose_submit(int M, int N, int A[N][M], int B[M][N]) { int v0, v1, v2, v3, v4, v5, v6, v7; if (M == 32) { for (int j = 0; j < M; j += 8) { for (int i = 0; i < N; i++) { v0 = A[i][j]; v1 = A[i][j + 1]; v2 = A[i][j + 2]; v3 = A[i][j + 3]; v4 = A[i][j + 4]; v5 = A[i][j + 5]; v6 = A[i][j + 6]; v7 = A[i][j + 7]; B[j][i] = v0; B[j + 1][i] = v1; B[j + 2][i] = v2; B[j + 3][i] = v3; B[j + 4][i] = v4; B[j + 5][i] = v5; B[j + 6][i] = v6; B[j + 7][i] = v7; } } } else if (M == 64) { for (int i = 0; i < N; i += 8) { for (int j = 0; j < M; j += 8) { if (i == j) { for (int ii = i; ii < i + 4; ii++) { v0 = A[ii][j]; v1 = A[ii][j + 1]; v2 = A[ii][j + 2]; v3 = A[ii][j + 3]; v4 = A[ii][j + 4]; v5 = A[ii][j + 5]; v6 = A[ii][j + 6]; v7 = A[ii][j + 7]; B[ii][j] = v0; B[ii][j + 1] = v1; B[ii][j + 2] = v2; B[ii][j + 3] = v3; B[ii][j + 4] = v4; B[ii][j + 5] = v5; B[ii][j + 6] = v6; B[ii][j + 7] = v7; } transpose_sub(N, i, j, 4, B); transpose_sub(N, i, j + 4, 4, B); for (int ii = i + 4; ii < i + 8; ii++) { v0 = A[ii][j]; v1 = A[ii][j + 1]; v2 = A[ii][j + 2]; v3 = A[ii][j + 3]; v4 = A[ii][j + 4]; v5 = A[ii][j + 5]; v6 = A[ii][j + 6]; v7 = A[ii][j + 7]; B[ii][j] = v0; B[ii][j + 1] = v1; B[ii][j + 2] = v2; B[ii][j + 3] = v3; B[ii][j + 4] = v4; B[ii][j + 5] = v5; B[ii][j + 6] = v6; B[ii][j + 7] = v7; } transpose_sub(N, i + 4, j, 4, B); transpose_sub(N, i + 4, j + 4, 4, B); for (int ii = i; ii < i + 4; ii++) { v0 = B[ii + 4][j]; v1 = B[ii + 4][j + 1]; v2 = B[ii + 4][j + 2]; v3 = B[ii + 4][j + 3]; v4 = B[ii][j + 4]; v5 = B[ii][j + 5]; v6 = B[ii][j + 6]; v7 = B[ii][j + 7]; B[ii][j + 4] = v0; B[ii][j + 5] = v1; B[ii][j + 6] = v2; B[ii][j + 7] = v3; B[ii + 4][j] = v4; B[ii + 4][j + 1] = v5; B[ii + 4][j + 2] = v6; B[ii + 4][j + 3] = v7; } } else { for (int ii = i; ii < i + 4; ii++) { for (int jj = j; jj < j + 4; jj++) { B[jj][ii] = A[ii][jj]; } for (int jj = j + 4; jj < j + 8; jj++) { B[jj - 4][ii + 4] = A[ii][jj]; } } for (int jj = j; jj < j + 4; jj++) { v0 = B[jj][i + 4]; v1 = B[jj][i + 5]; v2 = B[jj][i + 6]; v3 = B[jj][i + 7]; B[jj][i + 4] = A[i + 4][jj]; B[jj][i + 5] = A[i + 5][jj]; B[jj][i + 6] = A[i + 6][jj]; B[jj][i + 7] = A[i + 7][jj]; B[jj + 4][i] = v0; B[jj + 4][i + 1] = v1; B[jj + 4][i + 2] = v2; B[jj + 4][i + 3] = v3; } for (int ii = i + 4; ii < i + 8; ii++) { for (int jj = j + 4; jj < j + 8; jj++) { B[jj][ii] = A[ii][jj]; } } } } } } else if (M == 61) { for (int j = 0; j < M; j += 17) { for (int i = 0; i < N; i++) { for (int jj = j; jj < j + 17 && jj < M; jj++) { B[jj][i] = A[i][jj]; } } } } } /* * You can define additional transpose functions below. We've defined * a simple one below to help you get started. */ /* * trans - A simple baseline transpose function, not optimized for the cache. */ char trans_desc[] = "Simple row-wise scan transpose"; void trans(int M, int N, int A[N][M], int B[M][N]) { int i, j, tmp; for (i = 0; i < N; i++) { for (j = 0; j < M; j++) { tmp = A[i][j]; B[j][i] = tmp; } } } /* * registerFunctions - This function registers your transpose * functions with the driver. At runtime, the driver will * evaluate each of the registered functions and summarize their * performance. This is a handy way to experiment with different * transpose strategies. */ void registerFunctions() { /* Register your solution function */ registerTransFunction(transpose_submit, transpose_submit_desc); /* Register any additional transpose functions */ registerTransFunction(trans, trans_desc); } /* * is_transpose - This helper function checks if B is the transpose of * A. You can check the correctness of your transpose by calling * it before returning from the transpose function. */ int is_transpose(int M, int N, int A[N][M], int B[M][N]) { int i, j; for (i = 0; i < N; i++) { for (j = 0; j < M; ++j) { if (A[i][j] != B[j][i]) { return 0; } } } return 1; }