diff --git a/arch/sim/pipe/ldriver.ys b/arch/sim/pipe/ldriver.ys deleted file mode 100644 index 806c9d6..0000000 --- a/arch/sim/pipe/ldriver.ys +++ /dev/null @@ -1,263 +0,0 @@ -####################################################################### -# Test for copying block of size 63; -####################################################################### - .pos 0 -main: irmovq Stack, %rsp # Set up stack pointer - - # Set up arguments for copy function and then invoke it - irmovq $63, %rdx # src and dst have 63 elements - irmovq dest, %rsi # dst array - irmovq src, %rdi # src array - call ncopy - halt # should halt with num nonzeros in %rax -StartFun: -#/* $begin ncopy-ys */ -################################################################## -# ncopy.ys - Copy a src block of len words to dst. -# Return the number of positive words (>0) contained in src. -# -# Include your name and ID here. -# -# Describe how and why you modified the baseline code. -# -################################################################## -# Do not modify this portion -# Function prologue. -# %rdi = src, %rsi = dst, %rdx = len -ncopy: - -################################################################## -# You can modify this portion - # Loop header - xorq %rax,%rax # count = 0; - iaddq $0xfffffffffffffffc, %rdx - jle Tail # if so, goto Done: - -Loop1: - mrmovq (%rdi), %r10 # read val from src... - rmmovq %r10, (%rsi) # ...and store it to dst - andq %r10, %r10 # val <= 0? - jle Npos1 # if so, goto Npos: - iaddq $1, %rax # count++ -Npos1: - - mrmovq 8(%rdi), %r10 # read val from src... - rmmovq %r10, 8(%rsi) # ...and store it to dst - andq %r10, %r10 # val <= 0? - jle Npos2 # if so, goto Npos: - iaddq $1, %rax # count++ -Npos2: - - mrmovq 16(%rdi), %r10 # read val from src... - rmmovq %r10, 16(%rsi) # ...and store it to dst - andq %r10, %r10 # val <= 0? - jle Npos3 # if so, goto Npos: - iaddq $1, %rax # count++ -Npos3: - - mrmovq 24(%rdi), %r10 # read val from src... - rmmovq %r10, 24(%rsi) # ...and store it to dst - andq %r10, %r10 # val <= 0? - jle Npos4 # if so, goto Npos: - iaddq $1, %rax # count++ -Npos4: - - mrmovq 32(%rdi), %r10 # read val from src... - rmmovq %r10, 32(%rsi) # ...and store it to dst - andq %r10, %r10 # val <= 0? - jle Npos5 # if so, goto Npos: - iaddq $1, %rax # count++ -Npos5: - - iaddq $40, %rdi # src++ - iaddq $40, %rsi # dst++ - iaddq $0xfffffffffffffffb, %rdx # len-- - jg Loop1 - -Tail: - iaddq $4, %rdx - jle Done -Loop2: - mrmovq (%rdi), %r10 - rmmovq %r10, (%rsi) - andq %r10, %r10 - jle NposT - iaddq $1, %rax -NposT: - iaddq $8, %rdi - iaddq $8, %rsi - iaddq $0xffffffffffffffff, %rdx - jg Loop2 - - -################################################################## -# Do not modify the following section of code -# Function epilogue. -Done: - ret -################################################################## -# Keep the following label at the end of your function -End: -#/* $end ncopy-ys */ -EndFun: - -############################### -# Source and destination blocks -############################### - .align 8 -src: - .quad 1 - .quad 2 - .quad -3 - .quad -4 - .quad -5 - .quad 6 - .quad -7 - .quad 8 - .quad 9 - .quad -10 - .quad 11 - .quad 12 - .quad -13 - .quad 14 - .quad -15 - .quad 16 - .quad -17 - .quad -18 - .quad 19 - .quad -20 - .quad 21 - .quad -22 - .quad -23 - .quad -24 - .quad -25 - .quad -26 - .quad 27 - .quad -28 - .quad -29 - .quad -30 - .quad 31 - .quad -32 - .quad 33 - .quad -34 - .quad 35 - .quad 36 - .quad -37 - .quad 38 - .quad 39 - .quad 40 - .quad 41 - .quad -42 - .quad 43 - .quad 44 - .quad 45 - .quad 46 - .quad 47 - .quad 48 - .quad 49 - .quad 50 - .quad 51 - .quad -52 - .quad -53 - .quad 54 - .quad -55 - .quad -56 - .quad 57 - .quad -58 - .quad -59 - .quad -60 - .quad -61 - .quad -62 - .quad -63 - .quad 0xbcdefa # This shouldn't get moved - - .align 16 -Predest: - .quad 0xbcdefa -dest: - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab -Postdest: - .quad 0xdefabc - -.align 8 -# Run time stack - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - -Stack: diff --git a/arch/sim/pipe/ncopy.ys b/arch/sim/pipe/ncopy.ys index 08aa9f1..5c69039 100644 --- a/arch/sim/pipe/ncopy.ys +++ b/arch/sim/pipe/ncopy.ys @@ -16,84 +16,74 @@ ncopy: ################################################################## # You can modify this portion - xorq %rax, %rax + xorq %rax, %rax iaddq $0xfffffffffffffff7, %rdx - jle Tail + jle Tail Loop: - mrmovq (%rdi), %r10 - rmmovq %r10, (%rsi) - andq %r10, %r10 - jle Npos1 - iaddq $1, %rax -Npos1: - - mrmovq 8(%rdi), %r10 - rmmovq %r10, 8(%rsi) - andq %r10, %r10 - jle Npos2 - iaddq $1, %rax -Npos2: - - mrmovq 16(%rdi), %r10 - rmmovq %r10, 16(%rsi) - andq %r10, %r10 - jle Npos3 - iaddq $1, %rax -Npos3: - - mrmovq 24(%rdi), %r10 - rmmovq %r10, 24(%rsi) - andq %r10, %r10 - jle Npos4 - iaddq $1, %rax -Npos4: - - mrmovq 32(%rdi), %r10 - rmmovq %r10, 32(%rsi) - andq %r10, %r10 - jle Npos5 - iaddq $1, %rax -Npos5: - - mrmovq 40(%rdi), %r10 - rmmovq %r10, 40(%rsi) - andq %r10, %r10 - jle Npos6 - iaddq $1, %rax -Npos6: - - mrmovq 48(%rdi), %r10 - rmmovq %r10, 48(%rsi) - andq %r10, %r10 - jle Npos7 - iaddq $1, %rax -Npos7: - - mrmovq 56(%rdi), %r10 - rmmovq %r10, 56(%rsi) - andq %r10, %r10 - jle Npos8 - iaddq $1, %rax -Npos8: - - mrmovq 64(%rdi), %r10 - rmmovq %r10, 64(%rsi) - andq %r10, %r10 - jle Npos9 - iaddq $1, %rax -Npos9: - - mrmovq 72(%rdi), %r10 - rmmovq %r10, 72(%rsi) - andq %r10, %r10 - jle Npos10 - iaddq $1, %rax -Npos10: - - iaddq $80, %rdi - iaddq $80, %rsi - iaddq $0xfffffffffffffff6, %rdx + mrmovq (%rdi), %r10 + rmmovq %r10, (%rsi) + andq %r10, %r10 + jle F1 + iaddq $1, %rax +F1: + mrmovq 8(%rdi), %r10 + rmmovq %r10, 8(%rsi) + andq %r10, %r10 + jle F2 + iaddq $1, %rax +F2: + mrmovq 16(%rdi), %r10 + rmmovq %r10, 16(%rsi) + andq %r10, %r10 + jle F3 + iaddq $1, %rax +F3: + mrmovq 24(%rdi), %r10 + rmmovq %r10, 24(%rsi) + andq %r10, %r10 + jle F4 + iaddq $1, %rax +F4: + mrmovq 32(%rdi), %r10 + rmmovq %r10, 32(%rsi) + andq %r10, %r10 + jle F5 + iaddq $1, %rax +F5: + mrmovq 40(%rdi), %r10 + rmmovq %r10, 40(%rsi) + andq %r10, %r10 + jle F6 + iaddq $1, %rax +F6: + mrmovq 48(%rdi), %r10 + rmmovq %r10, 48(%rsi) + andq %r10, %r10 + jle F7 + iaddq $1, %rax +F7: + mrmovq 56(%rdi), %r10 + rmmovq %r10, 56(%rsi) + andq %r10, %r10 + jle F8 + iaddq $1, %rax +F8: + mrmovq 64(%rdi), %r10 + rmmovq %r10, 64(%rsi) + andq %r10, %r10 + jle F9 + iaddq $1, %rax +F9: + mrmovq 72(%rdi), %r10 + rmmovq %r10, 72(%rsi) + andq %r10, %r10 + jle F10 + iaddq $1, %rax +F10: + iaddq $80, %rdi + iaddq $80, %rsi + iaddq $0xfffffffffffffff6, %rdx jg Loop Tail: @@ -117,66 +107,65 @@ JT: .quad J9 J9: - mrmovq 64(%rdi), %r10 - rmmovq %r10, 64(%rsi) - andq %r10, %r10 - jle J8 - iaddq $1, %rax + mrmovq 64(%rdi), %r10 + rmmovq %r10, 64(%rsi) + andq %r10, %r10 + jle J8 + iaddq $1, %rax J8: - mrmovq 56(%rdi), %r10 - rmmovq %r10, 56(%rsi) - andq %r10, %r10 - jle J7 - iaddq $1, %rax + mrmovq 56(%rdi), %r10 + rmmovq %r10, 56(%rsi) + andq %r10, %r10 + jle J7 + iaddq $1, %rax J7: - mrmovq 48(%rdi), %r10 - rmmovq %r10, 48(%rsi) - andq %r10, %r10 - jle J6 - iaddq $1, %rax + mrmovq 48(%rdi), %r10 + rmmovq %r10, 48(%rsi) + andq %r10, %r10 + jle J6 + iaddq $1, %rax J6: - mrmovq 40(%rdi), %r10 - rmmovq %r10, 40(%rsi) - andq %r10, %r10 - jle J5 - iaddq $1, %rax + mrmovq 40(%rdi), %r10 + rmmovq %r10, 40(%rsi) + andq %r10, %r10 + jle J5 + iaddq $1, %rax J5: - mrmovq 32(%rdi), %r10 - rmmovq %r10, 32(%rsi) - andq %r10, %r10 - jle J4 - iaddq $1, %rax + mrmovq 32(%rdi), %r10 + rmmovq %r10, 32(%rsi) + andq %r10, %r10 + jle J4 + iaddq $1, %rax J4: - mrmovq 24(%rdi), %r10 - rmmovq %r10, 24(%rsi) - andq %r10, %r10 - jle J3 - iaddq $1, %rax + mrmovq 24(%rdi), %r10 + rmmovq %r10, 24(%rsi) + andq %r10, %r10 + jle J3 + iaddq $1, %rax J3: - mrmovq 16(%rdi), %r10 - rmmovq %r10, 16(%rsi) - andq %r10, %r10 - jle J2 - iaddq $1, %rax + mrmovq 16(%rdi), %r10 + rmmovq %r10, 16(%rsi) + andq %r10, %r10 + jle J2 + iaddq $1, %rax J2: - mrmovq 8(%rdi), %r10 - rmmovq %r10, 8(%rsi) - andq %r10, %r10 - jle J1 - iaddq $1, %rax + mrmovq 8(%rdi), %r10 + rmmovq %r10, 8(%rsi) + andq %r10, %r10 + jle J1 + iaddq $1, %rax J1: - mrmovq (%rdi), %r10 - rmmovq %r10, (%rsi) - andq %r10, %r10 - jle Done - iaddq $1, %rax + mrmovq (%rdi), %r10 + rmmovq %r10, (%rsi) + andq %r10, %r10 + jle Done + iaddq $1, %rax ################################################################## # Do not modify the following section of code # Function epilogue. Done: ret - ################################################################## # Keep the following label at the end of your function End: diff --git a/arch/sim/pipe/pipe-full.hcl b/arch/sim/pipe/pipe-full.hcl index 1d666c5..d7868fe 100644 --- a/arch/sim/pipe/pipe-full.hcl +++ b/arch/sim/pipe/pipe-full.hcl @@ -51,6 +51,9 @@ wordsig RNONE 'REG_NONE' # Special value indicating "no register" ##### ALU Functions referenced explicitly ########################## wordsig ALUADD 'A_ADD' # ALU should add its arguments +##### Jump conditions referenced explicitly +wordsig UNCOND 'C_YES' # Unconditional transfer + ##### Possible instruction status values ##### wordsig SBUB 'STAT_BUB' # Bubble in stage wordsig SAOK 'STAT_AOK' # Normal execution @@ -76,6 +79,7 @@ boolsig imem_error 'imem_error' # Error signal from instruction memory boolsig instr_valid 'instr_valid' # Is fetched instruction valid? ##### Pipeline Register D ########################################## +wordsig D_stat 'if_id_curr->status' # Instruction status wordsig D_icode 'if_id_curr->icode' # Instruction code wordsig D_rA 'if_id_curr->ra' # rA field from instruction wordsig D_rB 'if_id_curr->rb' # rB field from instruction @@ -135,12 +139,14 @@ wordsig W_valM 'mem_wb_curr->valm' # Memory M value ## What address should instruction be fetched at word f_pc = [ - # Mispredicted branch. Fetch at incremented PC - M_icode == IJXX && !M_Cnd : M_valA; - # Completion of RET instruction + # Completion of RET instruction W_icode == IRET : W_valM; # Default: Use predicted value of PC - 1 : F_predPC; + M_icode != IJXX || M_ifun == UNCOND : F_predPC; + # Mispredicted branch. Fetch at incremented PC + M_valA < M_valE && M_Cnd && D_stat == SBUB : M_valE; + M_valA > M_valE && !M_Cnd && D_stat == SBUB : M_valA; + 1 : F_predPC; ]; ## Determine icode of fetched instruction @@ -179,7 +185,8 @@ bool need_valC = # Predict next value of PC word f_predPC = [ - f_icode in { IJXX, ICALL } : f_valC; + f_icode == ICALL || + f_icode == IJXX && (f_ifun == UNCOND || f_valC < f_valP) : f_valC; 1 : f_valP; ]; @@ -239,7 +246,7 @@ word d_valB = [ ## Select input A to ALU word aluA = [ E_icode in { IRRMOVQ, IOPQ } : E_valA; - E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : E_valC; + E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ, IJXX } : E_valC; E_icode in { ICALL, IPUSHQ } : -8; E_icode in { IRET, IPOPQ } : 8; # Other instructions don't need ALU @@ -249,7 +256,7 @@ word aluA = [ word aluB = [ E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB; - E_icode in { IRRMOVQ, IIRMOVQ } : 0; + E_icode in { IRRMOVQ, IIRMOVQ, IJXX } : 0; # Other instructions don't need ALU ]; @@ -329,7 +336,10 @@ bool F_stall = E_dstM in { d_srcA, d_srcB } && !(D_icode in { IPUSHQ, IRMMOVQ } && E_dstM == d_srcA) || # Stalling at fetch while ret passes through pipeline - IRET in { D_icode, E_icode, M_icode }; + IRET in { D_icode, E_icode, M_icode } && + !(E_icode == IJXX && E_ifun != UNCOND && + (E_valA < e_valE && e_Cnd || + E_valA > e_valE && !e_Cnd)); # Should I stall or inject a bubble into Pipeline Register D? # At most one of these can be true. @@ -342,7 +352,9 @@ bool D_stall = bool D_bubble = # Mispredicted branch - (E_icode == IJXX && !e_Cnd) || + (E_icode == IJXX && E_ifun != UNCOND && + (E_valA < e_valE && e_Cnd && F_predPC != e_valE || + E_valA > e_valE && !e_Cnd && F_predPC != E_valA)) || # Stalling at fetch while ret passes through pipeline # but not condition for a load/use hazard !(E_icode in { IMRMOVQ, IPOPQ } && @@ -355,7 +367,9 @@ bool D_bubble = bool E_stall = 0; bool E_bubble = # Mispredicted branch - (E_icode == IJXX && !e_Cnd) || + (E_icode == IJXX && E_ifun != UNCOND && + (E_valA < e_valE && e_Cnd || + E_valA > e_valE && !e_Cnd)) || # Conditions for a load/use hazard ## Set this to the new load/use condition E_icode in { IMRMOVQ, IPOPQ } && diff --git a/arch/sim/pipe/sdriver.ys b/arch/sim/pipe/sdriver.ys deleted file mode 100644 index 929bc2c..0000000 --- a/arch/sim/pipe/sdriver.ys +++ /dev/null @@ -1,145 +0,0 @@ -####################################################################### -# Test for copying block of size 4; -####################################################################### - .pos 0 -main: irmovq Stack, %rsp # Set up stack pointer - - # Set up arguments for copy function and then invoke it - irmovq $4, %rdx # src and dst have 4 elements - irmovq dest, %rsi # dst array - irmovq src, %rdi # src array - call ncopy - halt # should halt with num nonzeros in %rax -StartFun: -#/* $begin ncopy-ys */ -################################################################## -# ncopy.ys - Copy a src block of len words to dst. -# Return the number of positive words (>0) contained in src. -# -# Include your name and ID here. -# -# Describe how and why you modified the baseline code. -# -################################################################## -# Do not modify this portion -# Function prologue. -# %rdi = src, %rsi = dst, %rdx = len -ncopy: - -################################################################## -# You can modify this portion - # Loop header - xorq %rax,%rax # count = 0; - iaddq $0xfffffffffffffffc, %rdx - jle Tail # if so, goto Done: - -Loop1: - mrmovq (%rdi), %r10 # read val from src... - rmmovq %r10, (%rsi) # ...and store it to dst - andq %r10, %r10 # val <= 0? - jle Npos1 # if so, goto Npos: - iaddq $1, %rax # count++ -Npos1: - - mrmovq 8(%rdi), %r10 # read val from src... - rmmovq %r10, 8(%rsi) # ...and store it to dst - andq %r10, %r10 # val <= 0? - jle Npos2 # if so, goto Npos: - iaddq $1, %rax # count++ -Npos2: - - mrmovq 16(%rdi), %r10 # read val from src... - rmmovq %r10, 16(%rsi) # ...and store it to dst - andq %r10, %r10 # val <= 0? - jle Npos3 # if so, goto Npos: - iaddq $1, %rax # count++ -Npos3: - - mrmovq 24(%rdi), %r10 # read val from src... - rmmovq %r10, 24(%rsi) # ...and store it to dst - andq %r10, %r10 # val <= 0? - jle Npos4 # if so, goto Npos: - iaddq $1, %rax # count++ -Npos4: - - mrmovq 32(%rdi), %r10 # read val from src... - rmmovq %r10, 32(%rsi) # ...and store it to dst - andq %r10, %r10 # val <= 0? - jle Npos5 # if so, goto Npos: - iaddq $1, %rax # count++ -Npos5: - - iaddq $40, %rdi # src++ - iaddq $40, %rsi # dst++ - iaddq $0xfffffffffffffffb, %rdx # len-- - jg Loop1 - -Tail: - iaddq $4, %rdx - jle Done -Loop2: - mrmovq (%rdi), %r10 - rmmovq %r10, (%rsi) - andq %r10, %r10 - jle NposT - iaddq $1, %rax -NposT: - iaddq $8, %rdi - iaddq $8, %rsi - iaddq $0xffffffffffffffff, %rdx - jg Loop2 - - -################################################################## -# Do not modify the following section of code -# Function epilogue. -Done: - ret -################################################################## -# Keep the following label at the end of your function -End: -#/* $end ncopy-ys */ -EndFun: - -############################### -# Source and destination blocks -############################### - .align 8 -src: - .quad 1 - .quad -2 - .quad 3 - .quad -4 - .quad 0xbcdefa # This shouldn't get moved - - .align 16 -Predest: - .quad 0xbcdefa -dest: - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab - .quad 0xcdefab -Postdest: - .quad 0xdefabc - -.align 8 -# Run time stack - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - .quad 0 - -Stack: