optimize archlab

This commit is contained in:
18218461270@163.com 2025-08-23 13:28:47 +08:00
parent 911ff3d3bd
commit 5cfa01dde0
4 changed files with 134 additions and 539 deletions

View File

@ -1,263 +0,0 @@
#######################################################################
# Test for copying block of size 63;
#######################################################################
.pos 0
# Test driver: point %rsp at Stack, load the three ncopy arguments
# (%rdi = src, %rsi = dst, %rdx = len) and call ncopy once.
main:
    irmovq Stack, %rsp          # %rsp = address of the Stack label
    irmovq src, %rdi            # arg 1: source block
    irmovq dest, %rsi           # arg 2: destination block
    irmovq $63, %rdx            # arg 3: both blocks hold 63 words
    call ncopy
    halt                        # stop; %rax holds ncopy's count of positive words
StartFun:
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
#
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
ncopy:
##################################################################
# You can modify this portion
# Approach: copy five words per pass of an unrolled loop, then mop up
# the remaining (at most four) words one at a time.
# While the unrolled loop runs, %rdx is kept biased by -4, so
# "%rdx > 0" means "at least five words are still uncopied".
# %rax accumulates the number of copied words that are > 0.
# %r10 is the scratch register for the word in flight.
    xorq %rax, %rax             # count = 0
    iaddq $-4, %rdx             # rdx = len - 4
    jle Rest                    # len <= 4: skip the unrolled loop
Unroll5:
    mrmovq (%rdi), %r10         # word 0: load from src
    rmmovq %r10, (%rsi)         #         store to dst
    andq %r10, %r10             #         set flags from the value
    jle Skip1                   #         value <= 0: not counted
    iaddq $1, %rax              #         count++
Skip1:
    mrmovq 8(%rdi), %r10        # word 1: load from src
    rmmovq %r10, 8(%rsi)        #         store to dst
    andq %r10, %r10             #         set flags from the value
    jle Skip2                   #         value <= 0: not counted
    iaddq $1, %rax              #         count++
Skip2:
    mrmovq 16(%rdi), %r10       # word 2: load from src
    rmmovq %r10, 16(%rsi)       #         store to dst
    andq %r10, %r10             #         set flags from the value
    jle Skip3                   #         value <= 0: not counted
    iaddq $1, %rax              #         count++
Skip3:
    mrmovq 24(%rdi), %r10       # word 3: load from src
    rmmovq %r10, 24(%rsi)       #         store to dst
    andq %r10, %r10             #         set flags from the value
    jle Skip4                   #         value <= 0: not counted
    iaddq $1, %rax              #         count++
Skip4:
    mrmovq 32(%rdi), %r10       # word 4: load from src
    rmmovq %r10, 32(%rsi)       #         store to dst
    andq %r10, %r10             #         set flags from the value
    jle Skip5                   #         value <= 0: not counted
    iaddq $1, %rax              #         count++
Skip5:
    iaddq $40, %rsi             # dst += 5 words (40 bytes)
    iaddq $40, %rdi             # src += 5 words (40 bytes)
    iaddq $-5, %rdx             # five fewer words remain
    jg Unroll5                  # at least five still left: go around again
Rest:
    iaddq $4, %rdx              # undo the bias: rdx = words still to copy
    jle Done                    # nothing left
Single:
    mrmovq (%rdi), %r10         # load one word from src
    rmmovq %r10, (%rsi)         # store it to dst
    andq %r10, %r10             # set flags from the value
    jle SkipS                   # value <= 0: not counted
    iaddq $1, %rax              # count++
SkipS:
    iaddq $8, %rsi              # dst += 1 word
    iaddq $8, %rdi              # src += 1 word
    iaddq $-1, %rdx             # one fewer word remains
    jg Single                   # more words left: keep going
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
    ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */
EndFun:
###############################
# Source and destination blocks
###############################
.align 8
# Source block handed to ncopy by main: 63 signed words, 31 of which are > 0.
# One extra word follows the block (see the last line).
src:
.quad 1
.quad 2
.quad -3
.quad -4
.quad -5
.quad 6
.quad -7
.quad 8
.quad 9
.quad -10
.quad 11
.quad 12
.quad -13
.quad 14
.quad -15
.quad 16
.quad -17
.quad -18
.quad 19
.quad -20
.quad 21
.quad -22
.quad -23
.quad -24
.quad -25
.quad -26
.quad 27
.quad -28
.quad -29
.quad -30
.quad 31
.quad -32
.quad 33
.quad -34
.quad 35
.quad 36
.quad -37
.quad 38
.quad 39
.quad 40
.quad 41
.quad -42
.quad 43
.quad 44
.quad 45
.quad 46
.quad 47
.quad 48
.quad 49
.quad 50
.quad 51
.quad -52
.quad -53
.quad 54
.quad -55
.quad -56
.quad 57
.quad -58
.quad -59
.quad -60
.quad -61
.quad -62
.quad -63
.quad 0xbcdefa # Word just past the 63 source words. This shouldn't get moved
.align 16
Predest:
.quad 0xbcdefa
dest:
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
Postdest:
.quad 0xdefabc
.align 8
# Run time stack
# 16 zero-initialized words are reserved directly below the Stack label;
# main loads the address of Stack into %rsp before calling ncopy.
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
Stack:

View File

@ -16,84 +16,74 @@ ncopy:
################################################################## ##################################################################
# You can modify this portion # You can modify this portion
xorq %rax, %rax xorq %rax, %rax
iaddq $0xfffffffffffffff7, %rdx iaddq $0xfffffffffffffff7, %rdx
jle Tail jle Tail
Loop: Loop:
mrmovq (%rdi), %r10 mrmovq (%rdi), %r10
rmmovq %r10, (%rsi) rmmovq %r10, (%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos1 jle F1
iaddq $1, %rax iaddq $1, %rax
Npos1: F1:
mrmovq 8(%rdi), %r10
mrmovq 8(%rdi), %r10 rmmovq %r10, 8(%rsi)
rmmovq %r10, 8(%rsi) andq %r10, %r10
andq %r10, %r10 jle F2
jle Npos2 iaddq $1, %rax
iaddq $1, %rax F2:
Npos2: mrmovq 16(%rdi), %r10
rmmovq %r10, 16(%rsi)
mrmovq 16(%rdi), %r10 andq %r10, %r10
rmmovq %r10, 16(%rsi) jle F3
andq %r10, %r10 iaddq $1, %rax
jle Npos3 F3:
iaddq $1, %rax mrmovq 24(%rdi), %r10
Npos3: rmmovq %r10, 24(%rsi)
andq %r10, %r10
mrmovq 24(%rdi), %r10 jle F4
rmmovq %r10, 24(%rsi) iaddq $1, %rax
andq %r10, %r10 F4:
jle Npos4 mrmovq 32(%rdi), %r10
iaddq $1, %rax rmmovq %r10, 32(%rsi)
Npos4: andq %r10, %r10
jle F5
mrmovq 32(%rdi), %r10 iaddq $1, %rax
rmmovq %r10, 32(%rsi) F5:
andq %r10, %r10 mrmovq 40(%rdi), %r10
jle Npos5 rmmovq %r10, 40(%rsi)
iaddq $1, %rax andq %r10, %r10
Npos5: jle F6
iaddq $1, %rax
mrmovq 40(%rdi), %r10 F6:
rmmovq %r10, 40(%rsi) mrmovq 48(%rdi), %r10
andq %r10, %r10 rmmovq %r10, 48(%rsi)
jle Npos6 andq %r10, %r10
iaddq $1, %rax jle F7
Npos6: iaddq $1, %rax
F7:
mrmovq 48(%rdi), %r10 mrmovq 56(%rdi), %r10
rmmovq %r10, 48(%rsi) rmmovq %r10, 56(%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos7 jle F8
iaddq $1, %rax iaddq $1, %rax
Npos7: F8:
mrmovq 64(%rdi), %r10
mrmovq 56(%rdi), %r10 rmmovq %r10, 64(%rsi)
rmmovq %r10, 56(%rsi) andq %r10, %r10
andq %r10, %r10 jle F9
jle Npos8 iaddq $1, %rax
iaddq $1, %rax F9:
Npos8: mrmovq 72(%rdi), %r10
rmmovq %r10, 72(%rsi)
mrmovq 64(%rdi), %r10 andq %r10, %r10
rmmovq %r10, 64(%rsi) jle F10
andq %r10, %r10 iaddq $1, %rax
jle Npos9 F10:
iaddq $1, %rax iaddq $80, %rdi
Npos9: iaddq $80, %rsi
iaddq $0xfffffffffffffff6, %rdx
mrmovq 72(%rdi), %r10
rmmovq %r10, 72(%rsi)
andq %r10, %r10
jle Npos10
iaddq $1, %rax
Npos10:
iaddq $80, %rdi
iaddq $80, %rsi
iaddq $0xfffffffffffffff6, %rdx
jg Loop jg Loop
Tail: Tail:
@ -117,66 +107,65 @@ JT:
.quad J9 .quad J9
J9: J9:
mrmovq 64(%rdi), %r10 mrmovq 64(%rdi), %r10
rmmovq %r10, 64(%rsi) rmmovq %r10, 64(%rsi)
andq %r10, %r10 andq %r10, %r10
jle J8 jle J8
iaddq $1, %rax iaddq $1, %rax
J8: J8:
mrmovq 56(%rdi), %r10 mrmovq 56(%rdi), %r10
rmmovq %r10, 56(%rsi) rmmovq %r10, 56(%rsi)
andq %r10, %r10 andq %r10, %r10
jle J7 jle J7
iaddq $1, %rax iaddq $1, %rax
J7: J7:
mrmovq 48(%rdi), %r10 mrmovq 48(%rdi), %r10
rmmovq %r10, 48(%rsi) rmmovq %r10, 48(%rsi)
andq %r10, %r10 andq %r10, %r10
jle J6 jle J6
iaddq $1, %rax iaddq $1, %rax
J6: J6:
mrmovq 40(%rdi), %r10 mrmovq 40(%rdi), %r10
rmmovq %r10, 40(%rsi) rmmovq %r10, 40(%rsi)
andq %r10, %r10 andq %r10, %r10
jle J5 jle J5
iaddq $1, %rax iaddq $1, %rax
J5: J5:
mrmovq 32(%rdi), %r10 mrmovq 32(%rdi), %r10
rmmovq %r10, 32(%rsi) rmmovq %r10, 32(%rsi)
andq %r10, %r10 andq %r10, %r10
jle J4 jle J4
iaddq $1, %rax iaddq $1, %rax
J4: J4:
mrmovq 24(%rdi), %r10 mrmovq 24(%rdi), %r10
rmmovq %r10, 24(%rsi) rmmovq %r10, 24(%rsi)
andq %r10, %r10 andq %r10, %r10
jle J3 jle J3
iaddq $1, %rax iaddq $1, %rax
J3: J3:
mrmovq 16(%rdi), %r10 mrmovq 16(%rdi), %r10
rmmovq %r10, 16(%rsi) rmmovq %r10, 16(%rsi)
andq %r10, %r10 andq %r10, %r10
jle J2 jle J2
iaddq $1, %rax iaddq $1, %rax
J2: J2:
mrmovq 8(%rdi), %r10 mrmovq 8(%rdi), %r10
rmmovq %r10, 8(%rsi) rmmovq %r10, 8(%rsi)
andq %r10, %r10 andq %r10, %r10
jle J1 jle J1
iaddq $1, %rax iaddq $1, %rax
J1: J1:
mrmovq (%rdi), %r10 mrmovq (%rdi), %r10
rmmovq %r10, (%rsi) rmmovq %r10, (%rsi)
andq %r10, %r10 andq %r10, %r10
jle Done jle Done
iaddq $1, %rax iaddq $1, %rax
################################################################## ##################################################################
# Do not modify the following section of code # Do not modify the following section of code
# Function epilogue. # Function epilogue.
Done: Done:
ret ret
################################################################## ##################################################################
# Keep the following label at the end of your function # Keep the following label at the end of your function
End: End:

View File

@ -51,6 +51,9 @@ wordsig RNONE 'REG_NONE' # Special value indicating "no register"
##### ALU Functions referenced explicitly ########################## ##### ALU Functions referenced explicitly ##########################
wordsig ALUADD 'A_ADD' # ALU should add its arguments wordsig ALUADD 'A_ADD' # ALU should add its arguments
##### Jump conditions referenced explicitly
wordsig UNCOND 'C_YES' # Unconditional transfer
##### Possible instruction status values ##### ##### Possible instruction status values #####
wordsig SBUB 'STAT_BUB' # Bubble in stage wordsig SBUB 'STAT_BUB' # Bubble in stage
wordsig SAOK 'STAT_AOK' # Normal execution wordsig SAOK 'STAT_AOK' # Normal execution
@ -76,6 +79,7 @@ boolsig imem_error 'imem_error' # Error signal from instruction memory
boolsig instr_valid 'instr_valid' # Is fetched instruction valid? boolsig instr_valid 'instr_valid' # Is fetched instruction valid?
##### Pipeline Register D ########################################## ##### Pipeline Register D ##########################################
wordsig D_stat 'if_id_curr->status' # Instruction status
wordsig D_icode 'if_id_curr->icode' # Instruction code wordsig D_icode 'if_id_curr->icode' # Instruction code
wordsig D_rA 'if_id_curr->ra' # rA field from instruction wordsig D_rA 'if_id_curr->ra' # rA field from instruction
wordsig D_rB 'if_id_curr->rb' # rB field from instruction wordsig D_rB 'if_id_curr->rb' # rB field from instruction
@ -135,12 +139,14 @@ wordsig W_valM 'mem_wb_curr->valm' # Memory M value
## What address should instruction be fetched at ## What address should instruction be fetched at
word f_pc = [ word f_pc = [
# Mispredicted branch. Fetch at incremented PC # Completion of RET instruction
M_icode == IJXX && !M_Cnd : M_valA;
# Completion of RET instruction
W_icode == IRET : W_valM; W_icode == IRET : W_valM;
# Default: Use predicted value of PC # Default: Use predicted value of PC
1 : F_predPC; M_icode != IJXX || M_ifun == UNCOND : F_predPC;
# Mispredicted branch. Fetch at incremented PC
M_valA < M_valE && M_Cnd && D_stat == SBUB : M_valE;
M_valA > M_valE && !M_Cnd && D_stat == SBUB : M_valA;
1 : F_predPC;
]; ];
## Determine icode of fetched instruction ## Determine icode of fetched instruction
@ -179,7 +185,8 @@ bool need_valC =
# Predict next value of PC # Predict next value of PC
word f_predPC = [ word f_predPC = [
f_icode in { IJXX, ICALL } : f_valC; f_icode == ICALL ||
f_icode == IJXX && (f_ifun == UNCOND || f_valC < f_valP) : f_valC;
1 : f_valP; 1 : f_valP;
]; ];
@ -239,7 +246,7 @@ word d_valB = [
## Select input A to ALU ## Select input A to ALU
word aluA = [ word aluA = [
E_icode in { IRRMOVQ, IOPQ } : E_valA; E_icode in { IRRMOVQ, IOPQ } : E_valA;
E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : E_valC; E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ, IJXX } : E_valC;
E_icode in { ICALL, IPUSHQ } : -8; E_icode in { ICALL, IPUSHQ } : -8;
E_icode in { IRET, IPOPQ } : 8; E_icode in { IRET, IPOPQ } : 8;
# Other instructions don't need ALU # Other instructions don't need ALU
@ -249,7 +256,7 @@ word aluA = [
word aluB = [ word aluB = [
E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB; IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB;
E_icode in { IRRMOVQ, IIRMOVQ } : 0; E_icode in { IRRMOVQ, IIRMOVQ, IJXX } : 0;
# Other instructions don't need ALU # Other instructions don't need ALU
]; ];
@ -329,7 +336,10 @@ bool F_stall =
E_dstM in { d_srcA, d_srcB } && E_dstM in { d_srcA, d_srcB } &&
!(D_icode in { IPUSHQ, IRMMOVQ } && E_dstM == d_srcA) || !(D_icode in { IPUSHQ, IRMMOVQ } && E_dstM == d_srcA) ||
# Stalling at fetch while ret passes through pipeline # Stalling at fetch while ret passes through pipeline
IRET in { D_icode, E_icode, M_icode }; IRET in { D_icode, E_icode, M_icode } &&
!(E_icode == IJXX && E_ifun != UNCOND &&
(E_valA < e_valE && e_Cnd ||
E_valA > e_valE && !e_Cnd));
# Should I stall or inject a bubble into Pipeline Register D? # Should I stall or inject a bubble into Pipeline Register D?
# At most one of these can be true. # At most one of these can be true.
@ -342,7 +352,9 @@ bool D_stall =
bool D_bubble = bool D_bubble =
# Mispredicted branch # Mispredicted branch
(E_icode == IJXX && !e_Cnd) || (E_icode == IJXX && E_ifun != UNCOND &&
(E_valA < e_valE && e_Cnd && F_predPC != e_valE ||
E_valA > e_valE && !e_Cnd && F_predPC != E_valA)) ||
# Stalling at fetch while ret passes through pipeline # Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard # but not condition for a load/use hazard
!(E_icode in { IMRMOVQ, IPOPQ } && !(E_icode in { IMRMOVQ, IPOPQ } &&
@ -355,7 +367,9 @@ bool D_bubble =
bool E_stall = 0; bool E_stall = 0;
bool E_bubble = bool E_bubble =
# Mispredicted branch # Mispredicted branch
(E_icode == IJXX && !e_Cnd) || (E_icode == IJXX && E_ifun != UNCOND &&
(E_valA < e_valE && e_Cnd ||
E_valA > e_valE && !e_Cnd)) ||
# Conditions for a load/use hazard # Conditions for a load/use hazard
## Set this to the new load/use condition ## Set this to the new load/use condition
E_icode in { IMRMOVQ, IPOPQ } && E_icode in { IMRMOVQ, IPOPQ } &&

View File

@ -1,145 +0,0 @@
#######################################################################
# Test for copying block of size 4;
#######################################################################
.pos 0
# Test driver: point %rsp at Stack, load the three ncopy arguments
# (%rdi = src, %rsi = dst, %rdx = len) and call ncopy once.
main:
    irmovq Stack, %rsp          # %rsp = address of the Stack label
    irmovq src, %rdi            # arg 1: source block
    irmovq dest, %rsi           # arg 2: destination block
    irmovq $4, %rdx             # arg 3: both blocks hold 4 words
    call ncopy
    halt                        # stop; %rax holds ncopy's count of positive words
StartFun:
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
#
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
ncopy:
##################################################################
# You can modify this portion
# Approach: copy five words per pass of an unrolled loop, then mop up
# the remaining (at most four) words one at a time.
# While the unrolled loop runs, %rdx is kept biased by -4, so
# "%rdx > 0" means "at least five words are still uncopied".
# %rax accumulates the number of copied words that are > 0.
# %r10 is the scratch register for the word in flight.
    xorq %rax, %rax             # count = 0
    iaddq $-4, %rdx             # rdx = len - 4
    jle Rest                    # len <= 4: skip the unrolled loop
Unroll5:
    mrmovq (%rdi), %r10         # word 0: load from src
    rmmovq %r10, (%rsi)         #         store to dst
    andq %r10, %r10             #         set flags from the value
    jle Skip1                   #         value <= 0: not counted
    iaddq $1, %rax              #         count++
Skip1:
    mrmovq 8(%rdi), %r10        # word 1: load from src
    rmmovq %r10, 8(%rsi)        #         store to dst
    andq %r10, %r10             #         set flags from the value
    jle Skip2                   #         value <= 0: not counted
    iaddq $1, %rax              #         count++
Skip2:
    mrmovq 16(%rdi), %r10       # word 2: load from src
    rmmovq %r10, 16(%rsi)       #         store to dst
    andq %r10, %r10             #         set flags from the value
    jle Skip3                   #         value <= 0: not counted
    iaddq $1, %rax              #         count++
Skip3:
    mrmovq 24(%rdi), %r10       # word 3: load from src
    rmmovq %r10, 24(%rsi)       #         store to dst
    andq %r10, %r10             #         set flags from the value
    jle Skip4                   #         value <= 0: not counted
    iaddq $1, %rax              #         count++
Skip4:
    mrmovq 32(%rdi), %r10       # word 4: load from src
    rmmovq %r10, 32(%rsi)       #         store to dst
    andq %r10, %r10             #         set flags from the value
    jle Skip5                   #         value <= 0: not counted
    iaddq $1, %rax              #         count++
Skip5:
    iaddq $40, %rsi             # dst += 5 words (40 bytes)
    iaddq $40, %rdi             # src += 5 words (40 bytes)
    iaddq $-5, %rdx             # five fewer words remain
    jg Unroll5                  # at least five still left: go around again
Rest:
    iaddq $4, %rdx              # undo the bias: rdx = words still to copy
    jle Done                    # nothing left
Single:
    mrmovq (%rdi), %r10         # load one word from src
    rmmovq %r10, (%rsi)         # store it to dst
    andq %r10, %r10             # set flags from the value
    jle SkipS                   # value <= 0: not counted
    iaddq $1, %rax              # count++
SkipS:
    iaddq $8, %rsi              # dst += 1 word
    iaddq $8, %rdi              # src += 1 word
    iaddq $-1, %rdx             # one fewer word remains
    jg Single                   # more words left: keep going
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
    ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */
EndFun:
###############################
# Source and destination blocks
###############################
.align 8
# Source block handed to ncopy by main: 4 signed words, 2 of which are > 0.
# One extra word follows the block (see the last line).
src:
.quad 1
.quad -2
.quad 3
.quad -4
.quad 0xbcdefa # Word just past the 4 source words. This shouldn't get moved
.align 16
# Word placed immediately before dest.
# NOTE(review): presumably a guard value so a checker can spot writes before
# dest; nothing in this file reads it, so confirm against the test harness.
Predest:
.quad 0xbcdefa
# Destination block handed to ncopy by main: 4 words, each pre-filled
# with 0xcdefab.
dest:
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
# Word placed immediately after the 4 dest words.
# NOTE(review): presumably a guard value for detecting writes past the end
# of dest; confirm against the test harness.
Postdest:
.quad 0xdefabc
.align 8
# Run time stack
# 16 zero-initialized words are reserved directly below the Stack label;
# main loads the address of Stack into %rsp before calling ncopy.
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
Stack: