optimize archlab

This commit is contained in:
18218461270@163.com 2025-08-23 13:28:47 +08:00
parent 911ff3d3bd
commit 5cfa01dde0
4 changed files with 134 additions and 539 deletions

View File

@ -1,263 +0,0 @@
#######################################################################
# Test for copying block of size 63;
#######################################################################
# Test driver: set up the stack and the three ncopy arguments
# (%rdi = src, %rsi = dst, %rdx = len), call ncopy, then halt so the
# return value can be inspected in %rax.
.pos 0
main: irmovq Stack, %rsp # Set up stack pointer
# Set up arguments for copy function and then invoke it
irmovq $63, %rdx # len: src and dst have 63 elements
irmovq dest, %rsi # dst array
irmovq src, %rdi # src array
call ncopy
halt # should halt with the count of positive (>0) src words in %rax
StartFun:
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
#
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
ncopy:
##################################################################
# You can modify this portion
# Strategy: Loop1 is unrolled to copy 5 words (8 bytes each) per
# iteration; Loop2 then copies the remaining 0-4 words one at a time.
# %rax = running count of positive words, %r10 = scratch register
# holding the word currently being copied.
xorq %rax,%rax # count = 0;
iaddq $0xfffffffffffffffc, %rdx # len -= 4 (immediate is -4)
jle Tail # len was <= 4: too short for the unrolled loop, goto Tail
Loop1:
# Inside Loop1, %rdx holds (words remaining - 4), so it is > 0
# exactly when at least 5 words are still to be copied.
mrmovq (%rdi), %r10 # word 0: read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos1 # if so, skip the increment
iaddq $1, %rax # count++
Npos1:
mrmovq 8(%rdi), %r10 # word 1: read val from src...
rmmovq %r10, 8(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos2 # if so, skip the increment
iaddq $1, %rax # count++
Npos2:
mrmovq 16(%rdi), %r10 # word 2: read val from src...
rmmovq %r10, 16(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos3 # if so, skip the increment
iaddq $1, %rax # count++
Npos3:
mrmovq 24(%rdi), %r10 # word 3: read val from src...
rmmovq %r10, 24(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos4 # if so, skip the increment
iaddq $1, %rax # count++
Npos4:
mrmovq 32(%rdi), %r10 # word 4: read val from src...
rmmovq %r10, 32(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos5 # if so, skip the increment
iaddq $1, %rax # count++
Npos5:
iaddq $40, %rdi # src += 5 words (40 bytes)
iaddq $40, %rsi # dst += 5 words (40 bytes)
iaddq $0xfffffffffffffffb, %rdx # len -= 5 (immediate is -5)
jg Loop1 # at least 5 more words remain: run another unrolled pass
Tail:
iaddq $4, %rdx # undo the -4 bias: %rdx = words still to copy
jle Done # nothing left (or len was <= 0): return
Loop2:
# Scalar tail: copy one word per iteration.
mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle NposT # if so, skip the increment
iaddq $1, %rax # count++
NposT:
iaddq $8, %rdi # src++
iaddq $8, %rsi # dst++
iaddq $0xffffffffffffffff, %rdx # len-- (immediate is -1)
jg Loop2 # repeat while words remain
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */
EndFun:
###############################
# Source and destination blocks
###############################
# Source block: 63 words passed to ncopy in %rdi by main, a mix of
# positive and negative values, followed by one sentinel word that lies
# just past the end of the block.
.align 8
src:
.quad 1
.quad 2
.quad -3
.quad -4
.quad -5
.quad 6
.quad -7
.quad 8
.quad 9
.quad -10
.quad 11
.quad 12
.quad -13
.quad 14
.quad -15
.quad 16
.quad -17
.quad -18
.quad 19
.quad -20
.quad 21
.quad -22
.quad -23
.quad -24
.quad -25
.quad -26
.quad 27
.quad -28
.quad -29
.quad -30
.quad 31
.quad -32
.quad 33
.quad -34
.quad 35
.quad 36
.quad -37
.quad 38
.quad 39
.quad 40
.quad 41
.quad -42
.quad 43
.quad 44
.quad 45
.quad 46
.quad 47
.quad 48
.quad 49
.quad 50
.quad 51
.quad -52
.quad -53
.quad 54
.quad -55
.quad -56
.quad 57
.quad -58
.quad -59
.quad -60
.quad -61
.quad -62
.quad -63
.quad 0xbcdefa # Sentinel past the end of src: this shouldn't get moved
# Destination block: 63 placeholder words (0xcdefab) that ncopy is
# expected to overwrite, bracketed by the Predest and Postdest words.
# NOTE(review): Predest/Postdest look like guard values for detecting
# writes outside dest; the code that checks them is not in this file.
.align 16
Predest:
.quad 0xbcdefa
dest:
# dest[0..15]
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
# dest[16..31]
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
# dest[32..47]
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
# dest[48..62]
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
Postdest:
.quad 0xdefabc
.align 8
# Run time stack
# 16 zeroed quads (128 bytes) are reserved below the Stack label.
# main loads the address of Stack into %rsp, so the label marks the
# initial top of stack and the words above it in this listing are the
# space available to the program.
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
Stack:

View File

@ -24,73 +24,63 @@ Loop:
mrmovq (%rdi), %r10 mrmovq (%rdi), %r10
rmmovq %r10, (%rsi) rmmovq %r10, (%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos1 jle F1
iaddq $1, %rax iaddq $1, %rax
Npos1: F1:
mrmovq 8(%rdi), %r10 mrmovq 8(%rdi), %r10
rmmovq %r10, 8(%rsi) rmmovq %r10, 8(%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos2 jle F2
iaddq $1, %rax iaddq $1, %rax
Npos2: F2:
mrmovq 16(%rdi), %r10 mrmovq 16(%rdi), %r10
rmmovq %r10, 16(%rsi) rmmovq %r10, 16(%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos3 jle F3
iaddq $1, %rax iaddq $1, %rax
Npos3: F3:
mrmovq 24(%rdi), %r10 mrmovq 24(%rdi), %r10
rmmovq %r10, 24(%rsi) rmmovq %r10, 24(%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos4 jle F4
iaddq $1, %rax iaddq $1, %rax
Npos4: F4:
mrmovq 32(%rdi), %r10 mrmovq 32(%rdi), %r10
rmmovq %r10, 32(%rsi) rmmovq %r10, 32(%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos5 jle F5
iaddq $1, %rax iaddq $1, %rax
Npos5: F5:
mrmovq 40(%rdi), %r10 mrmovq 40(%rdi), %r10
rmmovq %r10, 40(%rsi) rmmovq %r10, 40(%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos6 jle F6
iaddq $1, %rax iaddq $1, %rax
Npos6: F6:
mrmovq 48(%rdi), %r10 mrmovq 48(%rdi), %r10
rmmovq %r10, 48(%rsi) rmmovq %r10, 48(%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos7 jle F7
iaddq $1, %rax iaddq $1, %rax
Npos7: F7:
mrmovq 56(%rdi), %r10 mrmovq 56(%rdi), %r10
rmmovq %r10, 56(%rsi) rmmovq %r10, 56(%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos8 jle F8
iaddq $1, %rax iaddq $1, %rax
Npos8: F8:
mrmovq 64(%rdi), %r10 mrmovq 64(%rdi), %r10
rmmovq %r10, 64(%rsi) rmmovq %r10, 64(%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos9 jle F9
iaddq $1, %rax iaddq $1, %rax
Npos9: F9:
mrmovq 72(%rdi), %r10 mrmovq 72(%rdi), %r10
rmmovq %r10, 72(%rsi) rmmovq %r10, 72(%rsi)
andq %r10, %r10 andq %r10, %r10
jle Npos10 jle F10
iaddq $1, %rax iaddq $1, %rax
Npos10: F10:
iaddq $80, %rdi iaddq $80, %rdi
iaddq $80, %rsi iaddq $80, %rsi
iaddq $0xfffffffffffffff6, %rdx iaddq $0xfffffffffffffff6, %rdx
@ -176,7 +166,6 @@ J1:
# Function epilogue. # Function epilogue.
Done: Done:
ret ret
################################################################## ##################################################################
# Keep the following label at the end of your function # Keep the following label at the end of your function
End: End:

View File

@ -51,6 +51,9 @@ wordsig RNONE 'REG_NONE' # Special value indicating "no register"
##### ALU Functions referenced explicitly ########################## ##### ALU Functions referenced explicitly ##########################
wordsig ALUADD 'A_ADD' # ALU should add its arguments wordsig ALUADD 'A_ADD' # ALU should add its arguments
##### Jump conditions referenced explicitly
wordsig UNCOND 'C_YES' # Unconditional transfer
##### Possible instruction status values ##### ##### Possible instruction status values #####
wordsig SBUB 'STAT_BUB' # Bubble in stage wordsig SBUB 'STAT_BUB' # Bubble in stage
wordsig SAOK 'STAT_AOK' # Normal execution wordsig SAOK 'STAT_AOK' # Normal execution
@ -76,6 +79,7 @@ boolsig imem_error 'imem_error' # Error signal from instruction memory
boolsig instr_valid 'instr_valid' # Is fetched instruction valid? boolsig instr_valid 'instr_valid' # Is fetched instruction valid?
##### Pipeline Register D ########################################## ##### Pipeline Register D ##########################################
wordsig D_stat 'if_id_curr->status' # Instruction status
wordsig D_icode 'if_id_curr->icode' # Instruction code wordsig D_icode 'if_id_curr->icode' # Instruction code
wordsig D_rA 'if_id_curr->ra' # rA field from instruction wordsig D_rA 'if_id_curr->ra' # rA field from instruction
wordsig D_rB 'if_id_curr->rb' # rB field from instruction wordsig D_rB 'if_id_curr->rb' # rB field from instruction
@ -135,11 +139,13 @@ wordsig W_valM 'mem_wb_curr->valm' # Memory M value
## What address should instruction be fetched at ## What address should instruction be fetched at
word f_pc = [ word f_pc = [
# Mispredicted branch. Fetch at incremented PC
M_icode == IJXX && !M_Cnd : M_valA;
# Completion of RET instruction # Completion of RET instruction
W_icode == IRET : W_valM; W_icode == IRET : W_valM;
# Default: Use predicted value of PC # Default: Use predicted value of PC
M_icode != IJXX || M_ifun == UNCOND : F_predPC;
# Mispredicted branch. Fetch at incremented PC
M_valA < M_valE && M_Cnd && D_stat == SBUB : M_valE;
M_valA > M_valE && !M_Cnd && D_stat == SBUB : M_valA;
1 : F_predPC; 1 : F_predPC;
]; ];
@ -179,7 +185,8 @@ bool need_valC =
# Predict next value of PC # Predict next value of PC
word f_predPC = [ word f_predPC = [
f_icode in { IJXX, ICALL } : f_valC; f_icode == ICALL ||
f_icode == IJXX && (f_ifun == UNCOND || f_valC < f_valP) : f_valC;
1 : f_valP; 1 : f_valP;
]; ];
@ -239,7 +246,7 @@ word d_valB = [
## Select input A to ALU ## Select input A to ALU
word aluA = [ word aluA = [
E_icode in { IRRMOVQ, IOPQ } : E_valA; E_icode in { IRRMOVQ, IOPQ } : E_valA;
E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : E_valC; E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ, IJXX } : E_valC;
E_icode in { ICALL, IPUSHQ } : -8; E_icode in { ICALL, IPUSHQ } : -8;
E_icode in { IRET, IPOPQ } : 8; E_icode in { IRET, IPOPQ } : 8;
# Other instructions don't need ALU # Other instructions don't need ALU
@ -249,7 +256,7 @@ word aluA = [
word aluB = [ word aluB = [
E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB; IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB;
E_icode in { IRRMOVQ, IIRMOVQ } : 0; E_icode in { IRRMOVQ, IIRMOVQ, IJXX } : 0;
# Other instructions don't need ALU # Other instructions don't need ALU
]; ];
@ -329,7 +336,10 @@ bool F_stall =
E_dstM in { d_srcA, d_srcB } && E_dstM in { d_srcA, d_srcB } &&
!(D_icode in { IPUSHQ, IRMMOVQ } && E_dstM == d_srcA) || !(D_icode in { IPUSHQ, IRMMOVQ } && E_dstM == d_srcA) ||
# Stalling at fetch while ret passes through pipeline # Stalling at fetch while ret passes through pipeline
IRET in { D_icode, E_icode, M_icode }; IRET in { D_icode, E_icode, M_icode } &&
!(E_icode == IJXX && E_ifun != UNCOND &&
(E_valA < e_valE && e_Cnd ||
E_valA > e_valE && !e_Cnd));
# Should I stall or inject a bubble into Pipeline Register D? # Should I stall or inject a bubble into Pipeline Register D?
# At most one of these can be true. # At most one of these can be true.
@ -342,7 +352,9 @@ bool D_stall =
bool D_bubble = bool D_bubble =
# Mispredicted branch # Mispredicted branch
(E_icode == IJXX && !e_Cnd) || (E_icode == IJXX && E_ifun != UNCOND &&
(E_valA < e_valE && e_Cnd && F_predPC != e_valE ||
E_valA > e_valE && !e_Cnd && F_predPC != E_valA)) ||
# Stalling at fetch while ret passes through pipeline # Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard # but not condition for a load/use hazard
!(E_icode in { IMRMOVQ, IPOPQ } && !(E_icode in { IMRMOVQ, IPOPQ } &&
@ -355,7 +367,9 @@ bool D_bubble =
bool E_stall = 0; bool E_stall = 0;
bool E_bubble = bool E_bubble =
# Mispredicted branch # Mispredicted branch
(E_icode == IJXX && !e_Cnd) || (E_icode == IJXX && E_ifun != UNCOND &&
(E_valA < e_valE && e_Cnd ||
E_valA > e_valE && !e_Cnd)) ||
# Conditions for a load/use hazard # Conditions for a load/use hazard
## Set this to the new load/use condition ## Set this to the new load/use condition
E_icode in { IMRMOVQ, IPOPQ } && E_icode in { IMRMOVQ, IPOPQ } &&

View File

@ -1,145 +0,0 @@
#######################################################################
# Test for copying block of size 4;
#######################################################################
# Test driver: set up the stack and the three ncopy arguments
# (%rdi = src, %rsi = dst, %rdx = len), call ncopy, then halt so the
# return value can be inspected in %rax.
.pos 0
main: irmovq Stack, %rsp # Set up stack pointer
# Set up arguments for copy function and then invoke it
irmovq $4, %rdx # len: src and dst have 4 elements
irmovq dest, %rsi # dst array
irmovq src, %rdi # src array
call ncopy
halt # should halt with the count of positive (>0) src words in %rax
StartFun:
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
#
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
ncopy:
##################################################################
# You can modify this portion
# Strategy: Loop1 is unrolled to copy 5 words (8 bytes each) per
# iteration; Loop2 then copies the remaining 0-4 words one at a time.
# %rax = running count of positive words, %r10 = scratch register
# holding the word currently being copied.
xorq %rax,%rax # count = 0;
iaddq $0xfffffffffffffffc, %rdx # len -= 4 (immediate is -4)
jle Tail # len was <= 4: too short for the unrolled loop, goto Tail
Loop1:
# Inside Loop1, %rdx holds (words remaining - 4), so it is > 0
# exactly when at least 5 words are still to be copied.
mrmovq (%rdi), %r10 # word 0: read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos1 # if so, skip the increment
iaddq $1, %rax # count++
Npos1:
mrmovq 8(%rdi), %r10 # word 1: read val from src...
rmmovq %r10, 8(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos2 # if so, skip the increment
iaddq $1, %rax # count++
Npos2:
mrmovq 16(%rdi), %r10 # word 2: read val from src...
rmmovq %r10, 16(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos3 # if so, skip the increment
iaddq $1, %rax # count++
Npos3:
mrmovq 24(%rdi), %r10 # word 3: read val from src...
rmmovq %r10, 24(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos4 # if so, skip the increment
iaddq $1, %rax # count++
Npos4:
mrmovq 32(%rdi), %r10 # word 4: read val from src...
rmmovq %r10, 32(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos5 # if so, skip the increment
iaddq $1, %rax # count++
Npos5:
iaddq $40, %rdi # src += 5 words (40 bytes)
iaddq $40, %rsi # dst += 5 words (40 bytes)
iaddq $0xfffffffffffffffb, %rdx # len -= 5 (immediate is -5)
jg Loop1 # at least 5 more words remain: run another unrolled pass
Tail:
iaddq $4, %rdx # undo the -4 bias: %rdx = words still to copy
jle Done # nothing left (or len was <= 0): return
Loop2:
# Scalar tail: copy one word per iteration.
mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle NposT # if so, skip the increment
iaddq $1, %rax # count++
NposT:
iaddq $8, %rdi # src++
iaddq $8, %rsi # dst++
iaddq $0xffffffffffffffff, %rdx # len-- (immediate is -1)
jg Loop2 # repeat while words remain
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */
EndFun:
###############################
# Source and destination blocks
###############################
# Source block: 4 words passed to ncopy in %rdi by main. Two of them
# (1 and 3) are positive, so ncopy should return 2 for this input.
# One sentinel word follows, just past the end of the block.
.align 8
src:
.quad 1
.quad -2
.quad 3
.quad -4
.quad 0xbcdefa # Sentinel past the end of src: this shouldn't get moved
# Destination block: 4 placeholder words (0xcdefab) that ncopy is
# expected to overwrite, bracketed by the Predest and Postdest words.
# NOTE(review): Predest/Postdest look like guard values for detecting
# writes outside dest; the code that checks them is not in this file.
.align 16
Predest:
.quad 0xbcdefa
dest:
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
Postdest:
.quad 0xdefabc
.align 8
# Run time stack
# 16 zeroed quads (128 bytes) are reserved below the Stack label.
# main loads the address of Stack into %rsp, so the label marks the
# initial top of stack and the words above it in this listing are the
# space available to the program.
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
Stack: