optimize archlab

This commit is contained in:
18218461270@163.com 2025-08-23 13:28:47 +08:00
parent 911ff3d3bd
commit 5cfa01dde0
4 changed files with 134 additions and 539 deletions

View File

@ -1,263 +0,0 @@
#######################################################################
# Test for copying block of size 63;
#######################################################################
# Driver entry point: point %rsp at Stack, load the three ncopy arguments
# (%rdi = src, %rsi = dst, %rdx = len), call ncopy, then halt.
.pos 0
main: irmovq Stack, %rsp # Set up stack pointer
# Set up arguments for copy function and then invoke it
irmovq $63, %rdx # len: src and dst have 63 elements
irmovq dest, %rsi # dst array
irmovq src, %rdi # src array
call ncopy
halt # should halt with the number of positive (>0) src words in %rax
StartFun:
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
#
# Structure of this version:
#   Loop1 - unrolled five ways: each pass copies the words at offsets
#           0, 8, 16, 24 and 32, then advances src and dst by 40 bytes.
#   Loop2 - copies the remaining words (fewer than five) one at a time.
# While Loop1 runs, %rdx holds (words remaining - 4), so a positive
# value means at least five words are left; Tail adds the 4 back.
# Registers written: %rax, %rdx, %rdi, %rsi, %r10 and the condition codes.
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
ncopy:
##################################################################
# You can modify this portion
# Loop header
xorq %rax,%rax # count = 0;
iaddq $0xfffffffffffffffc, %rdx # rdx = len - 4 (immediate is -4)
jle Tail # fewer than 5 words: skip the unrolled loop
Loop1:
# Word 0 of this group of five
mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # set flags from val; val <= 0?
jle Npos1 # if so, do not count it
iaddq $1, %rax # count++
Npos1:
# Word 1
mrmovq 8(%rdi), %r10 # read val from src...
rmmovq %r10, 8(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos2 # if so, do not count it
iaddq $1, %rax # count++
Npos2:
# Word 2
mrmovq 16(%rdi), %r10 # read val from src...
rmmovq %r10, 16(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos3 # if so, do not count it
iaddq $1, %rax # count++
Npos3:
# Word 3
mrmovq 24(%rdi), %r10 # read val from src...
rmmovq %r10, 24(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos4 # if so, do not count it
iaddq $1, %rax # count++
Npos4:
# Word 4
mrmovq 32(%rdi), %r10 # read val from src...
rmmovq %r10, 32(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos5 # if so, do not count it
iaddq $1, %rax # count++
Npos5:
iaddq $40, %rdi # src += 5 words (40 bytes)
iaddq $40, %rsi # dst += 5 words (40 bytes)
iaddq $0xfffffffffffffffb, %rdx # rdx -= 5 (immediate is -5); sets flags for jg
jg Loop1 # at least 5 words still left: run another unrolled pass
Tail:
iaddq $4, %rdx # undo the -4 bias: rdx = words still to copy
jle Done # nothing left
Loop2:
# One word per pass
mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle NposT # if so, do not count it
iaddq $1, %rax # count++
NposT:
iaddq $8, %rdi # src++ (one word)
iaddq $8, %rsi # dst++ (one word)
iaddq $0xffffffffffffffff, %rdx # rdx-- (immediate is -1); sets flags for jg
jg Loop2 # more words left
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */
EndFun:
###############################
# Source and destination blocks
###############################
# Source block: 63 words with magnitudes 1..63 and mixed signs
# (31 of them are positive), followed by one marker word that lies
# just past the 63 elements passed to ncopy.
.align 8
src:
.quad 1
.quad 2
.quad -3
.quad -4
.quad -5
.quad 6
.quad -7
.quad 8
.quad 9
.quad -10
.quad 11
.quad 12
.quad -13
.quad 14
.quad -15
.quad 16
.quad -17
.quad -18
.quad 19
.quad -20
.quad 21
.quad -22
.quad -23
.quad -24
.quad -25
.quad -26
.quad 27
.quad -28
.quad -29
.quad -30
.quad 31
.quad -32
.quad 33
.quad -34
.quad 35
.quad 36
.quad -37
.quad 38
.quad 39
.quad 40
.quad 41
.quad -42
.quad 43
.quad 44
.quad 45
.quad 46
.quad 47
.quad 48
.quad 49
.quad 50
.quad 51
.quad -52
.quad -53
.quad 54
.quad -55
.quad -56
.quad 57
.quad -58
.quad -59
.quad -60
.quad -61
.quad -62
.quad -63
.quad 0xbcdefa # Marker past the end of src: this shouldn't get moved
.align 16
Predest:
.quad 0xbcdefa
dest:
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
Postdest:
.quad 0xdefabc
.align 8
# Run time stack
# Sixteen zeroed words reserved below the Stack label. main loads the
# address of Stack into %rsp, so pushes (such as the return address
# written by call) land in the words just below the label.
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
Stack:

View File

@ -24,73 +24,63 @@ Loop:
mrmovq (%rdi), %r10
rmmovq %r10, (%rsi)
andq %r10, %r10
jle Npos1
jle F1
iaddq $1, %rax
Npos1:
F1:
mrmovq 8(%rdi), %r10
rmmovq %r10, 8(%rsi)
andq %r10, %r10
jle Npos2
jle F2
iaddq $1, %rax
Npos2:
F2:
mrmovq 16(%rdi), %r10
rmmovq %r10, 16(%rsi)
andq %r10, %r10
jle Npos3
jle F3
iaddq $1, %rax
Npos3:
F3:
mrmovq 24(%rdi), %r10
rmmovq %r10, 24(%rsi)
andq %r10, %r10
jle Npos4
jle F4
iaddq $1, %rax
Npos4:
F4:
mrmovq 32(%rdi), %r10
rmmovq %r10, 32(%rsi)
andq %r10, %r10
jle Npos5
jle F5
iaddq $1, %rax
Npos5:
F5:
mrmovq 40(%rdi), %r10
rmmovq %r10, 40(%rsi)
andq %r10, %r10
jle Npos6
jle F6
iaddq $1, %rax
Npos6:
F6:
mrmovq 48(%rdi), %r10
rmmovq %r10, 48(%rsi)
andq %r10, %r10
jle Npos7
jle F7
iaddq $1, %rax
Npos7:
F7:
mrmovq 56(%rdi), %r10
rmmovq %r10, 56(%rsi)
andq %r10, %r10
jle Npos8
jle F8
iaddq $1, %rax
Npos8:
F8:
mrmovq 64(%rdi), %r10
rmmovq %r10, 64(%rsi)
andq %r10, %r10
jle Npos9
jle F9
iaddq $1, %rax
Npos9:
F9:
mrmovq 72(%rdi), %r10
rmmovq %r10, 72(%rsi)
andq %r10, %r10
jle Npos10
jle F10
iaddq $1, %rax
Npos10:
F10:
iaddq $80, %rdi
iaddq $80, %rsi
iaddq $0xfffffffffffffff6, %rdx
@ -176,7 +166,6 @@ J1:
# Function epilogue.
Done:
ret
##################################################################
# Keep the following label at the end of your function
End:

View File

@ -51,6 +51,9 @@ wordsig RNONE 'REG_NONE' # Special value indicating "no register"
##### ALU Functions referenced explicitly ##########################
wordsig ALUADD 'A_ADD' # ALU should add its arguments
##### Jump conditions referenced explicitly
wordsig UNCOND 'C_YES' # Unconditional transfer
##### Possible instruction status values #####
wordsig SBUB 'STAT_BUB' # Bubble in stage
wordsig SAOK 'STAT_AOK' # Normal execution
@ -76,6 +79,7 @@ boolsig imem_error 'imem_error' # Error signal from instruction memory
boolsig instr_valid 'instr_valid' # Is fetched instruction valid?
##### Pipeline Register D ##########################################
wordsig D_stat 'if_id_curr->status' # Instruction status
wordsig D_icode 'if_id_curr->icode' # Instruction code
wordsig D_rA 'if_id_curr->ra' # rA field from instruction
wordsig D_rB 'if_id_curr->rb' # rB field from instruction
@ -135,12 +139,14 @@ wordsig W_valM 'mem_wb_curr->valm' # Memory M value
## What address should instruction be fetched at
word f_pc = [
# Mispredicted branch. Fetch at incremented PC
M_icode == IJXX && !M_Cnd : M_valA;
# Completion of RET instruction
# Completion of RET instruction
W_icode == IRET : W_valM;
# Default: Use predicted value of PC
1 : F_predPC;
M_icode != IJXX || M_ifun == UNCOND : F_predPC;
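# Mispredicted branch. A forward branch (predicted not taken) that is
# taken refetches at its target M_valE; a backward branch (predicted
# taken) that is not taken refetches at the incremented PC M_valA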
M_valA < M_valE && M_Cnd && D_stat == SBUB : M_valE;
M_valA > M_valE && !M_Cnd && D_stat == SBUB : M_valA;
1 : F_predPC;
];
## Determine icode of fetched instruction
@ -179,7 +185,8 @@ bool need_valC =
# Predict next value of PC
word f_predPC = [
f_icode in { IJXX, ICALL } : f_valC;
f_icode == ICALL ||
f_icode == IJXX && (f_ifun == UNCOND || f_valC < f_valP) : f_valC;
1 : f_valP;
];
@ -239,7 +246,7 @@ word d_valB = [
## Select input A to ALU
word aluA = [
E_icode in { IRRMOVQ, IOPQ } : E_valA;
E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : E_valC;
E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ, IJXX } : E_valC;
E_icode in { ICALL, IPUSHQ } : -8;
E_icode in { IRET, IPOPQ } : 8;
# Other instructions don't need ALU
@ -249,7 +256,7 @@ word aluA = [
word aluB = [
E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB;
E_icode in { IRRMOVQ, IIRMOVQ } : 0;
E_icode in { IRRMOVQ, IIRMOVQ, IJXX } : 0;
# Other instructions don't need ALU
];
@ -329,7 +336,10 @@ bool F_stall =
E_dstM in { d_srcA, d_srcB } &&
!(D_icode in { IPUSHQ, IRMMOVQ } && E_dstM == d_srcA) ||
# Stalling at fetch while ret passes through pipeline
IRET in { D_icode, E_icode, M_icode };
IRET in { D_icode, E_icode, M_icode } &&
!(E_icode == IJXX && E_ifun != UNCOND &&
(E_valA < e_valE && e_Cnd ||
E_valA > e_valE && !e_Cnd));
# Should I stall or inject a bubble into Pipeline Register D?
# At most one of these can be true.
@ -342,7 +352,9 @@ bool D_stall =
bool D_bubble =
# Mispredicted branch
(E_icode == IJXX && !e_Cnd) ||
(E_icode == IJXX && E_ifun != UNCOND &&
(E_valA < e_valE && e_Cnd && F_predPC != e_valE ||
E_valA > e_valE && !e_Cnd && F_predPC != E_valA)) ||
# Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard
!(E_icode in { IMRMOVQ, IPOPQ } &&
@ -355,7 +367,9 @@ bool D_bubble =
bool E_stall = 0;
bool E_bubble =
# Mispredicted branch
(E_icode == IJXX && !e_Cnd) ||
(E_icode == IJXX && E_ifun != UNCOND &&
(E_valA < e_valE && e_Cnd ||
E_valA > e_valE && !e_Cnd)) ||
# Conditions for a load/use hazard
## Set this to the new load/use condition
E_icode in { IMRMOVQ, IPOPQ } &&

View File

@ -1,145 +0,0 @@
#######################################################################
# Test for copying block of size 4;
#######################################################################
# Driver entry point: point %rsp at Stack, load the three ncopy arguments
# (%rdi = src, %rsi = dst, %rdx = len), call ncopy, then halt.
.pos 0
main: irmovq Stack, %rsp # Set up stack pointer
# Set up arguments for copy function and then invoke it
irmovq $4, %rdx # len: src and dst have 4 elements
irmovq dest, %rsi # dst array
irmovq src, %rdi # src array
call ncopy
halt # should halt with the number of positive (>0) src words in %rax
StartFun:
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
#
# Structure of this version:
#   Loop1 - unrolled five ways: each pass copies the words at offsets
#           0, 8, 16, 24 and 32, then advances src and dst by 40 bytes.
#   Loop2 - copies the remaining words (fewer than five) one at a time.
# While Loop1 runs, %rdx holds (words remaining - 4), so a positive
# value means at least five words are left; Tail adds the 4 back.
# Registers written: %rax, %rdx, %rdi, %rsi, %r10 and the condition codes.
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
ncopy:
##################################################################
# You can modify this portion
# Loop header
xorq %rax,%rax # count = 0;
iaddq $0xfffffffffffffffc, %rdx # rdx = len - 4 (immediate is -4)
jle Tail # fewer than 5 words: skip the unrolled loop
Loop1:
# Word 0 of this group of five
mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # set flags from val; val <= 0?
jle Npos1 # if so, do not count it
iaddq $1, %rax # count++
Npos1:
# Word 1
mrmovq 8(%rdi), %r10 # read val from src...
rmmovq %r10, 8(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos2 # if so, do not count it
iaddq $1, %rax # count++
Npos2:
# Word 2
mrmovq 16(%rdi), %r10 # read val from src...
rmmovq %r10, 16(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos3 # if so, do not count it
iaddq $1, %rax # count++
Npos3:
# Word 3
mrmovq 24(%rdi), %r10 # read val from src...
rmmovq %r10, 24(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos4 # if so, do not count it
iaddq $1, %rax # count++
Npos4:
# Word 4
mrmovq 32(%rdi), %r10 # read val from src...
rmmovq %r10, 32(%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos5 # if so, do not count it
iaddq $1, %rax # count++
Npos5:
iaddq $40, %rdi # src += 5 words (40 bytes)
iaddq $40, %rsi # dst += 5 words (40 bytes)
iaddq $0xfffffffffffffffb, %rdx # rdx -= 5 (immediate is -5); sets flags for jg
jg Loop1 # at least 5 words still left: run another unrolled pass
Tail:
iaddq $4, %rdx # undo the -4 bias: rdx = words still to copy
jle Done # nothing left
Loop2:
# One word per pass
mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle NposT # if so, do not count it
iaddq $1, %rax # count++
NposT:
iaddq $8, %rdi # src++ (one word)
iaddq $8, %rsi # dst++ (one word)
iaddq $0xffffffffffffffff, %rdx # rdx-- (immediate is -1); sets flags for jg
jg Loop2 # more words left
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */
EndFun:
###############################
# Source and destination blocks
###############################
# Source block: 4 words with alternating signs (2 of them are positive),
# followed by one marker word that lies just past the 4 elements passed
# to ncopy.
.align 8
src:
.quad 1
.quad -2
.quad 3
.quad -4
.quad 0xbcdefa # Marker past the end of src: this shouldn't get moved
# Destination block: 4 words pre-filled with 0xcdefab, bracketed by the
# marker words Predest (0xbcdefa) and Postdest (0xdefabc).
# NOTE(review): the markers presumably let a checker detect writes outside
# dest; no such check is visible in this file - confirm against the test harness.
.align 16
Predest:
.quad 0xbcdefa
dest:
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
.quad 0xcdefab
Postdest:
.quad 0xdefabc
.align 8
# Run time stack
# Sixteen zeroed words reserved below the Stack label. main loads the
# address of Stack into %rsp, so pushes (such as the return address
# written by call) land in the words just below the label.
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
.quad 0
Stack: