optimize archlab
This commit is contained in:
parent
911ff3d3bd
commit
5cfa01dde0
@ -1,263 +0,0 @@
|
||||
#######################################################################
|
||||
# Test for copying block of size 63;
|
||||
#######################################################################
|
||||
.pos 0
|
||||
main: irmovq Stack, %rsp # Set up stack pointer
|
||||
|
||||
# Set up arguments for copy function and then invoke it
|
||||
irmovq $63, %rdx # len: src and dst have 63 elements
|
||||
irmovq dest, %rsi # dst array
|
||||
irmovq src, %rdi # src array
|
||||
call ncopy # %rax = number of positive words copied
|
||||
halt # should halt with the count of positive (>0) words in %rax
|
||||
StartFun:
|
||||
#/* $begin ncopy-ys */
|
||||
##################################################################
|
||||
# ncopy.ys - Copy a src block of len words to dst.
|
||||
# Return the number of positive words (>0) contained in src.
|
||||
#
|
||||
# Include your name and ID here.
|
||||
#
|
||||
# Describe how and why you modified the baseline code.
|
||||
#
|
||||
##################################################################
|
||||
# Do not modify this portion
|
||||
# Function prologue.
|
||||
# %rdi = src, %rsi = dst, %rdx = len
|
||||
ncopy:
|
||||
|
||||
##################################################################
|
||||
# You can modify this portion
|
||||
|
||||
# Loop header
# Copies len words from src to dst, 5 words per Loop1 iteration, then
# finishes the leftover words one at a time in Loop2.
# %rax = running count of positive words, %r10 = scratch for the word
# being copied. While in Loop1, %rdx holds (words remaining - 4), so
# %rdx > 0 means at least 5 words are still left.
|
||||
xorq %rax,%rax # count = 0;
|
||||
iaddq $0xfffffffffffffffc, %rdx # len -= 4 (bias used by the unrolled loop)
|
||||
jle Tail # fewer than 5 words: skip the unrolled loop, goto Tail
|
||||
|
||||
Loop1:
|
||||
mrmovq (%rdi), %r10 # read word 0 from src...
|
||||
rmmovq %r10, (%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle Npos1 # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
Npos1:
|
||||
|
||||
mrmovq 8(%rdi), %r10 # read word 1 from src...
|
||||
rmmovq %r10, 8(%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle Npos2 # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
Npos2:
|
||||
|
||||
mrmovq 16(%rdi), %r10 # read word 2 from src...
|
||||
rmmovq %r10, 16(%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle Npos3 # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
Npos3:
|
||||
|
||||
mrmovq 24(%rdi), %r10 # read word 3 from src...
|
||||
rmmovq %r10, 24(%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle Npos4 # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
Npos4:
|
||||
|
||||
mrmovq 32(%rdi), %r10 # read word 4 from src...
|
||||
rmmovq %r10, 32(%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle Npos5 # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
Npos5:
|
||||
|
||||
iaddq $40, %rdi # src += 5 words (40 bytes)
|
||||
iaddq $40, %rsi # dst += 5 words (40 bytes)
|
||||
iaddq $0xfffffffffffffffb, %rdx # len -= 5
|
||||
jg Loop1 # repeat while at least 5 words remain
|
||||
|
||||
Tail:
|
||||
iaddq $4, %rdx # undo the -4 bias: %rdx = words remaining
|
||||
jle Done # nothing left to copy
|
||||
Loop2:
|
||||
mrmovq (%rdi), %r10 # read val from src...
|
||||
rmmovq %r10, (%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle NposT # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
NposT:
|
||||
iaddq $8, %rdi # src++ (one 8-byte word)
|
||||
iaddq $8, %rsi # dst++ (one 8-byte word)
|
||||
iaddq $0xffffffffffffffff, %rdx # len--
|
||||
jg Loop2 # repeat while words remain
|
||||
|
||||
|
||||
##################################################################
|
||||
# Do not modify the following section of code
|
||||
# Function epilogue.
|
||||
Done:
|
||||
ret
|
||||
##################################################################
|
||||
# Keep the following label at the end of your function
|
||||
End:
|
||||
#/* $end ncopy-ys */
|
||||
EndFun:
|
||||
|
||||
###############################
|
||||
# Source and destination blocks
|
||||
###############################
|
||||
.align 8
|
||||
src:
|
||||
.quad 1
|
||||
.quad 2
|
||||
.quad -3
|
||||
.quad -4
|
||||
.quad -5
|
||||
.quad 6
|
||||
.quad -7
|
||||
.quad 8
|
||||
.quad 9
|
||||
.quad -10
|
||||
.quad 11
|
||||
.quad 12
|
||||
.quad -13
|
||||
.quad 14
|
||||
.quad -15
|
||||
.quad 16
|
||||
.quad -17
|
||||
.quad -18
|
||||
.quad 19
|
||||
.quad -20
|
||||
.quad 21
|
||||
.quad -22
|
||||
.quad -23
|
||||
.quad -24
|
||||
.quad -25
|
||||
.quad -26
|
||||
.quad 27
|
||||
.quad -28
|
||||
.quad -29
|
||||
.quad -30
|
||||
.quad 31
|
||||
.quad -32
|
||||
.quad 33
|
||||
.quad -34
|
||||
.quad 35
|
||||
.quad 36
|
||||
.quad -37
|
||||
.quad 38
|
||||
.quad 39
|
||||
.quad 40
|
||||
.quad 41
|
||||
.quad -42
|
||||
.quad 43
|
||||
.quad 44
|
||||
.quad 45
|
||||
.quad 46
|
||||
.quad 47
|
||||
.quad 48
|
||||
.quad 49
|
||||
.quad 50
|
||||
.quad 51
|
||||
.quad -52
|
||||
.quad -53
|
||||
.quad 54
|
||||
.quad -55
|
||||
.quad -56
|
||||
.quad 57
|
||||
.quad -58
|
||||
.quad -59
|
||||
.quad -60
|
||||
.quad -61
|
||||
.quad -62
|
||||
.quad -63
|
||||
.quad 0xbcdefa # This shouldn't get moved
|
||||
|
||||
.align 16
|
||||
Predest:
|
||||
.quad 0xbcdefa
|
||||
dest:
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
Postdest:
|
||||
.quad 0xdefabc
|
||||
|
||||
.align 8
|
||||
# Run time stack
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
|
||||
Stack:
|
@ -24,73 +24,63 @@ Loop:
|
||||
mrmovq (%rdi), %r10
|
||||
rmmovq %r10, (%rsi)
|
||||
andq %r10, %r10
|
||||
jle Npos1
|
||||
jle F1
|
||||
iaddq $1, %rax
|
||||
Npos1:
|
||||
|
||||
F1:
|
||||
mrmovq 8(%rdi), %r10
|
||||
rmmovq %r10, 8(%rsi)
|
||||
andq %r10, %r10
|
||||
jle Npos2
|
||||
jle F2
|
||||
iaddq $1, %rax
|
||||
Npos2:
|
||||
|
||||
F2:
|
||||
mrmovq 16(%rdi), %r10
|
||||
rmmovq %r10, 16(%rsi)
|
||||
andq %r10, %r10
|
||||
jle Npos3
|
||||
jle F3
|
||||
iaddq $1, %rax
|
||||
Npos3:
|
||||
|
||||
F3:
|
||||
mrmovq 24(%rdi), %r10
|
||||
rmmovq %r10, 24(%rsi)
|
||||
andq %r10, %r10
|
||||
jle Npos4
|
||||
jle F4
|
||||
iaddq $1, %rax
|
||||
Npos4:
|
||||
|
||||
F4:
|
||||
mrmovq 32(%rdi), %r10
|
||||
rmmovq %r10, 32(%rsi)
|
||||
andq %r10, %r10
|
||||
jle Npos5
|
||||
jle F5
|
||||
iaddq $1, %rax
|
||||
Npos5:
|
||||
|
||||
F5:
|
||||
mrmovq 40(%rdi), %r10
|
||||
rmmovq %r10, 40(%rsi)
|
||||
andq %r10, %r10
|
||||
jle Npos6
|
||||
jle F6
|
||||
iaddq $1, %rax
|
||||
Npos6:
|
||||
|
||||
F6:
|
||||
mrmovq 48(%rdi), %r10
|
||||
rmmovq %r10, 48(%rsi)
|
||||
andq %r10, %r10
|
||||
jle Npos7
|
||||
jle F7
|
||||
iaddq $1, %rax
|
||||
Npos7:
|
||||
|
||||
F7:
|
||||
mrmovq 56(%rdi), %r10
|
||||
rmmovq %r10, 56(%rsi)
|
||||
andq %r10, %r10
|
||||
jle Npos8
|
||||
jle F8
|
||||
iaddq $1, %rax
|
||||
Npos8:
|
||||
|
||||
F8:
|
||||
mrmovq 64(%rdi), %r10
|
||||
rmmovq %r10, 64(%rsi)
|
||||
andq %r10, %r10
|
||||
jle Npos9
|
||||
jle F9
|
||||
iaddq $1, %rax
|
||||
Npos9:
|
||||
|
||||
F9:
|
||||
mrmovq 72(%rdi), %r10
|
||||
rmmovq %r10, 72(%rsi)
|
||||
andq %r10, %r10
|
||||
jle Npos10
|
||||
jle F10
|
||||
iaddq $1, %rax
|
||||
Npos10:
|
||||
|
||||
F10:
|
||||
iaddq $80, %rdi
|
||||
iaddq $80, %rsi
|
||||
iaddq $0xfffffffffffffff6, %rdx
|
||||
@ -176,7 +166,6 @@ J1:
|
||||
# Function epilogue.
|
||||
Done:
|
||||
ret
|
||||
|
||||
##################################################################
|
||||
# Keep the following label at the end of your function
|
||||
End:
|
||||
|
@ -51,6 +51,9 @@ wordsig RNONE 'REG_NONE' # Special value indicating "no register"
|
||||
##### ALU Functions referenced explicitly ##########################
|
||||
wordsig ALUADD 'A_ADD' # ALU should add its arguments
|
||||
|
||||
##### Jump conditions referenced explicitly
|
||||
wordsig UNCOND 'C_YES' # Unconditional transfer
|
||||
|
||||
##### Possible instruction status values #####
|
||||
wordsig SBUB 'STAT_BUB' # Bubble in stage
|
||||
wordsig SAOK 'STAT_AOK' # Normal execution
|
||||
@ -76,6 +79,7 @@ boolsig imem_error 'imem_error' # Error signal from instruction memory
|
||||
boolsig instr_valid 'instr_valid' # Is fetched instruction valid?
|
||||
|
||||
##### Pipeline Register D ##########################################
|
||||
wordsig D_stat 'if_id_curr->status' # Instruction status
|
||||
wordsig D_icode 'if_id_curr->icode' # Instruction code
|
||||
wordsig D_rA 'if_id_curr->ra' # rA field from instruction
|
||||
wordsig D_rB 'if_id_curr->rb' # rB field from instruction
|
||||
@ -135,12 +139,14 @@ wordsig W_valM 'mem_wb_curr->valm' # Memory M value
|
||||
|
||||
## What address should instruction be fetched at
|
||||
word f_pc = [
|
||||
# Mispredicted branch. Fetch at incremented PC
|
||||
M_icode == IJXX && !M_Cnd : M_valA;
|
||||
# Completion of RET instruction
|
||||
# Completion of RET instruction
|
||||
W_icode == IRET : W_valM;
|
||||
# Default: Use predicted value of PC
|
||||
1 : F_predPC;
|
||||
M_icode != IJXX || M_ifun == UNCOND : F_predPC;
|
||||
# Mispredicted branch. Fetch at incremented PC
|
||||
M_valA < M_valE && M_Cnd && D_stat == SBUB : M_valE;
|
||||
M_valA > M_valE && !M_Cnd && D_stat == SBUB : M_valA;
|
||||
1 : F_predPC;
|
||||
];
|
||||
|
||||
## Determine icode of fetched instruction
|
||||
@ -179,7 +185,8 @@ bool need_valC =
|
||||
|
||||
# Predict next value of PC
|
||||
word f_predPC = [
|
||||
f_icode in { IJXX, ICALL } : f_valC;
|
||||
f_icode == ICALL ||
|
||||
f_icode == IJXX && (f_ifun == UNCOND || f_valC < f_valP) : f_valC;
|
||||
1 : f_valP;
|
||||
];
|
||||
|
||||
@ -239,7 +246,7 @@ word d_valB = [
|
||||
## Select input A to ALU
|
||||
word aluA = [
|
||||
E_icode in { IRRMOVQ, IOPQ } : E_valA;
|
||||
E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : E_valC;
|
||||
E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ, IJXX } : E_valC;
|
||||
E_icode in { ICALL, IPUSHQ } : -8;
|
||||
E_icode in { IRET, IPOPQ } : 8;
|
||||
# Other instructions don't need ALU
|
||||
@ -249,7 +256,7 @@ word aluA = [
|
||||
word aluB = [
|
||||
E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
|
||||
IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB;
|
||||
E_icode in { IRRMOVQ, IIRMOVQ } : 0;
|
||||
E_icode in { IRRMOVQ, IIRMOVQ, IJXX } : 0;
|
||||
# Other instructions don't need ALU
|
||||
];
|
||||
|
||||
@ -329,7 +336,10 @@ bool F_stall =
|
||||
E_dstM in { d_srcA, d_srcB } &&
|
||||
!(D_icode in { IPUSHQ, IRMMOVQ } && E_dstM == d_srcA) ||
|
||||
# Stalling at fetch while ret passes through pipeline
|
||||
IRET in { D_icode, E_icode, M_icode };
|
||||
IRET in { D_icode, E_icode, M_icode } &&
|
||||
!(E_icode == IJXX && E_ifun != UNCOND &&
|
||||
(E_valA < e_valE && e_Cnd ||
|
||||
E_valA > e_valE && !e_Cnd));
|
||||
|
||||
# Should I stall or inject a bubble into Pipeline Register D?
|
||||
# At most one of these can be true.
|
||||
@ -342,7 +352,9 @@ bool D_stall =
|
||||
|
||||
bool D_bubble =
|
||||
# Mispredicted branch
|
||||
(E_icode == IJXX && !e_Cnd) ||
|
||||
(E_icode == IJXX && E_ifun != UNCOND &&
|
||||
(E_valA < e_valE && e_Cnd && F_predPC != e_valE ||
|
||||
E_valA > e_valE && !e_Cnd && F_predPC != E_valA)) ||
|
||||
# Stalling at fetch while ret passes through pipeline
|
||||
# but not condition for a load/use hazard
|
||||
!(E_icode in { IMRMOVQ, IPOPQ } &&
|
||||
@ -355,7 +367,9 @@ bool D_bubble =
|
||||
bool E_stall = 0;
|
||||
bool E_bubble =
|
||||
# Mispredicted branch
|
||||
(E_icode == IJXX && !e_Cnd) ||
|
||||
(E_icode == IJXX && E_ifun != UNCOND &&
|
||||
(E_valA < e_valE && e_Cnd ||
|
||||
E_valA > e_valE && !e_Cnd)) ||
|
||||
# Conditions for a load/use hazard
|
||||
## Set this to the new load/use condition
|
||||
E_icode in { IMRMOVQ, IPOPQ } &&
|
||||
|
@ -1,145 +0,0 @@
|
||||
#######################################################################
|
||||
# Test for copying block of size 4;
|
||||
#######################################################################
|
||||
.pos 0
|
||||
main: irmovq Stack, %rsp # Set up stack pointer
|
||||
|
||||
# Set up arguments for copy function and then invoke it
|
||||
irmovq $4, %rdx # len: src and dst have 4 elements
|
||||
irmovq dest, %rsi # dst array
|
||||
irmovq src, %rdi # src array
|
||||
call ncopy # %rax = number of positive words copied
|
||||
halt # should halt with the count of positive (>0) words in %rax
|
||||
StartFun:
|
||||
#/* $begin ncopy-ys */
|
||||
##################################################################
|
||||
# ncopy.ys - Copy a src block of len words to dst.
|
||||
# Return the number of positive words (>0) contained in src.
|
||||
#
|
||||
# Include your name and ID here.
|
||||
#
|
||||
# Describe how and why you modified the baseline code.
|
||||
#
|
||||
##################################################################
|
||||
# Do not modify this portion
|
||||
# Function prologue.
|
||||
# %rdi = src, %rsi = dst, %rdx = len
|
||||
ncopy:
|
||||
|
||||
##################################################################
|
||||
# You can modify this portion
|
||||
|
||||
# Loop header
# Copies len words from src to dst, 5 words per Loop1 iteration, then
# finishes the leftover words one at a time in Loop2.
# %rax = running count of positive words, %r10 = scratch for the word
# being copied. While in Loop1, %rdx holds (words remaining - 4), so
# %rdx > 0 means at least 5 words are still left.
|
||||
xorq %rax,%rax # count = 0;
|
||||
iaddq $0xfffffffffffffffc, %rdx # len -= 4 (bias used by the unrolled loop)
|
||||
jle Tail # fewer than 5 words: skip the unrolled loop, goto Tail
|
||||
|
||||
Loop1:
|
||||
mrmovq (%rdi), %r10 # read word 0 from src...
|
||||
rmmovq %r10, (%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle Npos1 # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
Npos1:
|
||||
|
||||
mrmovq 8(%rdi), %r10 # read word 1 from src...
|
||||
rmmovq %r10, 8(%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle Npos2 # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
Npos2:
|
||||
|
||||
mrmovq 16(%rdi), %r10 # read word 2 from src...
|
||||
rmmovq %r10, 16(%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle Npos3 # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
Npos3:
|
||||
|
||||
mrmovq 24(%rdi), %r10 # read word 3 from src...
|
||||
rmmovq %r10, 24(%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle Npos4 # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
Npos4:
|
||||
|
||||
mrmovq 32(%rdi), %r10 # read word 4 from src...
|
||||
rmmovq %r10, 32(%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle Npos5 # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
Npos5:
|
||||
|
||||
iaddq $40, %rdi # src += 5 words (40 bytes)
|
||||
iaddq $40, %rsi # dst += 5 words (40 bytes)
|
||||
iaddq $0xfffffffffffffffb, %rdx # len -= 5
|
||||
jg Loop1 # repeat while at least 5 words remain
|
||||
|
||||
Tail:
|
||||
iaddq $4, %rdx # undo the -4 bias: %rdx = words remaining
|
||||
jle Done # nothing left to copy
|
||||
Loop2:
|
||||
mrmovq (%rdi), %r10 # read val from src...
|
||||
rmmovq %r10, (%rsi) # ...and store it to dst
|
||||
andq %r10, %r10 # val <= 0?
|
||||
jle NposT # if so, skip count++
|
||||
iaddq $1, %rax # count++
|
||||
NposT:
|
||||
iaddq $8, %rdi # src++ (one 8-byte word)
|
||||
iaddq $8, %rsi # dst++ (one 8-byte word)
|
||||
iaddq $0xffffffffffffffff, %rdx # len--
|
||||
jg Loop2 # repeat while words remain
|
||||
|
||||
|
||||
##################################################################
|
||||
# Do not modify the following section of code
|
||||
# Function epilogue.
|
||||
Done:
|
||||
ret
|
||||
##################################################################
|
||||
# Keep the following label at the end of your function
|
||||
End:
|
||||
#/* $end ncopy-ys */
|
||||
EndFun:
|
||||
|
||||
###############################
|
||||
# Source and destination blocks
|
||||
###############################
|
||||
.align 8
|
||||
src:
|
||||
.quad 1
|
||||
.quad -2
|
||||
.quad 3
|
||||
.quad -4
|
||||
.quad 0xbcdefa # This shouldn't get moved
|
||||
|
||||
.align 16
|
||||
Predest:
|
||||
.quad 0xbcdefa
|
||||
dest:
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
.quad 0xcdefab
|
||||
Postdest:
|
||||
.quad 0xdefabc
|
||||
|
||||
.align 8
|
||||
# Run time stack
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
.quad 0
|
||||
|
||||
Stack:
|
Loading…
x
Reference in New Issue
Block a user