optimize archlab
This commit is contained in:
parent
911ff3d3bd
commit
5cfa01dde0
@ -1,263 +0,0 @@
|
|||||||
#######################################################################
|
|
||||||
# Test for copying block of size 63;
|
|
||||||
#######################################################################
|
|
||||||
.pos 0
|
|
||||||
main: irmovq Stack, %rsp # Set up stack pointer
|
|
||||||
|
|
||||||
# Set up arguments for copy function and then invoke it
|
|
||||||
irmovq $63, %rdx # len: src and dst have 63 elements
|
|
||||||
irmovq dest, %rsi # dst array
|
|
||||||
irmovq src, %rdi # src array
|
|
||||||
call ncopy # ncopy(%rdi = src, %rsi = dst, %rdx = len)
|
|
||||||
halt # should halt with the number of positive (>0) words in %rax
|
|
||||||
StartFun:
|
|
||||||
#/* $begin ncopy-ys */
|
|
||||||
##################################################################
|
|
||||||
# ncopy.ys - Copy a src block of len words to dst.
|
|
||||||
# Return the number of positive words (>0) contained in src.
|
|
||||||
#
|
|
||||||
# Include your name and ID here.
|
|
||||||
#
|
|
||||||
# Describe how and why you modified the baseline code.
|
|
||||||
#
|
|
||||||
##################################################################
|
|
||||||
# Do not modify this portion
|
|
||||||
# Function prologue.
|
|
||||||
# %rdi = src, %rsi = dst, %rdx = len
|
|
||||||
ncopy:
|
|
||||||
|
|
||||||
##################################################################
|
|
||||||
# You can modify this portion
|
|
||||||
# Loop header
|
|
||||||
xorq %rax,%rax # count = 0 (running total of positive words)
|
|
||||||
iaddq $0xfffffffffffffffc, %rdx # len -= 4; the result is > 0 only when len >= 5
|
|
||||||
jle Tail # fewer than 5 words: skip the 5-way unrolled loop, goto Tail
|
|
||||||
|
|
||||||
Loop1:
|
|
||||||
mrmovq (%rdi), %r10 # read val from src...
|
|
||||||
rmmovq %r10, (%rsi) # ...and store it to dst
|
|
||||||
andq %r10, %r10 # val <= 0?
|
|
||||||
jle Npos1 # if so, goto Npos:
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
Npos1:
|
|
||||||
|
|
||||||
mrmovq 8(%rdi), %r10 # read val from src...
|
|
||||||
rmmovq %r10, 8(%rsi) # ...and store it to dst
|
|
||||||
andq %r10, %r10 # val <= 0?
|
|
||||||
jle Npos2 # if so, goto Npos:
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
Npos2:
|
|
||||||
|
|
||||||
mrmovq 16(%rdi), %r10 # read val from src...
|
|
||||||
rmmovq %r10, 16(%rsi) # ...and store it to dst
|
|
||||||
andq %r10, %r10 # val <= 0?
|
|
||||||
jle Npos3 # if so, goto Npos:
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
Npos3:
|
|
||||||
|
|
||||||
mrmovq 24(%rdi), %r10 # read val from src...
|
|
||||||
rmmovq %r10, 24(%rsi) # ...and store it to dst
|
|
||||||
andq %r10, %r10 # val <= 0?
|
|
||||||
jle Npos4 # if so, goto Npos:
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
Npos4:
|
|
||||||
|
|
||||||
mrmovq 32(%rdi), %r10 # read val from src...
|
|
||||||
rmmovq %r10, 32(%rsi) # ...and store it to dst
|
|
||||||
andq %r10, %r10 # val <= 0?
|
|
||||||
jle Npos5 # if so, goto Npos:
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
Npos5: # all five words of this group have been copied
|
|
||||||
|
|
||||||
iaddq $40, %rdi # src += 5 words (5 * 8 bytes)
|
|
||||||
iaddq $40, %rsi # dst += 5 words (5 * 8 bytes)
|
|
||||||
iaddq $0xfffffffffffffffb, %rdx # len -= 5 (%rdx still carries the -4 bias from the header)
|
|
||||||
jg Loop1 # biased len > 0 means at least 5 words remain: copy another group
|
|
||||||
|
|
||||||
Tail: # copy the words the unrolled loop left over, one at a time
|
|
||||||
iaddq $4, %rdx # undo the -4 bias: %rdx = number of words still to copy
|
|
||||||
jle Done # nothing left to copy
|
|
||||||
Loop2: # one word per iteration
|
|
||||||
mrmovq (%rdi), %r10 # val = *src
|
|
||||||
rmmovq %r10, (%rsi) # *dst = val
|
|
||||||
andq %r10, %r10 # set condition codes from val
|
|
||||||
jle NposT # val <= 0: do not count it
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
NposT:
|
|
||||||
iaddq $8, %rdi # src++ (next 8-byte word)
|
|
||||||
iaddq $8, %rsi # dst++ (next 8-byte word)
|
|
||||||
iaddq $0xffffffffffffffff, %rdx # remaining--
|
|
||||||
jg Loop2 # repeat while words remain; otherwise fall through to Done
|
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
|
||||||
# Do not modify the following section of code
|
|
||||||
# Function epilogue.
|
|
||||||
Done:
|
|
||||||
ret
|
|
||||||
##################################################################
|
|
||||||
# Keep the following label at the end of your function
|
|
||||||
End:
|
|
||||||
#/* $end ncopy-ys */
|
|
||||||
EndFun:
|
|
||||||
|
|
||||||
###############################
|
|
||||||
# Source and destination blocks
|
|
||||||
###############################
|
|
||||||
.align 8
|
|
||||||
src:
|
|
||||||
.quad 1
|
|
||||||
.quad 2
|
|
||||||
.quad -3
|
|
||||||
.quad -4
|
|
||||||
.quad -5
|
|
||||||
.quad 6
|
|
||||||
.quad -7
|
|
||||||
.quad 8
|
|
||||||
.quad 9
|
|
||||||
.quad -10
|
|
||||||
.quad 11
|
|
||||||
.quad 12
|
|
||||||
.quad -13
|
|
||||||
.quad 14
|
|
||||||
.quad -15
|
|
||||||
.quad 16
|
|
||||||
.quad -17
|
|
||||||
.quad -18
|
|
||||||
.quad 19
|
|
||||||
.quad -20
|
|
||||||
.quad 21
|
|
||||||
.quad -22
|
|
||||||
.quad -23
|
|
||||||
.quad -24
|
|
||||||
.quad -25
|
|
||||||
.quad -26
|
|
||||||
.quad 27
|
|
||||||
.quad -28
|
|
||||||
.quad -29
|
|
||||||
.quad -30
|
|
||||||
.quad 31
|
|
||||||
.quad -32
|
|
||||||
.quad 33
|
|
||||||
.quad -34
|
|
||||||
.quad 35
|
|
||||||
.quad 36
|
|
||||||
.quad -37
|
|
||||||
.quad 38
|
|
||||||
.quad 39
|
|
||||||
.quad 40
|
|
||||||
.quad 41
|
|
||||||
.quad -42
|
|
||||||
.quad 43
|
|
||||||
.quad 44
|
|
||||||
.quad 45
|
|
||||||
.quad 46
|
|
||||||
.quad 47
|
|
||||||
.quad 48
|
|
||||||
.quad 49
|
|
||||||
.quad 50
|
|
||||||
.quad 51
|
|
||||||
.quad -52
|
|
||||||
.quad -53
|
|
||||||
.quad 54
|
|
||||||
.quad -55
|
|
||||||
.quad -56
|
|
||||||
.quad 57
|
|
||||||
.quad -58
|
|
||||||
.quad -59
|
|
||||||
.quad -60
|
|
||||||
.quad -61
|
|
||||||
.quad -62
|
|
||||||
.quad -63
|
|
||||||
.quad 0xbcdefa # This shouldn't get moved
|
|
||||||
|
|
||||||
.align 16
|
|
||||||
Predest:
|
|
||||||
.quad 0xbcdefa
|
|
||||||
dest:
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
Postdest:
|
|
||||||
.quad 0xdefabc
|
|
||||||
|
|
||||||
.align 8
|
|
||||||
# Run time stack
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
|
|
||||||
Stack:
|
|
@ -16,84 +16,74 @@ ncopy:
|
|||||||
##################################################################
|
##################################################################
|
||||||
# You can modify this portion
|
# You can modify this portion
|
||||||
|
|
||||||
xorq %rax, %rax
|
xorq %rax, %rax
|
||||||
iaddq $0xfffffffffffffff7, %rdx
|
iaddq $0xfffffffffffffff7, %rdx
|
||||||
jle Tail
|
jle Tail
|
||||||
|
|
||||||
Loop:
|
Loop:
|
||||||
mrmovq (%rdi), %r10
|
mrmovq (%rdi), %r10
|
||||||
rmmovq %r10, (%rsi)
|
rmmovq %r10, (%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle Npos1
|
jle F1
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
Npos1:
|
F1:
|
||||||
|
mrmovq 8(%rdi), %r10
|
||||||
mrmovq 8(%rdi), %r10
|
rmmovq %r10, 8(%rsi)
|
||||||
rmmovq %r10, 8(%rsi)
|
andq %r10, %r10
|
||||||
andq %r10, %r10
|
jle F2
|
||||||
jle Npos2
|
iaddq $1, %rax
|
||||||
iaddq $1, %rax
|
F2:
|
||||||
Npos2:
|
mrmovq 16(%rdi), %r10
|
||||||
|
rmmovq %r10, 16(%rsi)
|
||||||
mrmovq 16(%rdi), %r10
|
andq %r10, %r10
|
||||||
rmmovq %r10, 16(%rsi)
|
jle F3
|
||||||
andq %r10, %r10
|
iaddq $1, %rax
|
||||||
jle Npos3
|
F3:
|
||||||
iaddq $1, %rax
|
mrmovq 24(%rdi), %r10
|
||||||
Npos3:
|
rmmovq %r10, 24(%rsi)
|
||||||
|
andq %r10, %r10
|
||||||
mrmovq 24(%rdi), %r10
|
jle F4
|
||||||
rmmovq %r10, 24(%rsi)
|
iaddq $1, %rax
|
||||||
andq %r10, %r10
|
F4:
|
||||||
jle Npos4
|
mrmovq 32(%rdi), %r10
|
||||||
iaddq $1, %rax
|
rmmovq %r10, 32(%rsi)
|
||||||
Npos4:
|
andq %r10, %r10
|
||||||
|
jle F5
|
||||||
mrmovq 32(%rdi), %r10
|
iaddq $1, %rax
|
||||||
rmmovq %r10, 32(%rsi)
|
F5:
|
||||||
andq %r10, %r10
|
mrmovq 40(%rdi), %r10
|
||||||
jle Npos5
|
rmmovq %r10, 40(%rsi)
|
||||||
iaddq $1, %rax
|
andq %r10, %r10
|
||||||
Npos5:
|
jle F6
|
||||||
|
iaddq $1, %rax
|
||||||
mrmovq 40(%rdi), %r10
|
F6:
|
||||||
rmmovq %r10, 40(%rsi)
|
mrmovq 48(%rdi), %r10
|
||||||
andq %r10, %r10
|
rmmovq %r10, 48(%rsi)
|
||||||
jle Npos6
|
andq %r10, %r10
|
||||||
iaddq $1, %rax
|
jle F7
|
||||||
Npos6:
|
iaddq $1, %rax
|
||||||
|
F7:
|
||||||
mrmovq 48(%rdi), %r10
|
mrmovq 56(%rdi), %r10
|
||||||
rmmovq %r10, 48(%rsi)
|
rmmovq %r10, 56(%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle Npos7
|
jle F8
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
Npos7:
|
F8:
|
||||||
|
mrmovq 64(%rdi), %r10
|
||||||
mrmovq 56(%rdi), %r10
|
rmmovq %r10, 64(%rsi)
|
||||||
rmmovq %r10, 56(%rsi)
|
andq %r10, %r10
|
||||||
andq %r10, %r10
|
jle F9
|
||||||
jle Npos8
|
iaddq $1, %rax
|
||||||
iaddq $1, %rax
|
F9:
|
||||||
Npos8:
|
mrmovq 72(%rdi), %r10
|
||||||
|
rmmovq %r10, 72(%rsi)
|
||||||
mrmovq 64(%rdi), %r10
|
andq %r10, %r10
|
||||||
rmmovq %r10, 64(%rsi)
|
jle F10
|
||||||
andq %r10, %r10
|
iaddq $1, %rax
|
||||||
jle Npos9
|
F10:
|
||||||
iaddq $1, %rax
|
iaddq $80, %rdi
|
||||||
Npos9:
|
iaddq $80, %rsi
|
||||||
|
iaddq $0xfffffffffffffff6, %rdx
|
||||||
mrmovq 72(%rdi), %r10
|
|
||||||
rmmovq %r10, 72(%rsi)
|
|
||||||
andq %r10, %r10
|
|
||||||
jle Npos10
|
|
||||||
iaddq $1, %rax
|
|
||||||
Npos10:
|
|
||||||
|
|
||||||
iaddq $80, %rdi
|
|
||||||
iaddq $80, %rsi
|
|
||||||
iaddq $0xfffffffffffffff6, %rdx
|
|
||||||
jg Loop
|
jg Loop
|
||||||
|
|
||||||
Tail:
|
Tail:
|
||||||
@ -117,66 +107,65 @@ JT:
|
|||||||
.quad J9
|
.quad J9
|
||||||
|
|
||||||
J9:
|
J9:
|
||||||
mrmovq 64(%rdi), %r10
|
mrmovq 64(%rdi), %r10
|
||||||
rmmovq %r10, 64(%rsi)
|
rmmovq %r10, 64(%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle J8
|
jle J8
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
J8:
|
J8:
|
||||||
mrmovq 56(%rdi), %r10
|
mrmovq 56(%rdi), %r10
|
||||||
rmmovq %r10, 56(%rsi)
|
rmmovq %r10, 56(%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle J7
|
jle J7
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
J7:
|
J7:
|
||||||
mrmovq 48(%rdi), %r10
|
mrmovq 48(%rdi), %r10
|
||||||
rmmovq %r10, 48(%rsi)
|
rmmovq %r10, 48(%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle J6
|
jle J6
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
J6:
|
J6:
|
||||||
mrmovq 40(%rdi), %r10
|
mrmovq 40(%rdi), %r10
|
||||||
rmmovq %r10, 40(%rsi)
|
rmmovq %r10, 40(%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle J5
|
jle J5
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
J5:
|
J5:
|
||||||
mrmovq 32(%rdi), %r10
|
mrmovq 32(%rdi), %r10
|
||||||
rmmovq %r10, 32(%rsi)
|
rmmovq %r10, 32(%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle J4
|
jle J4
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
J4:
|
J4:
|
||||||
mrmovq 24(%rdi), %r10
|
mrmovq 24(%rdi), %r10
|
||||||
rmmovq %r10, 24(%rsi)
|
rmmovq %r10, 24(%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle J3
|
jle J3
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
J3:
|
J3:
|
||||||
mrmovq 16(%rdi), %r10
|
mrmovq 16(%rdi), %r10
|
||||||
rmmovq %r10, 16(%rsi)
|
rmmovq %r10, 16(%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle J2
|
jle J2
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
J2:
|
J2:
|
||||||
mrmovq 8(%rdi), %r10
|
mrmovq 8(%rdi), %r10
|
||||||
rmmovq %r10, 8(%rsi)
|
rmmovq %r10, 8(%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle J1
|
jle J1
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
J1:
|
J1:
|
||||||
mrmovq (%rdi), %r10
|
mrmovq (%rdi), %r10
|
||||||
rmmovq %r10, (%rsi)
|
rmmovq %r10, (%rsi)
|
||||||
andq %r10, %r10
|
andq %r10, %r10
|
||||||
jle Done
|
jle Done
|
||||||
iaddq $1, %rax
|
iaddq $1, %rax
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
# Do not modify the following section of code
|
# Do not modify the following section of code
|
||||||
# Function epilogue.
|
# Function epilogue.
|
||||||
Done:
|
Done:
|
||||||
ret
|
ret
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
# Keep the following label at the end of your function
|
# Keep the following label at the end of your function
|
||||||
End:
|
End:
|
||||||
|
@ -51,6 +51,9 @@ wordsig RNONE 'REG_NONE' # Special value indicating "no register"
|
|||||||
##### ALU Functions referenced explicitly ##########################
|
##### ALU Functions referenced explicitly ##########################
|
||||||
wordsig ALUADD 'A_ADD' # ALU should add its arguments
|
wordsig ALUADD 'A_ADD' # ALU should add its arguments
|
||||||
|
|
||||||
|
##### Jump conditions referenced explicitly
|
||||||
|
wordsig UNCOND 'C_YES' # Unconditional transfer
|
||||||
|
|
||||||
##### Possible instruction status values #####
|
##### Possible instruction status values #####
|
||||||
wordsig SBUB 'STAT_BUB' # Bubble in stage
|
wordsig SBUB 'STAT_BUB' # Bubble in stage
|
||||||
wordsig SAOK 'STAT_AOK' # Normal execution
|
wordsig SAOK 'STAT_AOK' # Normal execution
|
||||||
@ -76,6 +79,7 @@ boolsig imem_error 'imem_error' # Error signal from instruction memory
|
|||||||
boolsig instr_valid 'instr_valid' # Is fetched instruction valid?
|
boolsig instr_valid 'instr_valid' # Is fetched instruction valid?
|
||||||
|
|
||||||
##### Pipeline Register D ##########################################
|
##### Pipeline Register D ##########################################
|
||||||
|
wordsig D_stat 'if_id_curr->status' # Instruction status
|
||||||
wordsig D_icode 'if_id_curr->icode' # Instruction code
|
wordsig D_icode 'if_id_curr->icode' # Instruction code
|
||||||
wordsig D_rA 'if_id_curr->ra' # rA field from instruction
|
wordsig D_rA 'if_id_curr->ra' # rA field from instruction
|
||||||
wordsig D_rB 'if_id_curr->rb' # rB field from instruction
|
wordsig D_rB 'if_id_curr->rb' # rB field from instruction
|
||||||
@ -135,12 +139,14 @@ wordsig W_valM 'mem_wb_curr->valm' # Memory M value
|
|||||||
|
|
||||||
## What address should instruction be fetched at
|
## What address should instruction be fetched at
|
||||||
word f_pc = [
|
word f_pc = [
|
||||||
# Mispredicted branch. Fetch at incremented PC
|
# Completion of RET instruction
|
||||||
M_icode == IJXX && !M_Cnd : M_valA;
|
|
||||||
# Completion of RET instruction
|
|
||||||
W_icode == IRET : W_valM;
|
W_icode == IRET : W_valM;
|
||||||
# Default: Use predicted value of PC
|
# Default: Use predicted value of PC
|
||||||
1 : F_predPC;
|
M_icode != IJXX || M_ifun == UNCOND : F_predPC;
|
||||||
|
# Mispredicted conditional branch: redirect fetch to the path that was not predicted.
# NOTE(review): M_valE presumably carries the jump target (aluA/aluB pass E_valC
# through unchanged for IJXX) and M_valA the fall-through address - confirm.
|
||||||
|
M_valA < M_valE && M_Cnd && D_stat == SBUB : M_valE; # condition holds and M_valE is above M_valA: fetch at M_valE
|
||||||
|
M_valA > M_valE && !M_Cnd && D_stat == SBUB : M_valA; # condition fails and M_valE is below M_valA: fetch at M_valA
|
||||||
|
1 : F_predPC; # otherwise keep following the predicted PC
|
||||||
];
|
];
|
||||||
|
|
||||||
## Determine icode of fetched instruction
|
## Determine icode of fetched instruction
|
||||||
@ -179,7 +185,8 @@ bool need_valC =
|
|||||||
|
|
||||||
# Predict next value of PC
|
# Predict next value of PC
|
||||||
word f_predPC = [
|
word f_predPC = [
|
||||||
f_icode in { IJXX, ICALL } : f_valC;
|
f_icode == ICALL ||
|
||||||
|
f_icode == IJXX && (f_ifun == UNCOND || f_valC < f_valP) : f_valC;
|
||||||
1 : f_valP;
|
1 : f_valP;
|
||||||
];
|
];
|
||||||
|
|
||||||
@ -239,7 +246,7 @@ word d_valB = [
|
|||||||
## Select input A to ALU
|
## Select input A to ALU
|
||||||
word aluA = [
|
word aluA = [
|
||||||
E_icode in { IRRMOVQ, IOPQ } : E_valA;
|
E_icode in { IRRMOVQ, IOPQ } : E_valA;
|
||||||
E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : E_valC;
|
E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ, IJXX } : E_valC;
|
||||||
E_icode in { ICALL, IPUSHQ } : -8;
|
E_icode in { ICALL, IPUSHQ } : -8;
|
||||||
E_icode in { IRET, IPOPQ } : 8;
|
E_icode in { IRET, IPOPQ } : 8;
|
||||||
# Other instructions don't need ALU
|
# Other instructions don't need ALU
|
||||||
@ -249,7 +256,7 @@ word aluA = [
|
|||||||
word aluB = [
|
word aluB = [
|
||||||
E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
|
E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
|
||||||
IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB;
|
IPUSHQ, IRET, IPOPQ, IIADDQ } : E_valB;
|
||||||
E_icode in { IRRMOVQ, IIRMOVQ } : 0;
|
E_icode in { IRRMOVQ, IIRMOVQ, IJXX } : 0;
|
||||||
# Other instructions don't need ALU
|
# Other instructions don't need ALU
|
||||||
];
|
];
|
||||||
|
|
||||||
@ -329,7 +336,10 @@ bool F_stall =
|
|||||||
E_dstM in { d_srcA, d_srcB } &&
|
E_dstM in { d_srcA, d_srcB } &&
|
||||||
!(D_icode in { IPUSHQ, IRMMOVQ } && E_dstM == d_srcA) ||
|
!(D_icode in { IPUSHQ, IRMMOVQ } && E_dstM == d_srcA) ||
|
||||||
# Stalling at fetch while ret passes through pipeline
|
# Stalling at fetch while ret passes through pipeline
|
||||||
IRET in { D_icode, E_icode, M_icode };
|
IRET in { D_icode, E_icode, M_icode } &&
|
||||||
|
!(E_icode == IJXX && E_ifun != UNCOND &&
|
||||||
|
(E_valA < e_valE && e_Cnd ||
|
||||||
|
E_valA > e_valE && !e_Cnd));
|
||||||
|
|
||||||
# Should I stall or inject a bubble into Pipeline Register D?
|
# Should I stall or inject a bubble into Pipeline Register D?
|
||||||
# At most one of these can be true.
|
# At most one of these can be true.
|
||||||
@ -342,7 +352,9 @@ bool D_stall =
|
|||||||
|
|
||||||
bool D_bubble =
|
bool D_bubble =
|
||||||
# Mispredicted branch
|
# Mispredicted branch
|
||||||
(E_icode == IJXX && !e_Cnd) ||
|
(E_icode == IJXX && E_ifun != UNCOND &&
|
||||||
|
(E_valA < e_valE && e_Cnd && F_predPC != e_valE ||
|
||||||
|
E_valA > e_valE && !e_Cnd && F_predPC != E_valA)) ||
|
||||||
# Stalling at fetch while ret passes through pipeline
|
# Stalling at fetch while ret passes through pipeline
|
||||||
# but not condition for a load/use hazard
|
# but not condition for a load/use hazard
|
||||||
!(E_icode in { IMRMOVQ, IPOPQ } &&
|
!(E_icode in { IMRMOVQ, IPOPQ } &&
|
||||||
@ -355,7 +367,9 @@ bool D_bubble =
|
|||||||
bool E_stall = 0;
|
bool E_stall = 0;
|
||||||
bool E_bubble =
|
bool E_bubble =
|
||||||
# Mispredicted branch
|
# Mispredicted branch
|
||||||
(E_icode == IJXX && !e_Cnd) ||
|
(E_icode == IJXX && E_ifun != UNCOND &&
|
||||||
|
(E_valA < e_valE && e_Cnd ||
|
||||||
|
E_valA > e_valE && !e_Cnd)) ||
|
||||||
# Conditions for a load/use hazard
|
# Conditions for a load/use hazard
|
||||||
## Set this to the new load/use condition
|
## Set this to the new load/use condition
|
||||||
E_icode in { IMRMOVQ, IPOPQ } &&
|
E_icode in { IMRMOVQ, IPOPQ } &&
|
||||||
|
@ -1,145 +0,0 @@
|
|||||||
#######################################################################
|
|
||||||
# Test for copying block of size 4;
|
|
||||||
#######################################################################
|
|
||||||
.pos 0
|
|
||||||
main: irmovq Stack, %rsp # Set up stack pointer
|
|
||||||
|
|
||||||
# Set up arguments for copy function and then invoke it
|
|
||||||
irmovq $4, %rdx # len: src and dst have 4 elements
|
|
||||||
irmovq dest, %rsi # dst array
|
|
||||||
irmovq src, %rdi # src array
|
|
||||||
call ncopy # ncopy(%rdi = src, %rsi = dst, %rdx = len)
|
|
||||||
halt # should halt with the number of positive (>0) words in %rax
|
|
||||||
StartFun:
|
|
||||||
#/* $begin ncopy-ys */
|
|
||||||
##################################################################
|
|
||||||
# ncopy.ys - Copy a src block of len words to dst.
|
|
||||||
# Return the number of positive words (>0) contained in src.
|
|
||||||
#
|
|
||||||
# Include your name and ID here.
|
|
||||||
#
|
|
||||||
# Describe how and why you modified the baseline code.
|
|
||||||
#
|
|
||||||
##################################################################
|
|
||||||
# Do not modify this portion
|
|
||||||
# Function prologue.
|
|
||||||
# %rdi = src, %rsi = dst, %rdx = len
|
|
||||||
ncopy:
|
|
||||||
|
|
||||||
##################################################################
|
|
||||||
# You can modify this portion
|
|
||||||
# Loop header
|
|
||||||
xorq %rax,%rax # count = 0 (running total of positive words)
|
|
||||||
iaddq $0xfffffffffffffffc, %rdx # len -= 4; the result is > 0 only when len >= 5
|
|
||||||
jle Tail # fewer than 5 words (always the case for this 4-element test): goto Tail
|
|
||||||
|
|
||||||
Loop1:
|
|
||||||
mrmovq (%rdi), %r10 # read val from src...
|
|
||||||
rmmovq %r10, (%rsi) # ...and store it to dst
|
|
||||||
andq %r10, %r10 # val <= 0?
|
|
||||||
jle Npos1 # if so, goto Npos:
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
Npos1:
|
|
||||||
|
|
||||||
mrmovq 8(%rdi), %r10 # read val from src...
|
|
||||||
rmmovq %r10, 8(%rsi) # ...and store it to dst
|
|
||||||
andq %r10, %r10 # val <= 0?
|
|
||||||
jle Npos2 # if so, goto Npos:
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
Npos2:
|
|
||||||
|
|
||||||
mrmovq 16(%rdi), %r10 # read val from src...
|
|
||||||
rmmovq %r10, 16(%rsi) # ...and store it to dst
|
|
||||||
andq %r10, %r10 # val <= 0?
|
|
||||||
jle Npos3 # if so, goto Npos:
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
Npos3:
|
|
||||||
|
|
||||||
mrmovq 24(%rdi), %r10 # read val from src...
|
|
||||||
rmmovq %r10, 24(%rsi) # ...and store it to dst
|
|
||||||
andq %r10, %r10 # val <= 0?
|
|
||||||
jle Npos4 # if so, goto Npos:
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
Npos4:
|
|
||||||
|
|
||||||
mrmovq 32(%rdi), %r10 # read val from src...
|
|
||||||
rmmovq %r10, 32(%rsi) # ...and store it to dst
|
|
||||||
andq %r10, %r10 # val <= 0?
|
|
||||||
jle Npos5 # if so, goto Npos:
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
Npos5: # all five words of this group have been copied
|
|
||||||
|
|
||||||
iaddq $40, %rdi # src += 5 words (5 * 8 bytes)
|
|
||||||
iaddq $40, %rsi # dst += 5 words (5 * 8 bytes)
|
|
||||||
iaddq $0xfffffffffffffffb, %rdx # len -= 5 (%rdx still carries the -4 bias from the header)
|
|
||||||
jg Loop1 # biased len > 0 means at least 5 words remain: copy another group
|
|
||||||
|
|
||||||
Tail: # copy the words the unrolled loop left over, one at a time
|
|
||||||
iaddq $4, %rdx # undo the -4 bias: %rdx = number of words still to copy
|
|
||||||
jle Done # nothing left to copy
|
|
||||||
Loop2: # one word per iteration
|
|
||||||
mrmovq (%rdi), %r10 # val = *src
|
|
||||||
rmmovq %r10, (%rsi) # *dst = val
|
|
||||||
andq %r10, %r10 # set condition codes from val
|
|
||||||
jle NposT # val <= 0: do not count it
|
|
||||||
iaddq $1, %rax # count++
|
|
||||||
NposT:
|
|
||||||
iaddq $8, %rdi # src++ (next 8-byte word)
|
|
||||||
iaddq $8, %rsi # dst++ (next 8-byte word)
|
|
||||||
iaddq $0xffffffffffffffff, %rdx # remaining--
|
|
||||||
jg Loop2 # repeat while words remain; otherwise fall through to Done
|
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
|
||||||
# Do not modify the following section of code
|
|
||||||
# Function epilogue.
|
|
||||||
Done:
|
|
||||||
ret
|
|
||||||
##################################################################
|
|
||||||
# Keep the following label at the end of your function
|
|
||||||
End:
|
|
||||||
#/* $end ncopy-ys */
|
|
||||||
EndFun:
|
|
||||||
|
|
||||||
###############################
|
|
||||||
# Source and destination blocks
|
|
||||||
###############################
|
|
||||||
.align 8
|
|
||||||
src:
|
|
||||||
.quad 1
|
|
||||||
.quad -2
|
|
||||||
.quad 3
|
|
||||||
.quad -4
|
|
||||||
.quad 0xbcdefa # This shouldn't get moved
|
|
||||||
|
|
||||||
.align 16
|
|
||||||
Predest:
|
|
||||||
.quad 0xbcdefa
|
|
||||||
dest:
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
.quad 0xcdefab
|
|
||||||
Postdest:
|
|
||||||
.quad 0xdefabc
|
|
||||||
|
|
||||||
.align 8
|
|
||||||
# Run time stack
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
.quad 0
|
|
||||||
|
|
||||||
Stack:
|
|
Loading…
x
Reference in New Issue
Block a user