#######################################################################
# Test driver: copy a 4-word block with ncopy and halt.
# Target: Y86-64 with the iaddq extension.
#######################################################################
        .pos 0
main:
        irmovq  Stack, %rsp             # stack grows down from the Stack label

        # Arguments for ncopy(src, dst, len)
        irmovq  $4, %rdx                # len = 4 words
        irmovq  dest, %rsi              # dst = &dest[0]
        irmovq  src, %rdi               # src = &src[0]
        call    ncopy
        halt                            # %rax = count of positive words copied

StartFun:
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Strategy:
#   * Main loop is unrolled five ways: each pass copies 5 words and
#     advances both pointers by 40 bytes.
#   * While the main loop runs, %rdx holds (words remaining - 4), so
#     a plain "> 0" test means "at least 5 words are still left".
#   * The remainder (0..4 words) is finished one word at a time.
#
# Registers:
#   %rdi = src cursor, %rsi = dst cursor, %rdx = length counter,
#   %rax = running count of positive words, %r10 = scratch word.
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
ncopy:

##################################################################
# You can modify this portion
        xorq    %rax, %rax              # count = 0
        iaddq   $-4, %rdx               # bias: %rdx = len - 4
        jle     Remainder               # fewer than 5 words: skip unrolled loop

Unrolled:
        # word 0
        mrmovq  (%rdi), %r10            # val = src[0]
        rmmovq  %r10, (%rsi)            # dst[0] = val
        andq    %r10, %r10              # set flags from val
        jle     Skip1                   # val <= 0: not counted
        iaddq   $1, %rax                # count++
Skip1:
        # word 1
        mrmovq  8(%rdi), %r10           # val = src[1]
        rmmovq  %r10, 8(%rsi)           # dst[1] = val
        andq    %r10, %r10
        jle     Skip2
        iaddq   $1, %rax
Skip2:
        # word 2
        mrmovq  16(%rdi), %r10          # val = src[2]
        rmmovq  %r10, 16(%rsi)          # dst[2] = val
        andq    %r10, %r10
        jle     Skip3
        iaddq   $1, %rax
Skip3:
        # word 3
        mrmovq  24(%rdi), %r10          # val = src[3]
        rmmovq  %r10, 24(%rsi)          # dst[3] = val
        andq    %r10, %r10
        jle     Skip4
        iaddq   $1, %rax
Skip4:
        # word 4
        mrmovq  32(%rdi), %r10          # val = src[4]
        rmmovq  %r10, 32(%rsi)          # dst[4] = val
        andq    %r10, %r10
        jle     Skip5
        iaddq   $1, %rax
Skip5:
        iaddq   $40, %rdi               # src += 5 words
        iaddq   $40, %rsi               # dst += 5 words
        iaddq   $-5, %rdx               # 5 words consumed; bias is kept
        jg      Unrolled                # still >= 5 words left

Remainder:
        iaddq   $4, %rdx                # remove bias: %rdx = words remaining
        jle     Done                    # nothing left to copy

Single:
        mrmovq  (%rdi), %r10            # val = *src
        rmmovq  %r10, (%rsi)            # *dst = val
        andq    %r10, %r10
        jle     SkipSingle              # val <= 0: not counted
        iaddq   $1, %rax                # count++
SkipSingle:
        iaddq   $8, %rdi                # src++
        iaddq   $8, %rsi                # dst++
        iaddq   $-1, %rdx               # one fewer word remaining
        jg      Single

##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
        ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */
EndFun:

###############################
# Source and destination blocks
###############################
        .align 8
src:
        .quad 1
        .quad -2
        .quad 3
        .quad -4
        .quad 0xbcdefa                  # guard word: must not be copied

        .align 16
Predest:
        .quad 0xbcdefa                  # guard word just before dest
dest:
        .quad 0xcdefab
        .quad 0xcdefab
        .quad 0xcdefab
        .quad 0xcdefab
Postdest:
        .quad 0xdefabc                  # guard word just after dest

        .align 8
# Run-time stack: 16 zeroed words; Stack labels the address just past them
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0
        .quad 0

Stack: