-
Notifications
You must be signed in to change notification settings - Fork 438
/
ncopy.ys
90 lines (89 loc) · 2.68 KB
/
ncopy.ys
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
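# 1. Use iaddq so that pointer, length and count updates add an
#    immediate constant directly.
# 2. Unroll the copy loop to process eight words per iteration; the
#    words that remain afterwards are copied one at a time.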
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
ncopy:
##################################################################
# You can modify this portion
#
# Copies len 8-byte words from src to dst and counts the words > 0.
#   In:      %rdi = src, %rsi = dst, %rdx = len
#   Out:     %rax = number of positive words copied
#   Scratch: %rcx, %r8-%r11 only (all caller-saved under the x86-64
#            register convention, so nothing has to be saved/restored).
#            %rdi, %rsi and %rdx are advanced/consumed.
#
# Full blocks of 8 words go through an unrolled loop. Each block is
# handled as two batches of four: a batch issues its four loads, then
# its four stores, then its four sign tests, so no store directly
# follows the load that produces its value.
# Words left over after the last full block (and any len < 8) are
# copied one at a time by Loop3.
# NOTE(review): src and dst are assumed not to overlap -- confirm
# against the callers.
# Loop header
    xorq %rax,%rax              # count = 0
    rrmovq %rdx,%rcx
    iaddq $-8,%rcx              # %rcx = len - 8
    jl Next2                    # len < 8: no full block to unroll

# Invariant at Loop1: %rdx = words left (>= 8), %rcx = %rdx - 8 (>= 0)
Loop1:
    # ---- words 0-3 of the block ----
    mrmovq (%rdi),%r8
    mrmovq 8(%rdi),%r9
    mrmovq 16(%rdi),%r10
    mrmovq 24(%rdi),%r11
    rmmovq %r8,(%rsi)
    rmmovq %r9,8(%rsi)
    rmmovq %r10,16(%rsi)
    rmmovq %r11,24(%rsi)
    andq %r8,%r8                # word 0 <= 0?
    jle ele2                    # if so, skip the increment
    iaddq $1,%rax               # count++
ele2:
    andq %r9,%r9                # word 1 <= 0?
    jle ele3
    iaddq $1,%rax
ele3:
    andq %r10,%r10              # word 2 <= 0?
    jle ele4
    iaddq $1,%rax
ele4:
    andq %r11,%r11              # word 3 <= 0?
    jle ele5
    iaddq $1,%rax
    # ---- words 4-7 of the block (scratch registers reused) ----
ele5:
    mrmovq 32(%rdi),%r8
    mrmovq 40(%rdi),%r9
    mrmovq 48(%rdi),%r10
    mrmovq 56(%rdi),%r11
    rmmovq %r8,32(%rsi)
    rmmovq %r9,40(%rsi)
    rmmovq %r10,48(%rsi)
    rmmovq %r11,56(%rsi)
    andq %r8,%r8                # word 4 <= 0?
    jle ele6
    iaddq $1,%rax
ele6:
    andq %r9,%r9                # word 5 <= 0?
    jle ele7
    iaddq $1,%rax
ele7:
    andq %r10,%r10              # word 6 <= 0?
    jle ele8
    iaddq $1,%rax
ele8:
    andq %r11,%r11              # word 7 <= 0?
    jle Npos1
    iaddq $1,%rax
Npos1:
    iaddq $64,%rdi              # src += 8 words
    iaddq $64,%rsi              # dst += 8 words
    iaddq $-8,%rdx              # len -= 8
    iaddq $-8,%rcx              # (len - 8) -= 8; sets the flags tested below
    jge Loop1                   # another full block of 8 is available

# Remainder: %rdx = words still to copy (fewer than 8), or the
# original len when it was below 8 (possibly <= 0).
Next2:
    andq %rdx,%rdx              # len <= 0?
    jle Done                    # if so, goto Done:
Loop3:
    mrmovq (%rdi),%r8           # read val from src...
    rmmovq %r8,(%rsi)           # ...and store it to dst
    andq %r8,%r8                # val <= 0?
    jle Npos3                   # if so, goto Npos3:
    iaddq $1,%rax               # count++
Npos3:
    iaddq $8,%rdi               # src++
    iaddq $8,%rsi               # dst++
    iaddq $-1,%rdx              # len--
    jg Loop3                    # words remain: goto Loop3
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
    ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */