forked from codeplaysoftware/syclacademy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
source.cpp
78 lines (70 loc) · 2.25 KB
/
source.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/*
SYCL Academy (c)
SYCL Academy is licensed under a Creative Commons
Attribution-ShareAlike 4.0 International License.
You should have received a copy of the license along with this
work. If not, see <http://creativecommons.org/licenses/by-sa/4.0/>.
* SYCL Quick Reference
* ~~~~~~~~~~~~~~~~~~~~
*
* // Default construct a queue
* auto q = sycl::queue{};
*
* // Construct an in-order queue
* auto q = sycl::queue{sycl::default_selector_v,
* {sycl::property::queue::in_order{}}};
*
* // Declare a buffer pointing to ptr
* auto buf = sycl::buffer{ptr, sycl::range{n}};
*
* // Do a USM memcpy
* auto event = q.memcpy(dst_ptr, src_ptr, sizeof(T)*n);
* // Do a USM memcpy with dependent events
* auto event = q.memcpy(dst_ptr, src_ptr, sizeof(T)*n, {event1, event2});
*
* // Wait on an event
* event.wait();
*
* // Wait on a queue
* q.wait();
*
* // Submit work to the queue
* auto event = q.submit([&](sycl::handler &cgh) {
* // COMMAND GROUP
* });
*
*
* // Within the command group you can
* // 1. Declare an accessor to a buffer
* auto read_write_acc = sycl::accessor{buf, cgh};
* auto read_acc = sycl::accessor{buf, cgh, sycl::read_only};
* auto write_acc = sycl::accessor{buf, cgh, sycl::write_only};
* auto no_init_acc = sycl::accessor{buf, cgh, sycl::no_init};
* // 2. Enqueue a parallel for:
* // i: With range:
* cgh.parallel_for<class mykernel>(sycl::range{n},
* [=](sycl::id<1> i) {
* // Do something
* });
* // ii: With nd_range:
* cgh.parallel_for<class mykernel>(sycl::nd_range{
* globalRange, localRange}, [=](sycl::nd_item<1> i) {
* // Do something
* });
*/
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>
// Exercise: vector addition that is to be parallelised with a SYCL nd_range
// kernel. As written, the addition runs sequentially on the host; the test
// then checks that every element of r equals twice its index.
TEST_CASE("nd_range_kernel", "nd_range_kernel_source") {
  constexpr size_t dataSize = 1024;

  int a[dataSize], b[dataSize], r[dataSize];

  // Indices are size_t so they match the type of dataSize and avoid a
  // signed/unsigned comparison; the arrays hold int, hence the explicit cast.
  for (size_t i = 0; i < dataSize; ++i) {
    a[i] = static_cast<int>(i);
    b[i] = static_cast<int>(i);
    r[i] = 0;
  }

  // Task: parallelise the vector add kernel using nd_range
  for (size_t i = 0; i < dataSize; ++i) {
    r[i] = a[i] + b[i];
  }

  // a[i] and b[i] were both initialised to i, so each sum must be 2 * i.
  for (size_t i = 0; i < dataSize; ++i) {
    REQUIRE(r[i] == static_cast<int>(i) * 2);
  }
}