From 3ff11e23f1876aa9c1958f73228906a5744f1a95 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Thu, 26 Jan 2023 14:18:10 +0000 Subject: [PATCH 1/5] [SYCL][Fusion] Enable fusion tests on CUDA backend --- SYCL/KernelFusion/abort_fusion.cpp | 4 +-- SYCL/KernelFusion/abort_internalization.cpp | 10 ++++--- .../barrier_local_internalization.cpp | 4 +-- SYCL/KernelFusion/buffer_internalization.cpp | 4 +-- SYCL/KernelFusion/cancel_fusion.cpp | 4 +-- SYCL/KernelFusion/complete_fusion.cpp | 4 +-- SYCL/KernelFusion/diamond_shape.cpp | 4 +-- SYCL/KernelFusion/event_wait_cancel.cpp | 4 +-- SYCL/KernelFusion/event_wait_complete.cpp | 4 +-- .../internal_explicit_dependency.cpp | 4 +-- .../internalize_array_wrapper.cpp | 4 +-- SYCL/KernelFusion/internalize_deep.cpp | 4 +-- SYCL/KernelFusion/internalize_multi_ptr.cpp | 4 +-- SYCL/KernelFusion/internalize_vec.cpp | 4 +-- SYCL/KernelFusion/internalize_vfunc.cpp | 4 +-- SYCL/KernelFusion/pointer_arg_function.cpp | 4 +-- SYCL/KernelFusion/private_internalization.cpp | 4 +-- SYCL/KernelFusion/ranged_offset_accessor.cpp | 4 +-- SYCL/KernelFusion/struct_with_array.cpp | 4 +-- SYCL/KernelFusion/sync_acc_mem_op.cpp | 4 +-- SYCL/KernelFusion/sync_buffer_destruction.cpp | 4 +-- SYCL/KernelFusion/sync_event_wait.cpp | 4 +-- SYCL/KernelFusion/sync_host_accessor.cpp | 4 +-- SYCL/KernelFusion/sync_host_task.cpp | 4 +-- SYCL/KernelFusion/sync_queue_destruction.cpp | 4 +-- SYCL/KernelFusion/sync_queue_wait.cpp | 4 +-- SYCL/KernelFusion/sync_second_queue.cpp | 4 +-- .../sync_two_queues_event_dep.cpp | 30 ++++++++++++------- .../sync_two_queues_requirement.cpp | 4 +-- SYCL/KernelFusion/sync_usm_mem_op.cpp | 8 +++-- SYCL/KernelFusion/three_dimensional.cpp | 4 +-- SYCL/KernelFusion/two_dimensional.cpp | 4 +-- SYCL/KernelFusion/usm_no_dependencies.cpp | 4 +-- SYCL/KernelFusion/work_group_barrier.cpp | 4 +-- SYCL/KernelFusion/wrapped_usm.cpp | 4 +-- 35 files changed, 96 insertions(+), 80 deletions(-) diff --git a/SYCL/KernelFusion/abort_fusion.cpp b/SYCL/KernelFusion/abort_fusion.cpp index 587a89e1fd..12384ed76d 100644 --- a/SYCL/KernelFusion/abort_fusion.cpp +++ b/SYCL/KernelFusion/abort_fusion.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test fusion being aborted: Different scenarios causing the JIT compiler diff --git a/SYCL/KernelFusion/abort_internalization.cpp b/SYCL/KernelFusion/abort_internalization.cpp index 21c9837a6c..c89b69fc3d 100644 --- a/SYCL/KernelFusion/abort_internalization.cpp +++ b/SYCL/KernelFusion/abort_internalization.cpp @@ -1,9 +1,11 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: env SYCL_ENABLE_FUSION_CACHING=0 SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out +// RUN: env SYCL_RT_WARNING_LEVEL=1 SYCL_ENABLE_FUSION_CACHING=0\ +// RUN: %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER -// RUN: env SYCL_ENABLE_FUSION_CACHING=0 SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ +// RUN: env SYCL_RT_WARNING_LEVEL=1 SYCL_ENABLE_FUSION_CACHING=0\ +// RUN: %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test incomplete internalization: Different scenarios causing the JIT compiler diff --git a/SYCL/KernelFusion/barrier_local_internalization.cpp b/SYCL/KernelFusion/barrier_local_internalization.cpp index 12302c3177..ca4ec3dc4e 100644 --- a/SYCL/KernelFusion/barrier_local_internalization.cpp +++ b/SYCL/KernelFusion/barrier_local_internalization.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with local internalization and a combination of kernels diff --git a/SYCL/KernelFusion/buffer_internalization.cpp b/SYCL/KernelFusion/buffer_internalization.cpp index 22251fbb21..f499d2e2ad 100644 --- a/SYCL/KernelFusion/buffer_internalization.cpp +++ b/SYCL/KernelFusion/buffer_internalization.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with private internalization specified on the diff --git a/SYCL/KernelFusion/cancel_fusion.cpp b/SYCL/KernelFusion/cancel_fusion.cpp index 6c94f99025..9dc5ebe2c0 100644 --- a/SYCL/KernelFusion/cancel_fusion.cpp +++ b/SYCL/KernelFusion/cancel_fusion.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test cancel fusion diff --git a/SYCL/KernelFusion/complete_fusion.cpp b/SYCL/KernelFusion/complete_fusion.cpp index 0ffeca17a5..67c2fb3d05 100644 --- a/SYCL/KernelFusion/complete_fusion.cpp +++ b/SYCL/KernelFusion/complete_fusion.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion without any internalization diff --git a/SYCL/KernelFusion/diamond_shape.cpp b/SYCL/KernelFusion/diamond_shape.cpp index 0f009a1d60..e2bc187f8d 100644 --- a/SYCL/KernelFusion/diamond_shape.cpp +++ b/SYCL/KernelFusion/diamond_shape.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with private internalization specified on the diff --git a/SYCL/KernelFusion/event_wait_cancel.cpp b/SYCL/KernelFusion/event_wait_cancel.cpp index 63a049aaa1..abe1d2411c 100644 --- a/SYCL/KernelFusion/event_wait_cancel.cpp +++ b/SYCL/KernelFusion/event_wait_cancel.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test validity of events after cancel_fusion. diff --git a/SYCL/KernelFusion/event_wait_complete.cpp b/SYCL/KernelFusion/event_wait_complete.cpp index e0ab53bf56..56ce8a2bd1 100644 --- a/SYCL/KernelFusion/event_wait_complete.cpp +++ b/SYCL/KernelFusion/event_wait_complete.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test validity of events after complete_fusion. diff --git a/SYCL/KernelFusion/internal_explicit_dependency.cpp b/SYCL/KernelFusion/internal_explicit_dependency.cpp index b30082788c..9cde900041 100644 --- a/SYCL/KernelFusion/internal_explicit_dependency.cpp +++ b/SYCL/KernelFusion/internal_explicit_dependency.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion where one kernel in the fusion list specifies an diff --git a/SYCL/KernelFusion/internalize_array_wrapper.cpp b/SYCL/KernelFusion/internalize_array_wrapper.cpp index d1b41ea7bd..79e644d98a 100644 --- a/SYCL/KernelFusion/internalize_array_wrapper.cpp +++ b/SYCL/KernelFusion/internalize_array_wrapper.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test internalization of a nested array type. diff --git a/SYCL/KernelFusion/internalize_deep.cpp b/SYCL/KernelFusion/internalize_deep.cpp index 172ea3c750..3eab50b57d 100644 --- a/SYCL/KernelFusion/internalize_deep.cpp +++ b/SYCL/KernelFusion/internalize_deep.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with internalization of a deep struct type. diff --git a/SYCL/KernelFusion/internalize_multi_ptr.cpp b/SYCL/KernelFusion/internalize_multi_ptr.cpp index b6937b0350..5f86694e44 100644 --- a/SYCL/KernelFusion/internalize_multi_ptr.cpp +++ b/SYCL/KernelFusion/internalize_multi_ptr.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with private internalization specified on the diff --git a/SYCL/KernelFusion/internalize_vec.cpp b/SYCL/KernelFusion/internalize_vec.cpp index 9f3a24f715..67b84dde4d 100644 --- a/SYCL/KernelFusion/internalize_vec.cpp +++ b/SYCL/KernelFusion/internalize_vec.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with internalization of a struct type. diff --git a/SYCL/KernelFusion/internalize_vfunc.cpp b/SYCL/KernelFusion/internalize_vfunc.cpp index abc9c2419f..3f7c65b10a 100644 --- a/SYCL/KernelFusion/internalize_vfunc.cpp +++ b/SYCL/KernelFusion/internalize_vfunc.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with private internalization specified on the diff --git a/SYCL/KernelFusion/pointer_arg_function.cpp b/SYCL/KernelFusion/pointer_arg_function.cpp index ffe5178cda..7c16d212d8 100644 --- a/SYCL/KernelFusion/pointer_arg_function.cpp +++ b/SYCL/KernelFusion/pointer_arg_function.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // This test currently fails because InferAddressSpace is not able to remove all // address-space casts, causing internalization to fail. diff --git a/SYCL/KernelFusion/private_internalization.cpp b/SYCL/KernelFusion/private_internalization.cpp index 05120a68c3..7886bdf010 100644 --- a/SYCL/KernelFusion/private_internalization.cpp +++ b/SYCL/KernelFusion/private_internalization.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with private internalization specified on the diff --git a/SYCL/KernelFusion/ranged_offset_accessor.cpp b/SYCL/KernelFusion/ranged_offset_accessor.cpp index 95f0b06c44..ed03a141e6 100644 --- a/SYCL/KernelFusion/ranged_offset_accessor.cpp +++ b/SYCL/KernelFusion/ranged_offset_accessor.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with private internalization on accessors with different diff --git a/SYCL/KernelFusion/struct_with_array.cpp b/SYCL/KernelFusion/struct_with_array.cpp index dca54abfa2..a777d67e50 100644 --- a/SYCL/KernelFusion/struct_with_array.cpp +++ b/SYCL/KernelFusion/struct_with_array.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with private internalization on a kernel functor with an diff --git a/SYCL/KernelFusion/sync_acc_mem_op.cpp b/SYCL/KernelFusion/sync_acc_mem_op.cpp index 14643a3d81..440de656d0 100644 --- a/SYCL/KernelFusion/sync_acc_mem_op.cpp +++ b/SYCL/KernelFusion/sync_acc_mem_op.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // Test fusion cancellation on an explicit memory operation on an accessor // happening before complete_fusion. diff --git a/SYCL/KernelFusion/sync_buffer_destruction.cpp b/SYCL/KernelFusion/sync_buffer_destruction.cpp index 627a8cdbfe..96375f18c4 100644 --- a/SYCL/KernelFusion/sync_buffer_destruction.cpp +++ b/SYCL/KernelFusion/sync_buffer_destruction.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // Test fusion cancellation on buffer destruction happening before // complete_fusion. diff --git a/SYCL/KernelFusion/sync_event_wait.cpp b/SYCL/KernelFusion/sync_event_wait.cpp index d34393638e..d077116412 100644 --- a/SYCL/KernelFusion/sync_event_wait.cpp +++ b/SYCL/KernelFusion/sync_event_wait.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // Test fusion cancellation on event::wait() happening before // complete_fusion. diff --git a/SYCL/KernelFusion/sync_host_accessor.cpp b/SYCL/KernelFusion/sync_host_accessor.cpp index 854803f347..d6f4cdc774 100644 --- a/SYCL/KernelFusion/sync_host_accessor.cpp +++ b/SYCL/KernelFusion/sync_host_accessor.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // Test fusion cancellation on host accessor creation happening before // complete_fusion. diff --git a/SYCL/KernelFusion/sync_host_task.cpp b/SYCL/KernelFusion/sync_host_task.cpp index fc94fa9b3d..4c2bc870e2 100644 --- a/SYCL/KernelFusion/sync_host_task.cpp +++ b/SYCL/KernelFusion/sync_host_task.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // Test fusion cancellation on host task submission happening before // complete_fusion. diff --git a/SYCL/KernelFusion/sync_queue_destruction.cpp b/SYCL/KernelFusion/sync_queue_destruction.cpp index 145fde97b5..936b486c77 100644 --- a/SYCL/KernelFusion/sync_queue_destruction.cpp +++ b/SYCL/KernelFusion/sync_queue_destruction.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // Test fusion cancellation on queue destruction happening before // complete_fusion. diff --git a/SYCL/KernelFusion/sync_queue_wait.cpp b/SYCL/KernelFusion/sync_queue_wait.cpp index 5fe768d60c..71996ffed8 100644 --- a/SYCL/KernelFusion/sync_queue_wait.cpp +++ b/SYCL/KernelFusion/sync_queue_wait.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // Test fusion cancellation on queue::wait() happening before // complete_fusion. diff --git a/SYCL/KernelFusion/sync_second_queue.cpp b/SYCL/KernelFusion/sync_second_queue.cpp index 057c969353..5147a63919 100644 --- a/SYCL/KernelFusion/sync_second_queue.cpp +++ b/SYCL/KernelFusion/sync_second_queue.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // Test fusion cancellation on submission of kernel with requirements to a // different queue happening before complete_fusion. diff --git a/SYCL/KernelFusion/sync_two_queues_event_dep.cpp b/SYCL/KernelFusion/sync_two_queues_event_dep.cpp index bb33fcdcb8..f0749b1b76 100644 --- a/SYCL/KernelFusion/sync_two_queues_event_dep.cpp +++ b/SYCL/KernelFusion/sync_two_queues_event_dep.cpp @@ -1,12 +1,13 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip -// For this test, complete_fusion must be supported. +// UNSUPPORTED: hip // REQUIRES: fusion +// NOTE: This test currently fails from time to time and is under construction. + // Test fusion cancellation on event dependency between two active fusions. #include @@ -36,8 +37,6 @@ int main() { ext::codeplay::experimental::fusion_wrapper fw1{q1}; fw1.start_fusion(); - assert(fw1.is_in_fusion_mode() && "Queue should be in fusion mode"); - auto kernel1 = q1.submit([&](handler &cgh) { cgh.parallel_for( dataSize, [=](id<1> i) { tmp[i] = in1[i] + in2[i]; }); @@ -57,7 +56,7 @@ int main() { assert(!fw1.is_in_fusion_mode() && "Queue should not be in fusion mode anymore"); - assert(fw2.is_in_fusion_mode() && "Queue should be in fusion mode"); + // assert(fw2.is_in_fusion_mode() && "Queue should be in fusion mode"); auto kernel2 = q1.submit([&](handler &cgh) { cgh.depends_on(kernel3); @@ -70,18 +69,29 @@ int main() { assert(!fw2.is_in_fusion_mode() && "Queue should not be in fusion mode anymore"); - fw1.complete_fusion({ext::codeplay::experimental::property::no_barriers{}}); + // fw1.complete_fusion({ext::codeplay::experimental::property::no_barriers{}}); - fw2.cancel_fusion(); + // fw2.cancel_fusion(); q1.wait(); q2.wait(); + for (size_t i = 0; i < 5; ++i) { + std::cout << out[i] << ", "; + } + std::cout << "\n"; + // Check the results + size_t numErrors = 0; for (size_t i = 0; i < dataSize; ++i) { - assert(out[i] == (40 * i * i) && "Computation error"); + if (out[i] != (40 * i * i)) { + ++numErrors; + } + // assert(out[i] == (40 * i * i) && "Computation error"); + } + if (numErrors) { + std::cout << "COMPUTATION ERROR\n"; } - return 0; } diff --git a/SYCL/KernelFusion/sync_two_queues_requirement.cpp b/SYCL/KernelFusion/sync_two_queues_requirement.cpp index 3ca9015c7e..d3526f2aba 100644 --- a/SYCL/KernelFusion/sync_two_queues_requirement.cpp +++ b/SYCL/KernelFusion/sync_two_queues_requirement.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // For this test, complete_fusion must be supported. // REQUIRES: fusion diff --git a/SYCL/KernelFusion/sync_usm_mem_op.cpp b/SYCL/KernelFusion/sync_usm_mem_op.cpp index 67af367316..270645af15 100644 --- a/SYCL/KernelFusion/sync_usm_mem_op.cpp +++ b/SYCL/KernelFusion/sync_usm_mem_op.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // Test fusion cancellation on an explicit memory operation on an USM pointer // happening before complete_fusion. @@ -61,6 +61,10 @@ int main() { fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}}); + for (size_t i = 0; i < dataSize; ++i) { + std::cout << out[i] << ", "; + } + std::cout << "\n"; // Check the results for (size_t i = 0; i < dataSize; ++i) { assert(out[i] == (20 * i * i) && "Computation error"); diff --git a/SYCL/KernelFusion/three_dimensional.cpp b/SYCL/KernelFusion/three_dimensional.cpp index e8006ca091..ed246226e0 100644 --- a/SYCL/KernelFusion/three_dimensional.cpp +++ b/SYCL/KernelFusion/three_dimensional.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with private internalization specified on the diff --git a/SYCL/KernelFusion/two_dimensional.cpp b/SYCL/KernelFusion/two_dimensional.cpp index 62bca54ff1..af51579a46 100644 --- a/SYCL/KernelFusion/two_dimensional.cpp +++ b/SYCL/KernelFusion/two_dimensional.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with private internalization specified on the diff --git a/SYCL/KernelFusion/usm_no_dependencies.cpp b/SYCL/KernelFusion/usm_no_dependencies.cpp index 2f18f758ba..7ef71aac5e 100644 --- a/SYCL/KernelFusion/usm_no_dependencies.cpp +++ b/SYCL/KernelFusion/usm_no_dependencies.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion using USM pointers. diff --git a/SYCL/KernelFusion/work_group_barrier.cpp b/SYCL/KernelFusion/work_group_barrier.cpp index 7141c37be8..5d7e1a6f16 100644 --- a/SYCL/KernelFusion/work_group_barrier.cpp +++ b/SYCL/KernelFusion/work_group_barrier.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with a combination of kernels that require a work-group diff --git a/SYCL/KernelFusion/wrapped_usm.cpp b/SYCL/KernelFusion/wrapped_usm.cpp index 8532a9dadf..7d0330d1c9 100644 --- a/SYCL/KernelFusion/wrapped_usm.cpp +++ b/SYCL/KernelFusion/wrapped_usm.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion using an wrapped USM pointer as kernel functor argument. From 4470972cc6b8e25dbdcce253f4a0e0b7ba128a66 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Tue, 14 Feb 2023 16:15:19 +0000 Subject: [PATCH 2/5] [SYCL][Fusion] Enable JIT caching for CUDA fusion --- SYCL/KernelFusion/jit_caching.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SYCL/KernelFusion/jit_caching.cpp b/SYCL/KernelFusion/jit_caching.cpp index d49f7b63a6..c5d1d57c94 100644 --- a/SYCL/KernelFusion/jit_caching.cpp +++ b/SYCL/KernelFusion/jit_caching.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER --implicit-check-not "COMPUTATION ERROR" --implicit-check-not "WRONG INTERNALIZATION" // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER --implicit-check-not "COMPUTATION ERROR" --implicit-check-not "WRONG INTERNALIZATION" -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test caching for JIT fused kernels. Also test for debug messages being From 6d289d628b1df334ea3efad3f718b24b69e8d977 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Wed, 15 Feb 2023 12:57:34 +0000 Subject: [PATCH 3/5] [SYCL][Fusion] Test local internalization for CUDA --- SYCL/KernelFusion/diamond_shape_local.cpp | 111 ++++++++++++++++++ SYCL/KernelFusion/existing_local_accessor.cpp | 78 ++++++++++++ SYCL/KernelFusion/local_internalization.cpp | 4 +- SYCL/KernelFusion/non_unit_local_size.cpp | 4 +- 4 files changed, 193 insertions(+), 4 deletions(-) create mode 100644 SYCL/KernelFusion/diamond_shape_local.cpp create mode 100644 SYCL/KernelFusion/existing_local_accessor.cpp diff --git a/SYCL/KernelFusion/diamond_shape_local.cpp b/SYCL/KernelFusion/diamond_shape_local.cpp new file mode 100644 index 0000000000..96d06e7dad --- /dev/null +++ b/SYCL/KernelFusion/diamond_shape_local.cpp @@ -0,0 +1,111 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// UNSUPPORTED: hip +// REQUIRES: fusion + +// Test complete fusion with local internalization specified on the +// accessors for a combination of four kernels, forming a diamond-like shape and +// repeating one of the kernels. + +#include + +using namespace sycl; + +struct AddKernel { + accessor accIn1; + accessor accIn2; + accessor accOut; + + void operator()(id<1> i) const { accOut[i] = accIn1[i] + accIn2[i]; } +}; + +int main() { + constexpr size_t dataSize = 512; + int in1[dataSize], in2[dataSize], in3[dataSize], tmp1[dataSize], + tmp2[dataSize], tmp3[dataSize], out[dataSize]; + + for (size_t i = 0; i < dataSize; ++i) { + in1[i] = i * 2; + in2[i] = i * 3; + in3[i] = i * 4; + tmp1[i] = -1; + tmp2[i] = -1; + tmp3[i] = -1; + out[i] = -1; + } + + queue q{ext::codeplay::experimental::property::queue::enable_fusion{}}; + + { + buffer bIn1{in1, range{dataSize}}; + buffer bIn2{in2, range{dataSize}}; + buffer bIn3{in3, range{dataSize}}; + buffer bTmp1{ + tmp1, + range{dataSize}, + {sycl::ext::codeplay::experimental::property::promote_local{}}}; + buffer bTmp2{ + tmp2, + range{dataSize}, + {sycl::ext::codeplay::experimental::property::promote_local{}}}; + buffer bTmp3{ + tmp3, + range{dataSize}, + {sycl::ext::codeplay::experimental::property::promote_local{}}}; + buffer bOut{out, range{dataSize}}; + + ext::codeplay::experimental::fusion_wrapper fw{q}; + fw.start_fusion(); + + assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode"); + + q.submit([&](handler &cgh) { + auto accIn1 = bIn1.get_access(cgh); + auto accIn2 = bIn2.get_access(cgh); + auto accTmp1 = bTmp1.get_access(cgh); + cgh.parallel_for(nd_range<1>{{dataSize}, {16}}, + AddKernel{accIn1, accIn2, accTmp1}); + }); + + q.submit([&](handler &cgh) { + auto accTmp1 = bTmp1.get_access(cgh); + auto accIn3 = bIn3.get_access(cgh); + auto accTmp2 = bTmp2.get_access(cgh); + cgh.parallel_for( + nd_range<1>{{dataSize}, {16}}, + [=](id<1> i) { accTmp2[i] = accTmp1[i] * accIn3[i]; }); + }); + + q.submit([&](handler &cgh) { + auto accTmp1 = bTmp1.get_access(cgh); + auto accTmp3 = bTmp3.get_access(cgh); + cgh.parallel_for( + nd_range<1>{{dataSize}, {16}}, + [=](id<1> i) { accTmp3[i] = accTmp1[i] * 5; }); + }); + + q.submit([&](handler &cgh) { + auto accTmp2 = bTmp2.get_access(cgh); + auto accTmp3 = bTmp3.get_access(cgh); + auto accOut = bOut.get_access(cgh); + cgh.parallel_for(nd_range<1>{{dataSize}, {16}}, + AddKernel{accTmp2, accTmp3, accOut}); + }); + + fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}}); + + assert(!fw.is_in_fusion_mode() && + "Queue should not be in fusion mode anymore"); + } + + // Check the results + for (size_t i = 0; i < dataSize; ++i) { + assert(out[i] == (20 * i * i + i * 25) && "Computation error"); + assert(tmp1[i] == -1 && "tmp1 not internalized"); + assert(tmp2[i] == -1 && "tmp2 not internalized"); + assert(tmp3[i] == -1 && "tmp3 not internalized"); + } + + return 0; +} diff --git a/SYCL/KernelFusion/existing_local_accessor.cpp b/SYCL/KernelFusion/existing_local_accessor.cpp new file mode 100644 index 0000000000..4fca5b96e5 --- /dev/null +++ b/SYCL/KernelFusion/existing_local_accessor.cpp @@ -0,0 +1,78 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// UNSUPPORTED: hip +// REQUIRES: fusion + +// Test complete fusion with local internalization and an local accessor that +// already exists in one of the input kernels. + +#include + +using namespace sycl; + +int main() { + constexpr size_t dataSize = 512; + int in1[dataSize], in2[dataSize], in3[dataSize], tmp[dataSize], out[dataSize]; + + for (size_t i = 0; i < dataSize; ++i) { + in1[i] = i * 2; + in2[i] = i * 3; + in3[i] = i * 4; + tmp[i] = -1; + out[i] = -1; + } + + queue q{ext::codeplay::experimental::property::queue::enable_fusion{}}; + + { + buffer bIn1{in1, range{dataSize}}; + buffer bIn2{in2, range{dataSize}}; + buffer bIn3{in3, range{dataSize}}; + buffer bTmp{tmp, range{dataSize}}; + buffer bOut{out, range{dataSize}}; + + ext::codeplay::experimental::fusion_wrapper fw{q}; + fw.start_fusion(); + + assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode"); + + q.submit([&](handler &cgh) { + auto accIn1 = bIn1.get_access(cgh); + auto accIn2 = bIn2.get_access(cgh); + auto accTmp = bTmp.get_access( + cgh, sycl::ext::codeplay::experimental::property::promote_local{}); + local_accessor accLocal{16, cgh}; + cgh.parallel_for( + nd_range<1>{{dataSize}, {16}}, [=](nd_item<1> i) { + size_t globalIdx = i.get_global_linear_id(); + size_t localIdx = i.get_local_linear_id(); + accLocal[localIdx] = accIn2[globalIdx]; + accTmp[globalIdx] = accIn1[globalIdx] + accLocal[localIdx]; + }); + }); + + q.submit([&](handler &cgh) { + auto accTmp = bTmp.get_access( + cgh, sycl::ext::codeplay::experimental::property::promote_local{}); + auto accIn3 = bIn3.get_access(cgh); + auto accOut = bOut.get_access(cgh); + cgh.parallel_for( + nd_range<1>{{dataSize}, {16}}, + [=](id<1> i) { accOut[i] = accTmp[i] * accIn3[i]; }); + }); + + fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}}); + + assert(!fw.is_in_fusion_mode() && + "Queue should not be in fusion mode anymore"); + } + + // Check the results + for (size_t i = 0; i < dataSize; ++i) { + assert(out[i] == (20 * i * i) && "Computation error"); + assert(tmp[i] == -1 && "Not internalized"); + } + + return 0; +} diff --git a/SYCL/KernelFusion/local_internalization.cpp b/SYCL/KernelFusion/local_internalization.cpp index a9677b4a2e..0b1346c255 100644 --- a/SYCL/KernelFusion/local_internalization.cpp +++ b/SYCL/KernelFusion/local_internalization.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with local internalization specified on the diff --git a/SYCL/KernelFusion/non_unit_local_size.cpp b/SYCL/KernelFusion/non_unit_local_size.cpp index 917eda6e09..ffd08f918d 100644 --- a/SYCL/KernelFusion/non_unit_local_size.cpp +++ b/SYCL/KernelFusion/non_unit_local_size.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test complete fusion with local internalization specified on the From a1b989cb378fae15ccc0021af12f75a8b7b1600f Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Thu, 16 Feb 2023 13:10:10 +0000 Subject: [PATCH 4/5] [SYCL][Fusion] Reinstate synchronization test --- .../sync_two_queues_event_dep.cpp | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/SYCL/KernelFusion/sync_two_queues_event_dep.cpp b/SYCL/KernelFusion/sync_two_queues_event_dep.cpp index f0749b1b76..a2c9caa88c 100644 --- a/SYCL/KernelFusion/sync_two_queues_event_dep.cpp +++ b/SYCL/KernelFusion/sync_two_queues_event_dep.cpp @@ -1,13 +1,12 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER // UNSUPPORTED: hip +// For this test, complete_fusion must be supported. // REQUIRES: fusion -// NOTE: This test currently fails from time to time and is under construction. - // Test fusion cancellation on event dependency between two active fusions. #include @@ -37,6 +36,8 @@ int main() { ext::codeplay::experimental::fusion_wrapper fw1{q1}; fw1.start_fusion(); + assert(fw1.is_in_fusion_mode() && "Queue should be in fusion mode"); + auto kernel1 = q1.submit([&](handler &cgh) { cgh.parallel_for( dataSize, [=](id<1> i) { tmp[i] = in1[i] + in2[i]; }); @@ -56,7 +57,7 @@ int main() { assert(!fw1.is_in_fusion_mode() && "Queue should not be in fusion mode anymore"); - // assert(fw2.is_in_fusion_mode() && "Queue should be in fusion mode"); + assert(fw2.is_in_fusion_mode() && "Queue should be in fusion mode"); auto kernel2 = q1.submit([&](handler &cgh) { cgh.depends_on(kernel3); @@ -69,29 +70,18 @@ int main() { assert(!fw2.is_in_fusion_mode() && "Queue should not be in fusion mode anymore"); - // fw1.complete_fusion({ext::codeplay::experimental::property::no_barriers{}}); + fw1.complete_fusion({ext::codeplay::experimental::property::no_barriers{}}); - // fw2.cancel_fusion(); + fw2.cancel_fusion(); q1.wait(); q2.wait(); - for (size_t i = 0; i < 5; ++i) { - std::cout << out[i] << ", "; - } - std::cout << "\n"; - // Check the results - size_t numErrors = 0; for (size_t i = 0; i < dataSize; ++i) { - if (out[i] != (40 * i * i)) { - ++numErrors; - } - // assert(out[i] == (40 * i * i) && "Computation error"); - } - if (numErrors) { - std::cout << "COMPUTATION ERROR\n"; + assert(out[i] == (40 * i * i) && "Computation error"); } + return 0; } From 71739c272d363f4536107c8283898c54950a5ba0 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Thu, 23 Mar 2023 08:35:47 +0000 Subject: [PATCH 5/5] [SYCL][Fusion] Enable new tests on CUDA backend --- SYCL/KernelFusion/abort_internalization_stored_ptr.cpp | 4 ++-- SYCL/KernelFusion/device_info_descriptor.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/SYCL/KernelFusion/abort_internalization_stored_ptr.cpp b/SYCL/KernelFusion/abort_internalization_stored_ptr.cpp index c3d49cea3c..d05d53e19c 100644 --- a/SYCL/KernelFusion/abort_internalization_stored_ptr.cpp +++ b/SYCL/KernelFusion/abort_internalization_stored_ptr.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-embed-ir %s -o %t.out // RUN: env SYCL_RT_WARNING_LEVEL=1 %CPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %CPU_CHECK_PLACEHOLDER --implicit-check-not "Computation error" --implicit-check-not "Internalized" // RUN: env SYCL_RT_WARNING_LEVEL=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1\ // RUN: %GPU_CHECK_PLACEHOLDER --implicit-check-not "Computation error" --implicit-check-not "Internalized" -// UNSUPPORTED: cuda || hip +// UNSUPPORTED: hip // REQUIRES: fusion // Test pointers being stored are not internalized. diff --git a/SYCL/KernelFusion/device_info_descriptor.cpp b/SYCL/KernelFusion/device_info_descriptor.cpp index 91bd1622a5..c7dc498ce2 100644 --- a/SYCL/KernelFusion/device_info_descriptor.cpp +++ b/SYCL/KernelFusion/device_info_descriptor.cpp @@ -1,7 +1,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -// XFAIL: cuda || hip +// XFAIL: hip // REQUIRES: fusion // Test correct return from device information descriptor.