Skip to content

Commit 92330e9

Browse files
committed
Implement SYCL_KHR_WORK_ITEM_QUERIES
1 parent e3843d8 commit 92330e9

File tree

10 files changed

+157
-3
lines changed

10 files changed

+157
-3
lines changed

.github/workflows/extension_ci.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ jobs:
3636
-S ${{ github.workspace }}
3737
-DCMAKE_INSTALL_PREFIX=${{ steps.strings.outputs.install-dir }}
3838
-DSIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH=OFF
39+
-DSIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES=OFF
3940
4041
- name: Build SimSYCL (no extensions)
4142
run: >
@@ -71,6 +72,7 @@ jobs:
7172
-S ${{ github.workspace }}
7273
-DCMAKE_INSTALL_PREFIX=${{ steps.strings.outputs.install-dir }}
7374
-DSIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH=ON
75+
-DSIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES=ON
7476
7577
- name: Build SimSYCL (with extensions)
7678
run: >

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ set(SIMSYCL_CHECK_MODE "ABORT" CACHE STRING "Runtime assertion handling NONE|LOG
6969

7070
# Extension options
7171
option(SIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH "Enable the SYCL_KHR_QUEUE_FLUSH extension" ON)
72+
option(SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES "Enable the SYCL_KHR_WORK_ITEM_QUERIES extension" ON)
7273

7374
set(CONFIG_PATH "${CMAKE_CURRENT_BINARY_DIR}/include/simsycl/config.hh")
7475
configure_file(

cmake/simsycl-config.cmake.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,6 @@ set(SIMSYCL_CHECK_MODE "@SIMSYCL_CHECK_MODE@")
2828
set(SIMSYCL_ENABLE_ASAN "@SIMSYCL_ENABLE_ASAN@")
2929

3030
set(SIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH "@SIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH@")
31+
set(SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES "@SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES@")
3132

3233
include("${CMAKE_CURRENT_LIST_DIR}/AddToTarget.cmake")

include/simsycl/config.hh.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#cmakedefine01 SIMSYCL_FEATURE_HALF_TYPE
1010

1111
#cmakedefine01 SIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH
12+
#cmakedefine01 SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
1213

1314
#ifndef SIMSYCL_CHECK_MODE
1415
#define SIMSYCL_CHECK_MODE SIMSYCL_CHECK_@SIMSYCL_CHECK_MODE@

include/simsycl/sycl.hh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,6 @@
4444
#include "sycl/type_traits.hh"
4545
#include "sycl/usm.hh"
4646
#include "sycl/vec.hh"
47+
48+
#include "sycl/khr/sub_group_queries.hh"
4749
// IWYU pragma: end_keep
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#include <simsycl/sycl/group.hh>
2+
#include <simsycl/sycl/nd_item.hh>
3+
#include <simsycl/sycl/sub_group.hh>
4+
5+
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
6+
namespace simsycl::khr {
7+
8+
namespace detail {
9+
template<int Dimensions>
10+
std::optional<simsycl::sycl::nd_item<Dimensions>> g_khr_wi_query_this_nd_item;
11+
12+
template<int Dimensions>
13+
std::optional<simsycl::sycl::group<Dimensions>> g_khr_wi_query_this_group;
14+
15+
inline std::optional<simsycl::sycl::sub_group> g_khr_wi_query_this_sub_group;
16+
} // namespace detail
17+
18+
template<int Dimensions>
19+
simsycl::sycl::nd_item<Dimensions> this_nd_item() {
20+
SIMSYCL_CHECK_MSG(!!detail::g_khr_wi_query_this_nd_item<Dimensions>,
21+
"Work item query state 'this_nd_item' is not available.\n"
22+
"Make sure that the query originated from a kernel launched with a sycl::nd_range argument");
23+
return detail::g_khr_wi_query_this_nd_item<Dimensions>.value();
24+
}
25+
26+
template<int Dimensions>
27+
simsycl::sycl::group<Dimensions> this_group() {
28+
SIMSYCL_CHECK_MSG(!!detail::g_khr_wi_query_this_group<Dimensions>,
29+
"Work item query state 'this_group' is not available.\n"
30+
"Make sure that the query originated from a kernel launched with a sycl::nd_range argument");
31+
return detail::g_khr_wi_query_this_group<Dimensions>.value();
32+
}
33+
34+
inline simsycl::sycl::sub_group this_sub_group() {
35+
SIMSYCL_CHECK_MSG(!!detail::g_khr_wi_query_this_sub_group,
36+
"Work item query state 'this_sub_group' is not available.\n"
37+
"Make sure that the query originated from a kernel launched with a sycl::nd_range argument");
38+
return detail::g_khr_wi_query_this_sub_group.value();
39+
}
40+
41+
} // namespace simsycl::khr
42+
43+
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES

src/simsycl/schedule.cc

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#include "simsycl/sycl/group.hh"
2+
#include "simsycl/sycl/khr/sub_group_queries.hh"
3+
#include "simsycl/sycl/nd_item.hh"
4+
15
#include <simsycl/detail/utils.hh>
26
#include <simsycl/schedule.hh>
37
#include <simsycl/sycl/device.hh>
@@ -181,6 +185,23 @@ void cooperative_for_nd_range(const sycl::device &device, const sycl::nd_range<D
181185
std::vector<detail::concurrent_sub_group> concurrent_sub_groups(num_concurrent_sub_groups);
182186
std::vector<detail::concurrent_nd_item> num_concurrent_nd_items(num_concurrent_items);
183187

188+
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
189+
std::vector<const sycl::nd_item<Dimensions> *> concurrent_khr_wi_query_nd_item_ptrs(num_concurrent_items, nullptr);
190+
191+
auto update_global_khr_wi_query_data = [&](int cc_g_idx = -1) {
192+
if(cc_g_idx != -1 && concurrent_khr_wi_query_nd_item_ptrs[cc_g_idx] != nullptr) {
193+
const auto nd_item = *concurrent_khr_wi_query_nd_item_ptrs[cc_g_idx];
194+
khr::detail::g_khr_wi_query_this_nd_item<Dimensions> = nd_item;
195+
khr::detail::g_khr_wi_query_this_group<Dimensions> = nd_item.get_group();
196+
khr::detail::g_khr_wi_query_this_sub_group = nd_item.get_sub_group();
197+
} else {
198+
khr::detail::g_khr_wi_query_this_nd_item<Dimensions> = std::nullopt;
199+
khr::detail::g_khr_wi_query_this_group<Dimensions> = std::nullopt;
200+
khr::detail::g_khr_wi_query_this_sub_group = std::nullopt;
201+
}
202+
};
203+
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
204+
184205
for(auto &cgroup : concurrent_groups) {
185206
cgroup.local_memory_allocations.resize(local_memory.size());
186207
for(size_t i = 0; i < local_memory.size(); ++i) {
@@ -220,8 +241,13 @@ void cooperative_for_nd_range(const sycl::device &device, const sycl::nd_range<D
220241
group_linear_range, sub_group_linear_id_in_group, sub_group_linear_range_in_group,
221242
sub_group_max_local_linear_range, sub_group_max_local_range, thread_id_in_sub_group,
222243
sub_group_id_in_group, sub_group_range_in_group, &concurrent_nd_item, &concurrent_group,
223-
&concurrent_sub_group, &kernel, &concurrent_items_exited, &caught_exceptions,
224-
&range](boost::context::continuation &&scheduler) //
244+
&concurrent_sub_group, &kernel, &concurrent_items_exited, &caught_exceptions, &range
245+
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
246+
,
247+
concurrent_global_idx, &concurrent_khr_wi_query_nd_item_ptrs,
248+
&update_global_khr_wi_query_data
249+
#endif
250+
](boost::context::continuation &&scheduler) //
225251
{
226252
// yield immediately to allow the scheduling loop to set up local memory pointers
227253
enter_kernel_fiber(std::move(scheduler));
@@ -245,7 +271,8 @@ void cooperative_for_nd_range(const sycl::device &device, const sycl::nd_range<D
245271

246272
SIMSYCL_START_IGNORING_DEPRECATIONS;
247273
const auto group_id = linear_index_to_id(group_range, group_linear_id);
248-
const auto global_id = range.get_offset() + (group_id * sycl::id<Dimensions>(local_range)) + local_id;
274+
const auto global_id
275+
= range.get_offset() + (group_id * sycl::id<Dimensions>(local_range)) + local_id;
249276

250277
// if sub-group range is not divisible by local range, the last sub-group will be smaller
251278
const auto sub_group_local_linear_range = std::min(sub_group_max_local_linear_range,
@@ -265,6 +292,12 @@ void cooperative_for_nd_range(const sycl::device &device, const sycl::nd_range<D
265292
const auto nd_item
266293
= detail::make_nd_item(global_item, local_item, group, sub_group, &concurrent_nd_item);
267294

295+
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
296+
concurrent_khr_wi_query_nd_item_ptrs[concurrent_global_idx] = &nd_item;
297+
// adjust the globals now that the data is available, before starting the kernel
298+
update_global_khr_wi_query_data(concurrent_global_idx);
299+
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
300+
268301
try {
269302
kernel(nd_item);
270303
// Add an implicit "exit" operations to groups and sub-groups to catch potential divergence on
@@ -311,11 +344,21 @@ void cooperative_for_nd_range(const sycl::device &device, const sycl::nd_range<D
311344
*local_memory[i].ptr = concurrent_groups[concurrent_group_idx].local_memory_allocations[i].get();
312345
}
313346

347+
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
348+
// adjust globals before switching fibers
349+
update_global_khr_wi_query_data(concurrent_global_idx);
350+
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
351+
314352
fibers[concurrent_global_idx] = fibers[concurrent_global_idx].resume();
315353
}
316354
schedule_state = schedule.update(schedule_state, order);
317355
}
318356

357+
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
358+
// reset globals
359+
update_global_khr_wi_query_data();
360+
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
361+
319362
// rethrow any encountered exceptions
320363
for(auto &exception : caught_exceptions) { std::rethrow_exception(exception); }
321364
}

test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ add_executable(tests
2222
simulation_tests.cc
2323
alloc_tests.cc
2424
vec_tests.cc
25+
extensions/work_item_queries_test.cc
2526
)
2627

2728
add_sycl_to_target(TARGET tests SIMSYCL_ALL_WARNINGS)

test/extensions/extensions_test.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,13 @@ int main() {
1111

1212
// SIMSYCL_ENABLE_SYCL_KHR_QUEUE_FLUSH
1313
queue.khr_flush();
14+
15+
// SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
16+
queue.submit([&](sycl::handler &cgh) {
17+
cgh.parallel_for(sycl::nd_range<1>(1024, 64), [=](sycl::nd_item<1>) {
18+
[[maybe_unused]] const auto item = sycl::khr::this_nd_item<1>();
19+
[[maybe_unused]] const auto group = sycl::khr::this_group<1>();
20+
[[maybe_unused]] const auto sub_group = sycl::khr::this_sub_group();
21+
});
22+
});
1423
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#include <simsycl/sycl.hh>
2+
3+
#include <catch2/catch_template_test_macros.hpp>
4+
#include <catch2/catch_test_macros.hpp>
5+
#include <catch2/generators/catch_generators.hpp>
6+
7+
8+
using namespace simsycl;
9+
10+
TEMPLATE_TEST_CASE_SIG(
11+
"work item queries are correct if supported", "[khr][work_item_queries]", ((int Dims), Dims), 1, 2, 3) {
12+
13+
#if SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
14+
15+
sycl::range<Dims> global_range;
16+
sycl::range<Dims> local_range;
17+
for(int d = 0; d < Dims; ++d) {
18+
const int s = d+1;
19+
global_range[d] = s * (2 + s);
20+
local_range[d] = 2 + s;
21+
}
22+
23+
std::vector<bool> visited(global_range.size(), false);
24+
sycl::queue()
25+
.submit([&](sycl::handler &cgh) {
26+
cgh.parallel_for(sycl::nd_range(global_range, local_range), [=, &visited](sycl::nd_item<Dims> it) {
27+
const auto global_linear_id = it.get_global_linear_id();
28+
CHECK(global_linear_id < global_range.size());
29+
CHECK(!visited[global_linear_id]);
30+
visited[global_linear_id] = true;
31+
32+
CHECK(khr::this_nd_item<Dims>() == it);
33+
CHECK(khr::this_group<Dims>() == it.get_group());
34+
CHECK(khr::this_sub_group() == it.get_sub_group());
35+
36+
group_barrier(it.get_group());
37+
38+
// check again after scheduling through group_barrier
39+
CHECK(khr::this_nd_item<Dims>() == it);
40+
CHECK(khr::this_group<Dims>() == it.get_group());
41+
CHECK(khr::this_sub_group() == it.get_sub_group());
42+
});
43+
})
44+
.wait();
45+
46+
for(size_t i = 0; i < global_range.size(); ++i) { CAPTURE(i); CHECK(visited[i]); }
47+
48+
#else // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
49+
SKIP("SYCL_KHR_WORK_ITEM_QUERIES not enabled");
50+
#endif // SIMSYCL_ENABLE_SYCL_KHR_WORK_ITEM_QUERIES
51+
}

0 commit comments

Comments
 (0)