Skip to content

Commit 3e4757a

Browse files
committed
v6.0.1 release
1 parent d824043 commit 3e4757a

File tree

84 files changed

+5078
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+5078
-0
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/*
2+
* @@name: affinity_control.1
3+
* @@type: C/C++
4+
* @@operation: compile
5+
* @@expect: success
6+
* @@version: omp_6.0
7+
*/
8+
/* Defined elsewhere; may use additional free-agent threads. */
void work();

/*
 * Calls work() from a parallel region that carries no clauses; the
 * region is configured through the environment variables listed below.
 */
int main()
{
   /*
    * Environment controls for the region below:
    *   OMP_PLACES       - input place partition
    *   OMP_NUM_THREADS  - team size
    *   OMP_PROC_BIND    - affinity policy
    *   OMP_THREAD_LIMIT - bound on the number of additional
    *                      free-agent threads
    */
   #pragma omp parallel
   {
      work();
   }

   return 0;
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
! @@name: affinity_control.1
2+
! @@type: F-free
3+
! @@operation: compile
4+
! @@expect: success
5+
! @@version: omp_6.0
6+
! Calls work from a parallel region that carries no clauses; the region
! is configured through the environment variables listed below.
program main
   interface
      ! work may use additional free-agent threads
      subroutine work
      end subroutine work
   end interface

   ! Environment controls for the region below:
   !   OMP_PLACES       - input place partition
   !   OMP_NUM_THREADS  - team size
   !   OMP_PROC_BIND    - affinity policy
   !   OMP_THREAD_LIMIT - bound on the number of additional
   !                      free-agent threads
   !$omp parallel
      call work
   !$omp end parallel
end program main

data_environment/groupprivate.tex

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
\section{\kcode{groupprivate} Directive}
2+
\label{sec:groupprivate}
3+
\index{directives!groupprivate@\kcode{groupprivate}}
4+
\index{groupprivate directive@\kcode{groupprivate} directive}
5+
6+
The \kcode{groupprivate} directive introduced in Specification 6.0
7+
allows specified list items to be replicated such that each contention
8+
group will have its own uninitialized copy. The list item is shared among
9+
threads of the contention group and does not exist
10+
outside the scope of the contention group.
11+
12+
In the following example, the variable \ucode{x} is defined as a static
13+
variable and specified with the \kcode{groupprivate} data attribute in
14+
the function \ucode{foo}. Four teams created by the \kcode{teams} construct
15+
execute the \kcode{parallel} region that calls the
16+
\ucode{foo} function. For each team the groupprivate variable \ucode{x}
17+
is created and is accessible to the group of tasks of the \kcode{parallel} region.
18+
19+
\cppexample[6.0]{groupprivate}{1}
20+
21+
\ffreeexample[6.0]{groupprivate}{1}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* @@name: groupprivate.1
3+
* @@type: C++
4+
* @@operation: run
5+
* @@expect: success
6+
* @@version: omp_6.0
7+
*/
8+
#include <omp.h>
9+
#include <stdio.h>
10+
11+
// Stores tid + i into x[i] for every i in [0, n).
// The loop is a worksharing loop with no enclosing parallel construct in
// this function, so it binds to the parallel region of the caller.
void init(int *x, int n, int tid)
{
   #pragma omp for
   for (int k = 0; k < n; ++k) {
      x[k] = k + tid;
   }
}
18+
19+
void foo(int &sum, int tid)
20+
{
21+
static int x[100];
22+
#pragma omp groupprivate(x)
23+
24+
init(x, 100, tid);
25+
26+
#pragma omp for reduction(+:sum)
27+
for (int i = 0; i < 100; i++) {
28+
sum += x[i];
29+
}
30+
}
31+
#pragma omp declare_target enter(foo)
32+
33+
int main()
34+
{
35+
int sums[4] = {0,0,0,0};
36+
37+
#pragma omp target teams num_teams(4) thread_limit(100)
38+
#pragma omp parallel
39+
foo(sums[omp_get_team_num()], omp_get_team_num());
40+
41+
if( sums[0] != 4950 || sums[1] != 5050 ||
42+
sums[2] != 5150 || sums[3] != 5250 ){
43+
printf("FAILED\n");
44+
return 1;
45+
}
46+
printf("PASSED\n");
47+
48+
return 0;
49+
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
! @@name: groupprivate.1
2+
! @@type: F-free
3+
! @@operation: run
4+
! @@expect: success
5+
! @@version: omp_6.0
6+
module mfunc
contains

   ! Set x(k) = tid + k for k = 1..n.
   ! The loop is a worksharing loop with no enclosing parallel construct
   ! in this subroutine, so it binds to the parallel region of the caller.
   subroutine init(x, n, tid)
      implicit none
      integer, intent(in)  :: tid, n
      integer, intent(out) :: x(n)
      integer :: k

      !$omp do
      do k = 1, n
         x(k) = k + tid
      end do
   end subroutine init

   ! Fill the groupprivate array x through init, then add its elements
   ! to sum with a + reduction.
   subroutine foo(sum, tid)
      implicit none
      integer, intent(inout) :: sum
      integer, intent(in)    :: tid
      integer :: k
      integer, save :: x(100)
      !$omp groupprivate(x)
      !$omp declare_target

      call init(x, size(x), tid)

      !$omp do reduction(+:sum)
      do k = 1, size(x)
         sum = x(k) + sum
      end do
   end subroutine foo

end module mfunc
40+
41+
! Runs foo on four teams of a target device and checks the per-team sums.
program main
   use mfunc
   use omp_lib
   implicit none

   integer :: sums(4) = [0, 0, 0, 0]
   integer :: team_num

   ! Each team accumulates into its own element of sums; the team number
   ! is offset by one to index the 1-based array.
   !$omp target teams num_teams(4) thread_limit(100)
   !$omp parallel private(team_num)
      team_num = omp_get_team_num()
      call foo(sums(team_num+1), team_num)
   !$omp end parallel
   !$omp end target teams

   ! Team t adds (t + i) for i = 1..100, i.e. 5050 + 100*t.
   if( sums(1) /= 5050 .or. sums(2) /= 5150 .or. &
       sums(3) /= 5250 .or. sums(4) /= 5350 ) then
      print*, "FAILED"
      stop 1
   endif
   print *, "PASSED"
end program main

devices/self_mapping.tex

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
%\pagebreak
2+
\section{Self Mapping}
3+
\label{sec:self_mapping}
4+
5+
\index{self maps}
6+
7+
In general, a map operation may create a separate copy of a list item in a
8+
device data environment that the runtime implementation must associate with the
9+
original list item. A \emph{self map} is a map operation that does not result in a
10+
separate copy of a list item. Rather, the original storage of the list item is
11+
reused for the device data environment, with the runtime regarding its storage
12+
as being ``mapped'' to itself.
13+
14+
OpenMP 6.0 adds the \kcode{self} modifier to the \kcode{map} clause to require
15+
that its list items are mapped using a self map. The \kcode{self} keyword was
16+
also added as an argument to the \kcode{defaultmap} clause, indicating that
17+
variables of a specified category in a \kcode{target} construct are self mapped
18+
by default. Additionally, the \kcode{self_maps} clause was added to the
19+
\kcode{requires} directive to require that all map operations in a given
20+
compilation unit be self maps. The \kcode{self_maps} clause includes all
21+
the guarantees provided by the \kcode{unified_shared_memory} requirement clause.
22+
23+
The following C example shows the use of these features when mapping a structure
24+
type to a device. The self map allows \ucode{start} and \ucode{end} pointer
25+
data members that point to the start and end of the \ucode{buf} data member to
26+
be used on the device without requiring pointer attachments.
27+
28+
\cexample[6.0]{self_map}{1}
29+
30+
The next C++ and Fortran examples map a structure for which an array member
31+
\ucode{buf} is private but may be accessed in the public interface via a
32+
pointer member \ucode{p}. When mapping the structure to a device, the
33+
programmer must ordinarily ensure that the \ucode{p} data member on the device
34+
is attached to the \ucode{buf} data member, by adding a \ucode{my_data.p[:]}
35+
(for C++) or \ucode{my_data\%p(:)} (for Fortran) list item to a \kcode{map}
36+
clause. By instead asking for the structure to be self mapped, there is no need
37+
for the pointer attachment.
38+
39+
\cppexample[6.0]{self_map}{2}
40+
\ffreeexample[6.0]{self_map}{2}
41+
42+
If the implementation is unable to satisfy the requirements of a self map then
43+
a runtime error will be issued. This may occur because the original storage is
44+
not accessible and cannot be made accessible from the device, or it may occur
45+
because the storage has already been mapped to the device without a self map. A
46+
third case is that the self map applies to a pointer for which pointer
47+
attachment is prescribed to a pointee that was not also self mapped. This is
48+
an error because a self-mapped attached pointer in that case would assign a device address to the
49+
original pointer, which would almost certainly not be the desired behavior.
50+
51+
The following example illustrates each of the three cases above that could
52+
potentially result in a runtime error due to an unfulfilled self map.
53+
54+
\cexample[6.0]{self_map}{3}
55+
\ffreeexample[6.0]{self_map}{3}
56+

devices/sources/declare_target.8.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* @@name: declare_target.8
3+
* @@type: C
4+
* @@operation: run
5+
* @@expect: success
6+
* @@version: omp_6.0
7+
*/
8+
#include <stdio.h>
9+
#include <omp.h>
10+
11+
/* sum and x are made device-local by the directive that follows them. */
int sum;
int x[100];

#pragma omp declare_target local(sum, x)

#pragma omp begin declare_target
/* Fill x so that x[j] == j + dev_id for every element. */
void init_x(int dev_id)
{
   int j;
   for (j = 0; j != 100; j++)
      x[j] = dev_id + j;
}

/* Add every element of x to sum using a worksharing loop with a
   + reduction. */
void foo(void)
{
   #pragma omp for reduction(+:sum)
   for (int k = 0; k < 100; ++k)
      sum = sum + x[k];
}
#pragma omp end declare_target
34+
35+
int main(void)
36+
{
37+
int ndev = omp_get_num_devices();
38+
if(!ndev){
39+
printf("No OpenMP target devices found.\n");
40+
return 1;
41+
}
42+
int host_sum[ndev];
43+
/* Initialize per device */
44+
for (int i = 0; i < ndev; i++) {
45+
#pragma omp target device(i)
46+
{
47+
init_x(i);
48+
sum = 0;
49+
}
50+
}
51+
52+
/* Parallel reductions on each device */
53+
for (int i = 0; i < ndev; i++) {
54+
#pragma omp target parallel map(from:host_sum[i]) device(i) nowait
55+
{
56+
foo();
57+
host_sum[i] = sum;
58+
}
59+
}
60+
#pragma omp taskwait
61+
62+
for (int i = 0; i < ndev; i++) {
63+
printf("sum: %d, device: %d\n", host_sum[i], i);
64+
}
65+
return 0;
66+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
! @@name: declare_target.8
2+
! @@type: F-free
3+
! @@operation: run
4+
! @@expect: success
5+
! @@version: omp_6.0
6+
module dev_mod
   implicit none
   integer :: sum
   integer :: x(100)

   ! sum and x are device-local
   !$omp declare_target local(sum, x)

contains

   ! Set x(k) = (k-1) + dev_id for every element of x.
   subroutine init_x(dev_id)
      integer, value :: dev_id
      integer :: k
      !$omp declare_target
      do k = lbound(x, 1), ubound(x, 1)
         x(k) = dev_id + (k-1)
      end do
   end subroutine init_x

   ! Add every element of x to sum using a worksharing loop with a
   ! + reduction.
   subroutine foo()
      integer :: k
      !$omp declare_target

      !$omp do reduction(+:sum)
      do k = 1, 100
         sum = x(k) + sum
      end do
   end subroutine foo

end module dev_mod
35+
36+
! For every available target device: initialize the device-local x and
! sum, run the reduction in foo in a parallel region on that device, and
! print the resulting sum on the host.
program main
   use omp_lib
   use dev_mod
   implicit none

   integer :: ndev, i
   integer, allocatable :: host_sum(:)

   ndev = omp_get_num_devices()
   if (ndev <= 0) then
      print *, 'No OpenMP target devices found.'
      stop
   end if
   ! Indexed by device number, starting at 0
   allocate(host_sum(0:ndev-1))

   ! Initialize per device
   do i = 0, ndev-1
      !$omp target device(i)
         call init_x(i)
         sum = 0
      !$omp end target
   end do

   ! Parallel reductions on each device
   do i = 0, ndev-1
      !$omp target parallel map(from: host_sum(i)) device(i) nowait
         call foo()
         ! The worksharing loop in foo ends with a barrier, so sum is
         ! complete here.  Only one thread copies it out, so the threads
         ! of the team do not write host_sum(i) concurrently without
         ! synchronization.
         !$omp single
         host_sum(i) = sum
         !$omp end single
      !$omp end target parallel
   end do
   ! Wait for the nowait target regions before reading host_sum
   !$omp taskwait

   do i = 0, ndev-1
      print *, 'sum: ', host_sum(i), ', device: ', i
   end do

   deallocate(host_sum)

end program main

0 commit comments

Comments
 (0)