-
Notifications
You must be signed in to change notification settings - Fork 46
add Float16 math functions #368
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Codecov Report
✅ All modified and coverable lines are covered by tests.
Additional details and impacted files
@@ Coverage Diff @@
## master #368 +/- ##
=======================================
Coverage 79.01% 79.01%
=======================================
Files 12 12
Lines 672 672
=======================================
Hits 531 531
Misses 141 141
☔ View full report in Codecov by Sentry.
🚀 New features to boost your workflow:
|
a7d113b to
3ff64f9
Compare
|
Hmm, it looks like pocl's Float16 support is incomplete and is missing some of these functions. Should we skip running the GPUCompiler test suite on Float16 for now? We could add only those intrinsics that pocl supports, but rusticl, for example, seems to support all of these. |
This comment was marked as resolved.
This comment was marked as resolved.
|
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/lib/intrinsics/src/math.jl b/lib/intrinsics/src/math.jl
index 39117bb..a769499 100644
--- a/lib/intrinsics/src/math.jl
+++ b/lib/intrinsics/src/math.jl
@@ -33,7 +33,7 @@ for gentype in generic_types
@device_override Base.cos(x::$gentype) = @builtin_ccall("cos", $gentype, ($gentype,), x)
@device_override Base.cosh(x::$gentype) = @builtin_ccall("cosh", $gentype, ($gentype,), x)
-@device_override Base.cospi(x::$gentype) = @builtin_ccall("cospi", $gentype, ($gentype,), x)
+ @device_override Base.cospi(x::$gentype) = @builtin_ccall("cospi", $gentype, ($gentype,), x)
@device_override SpecialFunctions.erfc(x::$gentype) = @builtin_ccall("erfc", $gentype, ($gentype,), x)
@device_override SpecialFunctions.erf(x::$gentype) = @builtin_ccall("erf", $gentype, ($gentype,), x)
@@ -59,10 +59,10 @@ for gentype in generic_types
#@device_override Base.mod(x::$gentype, y::$gentype) = @builtin_ccall("fmod", $gentype, ($gentype, $gentype), x, y)
# fract(x::$gentype, $gentype *iptr) = @builtin_ccall("fract", $gentype, ($gentype, $gentype *), x, iptr)
-# TODO: remove once https://github.com/pocl/pocl/issues/2034 is addressed
-if $gentype != Float16
- @device_override Base.hypot(x::$gentype, y::$gentype) = @builtin_ccall("hypot", $gentype, ($gentype, $gentype), x, y)
-end
+ # TODO: remove once https://github.com/pocl/pocl/issues/2034 is addressed
+ if $gentype != Float16
+ @device_override Base.hypot(x::$gentype, y::$gentype) = @builtin_ccall("hypot", $gentype, ($gentype, $gentype), x, y)
+ end
@device_override SpecialFunctions.loggamma(x::$gentype) = @builtin_ccall("lgamma", $gentype, ($gentype,), x)
@@ -101,13 +101,13 @@ end
return sinval, cosval[]
end
@device_override Base.sinh(x::$gentype) = @builtin_ccall("sinh", $gentype, ($gentype,), x)
-@device_override Base.sinpi(x::$gentype) = @builtin_ccall("sinpi", $gentype, ($gentype,), x)
+ @device_override Base.sinpi(x::$gentype) = @builtin_ccall("sinpi", $gentype, ($gentype,), x)
@device_override Base.sqrt(x::$gentype) = @builtin_ccall("sqrt", $gentype, ($gentype,), x)
@device_override Base.tan(x::$gentype) = @builtin_ccall("tan", $gentype, ($gentype,), x)
@device_override Base.tanh(x::$gentype) = @builtin_ccall("tanh", $gentype, ($gentype,), x)
-@device_override Base.tanpi(x::$gentype) = @builtin_ccall("tanpi", $gentype, ($gentype,), x)
+ @device_override Base.tanpi(x::$gentype) = @builtin_ccall("tanpi", $gentype, ($gentype,), x)
@device_override SpecialFunctions.gamma(x::$gentype) = @builtin_ccall("tgamma", $gentype, ($gentype,), x)
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index ad970c0..d2a9c48 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -63,105 +63,105 @@ cl.memory_backend() isa cl.SVMBackend && @on_device atomic_work_item_fence(OpenC
end
-@testset "math" begin
-
-@testset "unary - $T" for T in float_types
- @testset "$f" for f in [
- acos, acosh,
- asin, asinh,
- atan, atanh,
- cbrt,
- ceil,
- cos, cosh, cospi,
- exp, exp2, exp10, expm1,
- abs,
- floor,
- log, log2, log10, log1p,
- round,
- sin, sinh, sinpi,
- sqrt,
- tan, tanh, tanpi,
- trunc,
- ]
- x = rand(T)
- if f == acosh
- x += 1
+ @testset "math" begin
+
+ @testset "unary - $T" for T in float_types
+ @testset "$f" for f in [
+ acos, acosh,
+ asin, asinh,
+ atan, atanh,
+ cbrt,
+ ceil,
+ cos, cosh, cospi,
+ exp, exp2, exp10, expm1,
+ abs,
+ floor,
+ log, log2, log10, log1p,
+ round,
+ sin, sinh, sinpi,
+ sqrt,
+ tan, tanh, tanpi,
+ trunc,
+ ]
+ x = rand(T)
+ if f == acosh
+ x += 1
+ end
+ broken = ispocl && T == Float16 && f in [acosh, asinh, atanh, cbrt, cospi, expm1, log1p, sinpi, tanpi]
+ @test call_on_device(f, x) ≈ f(x) broken = broken
+ end
end
- broken = ispocl && T == Float16 && f in [acosh, asinh, atanh, cbrt, cospi, expm1, log1p, sinpi, tanpi]
- @test call_on_device(f, x) ≈ f(x) broken = broken
- end
-end
-@testset "binary - $T" for T in float_types
- @testset "$f" for f in [
- atan,
- copysign,
- max,
- min,
- hypot,
- (^),
- ]
- x = rand(T)
- y = rand(T)
- broken = ispocl && T == Float16 && f == atan
- @test call_on_device(f, x, y) ≈ f(x, y) broken = broken
- end
-end
+ @testset "binary - $T" for T in float_types
+ @testset "$f" for f in [
+ atan,
+ copysign,
+ max,
+ min,
+ hypot,
+ (^),
+ ]
+ x = rand(T)
+ y = rand(T)
+ broken = ispocl && T == Float16 && f == atan
+ @test call_on_device(f, x, y) ≈ f(x, y) broken = broken
+ end
+ end
-@testset "ternary - $T" for T in float_types
- @testset "$f" for f in [
- fma,
- ]
- x = rand(T)
- y = rand(T)
- z = rand(T)
- @test call_on_device(f, x, y, z) ≈ f(x, y, z)
- end
-end
+ @testset "ternary - $T" for T in float_types
+ @testset "$f" for f in [
+ fma,
+ ]
+ x = rand(T)
+ y = rand(T)
+ z = rand(T)
+ @test call_on_device(f, x, y, z) ≈ f(x, y, z)
+ end
+ end
-@testset "OpenCL-specific unary - $T" for T in float_types
- @testset "$f" for f in [
- OpenCL.acospi,
- OpenCL.asinpi,
- OpenCL.atanpi,
- OpenCL.logb,
- OpenCL.rint,
- OpenCL.rsqrt,
- ]
- x = rand(T)
- broken = ispocl && T == Float16 && !(f in [OpenCL.rint, OpenCL.rsqrt])
- @test call_on_device(f, x) isa Real broken = broken # Just check it doesn't error
- end
- broken = ispocl && T == Float16
- @test call_on_device(OpenCL.ilogb, T(8.0)) isa Int32 broken = broken
- @test call_on_device(OpenCL.nan, Base.uinttype(T)(0)) isa T
-end
+ @testset "OpenCL-specific unary - $T" for T in float_types
+ @testset "$f" for f in [
+ OpenCL.acospi,
+ OpenCL.asinpi,
+ OpenCL.atanpi,
+ OpenCL.logb,
+ OpenCL.rint,
+ OpenCL.rsqrt,
+ ]
+ x = rand(T)
+ broken = ispocl && T == Float16 && !(f in [OpenCL.rint, OpenCL.rsqrt])
+ @test call_on_device(f, x) isa Real broken = broken # Just check it doesn't error
+ end
+ broken = ispocl && T == Float16
+ @test call_on_device(OpenCL.ilogb, T(8.0)) isa Int32 broken = broken
+ @test call_on_device(OpenCL.nan, Base.uinttype(T)(0)) isa T
+ end
-@testset "OpenCL-specific binary - $T" for T in float_types
- @testset "$f" for f in [
- OpenCL.atanpi,
- OpenCL.dim,
- OpenCL.maxmag,
- OpenCL.minmag,
- OpenCL.nextafter,
- OpenCL.powr,
- ]
- x = rand(T)
- y = rand(T)
- broken = ispocl && T == Float16 && !(f in [OpenCL.maxmag, OpenCL.minmag])
- @test call_on_device(f, x, y) isa Real broken = broken # Just check it doesn't error
- end
- broken = ispocl && T == Float16
- @test call_on_device(OpenCL.rootn, T(8.0), Int32(3)) ≈ T(2.0) broken = broken
-end
+ @testset "OpenCL-specific binary - $T" for T in float_types
+ @testset "$f" for f in [
+ OpenCL.atanpi,
+ OpenCL.dim,
+ OpenCL.maxmag,
+ OpenCL.minmag,
+ OpenCL.nextafter,
+ OpenCL.powr,
+ ]
+ x = rand(T)
+ y = rand(T)
+ broken = ispocl && T == Float16 && !(f in [OpenCL.maxmag, OpenCL.minmag])
+ @test call_on_device(f, x, y) isa Real broken = broken # Just check it doesn't error
+ end
+ broken = ispocl && T == Float16
+ @test call_on_device(OpenCL.rootn, T(8.0), Int32(3)) ≈ T(2.0) broken = broken
+ end
-@testset "OpenCL-specific ternary - $T" for T in float_types
- x = rand(T)
- y = rand(T)
- z = rand(T)
- @test call_on_device(OpenCL.mad, x, y, z) ≈ x * y + z
-end
+ @testset "OpenCL-specific ternary - $T" for T in float_types
+ x = rand(T)
+ y = rand(T)
+ z = rand(T)
+ @test call_on_device(OpenCL.mad, x, y, z) ≈ x * y + z
+ end
-end
+ end
end |
fea0a53 to
edb3228
Compare
(Removing the overrides doesn't really buy us much either, since I then get a bunch of miscompilations, likely due to the presence of throw statements.) I exempted `hypot` for now, since that actually seems to be used in `GPUArrays`.
|
Ok, I think I am happy with this for now. The intrinsics missing in pocl are tracked in pocl/pocl#2034 |
Also fixes #374 by simply removing that device override.