Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions operatorspy/tests/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,21 @@
check_error,
)

from operatorspy.tests.test_utils import get_args
from operatorspy.tests.test_utils import (
get_args,
debug,
get_tolerance,
)
from enum import Enum, auto
import torch

DEBUG = False

# the atol and rtol for each data type
tolerance_map = {
torch.float16: {'atol': 0, 'rtol': 1e-3},
torch.float32: {'atol': 0, 'rtol': 1e-5},
}

class Inplace(Enum):
OUT_OF_PLACE = auto()
Expand Down Expand Up @@ -83,7 +94,11 @@ def test(
check_error(
lib.infiniopAdd(descriptor, c_tensor.data, a_tensor.data, b_tensor.data, None)
)
assert torch.allclose(c, ans, atol=0, rtol=1e-3)

atol, rtol = get_tolerance(tolerance_map, tensor_dtype)
if DEBUG:
debug(c, ans, atol=atol, rtol=rtol)
assert torch.allclose(c, ans, atol=atol, rtol=rtol)
check_error(lib.infiniopDestroyAddDescriptor(descriptor))


Expand Down Expand Up @@ -157,6 +172,8 @@ def test_bang(lib, test_cases):
infiniopAddDescriptor_t,
]

if args.debug:
DEBUG = True
if args.cpu:
test_cpu(lib, test_cases)
if args.cuda:
Expand Down
7 changes: 6 additions & 1 deletion operatorspy/tests/attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@
create_workspace,
)

from operatorspy.tests.test_utils import get_args
from operatorspy.tests.test_utils import get_args, debug
import torch
import torch.nn.functional as F

DEBUG = False

class AttentionDescriptor(Structure):
_fields_ = [("device", c_int32)]
Expand Down Expand Up @@ -184,6 +185,8 @@ def test(
)
)

if DEBUG:
debug(out, ans, atol=1e-4, rtol=1e-2)
assert torch.allclose(out, ans, atol=1e-4, rtol=1e-2)

check_error(lib.infiniopDestroyAttentionDescriptor(descriptor))
Expand Down Expand Up @@ -406,6 +409,8 @@ def test_bang(lib, test_cases):
infiniopAttentionDescriptor_t,
]

if args.debug:
DEBUG = True
if args.cpu:
test_cpu(lib, test_cases)
if args.cuda:
Expand Down
19 changes: 17 additions & 2 deletions operatorspy/tests/avg_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,27 @@
check_error,
)

from operatorspy.tests.test_utils import get_args
from operatorspy.tests.test_utils import (
get_args,
debug,
get_tolerance,
)
import torch
from typing import Tuple

DEBUG = False
# constant controlling whether to profile the PyTorch and lib functions
# NOTE: a synchronization call must be added manually to the lib function,
# e.g., cudaDeviceSynchronize() for CUDA
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000

# the atol and rtol for each data type
tolerance_map = {
torch.float16: {'atol': 0, 'rtol': 1e-3},
torch.float32: {'atol': 0, 'rtol': 1e-5},
}

class AvgPoolDescriptor(Structure):
_fields_ = [("device", c_int32)]
Expand Down Expand Up @@ -156,7 +166,10 @@ def test(
elapsed = (time.time() - start_time) / NUM_ITERATIONS
print(f" lib time: {elapsed :6f}")

assert torch.allclose(y, ans, atol=0, rtol=1e-3)
atol, rtol = get_tolerance(tolerance_map, tensor_dtype)
if DEBUG:
debug(y, ans, atol=atol, rtol=rtol)
assert torch.allclose(y, ans, atol=atol, rtol=rtol)
check_error(lib.infiniopDestroyAvgPoolDescriptor(descriptor))


Expand Down Expand Up @@ -228,6 +241,8 @@ def test_bang(lib, test_cases):
infiniopAvgPoolDescriptor_t,
]

if args.debug:
DEBUG = True
if args.cpu:
test_cpu(lib, test_cases)
if args.cuda:
Expand Down
8 changes: 7 additions & 1 deletion operatorspy/tests/causal_softmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
create_workspace,
)

from operatorspy.tests.test_utils import get_args
from operatorspy.tests.test_utils import get_args, debug
import torch

DEBUG = False

class CausalSoftmaxDescriptor(Structure):
_fields_ = [("device", c_int32)]
Expand Down Expand Up @@ -72,6 +73,9 @@ def test(lib, handle, torch_device, x_shape, x_stride=None, x_dtype=torch.float1
None,
)
)

if DEBUG:
debug(x, ans, atol=0, rtol=1e-2)
assert torch.allclose(x, ans, atol=0, rtol=1e-2)
check_error(lib.infiniopDestroyCausalSoftmaxDescriptor(descriptor))

Expand Down Expand Up @@ -143,6 +147,8 @@ def test_ascend(lib, test_cases):
infiniopCausalSoftmaxDescriptor_t,
]

if args.debug:
DEBUG = True
if args.cpu:
test_cpu(lib, test_cases)
if args.cuda:
Expand Down
22 changes: 17 additions & 5 deletions operatorspy/tests/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,30 @@
check_error,
)

from operatorspy.tests.test_utils import get_args
from operatorspy.tests.test_utils import (
get_args,
debug,
get_tolerance,
)
import torch
import math
import ctypes
from torch.nn import functional as F
from typing import List, Tuple

DEBUG = False
# constant controlling whether to profile the PyTorch and lib functions
# NOTE: a synchronization call must be added manually to the lib function,
# e.g., cudaDeviceSynchronize() for CUDA
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000

# the atol and rtol for each data type
tolerance_map = {
torch.float16: {'atol': 0, 'rtol': 1e-2},
torch.float32: {'atol': 0, 'rtol': 1e-3},
}

class ConvDescriptor(Structure):
_fields_ = [("device", c_int32)]
Expand Down Expand Up @@ -177,10 +187,10 @@ def test(
elapsed = (time.time() - start_time) / NUM_ITERATIONS
print(f" lib time: {elapsed :6f}")

if (tensor_dtype == torch.float16):
assert torch.allclose(y, ans, atol=0, rtol=1e-2)
else:
assert torch.allclose(y, ans, atol=0, rtol=1e-3)
atol, rtol = get_tolerance(tolerance_map, tensor_dtype)
if DEBUG:
debug(y, ans, atol=atol, rtol=rtol)
assert torch.allclose(y, ans, atol=atol, rtol=rtol)
check_error(lib.infiniopDestroyConvDescriptor(descriptor))


Expand Down Expand Up @@ -286,6 +296,8 @@ def test_bang(lib, test_cases):
infiniopConvDescriptor_t,
]

if args.debug:
DEBUG = True
if args.cpu:
test_cpu(lib, test_cases)
if args.cuda:
Expand Down
11 changes: 9 additions & 2 deletions operatorspy/tests/expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@
rearrange_tensor,
)

from operatorspy.tests.test_utils import get_args
from operatorspy.tests.test_utils import get_args, debug
import torch

DEBUG = False

# constant controlling whether to profile the PyTorch and lib functions
# NOTE: a synchronization call must be added manually to the lib function,
# e.g., cudaDeviceSynchronize() for CUDA
Expand Down Expand Up @@ -101,7 +103,10 @@ def test(
)
elapsed = (time.time() - start_time) / NUM_ITERATIONS
print(f" lib time: {elapsed :6f}")
assert torch.allclose(y, ans, atol=0, rtol=1e-3)

if DEBUG:
debug(y, ans, atol=0, rtol=0)
assert torch.allclose(y, ans, atol=0, rtol=0)
check_error(lib.infiniopDestroyExpandDescriptor(descriptor))


Expand Down Expand Up @@ -168,6 +173,8 @@ def test_bang(lib, test_cases):
infiniopExpandDescriptor_t,
]

if args.debug:
DEBUG = True
if args.cpu:
test_cpu(lib, test_cases)
if args.cuda:
Expand Down
21 changes: 19 additions & 2 deletions operatorspy/tests/gemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,28 @@
rearrange_tensor,
)

from operatorspy.tests.test_utils import get_args
from operatorspy.tests.test_utils import (
get_args,
debug,
get_tolerance,
)
import torch

DEBUG = False

# constant controlling whether to profile the PyTorch and lib functions
# NOTE: a synchronization call must be added manually to the lib function,
# e.g., cudaDeviceSynchronize() for CUDA
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000

# the atol and rtol for each data type
tolerance_map = {
torch.float16: {'atol': 0, 'rtol': 1e-2},
torch.float32: {'atol': 0, 'rtol': 1e-2},
}

class GEMMDescriptor(Structure):
_fields_ = [("device", c_int32)]

Expand Down Expand Up @@ -161,7 +173,10 @@ def test(
elapsed = (time.time() - start_time) / NUM_ITERATIONS
print(f" lib time: {elapsed :6f}")

assert torch.allclose(y, ans, atol=0, rtol=1e-2)
atol, rtol = get_tolerance(tolerance_map, dtype)
if DEBUG:
debug(y, ans, atol=atol, rtol=rtol)
assert torch.allclose(y, ans, atol=atol, rtol=rtol)
check_error(lib.infiniopDestroyGEMMDescriptor(descriptor))


Expand Down Expand Up @@ -363,6 +378,8 @@ def test_bang(lib, test_cases):
infiniopGEMMDescriptor_t,
]

if args.debug:
DEBUG = True
if args.cpu:
test_cpu(lib, test_cases)
if args.cuda:
Expand Down
19 changes: 17 additions & 2 deletions operatorspy/tests/global_avg_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,26 @@
check_error,
)

from operatorspy.tests.test_utils import get_args
from operatorspy.tests.test_utils import (
get_args,
debug,
get_tolerance,
)
import torch, time

DEBUG = False
# constant controlling whether to profile the PyTorch and lib functions
# NOTE: a synchronization call must be added manually to the lib function,
# e.g., cudaDeviceSynchronize() for CUDA
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000

# the atol and rtol for each data type
tolerance_map = {
torch.float16: {'atol': 0, 'rtol': 1e-3},
torch.float32: {'atol': 0, 'rtol': 1e-4},
}

class GlobalAvgPoolDescriptor(Structure):
_fields_ = [("device", c_int32)]
Expand Down Expand Up @@ -118,7 +128,10 @@ def test(
elapsed = (time.time() - start_time) / NUM_ITERATIONS
print(f" lib time: {elapsed :6f}")

assert torch.allclose(y, ans, atol=0, rtol=1e-3)
atol, rtol = get_tolerance(tolerance_map, tensor_dtype)
if DEBUG:
debug(y, ans, atol=atol, rtol=rtol)
assert torch.allclose(y, ans, atol=atol, rtol=rtol)
check_error(lib.infiniopDestroyGlobalAvgPoolDescriptor(descriptor))


Expand Down Expand Up @@ -197,6 +210,8 @@ def test_bang(lib, test_cases):
infiniopGlobalAvgPoolDescriptor_t,
]

if args.debug:
DEBUG = True
if args.cpu:
test_cpu(lib, test_cases)
if args.cuda:
Expand Down
21 changes: 19 additions & 2 deletions operatorspy/tests/matmul.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,25 @@
create_workspace,
)

from operatorspy.tests.test_utils import get_args, synchronize_device
from operatorspy.tests.test_utils import (
get_args,
synchronize_device,
debug,
get_tolerance,
)
import torch

DEBUG = False
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000

# the atol and rtol for each data type
tolerance_map = {
torch.float16: {'atol': 0, 'rtol': 1e-2},
torch.float32: {'atol': 0, 'rtol': 1e-3},
}

class MatmulDescriptor(Structure):
_fields_ = [("device", c_int32)]

Expand Down Expand Up @@ -115,7 +127,10 @@ def test(
)
)

assert torch.allclose(c, ans, atol=0, rtol=1e-2)
atol, rtol = get_tolerance(tolerance_map, dtype)
if DEBUG:
debug(c, ans, atol=atol, rtol=rtol)
assert torch.allclose(c, ans, atol=atol, rtol=rtol)

if PROFILE:
for i in range(NUM_PRERUN):
Expand Down Expand Up @@ -343,6 +358,8 @@ def test_ascend(lib, test_cases):
infiniopMatmulDescriptor_t,
]

if args.debug:
DEBUG = True
if args.profile:
PROFILE = True
if args.cpu:
Expand Down
Loading
Loading