Skip to content

Commit 170c4a2

Browse files
Merge pull request #155 from PanZezhong1725/dev
beta-0.0.0
2 parents 7c74688 + a560438 commit 170c4a2

File tree

275 files changed

+22048
-3103
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

275 files changed

+22048
-3103
lines changed

.github/workflows/main.yaml

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
- dev
8+
pull_request:
9+
10+
11+
jobs:
12+
build:
13+
runs-on: ubuntu-latest
14+
15+
steps:
16+
- name: Checkout code
17+
uses: actions/checkout@v3
18+
19+
- name: Install Python
20+
uses: actions/setup-python@v4
21+
with:
22+
python-version: '3.x'
23+
24+
- name: Install Python dependencies
25+
run: |
26+
pip install numpy
27+
pip install torch
28+
29+
- name: Install xmake
30+
uses: xmake-io/github-action-setup-xmake@v1
31+
with:
32+
xmake-version: latest
33+
34+
- name: configure xmake
35+
run: xmake f --cpu=true -cv
36+
37+
- name: Set INFINI_ROOT
38+
run: |
39+
# Point INFINI_ROOT at a directory inside the checked-out workspace, create it,
# and append the assignment to the file named by GITHUB_ENV so that the steps
# that follow see the same value. All expansions are quoted so that a path
# containing whitespace or glob characters is passed through intact.
export INFINI_ROOT="$GITHUB_WORKSPACE/.infini"
mkdir -p -- "$INFINI_ROOT"
echo "INFINI_ROOT=$INFINI_ROOT" >> "$GITHUB_ENV"
42+
43+
- name: Build with XMake
44+
run: xmake build && xmake install
45+
46+
- name: Run Python Tests
47+
run: |
48+
# Run every Python test script under operatorspy/tests with the --cpu flag,
# time each one, and print a coloured pass/fail summary.
# Exit status: 1 if any script fails or if no test script is found; otherwise
# the step falls through with status 0.
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m' # No Color

PASSED_TESTS=()
FAILED_TESTS=()
for script in operatorspy/tests/*.py; do
  # An unmatched glob is left as the literal pattern; do not hand it to python3.
  [ -e "$script" ] || continue

  # These two files are skipped by name; everything else is run as a test.
  case "${script##*/}" in
    __init__.py | test_utils.py) continue ;;
  esac

  echo "Running $script"
  START_TIME=$(date +%s)
  if python3 "$script" --cpu; then
    echo "$script passed"
    PASSED_TESTS+=("$script")
  else
    echo "$script failed"
    FAILED_TESTS+=("$script")
  fi
  END_TIME=$(date +%s)
  DURATION=$(( END_TIME - START_TIME ))
  # Deliberately not named SECONDS: that is bash's built-in elapsed-time
  # counter, and assigning to it resets the shell's timer.
  ELAPSED_MIN=$(( DURATION / 60 ))
  ELAPSED_SEC=$(( DURATION % 60 ))
  echo "Execution time for $script: ${ELAPSED_MIN}m ${ELAPSED_SEC}s"
done

# A run that executed nothing must not be reported as a success.
if [ "${#PASSED_TESTS[@]}" -eq 0 ] && [ "${#FAILED_TESTS[@]}" -eq 0 ]; then
  echo -e "${RED}No test scripts found in operatorspy/tests${NC}"
  exit 1
fi

echo "The following tests passed:"
for test in "${PASSED_TESTS[@]}"; do
  echo -e "${GREEN}$test${NC}"
done

if [ "${#FAILED_TESTS[@]}" -ne 0 ]; then
  echo "The following tests failed:"
  for test in "${FAILED_TESTS[@]}"; do
    echo -e "${RED}$test${NC}"
  done
  exit 1
fi

# -e is required here so the colour escapes are interpreted, not printed.
echo -e "${GREEN}All tests passed${NC}"
90+
env:
91+
INFINI_ROOT: ${{ env.INFINI_ROOT }}

.gitignore

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,13 @@ __pycache__/
1313

1414
# Lib
1515
lib/
16+
out/
17+
18+
# Log
19+
*.log
20+
21+
# Cache
22+
cache/
23+
24+
# Json
25+
*.json

README.md

Lines changed: 72 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,77 @@
1-
# 算子库
1+
# InfiniOperators 算子库
22

3-
跨平台高性能通用算子库。形式为 C 接口动态库。
3+
跨平台高性能统一算子库。形式为 C 接口动态库。
44

5-
采用二段式算子设计,每个算子都实现并对外暴露以下的 C 接口:
5+
## 简介
66

7-
- 第一阶段:构造算子 Descriptor。用户提供的算子名称、硬件、以及算子配置(如计算的数据类型、计算排布等),相应模组会被 load 到硬件上。
7+
### 算子接口设计
8+
9+
采用3+1段式算子设计,每个算子都实现并对外暴露以下的 C 接口:
10+
11+
- 第一阶段:构造硬件控柄(Handle)。用户提供控柄地址、硬件类型以及硬件序号。控柄所在的内存空间由用户管理。
812

913
```C
10-
void* createOpDescriptor(Device, void *config);
14+
infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr, Device device, int device_id);
1115
```
1216
13-
- 第二阶段:计算。根据一阶段的 Descriptor,执行相应计算,用户需要提供输入输出张量,以及硬件计算流(CPU 为 NULL)
17+
- 第二阶段:构造算子描述(Descriptor)。用户提供描述符地址、硬件控柄、以及算子涉及的张量描述(含张量数据类型、形状和步长)。这一步会完成算子所需的与张量数据无关的预计算
1418
1519
```C
16-
void op(void *descriptor, Tensor output, Tensor input, void *stream);
20+
infiniopStatus_t infiniopCreateOpDescriptor(infiniopHandle_t handle, infiniopOpDescriptor_t *desc_ptr, infiniopTensorDescriptor_t t, ...);
1721
```
1822

19-
- 销毁 Descriptor
23+
- 第三阶段(可选):计算额外工作空间。根据算子描述,计算算子所需的额外工作空间大小,并存储于用户提供的位置。具体空间分配由用户负责
2024

2125
```C
22-
void destroyOpDescriptor(void *descriptor);
26+
infiniopStatus_t infiniopGetOpWorkspaceSize(infiniopOpDescriptor_t desc, uint64_t *size);
2327
```
2428
29+
- 第四阶段:计算。根据算子描述符,在指定的硬件上执行相应计算,用户需要提供输入输出的数据,以及硬件计算流(CPU 为 NULL)。
30+
31+
```C
32+
infiniopStatus_t infiniopOp(infiniopOpDescriptor_t desc, [void *workspace, uint64_t workspace_size,] void *output_data, void *input_data, ..., void *stream);
33+
```
34+
35+
- 销毁描述和硬件控柄。
36+
37+
```C
38+
infiniopStatus_t infiniopDestroyOpDescriptor(infiniopOpDescriptor_t desc);
39+
infiniopStatus_t infiniopDestroyHandle(infiniopHandle_t handle);
40+
```
41+
42+
### 张量(Tensor)描述设计
43+
44+
张量描述由以下几个部分组成:
45+
46+
1.数据类型,由打包大小(即一个元素代表几个数据)、符号位、元素大小、尾数位数、指数位数共4字节表示。定义如下:
47+
48+
```C
49+
typedef struct DataLayout {
50+
unsigned short
51+
packed : 8,
52+
sign : 1,
53+
size : 7,
54+
mantissa : 8,
55+
exponent : 8;
56+
} DataLayout;
57+
```
58+
59+
2.维度信息。张量有多少个维度。类型为uint64_t。
60+
61+
3.张量形状。张量每个维度的大小。类型为uint64_t*
62+
63+
4.张量步长。张量每个维度的步长。类型为uint64_t*
64+
65+
创建和销毁张量描述符的接口:
66+
67+
```C
68+
infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, DataLayout layout, uint64_t ndim, uint64_t *shape, uint64_t *strides);
69+
infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc);
70+
```
71+
2572
## 一、使用说明
2673
27-
### 配置
74+
### 1. 配置
2875
2976
#### 查看当前配置
3077
@@ -52,23 +99,27 @@ xmake f --nv-gpu=true --cuda=$CUDA_HOME -cv
5299
xmake f --cambricon-mlu=true -cv
53100
```
54101

55-
### 编译
102+
#### 配置 NPU
103+
104+
````xmake
105+
xmake f --ascend-npu=true -cv
106+
````
107+
108+
### 2. 编译安装
56109

57110
```xmake
58-
xmake
111+
xmake build && xmake install
59112
```
60113

61-
### 将编译好的算子库添加至环境变量 `INFINI_ROOT`
114+
### 3. 设置环境变量
62115

63-
```bash
64-
export INFINI_ROOT=[PATH_TO_LIBRARY]
65-
```
116+
按输出提示设置 `INFINI_ROOT` 和 `LD_LIBRARY_PATH` 环境变量。
66117

67-
### 运行算子测试
118+
### 4. 运行算子测试
68119

69120
```bash
70121
cd operatorspy/tests
71-
python operator_name.py
122+
python operator_name.py [--cpu | --cuda | --cambricon | --ascend]
72123
```
73124

74125
## 二、开发说明
@@ -82,6 +133,8 @@ python operator_name.py
82133
│   │   ├── [operator_name].h # 对外暴露的算子 C 接口定义,descriptor 定义
83134
│   ├── tensor
84135
│   │   ├── tensor_descriptor.h # 对外暴露的张量 descriptor 定义
136+
│   ├── handle
137+
│   │   ├── handle_export.h # 对外暴露的硬件 handle 定义
85138
│   ├── *.h # 对外暴露的核心结构体定义
86139
├── src
87140
│   ├── devices
@@ -105,7 +158,7 @@ python operator_name.py
105158

106159
- 在 `src/device.h` 和 `operatorspy/devices.py` 中增加新的硬件类型,注意两者需要一一对应;
107160
- 在 `xmake.lua` 中增加新硬件的编译选项以及编译方式;
108-
- 在 `src/ops/devices/[device_name]` 下编写特定硬件的通用代码
161+
- 在 `src/ops/devices/[device_name]` 下编写特定硬件的 handle 实现和通用代码
109162
- 实现该硬件的算子;
110163

111164
### 增加新的算子

include/data_type.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,28 @@ typedef struct DataLayout {
88
size : 7,
99
mantissa : 8,
1010
exponent : 8;
11+
12+
#ifdef __cplusplus
13+
bool operator==(const DataLayout &other) const {
14+
union TypePun {
15+
DataLayout layout;
16+
unsigned int i;
17+
} pun;
18+
pun.layout = *this;
19+
auto a_ = pun.i;
20+
pun.layout = other;
21+
auto b_ = pun.i;
22+
return a_ == b_;
23+
}
24+
25+
bool operator!=(const DataLayout &other) const {
26+
return !(*this == other);
27+
}
28+
#endif
1129
} DataLayout;
1230

31+
typedef struct DataLayout DT;
32+
1333
// clang-format off
1434
const static struct DataLayout
1535
I8 = {1, 1, 1, 7, 0},

include/device.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,14 @@
22
#define __DEVICE_H__
33

44
enum DeviceEnum {
5-
DevCpu,
6-
DevNvGpu,
7-
DevCambriconMlu,
5+
DevCpu = 0,
6+
DevNvGpu = 1,
7+
DevCambriconMlu = 2,
8+
DevAscendNpu = 3,
9+
DevMetaxGpu = 4,
10+
DevMthreadsGpu = 5,
811
};
912

13+
typedef enum DeviceEnum Device;
14+
1015
#endif// __DEVICE_H__

include/handle.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Declares the handle type that the infiniop C API passes around.
#ifndef INFINIOP_HANDLE_H
2+
#define INFINIOP_HANDLE_H
3+
4+
#include "device.h"
5+
6+
// Handle record. The only member declared here is the device type
// (Device is the typedef of enum DeviceEnum from device.h).
typedef struct HandleStruct {
7+
Device device;
8+
} HandleStruct;
9+
10+
// Handles are referred to by pointer; this is the type taken by
// infiniopCreateHandle / infiniopDestroyHandle in handle/handle_export.h.
typedef HandleStruct *infiniopHandle_t;
11+
12+
#endif

include/handle/handle_export.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Exported entry points for creating and destroying a hardware handle.
// __C and __export are macros supplied by ../export.h (presumably C linkage
// and symbol export; confirm against that header).
#ifndef INFINIOP_HANDLE_EXPORT_H
2+
#define INFINIOP_HANDLE_EXPORT_H
3+
#include "../status.h"
4+
#include "../handle.h"
5+
#include "../export.h"
6+
#include "../device.h"
7+
8+
// Creates a handle for the given device type and device index.
//   handle_ptr: address at which the caller receives the handle.
//   device:     hardware type (enum DeviceEnum).
//   device_id:  index of the device of that type.
// Returns an infiniopStatus_t status code.
__C __export infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr, Device device, int device_id);
9+
10+
// Destroys a handle previously obtained from infiniopCreateHandle.
// Returns an infiniopStatus_t status code.
__C __export infiniopStatus_t infiniopDestroyHandle(infiniopHandle_t handle);
11+
12+
#endif // INFINIOP_HANDLE_EXPORT_H

include/infini_operators.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,18 @@
1+
#include "handle/handle_export.h"
2+
#include "ops/add/add.h"
3+
#include "ops/attention/attention.h"
4+
#include "ops/avg_pool/avg_pool.h"
15
#include "ops/causal_softmax/causal_softmax.h"
6+
#include "ops/global_avg_pool/global_avg_pool.h"
7+
#include "ops/expand/expand.h"
8+
#include "ops/gemm/gemm.h"
9+
#include "ops/conv/conv.h"
210
#include "ops/matmul/matmul.h"
3-
#include "ops/reform/reform.h"
11+
#include "ops/max_pool/max_pool.h"
12+
#include "ops/mlp/mlp.h"
13+
#include "ops/random_sample/random_sample.h"
14+
#include "ops/rearrange/rearrange.h"
15+
#include "ops/relu/relu.h"
416
#include "ops/rms_norm/rms_norm.h"
517
#include "ops/rotary_embedding/rotary_embedding.h"
618
#include "ops/swiglu/swiglu.h"

include/operators.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
#ifndef __OPERATORS_H__
22
#define __OPERATORS_H__
33

4-
#include "data_type.h"
54
#include "device.h"
65
#include "tensor.h"
7-
8-
typedef enum DeviceEnum Device;
9-
typedef struct DataLayout DT;
6+
#include "handle.h"
7+
#include "status.h"
108

119
#endif// __OPERATORS_H__

include/ops/add/add.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Public C interface of the Add operator: descriptor type plus the
// create / compute / destroy entry points.
#ifndef ADD_H
2+
#define ADD_H
3+
4+
#include "../../export.h"
5+
#include "../../operators.h"
6+
7+
// Descriptor record for Add. The only member declared here is the device type.
typedef struct AddDescriptor {
8+
Device device;
9+
} AddDescriptor;
10+
11+
// Add descriptors are referred to by pointer.
typedef AddDescriptor *infiniopAddDescriptor_t;
12+
13+
// Creates an Add descriptor.
//   handle:   hardware handle the descriptor is created with.
//   desc_ptr: address at which the caller receives the descriptor.
//   c, a, b:  tensor descriptors of the three operands (c is presumably the
//             output, matching the non-const `c` pointer of infiniopAdd).
// Returns an infiniopStatus_t status code.
__C __export infiniopStatus_t infiniopCreateAddDescriptor(infiniopHandle_t handle,
14+
infiniopAddDescriptor_t *desc_ptr,
15+
infiniopTensorDescriptor_t c,
16+
infiniopTensorDescriptor_t a,
17+
infiniopTensorDescriptor_t b);
18+
19+
// Runs the Add computation described by desc.
//   c:      writable data pointer.
//   a, b:   read-only data pointers.
//   stream: hardware compute stream (the README states NULL for CPU).
// Returns an infiniopStatus_t status code.
__C __export infiniopStatus_t infiniopAdd(infiniopAddDescriptor_t desc,
20+
void *c,
21+
void const *a,
22+
void const *b,
23+
void *stream);
24+
25+
// Destroys a descriptor obtained from infiniopCreateAddDescriptor.
// Returns an infiniopStatus_t status code.
__C __export infiniopStatus_t infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc);
26+
27+
#endif

0 commit comments

Comments
 (0)