diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 05df2ca..d32c22c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,25 +1,25 @@ -name: Build and test cpu -on: - push: - paths-ignore: - - '**.md' - - 'LICENSE' - pull_request: - paths: - - '**.md' - - 'LICENSE' - -jobs: - build: - name: Build - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - submodules: recursive - - - name: Build - run: make - - - name: Test cpu - run: make test-cpp +name: Build and test cpu +on: + push: + paths-ignore: + - '**.md' + - 'LICENSE' + pull_request: + paths: + - '**.md' + - 'LICENSE' + +jobs: + build: + name: Build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Build + run: make + + - name: Test cpu + run: make test-cpp diff --git a/.gitignore b/.gitignore index 98e980a..15ad474 100644 --- a/.gitignore +++ b/.gitignore @@ -1,46 +1,46 @@ -# Prerequisites -*.d - -# Compiled Object files -*.slo -*.lo -*.o -*.obj - -# Precompiled Headers -*.gch -*.pch - -# Compiled Dynamic libraries -*.so -*.dylib -*.dll - -# Fortran module files -*.mod -*.smod - -# Compiled Static libraries -*.lai -*.la -*.a -*.lib - -# Executables -*.exe -*.out -*.app - -build/ -build_debug/ - -.vscode/ - -# python -*.egg-info -*.pyc - -# onnx model -*.onnx -*.pb -*.npy +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +build/ +build_debug/ + +.vscode/ + +# python +*.egg-info +*.pyc + +# onnx model +*.onnx +*.pb +*.npy diff --git a/.gitmodules b/.gitmodules index e856b94..f7abc37 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ -[submodule "3rd-party/nlohmann_json_cmake_fetchcontent"] - path = 3rd-party/nlohmann_json_cmake_fetchcontent - url = git@github.com:ArthurSonzogni/nlohmann_json_cmake_fetchcontent.git -[submodule "3rd-party/googletest"] - path = 3rd-party/googletest - url = git@github.com:google/googletest.git +[submodule "3rd-party/nlohmann_json_cmake_fetchcontent"] + path = 3rd-party/nlohmann_json_cmake_fetchcontent + url = git@github.com:ArthurSonzogni/nlohmann_json_cmake_fetchcontent.git +[submodule "3rd-party/googletest"] + path = 3rd-party/googletest + url = git@github.com:google/googletest.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 836a7e0..62fcf74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,96 +1,96 @@ -# Do not change these options in this file. Use cmake.config, cmake -DOPTION=VALUE, or ccmake to specify them. -option(BUILD_TEST "Build tests" OFF) - -cmake_minimum_required(VERSION 3.17) - -include(CMakeDependentOption) -project(InfiniTensor C CXX) - -cmake_dependent_option(BUILD_TEST_CORE "Build tests for core components" ON BUILD_TEST OFF) - -set(DEFAULT_BUILD_TYPE "RelWithDebInfo") -# Build Type -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - message("Configuring for Debug build.") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0") - add_compile_definitions(DEBUG_MODE) -elseif(CMAKE_BUILD_TYPE STREQUAL "Release") - message("Configuring for Release build.") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") - add_compile_definitions(NDEBUG) -elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - message("Configuring for RelWithDebInfo build.") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2") -else() - message("Build type not specified. Configuring for RelWithDebInfo build.") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2") -endif() - - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_EXTENSIONS OFF) # -std=gnu++11 when on, -std=c++11 when off -add_compile_options(-Wno-error=unused-variable) - -find_package( - Python - COMPONENTS Interpreter Development - REQUIRED) - -# OpenMP -find_package(OpenMP) -if(OpenMP_C_FOUND) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") -endif() -if(OpenMP_CXX_FOUND) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -endif() - -include_directories(include) - -if(BUILD_TEST) - set(BUILD_GMOCK - OFF - CACHE BOOL "Do not build gmock" FORCE) - set(INSTALL_GTEST - OFF - CACHE BOOL "Do not install gtest" FORCE) - add_subdirectory(3rd-party/googletest) - include_directories(3rd-party/googletest/googletest/include) -endif() - -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall -Werror -Wno-error=deprecated-declarations -Wno-error=pointer-arith") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -UNDEBUG") # Enable assertion -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -UNDEBUG") # Enable assertion - - -# Source files -file(GLOB_RECURSE SRC src/core/*.cc src/kernels/cpu/*.cc src/operators/*.cc src/utils/*.cc) - -if(USE_INTELCPU) - file(GLOB_RECURSE SRC_INTELCPU src/intelcpu/*.cc src/kernels/intelcpu/*.cc ) - list (APPEND SRC ${SRC_INTELCPU}) -endif() - -# Libraries -add_library(InfiniTensor SHARED ${SRC}) - -function(build_test files) - # Non-recursive glob for skip failed tests - file(GLOB TEST_SOURCES ${files}) - foreach(testsourcefile ${TEST_SOURCES}) - get_filename_component(testname ${testsourcefile} NAME_WE) - add_executable(${testname} ${testsourcefile}) - target_link_libraries(${testname} InfiniTensor GTest::gtest_main) - add_test(NAME ${testname} COMMAND ${testname}) - endforeach(testsourcefile ${TEST_SOURCES}) -endfunction() - -if(BUILD_TEST) - add_compile_definitions(BUILD_TEST=1) - enable_testing() - if(BUILD_TEST_CORE) - build_test(test/core/*.cc) - build_test(test/operators/*.cc) - build_test(test/kernels/nativecpu/*.cc) - endif() -endif() +# Do not change these options in this file. Use cmake.config, cmake -DOPTION=VALUE, or ccmake to specify them. +option(BUILD_TEST "Build tests" OFF) + +cmake_minimum_required(VERSION 3.17) + +include(CMakeDependentOption) +project(InfiniTensor C CXX) + +cmake_dependent_option(BUILD_TEST_CORE "Build tests for core components" ON BUILD_TEST OFF) + +set(DEFAULT_BUILD_TYPE "RelWithDebInfo") +# Build Type +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + message("Configuring for Debug build.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0") + add_compile_definitions(DEBUG_MODE) +elseif(CMAKE_BUILD_TYPE STREQUAL "Release") + message("Configuring for Release build.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") + add_compile_definitions(NDEBUG) +elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + message("Configuring for RelWithDebInfo build.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2") +else() + message("Build type not specified. Configuring for RelWithDebInfo build.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2") +endif() + + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_EXTENSIONS OFF) # -std=gnu++11 when on, -std=c++11 when off +add_compile_options(-Wno-error=unused-variable) + +find_package( + Python + COMPONENTS Interpreter Development + REQUIRED) + +# OpenMP +find_package(OpenMP) +if(OpenMP_C_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +endif() +if(OpenMP_CXX_FOUND) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif() + +include_directories(include) + +if(BUILD_TEST) + set(BUILD_GMOCK + OFF + CACHE BOOL "Do not build gmock" FORCE) + set(INSTALL_GTEST + OFF + CACHE BOOL "Do not install gtest" FORCE) + add_subdirectory(3rd-party/googletest) + include_directories(3rd-party/googletest/googletest/include) +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall -Werror -Wno-error=deprecated-declarations -Wno-error=pointer-arith") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -UNDEBUG") # Enable assertion +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -UNDEBUG") # Enable assertion + + +# Source files +file(GLOB_RECURSE SRC src/core/*.cc src/kernels/cpu/*.cc src/operators/*.cc src/utils/*.cc) + +if(USE_INTELCPU) + file(GLOB_RECURSE SRC_INTELCPU src/intelcpu/*.cc src/kernels/intelcpu/*.cc ) + list (APPEND SRC ${SRC_INTELCPU}) +endif() + +# Libraries +add_library(InfiniTensor SHARED ${SRC}) + +function(build_test files) + # Non-recursive glob for skip failed tests + file(GLOB TEST_SOURCES ${files}) + foreach(testsourcefile ${TEST_SOURCES}) + get_filename_component(testname ${testsourcefile} NAME_WE) + add_executable(${testname} ${testsourcefile}) + target_link_libraries(${testname} InfiniTensor GTest::gtest_main) + add_test(NAME ${testname} COMMAND ${testname}) + endforeach(testsourcefile ${TEST_SOURCES}) +endfunction() + +if(BUILD_TEST) + add_compile_definitions(BUILD_TEST=1) + enable_testing() + if(BUILD_TEST_CORE) + build_test(test/core/*.cc) + build_test(test/operators/*.cc) + build_test(test/kernels/nativecpu/*.cc) + endif() +endif() diff --git a/LICENSE b/LICENSE index 261eeb9..29f81d8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,201 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile index 35ef7ef..1028192 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,18 @@ -.PHONY : build clean format install-python test-cpp test-onnx - -TYPE ?= Release -TEST ?= ON - -CMAKE_OPT = -DCMAKE_BUILD_TYPE=$(TYPE) -CMAKE_OPT += -DBUILD_TEST=$(TEST) - -build: - mkdir -p build/$(TYPE) - cd build/$(TYPE) && cmake $(CMAKE_OPT) ../.. && make -j8 - -clean: - rm -rf build - -test-cpp: - @echo - cd build/$(TYPE) && make test +.PHONY : build clean format install-python test-cpp test-onnx + +TYPE ?= Release +TEST ?= ON + +CMAKE_OPT = -DCMAKE_BUILD_TYPE=$(TYPE) +CMAKE_OPT += -DBUILD_TEST=$(TEST) + +build: + mkdir -p build/$(TYPE) + cd build/$(TYPE) && cmake $(CMAKE_OPT) ../.. && make -j8 + +clean: + rm -rf build + +test-cpp: + @echo + cd build/$(TYPE) && make test diff --git "a/docs/\351\241\271\347\233\256\351\203\250\347\275\262.md" "b/docs/\351\241\271\347\233\256\351\203\250\347\275\262.md" index 5690349..12ac118 100644 --- "a/docs/\351\241\271\347\233\256\351\203\250\347\275\262.md" +++ "b/docs/\351\241\271\347\233\256\351\203\250\347\275\262.md" @@ -1,35 +1,35 @@ -### 环境准备 -建议使用Linux系统或Mac系统,windows下使用WSL,配置方法和Linux一致。 - -1. 安装gcc、g++,请确认版本为 11.3 及以上的稳定版本 -``` bash -# linux 使用apt安装 -sudo apt install gcc g++ - -# mac 使用Homebrew安装 -brew install gcc -``` - -2. 安装CMake,请确认版本为 3.17 及以上的稳定版本 -``` bash -# linux 使用apt安装 -sudo apt install cmake - -# mac 使用Homebrew安装 -brew install cmake -``` - -2. 安装make -``` bash -# linux 使用apt安装 -sudo apt install make - -# mac 使用Homebrew安装 -brew install make -``` - -### 构建命令 -配置好上述环境后,进入项目目录后可以通过以下命令进行构建。 -- `make`/`make build`: 构建整个项目; -- `make test-cpp`: 构建项目后执行测例; +### 环境准备 +建议使用Linux系统或Mac系统,windows下使用WSL,配置方法和Linux一致。 + +1. 安装gcc、g++,请确认版本为 11.3 及以上的稳定版本 +``` bash +# linux 使用apt安装 +sudo apt install gcc g++ + +# mac 使用Homebrew安装 +brew install gcc +``` + +2. 安装CMake,请确认版本为 3.17 及以上的稳定版本 +``` bash +# linux 使用apt安装 +sudo apt install cmake + +# mac 使用Homebrew安装 +brew install cmake +``` + +2. 安装make +``` bash +# linux 使用apt安装 +sudo apt install make + +# mac 使用Homebrew安装 +brew install make +``` + +### 构建命令 +配置好上述环境后,进入项目目录后可以通过以下命令进行构建。 +- `make`/`make build`: 构建整个项目; +- `make test-cpp`: 构建项目后执行测例; - `make clean`:清理生成文件 \ No newline at end of file diff --git a/include/core/allocator.h b/include/core/allocator.h index 002601d..a1ca6d4 100644 --- a/include/core/allocator.h +++ b/include/core/allocator.h @@ -1,59 +1,61 @@ -#pragma once -#include "core/runtime.h" -#include "core/tensor.h" -#ifdef BUILD_TEST -#include "gtest/gtest.h" -#endif -#include -#include -#include - -namespace infini { - class Allocator - { - private: - Runtime runtime; - - size_t used; - - size_t peak; - - size_t alignment; - - // pointer to the memory actually allocated - void *ptr; - - // =================================== 作业 =================================== - // TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并 - // HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小 - // =================================== 作业 =================================== - - public: - Allocator(Runtime runtime); - - virtual ~Allocator(); - - // function: simulate memory allocation - // arguments: - // size: size of memory block to be allocated - // return: head address offset of the allocated memory block - size_t alloc(size_t size); - - // function: simulate memory free - // arguments: - // addr: head address offset of memory block to be free - // size: size of memory block to be freed - void free(size_t addr, size_t size); - - // function: perform actual memory allocation - // return: pointer to the head address of the allocated memory - void *getPtr(); - - void info(); - - private: - // function: memory alignment, rouned up - // return: size of the aligned memory block - size_t getAlignedSize(size_t size); - }; -} +#pragma once +#include "core/runtime.h" +#include "core/tensor.h" +#ifdef BUILD_TEST +#include "gtest/gtest.h" +#endif +#include +#include +#include + +namespace infini { + class Allocator + { + private: + Runtime runtime; + + size_t used; + + size_t peak; + + size_t alignment; + + // pointer to the memory actually allocated + void *ptr; + + // =================================== 作业 =================================== + // TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并 + // HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小 + // =================================== 作业 =================================== + + map free_blocks; // added + + public: + Allocator(Runtime runtime); + + virtual ~Allocator(); + + // function: simulate memory allocation + // arguments: + // size: size of memory block to be allocated + // return: head address offset of the allocated memory block + size_t alloc(size_t size); + + // function: simulate memory free + // arguments: + // addr: head address offset of memory block to be free + // size: size of memory block to be freed + void free(size_t addr, size_t size); + + // function: perform actual memory allocation + // return: pointer to the head address of the allocated memory + void *getPtr(); + + void info(); + + private: + // function: memory alignment, rouned up + // return: size of the aligned memory block + size_t getAlignedSize(size_t size); + }; +} diff --git a/include/core/blob.h b/include/core/blob.h index 01684f6..0e0955a 100644 --- a/include/core/blob.h +++ b/include/core/blob.h @@ -1,25 +1,25 @@ -#pragma once -#include "core/common.h" -#include "core/ref.h" - -namespace infini { - -class RuntimeObj; -using Runtime = Ref; - -class BlobObj -{ - Runtime runtime; - void *ptr; - -public: - BlobObj(Runtime runtime, void *ptr) : runtime(runtime), ptr(ptr) {} - BlobObj(BlobObj &other) = delete; - BlobObj &operator=(BlobObj const &) = delete; - ~BlobObj() {}; - - template - T getPtr() const { return reinterpret_cast(ptr); } -}; - -} // namespace infini +#pragma once +#include "core/common.h" +#include "core/ref.h" + +namespace infini { + +class RuntimeObj; +using Runtime = Ref; + +class BlobObj +{ + Runtime runtime; + void *ptr; + +public: + BlobObj(Runtime runtime, void *ptr) : runtime(runtime), ptr(ptr) {} + BlobObj(BlobObj &other) = delete; + BlobObj &operator=(BlobObj const &) = delete; + ~BlobObj() {}; + + template + T getPtr() const { return reinterpret_cast(ptr); } +}; + +} // namespace infini diff --git a/include/core/common.h b/include/core/common.h index e4fd65b..5fbd58f 100644 --- a/include/core/common.h +++ b/include/core/common.h @@ -1,85 +1,85 @@ -#pragma once -#include "utils/exception.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace infini { -using std::list; -using std::map; -using std::optional; -using std::pair; -using std::set; -using std::string; -using std::tie; -using std::to_string; -using std::tuple; -using std::unordered_map; -using std::vector; - -// Metaprogramming utilities -#define _CAT(A, B) A##B -#define _SELECT(NAME, NUM) _CAT(NAME##_, NUM) -#define _GET_COUNT(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, COUNT, ...) COUNT -#define _VA_SIZE(...) _GET_COUNT(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) -#define _VA_SELECT(NAME, ...) _SELECT(NAME, _VA_SIZE(__VA_ARGS__))(__VA_ARGS__) - -// Assert: conditions should have no side effect -#define _IT_ASSERT_2(condition, info) \ - static_cast(condition) \ - ? void(0) \ - : throw ::infini::Exception( \ - std::string("[") + __FILE__ + ":" + std::to_string(__LINE__) + \ - "] Assertion failed (" + #condition + "): " + info) -#define _IT_ASSERT_1(condition) _IT_ASSERT_2(condition, "") -#define IT_ASSERT(...) _VA_SELECT(_IT_ASSERT, __VA_ARGS__) - -#define IT_TODO_HALT() _IT_ASSERT_2(false, "Unimplemented") -#define IT_TODO_HALT_MSG(msg) _IT_ASSERT_2(false, msg) -#define IT_ASSERT_TODO(condition) _IT_ASSERT_2(condition, "Unimplemented") -#define IT_TODO_SKIP() puts("Unimplemented " __FILE__ ":" __LINE__) - -// std::to_underlying is avaiable since C++23 -template auto enum_to_underlying(T e) { - return static_cast>(e); -} - -template std::string vecToString(const std::vector &vec) { - std::stringstream ss; - ss << "["; - for (size_t i = 0; i < vec.size(); ++i) { - ss << vec.at(i); - if (i < vec.size() - 1) { - ss << ","; - } - } - ss << "]"; - return ss.str(); -} - -template std::string vecToString(const T *st, size_t length) { - std::stringstream ss; - ss << "["; - size_t i = 0; - for (i = 0; i < length; i++) { - ss << *(st + i); - if (i < length - 1) { - ss << ","; - } - } - ss << "]"; - return ss.str(); -} - -} // namespace infini +#pragma once +#include "utils/exception.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace infini { +using std::list; +using std::map; +using std::optional; +using std::pair; +using std::set; +using std::string; +using std::tie; +using std::to_string; +using std::tuple; +using std::unordered_map; +using std::vector; + +// Metaprogramming utilities +#define _CAT(A, B) A##B +#define _SELECT(NAME, NUM) _CAT(NAME##_, NUM) +#define _GET_COUNT(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, COUNT, ...) COUNT +#define _VA_SIZE(...) _GET_COUNT(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) +#define _VA_SELECT(NAME, ...) _SELECT(NAME, _VA_SIZE(__VA_ARGS__))(__VA_ARGS__) + +// Assert: conditions should have no side effect +#define _IT_ASSERT_2(condition, info) \ + static_cast(condition) \ + ? void(0) \ + : throw ::infini::Exception( \ + std::string("[") + __FILE__ + ":" + std::to_string(__LINE__) + \ + "] Assertion failed (" + #condition + "): " + info) +#define _IT_ASSERT_1(condition) _IT_ASSERT_2(condition, "") +#define IT_ASSERT(...) _VA_SELECT(_IT_ASSERT, __VA_ARGS__) + +#define IT_TODO_HALT() _IT_ASSERT_2(false, "Unimplemented") +#define IT_TODO_HALT_MSG(msg) _IT_ASSERT_2(false, msg) +#define IT_ASSERT_TODO(condition) _IT_ASSERT_2(condition, "Unimplemented") +#define IT_TODO_SKIP() puts("Unimplemented " __FILE__ ":" __LINE__) + +// std::to_underlying is avaiable since C++23 +template auto enum_to_underlying(T e) { + return static_cast>(e); +} + +template std::string vecToString(const std::vector &vec) { + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < vec.size(); ++i) { + ss << vec.at(i); + if (i < vec.size() - 1) { + ss << ","; + } + } + ss << "]"; + return ss.str(); +} + +template std::string vecToString(const T *st, size_t length) { + std::stringstream ss; + ss << "["; + size_t i = 0; + for (i = 0; i < length; i++) { + ss << *(st + i); + if (i < length - 1) { + ss << ","; + } + } + ss << "]"; + return ss.str(); +} + +} // namespace infini diff --git a/include/core/data_type.h b/include/core/data_type.h index aa0e126..f7fd2dc 100644 --- a/include/core/data_type.h +++ b/include/core/data_type.h @@ -1,104 +1,104 @@ -#pragma once -#include "core/common.h" -#include - -namespace infini { - -class DataType { - public: - // - static const DataType Undefine; - static const DataType Float32; - static const DataType UInt8; - static const DataType Int8; - static const DataType UInt16; - static const DataType Int16; - static const DataType Int32; - static const DataType Int64; - static const DataType String; - static const DataType Bool; - static const DataType Float16; - static const DataType Double; - static const DataType UInt32; - static const DataType UInt64; - static const DataType BFloat16; - // "sizePerElement" show the DType to cpu_type - // DataType::Bool -> int8_t DataType::Float16 -> uint16_t - static constexpr size_t sizePerElement[]{0, - sizeof(float), - sizeof(uint8_t), - sizeof(int8_t), - sizeof(uint16_t), - sizeof(int16_t), - sizeof(int32_t), - sizeof(int64_t), - sizeof(std::string), - sizeof(int8_t), - sizeof(uint16_t), - sizeof(double), - sizeof(uint32_t), - sizeof(uint64_t), - 0, - 0, - sizeof(uint16_t)}; - - static constexpr std::string_view names[]{ - "Undefine", "Float32", "UInt8", "Int8", "UInt16", - "Int16", "Int32", "Int64", "String", "Bool", - "Float16", "Double", "UInt32", "UInt64", "PlaceHolder", - "PlaceHolder", "BFloat16"}; - - static constexpr int cpuType[]{-1, 0, 2, 3, 4, 5, 6, 7, -1, - 3, 4, 9, 1, 8, -1, -1, 4}; - - private: - int index; - - public: - // FIXME: default ctor should be deleted but json requires it. Solution: - // https://github.com/nlohmann/json#how-can-i-use-get-for-non-default-constructiblenon-copyable-types - DataType() = default; - constexpr DataType(int index) : index(index) {} - bool operator==(const DataType &rhs) const { return index == rhs.index; } - bool operator<(const DataType &rhs) const { return index < rhs.index; } - - template static int get() { - IT_TODO_HALT_MSG("Unsupported data type"); - } - size_t getSize() const { return sizePerElement[index]; } - string toString() const { return string(names[index]); } - int cpuTypeInt() const { return cpuType[index]; } - int getIndex() const { return index; } -}; - -// Method definitions are out of the declaration due to GCC bug: -// https://stackoverflow.com/questions/49707184/explicit-specialization-in-non-namespace-scope-does-not-compile-in-gcc -template <> inline int DataType::get() { return 0; } -template <> inline int DataType::get() { return 1; } -template <> inline int DataType::get() { return 2; } -template <> inline int DataType::get() { return 3; } -template <> inline int DataType::get() { return 4; } -template <> inline int DataType::get() { return 5; } -template <> inline int DataType::get() { return 6; } -template <> inline int DataType::get() { return 7; } -template <> inline int DataType::get() { return 8; } -template <> inline int DataType::get() { return 9; } - -template struct DT {}; -template <> struct DT<0> { using t = bool; }; -template <> struct DT<1> { using t = float; }; -template <> struct DT<2> { using t = uint8_t; }; -template <> struct DT<3> { using t = int8_t; }; -template <> struct DT<4> { using t = uint16_t; }; -template <> struct DT<5> { using t = int16_t; }; -template <> struct DT<6> { using t = int32_t; }; -template <> struct DT<7> { using t = int64_t; }; -template <> struct DT<8> { using t = char; }; -template <> struct DT<9> { using t = int8_t; }; -template <> struct DT<10> { using t = uint16_t; }; -template <> struct DT<11> { using t = double; }; -template <> struct DT<12> { using t = uint32_t; }; -template <> struct DT<13> { using t = uint64_t; }; -template <> struct DT<16> { using t = uint16_t; }; - -} // namespace infini +#pragma once +#include "core/common.h" +#include + +namespace infini { + +class DataType { + public: + // + static const DataType Undefine; + static const DataType Float32; + static const DataType UInt8; + static const DataType Int8; + static const DataType UInt16; + static const DataType Int16; + static const DataType Int32; + static const DataType Int64; + static const DataType String; + static const DataType Bool; + static const DataType Float16; + static const DataType Double; + static const DataType UInt32; + static const DataType UInt64; + static const DataType BFloat16; + // "sizePerElement" show the DType to cpu_type + // DataType::Bool -> int8_t DataType::Float16 -> uint16_t + static constexpr size_t sizePerElement[]{0, + sizeof(float), + sizeof(uint8_t), + sizeof(int8_t), + sizeof(uint16_t), + sizeof(int16_t), + sizeof(int32_t), + sizeof(int64_t), + sizeof(std::string), + sizeof(int8_t), + sizeof(uint16_t), + sizeof(double), + sizeof(uint32_t), + sizeof(uint64_t), + 0, + 0, + sizeof(uint16_t)}; + + static constexpr std::string_view names[]{ + "Undefine", "Float32", "UInt8", "Int8", "UInt16", + "Int16", "Int32", "Int64", "String", "Bool", + "Float16", "Double", "UInt32", "UInt64", "PlaceHolder", + "PlaceHolder", "BFloat16"}; + + static constexpr int cpuType[]{-1, 0, 2, 3, 4, 5, 6, 7, -1, + 3, 4, 9, 1, 8, -1, -1, 4}; + + private: + int index; + + public: + // FIXME: default ctor should be deleted but json requires it. Solution: + // https://github.com/nlohmann/json#how-can-i-use-get-for-non-default-constructiblenon-copyable-types + DataType() = default; + constexpr DataType(int index) : index(index) {} + bool operator==(const DataType &rhs) const { return index == rhs.index; } + bool operator<(const DataType &rhs) const { return index < rhs.index; } + + template static int get() { + IT_TODO_HALT_MSG("Unsupported data type"); + } + size_t getSize() const { return sizePerElement[index]; } + string toString() const { return string(names[index]); } + int cpuTypeInt() const { return cpuType[index]; } + int getIndex() const { return index; } +}; + +// Method definitions are out of the declaration due to GCC bug: +// https://stackoverflow.com/questions/49707184/explicit-specialization-in-non-namespace-scope-does-not-compile-in-gcc +template <> inline int DataType::get() { return 0; } +template <> inline int DataType::get() { return 1; } +template <> inline int DataType::get() { return 2; } +template <> inline int DataType::get() { return 3; } +template <> inline int DataType::get() { return 4; } +template <> inline int DataType::get() { return 5; } +template <> inline int DataType::get() { return 6; } +template <> inline int DataType::get() { return 7; } +template <> inline int DataType::get() { return 8; } +template <> inline int DataType::get() { return 9; } + +template struct DT {}; +template <> struct DT<0> { using t = bool; }; +template <> struct DT<1> { using t = float; }; +template <> struct DT<2> { using t = uint8_t; }; +template <> struct DT<3> { using t = int8_t; }; +template <> struct DT<4> { using t = uint16_t; }; +template <> struct DT<5> { using t = int16_t; }; +template <> struct DT<6> { using t = int32_t; }; +template <> struct DT<7> { using t = int64_t; }; +template <> struct DT<8> { using t = char; }; +template <> struct DT<9> { using t = int8_t; }; +template <> struct DT<10> { using t = uint16_t; }; +template <> struct DT<11> { using t = double; }; +template <> struct DT<12> { using t = uint32_t; }; +template <> struct DT<13> { using t = uint64_t; }; +template <> struct DT<16> { using t = uint16_t; }; + +} // namespace infini diff --git a/include/core/graph.h b/include/core/graph.h index c45580c..6801bac 100644 --- a/include/core/graph.h +++ b/include/core/graph.h @@ -1,121 +1,121 @@ -#pragma once -#include "core/allocator.h" -#include "core/operator.h" -#include "core/tensor.h" -#include -#include - -namespace infini -{ - - class GraphObj : public Object - { - protected: - Runtime runtime; - TensorVec tensors; - OpVec ops; - Allocator allocator; - - public: - explicit GraphObj(Runtime runtime) - : runtime(runtime), allocator(runtime), sorted(false){}; - string toString() const override; - Runtime getRuntime() const { return runtime; } - - Tensor addTensor(Shape dim, DataType dtype = DataType::Float32); - Tensor addTensor(const Tensor &tensor); - TensorVec addTensor(const TensorVec &tensors); - void removeOperator(Operator op) - { - auto it = std::find(ops.begin(), ops.end(), op); - if (it != ops.end()) - ops.erase(it); - } - - void removeTensor(Tensor tensor) - { - auto it = std::find(tensors.begin(), tensors.end(), tensor); - if (it != tensors.end()) - tensors.erase(it); - } - - const TensorVec &getTensors() const { return tensors; } - const OpVec &getOperators() const { return ops; } - Tensor getTensor(int) const; - - /** - * @brief Sort the nodes in topological order. - * It returns true if the sorting is successful. - * Otherwise false is returned, means that there are rings in the graph, - * so the topological sorting fails. - */ - bool topo_sort(); - - void optimize(); - - void shape_infer(); - - void dataMalloc(); - - /** - * @brief Add an operator and create its outputs. Output tensor arguments - * should be empty Refs (e.g., nullptr). - */ - template - Ref addOp(Args &&...args) - { - Ref op = infini::make_ref(this, std::forward(args)...); - addOperatorAndConnect(op); - return op; - } - - /** - * @brief Add an operator with its outputs specified. - */ - template - Ref addOpWithOutputs(Args &&...args) - { - Ref op = infini::make_ref(nullptr, std::forward(args)...); - addOperatorAndConnect(op); - return op; - } - - /** - * @brief Gets input tensors of this graph. - */ - inline TensorVec getInputs() const - { - TensorVec ret; - for (const auto &t : tensors) - if (!t->getSource()) - ret.emplace_back(t); - return ret; - } - - /** - * @brief Gets output tensors of this graph. - */ - inline TensorVec getOutputs() const - { - TensorVec ret; - for (const auto &t : tensors) - if (t->getTargets().empty()) - ret.emplace_back(t); - return ret; - } - - bool checkValid() const; - - private: - /** - * @brief Add reverse connections and Op relationship in ctor. - */ - void addOperatorAndConnect(const Operator &op); - - /** - * @brief If the nodes is sorted in topological order. - */ - bool sorted; - }; - -} // namespace infini +#pragma once +#include "core/allocator.h" +#include "core/operator.h" +#include "core/tensor.h" +#include +#include + +namespace infini +{ + + class GraphObj : public Object + { + protected: + Runtime runtime; + TensorVec tensors; + OpVec ops; + Allocator allocator; + + public: + explicit GraphObj(Runtime runtime) + : runtime(runtime), allocator(runtime), sorted(false){}; + string toString() const override; + Runtime getRuntime() const { return runtime; } + + Tensor addTensor(Shape dim, DataType dtype = DataType::Float32); + Tensor addTensor(const Tensor &tensor); + TensorVec addTensor(const TensorVec &tensors); + void removeOperator(Operator op) + { + auto it = std::find(ops.begin(), ops.end(), op); + if (it != ops.end()) + ops.erase(it); + } + + void removeTensor(Tensor tensor) + { + auto it = std::find(tensors.begin(), tensors.end(), tensor); + if (it != tensors.end()) + tensors.erase(it); + } + + const TensorVec &getTensors() const { return tensors; } + const OpVec &getOperators() const { return ops; } + Tensor getTensor(int) const; + + /** + * @brief Sort the nodes in topological order. + * It returns true if the sorting is successful. + * Otherwise false is returned, means that there are rings in the graph, + * so the topological sorting fails. + */ + bool topo_sort(); + + void optimize(); + + void shape_infer(); + + void dataMalloc(); + + /** + * @brief Add an operator and create its outputs. Output tensor arguments + * should be empty Refs (e.g., nullptr). + */ + template + Ref addOp(Args &&...args) + { + Ref op = infini::make_ref(this, std::forward(args)...); + addOperatorAndConnect(op); + return op; + } + + /** + * @brief Add an operator with its outputs specified. + */ + template + Ref addOpWithOutputs(Args &&...args) + { + Ref op = infini::make_ref(nullptr, std::forward(args)...); + addOperatorAndConnect(op); + return op; + } + + /** + * @brief Gets input tensors of this graph. + */ + inline TensorVec getInputs() const + { + TensorVec ret; + for (const auto &t : tensors) + if (!t->getSource()) + ret.emplace_back(t); + return ret; + } + + /** + * @brief Gets output tensors of this graph. + */ + inline TensorVec getOutputs() const + { + TensorVec ret; + for (const auto &t : tensors) + if (t->getTargets().empty()) + ret.emplace_back(t); + return ret; + } + + bool checkValid() const; + + private: + /** + * @brief Add reverse connections and Op relationship in ctor. + */ + void addOperatorAndConnect(const Operator &op); + + /** + * @brief If the nodes is sorted in topological order. + */ + bool sorted; + }; + +} // namespace infini diff --git a/include/core/kernel.h b/include/core/kernel.h index a762424..6d13ba1 100644 --- a/include/core/kernel.h +++ b/include/core/kernel.h @@ -1,87 +1,87 @@ -#pragma once -#include "core/common.h" -#include "core/operator.h" -#include "core/tensor.h" -#include "utils/operator_utils.h" -#include - -namespace infini -{ - - class RuntimeObj; - - class Kernel - { - public: - Kernel() {} - virtual ~Kernel() {} - - /** - * @brief Executes an op with a default parameter. - */ - virtual void compute(const Operator &op, - const RuntimeObj *context) const = 0; - }; - - class KernelRegistry - { - public: - using KernelRecord = - tuple; // Kernel, name, ID - - private: - std::map kernels; - int nKernels = 0; - - public: - ~KernelRegistry() - { - for (auto &[k, v] : kernels) - delete std::get<0>(v); - } - static KernelRegistry &getInstance() - { - static KernelRegistry instance; - return instance; - } - bool registerKernel(const KernelAttrs &key, Kernel *kernel, string name) - { - IT_ASSERT(kernels.find(key) == kernels.end(), - "Kernel already registered"); - kernels.emplace(key, KernelRecord{kernel, name, ++nKernels}); - return true; - } - Kernel *getKernel(const KernelAttrs &kernelAttrs) const - { - auto it = kernels.find(kernelAttrs); - IT_ASSERT(it != kernels.end(), "Kernel not found for key {" + - get_kernel_attrs_str(kernelAttrs) + - "}"); - return std::get<0>(it->second); - } - const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const - { - return kernels.at(kernelAttrs); - } - }; - - class CpuKernelWithoutConfig : public Kernel - { - public: - virtual void compute(const Operator &op, - const RuntimeObj *context) const = 0; - }; - -} // namespace infini - -#define _REGISTER_KERNEL_1(device, opType, kernel, name, cnt) \ - namespace infini \ - { \ - static const bool _CAT(_register_kernel_, cnt) = \ - KernelRegistry::getInstance().registerKernel(KernelAttrs{device, \ - opType}, \ - new kernel(), name); \ - } - -#define REGISTER_KERNEL(device, opType, kernel, name) \ - _REGISTER_KERNEL_1(device, opType, kernel, name, __COUNTER__) +#pragma once +#include "core/common.h" +#include "core/operator.h" +#include "core/tensor.h" +#include "utils/operator_utils.h" +#include + +namespace infini +{ + + class RuntimeObj; + + class Kernel + { + public: + Kernel() {} + virtual ~Kernel() {} + + /** + * @brief Executes an op with a default parameter. + */ + virtual void compute(const Operator &op, + const RuntimeObj *context) const = 0; + }; + + class KernelRegistry + { + public: + using KernelRecord = + tuple; // Kernel, name, ID + + private: + std::map kernels; + int nKernels = 0; + + public: + ~KernelRegistry() + { + for (auto &[k, v] : kernels) + delete std::get<0>(v); + } + static KernelRegistry &getInstance() + { + static KernelRegistry instance; + return instance; + } + bool registerKernel(const KernelAttrs &key, Kernel *kernel, string name) + { + IT_ASSERT(kernels.find(key) == kernels.end(), + "Kernel already registered"); + kernels.emplace(key, KernelRecord{kernel, name, ++nKernels}); + return true; + } + Kernel *getKernel(const KernelAttrs &kernelAttrs) const + { + auto it = kernels.find(kernelAttrs); + IT_ASSERT(it != kernels.end(), "Kernel not found for key {" + + get_kernel_attrs_str(kernelAttrs) + + "}"); + return std::get<0>(it->second); + } + const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const + { + return kernels.at(kernelAttrs); + } + }; + + class CpuKernelWithoutConfig : public Kernel + { + public: + virtual void compute(const Operator &op, + const RuntimeObj *context) const = 0; + }; + +} // namespace infini + +#define _REGISTER_KERNEL_1(device, opType, kernel, name, cnt) \ + namespace infini \ + { \ + static const bool _CAT(_register_kernel_, cnt) = \ + KernelRegistry::getInstance().registerKernel(KernelAttrs{device, \ + opType}, \ + new kernel(), name); \ + } + +#define REGISTER_KERNEL(device, opType, kernel, name) \ + _REGISTER_KERNEL_1(device, opType, kernel, name, __COUNTER__) diff --git a/include/core/object.h b/include/core/object.h index 2db50ad..5ea4265 100644 --- a/include/core/object.h +++ b/include/core/object.h @@ -1,71 +1,71 @@ -#pragma once -#include "core/common.h" -#include "ref.h" - -namespace infini { - -using UidBaseType = int; - -class Uid { - private: - UidBaseType uid; - - public: - Uid(UidBaseType uid) : uid(uid) {} - Uid &operator=(const Uid &rhs) = delete; - - operator UidBaseType() const { return uid; } -}; - -class Guid : public Uid { - private: - UidBaseType generateGuid() { - static UidBaseType guidCnt = 0; - return ++guidCnt; - } - - public: - Guid() : Uid(generateGuid()) {} - Guid(const Guid &rhs) : Uid(generateGuid()) {} -}; - -/** - * @brief Family unique ID. Cloned tensors shared the same FUID. - */ -class Fuid : public Uid { - private: - UidBaseType generateFuid() { - static UidBaseType fuidCnt = 0; - return ++fuidCnt; - } - - public: - Fuid() : Uid(generateFuid()) {} - Fuid(const Fuid &fuid) : Uid(fuid) {} -}; - -class Object { - protected: - Guid guid; - - public: - virtual ~Object(){}; - virtual string toString() const = 0; - void print() { std::cout << toString() << std::endl; } - UidBaseType getGuid() const { return guid; } -}; - -inline std::ostream &operator<<(std::ostream &os, const Object &obj) { - os << obj.toString(); - return os; -} - -// Overload for Ref-wrapped Object -template > * = nullptr> -inline std::ostream &operator<<(std::ostream &os, const Ref &obj) { - os << obj->toString(); - return os; -} - -} // namespace infini +#pragma once +#include "core/common.h" +#include "ref.h" + +namespace infini { + +using UidBaseType = int; + +class Uid { + private: + UidBaseType uid; + + public: + Uid(UidBaseType uid) : uid(uid) {} + Uid &operator=(const Uid &rhs) = delete; + + operator UidBaseType() const { return uid; } +}; + +class Guid : public Uid { + private: + UidBaseType generateGuid() { + static UidBaseType guidCnt = 0; + return ++guidCnt; + } + + public: + Guid() : Uid(generateGuid()) {} + Guid(const Guid &rhs) : Uid(generateGuid()) {} +}; + +/** + * @brief Family unique ID. Cloned tensors shared the same FUID. + */ +class Fuid : public Uid { + private: + UidBaseType generateFuid() { + static UidBaseType fuidCnt = 0; + return ++fuidCnt; + } + + public: + Fuid() : Uid(generateFuid()) {} + Fuid(const Fuid &fuid) : Uid(fuid) {} +}; + +class Object { + protected: + Guid guid; + + public: + virtual ~Object(){}; + virtual string toString() const = 0; + void print() { std::cout << toString() << std::endl; } + UidBaseType getGuid() const { return guid; } +}; + +inline std::ostream &operator<<(std::ostream &os, const Object &obj) { + os << obj.toString(); + return os; +} + +// Overload for Ref-wrapped Object +template > * = nullptr> +inline std::ostream &operator<<(std::ostream &os, const Ref &obj) { + os << obj->toString(); + return os; +} + +} // namespace infini diff --git a/include/core/op_type.h b/include/core/op_type.h index ffe2d6e..806f09d 100644 --- a/include/core/op_type.h +++ b/include/core/op_type.h @@ -1,43 +1,43 @@ -#pragma once -#ifndef OP_TYPE_H -#define OP_TYPE_H - -#include -#include -#include - -namespace infini -{ - struct OpType - { - using underlying_t = uint16_t; - enum : underlying_t - { - Unknown, - Add, - Cast, - Clip, - Concat, - Div, - Mul, - MatMul, - Relu, - Sub, - Transpose, - - } type; - - constexpr OpType(decltype(type) t) : type(t) {} - constexpr explicit OpType(underlying_t val) : type((decltype(type))val) {} - constexpr underlying_t underlying() const { return type; } - - bool operator==(OpType others) const { return type == others.type; } - bool operator!=(OpType others) const { return type != others.type; } - bool operator<(OpType others) const { return type < others.type; } - - const char *toString() const; - }; - -} // namespace infini - -#endif // OP_TYPE_H +#pragma once +#ifndef OP_TYPE_H +#define OP_TYPE_H + +#include +#include +#include + +namespace infini +{ + struct OpType + { + using underlying_t = uint16_t; + enum : underlying_t + { + Unknown, + Add, + Cast, + Clip, + Concat, + Div, + Mul, + MatMul, + Relu, + Sub, + Transpose, + + } type; + + constexpr OpType(decltype(type) t) : type(t) {} + constexpr explicit OpType(underlying_t val) : type((decltype(type))val) {} + constexpr underlying_t underlying() const { return type; } + + bool operator==(OpType others) const { return type == others.type; } + bool operator!=(OpType others) const { return type != others.type; } + bool operator<(OpType others) const { return type < others.type; } + + const char *toString() const; + }; + +} // namespace infini + +#endif // OP_TYPE_H diff --git a/include/core/operator.h b/include/core/operator.h index 0641007..66b5596 100644 --- a/include/core/operator.h +++ b/include/core/operator.h @@ -1,93 +1,93 @@ -#pragma once - -#include "core/op_type.h" -#include "core/tensor.h" - -namespace infini -{ - using KernelAttrs = std::tuple; - - class GraphObj; - class OperatorObj : public Object - { - friend class GraphObj; - - protected: - OpType type; - TensorVec inputs; - TensorVec outputs; - vector> predecessors; - vector> successors; - - public: - OperatorObj(OpType opType, TensorVec inputs, TensorVec outputs); - virtual optional> inferShape(const TensorVec &inputs) = 0; - virtual vector inferDataType(const TensorVec &inputs) const; - /** - * @brief Constructs outputs (if requried) and check whether the operator is - * valid. - * - * @param graph If graph is not nullptr, outputs should be created in this - * function. - */ - bool checkValid(GraphObj *graph); - - public: // getter and setter - const TensorVec &getInputs() const { return inputs; } - const TensorVec &getOutputs() const { return outputs; } - Tensor getInputs(size_t i) const { return inputs.at(i); } - Tensor getOutput() const - { - IT_ASSERT(outputs.size() == 1, "Unimplemented"); - return outputs[0]; - } - Tensor getOutput(size_t i) const - { - IT_ASSERT(i < outputs.size(), "Index exceeded"); - return outputs.at(i); - } - OpVec getPredecessors() const { return wrefs_to_refs(predecessors); } - OpVec getSuccessors() const { return wrefs_to_refs(successors); } - OpType getOpType() const { return type; } - // HACK: set correct data type - DataType getDType() const { return getInputs(0)->getDType(); } - DataType getOutDType() const { return getOutput()->getDType(); } - virtual int numInputs() const = 0; - virtual int numOutputs() const = 0; - - /** - * @brief Clone this operator and replace its inputs and outputs. - * - * @param newInputs - * @param newOutputs - * @return Operator - */ - virtual Operator clone(const TensorVec &newInputs, - const TensorVec &newOutputs) const = 0; - - protected: - optional> inferShape(); - vector inferDataType() const; - - private: - void addPredecessors(const Operator &op) { predecessors.emplace_back(op); } - void addSuccessors(const Operator &op) { successors.emplace_back(op); } - void removePredecessors(const Operator &op); - void removeSuccessors(const Operator &op); - void replaceInput(Tensor t1, Tensor t2); - }; - -#define OP_CLONE(OpObj) \ - virtual Operator clone(const TensorVec &newInputs, \ - const TensorVec &newOutputs) const override \ - { \ - auto op = infini::make_ref(*this); \ - op->inputs = newInputs; \ - op->outputs = newOutputs; \ - op->predecessors.clear(); \ - op->successors.clear(); \ - IT_ASSERT(op->checkValid(nullptr)); \ - return op; \ - } - -} // namespace infini +#pragma once + +#include "core/op_type.h" +#include "core/tensor.h" + +namespace infini +{ + using KernelAttrs = std::tuple; + + class GraphObj; + class OperatorObj : public Object + { + friend class GraphObj; + + protected: + OpType type; + TensorVec inputs; + TensorVec outputs; + vector> predecessors; + vector> successors; + + public: + OperatorObj(OpType opType, TensorVec inputs, TensorVec outputs); + virtual optional> inferShape(const TensorVec &inputs) = 0; + virtual vector inferDataType(const TensorVec &inputs) const; + /** + * @brief Constructs outputs (if requried) and check whether the operator is + * valid. + * + * @param graph If graph is not nullptr, outputs should be created in this + * function. + */ + bool checkValid(GraphObj *graph); + + public: // getter and setter + const TensorVec &getInputs() const { return inputs; } + const TensorVec &getOutputs() const { return outputs; } + Tensor getInputs(size_t i) const { return inputs.at(i); } + Tensor getOutput() const + { + IT_ASSERT(outputs.size() == 1, "Unimplemented"); + return outputs[0]; + } + Tensor getOutput(size_t i) const + { + IT_ASSERT(i < outputs.size(), "Index exceeded"); + return outputs.at(i); + } + OpVec getPredecessors() const { return wrefs_to_refs(predecessors); } + OpVec getSuccessors() const { return wrefs_to_refs(successors); } + OpType getOpType() const { return type; } + // HACK: set correct data type + DataType getDType() const { return getInputs(0)->getDType(); } + DataType getOutDType() const { return getOutput()->getDType(); } + virtual int numInputs() const = 0; + virtual int numOutputs() const = 0; + + /** + * @brief Clone this operator and replace its inputs and outputs. + * + * @param newInputs + * @param newOutputs + * @return Operator + */ + virtual Operator clone(const TensorVec &newInputs, + const TensorVec &newOutputs) const = 0; + + protected: + optional> inferShape(); + vector inferDataType() const; + + private: + void addPredecessors(const Operator &op) { predecessors.emplace_back(op); } + void addSuccessors(const Operator &op) { successors.emplace_back(op); } + void removePredecessors(const Operator &op); + void removeSuccessors(const Operator &op); + void replaceInput(Tensor t1, Tensor t2); + }; + +#define OP_CLONE(OpObj) \ + virtual Operator clone(const TensorVec &newInputs, \ + const TensorVec &newOutputs) const override \ + { \ + auto op = infini::make_ref(*this); \ + op->inputs = newInputs; \ + op->outputs = newOutputs; \ + op->predecessors.clear(); \ + op->successors.clear(); \ + IT_ASSERT(op->checkValid(nullptr)); \ + return op; \ + } + +} // namespace infini diff --git a/include/core/ref.h b/include/core/ref.h index 3393f6e..d7f2976 100644 --- a/include/core/ref.h +++ b/include/core/ref.h @@ -1,43 +1,43 @@ -#pragma once -#include "core/common.h" -#include -#include -#include - -namespace infini { - -template using Ref = std::shared_ptr; -template using WRef = std::weak_ptr; - -template struct is_ref : std::false_type {}; -template struct is_ref> : std::true_type {}; -template struct is_ref> : std::true_type {}; - -template Ref make_ref(Params &&...params) { - static_assert(is_ref::value == false, "Ref should not be nested"); - return std::make_shared(std::forward(params)...); -} - -template > * = nullptr> -Ref as(const Ref &ref) { - return std::dynamic_pointer_cast(ref); -} - -template -std::vector> refs_to_wrefs(const std::vector> &refs) { - std::vector> wrefs; - for (const auto &ref : refs) - wrefs.emplace_back(ref); - return wrefs; -} - -template -std::vector> wrefs_to_refs(const std::vector> &wrefs) { - std::vector> refs; - for (const auto &wref : wrefs) - refs.emplace_back(wref); - return refs; -} - -} // namespace infini +#pragma once +#include "core/common.h" +#include +#include +#include + +namespace infini { + +template using Ref = std::shared_ptr; +template using WRef = std::weak_ptr; + +template struct is_ref : std::false_type {}; +template struct is_ref> : std::true_type {}; +template struct is_ref> : std::true_type {}; + +template Ref make_ref(Params &&...params) { + static_assert(is_ref::value == false, "Ref should not be nested"); + return std::make_shared(std::forward(params)...); +} + +template > * = nullptr> +Ref as(const Ref &ref) { + return std::dynamic_pointer_cast(ref); +} + +template +std::vector> refs_to_wrefs(const std::vector> &refs) { + std::vector> wrefs; + for (const auto &ref : refs) + wrefs.emplace_back(ref); + return wrefs; +} + +template +std::vector> wrefs_to_refs(const std::vector> &wrefs) { + std::vector> refs; + for (const auto &wref : wrefs) + refs.emplace_back(wref); + return refs; +} + +} // namespace infini diff --git a/include/core/runtime.h b/include/core/runtime.h index 1b64cd9..d70c603 100644 --- a/include/core/runtime.h +++ b/include/core/runtime.h @@ -1,69 +1,69 @@ -#pragma once -#include "core/common.h" -#include "core/op_type.h" -#include "core/ref.h" - -namespace infini -{ - class TensorObj; - class OperatorObj; - class GraphObj; - class RuntimeObj; - class BlobObj; - - using Tensor = Ref; - using Operator = Ref; - using Graph = Ref; - using Runtime = Ref; - using Blob = Ref; - - using TensorVec = vector; - using OpVec = vector; - - enum class Device - { - CPU = 1 - }; - - class RuntimeObj : public std::enable_shared_from_this - { - protected: - Device device; - - public: - explicit RuntimeObj(Device device) - : device(device) {} - RuntimeObj(RuntimeObj &other) = delete; - RuntimeObj &operator=(RuntimeObj const &) = delete; - virtual ~RuntimeObj() {} - - virtual void run(const Graph &graph) const = 0; - virtual void *alloc(size_t size) = 0; - virtual void dealloc(void *ptr) = 0; - - bool isCpu() const - { - return true; - } - - virtual string toString() const = 0; - }; - - class NativeCpuRuntimeObj : public RuntimeObj - { - public: - NativeCpuRuntimeObj() : RuntimeObj(Device::CPU) {} - - static Ref &getInstance() - { - static Ref instance = - make_ref(); - return instance; - } - void dealloc(void *ptr) override; - void run(const Graph &graph) const override; - void *alloc(size_t size) override; - string toString() const override; - }; - -} // namespace infini +#pragma once +#include "core/common.h" +#include "core/op_type.h" +#include "core/ref.h" + +namespace infini +{ + class TensorObj; + class OperatorObj; + class GraphObj; + class RuntimeObj; + class BlobObj; + + using Tensor = Ref; + using Operator = Ref; + using Graph = Ref; + using Runtime = Ref; + using Blob = Ref; + + using TensorVec = vector; + using OpVec = vector; + + enum class Device + { + CPU = 1 + }; + + class RuntimeObj : public std::enable_shared_from_this + { + protected: + Device device; + + public: + explicit RuntimeObj(Device device) + : device(device) {} + RuntimeObj(RuntimeObj &other) = delete; + RuntimeObj &operator=(RuntimeObj const &) = delete; + virtual ~RuntimeObj() {} + + virtual void run(const Graph &graph) const = 0; + virtual void *alloc(size_t size) = 0; + virtual void dealloc(void *ptr) = 0; + + bool isCpu() const + { + return true; + } + + virtual string toString() const = 0; + }; + + class NativeCpuRuntimeObj : public RuntimeObj + { + public: + NativeCpuRuntimeObj() : RuntimeObj(Device::CPU) {} + + static Ref &getInstance() + { + static Ref instance = + make_ref(); + return instance; + } + void dealloc(void *ptr) override; + void run(const Graph &graph) const override; + void *alloc(size_t size) override; + string toString() const override; + }; + +} // namespace infini diff --git a/include/core/tensor.h b/include/core/tensor.h index 93eec14..3c845e2 100644 --- a/include/core/tensor.h +++ b/include/core/tensor.h @@ -1,164 +1,164 @@ -#pragma once -#include "core/blob.h" -#include "core/data_type.h" -#include "core/object.h" -#include "core/runtime.h" -#include -#include -#include - -namespace infini -{ - class GraphObj; - using ShapeElem = int; - using Shape = vector; - class TensorObj : public Object - { - friend class GraphObj; - - protected: - int dim; - - DataType dtype; - vector> targets; - WRef source; - Blob data; - Runtime runtime; - - private: - Shape shape; - size_t _size; // Cache of Π(shape). - Fuid fuid; // Cloned tensors share the same id. Tensors constructed from - // scratch have a new id. - - public: - TensorObj(Shape shape, DataType dtype, Runtime runtime); - virtual ~TensorObj() {} - string toString() const override; - - size_t size() const { return _size; } - size_t getBytes() const { return _size * dtype.getSize(); } - - Shape getDims() const { return shape; } - void setShape(Shape shape_); - size_t getRank() const { return shape.size(); } - UidBaseType getFuid() const { return fuid; } - - void setData( - std::function const &generator) const; - - void setDataBlob(const Blob &blob); - - void printData() const; - bool equalData(const Tensor &rhs, double relativeError = 1e-6) const; - - template - bool equalData(const vector &dataVector) - { - IT_ASSERT(size() == dataVector.size()); - IT_ASSERT(DataType::get() == dtype.cpuTypeInt()); - return equalDataImpl(getRawDataPtr(), dataVector.data(), size()); - } - - template - T getRawDataPtr() const - { - static_assert(std::is_pointer_v, - "Raw data pointer has a type of pointer"); - IT_ASSERT(data != nullptr); - return data->getPtr(); - } - - DataType getDType() const { return dtype; } - Runtime getRuntime() const { return runtime; } - - OpVec getTargets() const { return wrefs_to_refs(targets); } - Operator getSource() const { return source.lock(); } - - private: - template - string dataToString() const - { - std::stringstream builder; - builder << "Tensor: " << guid << std::endl; - - auto numDims = shape.size(); - auto dimSzVec = vector(numDims, 1); - auto ptr = data->getPtr(); - dimSzVec[numDims - 1] = shape[numDims - 1]; - - for (int i = numDims - 1; i != 0; --i) - dimSzVec[i - 1] = dimSzVec[i] * shape[i - 1]; - - for (size_t i = 0, iEnd = size(); i < iEnd; ++i) - { - for (size_t j = 0; j < numDims; ++j) - if (i % dimSzVec[j] == 0) - builder << "["; - - builder << ptr[i]; - for (size_t j = 0; j < numDims; ++j) - if ((int)i % dimSzVec[j] == dimSzVec[j] - 1) - builder << "]"; - - if (i != size() - 1) - builder << ", "; - - auto column = (size_t)dimSzVec[numDims - 1]; - if (i % column == column - 1) - builder << std::endl; - } - return builder.str(); - } - - template - bool equalDataImpl(const T *a, const T *b, size_t size, - double relativeError = 1e-6) const - { - for (size_t i = 0; i < size; ++i) - { - if constexpr (std::is_integral_v) - { - if (a[i] != b[i]) - return false; - } - else if constexpr (std::is_floating_point_v) - { - if (std::min(fabs(a[i]), fabs(b[i])) == 0. && - fabs(a[i] - b[i]) > relativeError) - { - printf("Error on %lu: %f %f\n", i, a[i], b[i]); - return false; - } - else if (std::min(fabs(a[i]), fabs(b[i])) != 0. && - fabs(a[i] - b[i]) / - std::max(fabs(a[i]), fabs(b[i])) > - relativeError) - { - printf("Error on %lu: %f %f\n", i, a[i], b[i]); - return false; - } - } - else - { - static_assert(!sizeof(T), "Unsupported data type"); - } - } - return true; - } - - void addTarget(const Operator &op) { targets.emplace_back(op); } - void setSource(const Operator &op) { source = op; } - void removeTarget(const Operator &op) - { - for (auto itr = targets.begin(); itr != targets.end();) - { - if (itr->lock() == op) - itr = targets.erase(itr); - else - ++itr; - } - } - }; - -} // namespace infini +#pragma once +#include "core/blob.h" +#include "core/data_type.h" +#include "core/object.h" +#include "core/runtime.h" +#include +#include +#include + +namespace infini +{ + class GraphObj; + using ShapeElem = int; + using Shape = vector; + class TensorObj : public Object + { + friend class GraphObj; + + protected: + int dim; + + DataType dtype; + vector> targets; + WRef source; + Blob data; + Runtime runtime; + + private: + Shape shape; + size_t _size; // Cache of Π(shape). + Fuid fuid; // Cloned tensors share the same id. Tensors constructed from + // scratch have a new id. + + public: + TensorObj(Shape shape, DataType dtype, Runtime runtime); + virtual ~TensorObj() {} + string toString() const override; + + size_t size() const { return _size; } + size_t getBytes() const { return _size * dtype.getSize(); } + + Shape getDims() const { return shape; } + void setShape(Shape shape_); + size_t getRank() const { return shape.size(); } + UidBaseType getFuid() const { return fuid; } + + void setData( + std::function const &generator) const; + + void setDataBlob(const Blob &blob); + + void printData() const; + bool equalData(const Tensor &rhs, double relativeError = 1e-6) const; + + template + bool equalData(const vector &dataVector) + { + IT_ASSERT(size() == dataVector.size()); + IT_ASSERT(DataType::get() == dtype.cpuTypeInt()); + return equalDataImpl(getRawDataPtr(), dataVector.data(), size()); + } + + template + T getRawDataPtr() const + { + static_assert(std::is_pointer_v, + "Raw data pointer has a type of pointer"); + IT_ASSERT(data != nullptr); + return data->getPtr(); + } + + DataType getDType() const { return dtype; } + Runtime getRuntime() const { return runtime; } + + OpVec getTargets() const { return wrefs_to_refs(targets); } + Operator getSource() const { return source.lock(); } + + private: + template + string dataToString() const + { + std::stringstream builder; + builder << "Tensor: " << guid << std::endl; + + auto numDims = shape.size(); + auto dimSzVec = vector(numDims, 1); + auto ptr = data->getPtr(); + dimSzVec[numDims - 1] = shape[numDims - 1]; + + for (int i = numDims - 1; i != 0; --i) + dimSzVec[i - 1] = dimSzVec[i] * shape[i - 1]; + + for (size_t i = 0, iEnd = size(); i < iEnd; ++i) + { + for (size_t j = 0; j < numDims; ++j) + if (i % dimSzVec[j] == 0) + builder << "["; + + builder << ptr[i]; + for (size_t j = 0; j < numDims; ++j) + if ((int)i % dimSzVec[j] == dimSzVec[j] - 1) + builder << "]"; + + if (i != size() - 1) + builder << ", "; + + auto column = (size_t)dimSzVec[numDims - 1]; + if (i % column == column - 1) + builder << std::endl; + } + return builder.str(); + } + + template + bool equalDataImpl(const T *a, const T *b, size_t size, + double relativeError = 1e-6) const + { + for (size_t i = 0; i < size; ++i) + { + if constexpr (std::is_integral_v) + { + if (a[i] != b[i]) + return false; + } + else if constexpr (std::is_floating_point_v) + { + if (std::min(fabs(a[i]), fabs(b[i])) == 0. && + fabs(a[i] - b[i]) > relativeError) + { + printf("Error on %lu: %f %f\n", i, a[i], b[i]); + return false; + } + else if (std::min(fabs(a[i]), fabs(b[i])) != 0. && + fabs(a[i] - b[i]) / + std::max(fabs(a[i]), fabs(b[i])) > + relativeError) + { + printf("Error on %lu: %f %f\n", i, a[i], b[i]); + return false; + } + } + else + { + static_assert(!sizeof(T), "Unsupported data type"); + } + } + return true; + } + + void addTarget(const Operator &op) { targets.emplace_back(op); } + void setSource(const Operator &op) { source = op; } + void removeTarget(const Operator &op) + { + for (auto itr = targets.begin(); itr != targets.end();) + { + if (itr->lock() == op) + itr = targets.erase(itr); + else + ++itr; + } + } + }; + +} // namespace infini diff --git a/include/operators/concat.h b/include/operators/concat.h index 86287fd..d1a9591 100644 --- a/include/operators/concat.h +++ b/include/operators/concat.h @@ -1,32 +1,32 @@ -#pragma once -#include "core/operator.h" - -namespace infini { -/** - * @brief Concatenate several tensors into one. All the input tensors should - * have the same shape except for the concatenated dimension. - * - */ -class ConcatObj : public OperatorObj { - int dim; - - public: - /** - * @brief Construct a new Concat object. - * - * @param graph The computation graph that this operator belongs to. - * @param inputs The input tensors to be concatenated. - * @param output Concatenated tensor. - * @param dim The dimension to concatenate on. - */ - ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int dim); - OP_CLONE(ConcatObj); - - optional> inferShape(const TensorVec &inputs) override; - - std::string toString() const override; - int numInputs() const override { return inputs.size(); } - int numOutputs() const override { return 1; } - int getDim() const { return dim; } -}; -} // namespace infini +#pragma once +#include "core/operator.h" + +namespace infini { +/** + * @brief Concatenate several tensors into one. All the input tensors should + * have the same shape except for the concatenated dimension. + * + */ +class ConcatObj : public OperatorObj { + int dim; + + public: + /** + * @brief Construct a new Concat object. + * + * @param graph The computation graph that this operator belongs to. + * @param inputs The input tensors to be concatenated. + * @param output Concatenated tensor. + * @param dim The dimension to concatenate on. + */ + ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int dim); + OP_CLONE(ConcatObj); + + optional> inferShape(const TensorVec &inputs) override; + + std::string toString() const override; + int numInputs() const override { return inputs.size(); } + int numOutputs() const override { return 1; } + int getDim() const { return dim; } +}; +} // namespace infini diff --git a/include/operators/element_wise.h b/include/operators/element_wise.h index 4260b2d..2f64a24 100644 --- a/include/operators/element_wise.h +++ b/include/operators/element_wise.h @@ -1,47 +1,47 @@ -#pragma once -#include "core/operator.h" - -namespace infini -{ - /** - * @brief Base class of **binary** element-wise operators. - * Unary operators like activations are not the derived classes of - * ElementWiseObj. - * - */ - class ElementWiseObj : public OperatorObj - { - public: - /** - * @brief Construct a new ElementWise object - * - * @param type Operator type. - * @param graph The computation graph that this operator belongs to. - * @param input0 The first input tensor. - * @param input1 The second input tensor. - * @param output The output tensor. - */ - ElementWiseObj(OpType type, GraphObj *graph, Tensor input0, Tensor input1, - Tensor output); - optional> inferShape(const TensorVec &inputs) override; - - std::string toString() const override; - int numInputs() const override { return 2; } - int numOutputs() const override { return 1; } - }; - -#define DEFINE_ELEMENT_WISE_OBJ(prefix, type) \ - class prefix##Obj : public ElementWiseObj \ - { \ - public: \ - prefix##Obj(GraphObj *graph, Tensor input0, Tensor input1, \ - Tensor output) \ - : ElementWiseObj(type, graph, input0, input1, output) {} \ - OP_CLONE(prefix##Obj); \ - }; - - DEFINE_ELEMENT_WISE_OBJ(Add, OpType::Add) - DEFINE_ELEMENT_WISE_OBJ(Sub, OpType::Sub) - DEFINE_ELEMENT_WISE_OBJ(Mul, OpType::Mul) - DEFINE_ELEMENT_WISE_OBJ(Div, OpType::Div) -}; // namespace infini +#pragma once +#include "core/operator.h" + +namespace infini +{ + /** + * @brief Base class of **binary** element-wise operators. + * Unary operators like activations are not the derived classes of + * ElementWiseObj. + * + */ + class ElementWiseObj : public OperatorObj + { + public: + /** + * @brief Construct a new ElementWise object + * + * @param type Operator type. + * @param graph The computation graph that this operator belongs to. + * @param input0 The first input tensor. + * @param input1 The second input tensor. + * @param output The output tensor. + */ + ElementWiseObj(OpType type, GraphObj *graph, Tensor input0, Tensor input1, + Tensor output); + optional> inferShape(const TensorVec &inputs) override; + + std::string toString() const override; + int numInputs() const override { return 2; } + int numOutputs() const override { return 1; } + }; + +#define DEFINE_ELEMENT_WISE_OBJ(prefix, type) \ + class prefix##Obj : public ElementWiseObj \ + { \ + public: \ + prefix##Obj(GraphObj *graph, Tensor input0, Tensor input1, \ + Tensor output) \ + : ElementWiseObj(type, graph, input0, input1, output) {} \ + OP_CLONE(prefix##Obj); \ + }; + + DEFINE_ELEMENT_WISE_OBJ(Add, OpType::Add) + DEFINE_ELEMENT_WISE_OBJ(Sub, OpType::Sub) + DEFINE_ELEMENT_WISE_OBJ(Mul, OpType::Mul) + DEFINE_ELEMENT_WISE_OBJ(Div, OpType::Div) +}; // namespace infini diff --git a/include/operators/matmul.h b/include/operators/matmul.h index 4925895..517edff 100644 --- a/include/operators/matmul.h +++ b/include/operators/matmul.h @@ -1,60 +1,60 @@ -#pragma once -#include "core/operator.h" - -namespace infini -{ - /** - * @brief Matrix multiplication. - * - */ - class MatmulObj : public OperatorObj - { - private: - // InfiniTensor assumes a row-major tensor layout. `transA`=false means - // default dims, true means A should be transposed before matmul. This is in - // oppsite to the column-major BLAS. - bool transA, transB; - - // Auxiliary attributes which are not a part of operator attributes. - int m, n, k; - - public: - /** - * @brief Matmul operator with batch broadcast and tensor transpose - * supports. Only one tensor with singe batch can be broadcasted due to the - * BLAS interface restriction. Tranpose indicates whether the last two - * dimensions should be transposed before Matmul and does not affect other - * leading dimensions. - * - * Matmul show how operators are defined in InfiniTensor. The constructor of - * an operator can create output tensors for the operator or not, which - * depends on `graph`. - * - * @param graph The computation graph that this operator belongs to. - * @param A The input tensor. - * @param B The input tensor. - * @param C C is the output of Matmul. If outputs are going to be created in - * the constructor, C should be an empty Ref. - * @param transA If matrix A should be transposed when computing. - * @param transB If matrix B should be transposed when computing. - */ - MatmulObj(GraphObj *graph, Tensor A, Tensor B, Tensor C, - bool transA = false, bool transB = false); - OP_CLONE(MatmulObj); - - std::string toString() const override; - optional> inferShape(const TensorVec &inputs) override; - - int numInputs() const override { return inputs.size(); } - int numOutputs() const override { return 1; } - - bool getTransA() const { return transA; } - bool getTransB() const { return transB; } - void setTransA(bool transA) { this->transA = transA; } - void setTransB(bool transB) { this->transB = transB; } - int getM() const { return m; } - int getN() const { return n; } - int getK() const { return k; } - }; - +#pragma once +#include "core/operator.h" + +namespace infini +{ + /** + * @brief Matrix multiplication. + * + */ + class MatmulObj : public OperatorObj + { + private: + // InfiniTensor assumes a row-major tensor layout. `transA`=false means + // default dims, true means A should be transposed before matmul. This is in + // oppsite to the column-major BLAS. + bool transA, transB; + + // Auxiliary attributes which are not a part of operator attributes. + int m, n, k; + + public: + /** + * @brief Matmul operator with batch broadcast and tensor transpose + * supports. Only one tensor with singe batch can be broadcasted due to the + * BLAS interface restriction. Tranpose indicates whether the last two + * dimensions should be transposed before Matmul and does not affect other + * leading dimensions. + * + * Matmul show how operators are defined in InfiniTensor. The constructor of + * an operator can create output tensors for the operator or not, which + * depends on `graph`. + * + * @param graph The computation graph that this operator belongs to. + * @param A The input tensor. + * @param B The input tensor. + * @param C C is the output of Matmul. If outputs are going to be created in + * the constructor, C should be an empty Ref. + * @param transA If matrix A should be transposed when computing. + * @param transB If matrix B should be transposed when computing. + */ + MatmulObj(GraphObj *graph, Tensor A, Tensor B, Tensor C, + bool transA = false, bool transB = false); + OP_CLONE(MatmulObj); + + std::string toString() const override; + optional> inferShape(const TensorVec &inputs) override; + + int numInputs() const override { return inputs.size(); } + int numOutputs() const override { return 1; } + + bool getTransA() const { return transA; } + bool getTransB() const { return transB; } + void setTransA(bool transA) { this->transA = transA; } + void setTransB(bool transB) { this->transB = transB; } + int getM() const { return m; } + int getN() const { return n; } + int getK() const { return k; } + }; + } // namespace infini \ No newline at end of file diff --git a/include/operators/transpose.h b/include/operators/transpose.h index c32bbe5..d5dbb71 100644 --- a/include/operators/transpose.h +++ b/include/operators/transpose.h @@ -1,34 +1,34 @@ -#pragma once -#include "core/operator.h" - -namespace infini -{ - /** - * @brief Transpose the input tensor similar to numpy.transpose. - * - */ - class TransposeObj : public OperatorObj - { - public: - /** - * @brief Construct a new TransposeObj object. - * - * @param graph The graph to which this operator belongs. - * @param input The input tensor. - * @param output The output tensor. - * @param permute The permutation of the dimensions. - */ - TransposeObj(GraphObj *graph, Tensor input, Tensor output, - vector permute); - OP_CLONE(TransposeObj); - optional> inferShape(const TensorVec &inputs) override; - - std::string toString() const override; - int numInputs() const override { return 1; } - int numOutputs() const override { return 1; } - std::vector getPermute() const { return transposePermute; } - - private: - vector transposePermute; - }; -} // namespace infini +#pragma once +#include "core/operator.h" + +namespace infini +{ + /** + * @brief Transpose the input tensor similar to numpy.transpose. + * + */ + class TransposeObj : public OperatorObj + { + public: + /** + * @brief Construct a new TransposeObj object. + * + * @param graph The graph to which this operator belongs. + * @param input The input tensor. + * @param output The output tensor. + * @param permute The permutation of the dimensions. + */ + TransposeObj(GraphObj *graph, Tensor input, Tensor output, + vector permute); + OP_CLONE(TransposeObj); + optional> inferShape(const TensorVec &inputs) override; + + std::string toString() const override; + int numInputs() const override { return 1; } + int numOutputs() const override { return 1; } + std::vector getPermute() const { return transposePermute; } + + private: + vector transposePermute; + }; +} // namespace infini diff --git a/include/operators/unary.h b/include/operators/unary.h index 83f3dd3..16e5755 100644 --- a/include/operators/unary.h +++ b/include/operators/unary.h @@ -1,104 +1,104 @@ -#pragma once -#include "core/operator.h" - -namespace infini -{ - /** - * @brief The base class for unary operators. - * - */ - class UnaryObj : public OperatorObj - { - public: - /** - * @brief Construct a new Unary object. - * - * @param type Operator type. - * @param graph The computation graph that this operator belongs to. - * @param input The input tensor. - * @param output The output tensor. - */ - UnaryObj(OpType type, GraphObj *graph, Tensor input, Tensor output); - optional> inferShape(const TensorVec &inputs) override; - - std::string toString() const override; - int numInputs() const override { return 1; } - int numOutputs() const override { return 1; } - }; - - class ClipObj : public OperatorObj - { - public: - ClipObj(GraphObj *graph, Tensor input, Tensor output, - std::optional min, std::optional max); - OP_CLONE(ClipObj); - optional> inferShape(const TensorVec &inputs) override; - - std::string toString() const override; - std::optional getMin() const { return minValue; }; - std::optional getMax() const { return maxValue; }; - int numInputs() const override { return 1; } - int numOutputs() const override { return 1; } - - private: - std::optional minValue, maxValue; - }; - - enum class CastType - { - Float2Float16 = 0, - Float2Int64, - Float2Int32, - Float2Int16, - Float2Int8, - Float2BFloat16, - Int322Float, - Int322Int8, - Int322Int16, - Int322Int64, - Int162Float, - Int162Int32, - Int82Float, - Int82Int16, - Int82Int32, - Uint82Float, - Uint82Int32, - Uint82Int64, - Int642Int32, - Int642Uint32, - Int642Float, - Uint322Int64, - Float162Float, - BFloat162Float, - Float2Float, - }; - - class CastObj : public OperatorObj - { - public: - CastObj(GraphObj *graph, Tensor input, Tensor output, CastType type); - OP_CLONE(CastObj); - optional> inferShape(const TensorVec &inputs) override; - vector inferDataType(const TensorVec &inputs) const override; - - std::string toString() const override; - CastType getType() const { return castType; } - DataType getOutputDataType() const; - int numInputs() const override { return 1; } - int numOutputs() const override { return 1; } - - private: - CastType castType; - }; - -#define DEFINE_UNARY_OBJ(prefix, type) \ - class prefix##Obj : public UnaryObj \ - { \ - public: \ - prefix##Obj(GraphObj *graph, Tensor input, Tensor output) \ - : UnaryObj(type, graph, input, output) {} \ - OP_CLONE(prefix##Obj); \ - }; - - DEFINE_UNARY_OBJ(Relu, OpType::Relu) -}; // namespace infini +#pragma once +#include "core/operator.h" + +namespace infini +{ + /** + * @brief The base class for unary operators. + * + */ + class UnaryObj : public OperatorObj + { + public: + /** + * @brief Construct a new Unary object. + * + * @param type Operator type. + * @param graph The computation graph that this operator belongs to. + * @param input The input tensor. + * @param output The output tensor. + */ + UnaryObj(OpType type, GraphObj *graph, Tensor input, Tensor output); + optional> inferShape(const TensorVec &inputs) override; + + std::string toString() const override; + int numInputs() const override { return 1; } + int numOutputs() const override { return 1; } + }; + + class ClipObj : public OperatorObj + { + public: + ClipObj(GraphObj *graph, Tensor input, Tensor output, + std::optional min, std::optional max); + OP_CLONE(ClipObj); + optional> inferShape(const TensorVec &inputs) override; + + std::string toString() const override; + std::optional getMin() const { return minValue; }; + std::optional getMax() const { return maxValue; }; + int numInputs() const override { return 1; } + int numOutputs() const override { return 1; } + + private: + std::optional minValue, maxValue; + }; + + enum class CastType + { + Float2Float16 = 0, + Float2Int64, + Float2Int32, + Float2Int16, + Float2Int8, + Float2BFloat16, + Int322Float, + Int322Int8, + Int322Int16, + Int322Int64, + Int162Float, + Int162Int32, + Int82Float, + Int82Int16, + Int82Int32, + Uint82Float, + Uint82Int32, + Uint82Int64, + Int642Int32, + Int642Uint32, + Int642Float, + Uint322Int64, + Float162Float, + BFloat162Float, + Float2Float, + }; + + class CastObj : public OperatorObj + { + public: + CastObj(GraphObj *graph, Tensor input, Tensor output, CastType type); + OP_CLONE(CastObj); + optional> inferShape(const TensorVec &inputs) override; + vector inferDataType(const TensorVec &inputs) const override; + + std::string toString() const override; + CastType getType() const { return castType; } + DataType getOutputDataType() const; + int numInputs() const override { return 1; } + int numOutputs() const override { return 1; } + + private: + CastType castType; + }; + +#define DEFINE_UNARY_OBJ(prefix, type) \ + class prefix##Obj : public UnaryObj \ + { \ + public: \ + prefix##Obj(GraphObj *graph, Tensor input, Tensor output) \ + : UnaryObj(type, graph, input, output) {} \ + OP_CLONE(prefix##Obj); \ + }; + + DEFINE_UNARY_OBJ(Relu, OpType::Relu) +}; // namespace infini diff --git a/include/test.h b/include/test.h index cef5a3e..a7d6341 100644 --- a/include/test.h +++ b/include/test.h @@ -1,4 +1,4 @@ -#pragma once -#include "core/common.h" -#include "utils/data_generator.h" -#include "gtest/gtest.h" +#pragma once +#include "core/common.h" +#include "utils/data_generator.h" +#include "gtest/gtest.h" diff --git a/include/utils/data_generator.h b/include/utils/data_generator.h index 1b7d91a..4c23575 100644 --- a/include/utils/data_generator.h +++ b/include/utils/data_generator.h @@ -1,59 +1,59 @@ -#pragma once -#include "core/common.h" -#include - -namespace infini { - -class DataGenerator { - private: - virtual void fill(uint32_t *data, size_t size) { IT_TODO_HALT(); } - virtual void fill(float *data, size_t size) { IT_TODO_HALT(); } - -public: - virtual ~DataGenerator() {} - void operator()(void *data, size_t size, DataType dataType) { - if (dataType == DataType::UInt32) - fill(reinterpret_cast(data), size); - else if (dataType == DataType::Float32) - fill(reinterpret_cast(data), size); - else - IT_TODO_HALT(); - } -}; - -class IncrementalGenerator : public DataGenerator { - public: - virtual ~IncrementalGenerator() {} - - private: - template void fill(T *data, size_t size) { - for (size_t i = 0; i < size; i++) { - data[i] = i; - } - } - - void fill(uint32_t *data, size_t size) override { - fill(data, size); - } - void fill(float *data, size_t size) override { fill(data, size); } -}; - -template class ValGenerator : public DataGenerator { - public: - virtual ~ValGenerator() {} - - private: - template void fill(T *data, size_t size) { - for (size_t i = 0; i < size; i++) { - data[i] = val; - } - } - - void fill(uint32_t *data, size_t size) override { - fill(data, size); - } - void fill(float *data, size_t size) override { fill(data, size); } -}; -typedef ValGenerator<1> OneGenerator; -typedef ValGenerator<0> ZeroGenerator; -} // namespace infini +#pragma once +#include "core/common.h" +#include + +namespace infini { + +class DataGenerator { + private: + virtual void fill(uint32_t *data, size_t size) { IT_TODO_HALT(); } + virtual void fill(float *data, size_t size) { IT_TODO_HALT(); } + +public: + virtual ~DataGenerator() {} + void operator()(void *data, size_t size, DataType dataType) { + if (dataType == DataType::UInt32) + fill(reinterpret_cast(data), size); + else if (dataType == DataType::Float32) + fill(reinterpret_cast(data), size); + else + IT_TODO_HALT(); + } +}; + +class IncrementalGenerator : public DataGenerator { + public: + virtual ~IncrementalGenerator() {} + + private: + template void fill(T *data, size_t size) { + for (size_t i = 0; i < size; i++) { + data[i] = i; + } + } + + void fill(uint32_t *data, size_t size) override { + fill(data, size); + } + void fill(float *data, size_t size) override { fill(data, size); } +}; + +template class ValGenerator : public DataGenerator { + public: + virtual ~ValGenerator() {} + + private: + template void fill(T *data, size_t size) { + for (size_t i = 0; i < size; i++) { + data[i] = val; + } + } + + void fill(uint32_t *data, size_t size) override { + fill(data, size); + } + void fill(float *data, size_t size) override { fill(data, size); } +}; +typedef ValGenerator<1> OneGenerator; +typedef ValGenerator<0> ZeroGenerator; +} // namespace infini diff --git a/include/utils/exception.h b/include/utils/exception.h index d7bb433..6fa62a1 100644 --- a/include/utils/exception.h +++ b/include/utils/exception.h @@ -1,22 +1,22 @@ -#pragma once -#include -#include - -namespace infini { - -class Exception : public std::runtime_error { - protected: - std::string info; - - public: - Exception(const std::string &msg); - - Exception &operator<<(const std::string &str) { - info += str; - return *this; - } - - const char *what() const noexcept override { return info.c_str(); } -}; - -} // namespace infini +#pragma once +#include +#include + +namespace infini { + +class Exception : public std::runtime_error { + protected: + std::string info; + + public: + Exception(const std::string &msg); + + Exception &operator<<(const std::string &str) { + info += str; + return *this; + } + + const char *what() const noexcept override { return info.c_str(); } +}; + +} // namespace infini diff --git a/include/utils/operator_utils.h b/include/utils/operator_utils.h index e3a2373..7f6dd29 100644 --- a/include/utils/operator_utils.h +++ b/include/utils/operator_utils.h @@ -1,26 +1,26 @@ -#pragma once -#ifndef OPERATOR_UTIL_H -#define OPERATOR_UTIL_H - -#include "core/operator.h" -#include "core/tensor.h" - -#include - -namespace infini { - -// Launch a broadcast shape based on the shape of input A and B -Shape infer_broadcast(const Shape &A, const Shape &B); -// Launch the real axis based on rank and current axis -int get_real_axis(const int &axis, const int &rank); -// Locate the index with size from Shape -Shape locate_index(size_t inputN, const Shape &shape); -// Delocate the ShapeIndex from Shape with broadcast -size_t delocate_index(const Shape &shapeIndex, const Shape &shape, - const Shape &stride); -// Convert KernelAttrs to a string representation -std::string get_kernel_attrs_str(const KernelAttrs &kernelAttrs); - -} // namespace infini - -#endif +#pragma once +#ifndef OPERATOR_UTIL_H +#define OPERATOR_UTIL_H + +#include "core/operator.h" +#include "core/tensor.h" + +#include + +namespace infini { + +// Launch a broadcast shape based on the shape of input A and B +Shape infer_broadcast(const Shape &A, const Shape &B); +// Launch the real axis based on rank and current axis +int get_real_axis(const int &axis, const int &rank); +// Locate the index with size from Shape +Shape locate_index(size_t inputN, const Shape &shape); +// Delocate the ShapeIndex from Shape with broadcast +size_t delocate_index(const Shape &shapeIndex, const Shape &shape, + const Shape &stride); +// Convert KernelAttrs to a string representation +std::string get_kernel_attrs_str(const KernelAttrs &kernelAttrs); + +} // namespace infini + +#endif diff --git a/src/core/allocator.cc b/src/core/allocator.cc index ff593ae..263a3c0 100644 --- a/src/core/allocator.cc +++ b/src/core/allocator.cc @@ -25,15 +25,39 @@ namespace infini size_t Allocator::alloc(size_t size) { + IT_ASSERT(size > 0); IT_ASSERT(this->ptr == nullptr); // pad the size to the multiple of alignment size = this->getAlignedSize(size); - + size_t ret_offset = 0; // =================================== 作业 =================================== // TODO: 设计一个算法来分配内存,返回起始地址偏移量 // =================================== 作业 =================================== - - return 0; + auto tar_free_block = free_blocks.end();//记录最小适配空闲位置 + auto end_free_block = tar_free_block; + for(auto it = free_blocks.begin(); it!=free_blocks.end(); it++){ + if(it->second >= size){ + + if(tar_free_block!= end_free_block){ + if(it->second< tar_free_block->second){ + tar_free_block = it; + } + }else{ + tar_free_block=it; + } + } + } + if(tar_free_block != end_free_block){//找到了合适的空间 + ret_offset = tar_free_block->first + tar_free_block->second - size;//从后向前alloc空间 + tar_free_block->second -=size; + if(tar_free_block->second==0) free_blocks.erase(tar_free_block); + }else{//没有找到 + ret_offset = used; + used += size; + peak = used; //peak 只需要在used变化是进行更新 + } + + return ret_offset;// } void Allocator::free(size_t addr, size_t size) @@ -44,6 +68,23 @@ namespace infini // =================================== 作业 =================================== // TODO: 设计一个算法来回收内存 // =================================== 作业 =================================== + free_blocks[addr] = size; + used -= size; + auto last = free_blocks.begin(); + if ( last == free_blocks.end()) return; + auto it = last; + it++; + for( ; it!=free_blocks.end() ; ) { + if(last->first+last->second == it->first){ + auto need_remove = it; + it++; + free_blocks.erase(need_remove); + }else{ + last = it; + it++; + } + } + } void *Allocator::getPtr() diff --git a/src/core/graph.cc b/src/core/graph.cc index 3a90637..28fcaa5 100644 --- a/src/core/graph.cc +++ b/src/core/graph.cc @@ -2,6 +2,10 @@ #include #include #include +#include "operators/matmul.h" +#include "operators/transpose.h" + +#include "core/ref.h" namespace infini { @@ -106,6 +110,97 @@ namespace infini // 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除) // 2. 合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去) // =================================== 作业 =================================== + + if (ops.size()==0) return; + topo_sort(); + //1 + OpVec need_to_remove_operators; + for(auto op: ops){ + switch(op->getOpType().type){ + case OpType::Transpose: + { + auto input = op->getInputs()[0]; + auto output = op->getOutput(); + if (output->getTargets().size() == 1) // only one target + { + auto next_op = output->getTargets()[0]; + if (next_op->getOpType() == OpType::Transpose) + { + auto next_output = next_op->getOutput(); + if (next_output->getDims() == input->getDims()) + { + need_to_remove_operators.push_back(op); + need_to_remove_operators.push_back(next_op); + for (auto target : next_output->getTargets()) + { + input->addTarget(target); + target->replaceInput(next_output, input); + target->removePredecessors(next_op); + } + input->removeTarget(op); + removeTensor(output); + removeTensor(next_output); + } + } + } + break; + + } + + case OpType::MatMul: + { + auto matlut_op = as(op); + auto input_a = op->getInputs()[0]; + auto input_b = op->getInputs()[1]; + auto output = op->getOutput(); + if (input_a->getSource() && input_a->getSource()->getOpType() == OpType::Transpose) + { + auto transpose_op = input_a->getSource(); + if (as(transpose_op)->getPermute() == Shape{0, 1, 3, 2}) + { + if(transpose_op->getOutput()->getTargets().size() == 1){ + matlut_op->setTransB(true); + auto transpose_input = transpose_op->getInputs()[0]; + transpose_input->removeTarget(transpose_op); + transpose_input->addTarget(matlut_op); + matlut_op->replaceInput(input_a, transpose_input); + matlut_op->removePredecessors(transpose_op); + need_to_remove_operators.push_back(transpose_op); + removeTensor(transpose_op->getOutput()); + } + } + } + if (input_b->getSource() && input_b->getSource()->getOpType() == OpType::Transpose) + { + auto transpose_op = input_b->getSource(); + if (as(transpose_op)->getPermute() == Shape{0, 1, 3, 2}) + { + if(transpose_op->getOutput()->getTargets().size() == 1){ + matlut_op->setTransB(true); + auto transpose_input = transpose_op->getInputs()[0]; + transpose_input->removeTarget(transpose_op); + transpose_input->addTarget(matlut_op); + matlut_op->replaceInput(input_b, transpose_input); + matlut_op->removePredecessors(transpose_op); + need_to_remove_operators.push_back(transpose_op); + removeTensor(transpose_op->getOutput()); + } + } + } + + } + break; + default: + // 其余算子 + break; + } + + } + for (auto op : need_to_remove_operators) + { + removeOperator(op); + } + } Tensor GraphObj::getTensor(int fuid) const @@ -152,7 +247,19 @@ namespace infini // TODO:利用 allocator 给计算图分配内存 // HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存 // =================================== 作业 =================================== - + + std::unordered_map offsets;//为每一个tensor申请空间 + for (auto &tensor : tensors) + { + auto offset = allocator.alloc(tensor->getBytes()); + offsets[tensor->getFuid()] = offset; + } + auto base_addr = static_cast(allocator.getPtr()); + for (auto &tensor : tensors) + { + auto offset = offsets[tensor->getFuid()]; + tensor->setDataBlob(make_ref(runtime, base_addr + offset)); + } allocator.info(); } diff --git a/src/operators/concat.cc b/src/operators/concat.cc index d196330..b3bd424 100644 --- a/src/operators/concat.cc +++ b/src/operators/concat.cc @@ -12,12 +12,17 @@ ConcatObj::ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int _dim) optional> ConcatObj::inferShape(const TensorVec &inputs) { Shape dims = inputs[0]->getDims(); auto rank = inputs[0]->getRank(); - + IT_ASSERT(inputs.size()>1); + // =================================== 作业 =================================== // TODO:修改 dims,返回正确的 concat 后的 shape // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13 // =================================== 作业 =================================== + for(size_t i = 1; i < inputs.size(); i++) + { + dims[dim] += inputs[i]->getDims()[dim]; + } return {{dims}}; } diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc index 7a16ca2..4a28378 100644 --- a/src/operators/matmul.cc +++ b/src/operators/matmul.cc @@ -27,7 +27,14 @@ namespace infini // TODO:返回经过 matmul 操作后的 shape // REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm // =================================== 作业 =================================== - return std::nullopt; + + Shape shape_a = inputs[0]->getDims(); + Shape shape_b = inputs[1]->getDims(); + if(transA) std::swap(shape_a[shape_a.size()-2], shape_a[shape_a.size()-1]); + if(transB) std::swap(shape_b[shape_a.size()-2], shape_b[shape_a.size()-1]); + Shape ret = shape_a; + ret[ret.size()-1] = shape_b[shape_b.size()-1]; + return {{ret}}; } } // namespace infini \ No newline at end of file diff --git a/src/operators/transpose.cc b/src/operators/transpose.cc index faab2b6..8f5a029 100644 --- a/src/operators/transpose.cc +++ b/src/operators/transpose.cc @@ -33,8 +33,11 @@ namespace infini // TODO:修改 output_dim,返回正确的 transpose 后的 shape // REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21 // =================================== 作业 =================================== - - return std::nullopt; + for (int i = 0; i < rank; i++) + { + output_dim[i] = input_dim[transposePermute[i]]; + } + return {{output_dim}}; } std::string TransposeObj::toString() const diff --git a/src/operators/unary.cc b/src/operators/unary.cc index 3daad36..56e4222 100644 --- a/src/operators/unary.cc +++ b/src/operators/unary.cc @@ -39,7 +39,9 @@ namespace infini // TODO:返回经过 clip 操作后的 shape // REF: https://onnx.ai/onnx/operators/onnx__Clip.html#clip-13 // =================================== 作业 =================================== - return std::nullopt; + auto len = inputs.size(); + IT_ASSERT(len>0 && len<4); + return {{inputs[0]->getDims()}};; } std::string ClipObj::toString() const @@ -66,7 +68,7 @@ namespace infini // REF_FILE: src/core/operator.cc // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21 // =================================== 作业 =================================== - return {}; + return vector(numOutputs(), getOutputDataType()); } optional> CastObj::inferShape(const TensorVec &inputs) @@ -75,7 +77,10 @@ namespace infini // TODO:返回经过 cast 操作后的 shape // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21 // =================================== 作业 =================================== - return std::nullopt; + + IT_ASSERT(!inputs.empty(), "Empty input"); + return {{inputs[0]->getDims()}}; + // return std::nullopt; } std::string CastObj::toString() const diff --git a/src/utils/operator_utils.cc b/src/utils/operator_utils.cc index edbd2c8..b8c9d00 100644 --- a/src/utils/operator_utils.cc +++ b/src/utils/operator_utils.cc @@ -9,8 +9,24 @@ Shape infer_broadcast(const Shape &A, const Shape &B) { // TODO:对 A 和 B 进行双向广播,返回广播后的形状。 // REF: https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md // =================================== 作业 =================================== - - return {}; + Shape ans; + auto a_l = A.size(); + auto b_l = B.size(); + while (a_l > 0 || b_l > 0){ + if (a_l > 0 && b_l > 0) { + ans.insert(ans.begin(), std::max(A[a_l-1], B[b_l-1])); + a_l--; + b_l--; + } else if (a_l > 0) { + ans.insert(ans.begin(), A[a_l-1]); + a_l--; + } else { + ans.insert(ans.begin(), B[b_l-1]); + b_l--; + } + } + + return ans; } int get_real_axis(const int &axis, const int &rank) { diff --git a/test/core/test_allocator.cc b/test/core/test_allocator.cc index 0515edc..71b1388 100644 --- a/test/core/test_allocator.cc +++ b/test/core/test_allocator.cc @@ -1,74 +1,74 @@ -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/unary.h" - -#include "test.h" - -namespace infini -{ - TEST(Allocator, testAlloc) - { - Shape shape = Shape{1, 2, 2, 3}; - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - Tensor a = make_ref(shape, DataType::Float32, runtime); - Tensor b = make_ref(shape, DataType::Float32, runtime); - Tensor c = make_ref(shape, DataType::Float32, runtime); - Tensor d = make_ref(shape, DataType::Float32, runtime); - Allocator allocator = Allocator(runtime); - // allocate a->b->c - size_t offsetA = allocator.alloc(a->getBytes()); - size_t offsetB = allocator.alloc(b->getBytes()); - size_t offsetC = allocator.alloc(c->getBytes()); - // free b, then allocate d - allocator.free(offsetB, b->getBytes()); - size_t offsetD = allocator.alloc(d->getBytes()); - // expected to be a->d->c - EXPECT_EQ(offsetB, offsetD); - ASSERT_FALSE(offsetA == 0 && offsetB == 0 && offsetC == 0 && offsetD == 0); - } - - TEST(Allocator, testAllocWithEndFreeBlock) - { - Shape shape = Shape{1, 2, 2, 3}; - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - Tensor a = make_ref(shape, DataType::Float32, runtime); - Tensor b = make_ref(shape, DataType::Float32, runtime); - Tensor c = make_ref(shape, DataType::Float32, runtime); - Tensor d = - make_ref(Shape{2, 2, 2, 3}, DataType::Float32, runtime); - Allocator allocator = Allocator(runtime); - // allocate a->b->c - allocator.alloc(a->getBytes()); - allocator.alloc(b->getBytes()); - size_t offsetC = allocator.alloc(c->getBytes()); - allocator.info(); - // free c, then allocate d - allocator.free(offsetC, c->getBytes()); - size_t offsetD = allocator.alloc(d->getBytes()); - allocator.info(); - // expected to be a->b->d, with no free block between b and c - EXPECT_EQ(offsetC, offsetD); - } - - TEST(Allocator, testGetPtr) - { - Shape shape = Shape{1, 2, 2, 3}; - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - Tensor a = make_ref(shape, DataType::Float32, runtime); - Tensor b = make_ref(shape, DataType::Float32, runtime); - Tensor c = make_ref(shape, DataType::Float32, runtime); - Tensor d = make_ref(shape, DataType::Float32, runtime); - Allocator allocator = Allocator(runtime); - // allocate a->b->c->d - allocator.alloc(a->getBytes()); - allocator.alloc(b->getBytes()); - allocator.alloc(c->getBytes()); - allocator.alloc(d->getBytes()); - // multiple calls to the getPtr() function should return the same pointer - void *ptr1 = allocator.getPtr(); - void *ptr2 = allocator.getPtr(); - EXPECT_EQ(ptr1, ptr2); - } - -} // namespace infini +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/unary.h" + +#include "test.h" + +namespace infini +{ + TEST(Allocator, testAlloc) + { + Shape shape = Shape{1, 2, 2, 3}; + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Tensor a = make_ref(shape, DataType::Float32, runtime); + Tensor b = make_ref(shape, DataType::Float32, runtime); + Tensor c = make_ref(shape, DataType::Float32, runtime); + Tensor d = make_ref(shape, DataType::Float32, runtime); + Allocator allocator = Allocator(runtime); + // allocate a->b->c + size_t offsetA = allocator.alloc(a->getBytes()); + size_t offsetB = allocator.alloc(b->getBytes()); + size_t offsetC = allocator.alloc(c->getBytes()); + // free b, then allocate d + allocator.free(offsetB, b->getBytes()); + size_t offsetD = allocator.alloc(d->getBytes()); + // expected to be a->d->c + EXPECT_EQ(offsetB, offsetD); + ASSERT_FALSE(offsetA == 0 && offsetB == 0 && offsetC == 0 && offsetD == 0); + } + + TEST(Allocator, testAllocWithEndFreeBlock) + { + Shape shape = Shape{1, 2, 2, 3}; + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Tensor a = make_ref(shape, DataType::Float32, runtime); + Tensor b = make_ref(shape, DataType::Float32, runtime); + Tensor c = make_ref(shape, DataType::Float32, runtime); + Tensor d = + make_ref(Shape{2, 2, 2, 3}, DataType::Float32, runtime); + Allocator allocator = Allocator(runtime); + // allocate a->b->c + allocator.alloc(a->getBytes()); + allocator.alloc(b->getBytes()); + size_t offsetC = allocator.alloc(c->getBytes()); + allocator.info(); + // free c, then allocate d + allocator.free(offsetC, c->getBytes()); + size_t offsetD = allocator.alloc(d->getBytes()); + allocator.info(); + // expected to be a->b->d, with no free block between b and c + EXPECT_EQ(offsetC, offsetD); + } + + TEST(Allocator, testGetPtr) + { + Shape shape = Shape{1, 2, 2, 3}; + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Tensor a = make_ref(shape, DataType::Float32, runtime); + Tensor b = make_ref(shape, DataType::Float32, runtime); + Tensor c = make_ref(shape, DataType::Float32, runtime); + Tensor d = make_ref(shape, DataType::Float32, runtime); + Allocator allocator = Allocator(runtime); + // allocate a->b->c->d + allocator.alloc(a->getBytes()); + allocator.alloc(b->getBytes()); + allocator.alloc(c->getBytes()); + allocator.alloc(d->getBytes()); + // multiple calls to the getPtr() function should return the same pointer + void *ptr1 = allocator.getPtr(); + void *ptr2 = allocator.getPtr(); + EXPECT_EQ(ptr1, ptr2); + } + +} // namespace infini diff --git a/test/core/test_graph.cc b/test/core/test_graph.cc index bf696dd..05317ad 100644 --- a/test/core/test_graph.cc +++ b/test/core/test_graph.cc @@ -1,40 +1,40 @@ -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/matmul.h" -#include "operators/transpose.h" - -#include "test.h" - -namespace infini -{ - TEST(Graph, Optimize) - { - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - Graph g = make_ref(runtime); - Tensor i1 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); - Tensor i2 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); - Tensor t1 = g->addTensor({2, 3, 5, 4}, DataType::UInt32); - Tensor t2 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); - Tensor t3 = g->addTensor({2, 3, 5, 4}, DataType::UInt32); - Tensor o = g->addTensor({2, 3, 4, 4}, DataType::UInt32); - g->addOpWithOutputs(i1, t1, Shape{0, 1, 3, 2}); - g->addOpWithOutputs(t1, t2, Shape{0, 1, 3, 2}); - g->addOpWithOutputs(i2, t3, Shape{0, 1, 3, 2}); - g->addOpWithOutputs(t2, t3, o); - // 优化前 - g->print(); - g->optimize(); - // 优化后 - g->print(); - EXPECT_EQ(g->getOperators().size(), 1); - EXPECT_EQ(g->getTensors().size(), 3); - EXPECT_EQ(g->getOperators()[0]->getOpType().underlying(), 7); - auto op = as(g->getOperators()[0]); - EXPECT_EQ(op->getInputs(0)->getGuid(), 2); - EXPECT_EQ(op->getInputs(1)->getGuid(), 3); - EXPECT_EQ(op->getOutputs()[0], o); - EXPECT_EQ(op->getTransA(), false); - EXPECT_EQ(op->getTransB(), true); - } +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/matmul.h" +#include "operators/transpose.h" + +#include "test.h" + +namespace infini +{ + TEST(Graph, Optimize) + { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + Tensor i1 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + Tensor i2 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + Tensor t1 = g->addTensor({2, 3, 5, 4}, DataType::UInt32); + Tensor t2 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + Tensor t3 = g->addTensor({2, 3, 5, 4}, DataType::UInt32); + Tensor o = g->addTensor({2, 3, 4, 4}, DataType::UInt32); + g->addOpWithOutputs(i1, t1, Shape{0, 1, 3, 2}); + g->addOpWithOutputs(t1, t2, Shape{0, 1, 3, 2}); + g->addOpWithOutputs(i2, t3, Shape{0, 1, 3, 2}); + g->addOpWithOutputs(t2, t3, o); + // 优化前 + g->print(); + g->optimize(); + // 优化后 + g->print(); + EXPECT_EQ(g->getOperators().size(), 1); + EXPECT_EQ(g->getTensors().size(), 3); + EXPECT_EQ(g->getOperators()[0]->getOpType().underlying(), 7); + auto op = as(g->getOperators()[0]); + EXPECT_EQ(op->getInputs(0)->getGuid(), 2); + EXPECT_EQ(op->getInputs(1)->getGuid(), 3); + EXPECT_EQ(op->getOutputs()[0], o); + EXPECT_EQ(op->getTransA(), false); + EXPECT_EQ(op->getTransB(), true); + } } \ No newline at end of file diff --git a/test/kernels/nativecpu/test_nativecpu_concat.cc b/test/kernels/nativecpu/test_nativecpu_concat.cc index fc87fb1..4eac503 100644 --- a/test/kernels/nativecpu/test_nativecpu_concat.cc +++ b/test/kernels/nativecpu/test_nativecpu_concat.cc @@ -1,28 +1,28 @@ -#include "core/graph.h" -#include "core/runtime.h" -#include "operators/concat.h" - -#include "test.h" - -namespace infini { - -TEST(Concat, NativeCpu) { - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - Graph g = make_ref(runtime); - - auto t1 = g->addTensor({2, 2, 3, 1}, DataType::Float32); - auto t2 = g->addTensor({2, 2, 1, 1}, DataType::Float32); - auto t3 = g->addTensor({2, 2, 2, 1}, DataType::Float32); - auto op = g->addOp(TensorVec{t1, t2, t3}, nullptr, 2); - g->dataMalloc(); - t1->setData(IncrementalGenerator()); - t2->setData(OneGenerator()); - t3->setData(OneGenerator()); - - runtime->run(g); - EXPECT_TRUE(op->getOutput()->equalData( - vector{0, 1, 2, 1, 1, 1, 3, 4, 5, 1, 1, 1, - 6, 7, 8, 1, 1, 1, 9, 10, 11, 1, 1, 1})); -} - -} // namespace infini +#include "core/graph.h" +#include "core/runtime.h" +#include "operators/concat.h" + +#include "test.h" + +namespace infini { + +TEST(Concat, NativeCpu) { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + + auto t1 = g->addTensor({2, 2, 3, 1}, DataType::Float32); + auto t2 = g->addTensor({2, 2, 1, 1}, DataType::Float32); + auto t3 = g->addTensor({2, 2, 2, 1}, DataType::Float32); + auto op = g->addOp(TensorVec{t1, t2, t3}, nullptr, 2); + g->dataMalloc(); + t1->setData(IncrementalGenerator()); + t2->setData(OneGenerator()); + t3->setData(OneGenerator()); + + runtime->run(g); + EXPECT_TRUE(op->getOutput()->equalData( + vector{0, 1, 2, 1, 1, 1, 3, 4, 5, 1, 1, 1, + 6, 7, 8, 1, 1, 1, 9, 10, 11, 1, 1, 1})); +} + +} // namespace infini diff --git a/test/kernels/nativecpu/test_nativecpu_elementwise.cc b/test/kernels/nativecpu/test_nativecpu_elementwise.cc index c6ef191..9d8ae3c 100644 --- a/test/kernels/nativecpu/test_nativecpu_elementwise.cc +++ b/test/kernels/nativecpu/test_nativecpu_elementwise.cc @@ -1,44 +1,44 @@ -#include "core/graph.h" -#include "core/runtime.h" -#include "operators/element_wise.h" - -#include "test.h" - -namespace infini { - -using ExpectOutput = vector; -template -void testElementWiseNativeCpu( - const std::function &generator1, - const std::function &generator2, - const Shape &shape1, const Shape &shape2, const ExpectOutput &ansVec) { - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - Graph g = make_ref(runtime); - auto t1 = g->addTensor(shape1, DataType::Float32); - auto t2 = g->addTensor(shape2, DataType::Float32); - - auto op = g->addOp(t1, t2, nullptr); - g->dataMalloc(); - t1->setData(generator1); - t2->setData(generator2); - - runtime->run(g); - EXPECT_TRUE(op->getOutput()->equalData(ansVec)); -} - -TEST(ElementWise, NativeCpu) { - testElementWiseNativeCpu( - IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1}, - Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 4, 5, 6, 6, 7, 8, 10, 11, 12}); - testElementWiseNativeCpu( - IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1}, - Shape{2, 1, 1}, ExpectOutput{0, 0, 0, 3, 4, 5, 0, 0, 0, 9, 10, 11}); - testElementWiseNativeCpu( - IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1}, - Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 2, 3, 4, 6, 7, 8, 8, 9, 10}); - testElementWiseNativeCpu( - IncrementalGenerator(), OneGenerator(), Shape{1, 2, 2, 3, 1}, - Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); -} - -} // namespace infini +#include "core/graph.h" +#include "core/runtime.h" +#include "operators/element_wise.h" + +#include "test.h" + +namespace infini { + +using ExpectOutput = vector; +template +void testElementWiseNativeCpu( + const std::function &generator1, + const std::function &generator2, + const Shape &shape1, const Shape &shape2, const ExpectOutput &ansVec) { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + auto t1 = g->addTensor(shape1, DataType::Float32); + auto t2 = g->addTensor(shape2, DataType::Float32); + + auto op = g->addOp(t1, t2, nullptr); + g->dataMalloc(); + t1->setData(generator1); + t2->setData(generator2); + + runtime->run(g); + EXPECT_TRUE(op->getOutput()->equalData(ansVec)); +} + +TEST(ElementWise, NativeCpu) { + testElementWiseNativeCpu( + IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1}, + Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 4, 5, 6, 6, 7, 8, 10, 11, 12}); + testElementWiseNativeCpu( + IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1}, + Shape{2, 1, 1}, ExpectOutput{0, 0, 0, 3, 4, 5, 0, 0, 0, 9, 10, 11}); + testElementWiseNativeCpu( + IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1}, + Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 2, 3, 4, 6, 7, 8, 8, 9, 10}); + testElementWiseNativeCpu( + IncrementalGenerator(), OneGenerator(), Shape{1, 2, 2, 3, 1}, + Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); +} + +} // namespace infini diff --git a/test/kernels/nativecpu/test_nativecpu_transpose.cc b/test/kernels/nativecpu/test_nativecpu_transpose.cc index 501d402..0fcf808 100644 --- a/test/kernels/nativecpu/test_nativecpu_transpose.cc +++ b/test/kernels/nativecpu/test_nativecpu_transpose.cc @@ -1,27 +1,27 @@ -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/transpose.h" - -#include "test.h" - -namespace infini { - -TEST(Transpose, NativeCpu) { - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - Graph g = make_ref(runtime); - - Shape permute = {0, 2, 1, 3}; - auto input = g->addTensor({1, 2, 3, 4}, DataType::Float32); - auto op = g->addOp(input, nullptr, permute); - g->dataMalloc(); - input->setData(IncrementalGenerator()); - - runtime->run(g); - - EXPECT_TRUE(op->getOutput(0)->equalData(vector{0, 1, 2, 3, 12, 13, 14, 15, - 4, 5, 6, 7, 16, 17, 18, 19, - 8, 9, 10, 11, 20, 21, 22, 23})); -} - -} // namespace infini +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/transpose.h" + +#include "test.h" + +namespace infini { + +TEST(Transpose, NativeCpu) { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + + Shape permute = {0, 2, 1, 3}; + auto input = g->addTensor({1, 2, 3, 4}, DataType::Float32); + auto op = g->addOp(input, nullptr, permute); + g->dataMalloc(); + input->setData(IncrementalGenerator()); + + runtime->run(g); + + EXPECT_TRUE(op->getOutput(0)->equalData(vector{0, 1, 2, 3, 12, 13, 14, 15, + 4, 5, 6, 7, 16, 17, 18, 19, + 8, 9, 10, 11, 20, 21, 22, 23})); +} + +} // namespace infini diff --git a/test/operators/test_cast.cc b/test/operators/test_cast.cc index 3177751..52909d0 100644 --- a/test/operators/test_cast.cc +++ b/test/operators/test_cast.cc @@ -1,23 +1,23 @@ -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/unary.h" - -#include "test.h" - -namespace infini -{ - - TEST(Cast, ShapeInference) - { - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - { - Graph g = make_ref(runtime); - Tensor i0 = g->addTensor({2}, DataType::Float32); - auto op = g->addOp(i0, nullptr, CastType::Float2Float16); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{2})); - EXPECT_EQ(op->getOutDType(), (DataType::Float16)); - } - } - -} // namespace infini +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/unary.h" + +#include "test.h" + +namespace infini +{ + + TEST(Cast, ShapeInference) + { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({2}, DataType::Float32); + auto op = g->addOp(i0, nullptr, CastType::Float2Float16); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2})); + EXPECT_EQ(op->getOutDType(), (DataType::Float16)); + } + } + +} // namespace infini diff --git a/test/operators/test_clip.cc b/test/operators/test_clip.cc index bd4e07f..b440163 100644 --- a/test/operators/test_clip.cc +++ b/test/operators/test_clip.cc @@ -1,23 +1,23 @@ -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/unary.h" - -#include "test.h" - -namespace infini { - - TEST(Clip, ShapeInference) - { - // Runtime - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - Graph g = make_ref(runtime); - Tensor i0 = g->addTensor({1, 2, 2, 3}, DataType::Float32); - float min = 1.0; - float max = 4.0; - auto op = g->addOp(i0, nullptr, min, max); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 2, 3})); - EXPECT_EQ(op->getOutDType(), (DataType::Float32)); - } - -} // namespace infini +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/unary.h" + +#include "test.h" + +namespace infini { + + TEST(Clip, ShapeInference) + { + // Runtime + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({1, 2, 2, 3}, DataType::Float32); + float min = 1.0; + float max = 4.0; + auto op = g->addOp(i0, nullptr, min, max); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 2, 3})); + EXPECT_EQ(op->getOutDType(), (DataType::Float32)); + } + +} // namespace infini diff --git a/test/operators/test_concat.cc b/test/operators/test_concat.cc index 8984b9f..6bb7ea5 100644 --- a/test/operators/test_concat.cc +++ b/test/operators/test_concat.cc @@ -1,16 +1,16 @@ -#include "core/graph.h" -#include "core/runtime.h" -#include "operators/concat.h" -#include "test.h" - -namespace infini { -TEST(Concat, ShapeInfer) { - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - Graph g = make_ref(runtime); - auto t1 = g->addTensor({1, 3, 2, 4}, DataType::Float32); - auto t2 = g->addTensor({1, 3, 2, 5}, DataType::Float32); - - auto op = g->addOp(TensorVec{t1, t2}, nullptr, 3); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 3, 2, 9})); -} -} // namespace infini +#include "core/graph.h" +#include "core/runtime.h" +#include "operators/concat.h" +#include "test.h" + +namespace infini { +TEST(Concat, ShapeInfer) { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + auto t1 = g->addTensor({1, 3, 2, 4}, DataType::Float32); + auto t2 = g->addTensor({1, 3, 2, 5}, DataType::Float32); + + auto op = g->addOp(TensorVec{t1, t2}, nullptr, 3); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 3, 2, 9})); +} +} // namespace infini diff --git a/test/operators/test_element_wise.cc b/test/operators/test_element_wise.cc index f4fdd66..f2c30cd 100644 --- a/test/operators/test_element_wise.cc +++ b/test/operators/test_element_wise.cc @@ -1,66 +1,66 @@ -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/element_wise.h" - -#include "test.h" - -namespace infini { - - TEST(ElementWise, ShapeInference) - { - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - { - Graph g = make_ref(runtime); - Tensor i0 = g->addTensor({2, 3, 3, 4}, DataType::UInt32); - Tensor i1 = g->addTensor({2, 3, 3, 4}, DataType::UInt32); - auto op = g->addOp(i0, i1, nullptr); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 3, 4})); - } - } - - TEST(ElementWise, Broadcasting) - { - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - { - Graph g = make_ref(runtime); - Tensor i0 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); - Tensor i1 = g->addTensor({}, DataType::UInt32); - auto op = g->addOp(i0, i1, nullptr); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); - } - - { - Graph g = make_ref(runtime); - Tensor i0 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); - Tensor i1 = g->addTensor({5}, DataType::UInt32); - auto op = g->addOp(i0, i1, nullptr); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); - } - - { - Graph g = make_ref(runtime); - Tensor i0 = g->addTensor({4, 5}, DataType::UInt32); - Tensor i1 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); - auto op = g->addOp(i0, i1, nullptr); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); - } - - { - Graph g = make_ref(runtime); - Tensor i0 = g->addTensor({1, 4, 5}, DataType::UInt32); - Tensor i1 = g->addTensor({2, 3, 1, 1}, DataType::UInt32); - auto op = g->addOp(i0, i1, nullptr); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); - } - - { - Graph g = make_ref(runtime); - Tensor i0 = g->addTensor({3, 4, 5}, DataType::UInt32); - Tensor i1 = g->addTensor({2, 1, 1, 1}, DataType::UInt32); - auto op = g->addOp(i0, i1, nullptr); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); - } - } - -} // namespace infini +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/element_wise.h" + +#include "test.h" + +namespace infini { + + TEST(ElementWise, ShapeInference) + { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({2, 3, 3, 4}, DataType::UInt32); + Tensor i1 = g->addTensor({2, 3, 3, 4}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 3, 4})); + } + } + + TEST(ElementWise, Broadcasting) + { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + Tensor i1 = g->addTensor({}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); + } + + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + Tensor i1 = g->addTensor({5}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); + } + + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({4, 5}, DataType::UInt32); + Tensor i1 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); + } + + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({1, 4, 5}, DataType::UInt32); + Tensor i1 = g->addTensor({2, 3, 1, 1}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); + } + + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({3, 4, 5}, DataType::UInt32); + Tensor i1 = g->addTensor({2, 1, 1, 1}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); + } + } + +} // namespace infini diff --git a/test/operators/test_matmul.cc b/test/operators/test_matmul.cc index 32fbc36..843b796 100644 --- a/test/operators/test_matmul.cc +++ b/test/operators/test_matmul.cc @@ -1,57 +1,57 @@ -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/matmul.h" - -#include "test.h" - -namespace infini -{ - using ExpectOutput = vector; - - TEST(Matmul, ShapeInference) - { - auto runtime = NativeCpuRuntimeObj::getInstance(); - { - Graph g = make_ref(runtime); - auto A = g->addTensor(Shape{1, 3, 5}); - auto B = g->addTensor(Shape{1, 5, 2}); - auto matmul = g->addOp(A, B, nullptr); - auto C = matmul->getOutputs()[0]; - EXPECT_EQ(C->getDims(), (Shape{1, 3, 2})); - } - { - Graph g = make_ref(runtime); - auto A = g->addTensor(Shape{3, 5, 4}); - auto B = g->addTensor(Shape{3, 5, 2}); - auto matmul = g->addOp(A, B, nullptr, true, false); - auto C = matmul->getOutputs()[0]; - EXPECT_EQ(C->getDims(), (Shape{3, 4, 2})); - } - { - Graph g = make_ref(runtime); - auto A = g->addTensor(Shape{1, 2, 3, 5}); - auto B = g->addTensor(Shape{1, 1, 5, 2}); - auto matmul = g->addOp(A, B, nullptr); - auto C = matmul->getOutputs()[0]; - EXPECT_EQ(C->getDims(), (Shape{1, 2, 3, 2})); - } - { - Graph g = make_ref(runtime); - auto A = g->addTensor(Shape{2, 3, 5, 4}); - auto B = g->addTensor(Shape{1, 3, 5, 2}); - auto matmul = g->addOp(A, B, nullptr, true, false); - auto C = matmul->getOutputs()[0]; - EXPECT_EQ(C->getDims(), (Shape{2, 3, 4, 2})); - } - { - Graph g = make_ref(runtime); - auto A = g->addTensor(Shape{2, 3, 5, 4}); - auto B = g->addTensor(Shape{1, 3, 2, 5}); - auto matmul = g->addOp(A, B, nullptr, true, true); - auto C = matmul->getOutputs()[0]; - EXPECT_EQ(C->getDims(), (Shape{2, 3, 4, 2})); - } - } - +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/matmul.h" + +#include "test.h" + +namespace infini +{ + using ExpectOutput = vector; + + TEST(Matmul, ShapeInference) + { + auto runtime = NativeCpuRuntimeObj::getInstance(); + { + Graph g = make_ref(runtime); + auto A = g->addTensor(Shape{1, 3, 5}); + auto B = g->addTensor(Shape{1, 5, 2}); + auto matmul = g->addOp(A, B, nullptr); + auto C = matmul->getOutputs()[0]; + EXPECT_EQ(C->getDims(), (Shape{1, 3, 2})); + } + { + Graph g = make_ref(runtime); + auto A = g->addTensor(Shape{3, 5, 4}); + auto B = g->addTensor(Shape{3, 5, 2}); + auto matmul = g->addOp(A, B, nullptr, true, false); + auto C = matmul->getOutputs()[0]; + EXPECT_EQ(C->getDims(), (Shape{3, 4, 2})); + } + { + Graph g = make_ref(runtime); + auto A = g->addTensor(Shape{1, 2, 3, 5}); + auto B = g->addTensor(Shape{1, 1, 5, 2}); + auto matmul = g->addOp(A, B, nullptr); + auto C = matmul->getOutputs()[0]; + EXPECT_EQ(C->getDims(), (Shape{1, 2, 3, 2})); + } + { + Graph g = make_ref(runtime); + auto A = g->addTensor(Shape{2, 3, 5, 4}); + auto B = g->addTensor(Shape{1, 3, 5, 2}); + auto matmul = g->addOp(A, B, nullptr, true, false); + auto C = matmul->getOutputs()[0]; + EXPECT_EQ(C->getDims(), (Shape{2, 3, 4, 2})); + } + { + Graph g = make_ref(runtime); + auto A = g->addTensor(Shape{2, 3, 5, 4}); + auto B = g->addTensor(Shape{1, 3, 2, 5}); + auto matmul = g->addOp(A, B, nullptr, true, true); + auto C = matmul->getOutputs()[0]; + EXPECT_EQ(C->getDims(), (Shape{2, 3, 4, 2})); + } + } + }; // namespace infini \ No newline at end of file diff --git a/test/operators/test_transpose.cc b/test/operators/test_transpose.cc index 1c12b79..4867ea5 100644 --- a/test/operators/test_transpose.cc +++ b/test/operators/test_transpose.cc @@ -1,32 +1,32 @@ -#include "core/graph.h" -#include "core/kernel.h" -#include "core/runtime.h" -#include "operators/transpose.h" - -#include "test.h" - -namespace infini { - -TEST(Transpose, ShapeInference) { - Runtime runtime = NativeCpuRuntimeObj::getInstance(); - { - Graph g = make_ref(runtime); - Tensor i = g->addTensor({1, 2, 3, 4}, DataType::Float32); - auto op = g->addOp(i, nullptr, Shape{0, 1, 2, 3}); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 3, 4})); - } - { - Graph g = make_ref(runtime); - Tensor i = g->addTensor({1, 2, 3, 4}, DataType::Float32); - auto op = g->addOp(i, nullptr, Shape{0, 2, 1, 3}); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 3, 2, 4})); - } - { - Graph g = make_ref(runtime); - Tensor i = g->addTensor({2, 3, 4}, DataType::Float32); - auto op = g->addOp(i, nullptr, Shape{0, 2, 1}); - EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 4, 3})); - } -} - -} // namespace infini +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/transpose.h" + +#include "test.h" + +namespace infini { + +TEST(Transpose, ShapeInference) { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + { + Graph g = make_ref(runtime); + Tensor i = g->addTensor({1, 2, 3, 4}, DataType::Float32); + auto op = g->addOp(i, nullptr, Shape{0, 1, 2, 3}); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 3, 4})); + } + { + Graph g = make_ref(runtime); + Tensor i = g->addTensor({1, 2, 3, 4}, DataType::Float32); + auto op = g->addOp(i, nullptr, Shape{0, 2, 1, 3}); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 3, 2, 4})); + } + { + Graph g = make_ref(runtime); + Tensor i = g->addTensor({2, 3, 4}, DataType::Float32); + auto op = g->addOp(i, nullptr, Shape{0, 2, 1}); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 4, 3})); + } +} + +} // namespace infini