From d961ac282cf79955fc85641142292ce4958c9915 Mon Sep 17 00:00:00 2001 From: colinleeo Date: Mon, 18 May 2026 11:13:20 +0800 Subject: [PATCH 01/14] support msvc 2017. --- .github/workflows/unit-test-cpp-msvc.yml | 91 ++++++++++ cpp/CMakeLists.txt | 84 +++++++-- cpp/examples/CMakeLists.txt | 9 +- cpp/patches/antlr4-4.9.3-msvc-compat.patch | 126 +++++++++++++ cpp/pom.xml | 17 ++ cpp/src/CMakeLists.txt | 18 +- cpp/src/common/allocator/alloc_base.h | 4 +- cpp/src/common/allocator/my_string.h | 1 + cpp/src/common/constant/tsfile_constant.h | 9 +- cpp/src/common/container/bit_map.h | 77 +++++++- cpp/src/common/container/blocking_queue.cc | 31 ++-- cpp/src/common/container/blocking_queue.h | 8 +- cpp/src/common/db_common.h | 6 +- cpp/src/common/global.cc | 1 + cpp/src/common/global.h | 6 +- cpp/src/common/mutex/mutex.h | 38 ++-- cpp/src/common/tablet.cc | 2 +- cpp/src/common/tsfile_common.h | 14 +- cpp/src/cwrapper/tsfile_cwrapper.cc | 6 +- cpp/src/file/read_file.cc | 5 + cpp/src/file/write_file.cc | 4 + cpp/src/reader/result_set.h | 1 + cpp/src/utils/db_utils.h | 13 +- cpp/src/utils/storage_utils.h | 1 + cpp/src/utils/util_define.h | 137 +++++++++++++- cpp/src/writer/tsfile_writer.cc | 4 + cpp/src/writer/tsfile_writer.h | 1 + cpp/src/writer/value_page_writer.h | 2 +- cpp/test/CMakeLists.txt | 14 +- cpp/test/common/allocator/byte_stream_test.cc | 3 +- cpp/test/cwrapper/c_release_test.cc | 56 ++---- cpp/test/cwrapper/cwrapper_metadata_test.cc | 4 + cpp/test/cwrapper/cwrapper_test.cc | 4 + cpp/third_party/CMakeLists.txt | 6 + .../antlr4-cpp-runtime-4/runtime/src/Token.h | 10 +- .../runtime/src/Vocabulary.cpp | 2 + .../runtime/src/atn/LL1Analyzer.h | 2 +- pom.xml | 50 ++++++ python/setup.py | 168 +++++++++++++----- python/tsfile/__init__.py | 14 +- 40 files changed, 842 insertions(+), 207 deletions(-) create mode 100644 .github/workflows/unit-test-cpp-msvc.yml create mode 100644 cpp/patches/antlr4-4.9.3-msvc-compat.patch diff --git a/.github/workflows/unit-test-cpp-msvc.yml 
b/.github/workflows/unit-test-cpp-msvc.yml new file mode 100644 index 000000000..e0a706ca3 --- /dev/null +++ b/.github/workflows/unit-test-cpp-msvc.yml @@ -0,0 +1,91 @@ +# This workflow builds and tests the C++ implementation of TsFile on Windows +# using the MSVC toolchain (Visual Studio generator), as a complement to +# unit-test-cpp.yml which builds the Windows target with MinGW. + +name: Unit-Test-Cpp-MSVC + +on: + push: + branches: + - develop + - iotdb + - rc/* + paths-ignore: + - 'docs/**' + - 'java/**' + pull_request: + branches: + - develop + - dev/* + - iotdb + - rc/* + paths-ignore: + - 'docs/**' + - 'java/**' + # Enable manually starting builds. + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3 + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + +jobs: + unit-test-msvc: + strategy: + fail-fast: false + matrix: + build_type: [Release, Debug] + # AddressSanitizer for MSVC requires Visual Studio 2019 16.9+, which + # the windows-latest runner provides. + enable_asan: [NoAsan, Asan] + runs-on: windows-latest + + steps: + + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Set up JDK 17 + uses: actions/setup-java@v5 + with: + distribution: corretto + java-version: 17 + + # Setup caching of the artifacts in the .m2 directory, so they don't have + # to all be downloaded again for every build. + - name: Cache Maven packages + uses: actions/cache@v5 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-msvc-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + + # Put the MSVC toolchain (and, importantly, the AddressSanitizer runtime + # clang_rt.asan_dynamic-*.dll) on PATH so the test executable can run + # during ctest discovery and execution. 
+ - name: Set up MSVC developer environment + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: x64 + + # Run the maven build, selecting the MSVC toolchain via -Dcpp.toolchain=msvc. + # spotless (clang-format) is already covered by unit-test-cpp.yml, so it is + # skipped here to keep this workflow focused on the MSVC build. + - name: Build and test with Maven (MSVC) + shell: bash + run: | + if [ "${{ matrix.enable_asan }}" = "Asan" ]; then + ASAN_VALUE="ON" + else + ASAN_VALUE="OFF" + fi + ./mvnw.cmd -P with-cpp \ + -Dcpp.toolchain=msvc \ + -Denable.asan=$ASAN_VALUE \ + -Dbuild.type=${{ matrix.build_type }} \ + -Dspotless.skip=true \ + clean verify diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3f1d8bdd3..3f9be090e 100755 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -26,16 +26,34 @@ else() message(STATUS "Not using ToolChain") endif () -cmake_policy(SET CMP0079 NEW) +if (POLICY CMP0079) + cmake_policy(SET CMP0079 NEW) +endif () set(TsFile_CPP_VERSION 2.2.1.dev) -set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} -Wall") + +if (MSVC) + set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} /W3 /utf-8 /EHsc /bigobj /Zc:__cplusplus") + add_definitions(-DNOMINMAX -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS + -D_SCL_SECURE_NO_WARNINGS -D_WINSOCK_DEPRECATED_NO_WARNINGS) + # Export all symbols of the tsfile shared library automatically so that + # consumers do not need __declspec(dllexport) annotations. + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +else () + set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} -Wall") +endif () if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wunused -Wuninitialized -D__STDC_FORMAT_MACROS") endif () message("cmake using: USE_CPP11=${USE_CPP11}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +# MSVC has no /std:c++11; CMake maps this to the closest supported standard +# (C++14 default on MSVC), which compiles the C++11 codebase fine. 
+set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED OFF) +if (NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +endif () if (DEFINED ENV{CXX}) set(CMAKE_CXX_COMPILER $ENV{CXX}) @@ -83,25 +101,47 @@ else () endif () message("CMAKE BUILD TYPE " ${CMAKE_BUILD_TYPE}) -if (CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g") -elseif (CMAKE_BUILD_TYPE STREQUAL "Release") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") -elseif (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O2 -g") -elseif (CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") - set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} -ffunction-sections -fdata-sections -Os") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections") +# MSVC provides sensible per-configuration optimization flags by default; the +# GCC-style flags below would be rejected by cl.exe, so skip them on MSVC. +if (NOT MSVC) + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g") + elseif (CMAKE_BUILD_TYPE STREQUAL "Release") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") + elseif (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O2 -g") + elseif (CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") + set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} -ffunction-sections -fdata-sections -Os") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections") + endif () endif () message("CMAKE DEBUG: CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}") # disable asan by default. 
option(ENABLE_ASAN "Enable Address Sanitizer" OFF) -if (NOT WIN32) - if (ENABLE_ASAN) - message("Address Sanitizer is enabled.") - +if (ENABLE_ASAN) + message("Address Sanitizer is enabled.") + if (MSVC) + # MSVC ships AddressSanitizer; it requires Visual Studio 2019 16.9 or + # newer (MSVC_VERSION >= 1928). Only the address sanitizer is available + # (there is no UndefinedBehaviorSanitizer for MSVC). + if (MSVC_VERSION LESS 1928) + message(FATAL_ERROR + "ENABLE_ASAN requires MSVC 19.28+ (Visual Studio 2019 16.9); " + "detected MSVC_VERSION=${MSVC_VERSION}.") + endif () + # /fsanitize=address is incompatible with the /RTC* runtime checks that + # CMake injects into Debug builds, and with incremental linking. Strip + # /RTC* from the per-config flags and force non-incremental linking. + add_compile_options(/fsanitize=address) + foreach (flagsVar + CMAKE_C_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG + CMAKE_C_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS_RELWITHDEBINFO) + string(REGEX REPLACE "/RTC[1csu]+" "" ${flagsVar} "${${flagsVar}}") + endforeach () + add_link_options(/INCREMENTAL:NO) + elseif (NOT WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-omit-frame-pointer") if (NOT APPLE) @@ -111,8 +151,12 @@ if (NOT WIN32) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address,undefined") endif () else () - message("Address Sanitizer is disabled.") + message(WARNING + "ENABLE_ASAN on Windows is only supported with the MSVC toolchain; " + "ignoring it for the current generator.") endif () +else () + message("Address Sanitizer is disabled.") endif () option(BUILD_TEST "Build tests" ON) @@ -178,7 +222,11 @@ set(LIBRARY_INCLUDE_DIR ${PROJECT_BINARY_DIR}/include CACHE STRING "TsFile inclu set(THIRD_PARTY_INCLUDE ${PROJECT_BINARY_DIR}/third_party) set(SAVED_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} -Wall -std=c++11") +if (MSVC) + set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} /W3 /utf-8 /EHsc /bigobj /Zc:__cplusplus") +else () 
+ set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} -Wall -std=c++11") +endif () add_subdirectory(third_party) set(CMAKE_CXX_FLAGS "${SAVED_CXX_FLAGS}") diff --git a/cpp/examples/CMakeLists.txt b/cpp/examples/CMakeLists.txt index ebe6c66c8..1d5072b08 100644 --- a/cpp/examples/CMakeLists.txt +++ b/cpp/examples/CMakeLists.txt @@ -20,7 +20,9 @@ cmake_minimum_required(VERSION 3.10) project(examples) message("Running in exampes directory") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +if (NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +endif () # TsFile include dir set(SDK_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/../src/) @@ -37,8 +39,9 @@ include_directories(${PROJECT_SOURCE_DIR}/../third_party/antlr4-cpp-runtime-4/ru set(BUILD_TYPE "Release") include_directories(${SDK_INCLUDE_DIR}) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g") -set(CMAKE_CXX_FLAGS_DEBUG" ${CMAKE_CXX_FLAGS} -O0 -g") +if (NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g") +endif () add_subdirectory(cpp_examples) add_subdirectory(c_examples) diff --git a/cpp/patches/antlr4-4.9.3-msvc-compat.patch b/cpp/patches/antlr4-4.9.3-msvc-compat.patch new file mode 100644 index 000000000..f888b8391 --- /dev/null +++ b/cpp/patches/antlr4-4.9.3-msvc-compat.patch @@ -0,0 +1,126 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# antlr4-4.9.3-msvc-compat.patch +# +# Purpose : Make the bundled ANTLR4 C++ runtime (v4.9.3) compile under MSVC +# (Visual Studio 2017 / v141) while keeping the project on C++11. +# Scope : third_party/antlr4-cpp-runtime-4 only. No version change, no +# generated-parser change, no impact on the Java module (also 4.9.3) +# or on embedded/C++11 builds. +# Apply : from the cpp/ directory: git apply patches/antlr4-4.9.3-msvc-compat.patch +# or: patch -p1 < patches/antlr4-4.9.3-msvc-compat.patch +# +# --- Why this is needed ------------------------------------------------------- +# +# 1) Token.h / LL1Analyzer.h (overload-resolution ambiguity, error C2668) +# These headers define EPSILON/EOF/HIT_PRED/etc. as either +# `static constexpr size_t` (when __cplusplus >= 201703L) or as members of an +# unnamed `enum : size_t` (otherwise). IntervalSet has both contains(size_t) +# and contains(ssize_t) overloads. Per [conv.prom], converting an unnamed +# enum with a fixed underlying type to that underlying type is an integral +# PROMOTION (a strictly better match than a conversion to ssize_t). GCC +# implements this correctly, so the enum branch compiles there. MSVC 2017 +# does NOT -- it ranks both overloads equally and reports an ambiguous call. +# MSVC also reports __cplusplus as 199711L by default, so it always took the +# enum branch. `static constexpr size_t` is fully usable in MSVC 2017's +# C++11 mode, so we let MSVC take the constexpr branch: the constants become +# plain size_t values and the ambiguity disappears with no call-site changes. +# This is ODR-safe: the constants are currently enumerators (which cannot be +# ODR-used), proving the runtime never takes their address. +# NOTE: only the headers whose constants reach IntervalSet::contains/remove +# are patched. 
The std::string_view guards in ANTLRInputStream.{h,cpp} use +# the same `#if` and are deliberately left untouched (string_view is C++17). +# +# 2) Vocabulary.cpp (error C2039/C2660: isupper) +# Vocabulary.cpp calls the two-argument std::isupper(charT, const locale&) +# from but never includes ; it relied on a transitive +# include that libstdc++ happens to provide and MSVC's STL does not. +# Fix: add the missing #include . +# +# ------------------------------------------------------------------------------ +--- a/third_party/antlr4-cpp-runtime-4/runtime/src/Token.h ++++ b/third_party/antlr4-cpp-runtime-4/runtime/src/Token.h +@@ -14,7 +14,7 @@ + /// we obtained this token. + class ANTLR4CPP_PUBLIC Token { + public: +-#if __cplusplus >= 201703L ++#if __cplusplus >= 201703L || defined(_MSC_VER) + static constexpr size_t INVALID_TYPE = 0; + #else + enum : size_t { +@@ -24,7 +24,7 @@ + + /// During lookahead operations, this "token" signifies we hit rule end ATN state + /// and did not follow it despite needing to. +-#if __cplusplus >= 201703L ++#if __cplusplus >= 201703L || defined(_MSC_VER) + static constexpr size_t EPSILON = std::numeric_limits::max() - 1; + static constexpr size_t MIN_USER_TOKEN_TYPE = 1; + static constexpr size_t EOF = IntStream::EOF; +@@ -41,7 +41,7 @@ + /// All tokens go to the parser (unless skip() is called in that rule) + /// on a particular "channel". The parser tunes to a particular channel + /// so that whitespace etc... can go to the parser on a "hidden" channel. +-#if __cplusplus >= 201703L ++#if __cplusplus >= 201703L || defined(_MSC_VER) + static constexpr size_t DEFAULT_CHANNEL = 0; + #else + enum : size_t { +@@ -51,7 +51,7 @@ + + /// Anything on different channel than DEFAULT_CHANNEL is not parsed + /// by parser. 
+-#if __cplusplus >= 201703L ++#if __cplusplus >= 201703L || defined(_MSC_VER) + static constexpr size_t HIDDEN_CHANNEL = 1; + #else + enum : size_t { +@@ -70,7 +70,7 @@ + * + * @see Token#getChannel() + */ +-#if __cplusplus >= 201703L ++#if __cplusplus >= 201703L || defined(_MSC_VER) + static constexpr size_t MIN_USER_CHANNEL_VALUE = 2; + #else + enum : size_t { +--- a/third_party/antlr4-cpp-runtime-4/runtime/src/atn/LL1Analyzer.h ++++ b/third_party/antlr4-cpp-runtime-4/runtime/src/atn/LL1Analyzer.h +@@ -17,7 +17,7 @@ + public: + /// Special value added to the lookahead sets to indicate that we hit + /// a predicate during analysis if {@code seeThruPreds==false}. +-#if __cplusplus >= 201703L ++#if __cplusplus >= 201703L || defined(_MSC_VER) + static constexpr size_t HIT_PRED = Token::INVALID_TYPE; + #else + enum : size_t { +--- a/third_party/antlr4-cpp-runtime-4/runtime/src/Vocabulary.cpp ++++ b/third_party/antlr4-cpp-runtime-4/runtime/src/Vocabulary.cpp +@@ -3,6 +3,8 @@ + * can be found in the LICENSE.txt file in the project root. + */ + ++#include ++ + #include "Token.h" + + #include "Vocabulary.h" diff --git a/cpp/pom.xml b/cpp/pom.xml index 13af0eefc..f14c9d24b 100644 --- a/cpp/pom.xml +++ b/cpp/pom.xml @@ -88,6 +88,13 @@ + + @@ -100,6 +107,14 @@ compile + + + ${build.type} + @@ -110,6 +125,8 @@ ${project.build.directory}/build/test + + ${build.type} diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index e922836b7..93342c113 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -75,7 +75,10 @@ endif() if (ENABLE_ZLIB) list(APPEND PROJECT_INCLUDE_DIR - ${CMAKE_SOURCE_DIR}/third_party/zlib-1.2.13 + # zlib.h ships in the source tree; zconf.h is generated into the + # build tree by zlib's own CMake, so both directories are needed. 
+ ${CMAKE_SOURCE_DIR}/third_party/zlib-1.3.1 + ${THIRD_PARTY_INCLUDE}/zlib-1.3.1 ) endif() @@ -88,6 +91,11 @@ endif() include_directories(${PROJECT_INCLUDE_DIR}) +# Mark every translation unit that is compiled into the tsfile library so that +# TSFILE_API (see utils/util_define.h) resolves to an export-side (empty) +# decoration here, and to __declspec(dllimport) for external consumers. +add_definitions(-DTSFILE_BUILDING) + if (ENABLE_ANTLR4) add_subdirectory(parser) message("Adding parser subdirectory") @@ -157,4 +165,10 @@ set(LIBTSFILE_SO_VERSION ${TsFile_CPP_VERSION}) set_target_properties(tsfile PROPERTIES VERSION ${LIBTSFILE_PROJECT_VERSION}) set_target_properties(tsfile PROPERTIES SOVERSION ${LIBTSFILE_SO_VERSION}) -install(TARGETS tsfile LIBRARY DESTINATION ${LIBRARY_OUTPUT_PATH}) \ No newline at end of file +# On Windows a SHARED library produces a .dll (RUNTIME) plus an import .lib +# (ARCHIVE); on Unix it produces a .so (LIBRARY). Cover all three so the +# install step works for every platform. 
+install(TARGETS tsfile + RUNTIME DESTINATION ${LIBRARY_OUTPUT_PATH} + LIBRARY DESTINATION ${LIBRARY_OUTPUT_PATH} + ARCHIVE DESTINATION ${LIBRARY_OUTPUT_PATH}) \ No newline at end of file diff --git a/cpp/src/common/allocator/alloc_base.h b/cpp/src/common/allocator/alloc_base.h index facfd8081..c89aed077 100644 --- a/cpp/src/common/allocator/alloc_base.h +++ b/cpp/src/common/allocator/alloc_base.h @@ -59,7 +59,7 @@ enum AllocModID { __MAX_MOD_ID = 127, }; -extern const char* g_mod_names[__LAST_MOD_ID]; +extern TSFILE_API const char* g_mod_names[__LAST_MOD_ID]; /* very basic alloc/free interface in C style */ void* mem_alloc(uint32_t size, AllocModID mid); @@ -120,7 +120,7 @@ class BaseAllocator { void free(void* ptr) { mem_free(ptr); } }; -extern BaseAllocator g_base_allocator; +extern TSFILE_API BaseAllocator g_base_allocator; } // end namespace common diff --git a/cpp/src/common/allocator/my_string.h b/cpp/src/common/allocator/my_string.h index 279ee798e..ce8a6b229 100644 --- a/cpp/src/common/allocator/my_string.h +++ b/cpp/src/common/allocator/my_string.h @@ -21,6 +21,7 @@ #include +#include #include #include "common/allocator/page_arena.h" diff --git a/cpp/src/common/constant/tsfile_constant.h b/cpp/src/common/constant/tsfile_constant.h index 096c645ab..01ad1b2bc 100644 --- a/cpp/src/common/constant/tsfile_constant.h +++ b/cpp/src/common/constant/tsfile_constant.h @@ -41,9 +41,14 @@ static const std::string TIME_COLUMN_NAME = "time"; static const unsigned char VALUE_COLUMN_MASK = 0x40; static const int NO_STR_TO_READ = -1; -static const std::regex IDENTIFIER_PATTERN("([a-zA-Z0-9_\\u2E80-\\u9FFF]+)"); +// Identifier characters: ASCII alphanumerics / underscore plus any non-ASCII +// (high) byte. The \x80-\xFF byte range covers UTF-8 lead/continuation bytes +// (e.g. CJK characters) and is accepted by every std::regex implementation; +// the \uXXXX codepoint-escape form used previously is rejected by MSVC's +// (regex_error: error_escape). 
+static const std::regex IDENTIFIER_PATTERN("([a-zA-Z0-9_\\x80-\\xFF]+)"); static const std::regex NODE_NAME_PATTERN( - "(\\*{0,2}[a-zA-Z0-9_\\u2E80-\\u9FFF]+\\*{0,2})"); + "(\\*{0,2}[a-zA-Z0-9_\\x80-\\xFF]+\\*{0,2})"); static const int DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME = 3; } // namespace storage diff --git a/cpp/src/common/container/bit_map.h b/cpp/src/common/container/bit_map.h index 7c60a1ea3..dea8c4f98 100644 --- a/cpp/src/common/container/bit_map.h +++ b/cpp/src/common/container/bit_map.h @@ -21,11 +21,80 @@ #include +#if defined(_MSC_VER) +#include +#endif + #include "utils/errno_define.h" #include "utils/util_define.h" namespace common { +// Cross-platform bit-twiddling helpers. GCC/Clang use their builtins; MSVC +// uses the equivalent intrinsics from ; any other compiler falls +// back to a portable loop. +namespace bitops { +// Population count of an 8-bit value. +FORCE_INLINE int popcount8(uint8_t v) { +#if defined(__GNUC__) || defined(__clang__) + return __builtin_popcount(v); +#elif defined(_MSC_VER) + return static_cast(__popcnt(static_cast(v))); +#else + int c = 0; + while (v) { + v = static_cast(v & (v - 1)); + ++c; + } + return c; +#endif +} +// Count trailing zero bits. The argument must be non-zero. +FORCE_INLINE int ctz_nonzero(uint32_t v) { +#if defined(__GNUC__) || defined(__clang__) + return __builtin_ctz(v); +#elif defined(_MSC_VER) + unsigned long idx; + _BitScanForward(&idx, v); + return static_cast(idx); +#else + int c = 0; + while (!(v & 1u)) { + v >>= 1; + ++c; + } + return c; +#endif +} +// Count trailing zero bits of a 64-bit value. The argument must be non-zero. +FORCE_INLINE int ctz64_nonzero(uint64_t v) { +#if defined(__GNUC__) || defined(__clang__) + return __builtin_ctzll(v); +#elif defined(_MSC_VER) + unsigned long idx; +#if defined(_M_X64) || defined(_M_ARM64) + _BitScanForward64(&idx, v); +#else + // 32-bit MSVC has no _BitScanForward64. 
+ if (static_cast(v) != 0) { + _BitScanForward(&idx, static_cast(v)); + } else { + _BitScanForward(&idx, static_cast(v >> 32)); + idx += 32; + } +#endif + return static_cast(idx); +#else + int c = 0; + while (!(v & 1ull)) { + v >>= 1; + ++c; + } + return c; +#endif +} +} // namespace bitops + class BitMap { public: BitMap() : bitmap_(nullptr), size_(0), init_as_zero_(true) {} @@ -67,13 +136,11 @@ class BitMap { } // Count the number of bits set to 1 (i.e., number of null entries). - // __builtin_popcount is supported by GCC, Clang, and MinGW on Windows. - // TODO: add MSVC support if needed (e.g. __popcnt or manual bit count). FORCE_INLINE uint32_t count_set_bits() const { uint32_t count = 0; const uint8_t* p = reinterpret_cast(bitmap_); for (uint32_t i = 0; i < size_; i++) { - count += __builtin_popcount(p[i]); + count += bitops::popcount8(p[i]); } return count; } @@ -90,13 +157,13 @@ class BitMap { // Check remaining bits in the first (partial) byte uint8_t byte_val = p[byte_idx] >> (from & 7); if (byte_val) { - return from + __builtin_ctz(byte_val); + return from + bitops::ctz_nonzero(byte_val); } // Scan subsequent full bytes, skipping zeros const uint32_t byte_end = (total_bits + 7) >> 3; for (++byte_idx; byte_idx < byte_end; ++byte_idx) { if (p[byte_idx]) { - uint32_t pos = (byte_idx << 3) + __builtin_ctz(p[byte_idx]); + uint32_t pos = (byte_idx << 3) + bitops::ctz_nonzero(p[byte_idx]); return pos < total_bits ? 
pos : total_bits; } } diff --git a/cpp/src/common/container/blocking_queue.cc b/cpp/src/common/container/blocking_queue.cc index c843cd283..db2ecb3c9 100644 --- a/cpp/src/common/container/blocking_queue.cc +++ b/cpp/src/common/container/blocking_queue.cc @@ -21,36 +21,27 @@ namespace common { -BlockingQueue::BlockingQueue() : queue_(), mutex_(), cond_() { - pthread_mutex_init(&mutex_, NULL); - pthread_cond_init(&cond_, NULL); -} +BlockingQueue::BlockingQueue() : queue_(), mutex_(), cond_() {} -BlockingQueue::~BlockingQueue() { - pthread_mutex_destroy(&mutex_); - pthread_cond_destroy(&cond_); -} +BlockingQueue::~BlockingQueue() {} void BlockingQueue::push(void* data) { - pthread_mutex_lock(&mutex_); - queue_.push(data); - pthread_mutex_unlock(&mutex_); + { + std::lock_guard lock(mutex_); + queue_.push(data); + } /* * it is safe to signal after unlock. - * since pthread_cond_wait is guarantee to unlock and sleep atomically. + * std::condition_variable::wait unlocks and sleeps atomically. */ - pthread_cond_signal(&cond_); + cond_.notify_one(); } void* BlockingQueue::pop() { - void* ret_data = NULL; - pthread_mutex_lock(&mutex_); - while (queue_.empty()) { - pthread_cond_wait(&cond_, &mutex_); - } - ret_data = queue_.front(); + std::unique_lock lock(mutex_); + cond_.wait(lock, [this] { return !queue_.empty(); }); + void* ret_data = queue_.front(); queue_.pop(); - pthread_mutex_unlock(&mutex_); return ret_data; } diff --git a/cpp/src/common/container/blocking_queue.h b/cpp/src/common/container/blocking_queue.h index e03ea3a46..15572ec18 100644 --- a/cpp/src/common/container/blocking_queue.h +++ b/cpp/src/common/container/blocking_queue.h @@ -19,8 +19,8 @@ #ifndef COMMON_CONTAINER_BLOCKING_QUEUE_H #define COMMON_CONTAINER_BLOCKING_QUEUE_H -#include - +#include +#include #include namespace common { @@ -36,8 +36,8 @@ class BlockingQueue { private: std::queue queue_; - pthread_mutex_t mutex_; - pthread_cond_t cond_; + std::mutex mutex_; + std::condition_variable cond_; }; } 
// end namespace common diff --git a/cpp/src/common/db_common.h b/cpp/src/common/db_common.h index 8c637c3da..a2700c61c 100644 --- a/cpp/src/common/db_common.h +++ b/cpp/src/common/db_common.h @@ -93,9 +93,9 @@ enum CompressionType : uint8_t { INVALID_COMPRESSION = 255 }; -extern const char* s_data_type_names[8]; -extern const char* s_encoding_names[12]; -extern const char* s_compression_names[8]; +extern TSFILE_API const char* s_data_type_names[8]; +extern TSFILE_API const char* s_encoding_names[12]; +extern TSFILE_API const char* s_compression_names[8]; } // namespace common #if defined(__GLIBCXX__) && (__GNUC__ < 7) diff --git a/cpp/src/common/global.cc b/cpp/src/common/global.cc index ea4bf1289..a510dbff9 100644 --- a/cpp/src/common/global.cc +++ b/cpp/src/common/global.cc @@ -30,6 +30,7 @@ #include "common/thread_pool.h" #endif #include "utils/injection.h" +#include "utils/util_define.h" // strncasecmp and other platform-compat shims namespace common { diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h index ba5f4bd40..57adc81f5 100644 --- a/cpp/src/common/global.h +++ b/cpp/src/common/global.h @@ -26,8 +26,8 @@ #include "common/config/config.h" namespace common { -extern ConfigValue g_config_value_; -extern ColumnSchema g_time_column_schema; +extern TSFILE_API ConfigValue g_config_value_; +extern TSFILE_API ColumnSchema g_time_column_schema; FORCE_INLINE int set_global_time_data_type(uint8_t data_type) { ASSERT(data_type >= BOOLEAN && data_type <= STRING); @@ -184,7 +184,7 @@ FORCE_INLINE int set_write_thread_count(int32_t count) { #ifdef ENABLE_THREADS class ThreadPool; // Global write thread pool, created by init_common(). 
-extern ThreadPool* g_write_thread_pool_; +extern TSFILE_API ThreadPool* g_write_thread_pool_; #endif extern int init_common(); diff --git a/cpp/src/common/mutex/mutex.h b/cpp/src/common/mutex/mutex.h index 726288cd9..b35d328de 100644 --- a/cpp/src/common/mutex/mutex.h +++ b/cpp/src/common/mutex/mutex.h @@ -20,46 +20,28 @@ #ifndef COMMON_MUTEX_MUTEX_H #define COMMON_MUTEX_MUTEX_H -#include -#include +#include #include "utils/util_define.h" namespace common { +// Thin wrapper over std::mutex. Implemented with the C++11 standard library +// (instead of pthreads directly) so it builds on every platform, including +// MSVC where pthreads is not available. class Mutex { public: - Mutex() : mutex_() { pthread_mutex_init(&mutex_, NULL); } - ~Mutex() { pthread_mutex_destroy(&mutex_); } + Mutex() {} + ~Mutex() {} - void lock() { - int ret = EBUSY; - do { - ret = pthread_mutex_lock(&mutex_); - } while (UNLIKELY(ret == EBUSY || ret == EAGAIN)); - ASSERT(ret == 0); - } + void lock() { mutex_.lock(); } - void unlock() { - int ret = pthread_mutex_unlock(&mutex_); - ASSERT(ret == 0); - (void)ret; - } + void unlock() { mutex_.unlock(); } - bool try_lock() { - int ret = pthread_mutex_trylock(&mutex_); - if (ret == 0) { - return true; - } else if (ret == EBUSY || ret == EAGAIN) { - return false; - } else { - ASSERT(false); - return false; - } - } + bool try_lock() { return mutex_.try_lock(); } private: - pthread_mutex_t mutex_; + std::mutex mutex_; }; class MutexGuard { diff --git a/cpp/src/common/tablet.cc b/cpp/src/common/tablet.cc index 4088a6927..d71e48384 100644 --- a/cpp/src/common/tablet.cc +++ b/cpp/src/common/tablet.cc @@ -565,7 +565,7 @@ std::vector Tablet::find_all_device_boundaries() const { for (uint32_t w = 0; w < nwords; w++) { uint64_t bits = boundary[w]; while (bits) { - uint32_t bit = __builtin_ctzll(bits); + uint32_t bit = bitops::ctz64_nonzero(bits); uint32_t idx = w * 64 + bit; if (idx > 0 && idx < row_count) { result.push_back(idx); diff --git 
a/cpp/src/common/tsfile_common.h b/cpp/src/common/tsfile_common.h index 75b9a9da5..b516b608f 100644 --- a/cpp/src/common/tsfile_common.h +++ b/cpp/src/common/tsfile_common.h @@ -41,14 +41,14 @@ namespace storage { -extern const char* MAGIC_STRING_TSFILE; +extern TSFILE_API const char* MAGIC_STRING_TSFILE; constexpr int MAGIC_STRING_TSFILE_LEN = 6; -extern const char VERSION_NUM_BYTE; -extern const char CHUNK_GROUP_HEADER_MARKER; -extern const char CHUNK_HEADER_MARKER; -extern const char ONLY_ONE_PAGE_CHUNK_HEADER_MARKER; -extern const char SEPARATOR_MARKER; -extern const char OPERATION_INDEX_RANGE; +extern TSFILE_API const char VERSION_NUM_BYTE; +extern TSFILE_API const char CHUNK_GROUP_HEADER_MARKER; +extern TSFILE_API const char CHUNK_HEADER_MARKER; +extern TSFILE_API const char ONLY_ONE_PAGE_CHUNK_HEADER_MARKER; +extern TSFILE_API const char SEPARATOR_MARKER; +extern TSFILE_API const char OPERATION_INDEX_RANGE; // TODO review the String.len_ used diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 99db61042..07b363aeb 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -21,8 +21,12 @@ #include #include -#include #include +#ifdef _WIN32 +#include +#else +#include +#endif #include #include diff --git a/cpp/src/file/read_file.cc b/cpp/src/file/read_file.cc index 1807883a8..86d38bb64 100644 --- a/cpp/src/file/read_file.cc +++ b/cpp/src/file/read_file.cc @@ -21,10 +21,15 @@ #include #include +#ifdef _WIN32 +#include +#else #include +#endif #include "common/logger/elog.h" #include "common/tsfile_common.h" +#include "utils/util_define.h" // ssize_t and other platform-compat shims #ifdef _WIN32 #include diff --git a/cpp/src/file/write_file.cc b/cpp/src/file/write_file.cc index 8ad96fab2..e6620c7d3 100644 --- a/cpp/src/file/write_file.cc +++ b/cpp/src/file/write_file.cc @@ -24,8 +24,12 @@ #include #include #include +#ifdef _WIN32 +#include +#else #include #include +#endif #include 
"common/config/config.h" #include "common/logger/elog.h" diff --git a/cpp/src/reader/result_set.h b/cpp/src/reader/result_set.h index 216fced4f..1f1653603 100644 --- a/cpp/src/reader/result_set.h +++ b/cpp/src/reader/result_set.h @@ -21,6 +21,7 @@ #define READER_QUERY_DATA_SET_H #include +#include #include #include diff --git a/cpp/src/utils/db_utils.h b/cpp/src/utils/db_utils.h index 832e5c167..4ffc4d138 100644 --- a/cpp/src/utils/db_utils.h +++ b/cpp/src/utils/db_utils.h @@ -23,8 +23,8 @@ #include #include #include // memcpy -#include +#include #include #include #include @@ -195,12 +195,11 @@ struct ColumnSchema { }; FORCE_INLINE int64_t get_cur_timestamp() { - int64_t timestamp = 0; - struct timeval tv; - if (gettimeofday(&tv, NULL) >= 0) { - timestamp = (int64_t)tv.tv_sec * 1000 + tv.tv_usec / 1000; - } - return timestamp; + // Milliseconds since the Unix epoch. Uses the C++11 standard library so it + // is portable across platforms (gettimeofday is not available on MSVC). + return std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); } } // end namespace common diff --git a/cpp/src/utils/storage_utils.h b/cpp/src/utils/storage_utils.h index 78cc775c1..3cb06762a 100644 --- a/cpp/src/utils/storage_utils.h +++ b/cpp/src/utils/storage_utils.h @@ -23,6 +23,7 @@ #include #include +#include #include "common/datatype/value.h" #include "common/tsblock/tsblock.h" diff --git a/cpp/src/utils/util_define.h b/cpp/src/utils/util_define.h index 2796dfb0f..44de4603c 100644 --- a/cpp/src/utils/util_define.h +++ b/cpp/src/utils/util_define.h @@ -23,6 +23,67 @@ #include #include +/* ======== platform compatibility ======== + * + * MSVC does not provide several POSIX types/functions/macros used across the + * codebase. Provide drop-in equivalents so the same source compiles on both + * GCC/Clang (Linux) and MSVC (Windows) without scattering #ifdefs. 
+ */ +#ifdef _WIN32 +#include +#include + +#if defined(_MSC_VER) +// ssize_t is a signed, pointer-sized integer; intptr_t (from , +// included above) is exactly that. We deliberately avoid /SSIZE_T +// because that header also pollutes the global namespace with INT32/INT64 +// typedefs, which collide with the project's own INT32/INT64 enum values. +typedef intptr_t ssize_t; +typedef int mode_t; +#endif // _MSC_VER + +// access() mode flags (POSIX ); MSVC's _access uses the same bits. +#ifndef F_OK +#define F_OK 0 +#endif +#ifndef X_OK +#define X_OK 1 +#endif +#ifndef W_OK +#define W_OK 2 +#endif +#ifndef R_OK +#define R_OK 4 +#endif + +#ifndef strcasecmp +#define strcasecmp _stricmp +#endif +#ifndef strncasecmp +#define strncasecmp _strnicmp +#endif +#endif // _WIN32 + +/* ======== shared-library symbol visibility ======== + * + * Functions are exported from tsfile.dll automatically via + * CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS, but global DATA symbols (plain variables, + * static class members) are not reliably auto-exported, and a consumer must + * see __declspec(dllimport) to reference them across the DLL boundary. Mark + * such symbols with TSFILE_API: it expands to dllexport while building the + * library (TSFILE_BUILDING is defined for its own translation units), + * dllimport for external consumers, and nothing on non-MSVC toolchains. 
+ */ +#if defined(_MSC_VER) +#if defined(TSFILE_BUILDING) +#define TSFILE_API __declspec(dllexport) +#else +#define TSFILE_API __declspec(dllimport) +#endif +#else +#define TSFILE_API +#endif + /* ======== unsued ======== */ #define UNUSED(v) ((void)(v)) #if __cplusplus >= 201703L @@ -34,8 +95,10 @@ #endif /* ======== inline ======== */ -#ifdef __GNUC__ +#if defined(__GNUC__) || defined(__clang__) #define FORCE_INLINE inline __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define FORCE_INLINE __forceinline #else #define FORCE_INLINE inline #endif // __GNUC__ @@ -91,7 +154,19 @@ #define STATIC_ASSERT(cond, msg) static_assert((cond), #msg) #endif // __cplusplus < 201103L -/* ======== atomic operation ======== */ +/* ======== atomic operation ======== + * + * The ATOMIC_* macros operate on the address of a plain (non-std::atomic) + * scalar, matching the semantics of the GCC/Clang __atomic builtins. + * + * - On GCC/Clang the builtins are used directly (unchanged behaviour). + * - On other compilers (MSVC) they are implemented on top of C++11 + * via helper templates. Reinterpreting a plain scalar's address as a + * std::atomic* is well-defined in practice for lock-free integral types + * (this is exactly what C++20 std::atomic_ref formalizes); all current call + * sites use naturally-aligned integral members. 
+ */ +#if defined(__GNUC__) || defined(__clang__) #define ATOMIC_FAA(val_addr, addv) \ __atomic_fetch_add((val_addr), (addv), __ATOMIC_SEQ_CST) #define ATOMIC_AAF(val_addr, addv) \ @@ -112,9 +187,67 @@ #define ATOMIC_LOAD(val_addr) __atomic_load_n((val_addr), __ATOMIC_SEQ_CST) #define ATOMIC_STORE(val_addr, val) \ __atomic_store_n((val_addr), (val), __ATOMIC_SEQ_CST) +#elif defined(__cplusplus) +#include +namespace common { +namespace util_atomic { +template +inline std::atomic* as_atomic(T* p) { + return reinterpret_cast*>(p); +} +template +inline const std::atomic* as_atomic(const T* p) { + return reinterpret_cast*>(p); +} +// fetch-and-add: returns the value held *before* the addition. +template +inline T faa(T* p, V v) { + return as_atomic(p)->fetch_add(static_cast(v), + std::memory_order_seq_cst); +} +// add-and-fetch: returns the value held *after* the addition. +template +inline T aaf(T* p, V v) { + return static_cast(as_atomic(p)->fetch_add(static_cast(v), + std::memory_order_seq_cst) + + static_cast(v)); +} +// compare-and-swap: returns true on success; on failure writes the current +// value into *expected (same contract as __atomic_compare_exchange_n). 
+template +inline bool cas(T* p, T* expected, D desired) { + return as_atomic(p)->compare_exchange_strong( + *expected, static_cast(desired), std::memory_order_seq_cst); +} +template +inline T load(const T* p) { + return as_atomic(p)->load(std::memory_order_seq_cst); +} +template +inline void store(T* p, V v) { + as_atomic(p)->store(static_cast(v), std::memory_order_seq_cst); +} +} // namespace util_atomic +} // namespace common +#define ATOMIC_FAA(val_addr, addv) \ + (::common::util_atomic::faa((val_addr), (addv))) +#define ATOMIC_AAF(val_addr, addv) \ + (::common::util_atomic::aaf((val_addr), (addv))) +#define ATOMIC_CAS(val_addr, expected, desired) \ + (::common::util_atomic::cas((val_addr), (expected), (desired))) +#define ATOMIC_LOAD(val_addr) (::common::util_atomic::load((val_addr))) +#define ATOMIC_STORE(val_addr, val) \ + (::common::util_atomic::store((val_addr), (val))) +#endif // atomic operation /* ======== align ======== */ +#if defined(__GNUC__) || defined(__clang__) #define ALIGNED(a) __attribute__((aligned(a))) +#elif defined(_MSC_VER) +#define ALIGNED(a) __declspec(align(a)) +#else +#define ALIGNED(a) +#endif #define ALIGNED_4 ALIGNED(4) #define ALIGNED_8 ALIGNED(8) diff --git a/cpp/src/writer/tsfile_writer.cc b/cpp/src/writer/tsfile_writer.cc index 9bdbaa08d..3170a3160 100644 --- a/cpp/src/writer/tsfile_writer.cc +++ b/cpp/src/writer/tsfile_writer.cc @@ -19,7 +19,11 @@ #include "tsfile_writer.h" +#ifdef _WIN32 +#include +#else #include +#endif #include "chunk_writer.h" #include "common/config/config.h" diff --git a/cpp/src/writer/tsfile_writer.h b/cpp/src/writer/tsfile_writer.h index a931345af..0d4185874 100644 --- a/cpp/src/writer/tsfile_writer.h +++ b/cpp/src/writer/tsfile_writer.h @@ -31,6 +31,7 @@ #include "common/container/simple_vector.h" #include "common/device_id.h" #include "common/record.h" +#include "utils/util_define.h" // mode_t and other platform-compat shims #include "common/schema.h" #include "common/tablet.h" diff --git 
a/cpp/src/writer/value_page_writer.h b/cpp/src/writer/value_page_writer.h index ef694693b..97f8a5f0d 100644 --- a/cpp/src/writer/value_page_writer.h +++ b/cpp/src/writer/value_page_writer.h @@ -229,7 +229,7 @@ class ValuePageWriter { std::vector col_notnull_bitmap_; uint32_t size_; - static uint32_t MASK; + static TSFILE_API uint32_t MASK; }; } // end namespace storage diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 2be9c1b2c..f5d084f8f 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -108,12 +108,17 @@ file(GLOB_RECURSE TEST_SRCS "encoding/*_test.cc" "utils/*_test.cc" "file/*_test.cc" - "parser/*_test.cc" "reader/*_test.cc" "writer/*_test.cc" "cwrapper/*_test.cc" ) +# Parser tests depend on the ANTLR4 runtime; only build them when it is enabled. +if (ENABLE_ANTLR4) + file(GLOB_RECURSE PARSER_TEST_SRCS "parser/*_test.cc") + list(APPEND TEST_SRCS ${PARSER_TEST_SRCS}) +endif() + if (ENABLE_SNAPPY) file(GLOB_RECURSE SNAPPY_TEST_SRCS "compress/*snappy*_test.cc") list(APPEND TEST_SRCS ${SNAPPY_TEST_SRCS}) @@ -171,8 +176,13 @@ include(GoogleTest) # On Windows, delay test discovery until ctest runs (PRE_TEST) so the test exe # runs with the correct env (e.g. PATH has MinGW, libtsfile.dll is present). # Avoids 0xc0000139 when discovery runs at build time. +# +# DISCOVERY_TIMEOUT is raised well above the 5s default: the first execution of +# the freshly-built test executable can be delayed by on-access antivirus +# scanning (e.g. Windows Defender), which otherwise trips a spurious +# "Process terminated due to timeout" while gtest_discover_tests enumerates it. 
if(WIN32) - gtest_discover_tests(TsFile_Test DISCOVERY_MODE PRE_TEST) + gtest_discover_tests(TsFile_Test DISCOVERY_MODE PRE_TEST DISCOVERY_TIMEOUT 120) else() gtest_discover_tests(TsFile_Test) endif() \ No newline at end of file diff --git a/cpp/test/common/allocator/byte_stream_test.cc b/cpp/test/common/allocator/byte_stream_test.cc index 6296e3a5d..b211803c3 100644 --- a/cpp/test/common/allocator/byte_stream_test.cc +++ b/cpp/test/common/allocator/byte_stream_test.cc @@ -87,7 +87,8 @@ TEST_F(ByteStreamTest, WriteReadLargeQuantities) { write_to_stream(&data, 1); } - uint8_t read_buffer[1024 * 1024]; + // 1 MiB buffer: keep it off the stack (MSVC's default stack is only 1 MiB). + static uint8_t read_buffer[1024 * 1024]; for (int i = 0; i < 1024 * 1024; i++) { uint32_t read_len = 0; read_from_stream(read_buffer + i, 1, read_len); diff --git a/cpp/test/cwrapper/c_release_test.cc b/cpp/test/cwrapper/c_release_test.cc index c0d0c35de..8cdb7a977 100644 --- a/cpp/test/cwrapper/c_release_test.cc +++ b/cpp/test/cwrapper/c_release_test.cc @@ -18,7 +18,11 @@ */ #include +#ifdef _WIN32 +#include +#else #include +#endif #include #include @@ -97,13 +101,9 @@ TEST_F(CReleaseTest, TsFileWriterNew) { table_schema.column_schemas = static_cast(malloc(sizeof(ColumnSchema) * 2)); table_schema.column_schemas[0] = - (ColumnSchema){.column_name = strdup("col1"), - .data_type = TS_DATATYPE_STRING, - .column_category = TAG}; + ColumnSchema{strdup("col1"),TS_DATATYPE_STRING,TAG}; table_schema.column_schemas[1] = - (ColumnSchema){.column_name = strdup("col2"), - .data_type = TS_DATATYPE_INT32, - .column_category = FIELD}; + ColumnSchema{strdup("col2"),TS_DATATYPE_INT32,FIELD}; writer = tsfile_writer_new(file, &table_schema, &error_code); ASSERT_EQ(RET_OK, error_code); @@ -128,21 +128,15 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) { abnormal_schema.column_schemas = static_cast(malloc(sizeof(ColumnSchema) * 4)); abnormal_schema.column_schemas[0] = - (ColumnSchema){.column_name 
= strdup("!@#$%^*()_+-="), - .data_type = TS_DATATYPE_STRING, - .column_category = TAG}; + ColumnSchema{strdup("!@#$%^*()_+-="),TS_DATATYPE_STRING,TAG}; // TAG's datatype is not correct abnormal_schema.column_schemas[1] = - (ColumnSchema){.column_name = strdup("TAG2"), - .data_type = TS_DATATYPE_INT32, - .column_category = TAG}; + ColumnSchema{strdup("TAG2"),TS_DATATYPE_INT32,TAG}; // same column name with column[0] abnormal_schema.column_schemas[2] = - (ColumnSchema){.column_name = strdup("!@#$%^*()_+-="), - .data_type = TS_DATATYPE_DOUBLE, - .column_category = FIELD}; + ColumnSchema{strdup("!@#$%^*()_+-="),TS_DATATYPE_DOUBLE,FIELD}; // column name conflict TsFileWriter writer = @@ -151,9 +145,7 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) { free(abnormal_schema.column_schemas[2].column_name); abnormal_schema.column_schemas[2] = - (ColumnSchema){.column_name = strdup("!@#$%^*()_+-=1"), - .data_type = TS_DATATYPE_DOUBLE, - .column_category = FIELD}; + ColumnSchema{strdup("!@#$%^*()_+-=1"),TS_DATATYPE_DOUBLE,FIELD}; // datatype conflict writer = tsfile_writer_new(file, &abnormal_schema, &error_code); @@ -161,9 +153,7 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) { free(abnormal_schema.column_schemas[1].column_name); abnormal_schema.column_schemas[1] = - (ColumnSchema){.column_name = strdup("TAG2"), - .data_type = TS_DATATYPE_STRING, - .column_category = TAG}; + ColumnSchema{strdup("TAG2"),TS_DATATYPE_STRING,TAG}; writer = tsfile_writer_new(file, &abnormal_schema, &error_code); ASSERT_EQ(RET_OK, error_code); @@ -235,29 +225,17 @@ TEST_F(CReleaseTest, TsFileWriterMultiDataType) { all_type_schema.column_schemas = static_cast(malloc(sizeof(ColumnSchema) * 6)); all_type_schema.column_schemas[0] = - (ColumnSchema){.column_name = strdup("TAG"), - .data_type = TS_DATATYPE_STRING, - .column_category = TAG}; + ColumnSchema{strdup("TAG"),TS_DATATYPE_STRING,TAG}; all_type_schema.column_schemas[1] = - (ColumnSchema){.column_name = strdup("INT32"), 
- .data_type = TS_DATATYPE_INT32, - .column_category = FIELD}; + ColumnSchema{strdup("INT32"),TS_DATATYPE_INT32,FIELD}; all_type_schema.column_schemas[2] = - (ColumnSchema){.column_name = strdup("INT64"), - .data_type = TS_DATATYPE_INT64, - .column_category = FIELD}; + ColumnSchema{strdup("INT64"),TS_DATATYPE_INT64,FIELD}; all_type_schema.column_schemas[3] = - (ColumnSchema){.column_name = strdup("FLOAT"), - .data_type = TS_DATATYPE_FLOAT, - .column_category = FIELD}; + ColumnSchema{strdup("FLOAT"),TS_DATATYPE_FLOAT,FIELD}; all_type_schema.column_schemas[4] = - (ColumnSchema){.column_name = strdup("DOUBLE"), - .data_type = TS_DATATYPE_DOUBLE, - .column_category = FIELD}; + ColumnSchema{strdup("DOUBLE"),TS_DATATYPE_DOUBLE,FIELD}; all_type_schema.column_schemas[5] = - (ColumnSchema){.column_name = strdup("BOOLEAN"), - .data_type = TS_DATATYPE_BOOLEAN, - .column_category = FIELD}; + ColumnSchema{strdup("BOOLEAN"),TS_DATATYPE_BOOLEAN,FIELD}; TsFileWriter writer = tsfile_writer_new(file, &all_type_schema, &error_code); diff --git a/cpp/test/cwrapper/cwrapper_metadata_test.cc b/cpp/test/cwrapper/cwrapper_metadata_test.cc index 57fca4de6..4595770a6 100644 --- a/cpp/test/cwrapper/cwrapper_metadata_test.cc +++ b/cpp/test/cwrapper/cwrapper_metadata_test.cc @@ -18,7 +18,11 @@ */ #include +#ifdef _WIN32 +#include +#else #include +#endif #include #include diff --git a/cpp/test/cwrapper/cwrapper_test.cc b/cpp/test/cwrapper/cwrapper_test.cc index 5998939af..9cf06d2f8 100644 --- a/cpp/test/cwrapper/cwrapper_test.cc +++ b/cpp/test/cwrapper/cwrapper_test.cc @@ -17,7 +17,11 @@ * under the License. */ #include +#ifdef _WIN32 +#include +#else #include +#endif #include #include diff --git a/cpp/third_party/CMakeLists.txt b/cpp/third_party/CMakeLists.txt index b72c21638..70c5aa86c 100755 --- a/cpp/third_party/CMakeLists.txt +++ b/cpp/third_party/CMakeLists.txt @@ -17,6 +17,12 @@ specific language governing permissions and limitations under the License. 
]] if (ENABLE_ANTLR4) + # Build the ANTLR4 runtime against the dynamic CRT (/MD) so it links the + # same C runtime as the rest of the project (avoiding LNK2038 mismatches). + # ANTLR4's own option is named WITH_STATIC_CRT and defaults to ON (=> /MT); + # pre-seeding it in the cache makes its option() call a no-op. + set(WITH_STATIC_CRT OFF CACHE BOOL + "Link the ANTLR4 runtime against the dynamic CRT" FORCE) add_subdirectory(antlr4-cpp-runtime-4) message("ANTLR4 runtime is enabled") else() diff --git a/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/Token.h b/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/Token.h index 1878b28a1..c26ba03fd 100755 --- a/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/Token.h +++ b/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/Token.h @@ -14,7 +14,7 @@ namespace antlr4 { /// we obtained this token. class ANTLR4CPP_PUBLIC Token { public: -#if __cplusplus >= 201703L +#if __cplusplus >= 201703L || defined(_MSC_VER) static constexpr size_t INVALID_TYPE = 0; #else enum : size_t { @@ -24,7 +24,7 @@ namespace antlr4 { /// During lookahead operations, this "token" signifies we hit rule end ATN state /// and did not follow it despite needing to. -#if __cplusplus >= 201703L +#if __cplusplus >= 201703L || defined(_MSC_VER) static constexpr size_t EPSILON = std::numeric_limits::max() - 1; static constexpr size_t MIN_USER_TOKEN_TYPE = 1; static constexpr size_t EOF = IntStream::EOF; @@ -41,7 +41,7 @@ namespace antlr4 { /// All tokens go to the parser (unless skip() is called in that rule) /// on a particular "channel". The parser tunes to a particular channel /// so that whitespace etc... can go to the parser on a "hidden" channel. -#if __cplusplus >= 201703L +#if __cplusplus >= 201703L || defined(_MSC_VER) static constexpr size_t DEFAULT_CHANNEL = 0; #else enum : size_t { @@ -51,7 +51,7 @@ namespace antlr4 { /// Anything on different channel than DEFAULT_CHANNEL is not parsed /// by parser. 
-#if __cplusplus >= 201703L +#if __cplusplus >= 201703L || defined(_MSC_VER) static constexpr size_t HIDDEN_CHANNEL = 1; #else enum : size_t { @@ -70,7 +70,7 @@ namespace antlr4 { * * @see Token#getChannel() */ -#if __cplusplus >= 201703L +#if __cplusplus >= 201703L || defined(_MSC_VER) static constexpr size_t MIN_USER_CHANNEL_VALUE = 2; #else enum : size_t { diff --git a/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/Vocabulary.cpp b/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/Vocabulary.cpp index 9bbf0b23a..f6da6e870 100755 --- a/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/Vocabulary.cpp +++ b/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/Vocabulary.cpp @@ -3,6 +3,8 @@ * can be found in the LICENSE.txt file in the project root. */ +#include + #include "Token.h" #include "Vocabulary.h" diff --git a/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/atn/LL1Analyzer.h b/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/atn/LL1Analyzer.h index e297bc9a9..5b44be9db 100755 --- a/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/atn/LL1Analyzer.h +++ b/cpp/third_party/antlr4-cpp-runtime-4/runtime/src/atn/LL1Analyzer.h @@ -17,7 +17,7 @@ namespace atn { public: /// Special value added to the lookahead sets to indicate that we hit /// a predicate during analysis if {@code seeThruPreds==false}. 
-#if __cplusplus >= 201703L +#if __cplusplus >= 201703L || defined(_MSC_VER) static constexpr size_t HIT_PRED = Token::INVALID_TYPE; #else enum : size_t { diff --git a/pom.xml b/pom.xml index 1e80410b0..d726b2d26 100644 --- a/pom.xml +++ b/pom.xml @@ -43,6 +43,18 @@ 26.3.1 1.21.1 4.0.22 + + Visual Studio 17 2022 + x64 + + @@ -777,6 +789,44 @@ python + + + cpp-mingw + + + cpp.toolchain + mingw + + + + MinGW Makefiles + + + + + cpp-msvc + + + cpp.toolchain + msvc + + + + ${msvc.generator} + -DCMAKE_GENERATOR_PLATFORM=${msvc.platform} + + .skipTests diff --git a/python/setup.py b/python/setup.py index 63d99de03..3bd19365f 100644 --- a/python/setup.py +++ b/python/setup.py @@ -29,20 +29,65 @@ ROOT = Path(__file__).parent.resolve() PKG = ROOT / "tsfile" -CPP_OUT = ROOT / ".." / "cpp" / "target" / "build" -CPP_LIB = CPP_OUT / "lib" -CPP_INC = CPP_OUT / "include" version = "2.2.1.dev" + +def _find_cpp_build(): + """Locate the C++ build output directory (one containing include/). + + The Maven-driven build emits ``cpp/target/build``; a direct CMake build + emits ``cpp/build/``. Both layouts are supported, and an + explicit override may be given via the ``TSFILE_CPP_BUILD`` env var. + """ + candidates = [] + override = os.environ.get("TSFILE_CPP_BUILD") + if override: + candidates.append(Path(override)) + candidates += [ + ROOT / ".." / "cpp" / "target" / "build", + ROOT / ".." / "cpp" / "build" / "msvc", + ROOT / ".." / "cpp" / "build" / "Release", + ROOT / ".." / "cpp" / "build" / "RelWithDebInfo", + ROOT / ".." / "cpp" / "build" / "Debug", + ] + for cand in candidates: + cand = cand.resolve() + if (cand / "include").is_dir(): + return cand + raise FileNotFoundError( + "Could not locate the C++ build output (a directory containing " + "include/). Build the C++ module first, or point TSFILE_CPP_BUILD " + "at the build directory." 
+ ) + + +def _find_lib(root, patterns): + """Return the shortest-named file under ``root`` matching any pattern.""" + for pattern in patterns: + hits = sorted(root.rglob(pattern), key=lambda p: len(p.name)) + if hits: + return hits[0] + return None + + +CPP_OUT = _find_cpp_build() +CPP_LIB = CPP_OUT / "lib" +CPP_INC = CPP_OUT / "include" + if not CPP_INC.exists(): raise FileNotFoundError(f"missing C++ headers: {CPP_INC}") if (PKG / "include").exists(): shutil.rmtree(PKG / "include") shutil.copytree(CPP_INC, PKG / "include") + +# Windows toolchain: "msvc" links against an MSVC import library (tsfile.lib), +# "mingw" links against a MinGW import library (libtsfile.dll.a). +win_toolchain = None + if sys.platform.startswith("linux"): candidates = sorted( - CPP_LIB.glob("libtsfile.so*"), key=lambda p: len(p.name), reverse=True + CPP_LIB.rglob("libtsfile.so*"), key=lambda p: len(p.name), reverse=True ) if not candidates: raise FileNotFoundError("missing libtsfile.so* in build output") @@ -53,8 +98,8 @@ shutil.copy2(src, link_name) elif sys.platform == "darwin": - candidates = sorted(CPP_LIB.glob("libtsfile.*.dylib")) or list( - CPP_LIB.glob("libtsfile.dylib") + candidates = sorted(CPP_LIB.rglob("libtsfile.*.dylib")) or list( + CPP_LIB.rglob("libtsfile.dylib") ) if not candidates: raise FileNotFoundError("missing libtsfile*.dylib in build output") @@ -63,38 +108,51 @@ shutil.copy2(src, dst) link_name = PKG / "libtsfile.dylib" shutil.copy2(src, link_name) + elif sys.platform == "win32": - for base_name in ("libtsfile",): - dll_candidates = sorted( - CPP_LIB.glob(f"{base_name}*.dll"), key=lambda p: len(p.name), reverse=True - ) - dll_a_candidates = sorted( - CPP_LIB.glob(f"{base_name}*.dll.a"), key=lambda p: len(p.name), reverse=True + # The shared library is named tsfile.dll (MSVC) or libtsfile.dll (MinGW). 
+ dll_src = _find_lib(CPP_LIB, ["tsfile*.dll", "libtsfile*.dll"]) + if dll_src is None: + raise FileNotFoundError(f"missing tsfile DLL in build output: {CPP_LIB}") + + # Pick the import library and infer the toolchain from its kind. + mingw_imp = _find_lib(CPP_LIB, ["libtsfile*.dll.a"]) + msvc_imp = _find_lib(CPP_LIB, ["tsfile*.lib", "libtsfile*.lib"]) + if mingw_imp is not None and msvc_imp is None: + win_toolchain = "mingw" + imp_src = mingw_imp + elif msvc_imp is not None: + win_toolchain = "msvc" + imp_src = msvc_imp + else: + raise FileNotFoundError( + f"missing tsfile import library (*.lib or *.dll.a) in {CPP_LIB}" ) - if not dll_candidates: - raise FileNotFoundError(f"missing {base_name}*.dll in build output") - if not dll_a_candidates: - raise FileNotFoundError(f"missing {base_name}*.dll.a in build output") - - dll_src = dll_candidates[0] - dll_a_src = dll_a_candidates[0] - - shutil.copy2(dll_src, PKG / f"{base_name}.dll") - shutil.copy2(dll_a_src, PKG / f"{base_name}.dll.a") - - # Copy MinGW runtime DLLs next to libtsfile.dll so Python can find them. - # Python 3.8+ does not search PATH for DLLs; they must be in the same - # directory as the .pyd extensions (registered via os.add_dll_directory). - for _mingw_dll in ("libstdc++-6.dll", "libgcc_s_seh-1.dll", "libwinpthread-1.dll"): - for _dir in os.environ.get("PATH", "").split(os.pathsep): - _src = Path(_dir) / _mingw_dll - if _src.is_file(): - shutil.copy2(_src, PKG / _mingw_dll) - print(f"setup.py: copied {_mingw_dll} from {_src}") - break - else: - print(f"setup.py: WARNING - {_mingw_dll} not found on PATH") + # Copy the DLL keeping its original base name: the import library embeds + # that name, so the .pyd extensions must find a DLL with the same name. 
+ shutil.copy2(dll_src, PKG / dll_src.name) + shutil.copy2(imp_src, PKG / imp_src.name) + print(f"setup.py: Windows toolchain = {win_toolchain}") + print(f"setup.py: copied {dll_src.name} and {imp_src.name}") + + if win_toolchain == "mingw": + # Copy MinGW runtime DLLs next to libtsfile.dll so Python can find + # them. Python 3.8+ does not search PATH for DLLs; they must sit in + # the same directory as the .pyd extensions (os.add_dll_directory). + for _mingw_dll in ( + "libstdc++-6.dll", + "libgcc_s_seh-1.dll", + "libwinpthread-1.dll", + ): + for _dir in os.environ.get("PATH", "").split(os.pathsep): + _src = Path(_dir) / _mingw_dll + if _src.is_file(): + shutil.copy2(_src, PKG / _mingw_dll) + print(f"setup.py: copied {_mingw_dll} from {_src}") + break + else: + print(f"setup.py: WARNING - {_mingw_dll} not found on PATH") else: raise RuntimeError(f"Unsupported platform: {sys.platform}") @@ -104,7 +162,9 @@ def run(self): super().run() def finalize_options(self): - if sys.platform == "win32": + # MinGW must be requested explicitly; MSVC is the default Windows + # compiler distutils selects, which matches an MSVC-built libtsfile. + if sys.platform == "win32" and win_toolchain == "mingw": self.compiler = "mingw32" super().finalize_options() @@ -127,15 +187,33 @@ def finalize_options(self): extra_link_args += ["-Wl,-rpath,@loader_path", "-stdlib=libc++"] elif sys.platform == "win32": libraries = ["tsfile"] - extra_compile_args += [ - "-O2", - "-std=c++11", - "-DSIZEOF_VOID_P=8", - "-D__USE_MINGW_ANSI_STDIO=1", - "-DMS_WIN64", - "-D_WIN64", - ] - extra_link_args += [] + if win_toolchain == "mingw": + extra_compile_args += [ + "-O2", + "-std=c++11", + "-DSIZEOF_VOID_P=8", + "-D__USE_MINGW_ANSI_STDIO=1", + "-DMS_WIN64", + "-D_WIN64", + ] + else: # msvc + # cl.exe rejects the GCC-style flags above. Mirror the options the + # C++ module itself is built with (see cpp/CMakeLists.txt). 
C++17 is + # required because Cython 3 emits inline variables (error C7525); the + # C++11 headers compile cleanly under the newer standard. + extra_compile_args += [ + "/O2", + "/std:c++17", + "/EHsc", + "/bigobj", + "/utf-8", + "/Zc:__cplusplus", + "/DNOMINMAX", + "/D_CRT_SECURE_NO_WARNINGS", + "/D_CRT_NONSTDC_NO_WARNINGS", + "/D_SCL_SECURE_NO_WARNINGS", + "/D_WINSOCK_DEPRECATED_NO_WARNINGS", + ] else: raise RuntimeError(f"Unsupported platform: {sys.platform}") diff --git a/python/tsfile/__init__.py b/python/tsfile/__init__.py index 32021d52e..2e619a1f7 100644 --- a/python/tsfile/__init__.py +++ b/python/tsfile/__init__.py @@ -24,11 +24,15 @@ if sys.platform == "win32": os.add_dll_directory(_pkg_dir) - # Preload libtsfile.dll with absolute path to bypass DLL search issues. - # This ensures it's already in memory when .pyd extensions reference it. - _tsfile_dll = os.path.join(_pkg_dir, "libtsfile.dll") - if os.path.isfile(_tsfile_dll): - ctypes.CDLL(_tsfile_dll) + # Preload the tsfile DLL with an absolute path to bypass DLL search + # issues, so it is already in memory when the .pyd extensions reference + # it. The DLL is named tsfile.dll when built with MSVC and libtsfile.dll + # when built with MinGW. + for _dll_name in ("libtsfile.dll", "tsfile.dll"): + _tsfile_dll = os.path.join(_pkg_dir, _dll_name) + if os.path.isfile(_tsfile_dll): + ctypes.CDLL(_tsfile_dll) + break elif sys.platform == "darwin": _tsfile_dylib = os.path.join(_pkg_dir, "libtsfile.dylib") if os.path.isfile(_tsfile_dylib): From fda40a01e4aeb20d00c62d1c506e940aa3de738d Mon Sep 17 00:00:00 2001 From: colinleeo Date: Tue, 19 May 2026 09:43:36 +0800 Subject: [PATCH 02/14] fix clang format. 
--- cpp/src/common/container/bit_map.h | 3 ++- cpp/src/utils/util_define.h | 6 +++--- cpp/src/writer/tsfile_writer.h | 2 +- cpp/test/cwrapper/c_release_test.cc | 26 +++++++++++++------------- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/cpp/src/common/container/bit_map.h b/cpp/src/common/container/bit_map.h index dea8c4f98..757ab1fb1 100644 --- a/cpp/src/common/container/bit_map.h +++ b/cpp/src/common/container/bit_map.h @@ -163,7 +163,8 @@ class BitMap { const uint32_t byte_end = (total_bits + 7) >> 3; for (++byte_idx; byte_idx < byte_end; ++byte_idx) { if (p[byte_idx]) { - uint32_t pos = (byte_idx << 3) + bitops::ctz_nonzero(p[byte_idx]); + uint32_t pos = + (byte_idx << 3) + bitops::ctz_nonzero(p[byte_idx]); return pos < total_bits ? pos : total_bits; } } diff --git a/cpp/src/utils/util_define.h b/cpp/src/utils/util_define.h index 44de4603c..3cbe1d4c9 100644 --- a/cpp/src/utils/util_define.h +++ b/cpp/src/utils/util_define.h @@ -208,9 +208,9 @@ inline T faa(T* p, V v) { // add-and-fetch: returns the value held *after* the addition. template inline T aaf(T* p, V v) { - return static_cast(as_atomic(p)->fetch_add(static_cast(v), - std::memory_order_seq_cst) + - static_cast(v)); + return static_cast( + as_atomic(p)->fetch_add(static_cast(v), std::memory_order_seq_cst) + + static_cast(v)); } // compare-and-swap: returns true on success; on failure writes the current // value into *expected (same contract as __atomic_compare_exchange_n). 
diff --git a/cpp/src/writer/tsfile_writer.h b/cpp/src/writer/tsfile_writer.h index 0d4185874..a2c8f2842 100644 --- a/cpp/src/writer/tsfile_writer.h +++ b/cpp/src/writer/tsfile_writer.h @@ -31,9 +31,9 @@ #include "common/container/simple_vector.h" #include "common/device_id.h" #include "common/record.h" -#include "utils/util_define.h" // mode_t and other platform-compat shims #include "common/schema.h" #include "common/tablet.h" +#include "utils/util_define.h" // mode_t and other platform-compat shims namespace storage { class WriteFile; diff --git a/cpp/test/cwrapper/c_release_test.cc b/cpp/test/cwrapper/c_release_test.cc index 8cdb7a977..375c7e115 100644 --- a/cpp/test/cwrapper/c_release_test.cc +++ b/cpp/test/cwrapper/c_release_test.cc @@ -101,9 +101,9 @@ TEST_F(CReleaseTest, TsFileWriterNew) { table_schema.column_schemas = static_cast(malloc(sizeof(ColumnSchema) * 2)); table_schema.column_schemas[0] = - ColumnSchema{strdup("col1"),TS_DATATYPE_STRING,TAG}; + ColumnSchema{strdup("col1"), TS_DATATYPE_STRING, TAG}; table_schema.column_schemas[1] = - ColumnSchema{strdup("col2"),TS_DATATYPE_INT32,FIELD}; + ColumnSchema{strdup("col2"), TS_DATATYPE_INT32, FIELD}; writer = tsfile_writer_new(file, &table_schema, &error_code); ASSERT_EQ(RET_OK, error_code); @@ -128,15 +128,15 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) { abnormal_schema.column_schemas = static_cast(malloc(sizeof(ColumnSchema) * 4)); abnormal_schema.column_schemas[0] = - ColumnSchema{strdup("!@#$%^*()_+-="),TS_DATATYPE_STRING,TAG}; + ColumnSchema{strdup("!@#$%^*()_+-="), TS_DATATYPE_STRING, TAG}; // TAG's datatype is not correct abnormal_schema.column_schemas[1] = - ColumnSchema{strdup("TAG2"),TS_DATATYPE_INT32,TAG}; + ColumnSchema{strdup("TAG2"), TS_DATATYPE_INT32, TAG}; // same column name with column[0] abnormal_schema.column_schemas[2] = - ColumnSchema{strdup("!@#$%^*()_+-="),TS_DATATYPE_DOUBLE,FIELD}; + ColumnSchema{strdup("!@#$%^*()_+-="), TS_DATATYPE_DOUBLE, FIELD}; // column name 
conflict TsFileWriter writer = @@ -145,7 +145,7 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) { free(abnormal_schema.column_schemas[2].column_name); abnormal_schema.column_schemas[2] = - ColumnSchema{strdup("!@#$%^*()_+-=1"),TS_DATATYPE_DOUBLE,FIELD}; + ColumnSchema{strdup("!@#$%^*()_+-=1"), TS_DATATYPE_DOUBLE, FIELD}; // datatype conflict writer = tsfile_writer_new(file, &abnormal_schema, &error_code); @@ -153,7 +153,7 @@ TEST_F(CReleaseTest, TsFileWriterWriteDataAbnormalColumn) { free(abnormal_schema.column_schemas[1].column_name); abnormal_schema.column_schemas[1] = - ColumnSchema{strdup("TAG2"),TS_DATATYPE_STRING,TAG}; + ColumnSchema{strdup("TAG2"), TS_DATATYPE_STRING, TAG}; writer = tsfile_writer_new(file, &abnormal_schema, &error_code); ASSERT_EQ(RET_OK, error_code); @@ -225,17 +225,17 @@ TEST_F(CReleaseTest, TsFileWriterMultiDataType) { all_type_schema.column_schemas = static_cast(malloc(sizeof(ColumnSchema) * 6)); all_type_schema.column_schemas[0] = - ColumnSchema{strdup("TAG"),TS_DATATYPE_STRING,TAG}; + ColumnSchema{strdup("TAG"), TS_DATATYPE_STRING, TAG}; all_type_schema.column_schemas[1] = - ColumnSchema{strdup("INT32"),TS_DATATYPE_INT32,FIELD}; + ColumnSchema{strdup("INT32"), TS_DATATYPE_INT32, FIELD}; all_type_schema.column_schemas[2] = - ColumnSchema{strdup("INT64"),TS_DATATYPE_INT64,FIELD}; + ColumnSchema{strdup("INT64"), TS_DATATYPE_INT64, FIELD}; all_type_schema.column_schemas[3] = - ColumnSchema{strdup("FLOAT"),TS_DATATYPE_FLOAT,FIELD}; + ColumnSchema{strdup("FLOAT"), TS_DATATYPE_FLOAT, FIELD}; all_type_schema.column_schemas[4] = - ColumnSchema{strdup("DOUBLE"),TS_DATATYPE_DOUBLE,FIELD}; + ColumnSchema{strdup("DOUBLE"), TS_DATATYPE_DOUBLE, FIELD}; all_type_schema.column_schemas[5] = - ColumnSchema{strdup("BOOLEAN"),TS_DATATYPE_BOOLEAN,FIELD}; + ColumnSchema{strdup("BOOLEAN"), TS_DATATYPE_BOOLEAN, FIELD}; TsFileWriter writer = tsfile_writer_new(file, &all_type_schema, &error_code); From d1f421c4784e12569ba4ac7e9533afb393452855 Mon 
Sep 17 00:00:00 2001 From: colinleeo Date: Tue, 19 May 2026 09:54:33 +0800 Subject: [PATCH 03/14] fix compile on win. --- .github/workflows/unit-test-cpp-msvc.yml | 32 +++++++++++++++++++++--- cpp/pom.xml | 7 ++++++ pom.xml | 27 +++++++++++++++++++- 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/.github/workflows/unit-test-cpp-msvc.yml b/.github/workflows/unit-test-cpp-msvc.yml index e0a706ca3..dc687607e 100644 --- a/.github/workflows/unit-test-cpp-msvc.yml +++ b/.github/workflows/unit-test-cpp-msvc.yml @@ -37,11 +37,26 @@ jobs: unit-test-msvc: strategy: fail-fast: false + # The windows-latest runner ships Visual Studio 2022. To exercise the + # VS 2017 compatibility this branch targets, the non-ASan jobs build with + # the v141 toolset (the VS 2017 compiler, cl.exe 19.16) via a VS 2022 + # generator. AddressSanitizer is not supported by v141, so the ASan jobs + # use the default v143 toolset (VS 2022), which fully supports + # /fsanitize=address. matrix: - build_type: [Release, Debug] - # AddressSanitizer for MSVC requires Visual Studio 2019 16.9+, which - # the windows-latest runner provides. - enable_asan: [NoAsan, Asan] + include: + - build_type: Release + enable_asan: NoAsan + toolset: v141 + - build_type: Debug + enable_asan: NoAsan + toolset: v141 + - build_type: Release + enable_asan: Asan + toolset: default + - build_type: Debug + enable_asan: Asan + toolset: default runs-on: windows-latest steps: @@ -75,6 +90,9 @@ jobs: # Run the maven build, selecting the MSVC toolchain via -Dcpp.toolchain=msvc. # spotless (clang-format) is already covered by unit-test-cpp.yml, so it is # skipped here to keep this workflow focused on the MSVC build. + # + # For the v141 jobs, -Dmsvc.toolset=v141 pins the VS 2017 compiler; the + # ASan jobs pass no toolset and so use the generator's default (v143). 
- name: Build and test with Maven (MSVC) shell: bash run: | @@ -83,8 +101,14 @@ jobs: else ASAN_VALUE="OFF" fi + if [ "${{ matrix.toolset }}" = "default" ]; then + TOOLSET_ARG="" + else + TOOLSET_ARG="-Dmsvc.toolset=${{ matrix.toolset }}" + fi ./mvnw.cmd -P with-cpp \ -Dcpp.toolchain=msvc \ + $TOOLSET_ARG \ -Denable.asan=$ASAN_VALUE \ -Dbuild.type=${{ matrix.build_type }} \ -Dspotless.skip=true \ diff --git a/cpp/pom.xml b/cpp/pom.xml index f14c9d24b..0bbdb73bf 100644 --- a/cpp/pom.xml +++ b/cpp/pom.xml @@ -95,6 +95,13 @@ plugin drops empty options. --> + + diff --git a/pom.xml b/pom.xml index d726b2d26..dec545bb7 100644 --- a/pom.xml +++ b/pom.xml @@ -50,11 +50,18 @@ the CMake generator to Visual Studio. The msvc.generator value may be overridden, e.g. -Dmsvc.generator="Visual Studio 15 2017", to match the Visual Studio version installed on the build machine. + + msvc.toolset optionally pins the MSVC toolset, e.g. -Dmsvc.toolset=v141 + builds with the Visual Studio 2017 compiler while still using a + Visual Studio 2022 generator (useful on machines / CI runners that + only ship VS 2022 but need to verify VS 2017 compatibility). + When unset, CMake uses the generator's default toolset. --> Visual Studio 17 2022 x64 - + + @@ -827,6 +834,24 @@ -DCMAKE_GENERATOR_PLATFORM=${msvc.platform} + + + .cpp-msvc-toolset + + + msvc.toolset + + + + -DCMAKE_GENERATOR_TOOLSET=${msvc.toolset} + + .skipTests From 8b3d51b3a7e04f74d8c7d6c5cf0cbd39b8da0fa0 Mon Sep 17 00:00:00 2001 From: colinleeo Date: Tue, 19 May 2026 10:01:08 +0800 Subject: [PATCH 04/14] fix ci workflow. 
--- .github/workflows/unit-test-cpp-msvc.yml | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/workflows/unit-test-cpp-msvc.yml b/.github/workflows/unit-test-cpp-msvc.yml index dc687607e..280f0506b 100644 --- a/.github/workflows/unit-test-cpp-msvc.yml +++ b/.github/workflows/unit-test-cpp-msvc.yml @@ -37,12 +37,11 @@ jobs: unit-test-msvc: strategy: fail-fast: false - # The windows-latest runner ships Visual Studio 2022. To exercise the - # VS 2017 compatibility this branch targets, the non-ASan jobs build with - # the v141 toolset (the VS 2017 compiler, cl.exe 19.16) via a VS 2022 - # generator. AddressSanitizer is not supported by v141, so the ASan jobs - # use the default v143 toolset (VS 2022), which fully supports - # /fsanitize=address. + # To exercise the VS 2017 compatibility this branch targets, the non-ASan + # jobs build with the v141 toolset (the VS 2017 compiler, cl.exe 19.16) + # via a VS 2022 generator. AddressSanitizer is not supported by v141, so + # the ASan jobs use the default v143 toolset (VS 2022), which fully + # supports /fsanitize=address. matrix: include: - build_type: Release @@ -57,7 +56,12 @@ jobs: - build_type: Debug enable_asan: Asan toolset: default - runs-on: windows-latest + # Pinned to windows-2022 deliberately: it ships Visual Studio 2022, which + # the "Visual Studio 17 2022" CMake generator and the v141 toolset both + # require. The windows-latest image has since moved to a newer Visual + # Studio that the bundled CMake (3.30.x) does not yet recognise as a + # generator. Bump this (and msvc.generator) together when migrating. + runs-on: windows-2022 steps: From cad083333c3a1e7d380b9729f490e807f7c6cfdc Mon Sep 17 00:00:00 2001 From: colinleeo Date: Tue, 19 May 2026 10:56:12 +0800 Subject: [PATCH 05/14] add msvc python. 
--- .github/workflows/unit-test-python-msvc.yml | 104 ++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 .github/workflows/unit-test-python-msvc.yml diff --git a/.github/workflows/unit-test-python-msvc.yml b/.github/workflows/unit-test-python-msvc.yml new file mode 100644 index 000000000..db4e6cbde --- /dev/null +++ b/.github/workflows/unit-test-python-msvc.yml @@ -0,0 +1,104 @@ +# This workflow builds and tests the Python implementation of TsFile on +# Windows using the MSVC toolchain, as a complement to unit-test-python.yml +# which builds the Windows target with MinGW. +# +# The C++ shared library (libtsfile) is built with the v141 toolset (the +# Visual Studio 2017 compiler) to match the VS 2017 support this branch +# targets. The Cython extension links to it through the plain C wrapper ABI, +# so it builds cleanly with whichever MSVC toolset setuptools selects. + +name: Unit-Test-Py-MSVC + +on: + push: + branches: + - develop + - iotdb + - rc/* + paths-ignore: + - 'docs/**' + - 'java/**' + pull_request: + branches: + - develop + - dev/* + - iotdb + - rc/* + paths-ignore: + - 'docs/**' + - 'java/**' + # Enable manually starting builds. + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3 + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + +jobs: + unit-test-py-msvc: + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.11", "3.14"] + # Pinned to windows-2022 deliberately: it ships Visual Studio 2022, which + # the "Visual Studio 17 2022" CMake generator and the v141 toolset both + # require. See unit-test-cpp-msvc.yml for the full rationale. 
+ runs-on: windows-2022 + + steps: + + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Set up JDK 17 + uses: actions/setup-java@v5 + with: + distribution: corretto + java-version: 17 + + # Setup caching of the artifacts in the .m2 directory, so they don't have + # to all be downloaded again for every build. + - name: Cache Maven packages + uses: actions/cache@v5 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-msvc-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2- + + # Make the MSVC toolchain available so setup.py can compile the Cython + # extension with cl.exe. + - name: Set up MSVC developer environment + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: x64 + + # Build C++ (MSVC, v141 toolset) and the Python extension, then run the + # Python test-suite. -P with-python builds the cpp module first. + # spotless (black/clang-format) is already covered by the non-MSVC + # workflows, so it is skipped here to keep this workflow focused. + - name: Build and test with Maven (MSVC) + shell: bash + run: | + ./mvnw.cmd -P with-python \ + -Dcpp.toolchain=msvc \ + -Dmsvc.toolset=v141 \ + -Denable.asan=OFF \ + -Dbuild.type=Release \ + -Dspotless.skip=true \ + clean verify + + - name: Upload whl Artifact + uses: actions/upload-artifact@v7 + with: + name: tsfile-msvc-${{ runner.os }}-py${{ matrix.python-version }}-whl + path: python/dist/tsfile-*.whl + retention-days: 1 From 481d5a9205022df2993e652dab9e5d77ea72b51a Mon Sep 17 00:00:00 2001 From: colinleeo Date: Tue, 19 May 2026 13:21:53 +0800 Subject: [PATCH 06/14] fix ci. 
--- cpp/pom.xml | 87 +++++++++++++++++++++++++++++++++++++++++++++-------- pom.xml | 22 -------------- 2 files changed, 75 insertions(+), 34 deletions(-) diff --git a/cpp/pom.xml b/cpp/pom.xml index 0bbdb73bf..7061f2696 100644 --- a/cpp/pom.xml +++ b/cpp/pom.xml @@ -89,19 +89,15 @@ - - - @@ -227,6 +223,73 @@ ON + + + cpp-msvc + + + cpp.toolchain + msvc + + + + + + com.googlecode.cmake-maven-project + cmake-maven-plugin + + + cmake-generate-test-compile + + + + + + + + + + + + + + cpp-msvc-toolset + + + msvc.toolset + + + + + + com.googlecode.cmake-maven-project + cmake-maven-plugin + + + cmake-generate-test-compile + + + + + + + + + + + .java-9-and-above diff --git a/pom.xml b/pom.xml index dec545bb7..6cd695e85 100644 --- a/pom.xml +++ b/pom.xml @@ -59,9 +59,6 @@ --> Visual Studio 17 2022 x64 - - - @@ -831,25 +828,6 @@ ${msvc.generator} - -DCMAKE_GENERATOR_PLATFORM=${msvc.platform} - - - - - .cpp-msvc-toolset - - - msvc.toolset - - - - -DCMAKE_GENERATOR_TOOLSET=${msvc.toolset} From 0a29f3d2eb45ea2adf35ca76c1914803aed53de6 Mon Sep 17 00:00:00 2001 From: colinleeo Date: Tue, 19 May 2026 15:09:51 +0800 Subject: [PATCH 07/14] fix memory leak. --- .github/workflows/unit-test-cpp-msvc.yml | 30 +++++++++++-- .github/workflows/unit-test-python-msvc.yml | 25 ++++++++++- cpp/src/common/tablet.h | 47 +++++++++++++++++++++ 3 files changed, 96 insertions(+), 6 deletions(-) diff --git a/.github/workflows/unit-test-cpp-msvc.yml b/.github/workflows/unit-test-cpp-msvc.yml index 280f0506b..b12bce29a 100644 --- a/.github/workflows/unit-test-cpp-msvc.yml +++ b/.github/workflows/unit-test-cpp-msvc.yml @@ -57,10 +57,11 @@ jobs: enable_asan: Asan toolset: default # Pinned to windows-2022 deliberately: it ships Visual Studio 2022, which - # the "Visual Studio 17 2022" CMake generator and the v141 toolset both - # require. The windows-latest image has since moved to a newer Visual - # Studio that the bundled CMake (3.30.x) does not yet recognise as a - # generator. 
Bump this (and msvc.generator) together when migrating. + # the "Visual Studio 17 2022" CMake generator requires. The windows-latest + # image has since moved to a newer Visual Studio that the bundled CMake + # (3.30.x) does not yet recognise as a generator. Bump this (and + # msvc.generator) together when migrating. The v141 (VS 2017) toolset is + # no longer bundled with the image and is installed by a step below. runs-on: windows-2022 steps: @@ -91,6 +92,27 @@ jobs: with: arch: x64 + # The windows-2022 image no longer ships the v141 (VS 2017) toolset, so + # install it on demand for the v141 jobs. With the component present the + # VS 2022 generator can build with the VS 2017 compiler via + # -DCMAKE_GENERATOR_TOOLSET=v141. + - name: Install MSVC v141 (VS 2017) toolset + if: matrix.toolset == 'v141' + shell: pwsh + run: | + $ErrorActionPreference = "Stop" + $installerDir = "C:\Program Files (x86)\Microsoft Visual Studio\Installer" + $installPath = & (Join-Path $installerDir "vswhere.exe") -latest -property installationPath + Write-Host "Visual Studio install path: $installPath" + $argString = "modify --installPath `"$installPath`" " + + "--add Microsoft.VisualStudio.Component.VC.v141.x86.x64 " + + "--quiet --norestart --nocache --wait" + $proc = Start-Process -FilePath (Join-Path $installerDir "vs_installer.exe") ` + -ArgumentList $argString -Wait -PassThru + if ($proc.ExitCode -ne 0 -and $proc.ExitCode -ne 3010) { + throw "vs_installer modify failed with exit code $($proc.ExitCode)" + } + # Run the maven build, selecting the MSVC toolchain via -Dcpp.toolchain=msvc. # spotless (clang-format) is already covered by unit-test-cpp.yml, so it is # skipped here to keep this workflow focused on the MSVC build. 
diff --git a/.github/workflows/unit-test-python-msvc.yml b/.github/workflows/unit-test-python-msvc.yml index db4e6cbde..6b742a070 100644 --- a/.github/workflows/unit-test-python-msvc.yml +++ b/.github/workflows/unit-test-python-msvc.yml @@ -45,8 +45,9 @@ jobs: matrix: python-version: ["3.9", "3.11", "3.14"] # Pinned to windows-2022 deliberately: it ships Visual Studio 2022, which - # the "Visual Studio 17 2022" CMake generator and the v141 toolset both - # require. See unit-test-cpp-msvc.yml for the full rationale. + # the "Visual Studio 17 2022" CMake generator requires. The v141 (VS 2017) + # toolset is installed by a step below. See unit-test-cpp-msvc.yml for the + # full rationale. runs-on: windows-2022 steps: @@ -81,6 +82,26 @@ jobs: with: arch: x64 + # The windows-2022 image no longer ships the v141 (VS 2017) toolset that + # this build pins via -Dmsvc.toolset=v141, so install the component on + # demand. With it present the VS 2022 generator can build libtsfile with + # the VS 2017 compiler. + - name: Install MSVC v141 (VS 2017) toolset + shell: pwsh + run: | + $ErrorActionPreference = "Stop" + $installerDir = "C:\Program Files (x86)\Microsoft Visual Studio\Installer" + $installPath = & (Join-Path $installerDir "vswhere.exe") -latest -property installationPath + Write-Host "Visual Studio install path: $installPath" + $argString = "modify --installPath `"$installPath`" " + + "--add Microsoft.VisualStudio.Component.VC.v141.x86.x64 " + + "--quiet --norestart --nocache --wait" + $proc = Start-Process -FilePath (Join-Path $installerDir "vs_installer.exe") ` + -ArgumentList $argString -Wait -PassThru + if ($proc.ExitCode -ne 0 -and $proc.ExitCode -ne 3010) { + throw "vs_installer modify failed with exit code $($proc.ExitCode)" + } + # Build C++ (MSVC, v141 toolset) and the Python extension, then run the # Python test-suite. -P with-python builds the cpp module first. 
# spotless (black/clang-format) is already covered by the non-MSVC diff --git a/cpp/src/common/tablet.h b/cpp/src/common/tablet.h index 50750d02b..799d6b7cc 100644 --- a/cpp/src/common/tablet.h +++ b/cpp/src/common/tablet.h @@ -22,6 +22,7 @@ #include #include +#include #include #include "common/config/config.h" @@ -230,6 +231,52 @@ class Tablet { ~Tablet() { destroy(); } + // Tablet owns raw heap buffers (timestamps_, value_matrix_, bitmaps_) that + // destroy() frees. The implicitly generated copy operations would shallow- + // copy those pointers, causing double-free / use-after-free, so copying is + // disabled. Move transfers ownership and leaves the source empty (its + // pointers nulled) so the moved-from object destructs harmlessly. + Tablet(const Tablet&) = delete; + Tablet& operator=(const Tablet&) = delete; + + Tablet(Tablet&& other) noexcept + : err_code_(other.err_code_), + max_row_num_(other.max_row_num_), + cur_row_size_(other.cur_row_size_), + insert_target_name_(std::move(other.insert_target_name_)), + schema_vec_(std::move(other.schema_vec_)), + schema_map_(std::move(other.schema_map_)), + timestamps_(other.timestamps_), + value_matrix_(other.value_matrix_), + bitmaps_(other.bitmaps_), + column_categories_(std::move(other.column_categories_)), + id_column_indexes_(std::move(other.id_column_indexes_)) { + other.timestamps_ = nullptr; + other.value_matrix_ = nullptr; + other.bitmaps_ = nullptr; + } + + Tablet& operator=(Tablet&& other) noexcept { + if (this != &other) { + destroy(); + err_code_ = other.err_code_; + max_row_num_ = other.max_row_num_; + cur_row_size_ = other.cur_row_size_; + insert_target_name_ = std::move(other.insert_target_name_); + schema_vec_ = std::move(other.schema_vec_); + schema_map_ = std::move(other.schema_map_); + timestamps_ = other.timestamps_; + value_matrix_ = other.value_matrix_; + bitmaps_ = other.bitmaps_; + column_categories_ = std::move(other.column_categories_); + id_column_indexes_ = 
std::move(other.id_column_indexes_); + other.timestamps_ = nullptr; + other.value_matrix_ = nullptr; + other.bitmaps_ = nullptr; + } + return *this; + } + const std::string& get_table_name() const { return insert_target_name_; } void set_table_name(const std::string& table_name) { insert_target_name_ = table_name; From 26753ef6e2a3b68f712cb7798fad8c4e8ab79450 Mon Sep 17 00:00:00 2001 From: colinleeo Date: Tue, 19 May 2026 16:41:21 +0800 Subject: [PATCH 08/14] fix ci workflow. --- .github/workflows/unit-test-cpp-msvc.yml | 33 ++++++++++++++++----- .github/workflows/unit-test-python-msvc.yml | 33 ++++++++++++++++----- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/.github/workflows/unit-test-cpp-msvc.yml b/.github/workflows/unit-test-cpp-msvc.yml index b12bce29a..e7c88d4a3 100644 --- a/.github/workflows/unit-test-cpp-msvc.yml +++ b/.github/workflows/unit-test-cpp-msvc.yml @@ -104,13 +104,32 @@ jobs: $installerDir = "C:\Program Files (x86)\Microsoft Visual Studio\Installer" $installPath = & (Join-Path $installerDir "vswhere.exe") -latest -property installationPath Write-Host "Visual Studio install path: $installPath" - $argString = "modify --installPath `"$installPath`" " + - "--add Microsoft.VisualStudio.Component.VC.v141.x86.x64 " + - "--quiet --norestart --nocache --wait" - $proc = Start-Process -FilePath (Join-Path $installerDir "vs_installer.exe") ` - -ArgumentList $argString -Wait -PassThru - if ($proc.ExitCode -ne 0 -and $proc.ExitCode -ne 3010) { - throw "vs_installer modify failed with exit code $($proc.ExitCode)" + + # The v141 (VS 2017) toolset always installs as a 14.16.* directory. + function Test-V141Toolset { + [bool](Get-ChildItem -Path (Join-Path $installPath "VC\Tools\MSVC") ` + -Directory -Filter "14.16.*" -ErrorAction SilentlyContinue) + } + + if (Test-V141Toolset) { + Write-Host "v141 toolset already present." 
+ } else { + # vs_installer.exe 'modify' accepts only a limited switch set; + # bootstrapper-only switches such as --wait / --nocache make it + # fail with exit code 87 (ERROR_INVALID_PARAMETER). + $argString = "modify --installPath `"$installPath`" " + + "--add Microsoft.VisualStudio.Component.VC.v141.x86.x64 " + + "--quiet --norestart" + $proc = Start-Process -FilePath (Join-Path $installerDir "vs_installer.exe") ` + -ArgumentList $argString -Wait -PassThru + Write-Host "vs_installer exit code: $($proc.ExitCode)" + # vs_installer may delegate to a background process; wait for it. + Get-Process -Name "vs_installer", "setup" -ErrorAction SilentlyContinue | + Wait-Process -Timeout 900 -ErrorAction SilentlyContinue + if (-not (Test-V141Toolset)) { + throw "v141 toolset not found after install (vs_installer exit code $($proc.ExitCode))" + } + Write-Host "v141 toolset installed." } # Run the maven build, selecting the MSVC toolchain via -Dcpp.toolchain=msvc. diff --git a/.github/workflows/unit-test-python-msvc.yml b/.github/workflows/unit-test-python-msvc.yml index 6b742a070..d86756c31 100644 --- a/.github/workflows/unit-test-python-msvc.yml +++ b/.github/workflows/unit-test-python-msvc.yml @@ -93,13 +93,32 @@ jobs: $installerDir = "C:\Program Files (x86)\Microsoft Visual Studio\Installer" $installPath = & (Join-Path $installerDir "vswhere.exe") -latest -property installationPath Write-Host "Visual Studio install path: $installPath" - $argString = "modify --installPath `"$installPath`" " + - "--add Microsoft.VisualStudio.Component.VC.v141.x86.x64 " + - "--quiet --norestart --nocache --wait" - $proc = Start-Process -FilePath (Join-Path $installerDir "vs_installer.exe") ` - -ArgumentList $argString -Wait -PassThru - if ($proc.ExitCode -ne 0 -and $proc.ExitCode -ne 3010) { - throw "vs_installer modify failed with exit code $($proc.ExitCode)" + + # The v141 (VS 2017) toolset always installs as a 14.16.* directory. 
+ function Test-V141Toolset { + [bool](Get-ChildItem -Path (Join-Path $installPath "VC\Tools\MSVC") ` + -Directory -Filter "14.16.*" -ErrorAction SilentlyContinue) + } + + if (Test-V141Toolset) { + Write-Host "v141 toolset already present." + } else { + # vs_installer.exe 'modify' accepts only a limited switch set; + # bootstrapper-only switches such as --wait / --nocache make it + # fail with exit code 87 (ERROR_INVALID_PARAMETER). + $argString = "modify --installPath `"$installPath`" " + + "--add Microsoft.VisualStudio.Component.VC.v141.x86.x64 " + + "--quiet --norestart" + $proc = Start-Process -FilePath (Join-Path $installerDir "vs_installer.exe") ` + -ArgumentList $argString -Wait -PassThru + Write-Host "vs_installer exit code: $($proc.ExitCode)" + # vs_installer may delegate to a background process; wait for it. + Get-Process -Name "vs_installer", "setup" -ErrorAction SilentlyContinue | + Wait-Process -Timeout 900 -ErrorAction SilentlyContinue + if (-not (Test-V141Toolset)) { + throw "v141 toolset not found after install (vs_installer exit code $($proc.ExitCode))" + } + Write-Host "v141 toolset installed." } # Build C++ (MSVC, v141 toolset) and the Python extension, then run the From 779583ce660b147f628f3660de0875a75db56a87 Mon Sep 17 00:00:00 2001 From: colinleeo Date: Tue, 19 May 2026 18:19:44 +0800 Subject: [PATCH 09/14] fix debug info in compile. --- cpp/CMakeLists.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3f9be090e..608804a41 100755 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -134,13 +134,18 @@ if (ENABLE_ASAN) # /fsanitize=address is incompatible with the /RTC* runtime checks that # CMake injects into Debug builds, and with incremental linking. Strip # /RTC* from the per-config flags and force non-incremental linking. - add_compile_options(/fsanitize=address) + # + # ASan also needs debug info: /Zi (compile) + /DEBUG (link). 
Without it + # MSVC emits warning C5072 ("ASAN enabled without debug information + # emission"), which the bundled googletest build promotes to an error + # via /WX in Release builds, and ASan reports lose symbol/line info. + add_compile_options(/fsanitize=address /Zi) foreach (flagsVar CMAKE_C_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG CMAKE_C_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS_RELWITHDEBINFO) string(REGEX REPLACE "/RTC[1csu]+" "" ${flagsVar} "${${flagsVar}}") endforeach () - add_link_options(/INCREMENTAL:NO) + add_link_options(/INCREMENTAL:NO /DEBUG) elseif (NOT WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-omit-frame-pointer") From 871628982483490aec2ff1df8742b664134280ca Mon Sep 17 00:00:00 2001 From: colinleeo Date: Wed, 20 May 2026 09:54:27 +0800 Subject: [PATCH 10/14] fix unsequenced UB in DictionaryEncoder entry_index_ assignment. entry_index_[value] = entry_index_.size() has an unspecified result in C++11: operator[] inserts into the map (a modification) while size() reads the same map, and the evaluation order of the two operands is unspecified (it is only fixed, right operand first, from C++17 on). Under MSVC /O2 the left-hand side is evaluated first, inserting the new key and bumping the size by one before size() is read. The resulting off-by-one code causes the decoder to access index_entry_ out of bounds, detected as heap-buffer-overflow by ASan. Fix: use index_entry_.size() - 1 instead. index_entry_ is pushed before this line, so its size is already the correct 1-based count, and reading from a separate vector avoids the aliasing issue entirely. 
--- cpp/src/encoding/dictionary_encoder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/encoding/dictionary_encoder.h b/cpp/src/encoding/dictionary_encoder.h index fad4ef68f..be5f78a09 100644 --- a/cpp/src/encoding/dictionary_encoder.h +++ b/cpp/src/encoding/dictionary_encoder.h @@ -83,7 +83,7 @@ class DictionaryEncoder : public Encoder { if (entry_index_.count(value) == 0) { index_entry_.push_back(value); map_size_ = map_size_ + value.length(); - entry_index_[value] = entry_index_.size(); + entry_index_[value] = static_cast(index_entry_.size()) - 1; } values_encoder_.encode(entry_index_[value], out); return common::E_OK; From 1de0aea9250548d35f42dc5acce34f6edef28896 Mon Sep 17 00:00:00 2001 From: colinleeo Date: Wed, 20 May 2026 11:08:26 +0800 Subject: [PATCH 11/14] refine. --- cpp/src/common/container/blocking_queue.cc | 48 ---------------------- cpp/src/common/container/blocking_queue.h | 44 -------------------- cpp/src/common/global.h | 2 +- python/setup.py | 22 ++++++---- python/tsfile/__init__.py | 13 ++---- 5 files changed, 19 insertions(+), 110 deletions(-) delete mode 100644 cpp/src/common/container/blocking_queue.cc delete mode 100644 cpp/src/common/container/blocking_queue.h diff --git a/cpp/src/common/container/blocking_queue.cc b/cpp/src/common/container/blocking_queue.cc deleted file mode 100644 index db2ecb3c9..000000000 --- a/cpp/src/common/container/blocking_queue.cc +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "blocking_queue.h" - -namespace common { - -BlockingQueue::BlockingQueue() : queue_(), mutex_(), cond_() {} - -BlockingQueue::~BlockingQueue() {} - -void BlockingQueue::push(void* data) { - { - std::lock_guard lock(mutex_); - queue_.push(data); - } - /* - * it is safe to signal after unlock. - * std::condition_variable::wait unlocks and sleeps atomically. - */ - cond_.notify_one(); -} - -void* BlockingQueue::pop() { - std::unique_lock lock(mutex_); - cond_.wait(lock, [this] { return !queue_.empty(); }); - void* ret_data = queue_.front(); - queue_.pop(); - return ret_data; -} - -} // end namespace common \ No newline at end of file diff --git a/cpp/src/common/container/blocking_queue.h b/cpp/src/common/container/blocking_queue.h deleted file mode 100644 index 15572ec18..000000000 --- a/cpp/src/common/container/blocking_queue.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#ifndef COMMON_CONTAINER_BLOCKING_QUEUE_H -#define COMMON_CONTAINER_BLOCKING_QUEUE_H - -#include -#include -#include - -namespace common { - -class BlockingQueue { - public: - BlockingQueue(); - ~BlockingQueue(); - - void push(void* data); - // if empty, blocking - void* pop(); - - private: - std::queue queue_; - std::mutex mutex_; - std::condition_variable cond_; -}; - -} // end namespace common -#endif // COMMON_CONTAINER_BLOCKING_QUEUE_H diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h index 57adc81f5..5bee0fa60 100644 --- a/cpp/src/common/global.h +++ b/cpp/src/common/global.h @@ -184,7 +184,7 @@ FORCE_INLINE int set_write_thread_count(int32_t count) { #ifdef ENABLE_THREADS class ThreadPool; // Global write thread pool, created by init_common(). -extern TSFILE_API ThreadPool* g_write_thread_pool_; +extern ThreadPool* g_write_thread_pool_; #endif extern int init_common(); diff --git a/python/setup.py b/python/setup.py index 3bd19365f..181ffcbed 100644 --- a/python/setup.py +++ b/python/setup.py @@ -129,17 +129,23 @@ def _find_lib(root, patterns): f"missing tsfile import library (*.lib or *.dll.a) in {CPP_LIB}" ) - # Copy the DLL keeping its original base name: the import library embeds - # that name, so the .pyd extensions must find a DLL with the same name. - shutil.copy2(dll_src, PKG / dll_src.name) - shutil.copy2(imp_src, PKG / imp_src.name) + # Copy the DLL with a unified name regardless of toolchain. + dll_dst = PKG / "tsfile.dll" + shutil.copy2(dll_src, dll_dst) + + # Copy import library with a name matching the DLL. 
+ if win_toolchain == "mingw": + imp_dst = PKG / "tsfile.dll.a" + else: + imp_dst = PKG / "tsfile.lib" + shutil.copy2(imp_src, imp_dst) print(f"setup.py: Windows toolchain = {win_toolchain}") - print(f"setup.py: copied {dll_src.name} and {imp_src.name}") + print(f"setup.py: copied {dll_src.name} -> tsfile.dll and {imp_src.name} -> {imp_dst.name}") if win_toolchain == "mingw": - # Copy MinGW runtime DLLs next to libtsfile.dll so Python can find - # them. Python 3.8+ does not search PATH for DLLs; they must sit in - # the same directory as the .pyd extensions (os.add_dll_directory). + # Copy MinGW runtime DLLs next to tsfile.dll so Python can find them. + # Python 3.8+ does not search PATH for DLLs; they must sit in the + # same directory as the .pyd extensions (os.add_dll_directory). for _mingw_dll in ( "libstdc++-6.dll", "libgcc_s_seh-1.dll", diff --git a/python/tsfile/__init__.py b/python/tsfile/__init__.py index 2e619a1f7..cab85421a 100644 --- a/python/tsfile/__init__.py +++ b/python/tsfile/__init__.py @@ -24,15 +24,10 @@ if sys.platform == "win32": os.add_dll_directory(_pkg_dir) - # Preload the tsfile DLL with an absolute path to bypass DLL search - # issues, so it is already in memory when the .pyd extensions reference - # it. The DLL is named tsfile.dll when built with MSVC and libtsfile.dll - # when built with MinGW. - for _dll_name in ("libtsfile.dll", "tsfile.dll"): - _tsfile_dll = os.path.join(_pkg_dir, _dll_name) - if os.path.isfile(_tsfile_dll): - ctypes.CDLL(_tsfile_dll) - break + # Preload tsfile.dll with absolute path to bypass DLL search issues. + _tsfile_dll = os.path.join(_pkg_dir, "tsfile.dll") + if os.path.isfile(_tsfile_dll): + ctypes.CDLL(_tsfile_dll) elif sys.platform == "darwin": _tsfile_dylib = os.path.join(_pkg_dir, "libtsfile.dylib") if os.path.isfile(_tsfile_dylib): From eac8e42e5a9561d917e536fac89bebd2b72e6edc Mon Sep 17 00:00:00 2001 From: colinleeo Date: Wed, 20 May 2026 11:33:46 +0800 Subject: [PATCH 12/14] fix clang format. 
--- cpp/README.md | 57 ++++++++++++++++++++++++++++++++++++++++++------- python/setup.py | 4 +++- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/cpp/README.md b/cpp/README.md index 9f3ee7ac0..639aa38f7 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -36,9 +36,9 @@ The source code can be found in the `./src` directory. C/C++ examples are locate ## How to make contributions -We use `clang-format` to ensure that our C++ code adheres to a consistent set of rules defined in `./clang-format`. This is similar to the Google style. +We use `clang-format` to ensure that our C++ code adheres to a consistent set of rules defined in `.clang-format`. This is similar to the Google style. -`mvn spotless` uses `clang-format v17.0.6` for C++ code formatting. Please make sure the `clang-format` in your `PATH` matches this version before submitting code. +`mvn spotless:apply` uses `clang-format v17.0.6` for C++ code formatting. Please make sure the `clang-format` in your `PATH` matches this version before submitting code. How to install `clang-format v17.0.6`: @@ -70,31 +70,72 @@ mvn spotless:apply -P with-cpp If you need to skip code formatting temporarily, you can add `-Dspotless.skip=true`, for example: ```bash -mvn package -P with-cpp clean verify -Dspotless.skip=true +mvn clean verify -P with-cpp -Dspotless.skip=true ``` +### Platform Support + +TsFile C++ now supports: +- **Linux**: GCC/Clang +- **macOS**: Clang +- **Windows**: MSVC 2017+ and MinGW + +All code must compile without errors on all supported platforms before submission. + We welcome any bug reports. You can open an issue with a title starting with [CPP] to describe the bug, like: https://github.com/apache/tsfile/issues/94 ## Build ### Requirements +TsFile C++ supports three toolchains: + +**Linux (GCC/Clang):** ```bash sudo apt-get update sudo apt-get install -y cmake make g++ clang-format libuuid-dev ``` -To build tsfile, you can run: `bash build.sh`. 
If you have Maven tools, you can run: `mvn package -P with-cpp clean verify`. Then, you can find the shared object at `./build`. - -Before you submit your code to GitHub, please ensure that the `mvn` compilation is correct. +**Windows (MSVC):** +- Visual Studio 2017 or later +- CMake 3.11+ +**Windows (MinGW):** If you compile using MinGW on windows and encounter an error, you can try replacing MinGW with the following version that we have tried without problems: - * GCC 14.2.0 (with **POSIX** threads) + LLVM/Clang/LLD/LLDB 18.1.8 + MinGW-w64 12.0.0 UCRT - release 1 * GCC 12.2.0 + LLVM/Clang/LLD/LLDB 16.0.0 + MinGW-w64 10.0.0 (UCRT) - release 5 * GCC 12.2.0 + LLVM/Clang/LLD/LLDB 16.0.0 + MinGW-w64 10.0.0 (MSVCRT) - release 5 * GCC 11.2.0 + MinGW-w64 10.0.0 (MSVCRT) - release 1 +### Build Instructions + +To build tsfile, use Maven which automatically detects and uses the appropriate toolchain: + +```bash +mvn clean verify -P with-cpp +``` + +**Toolchain Selection:** + +Maven will automatically select the compiler based on your platform: +- **Linux**: GCC/Clang +- **macOS**: Clang +- **Windows**: MinGW (default) or MSVC + +To explicitly specify a toolchain on Windows: + +```bash +# Use MinGW (default on Windows) +mvn clean verify -P with-cpp -Dcpp.toolchain=mingw + +# Use MSVC +mvn clean verify -P with-cpp -Dcpp.toolchain=msvc +``` + +Then you can find the shared library at `./cpp/target/build/lib`. + +Before you submit your code to GitHub, please ensure that the compilation is correct. + ### configure the cross-compilation toolchain Modify the Toolchain File `cmake/ToolChain.cmake`, define the following variables: @@ -139,4 +180,4 @@ By default, parallel write is enabled when the machine has more than one CPU cor ## Use TsFile -You can find examples on how to read and write data in `demo_read.cpp` and `demo_write.cpp` located under `./examples/cpp_examples`. 
There are also examples under `./examples/c_examples`on how to use a C-style API to read and write data in a C environment. You can run `bash build.sh` under `./examples` to generate an executable output under `./examples/build`. +You can find examples on how to read and write data in `demo_read.cpp` and `demo_write.cpp` located under `./examples/cpp_examples`. There are also examples under `./examples/c_examples` on how to use a C-style API to read and write data in a C environment. The examples will be built automatically when you run the main build command. diff --git a/python/setup.py b/python/setup.py index 181ffcbed..5bbee297c 100644 --- a/python/setup.py +++ b/python/setup.py @@ -140,7 +140,9 @@ def _find_lib(root, patterns): imp_dst = PKG / "tsfile.lib" shutil.copy2(imp_src, imp_dst) print(f"setup.py: Windows toolchain = {win_toolchain}") - print(f"setup.py: copied {dll_src.name} -> tsfile.dll and {imp_src.name} -> {imp_dst.name}") + print( + f"setup.py: copied {dll_src.name} -> tsfile.dll and {imp_src.name} -> {imp_dst.name}" + ) if win_toolchain == "mingw": # Copy MinGW runtime DLLs next to tsfile.dll so Python can find them. From 1bce8de6ed3b7c609bb66666a68e3e082b258e66 Mon Sep 17 00:00:00 2001 From: colinleeo Date: Wed, 20 May 2026 13:30:50 +0800 Subject: [PATCH 13/14] fix compile. --- cpp/CMakeLists.txt | 12 ++++++++++-- python/tsfile/__init__.py | 17 +++++++++++++---- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 608804a41..ba2c0c921 100755 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -32,7 +32,11 @@ endif () set(TsFile_CPP_VERSION 2.2.1.dev) if (MSVC) - set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} /W3 /utf-8 /EHsc /bigobj /Zc:__cplusplus") + # MSVC does not provide a /std:c++11 flag; C++11 is its implicit baseline. + # The lowest explicitly settable standard is /std:c++14. 
Without this flag, + # the default varies by VS version (VS2017+ defaults to C++14 mode with some + # C++17 extensions), so we pin it explicitly for reproducibility. + set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} /W3 /utf-8 /EHsc /bigobj /Zc:__cplusplus /std:c++14") add_definitions(-DNOMINMAX -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS -D_WINSOCK_DEPRECATED_NO_WARNINGS) # Export all symbols of the tsfile shared library automatically so that @@ -228,7 +232,11 @@ set(THIRD_PARTY_INCLUDE ${PROJECT_BINARY_DIR}/third_party) set(SAVED_CXX_FLAGS "${CMAKE_CXX_FLAGS}") if (MSVC) - set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} /W3 /utf-8 /EHsc /bigobj /Zc:__cplusplus") + # MSVC does not provide a /std:c++11 flag; C++11 is its implicit baseline. + # The lowest explicitly settable standard is /std:c++14. Without this flag, + # the default varies by VS version (VS2017+ defaults to C++14 mode with some + # C++17 extensions), so we pin it explicitly for reproducibility. + set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} /W3 /utf-8 /EHsc /bigobj /Zc:__cplusplus /std:c++14") else () set(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS} -Wall -std=c++11") endif () diff --git a/python/tsfile/__init__.py b/python/tsfile/__init__.py index cab85421a..985ec9254 100644 --- a/python/tsfile/__init__.py +++ b/python/tsfile/__init__.py @@ -23,11 +23,20 @@ _pkg_dir = os.path.dirname(os.path.abspath(__file__)) if sys.platform == "win32": - os.add_dll_directory(_pkg_dir) - # Preload tsfile.dll with absolute path to bypass DLL search issues. + # Keep the returned handle in a module-level name. The directory stays on + # the DLL search path until close() is called on that handle, so holding + # it allows the registration to be undone explicitly if ever required. + _dll_dir = os.add_dll_directory(_pkg_dir) + # Preload tsfile.dll so Windows finds it by base-name when loading the + # Cython extensions. Store the handle to prevent the DLL from being + # unloaded prematurely.
_tsfile_dll = os.path.join(_pkg_dir, "tsfile.dll") - if os.path.isfile(_tsfile_dll): - ctypes.CDLL(_tsfile_dll) + if not os.path.isfile(_tsfile_dll): + raise FileNotFoundError( + f"tsfile.dll not found in {_pkg_dir}. " + "Re-build the C++ module and reinstall the Python package." + ) + _tsfile_cdll = ctypes.CDLL(_tsfile_dll) elif sys.platform == "darwin": _tsfile_dylib = os.path.join(_pkg_dir, "libtsfile.dylib") if os.path.isfile(_tsfile_dylib): From 277e53846af4f04964834ca5b8781d71bacf7cc5 Mon Sep 17 00:00:00 2001 From: colinleeo Date: Wed, 20 May 2026 14:27:32 +0800 Subject: [PATCH 14/14] fix compile on mingw. --- python/setup.py | 9 ++++++--- python/tsfile/__init__.py | 14 +++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/python/setup.py b/python/setup.py index 5bbee297c..19ada7921 100644 --- a/python/setup.py +++ b/python/setup.py @@ -129,8 +129,11 @@ def _find_lib(root, patterns): f"missing tsfile import library (*.lib or *.dll.a) in {CPP_LIB}" ) - # Copy the DLL with a unified name regardless of toolchain. - dll_dst = PKG / "tsfile.dll" + # For MSVC the DLL is already named tsfile.dll. For MinGW, keep the original + # name (libtsfile.dll): the import library (libtsfile.dll.a) has that name + # baked in, so Cython .pyd files record "libtsfile.dll" in their PE import + # table and Windows must find it by that exact name at runtime. + dll_dst = PKG / dll_src.name shutil.copy2(dll_src, dll_dst) # Copy import library with a name matching the DLL.
@@ -141,7 +144,7 @@ def _find_lib(root, patterns): shutil.copy2(imp_src, imp_dst) print(f"setup.py: Windows toolchain = {win_toolchain}") print( - f"setup.py: copied {dll_src.name} -> tsfile.dll and {imp_src.name} -> {imp_dst.name}" + f"setup.py: copied {dll_src.name} -> {dll_dst.name} and {imp_src.name} -> {imp_dst.name}" ) if win_toolchain == "mingw": diff --git a/python/tsfile/__init__.py b/python/tsfile/__init__.py index 985ec9254..1026ec673 100644 --- a/python/tsfile/__init__.py +++ b/python/tsfile/__init__.py @@ -27,16 +27,20 @@ # the DLL search path until close() is called on that handle, so holding # it allows the registration to be undone explicitly if ever required. _dll_dir = os.add_dll_directory(_pkg_dir) - # Preload tsfile.dll so Windows finds it by base-name when loading the + # Preload the tsfile DLL so Windows finds it by base-name when loading the # Cython extensions. Store the handle to prevent the DLL from being # unloaded prematurely. - _tsfile_dll = os.path.join(_pkg_dir, "tsfile.dll") - if not os.path.isfile(_tsfile_dll): + # MSVC builds produce "tsfile.dll"; MinGW builds produce "libtsfile.dll". + for _dll_name in ("tsfile.dll", "libtsfile.dll"): + _tsfile_dll = os.path.join(_pkg_dir, _dll_name) + if os.path.isfile(_tsfile_dll): + _tsfile_cdll = ctypes.CDLL(_tsfile_dll) + break + else: raise FileNotFoundError( - f"tsfile.dll not found in {_pkg_dir}. " + f"tsfile DLL (tsfile.dll or libtsfile.dll) not found in {_pkg_dir}. " "Re-build the C++ module and reinstall the Python package." ) - _tsfile_cdll = ctypes.CDLL(_tsfile_dll) elif sys.platform == "darwin": _tsfile_dylib = os.path.join(_pkg_dir, "libtsfile.dylib") if os.path.isfile(_tsfile_dylib):