dotnet
diff --git a/‎RELEASENOTES.md‎
Lines changed: 9 additions & 0 deletions b/‎RELEASENOTES.md‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎build/Dependencies.props‎
Lines changed: 1 addition & 1 deletion b/‎build/Dependencies.props‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/Native/LibTorchSharp/THSExport.cpp‎
Lines changed: 30 additions & 119 deletions b/‎src/Native/LibTorchSharp/THSExport.cpp‎
Lines changed: 30 additions & 119 deletions
diff --git a/‎src/Native/LibTorchSharp/THSExport.h‎
Lines changed: 19 additions & 35 deletions b/‎src/Native/LibTorchSharp/THSExport.h‎
Lines changed: 19 additions & 35 deletions
diff --git a/‎src/Native/LibTorchSharp/Utils.h‎
Lines changed: 4 additions & 3 deletions b/‎src/Native/LibTorchSharp/Utils.h‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎src/Redist/libtorch-cpu/libtorch-macos-arm64-2.9.0.zip.sha‎
Lines changed: 1 addition & 0 deletions b/‎src/Redist/libtorch-cpu/libtorch-macos-arm64-2.9.0.zip.sha‎
Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,15 @@
 ## TorchSharp Release Notes
 
 Releases, starting with 9/2/2021, are listed with the most recent release at the top.
+# NuGet Version 0.106.0 (Upcoming)
+
+This release upgrades the libtorch backend to v2.9.0.
+
+__API Changes__:
+
+#1498 Add support for torch.export ExportedProgram models (.pt2 files)<br/>
+TorchSharp now supports loading and executing PyTorch models exported via torch.export using AOTInductor compilation. Use `torch.export.load()` to load `.pt2` model packages compiled with `torch._inductor.aoti_compile_and_package()` in Python. This provides 30-40% better inference latency compared to TorchScript models. Note: This is an inference-only API with no training support.<br/>
+
 # NuGet Version 0.105.2
 
 This release upgrades the libtorch backend to v2.7.1, using CUDA 12.8.
 
@@ -7,7 +7,7 @@
 
   <!-- Other/Non-Core Product Dependencies -->
   <PropertyGroup>
-    <LibTorchVersion>2.7.1</LibTorchVersion>
+    <LibTorchVersion>2.9.0</LibTorchVersion>
     <LibTorchVersion Condition="'$(TargetArchitecture)' == 'x64' and '$(TargetOS)' == 'mac'">2.2.2</LibTorchVersion>
     <CudaVersionDot>12.8</CudaVersionDot>
     <CudaVersionNoDot>128</CudaVersionNoDot>
 
@@ -1,23 +1,17 @@
 // Copyright (c) .NET Foundation and Contributors.  All Rights Reserved.  See LICENSE in the project root for license information.
 #include "THSExport.h"
 
-// NOTE: In LibTorch C++ API, ExportedProgram models (.pt2 files) are loaded using torch::jit::load()
-// The .pt2 format is compatible with the TorchScript loading infrastructure
+// torch.export support via AOTInductor
+// This uses torch::inductor::AOTIModelPackageLoader which is INFERENCE-ONLY
+// Models must be compiled with torch._inductor.aoti_compile_and_package() in Python
 
-ExportedProgramModule THSExport_load(const char* filename, int64_t device, int64_t index)
+ExportedProgramModule THSExport_load(const char* filename)
 {
-    c10::DeviceType dev = c10::kCPU;
-    if (device == 1)
-        dev = c10::kCUDA;
-    if (device == 13)
-        dev = c10::kMPS;
-
     CATCH(
-        // Load .pt2 file using torch::jit::load
-        // This works because ExportedProgram models are serialized in a JIT-compatible format
-        auto res = torch::jit::load(filename, torch::Device(dev, index));
-        auto copy = new torch::jit::Module(res);
-        return new std::shared_ptr<torch::jit::Module>(copy);
+        // Load .pt2 file using AOTIModelPackageLoader
+        // This requires models to be compiled with aoti_compile_and_package()
+        auto* loader = new torch::inductor::AOTIModelPackageLoader(filename);
+        return loader;
     );
 
     return nullptr;
@@ -28,113 +22,30 @@ void THSExport_Module_dispose(const ExportedProgramModule module)
     delete module;
 }
 
-void THSExport_Module_forward(
+void THSExport_Module_run(
     const ExportedProgramModule module,
-    const TensorOrScalar* tensorPtrs,
-    const int length,
-    TensorOrScalar* (*allocator)(int32_t idx, size_t length),
-    int8_t* typeCode,
-    int32_t idx)
+    const Tensor* input_tensors,
+    const int input_length,
+    Tensor** result_tensors,
+    int* result_length)
 {
-    *typeCode = 0;
-
     CATCH(
-        // Execute the forward method
-        auto result = (*module)->forward(toIValue(tensorPtrs, length));
-        ReturnHelper(result, allocator, typeCode, &idx);
-    )
-}
-
-int THSExport_Module_is_training(ExportedProgramModule module)
-{
-    // ExportedPrograms are always in eval mode, but we check the underlying module
-    return (*module)->is_training();
-}
-
-void THSExport_Module_train(ExportedProgramModule module, bool on)
-{
-    // ExportedPrograms should remain in eval mode, but we allow this for compatibility
-    (*module)->train(on);
-}
-
-void THSExport_Module_eval(ExportedProgramModule module)
-{
-    (*module)->eval();
-}
-
-void THSExport_Module_to_device_dtype(ExportedProgramModule module, int8_t dtype, int64_t device, int64_t index)
-{
-    c10::DeviceType dev = c10::kCPU;
-    if (device == 1)
-        dev = c10::kCUDA;
-    if (device == 13)
-        dev = c10::kMPS;
-
-    CATCH(
-        (*module)->to(torch::Device(dev, index), (at::ScalarType)dtype);
+        // Convert input tensor pointers to std::vector<torch::Tensor>
+        std::vector<torch::Tensor> inputs;
+        inputs.reserve(input_length);
+        for (int i = 0; i < input_length; i++) {
+            inputs.push_back(*input_tensors[i]);
+        }
+
+        // Run inference
+        std::vector<torch::Tensor> outputs = module->run(inputs);
+
+        // Allocate output array and copy results
+        *result_length = outputs.size();
+        *result_tensors = new Tensor[outputs.size()];
+
+        for (size_t i = 0; i < outputs.size(); i++) {
+            (*result_tensors)[i] = new torch::Tensor(outputs[i]);
+        }
     );
 }
-
-void THSExport_Module_to_device(ExportedProgramModule module, int64_t device, int64_t index)
-{
-    c10::DeviceType dev = c10::kCPU;
-    if (device == 1)
-        dev = c10::kCUDA;
-    if (device == 13)
-        dev = c10::kMPS;
-
-    CATCH(
-        (*module)->to(torch::Device(dev, index));
-    );
-}
-
-void THSExport_Module_to_dtype(ExportedProgramModule module, int8_t dtype)
-{
-    CATCH(
-        (*module)->to((at::ScalarType)dtype);
-    );
-}
-
-void THSExport_Module_parameters(const ExportedProgramModule module, Tensor* (*allocator)(size_t length))
-{
-    auto parameters = (*module)->parameters();
-    Tensor* result = allocator(parameters.size());
-
-    int i = 0;
-    for (auto parameter : parameters)
-        result[i++] = new torch::Tensor(parameter);
-}
-
-void THSExport_Module_named_parameters(
-    const ExportedProgramModule module,
-    Tensor* (*allocator)(size_t length),
-    const char** (*allocator2)(size_t length))
-{
-    auto parameters = (*module)->named_parameters();
-    Tensor* result = allocator(parameters.size());
-    const char** names = allocator2(parameters.size());
-
-    int i = 0;
-    for (const auto& parameter : parameters) {
-        result[i] = new torch::Tensor(parameter.value);
-        names[i] = make_sharable_string(parameter.name);
-        i++;
-    }
-}
-
-void THSExport_Module_named_buffers(
-    const ExportedProgramModule module,
-    Tensor* (*allocator)(size_t length),
-    const char** (*allocator2)(size_t length))
-{
-    auto buffers = (*module)->named_buffers();
-    Tensor* result = allocator(buffers.size());
-    const char** names = allocator2(buffers.size());
-
-    int i = 0;
-    for (const auto& buffer : buffers) {
-        result[i] = new torch::Tensor(buffer.value);
-        names[i] = make_sharable_string(buffer.name);
-        i++;
-    }
-}
@@ -3,46 +3,30 @@
 
 #include "../Stdafx.h"
 
-#include "torch/csrc/jit/api/module.h"
+#include "torch/torch.h"
+#include "torch/csrc/inductor/aoti_package/model_package_loader.h"
 
 #include "Utils.h"
-#include "THSJIT.h"  // For TensorOrScalar struct
 
-// API for torch.export ExportedProgram
+// torch.export support via AOTInductor - Load and execute PyTorch ExportedProgram models (.pt2 files)
+// ExportedProgram is PyTorch 2.x's recommended way to export models for production deployment
+//
+// IMPORTANT: This implementation uses torch::inductor::AOTIModelPackageLoader which is
+// INFERENCE-ONLY. Training, parameter updates, and device movement are not supported.
+// Models must be compiled with torch._inductor.aoti_compile_and_package() in Python.
 
-// Load ExportedProgram from .pt2 file
-EXPORT_API(ExportedProgramModule) THSExport_load(const char* filename, int64_t device, int64_t index);
+// Load an AOTInductor-compiled model package from a .pt2 file
+EXPORT_API(ExportedProgramModule) THSExport_load(const char* filename);
 
-// Dispose ExportedProgram module
+// Dispose of an ExportedProgram module
 EXPORT_API(void) THSExport_Module_dispose(const ExportedProgramModule module);
 
-// Execute forward pass on ExportedProgram
-EXPORT_API(void) THSExport_Module_forward(
+// Execute the ExportedProgram's forward method (inference only)
+// Input: Array of tensors
+// Output: Array of result tensors (caller must free)
+EXPORT_API(void) THSExport_Module_run(
     const ExportedProgramModule module,
-    const TensorOrScalar* tensorPtrs,
-    const int length,
-    TensorOrScalar* (*allocator)(int32_t idx, size_t length),
-    int8_t* typeCode,
-    int32_t idx);
-
-// Device and dtype management
-EXPORT_API(void) THSExport_Module_to_device_dtype(ExportedProgramModule module, int8_t dtype, int64_t device, int64_t index);
-EXPORT_API(void) THSExport_Module_to_device(ExportedProgramModule module, int64_t device, int64_t index);
-EXPORT_API(void) THSExport_Module_to_dtype(ExportedProgramModule module, int8_t dtype);
-
-// Training mode (ExportedPrograms are always in eval mode, but we provide these for compatibility)
-EXPORT_API(int) THSExport_Module_is_training(ExportedProgramModule module);
-EXPORT_API(void) THSExport_Module_train(ExportedProgramModule module, bool on);
-EXPORT_API(void) THSExport_Module_eval(ExportedProgramModule module);
-
-// Parameters and buffers access
-EXPORT_API(void) THSExport_Module_parameters(const ExportedProgramModule module, Tensor* (*allocator)(size_t length));
-EXPORT_API(void) THSExport_Module_named_parameters(
-    const ExportedProgramModule module,
-    Tensor* (*allocator)(size_t length),
-    const char** (*allocator2)(size_t length));
-
-EXPORT_API(void) THSExport_Module_named_buffers(
-    const ExportedProgramModule module,
-    Tensor* (*allocator)(size_t length),
-    const char** (*allocator2)(size_t length));
+    const Tensor* input_tensors,
+    const int input_length,
+    Tensor** result_tensors,
+    int* result_length);
@@ -4,6 +4,7 @@
 #include <string>
 
 #include "torch/torch.h"
+#include "torch/csrc/inductor/aoti_package/model_package_loader.h"
 
 extern thread_local char *torch_last_err;
 
@@ -24,9 +25,9 @@ typedef std::shared_ptr<torch::jit::Function> * JITFunction;
 typedef std::shared_ptr<c10::Type> * JITType;
 typedef std::shared_ptr<c10::TensorType>* JITTensorType;
 
-// torch.export ExportedProgram module
-// Note: In LibTorch C++ API, ExportedProgram is also represented as torch::jit::Module
-typedef std::shared_ptr<torch::jit::Module>* ExportedProgramModule;
+// torch.export ExportedProgram module via AOTInductor
+// Note: Uses torch::inductor::AOTIModelPackageLoader for inference-only execution
+typedef torch::inductor::AOTIModelPackageLoader* ExportedProgramModule;
 
 struct TensorArray {
     Tensor *array;
 
@@ -0,0 +1 @@
+6D6AF87CAB301FA25CB4909697A03C65ED234E784CD96C8743A9AD6586238D0E
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+6D6AF87CAB301FA25CB4909697A03C65ED234E784CD96C8743A9AD6586238D0E`