From 70d4873e5e0a5f0623222919a0e0a4f2b72d742a Mon Sep 17 00:00:00 2001
From: Anand Joseph <anajoseph@nvidia.com>
Date: Wed, 30 Oct 2024 10:04:58 +0530
Subject: [PATCH 1/5] Add support for canary export

Signed-off-by: Anand Joseph <anajoseph@nvidia.com>
---
 nemo2riva/cookbook.py                         | 12 +++
 nemo2riva/patches/__init__.py                 |  2 +
 nemo2riva/patches/aed_canary.py               | 78 +++++++++++++++++++
 nemo2riva/schema.py                           | 19 ++++-
 nemo2riva/scripts/__init__.py                 |  0
 .../asr-scr-exported-aedmodel.yaml            | 43 ++++++++++
 6 files changed, 151 insertions(+), 3 deletions(-)
 create mode 100644 nemo2riva/patches/aed_canary.py
 create mode 100644 nemo2riva/scripts/__init__.py
 create mode 100644 nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml

diff --git a/nemo2riva/cookbook.py b/nemo2riva/cookbook.py
index a583ef1..1465c44 100644
--- a/nemo2riva/cookbook.py
+++ b/nemo2riva/cookbook.py
@@ -48,6 +48,8 @@ def export_model(model, cfg, args, artifacts, metadata):
         format_meta = {"has_pytorch_checkpoint": True, "runtime": "PyTorch"}
     elif cfg.export_format == "NEMO":
         format_meta = {"has_pytorch_checkpoint": True, "runtime": "Python"}
+    elif cfg.export_format == "TRT-LLM":
+        format_meta = {"has_pytorch_checkpoint": False, "runtime": "TRT-LLM"}
     # TODO: use submodel sections
     metadata.update(format_meta)
     runtime = format_meta["runtime"]
@@ -140,6 +142,16 @@ def export_model(model, cfg, args, artifacts, metadata):
 
         elif cfg.export_format == "NEMO":
             model.save_to(export_file)
+        elif cfg.export_format == "TRT-LLM":
+            if not isinstance(model, Exportable):
+                logging.error("Your NeMo model class ({}) is not Exportable.".format(metadata['obj_cls']))
+                sys.exit(1)
+            model.transf_decoder.freeze()
+            decoder_params = model.transf_decoder.state_dict()
+            decoder_params.update(model.log_softmax.state_dict())
+            torch.save(decoder_params, export_file)
+
+
 
         # Add exported file to the artifact registry
 
diff --git a/nemo2riva/patches/__init__.py b/nemo2riva/patches/__init__.py
index a70d4ce..3d5a747 100644
--- a/nemo2riva/patches/__init__.py
+++ b/nemo2riva/patches/__init__.py
@@ -3,12 +3,14 @@
 
 from nemo2riva.patches.ctc import set_decoder_num_classes
 from nemo2riva.patches.ctc_bpe import bpe_check_inputs_and_version
+from nemo2riva.patches.aed_canary import config_for_trtllm
 from nemo2riva.patches.mtencdec import change_tokenizer_names
 from nemo2riva.patches.tts import fastpitch_model_versioning, generate_vocab_mapping, radtts_model_versioning
 
 patches = {
     "EncDecCTCModel": [set_decoder_num_classes],
     "EncDecCTCModelBPE": [bpe_check_inputs_and_version],
+    "EncDecMultiTaskModel": [config_for_trtllm],
     "MTEncDecModel": [change_tokenizer_names],
     "FastPitchModel": [generate_vocab_mapping, fastpitch_model_versioning],
     "RadTTSModel": [generate_vocab_mapping, radtts_model_versioning],
diff --git a/nemo2riva/patches/aed_canary.py b/nemo2riva/patches/aed_canary.py
new file mode 100644
index 0000000..9e4bd82
--- /dev/null
+++ b/nemo2riva/patches/aed_canary.py
@@ -0,0 +1,78 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+import yaml
+import json
+import logging
+
+
+def config_for_trtllm(model, artifacts, **kwargs):
+    if model.__class__.__name__ == 'EncDecMultiTaskModel':
+
+        model_config = yaml.safe_load(artifacts['model_config.yaml']['content'])
+
+        keys_required = [
+            'beam_search',
+            'encoder',
+            'head',
+            'model_defaults',
+            'prompt_format',
+            'sample_rate',
+            'target',
+            'preprocessor',
+        ]
+        if 'beam_search' not in model_config and 'decoding' in model_config:
+            model_config['beam_search'] = model_config['decoding'].get('beam', {'beam_size': 1, 'len_pen': 0.0,
+                                                                                'max_generation_delta': 50}
+                                                                       )
+        config = dict({k: model_config[k] for k in keys_required})
+        config['decoder'] = {
+            'transf_decoder': model_config['transf_decoder'],
+            'transf_encoder': model_config['transf_encoder'],
+            'vocabulary': make_vocabulary_file(model,artifacts),
+            'num_classes': model_config['head']['num_classes'],
+            'feat_in': model_config['model_defaults']['asr_enc_hidden'],
+            'n_layers': model_config['transf_decoder']['config_dict']['num_layers'],
+        }
+        config['target'] = 'trtllm.canary'
+
+
+        artifacts['model_config.yaml']['content'] = yaml.safe_dump(config, encoding=('utf-8'))
+
+
+def make_vocabulary_file(model, artifacts, **kwargs):
+    if model.__class__.__name__ == 'EncDecMultiTaskModel':
+
+        tokenizer_vocab = {'tokens': {},
+                           'offsets': model.tokenizer.token_id_offset
+                           }
+        for lang in model.tokenizer.langs:
+            tokenizer_vocab['tokens'][lang] = {}
+        tokenizer_vocab['size'] = model.tokenizer.vocab_size
+
+        try:
+            tokenizer_vocab['bos_id'] = model.tokenizer.bos_id
+        except Exception as e:
+            logging.warning(f"Tokenizer is missing bos_id. Could affect accuracy")
+
+        try:
+            tokenizer_vocab['eos_id'] = model.tokenizer.eos_id
+        except Exception as e:
+            logging.warning(f"Tokenizer is missing eos_id. Could affect accuracy")
+        try:
+            tokenizer_vocab['nospeech_id'] = model.tokenizer.nospeech_id
+        except Exception as e:
+            logging.warning(f"Tokenizer is missing nospeech_id. Could affect accuracy")
+        try:
+            tokenizer_vocab['pad_id'] = model.tokenizer.pad_id
+        except Exception as e:
+            logging.warning(f"Tokenizer is missing pad_id. Could affect accuracy")
+
+        for t_id in range(0, model.tokenizer.vocab_size):
+            lang = model.tokenizer.ids_to_lang([t_id])
+            tokenizer_vocab['tokens'][lang][t_id] = model.tokenizer.ids_to_tokens([t_id])[0]
+
+        artifacts['vocab.json']={}
+        artifacts['vocab.json']['content'] = json.dumps(tokenizer_vocab).encode('utf-8')
+        return tokenizer_vocab
+
diff --git a/nemo2riva/schema.py b/nemo2riva/schema.py
index f881c96..13ef553 100644
--- a/nemo2riva/schema.py
+++ b/nemo2riva/schema.py
@@ -17,7 +17,7 @@
 
 schema_dict = None
 
-supported_formats = ["ONNX", "CKPT", "TS", "NEMO"]
+supported_formats = ["ONNX", "CKPT", "TS", "NEMO", "PYTORCH", "TRT-LLM"]
 
 
 @dataclass
@@ -48,15 +48,28 @@ def get_export_config(export_obj, args):
     need_autocast = False
     if export_obj:
         conf.export_file = list(export_obj)[0]
+        attribs = export_obj[conf.export_file]
+        conf.export_subnet = attribs.get('export_subnet', None)
+        conf.is_onnx=attribs.get('onnx', False)
+        conf.trt_llm = False
+        conf.weights_only = False
+
+        if not conf.is_onnx:
+            conf.trt_llm = attribs.get('trtllm', False)
+            conf.is_torch = attribs.get('torch', False)
+
         if conf.export_file.endswith('.onnx'):
             conf.export_format = "ONNX"
         elif conf.export_file.endswith('.ts'):
             conf.export_format = "TS"
         elif conf.export_file.endswith('.nemo'):
             conf.export_format = "NEMO"
+        elif conf.trt_llm:
+            conf.export_format = "TRT-LLM"
+        elif conf.export_file.endswith('.pt'):
+            conf.export_format = "PYTORCH"
         else:
             conf.export_format = "CKPT"
-        attribs = export_obj[conf.export_file]
         conf.autocast = attribs.get('autocast', False)
         need_autocast = conf.autocast
 
@@ -66,7 +79,7 @@ def get_export_config(export_obj, args):
         if conf.encryption and args.key is None:
             raise Exception(f"{conf.export_file} requires encryption and no key was given")
 
-        conf.export_subnet = attribs.get('export_subnet', None)
+
 
     if args.export_subnet:
         if conf.export_subnet:
diff --git a/nemo2riva/scripts/__init__.py b/nemo2riva/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml b/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml
new file mode 100644
index 0000000..e83d344
--- /dev/null
+++ b/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml
@@ -0,0 +1,43 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+# Define required metadata fields expected in the archive (optional).
+metadata:
+  - obj_cls: nemo.collections.asr.models.EncDecMultiTaskModel
+
+
+# Define list of files that are expected (optional).
+artifact_properties:
+  # List of files.
+  - model_config.yaml
+  - encoder.onnx:
+      export_subnet: encoder
+      onnx: True
+  - decoder.pt:
+      trtllm: True
+      torch: True
+      onnx: False
+
+
+# Define list of files with expected content (optional).
+# Functionality limited to yaml files (e.g. model_config.yaml).
+artifact_content:
+  # List of files.
+  - model_config.yaml:
+    # List of sections.subsections. ... that are required.
+    # (Optional `: True` instructs to check the presence of the file indicated as leaf in the archive)
+    - transf_decoder
+    - transf_encoder
+    - vocabulary
+    - num_classes
+    - feat_in
+    - n_layers
+    - target
+    - beam_search
+    - encoder
+    - head
+    - model_defaults
+    - prompt_format
+    - sample_rate
+    - target
+    - preprocessor

From f2625883550b025afdbded0f8cfbe46ad53dac1c Mon Sep 17 00:00:00 2001
From: Anand Joseph <anajoseph@nvidia.com>
Date: Wed, 30 Oct 2024 18:57:08 +0530
Subject: [PATCH 2/5] Cleanup and additional fixes

Signed-off-by: Anand Joseph <anajoseph@nvidia.com>
---
 nemo2riva/cookbook.py                          | 13 ++++++-------
 nemo2riva/patches/aed_canary.py                |  4 ++--
 nemo2riva/schema.py                            | 18 ++++++++++--------
 .../asr-scr-exported-aedmodel.yaml             |  8 +++++++-
 requirements.txt                               |  3 ++-
 5 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/nemo2riva/cookbook.py b/nemo2riva/cookbook.py
index 1465c44..3d5f5d3 100644
--- a/nemo2riva/cookbook.py
+++ b/nemo2riva/cookbook.py
@@ -48,8 +48,8 @@ def export_model(model, cfg, args, artifacts, metadata):
         format_meta = {"has_pytorch_checkpoint": True, "runtime": "PyTorch"}
     elif cfg.export_format == "NEMO":
         format_meta = {"has_pytorch_checkpoint": True, "runtime": "Python"}
-    elif cfg.export_format == "TRT-LLM":
-        format_meta = {"has_pytorch_checkpoint": False, "runtime": "TRT-LLM"}
+    elif cfg.export_format == "STATE":
+        format_meta = {"has_pytorch_checkpoint": False, "runtime": "Python"}
     # TODO: use submodel sections
     metadata.update(format_meta)
     runtime = format_meta["runtime"]
@@ -142,14 +142,13 @@ def export_model(model, cfg, args, artifacts, metadata):
 
         elif cfg.export_format == "NEMO":
             model.save_to(export_file)
-        elif cfg.export_format == "TRT-LLM":
+        elif cfg.export_format == "STATE":
             if not isinstance(model, Exportable):
                 logging.error("Your NeMo model class ({}) is not Exportable.".format(metadata['obj_cls']))
                 sys.exit(1)
-            model.transf_decoder.freeze()
-            decoder_params = model.transf_decoder.state_dict()
-            decoder_params.update(model.log_softmax.state_dict())
-            torch.save(decoder_params, export_file)
+            model.freeze()
+            model_params = model.state_dict()
+            torch.save(model_params, export_file)
 
 
 
diff --git a/nemo2riva/patches/aed_canary.py b/nemo2riva/patches/aed_canary.py
index 9e4bd82..d75ebf0 100644
--- a/nemo2riva/patches/aed_canary.py
+++ b/nemo2riva/patches/aed_canary.py
@@ -72,7 +72,7 @@ def make_vocabulary_file(model, artifacts, **kwargs):
             lang = model.tokenizer.ids_to_lang([t_id])
             tokenizer_vocab['tokens'][lang][t_id] = model.tokenizer.ids_to_tokens([t_id])[0]
 
-        artifacts['vocab.json']={}
-        artifacts['vocab.json']['content'] = json.dumps(tokenizer_vocab).encode('utf-8')
+        #artifacts['vocab.json']={}
+        #artifacts['vocab.json']['content'] = json.dumps(tokenizer_vocab).encode('utf-8')
         return tokenizer_vocab
 
diff --git a/nemo2riva/schema.py b/nemo2riva/schema.py
index 13ef553..4c1e0ef 100644
--- a/nemo2riva/schema.py
+++ b/nemo2riva/schema.py
@@ -17,7 +17,7 @@
 
 schema_dict = None
 
-supported_formats = ["ONNX", "CKPT", "TS", "NEMO", "PYTORCH", "TRT-LLM"]
+supported_formats = ["ONNX", "CKPT", "TS", "NEMO", "PYTORCH", "STATE"]
 
 
 @dataclass
@@ -50,12 +50,13 @@ def get_export_config(export_obj, args):
         conf.export_file = list(export_obj)[0]
         attribs = export_obj[conf.export_file]
         conf.export_subnet = attribs.get('export_subnet', None)
+
         conf.is_onnx=attribs.get('onnx', False)
-        conf.trt_llm = False
-        conf.weights_only = False
+
+
 
         if not conf.is_onnx:
-            conf.trt_llm = attribs.get('trtllm', False)
+            conf.states_only = attribs.get('states_only', False)
             conf.is_torch = attribs.get('torch', False)
 
         if conf.export_file.endswith('.onnx'):
@@ -64,10 +65,11 @@ def get_export_config(export_obj, args):
             conf.export_format = "TS"
         elif conf.export_file.endswith('.nemo'):
             conf.export_format = "NEMO"
-        elif conf.trt_llm:
-            conf.export_format = "TRT-LLM"
-        elif conf.export_file.endswith('.pt'):
-            conf.export_format = "PYTORCH"
+        elif conf.is_torch:
+            if conf.states_only:
+                conf.export_format = "STATE"
+            else:
+                conf.export_format = "PYTORCH"
         else:
             conf.export_format = "CKPT"
         conf.autocast = attribs.get('autocast', False)
diff --git a/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml b/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml
index e83d344..c1339ab 100644
--- a/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml
+++ b/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml
@@ -14,7 +14,13 @@ artifact_properties:
       export_subnet: encoder
       onnx: True
   - decoder.pt:
-      trtllm: True
+      export_subnet: transf_decoder
+      states_only: True
+      torch: True
+      onnx: False
+  - log_softmax.pt:
+      export_subnet: log_softmax
+      states_only: True
       torch: True
       onnx: False
 
diff --git a/requirements.txt b/requirements.txt
index dffdec7..34ee049 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,10 +2,11 @@
 # SPDX-License-Identifier: MIT
 
 nemo_toolkit>=1.6.0
+torch>=2.4.0
 nvidia-eff>=0.6.4
 nvidia-eff-tao-encryption>=0.1.8
 nvidia-pyindex==1.0.6
-onnx==1.14.1
+onnx==1.16.1
 onnxruntime==1.16.3
 onnxruntime-gpu==1.16.3
 onnx-graphsurgeon==0.3.27

From 4961a0daeec719c09472cad937b79140f2be0af4 Mon Sep 17 00:00:00 2001
From: Anand Joseph <anajoseph@nvidia.com>
Date: Mon, 11 Nov 2024 10:13:29 +0530
Subject: [PATCH 3/5] Drop dead vocab.json code, fix copyright year, trim blank lines

Signed-off-by: Anand Joseph <anajoseph@nvidia.com>
---
 nemo2riva/patches/aed_canary.py                             | 5 +----
 nemo2riva/schema.py                                         | 5 -----
 nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml | 2 +-
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/nemo2riva/patches/aed_canary.py b/nemo2riva/patches/aed_canary.py
index d75ebf0..88caccc 100644
--- a/nemo2riva/patches/aed_canary.py
+++ b/nemo2riva/patches/aed_canary.py
@@ -1,8 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: MIT
 
 import yaml
-import json
 import logging
 
 
@@ -72,7 +71,5 @@ def make_vocabulary_file(model, artifacts, **kwargs):
             lang = model.tokenizer.ids_to_lang([t_id])
             tokenizer_vocab['tokens'][lang][t_id] = model.tokenizer.ids_to_tokens([t_id])[0]
 
-        #artifacts['vocab.json']={}
-        #artifacts['vocab.json']['content'] = json.dumps(tokenizer_vocab).encode('utf-8')
         return tokenizer_vocab
 
diff --git a/nemo2riva/schema.py b/nemo2riva/schema.py
index 4c1e0ef..1e08c5b 100644
--- a/nemo2riva/schema.py
+++ b/nemo2riva/schema.py
@@ -50,11 +50,8 @@ def get_export_config(export_obj, args):
         conf.export_file = list(export_obj)[0]
         attribs = export_obj[conf.export_file]
         conf.export_subnet = attribs.get('export_subnet', None)
-
         conf.is_onnx=attribs.get('onnx', False)
 
-
-
         if not conf.is_onnx:
             conf.states_only = attribs.get('states_only', False)
             conf.is_torch = attribs.get('torch', False)
@@ -81,8 +78,6 @@ def get_export_config(export_obj, args):
         if conf.encryption and args.key is None:
             raise Exception(f"{conf.export_file} requires encryption and no key was given")
 
-
-
     if args.export_subnet:
         if conf.export_subnet:
             raise Exception("Can't combine schema's export_subnet and export-subnet argument!")
diff --git a/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml b/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml
index c1339ab..5a6f627 100644
--- a/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml
+++ b/nemo2riva/validation_schemas/asr-scr-exported-aedmodel.yaml
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: MIT
 
 # Define required metadata fields expected in the archive (optional).

From 5e6b3e1b534c6d46c80415ce2fb6d0821071a5e6 Mon Sep 17 00:00:00 2001
From: Anand Joseph <anajoseph@nvidia.com>
Date: Mon, 11 Nov 2024 10:20:41 +0530
Subject: [PATCH 4/5] Remove stray blank line in aed_canary.py

Signed-off-by: Anand Joseph <anajoseph@nvidia.com>
---
 nemo2riva/patches/aed_canary.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/nemo2riva/patches/aed_canary.py b/nemo2riva/patches/aed_canary.py
index 88caccc..6bf8ddf 100644
--- a/nemo2riva/patches/aed_canary.py
+++ b/nemo2riva/patches/aed_canary.py
@@ -35,7 +35,6 @@ def config_for_trtllm(model, artifacts, **kwargs):
         }
         config['target'] = 'trtllm.canary'
 
-
         artifacts['model_config.yaml']['content'] = yaml.safe_dump(config, encoding=('utf-8'))
 
 

From 5cc5f95fe4fefff0d0428f71a416c8fcf220a767 Mon Sep 17 00:00:00 2001
From: anand-nv <105917641+anand-nv@users.noreply.github.com>
Date: Sat, 8 Mar 2025 07:37:55 +0530
Subject: [PATCH 5/5] Update requirements.txt

---
 requirements.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 34ee049..f369637 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,11 +2,11 @@
 # SPDX-License-Identifier: MIT
 
 nemo_toolkit>=1.6.0
-torch>=2.4.0
+torch>=2.5.0
 nvidia-eff>=0.6.4
 nvidia-eff-tao-encryption>=0.1.8
 nvidia-pyindex==1.0.6
-onnx==1.16.1
-onnxruntime==1.16.3
-onnxruntime-gpu==1.16.3
-onnx-graphsurgeon==0.3.27
+onnx>=1.17.0
+onnxruntime>=1.17.0
+onnxruntime-gpu>=1.17.0
+onnx-graphsurgeon>=0.3.27