diff --git a/.github/container/Dockerfile.axlearn b/.github/container/Dockerfile.axlearn
index 736e04a50..8a44385a0 100644
--- a/.github/container/Dockerfile.axlearn
+++ b/.github/container/Dockerfile.axlearn
@@ -1,6 +1,6 @@
 # syntax=docker/dockerfile:1-labs
 ARG BASE_IMAGE=ghcr.io/nvidia/jax-mealkit:jax
-ARG URLREF_AXLEARN=https://github.com/Steboss/axlearn.git#sbosisio/working_branch
+ARG URLREF_AXLEARN=https://github.com/apple/axlearn.git#main
 ARG SRC_PATH_AXLEARN=/opt/axlearn
 
 ###############################################################################
diff --git a/.github/container/manifest.yaml b/.github/container/manifest.yaml
index a304033b0..961a7d6c6 100644
--- a/.github/container/manifest.yaml
+++ b/.github/container/manifest.yaml
@@ -102,8 +102,8 @@ pathwaysutils:
   latest_verified_commit: 359776d454940ffaa337c36d1df16308d44a95a9
   mode: pip-vcs
 axlearn:
-  url: https://github.com/Steboss/axlearn.git
-  tracking_ref: sbosisio/working_branch
+  url: https://github.com/apple/axlearn.git
+  tracking_ref: main
   mode: git-clone
 qwix:
   url: https://github.com/google/qwix.git
diff --git a/.github/container/test-axlearn.sh b/.github/container/test-axlearn.sh
index 288a03a4e..4cd21cd9d 100755
--- a/.github/container/test-axlearn.sh
+++ b/.github/container/test-axlearn.sh
@@ -11,9 +11,9 @@ usage() {
     echo ""
     echo "  OPTIONS                       DESCRIPTION"
     echo "  -d, --directory DIR           Directory to run tests in."
-    echo "                                Default: 'axlearn/axlearn/common'."
+    echo "                                Default: '/opt/axlearn'."
     echo "  -t, --test-files FILES        Pattern for test files to run."
-    echo "                                Default: '*_test.py'."
+    echo "                                Default: all '*_test.py' files (recursive)."
     echo "  -o, --output DIRECTORY        Output directory for logs and summary."
     echo "                                Default: 'test_runs/<timestamp>'."
     echo "  -h, --help                    Show this help message and exit."
@@ -39,7 +39,7 @@ run_tests() {
 }
 
 # DEFAULT VALUES
-DIR='/opt/axlearn/axlearn/common'
+DIR='/opt/axlearn'
 TEST_FILES=()
 OUTPUT_DIRECTORY=''
 
@@ -95,15 +95,6 @@ LOG_DIRECTORY="${OUTPUT_DIRECTORY}/logs"
 
 mkdir -p "${LOG_DIRECTORY}"
 
-if [ "${#TEST_FILES[@]}" -gt 0 ]; then
-    echo "  Test Files:"
-    for f in "${TEST_FILES[@]}"; do
-        echo "    $f"
-    done
-else
-    echo "  Test Files Pattern: '*_test.py' (default)"
-fi
-
 # DEPENDENCIES
 pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
 pip install timm transformers scikit-learn grain evaluate prefixed wandb
@@ -115,26 +106,30 @@ curl https://huggingface.co/FacebookAI/roberta-base/raw/main/merges.txt -o /opt/
 curl https://huggingface.co/FacebookAI/roberta-base/raw/main/vocab.json -o /opt/axlearn/axlearn/data/tokenizers/bpe/roberta-base-vocab.json
 
 # RETRIEVE TEST FILES
+expanded_test_files=()
 if [ "${#TEST_FILES[@]}" -eq 0 ]; then
-    TEST_FILES=("*_test.py")
+    # No --test-files given: fall back to every *_test.py file found recursively.
+    readarray -t expanded_test_files < <(find . -name "*_test.py" -type f)
+    # Otherwise (else branch below): expand each --test-files pattern as a path relative to '.'.
+else
+    for pattern in "${TEST_FILES[@]}"; do
+        echo "looking for pattern: $pattern"
+        echo "Cmd: find . -path \"./$pattern\" -type f"
+        readarray -t found_files < <(find . -path "./$pattern" -type f)
+        if [ ${#found_files[@]} -gt 0 ]; then
+            expanded_test_files+=( "${found_files[@]}" )
+        else
+            echo "Warning: No files found matching pattern '$pattern'"
+        fi
+    done
 fi
 
-expanded_test_files=()
-for pattern in "${TEST_FILES[@]}"; do
-    # retrieve all the files
-    files=( $pattern )
-    if [ "${#files[@]}" -gt 0 ]; then
-        expanded_test_files+=( "${files[@]}" )
-    else
-        echo "Warning: No files matched pattern '$pattern'"
-    fi
-done
-
 if [ "${#expanded_test_files[@]}" -eq 0 ]; then
     echo "No test files found to run."
     exit 1
 fi
 
+# EXCLUDE PATTERNS
 EXCLUDE_PATTERNS=("array_serialization_test.py"
     "t5_test.py" # tensorflow bug
     "loss_test.py"
@@ -185,23 +180,13 @@ done
 
 
 # RUN TESTS
-TEST_8_DEVICES_FILES=("gda_test.py"
-    "input_base_test.py"
-    "input_dispatch_test.py"
-    "trainer_test.py"
-    "utils_test.py"
+TEST_8_DEVICES_WITH_PATHS=(
+  "./axlearn/common/gda_test.py"
+  "./axlearn/common/input_base_test.py"
+  "./axlearn/common/input_dispatch_test.py"
+  "./axlearn/common/trainer_test.py"
+  "./axlearn/common/utils_test.py"
 )
-TEST_8_DEVICES_WITH_PATHS=()
-for file in "${TEST_8_DEVICES_FILES[@]}"; do
-    found_files=$(find . -name "$file" -type f 2>/dev/null)
-    if [[ -n "$found_files" ]]; then
-        while IFS= read -r found_file; do
-            TEST_8_DEVICES_WITH_PATHS+=("$found_file")
-        done <<< "$found_files"
-    else
-        echo "Warning: Test file $file not found in current directory structure"
-    fi
-done
 
 run_tests "" "for_8_devices" "8_dev" "${TEST_8_DEVICES_WITH_PATHS[@]}"
 # All the other tests
diff --git a/.github/eks-workflow-files/axlearn/axlearn-job.yml b/.github/eks-workflow-files/axlearn/axlearn-job.yml
index 24e7ca8e7..0a77f4c86 100644
--- a/.github/eks-workflow-files/axlearn/axlearn-job.yml
+++ b/.github/eks-workflow-files/axlearn/axlearn-job.yml
@@ -22,7 +22,7 @@ spec:
                       LOG_DIR="/output/${RUN_ID}"
                       mkdir -p ${LOG_DIR}
                       # test on JAX, make sure 8 devices are visible
-                      pytest-xdist.sh 8 4 ${LOG_DIR}/axlearn-unittests.jsonl test-axlearn.sh --directory "." --output ${LOG_DIR} --test-files "/opt/axlearn/axlearn/common/*_test.py"
+                      pytest-xdist.sh 8 4 ${LOG_DIR}/axlearn-unittests.jsonl test-axlearn.sh --output ${LOG_DIR} --test-files "axlearn/common/*_test.py"
                   env:
                     - name: RUN_ID
                       value: PLACEHOLDER
diff --git a/.github/eks-workflow-files/mpi-nccl-test.yml b/.github/eks-workflow-files/mpi-nccl-test.yml
index 0e34cb7a2..e02ddf45c 100644
--- a/.github/eks-workflow-files/mpi-nccl-test.yml
+++ b/.github/eks-workflow-files/mpi-nccl-test.yml
@@ -71,7 +71,7 @@ spec:
               resources:
                 limits:
                   nvidia.com/gpu: 8
-                  hugepages-2Mi: 5120Mi
+                  #hugepages-2Mi: 5120Mi
                   vpc.amazonaws.com/efa: 32
                   memory: 32000Mi
           imagePullSecrets:
diff --git a/.github/workflows/_ci.yaml b/.github/workflows/_ci.yaml
index aafc32fdb..86ee9b239 100644
--- a/.github/workflows/_ci.yaml
+++ b/.github/workflows/_ci.yaml
@@ -54,7 +54,12 @@ jobs:
     secrets: inherit
 
   test-nccl:
-    if: inputs.ARCHITECTURE == 'amd64' # build only amd64
+    if: >-
+      inputs.ARCHITECTURE == 'amd64' &&
+      (
+        inputs.MODE == 'full' ||
+        inputs.MODE == 'nccl'
+      )
     needs: build-base
     uses: ./.github/workflows/_test_nccl.yaml
     with:
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 1a6f53ec4..3553e46fd 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -51,7 +51,8 @@ on:
           - t5x - run build rosetta
           - maxtext - run only the tests for maxtext
           - axlearn - run only the tests for axlearn
-        options: [full, jax, te, t5x, maxtext, axlearn]
+          - nccl - run only the nccl tests
+        options: [full, jax, te, t5x, maxtext, axlearn, nccl]
         default: full
 
 concurrency:
@@ -210,7 +211,7 @@ jobs:
       CUDA_IMAGE: ${{ needs.metadata.outputs.CUDA_IMAGE }}
       MANIFEST_ARTIFACT_NAME: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }}
       SOURCE_URLREFS: ${{ needs.bump-manifest.outputs.SOURCE_URLREFS }}
-      MODE: ${{ github.event_name == 'workflow_dispatch' && inputs.MODE || 'full' }}
+      MODE: ${{ github.event_name == 'workflow_dispatch' && inputs.MODE || 'full' }}
     secrets: inherit
 
   arm64:
@@ -222,7 +223,7 @@ jobs:
       CUDA_IMAGE: ${{ needs.metadata.outputs.CUDA_IMAGE }}
       MANIFEST_ARTIFACT_NAME: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }}
       SOURCE_URLREFS: ${{ needs.bump-manifest.outputs.SOURCE_URLREFS }}
-      MODE: ${{ github.event_name == 'workflow_dispatch' && inputs.MODE || 'full' }}
+      MODE: ${{ github.event_name == 'workflow_dispatch' && inputs.MODE || 'full' }}
     secrets: inherit
 
   # Only merge if everything succeeds