From ac452beb86ee9641b4c6ebeb4f4a21065647c2af Mon Sep 17 00:00:00 2001
From: aasthabharill <aasthabharill4@gmail.com>
Date: Tue, 9 Jun 2026 06:26:46 +0000
Subject: [PATCH 1/6] Generate project-context.md for sourcedb-to-spanner

---
 v2/sourcedb-to-spanner/architecture.dot   |  35 ++++++
 v2/sourcedb-to-spanner/architecture.svg   | 134 ++++++++++++++++++++++
 v2/sourcedb-to-spanner/project-context.md |  78 +++++++++++++
 3 files changed, 247 insertions(+)
 create mode 100644 v2/sourcedb-to-spanner/architecture.dot
 create mode 100644 v2/sourcedb-to-spanner/architecture.svg
 create mode 100644 v2/sourcedb-to-spanner/project-context.md
diff --git a/v2/sourcedb-to-spanner/architecture.dot b/v2/sourcedb-to-spanner/architecture.dot
new file mode 100644
index 0000000000..22f557881f
--- /dev/null
+++ b/v2/sourcedb-to-spanner/architecture.dot
@@ -0,0 +1,35 @@
+digraph Architecture {
+    node [shape=box, style=filled, color=lightblue];
+    
+    SourceDb [label="Source Database\n(Cassandra, MySQL, PostgreSQL)"];
+    
+    subgraph cluster_Reader {
+        label = "com.google.cloud.teleport.v2.source.reader";
+        ReaderImpl [label="ReaderImpl"];
+        IoWrapper [label="IoWrapper (Cassandra, JDBC)"];
+        RowMapper [label="RowMapper"];
+    }
+    
+    subgraph cluster_Transformer {
+        label = "com.google.cloud.teleport.v2.transformer";
+        SourceRowToMutation [label="SourceRowToMutationDoFn"];
+    }
+    
+    subgraph cluster_Writer {
+        label = "com.google.cloud.teleport.v2.writer";
+        SpannerWriter [label="SpannerWriter"];
+        DLQ [label="DeadLetterQueue"];
+    }
+
+    Spanner [label="Cloud Spanner"];
+    GCS [label="GCS (DLQ)"];
+
+    SourceDb -> IoWrapper;
+    IoWrapper -> RowMapper;
+    RowMapper -> ReaderImpl;
+    ReaderImpl -> SourceRowToMutation [label="SourceRow"];
+    SourceRowToMutation -> SpannerWriter [label="Mutation"];
+    SpannerWriter -> Spanner;
+    SpannerWriter -> DLQ [label="Failed Mutations"];
+    DLQ -> GCS;
+}
diff --git a/v2/sourcedb-to-spanner/architecture.svg b/v2/sourcedb-to-spanner/architecture.svg
new file mode 100644
index 0000000000..035c1d300d
--- /dev/null
+++ b/v2/sourcedb-to-spanner/architecture.svg
@@ -0,0 +1,134 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 14.1.2 (0)
+ -->
+<!-- Title: Architecture Pages: 1 -->
+<svg width="335pt" height="674pt"
+ viewBox="0.00 0.00 335.00 674.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 670)">
+<title>Architecture</title>
+<polygon fill="white" stroke="none" points="-4,4 -4,-670 330.5,-670 330.5,4 -4,4"/>
+<g id="clust1" class="cluster">
+<title>cluster_Reader</title>
+<polygon fill="none" stroke="black" points="36,-375.5 36,-598 289,-598 289,-375.5 36,-375.5"/>
+<text xml:space="preserve" text-anchor="middle" x="162.5" y="-580.7" font-family="Times,serif" font-size="14.00">com.google.cloud.teleport.v2.source.reader</text>
+</g>
+<g id="clust2" class="cluster">
+<title>cluster_Transformer</title>
+<polygon fill="none" stroke="black" points="38,-264.5 38,-341 282,-341 282,-264.5 38,-264.5"/>
+<text xml:space="preserve" text-anchor="middle" x="160" y="-323.7" font-family="Times,serif" font-size="14.00">com.google.cloud.teleport.v2.transformer</text>
+</g>
+<g id="clust3" class="cluster">
+<title>cluster_Writer</title>
+<polygon fill="none" stroke="black" points="8,-65 8,-230 221,-230 221,-65 8,-65"/>
+<text xml:space="preserve" text-anchor="middle" x="114.5" y="-212.7" font-family="Times,serif" font-size="14.00">com.google.cloud.teleport.v2.writer</text>
+</g>
+<!-- SourceDb -->
+<g id="node1" class="node">
+<title>SourceDb</title>
+<polygon fill="lightblue" stroke="lightblue" points="245.25,-666 38.75,-666 38.75,-625 245.25,-625 245.25,-666"/>
+<text xml:space="preserve" text-anchor="middle" x="142" y="-648.7" font-family="Times,serif" font-size="14.00">Source Database</text>
+<text xml:space="preserve" text-anchor="middle" x="142" y="-632.2" font-family="Times,serif" font-size="14.00">(Cassandra, MySQL, PostgreSQL)</text>
+</g>
+<!-- IoWrapper -->
+<g id="node3" class="node">
+<title>IoWrapper</title>
+<polygon fill="lightblue" stroke="lightblue" points="233.62,-565.5 50.38,-565.5 50.38,-529.5 233.62,-529.5 233.62,-565.5"/>
+<text xml:space="preserve" text-anchor="middle" x="142" y="-542.45" font-family="Times,serif" font-size="14.00">IoWrapper (Cassandra, JDBC)</text>
+</g>
+<!-- SourceDb&#45;&gt;IoWrapper -->
+<g id="edge1" class="edge">
+<title>SourceDb&#45;&gt;IoWrapper</title>
+<path fill="none" stroke="black" d="M142,-624.61C142,-610.91 142,-592.51 142,-577.23"/>
+<polygon fill="black" stroke="black" points="145.5,-577.3 142,-567.3 138.5,-577.3 145.5,-577.3"/>
+</g>
+<!-- ReaderImpl -->
+<g id="node2" class="node">
+<title>ReaderImpl</title>
+<polygon fill="lightblue" stroke="lightblue" points="182.25,-419.5 101.75,-419.5 101.75,-383.5 182.25,-383.5 182.25,-419.5"/>
+<text xml:space="preserve" text-anchor="middle" x="142" y="-396.45" font-family="Times,serif" font-size="14.00">ReaderImpl</text>
+</g>
+<!-- SourceRowToMutation -->
+<g id="node5" class="node">
+<title>SourceRowToMutation</title>
+<polygon fill="lightblue" stroke="lightblue" points="229.5,-308.5 54.5,-308.5 54.5,-272.5 229.5,-272.5 229.5,-308.5"/>
+<text xml:space="preserve" text-anchor="middle" x="142" y="-285.45" font-family="Times,serif" font-size="14.00">SourceRowToMutationDoFn</text>
+</g>
+<!-- ReaderImpl&#45;&gt;SourceRowToMutation -->
+<g id="edge4" class="edge">
+<title>ReaderImpl&#45;&gt;SourceRowToMutation</title>
+<path fill="none" stroke="black" d="M142,-383.47C142,-366.53 142,-340.23 142,-320.09"/>
+<polygon fill="black" stroke="black" points="145.5,-320.28 142,-310.28 138.5,-320.28 145.5,-320.28"/>
+<text xml:space="preserve" text-anchor="middle" x="173.5" y="-352.2" font-family="Times,serif" font-size="14.00">SourceRow</text>
+</g>
+<!-- RowMapper -->
+<g id="node4" class="node">
+<title>RowMapper</title>
+<polygon fill="lightblue" stroke="lightblue" points="184.12,-492.5 99.88,-492.5 99.88,-456.5 184.12,-456.5 184.12,-492.5"/>
+<text xml:space="preserve" text-anchor="middle" x="142" y="-469.45" font-family="Times,serif" font-size="14.00">RowMapper</text>
+</g>
+<!-- IoWrapper&#45;&gt;RowMapper -->
+<g id="edge2" class="edge">
+<title>IoWrapper&#45;&gt;RowMapper</title>
+<path fill="none" stroke="black" d="M142,-529.31C142,-521.73 142,-512.6 142,-504.04"/>
+<polygon fill="black" stroke="black" points="145.5,-504.04 142,-494.04 138.5,-504.04 145.5,-504.04"/>
+</g>
+<!-- RowMapper&#45;&gt;ReaderImpl -->
+<g id="edge3" class="edge">
+<title>RowMapper&#45;&gt;ReaderImpl</title>
+<path fill="none" stroke="black" d="M142,-456.31C142,-448.73 142,-439.6 142,-431.04"/>
+<polygon fill="black" stroke="black" points="145.5,-431.04 142,-421.04 138.5,-431.04 145.5,-431.04"/>
+</g>
+<!-- SpannerWriter -->
+<g id="node6" class="node">
+<title>SpannerWriter</title>
+<polygon fill="lightblue" stroke="lightblue" points="189.75,-197.5 94.25,-197.5 94.25,-161.5 189.75,-161.5 189.75,-197.5"/>
+<text xml:space="preserve" text-anchor="middle" x="142" y="-174.45" font-family="Times,serif" font-size="14.00">SpannerWriter</text>
+</g>
+<!-- SourceRowToMutation&#45;&gt;SpannerWriter -->
+<g id="edge5" class="edge">
+<title>SourceRowToMutation&#45;&gt;SpannerWriter</title>
+<path fill="none" stroke="black" d="M142,-272.47C142,-255.53 142,-229.23 142,-209.09"/>
+<polygon fill="black" stroke="black" points="145.5,-209.28 142,-199.28 138.5,-209.28 145.5,-209.28"/>
+<text xml:space="preserve" text-anchor="middle" x="167.12" y="-241.2" font-family="Times,serif" font-size="14.00">Mutation</text>
+</g>
+<!-- DLQ -->
+<g id="node7" class="node">
+<title>DLQ</title>
+<polygon fill="lightblue" stroke="lightblue" points="176,-109 64,-109 64,-73 176,-73 176,-109"/>
+<text xml:space="preserve" text-anchor="middle" x="120" y="-85.95" font-family="Times,serif" font-size="14.00">DeadLetterQueue</text>
+</g>
+<!-- SpannerWriter&#45;&gt;DLQ -->
+<g id="edge7" class="edge">
+<title>SpannerWriter&#45;&gt;DLQ</title>
+<path fill="none" stroke="black" d="M137.65,-161.41C134.66,-149.64 130.61,-133.73 127.15,-120.11"/>
+<polygon fill="black" stroke="black" points="130.64,-119.65 124.79,-110.82 123.86,-121.37 130.64,-119.65"/>
+<text xml:space="preserve" text-anchor="middle" x="179.47" y="-130.2" font-family="Times,serif" font-size="14.00">Failed Mutations</text>
+</g>
+<!-- Spanner -->
+<g id="node8" class="node">
+<title>Spanner</title>
+<polygon fill="lightblue" stroke="lightblue" points="326.5,-109 229.5,-109 229.5,-73 326.5,-73 326.5,-109"/>
+<text xml:space="preserve" text-anchor="middle" x="278" y="-85.95" font-family="Times,serif" font-size="14.00">Cloud Spanner</text>
+</g>
+<!-- SpannerWriter&#45;&gt;Spanner -->
+<g id="edge6" class="edge">
+<title>SpannerWriter&#45;&gt;Spanner</title>
+<path fill="none" stroke="black" d="M190.06,-163.79C203.6,-158.5 217.92,-151.76 230,-143.5 240.29,-136.47 250,-126.86 257.96,-117.85"/>
+<polygon fill="black" stroke="black" points="260.55,-120.21 264.32,-110.31 255.2,-115.7 260.55,-120.21"/>
+</g>
+<!-- GCS -->
+<g id="node9" class="node">
+<title>GCS</title>
+<polygon fill="lightblue" stroke="lightblue" points="161.38,-36 78.62,-36 78.62,0 161.38,0 161.38,-36"/>
+<text xml:space="preserve" text-anchor="middle" x="120" y="-12.95" font-family="Times,serif" font-size="14.00">GCS (DLQ)</text>
+</g>
+<!-- DLQ&#45;&gt;GCS -->
+<g id="edge8" class="edge">
+<title>DLQ&#45;&gt;GCS</title>
+<path fill="none" stroke="black" d="M120,-72.81C120,-65.23 120,-56.1 120,-47.54"/>
+<polygon fill="black" stroke="black" points="123.5,-47.54 120,-37.54 116.5,-47.54 123.5,-47.54"/>
+</g>
+</g>
+</svg>
diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md
new file mode 100644
index 0000000000..e6784020ad
--- /dev/null
+++ b/v2/sourcedb-to-spanner/project-context.md
@@ -0,0 +1,78 @@
+# Project Context: SourceDb to Spanner
+
+<!-- AI Agent: Please parse this document to understand the project's context before making changes. -->
+
+## Overview
+
+*   **Core Intent:** A bulk migration Dataflow pipeline to migrate data from various Source Databases (MySQL, PostgreSQL, Cassandra) into Cloud Spanner. It handles sharded and non-sharded databases.
+*   **Primary Users:** Internal SREs, external customers migrating to Cloud Spanner, and users of Spanner Migration Tool.
+*   **Critical SLOs/Guarantees:** Must effectively handle bulk data extraction and mapping to Cloud Spanner mutations while maintaining data integrity. Features a Dead Letter Queue (DLQ) for failed mutations.
+*   **Terminology:** 
+    *   **DLQ:** Dead Letter Queue (for failed records).
+    *   **SourceRow:** Intermediate representation of a row read from the source database.
+    *   **Mutation:** Spanner mutation to be applied.
+
+## Technical Details
+
+*   **Tech Stack & Versions:**
+    *   **Languages:** Java 17
+    *   **Frameworks/Libraries:** Apache Beam 2.73.0, Maven
+    *   **Key Google Technologies:** Cloud Spanner, Cloud Storage (GCS), Dataflow
+*   **Code Location:** `/usr/local/google/home/aasthabharill/DataflowTemplates/v2/sourcedb-to-spanner`
+*   **Data Flow:** Data is read from Source Databases (MySQL/PostgreSQL/Cassandra) using JDBC or Datastax driver -> Mapped into SourceRows -> Transformed to Spanner Mutations -> Written to Cloud Spanner. Failed mutations are logged to a GCS DLQ.
+*   **Project Structure (Logical Architecture Mapping):**
+    *   `src/main/java/com/google/cloud/teleport/v2/source/reader`: Source Readers (IoWrappers for Cassandra, JDBC, etc., RowMappers)
+    *   `src/main/java/com/google/cloud/teleport/v2/transformer`: Transformers (e.g., `SourceRowToMutationDoFn`)
+    *   `src/main/java/com/google/cloud/teleport/v2/writer`: Writers and error handling (`SpannerWriter`, `DeadLetterQueue`)
+    *   `src/main/java/com/google/cloud/teleport/v2/templates`: Main pipeline definition (`SourceDbToSpanner`)
+*   **Build/Run Commands:**
+    ```bash
+    # To build the flex template
+    export PROJECT=span-cloud-ck-testing-external
+    export BUCKET_NAME=ea-functional-tests
+    mvn clean package -PtemplatesStage -DskipTests -DprojectId="$PROJECT" -DbucketName="$BUCKET_NAME" -DstagePrefix="templates-<replace-with-your-prefix>" -DtemplateName="Sourcedb_to_Spanner_Flex" -pl v2/sourcedb-to-spanner -am
+
+    # To run tests
+    mvn clean test -pl v2/sourcedb-to-spanner -am
+
+    # To run pipeline
+    export JOB_NAME="bulk-migrate-to-spanner-$(date +%Y%m%d-%H%M%S)"
+    export OUTPUT_DIR="gs://${BUCKET_NAME}/bulk-migration"
+    gcloud dataflow flex-template run $JOB_NAME \
+      --project=$PROJECT_ID \
+      --region=$REGION \
+      --template-file-gcs-location="gs://dataflow-templates-${REGION}/latest/flex/Sourcedb_to_Spanner_Flex" \
+      --max-workers=2 \
+      --num-workers=1 \
+      --worker-machine-type=n2-highmem-8 \
+      --parameters sourceConfigURL=$GCS_SHARDING_PATH,instanceId=$SPANNER_INSTANCE_NAME,databaseId=$SPANNER_DATABASE_NAME,projectId=$PROJECT_ID,outputDirectory=$OUTPUT_DIR,username=datastream_user,password=complex_password_123,schemaOverridesFilePath=$GCS_OVERRIDES_PATH,transformationJarPath=$CUSTOM_JAR_PATH,transformationClassName=com.custom.CustomTransformationFetcher
+    ```
+
+## Project Management
+
+*   **Buganizer Component:** [Infrastructure > Spanner > Cloud > Migrations](https://b.corp.google.com/issues?q=componentid:1008064) - (Cloud Spanner migrations component)
+*   **Key Contacts:**
+    *   **Recent Contributors:** darshan-sj, aasthabharill, shreyakhajanchi, sm745052
+
+## Documentation
+
+*   **Key Design Docs:**
+    *   [Bulk Migration to Spanner Design](http://go/bulk-migration-to-spanner-design) - Overall pipeline design.
+    *   [CS Reader for Bulk Migration](http://go/cs-reader-for-bulk-migration-to-spanner) - Reader design.
+    *   [Spanner Bulk Migration User Guide](http://go/spanner-bulk-migration-user-guide) - Usage instructions.
+*   **Architecture Diagram:** [architecture.svg](architecture.svg)
+
+## AI Agent Tips
+
+*   **Common Tasks:** Adding new JDBC dialects, fixing parsing errors, implementing new transformations or schema overrides, adding new source reader capabilities.
+*   **Coding Standards & Best Practices:**
+    *   Use `AutoValue` for POJOs.
+    *   Strict adherence to Apache Beam paradigms (PTransforms, DoFns). Use `TupleTag` for side outputs like the DLQ.
+    *   Use structured logging (`com.google.cloud.teleport.structured-logging`).
+*   **Testing Frameworks & Guidelines:**
+    *   **Frameworks:** JUnit 4, Google Truth for assertions, Mockito for mocking.
+    *   **Rules:** Ensure tests use `@RunWith(JUnit4.class)`. Use embedded databases for testing when possible (e.g. `derby` or `embedded-cassandra`).
+*   **Areas to be Careful:** Cross-shard querying logic, causal ordering around the DLQ, and schema mappings parsing.
+*   **Example CLs:**
+    *   [39a8ae5e0](https://github.com/GoogleCloudPlatform/DataflowTemplates/commit/39a8ae5e0) - Fix GCS Avro Export flow
+    *   [90964dca6](https://github.com/GoogleCloudPlatform/DataflowTemplates/commit/90964dca6) - Add Support for UUID-based Partitioning

From c6f1b2c064cfb839a5a1404cc1d836aa358f2ab8 Mon Sep 17 00:00:00 2001
From: aasthabharill <77983396+aasthabharill@users.noreply.github.com>
Date: Tue, 9 Jun 2026 12:04:26 +0530
Subject: [PATCH 2/6] Update v2/sourcedb-to-spanner/project-context.md

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 v2/sourcedb-to-spanner/project-context.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md
index e6784020ad..c2d7a9e752 100644
--- a/v2/sourcedb-to-spanner/project-context.md
+++ b/v2/sourcedb-to-spanner/project-context.md
@@ -18,7 +18,7 @@
     *   **Languages:** Java 17
     *   **Frameworks/Libraries:** Apache Beam 2.73.0, Maven
     *   **Key Google Technologies:** Cloud Spanner, Cloud Storage (GCS), Dataflow
-*   **Code Location:** `/usr/local/google/home/aasthabharill/DataflowTemplates/v2/sourcedb-to-spanner`
+*   **Code Location:** `v2/sourcedb-to-spanner`
 *   **Data Flow:** Data is read from Source Databases (MySQL/PostgreSQL/Cassandra) using JDBC or Datastax driver -> Mapped into SourceRows -> Transformed to Spanner Mutations -> Written to Cloud Spanner. Failed mutations are logged to a GCS DLQ.
 *   **Project Structure (Logical Architecture Mapping):**
     *   `src/main/java/com/google/cloud/teleport/v2/source/reader`: Source Readers (IoWrappers for Cassandra, JDBC, etc., RowMappers)

From 8efa399308e6c8014a71404383302167b2eefd03 Mon Sep 17 00:00:00 2001
From: aasthabharill <77983396+aasthabharill@users.noreply.github.com>
Date: Tue, 9 Jun 2026 12:04:37 +0530
Subject: [PATCH 3/6] Update v2/sourcedb-to-spanner/project-context.md

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 v2/sourcedb-to-spanner/project-context.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md
index c2d7a9e752..7df261d9e3 100644
--- a/v2/sourcedb-to-spanner/project-context.md
+++ b/v2/sourcedb-to-spanner/project-context.md
@@ -28,9 +28,9 @@
 *   **Build/Run Commands:**
     ```bash
     # To build the flex template
-    export PROJECT=span-cloud-ck-testing-external
-    export BUCKET_NAME=ea-functional-tests
-    mvn clean package -PtemplatesStage -DskipTests -DprojectId="$PROJECT" -DbucketName="$BUCKET_NAME" -DstagePrefix="templates-<replace-with-your-prefix>" -DtemplateName="Sourcedb_to_Spanner_Flex" -pl v2/sourcedb-to-spanner -am
+    export PROJECT_ID="<your-project-id>"
+    export BUCKET_NAME="<your-bucket-name>"
+    mvn clean package -PtemplatesStage -DskipTests -DprojectId="$PROJECT_ID" -DbucketName="$BUCKET_NAME" -DstagePrefix="templates-<replace-with-your-prefix>" -DtemplateName="Sourcedb_to_Spanner_Flex" -pl v2/sourcedb-to-spanner -am
 
     # To run tests
     mvn clean test -pl v2/sourcedb-to-spanner -am

From b0b2cf2aa358b99cf2cfc354e8bba7abb435a84b Mon Sep 17 00:00:00 2001
From: aasthabharill <aasthabharill4@gmail.com>
Date: Tue, 9 Jun 2026 12:10:49 +0530
Subject: [PATCH 4/6] spotless

---
 v2/sourcedb-to-spanner/project-context.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md
index 7df261d9e3..d9ca7d35c5 100644
--- a/v2/sourcedb-to-spanner/project-context.md
+++ b/v2/sourcedb-to-spanner/project-context.md
@@ -7,7 +7,7 @@
 *   **Core Intent:** A bulk migration Dataflow pipeline to migrate data from various Source Databases (MySQL, PostgreSQL, Cassandra) into Cloud Spanner. It handles sharded and non-sharded databases.
 *   **Primary Users:** Internal SREs, external customers migrating to Cloud Spanner, and users of Spanner Migration Tool.
 *   **Critical SLOs/Guarantees:** Must effectively handle bulk data extraction and mapping to Cloud Spanner mutations while maintaining data integrity. Features a Dead Letter Queue (DLQ) for failed mutations.
-*   **Terminology:** 
+*   **Terminology:**
     *   **DLQ:** Dead Letter Queue (for failed records).
     *   **SourceRow:** Intermediate representation of a row read from the source database.
     *   **Mutation:** Spanner mutation to be applied.

From f0c315d1eada61ffd683ab680d0a01d547eaa7fc Mon Sep 17 00:00:00 2001
From: aasthabharill <aasthabharill4@gmail.com>
Date: Tue, 9 Jun 2026 13:24:38 +0530
Subject: [PATCH 5/6] review + added more gotchas from docs

---
 v2/sourcedb-to-spanner/project-context.md | 69 +++++++++--------------
 1 file changed, 28 insertions(+), 41 deletions(-)

diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md
index d9ca7d35c5..f6271779dc 100644
--- a/v2/sourcedb-to-spanner/project-context.md
+++ b/v2/sourcedb-to-spanner/project-context.md
@@ -4,13 +4,16 @@
 
 ## Overview
 
-*   **Core Intent:** A bulk migration Dataflow pipeline to migrate data from various Source Databases (MySQL, PostgreSQL, Cassandra) into Cloud Spanner. It handles sharded and non-sharded databases.
-*   **Primary Users:** Internal SREs, external customers migrating to Cloud Spanner, and users of Spanner Migration Tool.
+*   **Core Intent:** A bulk migration Dataflow pipeline to migrate data from various Source Databases (MySQL, PostgreSQL, Cassandra) into Cloud Spanner. It handles sharded and non-sharded databases. It writes eagerly to Spanner (no intermediate buffers) and works in tandem with a CDC pipeline (such as Datastream) to reach eventual consistency.
+*   **Primary Users:** SREs, external customers migrating to Cloud Spanner, and users of Spanner Migration Tool.
 *   **Critical SLOs/Guarantees:** Must effectively handle bulk data extraction and mapping to Cloud Spanner mutations while maintaining data integrity. Features a Dead Letter Queue (DLQ) for failed mutations.
 *   **Terminology:**
+    *   **Pipeline Controller:** Central component managing the lifecycle, configuration parsing, and dependency ordering.
     *   **DLQ:** Dead Letter Queue (for failed records).
-    *   **SourceRow:** Intermediate representation of a row read from the source database.
+    *   **SourceRow:** Intermediate representation of a row read from the source database, typically wrapping an Avro `GenericRecord` based on Datastream's unified type system.
     *   **Mutation:** Spanner mutation to be applied.
+    *   **RWUPT:** Read With Uniform Partitions.
+    *   **UniformSourcePartitioner:** Custom partitioner used because standard Beam partitioners only split integer/datetime columns.
 
 ## Technical Details
 
@@ -18,7 +21,7 @@
     *   **Languages:** Java 17
     *   **Frameworks/Libraries:** Apache Beam 2.73.0, Maven
     *   **Key Google Technologies:** Cloud Spanner, Cloud Storage (GCS), Dataflow
-*   **Code Location:** `v2/sourcedb-to-spanner`
+*   **Code Location:** `/usr/local/google/home/aasthabharill/DataflowTemplates/v2/sourcedb-to-spanner`
 *   **Data Flow:** Data is read from Source Databases (MySQL/PostgreSQL/Cassandra) using JDBC or Datastax driver -> Mapped into SourceRows -> Transformed to Spanner Mutations -> Written to Cloud Spanner. Failed mutations are logged to a GCS DLQ.
 *   **Project Structure (Logical Architecture Mapping):**
     *   `src/main/java/com/google/cloud/teleport/v2/source/reader`: Source Readers (IoWrappers for Cassandra, JDBC, etc., RowMappers)
@@ -26,53 +29,37 @@
     *   `src/main/java/com/google/cloud/teleport/v2/writer`: Writers and error handling (`SpannerWriter`, `DeadLetterQueue`)
     *   `src/main/java/com/google/cloud/teleport/v2/templates`: Main pipeline definition (`SourceDbToSpanner`)
 *   **Build/Run Commands:**
-    ```bash
-    # To build the flex template
-    export PROJECT_ID="<your-project-id>"
-    export BUCKET_NAME="<your-bucket-name>"
-    mvn clean package -PtemplatesStage -DskipTests -DprojectId="$PROJECT_ID" -DbucketName="$BUCKET_NAME" -DstagePrefix="templates-<replace-with-your-prefix>" -DtemplateName="Sourcedb_to_Spanner_Flex" -pl v2/sourcedb-to-spanner -am
-
-    # To run tests
-    mvn clean test -pl v2/sourcedb-to-spanner -am
-
-    # To run pipeline
-    export JOB_NAME="bulk-migrate-to-spanner-$(date +%Y%m%d-%H%M%S)"
-    export OUTPUT_DIR="gs://${BUCKET_NAME}/bulk-migration"
-    gcloud dataflow flex-template run $JOB_NAME \
-      --project=$PROJECT_ID \
-      --region=$REGION \
-      --template-file-gcs-location="gs://dataflow-templates-${REGION}/latest/flex/Sourcedb_to_Spanner_Flex" \
-      --max-workers=2 \
-      --num-workers=1 \
-      --worker-machine-type=n2-highmem-8 \
-      --parameters sourceConfigURL=$GCS_SHARDING_PATH,instanceId=$SPANNER_INSTANCE_NAME,databaseId=$SPANNER_DATABASE_NAME,projectId=$PROJECT_ID,outputDirectory=$OUTPUT_DIR,username=datastream_user,password=complex_password_123,schemaOverridesFilePath=$GCS_OVERRIDES_PATH,transformationJarPath=$CUSTOM_JAR_PATH,transformationClassName=com.custom.CustomTransformationFetcher
-    ```
-
-## Project Management
-
-*   **Buganizer Component:** [Infrastructure > Spanner > Cloud > Migrations](https://b.corp.google.com/issues?q=componentid:1008064) - (Cloud Spanner migrations component)
-*   **Key Contacts:**
-    *   **Recent Contributors:** darshan-sj, aasthabharill, shreyakhajanchi, sm745052
+    See the `README_Sourcedb_to_Spanner_Flex.md` file for instructions on building and running the pipeline.
 
 ## Documentation
-
-*   **Key Design Docs:**
-    *   [Bulk Migration to Spanner Design](http://go/bulk-migration-to-spanner-design) - Overall pipeline design.
-    *   [CS Reader for Bulk Migration](http://go/cs-reader-for-bulk-migration-to-spanner) - Reader design.
-    *   [Spanner Bulk Migration User Guide](http://go/spanner-bulk-migration-user-guide) - Usage instructions.
-*   **Architecture Diagram:** [architecture.svg](architecture.svg)
+*   **Architecture Diagram:** [architecture.svg](architecture.svg) (Source: `architecture.dot`).
+    *   **Rule:** Always keep the `.dot` and `.svg` files in sync. If you modify the architecture, you MUST regenerate the `.svg` from the `.dot` file.
 
 ## AI Agent Tips
 
 *   **Common Tasks:** Adding new JDBC dialects, fixing parsing errors, implementing new transformations or schema overrides, adding new source reader capabilities.
 *   **Coding Standards & Best Practices:**
-    *   Use `AutoValue` for POJOs.
+    *   Use `AutoValue` for POJOs. Do not bypass or omit variables required by the AutoValue builder.
     *   Strict adherence to Apache Beam paradigms (PTransforms, DoFns). Use `TupleTag` for side outputs like the DLQ.
+    *   **Serializability:** All fields of `PTransforms` and all elements of `PCollections` MUST be serializable. Implement the `Serializable` interface or register an appropriate `Coder`. Mark non-serializable IO channels and active connection clients `transient`, and instantiate them strictly within `@Setup` or `@StartBundle`.
+    *   **Security:** NEVER log sensitive credentials or customer PII.
+    *   **Type Handling:** Time-based fields MUST be normalized to UTC and encoded as ISO-8601 with nanosecond precision. String fields must correctly map source charsets to Java UTF-16.
+    *   **Separation of Concerns:** The Reader must encode the highest precision possible without data loss. Scaling/rounding to fit Spanner's limits is the strict responsibility of the Transformer.
     *   Use structured logging (`com.google.cloud.teleport.structured-logging`).
+    *   **Formatting:** Always run `mvn spotless:apply -pl v2/sourcedb-to-spanner -am` before committing to adhere to project formatting standards.
 *   **Testing Frameworks & Guidelines:**
     *   **Frameworks:** JUnit 4, Google Truth for assertions, Mockito for mocking.
-    *   **Rules:** Ensure tests use `@RunWith(JUnit4.class)`. Use embedded databases for testing when possible (e.g. `derby` or `embedded-cassandra`).
-*   **Areas to be Careful:** Cross-shard querying logic, causal ordering around the DLQ, and schema mappings parsing.
-*   **Example CLs:**
+    *   **Rules:** Ensure tests use `@RunWith(JUnit4.class)`. Use embedded databases for testing when possible (e.g. `derby` or `embedded-cassandra`). Maintain a minimum unit-test code coverage of 80%.
+    *   **Non-Destructive Refactoring:** When enhancing production classes, do not refactor or rewrite existing test methods. Resolve breaking changes with minimal edits and append new, dedicated test methods for new functionality.
+    *   **100% Branch & Exception Coverage:**
+        *   **Conditionals:** For every touched conditional (e.g., `if/else`, ternary operators), write tests covering both `true` and `false` paths.
+        *   **Exceptions:** Assert all thrown checked and runtime exceptions explicitly via `assertThrows()` or Truth's `ThrowableSubject`.
+*   **Areas to be Careful (Gotchas):**
+    *   **Integration/Load Tests:** NEVER execute `*IT.java` (Integration) or `*LT.java` (Load) test suites during local coding/machine verification. These require remote environments. Only execute `*Test.java` (Unit) locally.
+    *   **OOM Prevention (MySQL Cursor Fetch):** Always configure `fetchSize` to prevent Out Of Memory errors.
+    *   **Inconsistent Data Snapshots:** The reader intentionally does NOT read from a consistent snapshot. The companion CDC stream is trusted to replay updates and resolve mid-flight inconsistencies. Do not attempt to "fix" or lock tables for consistency.
+    *   **Foreign Keys:** The pipeline processes parent tables before child tables, but cyclic (self-referencing) foreign keys will cause startup failures and are unsupported.
+    *   **Pipeline Logic:** Cross-shard querying logic, causal ordering around the DLQ, and schema mappings parsing are highly complex areas.
+*   **Example PRs:**
     *   [39a8ae5e0](https://github.com/GoogleCloudPlatform/DataflowTemplates/commit/39a8ae5e0) - Fix GCS Avro Export flow
     *   [90964dca6](https://github.com/GoogleCloudPlatform/DataflowTemplates/commit/90964dca6) - Add Support for UUID-based Partitioning

From 93a5ed2a64131997b559181664f7f715d3b6f552 Mon Sep 17 00:00:00 2001
From: aasthabharill <77983396+aasthabharill@users.noreply.github.com>
Date: Tue, 9 Jun 2026 15:42:29 +0530
Subject: [PATCH 6/6] Apply suggestion from @gemini-code-assist[bot]

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 v2/sourcedb-to-spanner/project-context.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md
index f6271779dc..6faa8d880d 100644
--- a/v2/sourcedb-to-spanner/project-context.md
+++ b/v2/sourcedb-to-spanner/project-context.md
@@ -21,7 +21,7 @@
     *   **Languages:** Java 17
     *   **Frameworks/Libraries:** Apache Beam 2.73.0, Maven
     *   **Key Google Technologies:** Cloud Spanner, Cloud Storage (GCS), Dataflow
-*   **Code Location:** `/usr/local/google/home/aasthabharill/DataflowTemplates/v2/sourcedb-to-spanner`
+*   **Code Location:** `v2/sourcedb-to-spanner`
 *   **Data Flow:** Data is read from Source Databases (MySQL/PostgreSQL/Cassandra) using JDBC or Datastax driver -> Mapped into SourceRows -> Transformed to Spanner Mutations -> Written to Cloud Spanner. Failed mutations are logged to a GCS DLQ.
 *   **Project Structure (Logical Architecture Mapping):**
     *   `src/main/java/com/google/cloud/teleport/v2/source/reader`: Source Readers (IoWrappers for Cassandra, JDBC, etc., RowMappers)