From ac452beb86ee9641b4c6ebeb4f4a21065647c2af Mon Sep 17 00:00:00 2001 From: aasthabharill Date: Tue, 9 Jun 2026 06:26:46 +0000 Subject: [PATCH 1/6] Generate project-context.md for sourcedb-to-spanner --- v2/sourcedb-to-spanner/architecture.dot | 35 ++++++ v2/sourcedb-to-spanner/architecture.svg | 134 ++++++++++++++++++++++ v2/sourcedb-to-spanner/project-context.md | 78 +++++++++++++ 3 files changed, 247 insertions(+) create mode 100644 v2/sourcedb-to-spanner/architecture.dot create mode 100644 v2/sourcedb-to-spanner/architecture.svg create mode 100644 v2/sourcedb-to-spanner/project-context.md diff --git a/v2/sourcedb-to-spanner/architecture.dot b/v2/sourcedb-to-spanner/architecture.dot new file mode 100644 index 0000000000..22f557881f --- /dev/null +++ b/v2/sourcedb-to-spanner/architecture.dot @@ -0,0 +1,35 @@ +digraph Architecture { + node [shape=box, style=filled, color=lightblue]; + + SourceDb [label="Source Database\n(Cassandra, MySQL, PostgreSQL)"]; + + subgraph cluster_Reader { + label = "com.google.cloud.teleport.v2.source.reader"; + ReaderImpl [label="ReaderImpl"]; + IoWrapper [label="IoWrapper (Cassandra, JDBC)"]; + RowMapper [label="RowMapper"]; + } + + subgraph cluster_Transformer { + label = "com.google.cloud.teleport.v2.transformer"; + SourceRowToMutation [label="SourceRowToMutationDoFn"]; + } + + subgraph cluster_Writer { + label = "com.google.cloud.teleport.v2.writer"; + SpannerWriter [label="SpannerWriter"]; + DLQ [label="DeadLetterQueue"]; + } + + Spanner [label="Cloud Spanner"]; + GCS [label="GCS (DLQ)"]; + + SourceDb -> IoWrapper; + IoWrapper -> RowMapper; + RowMapper -> ReaderImpl; + ReaderImpl -> SourceRowToMutation [label="SourceRow"]; + SourceRowToMutation -> SpannerWriter [label="Mutation"]; + SpannerWriter -> Spanner; + SpannerWriter -> DLQ [label="Failed Mutations"]; + DLQ -> GCS; +} diff --git a/v2/sourcedb-to-spanner/architecture.svg b/v2/sourcedb-to-spanner/architecture.svg new file mode 100644 index 0000000000..035c1d300d --- 
/dev/null +++ b/v2/sourcedb-to-spanner/architecture.svg @@ -0,0 +1,134 @@ + + + + + + +Architecture + + +cluster_Reader + +com.google.cloud.teleport.v2.source.reader + + +cluster_Transformer + +com.google.cloud.teleport.v2.transformer + + +cluster_Writer + +com.google.cloud.teleport.v2.writer + + + +SourceDb + +Source Database +(Cassandra, MySQL, PostgreSQL) + + + +IoWrapper + +IoWrapper (Cassandra, JDBC) + + + +SourceDb->IoWrapper + + + + + +ReaderImpl + +ReaderImpl + + + +SourceRowToMutation + +SourceRowToMutationDoFn + + + +ReaderImpl->SourceRowToMutation + + +SourceRow + + + +RowMapper + +RowMapper + + + +IoWrapper->RowMapper + + + + + +RowMapper->ReaderImpl + + + + + +SpannerWriter + +SpannerWriter + + + +SourceRowToMutation->SpannerWriter + + +Mutation + + + +DLQ + +DeadLetterQueue + + + +SpannerWriter->DLQ + + +Failed Mutations + + + +Spanner + +Cloud Spanner + + + +SpannerWriter->Spanner + + + + + +GCS + +GCS (DLQ) + + + +DLQ->GCS + + + + + diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md new file mode 100644 index 0000000000..e6784020ad --- /dev/null +++ b/v2/sourcedb-to-spanner/project-context.md @@ -0,0 +1,78 @@ +# Project Context: SourceDb to Spanner + + + +## Overview + +* **Core Intent:** A bulk migration Dataflow pipeline to migrate data from various Source Databases (MySQL, PostgreSQL, Cassandra) into Cloud Spanner. It handles sharded and non-sharded databases. +* **Primary Users:** Internal SREs, external customers migrating to Cloud Spanner, and users of Spanner Migration Tool. +* **Critical SLOs/Guarantees:** Must effectively handle bulk data extraction and mapping to Cloud Spanner mutations while maintaining data integrity. Features a Dead Letter Queue (DLQ) for failed mutations. +* **Terminology:** + * **DLQ:** Dead Letter Queue (for failed records). + * **SourceRow:** Intermediate representation of a row read from the source database. + * **Mutation:** Spanner mutation to be applied. 
+ +## Technical Details + +* **Tech Stack & Versions:** + * **Languages:** Java 17 + * **Frameworks/Libraries:** Apache Beam 2.73.0, Maven + * **Key Google Technologies:** Cloud Spanner, Cloud Storage (GCS), Dataflow +* **Code Location:** `/usr/local/google/home/aasthabharill/DataflowTemplates/v2/sourcedb-to-spanner` +* **Data Flow:** Data is read from Source Databases (MySQL/PostgreSQL/Cassandra) using JDBC or Datastax driver -> Mapped into SourceRows -> Transformed to Spanner Mutations -> Written to Cloud Spanner. Failed mutations are logged to a GCS DLQ. +* **Project Structure (Logical Architecture Mapping):** + * `src/main/java/com/google/cloud/teleport/v2/source/reader`: Source Readers (IoWrappers for Cassandra, JDBC, etc., RowMappers) + * `src/main/java/com/google/cloud/teleport/v2/transformer`: Transformers (e.g., `SourceRowToMutationDoFn`) + * `src/main/java/com/google/cloud/teleport/v2/writer`: Writers and error handling (`SpannerWriter`, `DeadLetterQueue`) + * `src/main/java/com/google/cloud/teleport/v2/templates`: Main pipeline definition (`SourceDbToSpanner`) +* **Build/Run Commands:** + ```bash + # To build the flex template + export PROJECT=span-cloud-ck-testing-external + export BUCKET_NAME=ea-functional-tests + mvn clean package -PtemplatesStage -DskipTests -DprojectId="$PROJECT" -DbucketName="$BUCKET_NAME" -DstagePrefix="templates-" -DtemplateName="Sourcedb_to_Spanner_Flex" -pl v2/sourcedb-to-spanner -am + + # To run tests + mvn clean test -pl v2/sourcedb-to-spanner -am + + # To run pipeline + export JOB_NAME="bulk-migrate-to-spanner-$(date +%Y%m%d-%H%M%S)" + export OUTPUT_DIR="gs://${BUCKET_NAME}/bulk-migration" + gcloud dataflow flex-template run $JOB_NAME \ + --project=$PROJECT_ID \ + --region=$REGION \ + --template-file-gcs-location="gs://dataflow-templates-${REGION}/latest/flex/Sourcedb_to_Spanner_Flex" \ + --max-workers=2 \ + --num-workers=1 \ + --worker-machine-type=n2-highmem-8 \ + --parameters 
sourceConfigURL=$GCS_SHARDING_PATH,instanceId=$SPANNER_INSTANCE_NAME,databaseId=$SPANNER_DATABASE_NAME,projectId=$PROJECT_ID,outputDirectory=$OUTPUT_DIR,username=datastream_user,password=complex_password_123,schemaOverridesFilePath=$GCS_OVERRIDES_PATH,transformationJarPath=$CUSTOM_JAR_PATH,transformationClassName=com.custom.CustomTransformationFetcher + ``` + +## Project Management + +* **Buganizer Component:** [Infrastructure > Spanner > Cloud > Migrations](https://b.corp.google.com/issues?q=componentid:1008064) - (Cloud Spanner migrations component) +* **Key Contacts:** + * **Recent Contributors:** darshan-sj, aasthabharill, shreyakhajanchi, sm745052 + +## Documentation + +* **Key Design Docs:** + * [Bulk Migration to Spanner Design](http://go/bulk-migration-to-spanner-design) - Overall pipeline design. + * [CS Reader for Bulk Migration](http://go/cs-reader-for-bulk-migration-to-spanner) - Reader design. + * [Spanner Bulk Migration User Guide](http://go/spanner-bulk-migration-user-guide) - Usage instructions. +* **Architecture Diagram:** [architecture.svg](architecture.svg) + +## AI Agent Tips + +* **Common Tasks:** Adding new JDBC dialects, fixing parsing errors, implementing new transformations or schema overrides, adding new source reader capabilities. +* **Coding Standards & Best Practices:** + * Use `AutoValue` for POJOs. + * Strict adherence to Apache Beam paradigms (PTransforms, DoFns). Use `TupleTag` for side outputs like the DLQ. + * Use structured logging (`com.google.cloud.teleport.structured-logging`). +* **Testing Frameworks & Guidelines:** + * **Frameworks:** JUnit 4, Google Truth for assertions, Mockito for mocking. + * **Rules:** Ensure tests use `@RunWith(JUnit4.class)`. Use embedded databases for testing when possible (e.g. `derby` or `embedded-cassandra`). +* **Areas to be Careful:** Cross-shard querying logic, causal ordering around the DLQ, and schema mappings parsing. 
+* **Example CLs:** + * [39a8ae5e0](https://github.com/GoogleCloudPlatform/DataflowTemplates/commit/39a8ae5e0) - Fix GCS Avro Export flow + * [90964dca6](https://github.com/GoogleCloudPlatform/DataflowTemplates/commit/90964dca6) - Add Support for UUID-based Partitioning From c6f1b2c064cfb839a5a1404cc1d836aa358f2ab8 Mon Sep 17 00:00:00 2001 From: aasthabharill <77983396+aasthabharill@users.noreply.github.com> Date: Tue, 9 Jun 2026 12:04:26 +0530 Subject: [PATCH 2/6] Update v2/sourcedb-to-spanner/project-context.md Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- v2/sourcedb-to-spanner/project-context.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md index e6784020ad..c2d7a9e752 100644 --- a/v2/sourcedb-to-spanner/project-context.md +++ b/v2/sourcedb-to-spanner/project-context.md @@ -18,7 +18,7 @@ * **Languages:** Java 17 * **Frameworks/Libraries:** Apache Beam 2.73.0, Maven * **Key Google Technologies:** Cloud Spanner, Cloud Storage (GCS), Dataflow -* **Code Location:** `/usr/local/google/home/aasthabharill/DataflowTemplates/v2/sourcedb-to-spanner` +* **Code Location:** `v2/sourcedb-to-spanner` * **Data Flow:** Data is read from Source Databases (MySQL/PostgreSQL/Cassandra) using JDBC or Datastax driver -> Mapped into SourceRows -> Transformed to Spanner Mutations -> Written to Cloud Spanner. Failed mutations are logged to a GCS DLQ. 
* **Project Structure (Logical Architecture Mapping):** * `src/main/java/com/google/cloud/teleport/v2/source/reader`: Source Readers (IoWrappers for Cassandra, JDBC, etc., RowMappers) From 8efa399308e6c8014a71404383302167b2eefd03 Mon Sep 17 00:00:00 2001 From: aasthabharill <77983396+aasthabharill@users.noreply.github.com> Date: Tue, 9 Jun 2026 12:04:37 +0530 Subject: [PATCH 3/6] Update v2/sourcedb-to-spanner/project-context.md Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- v2/sourcedb-to-spanner/project-context.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md index c2d7a9e752..7df261d9e3 100644 --- a/v2/sourcedb-to-spanner/project-context.md +++ b/v2/sourcedb-to-spanner/project-context.md @@ -28,9 +28,9 @@ * **Build/Run Commands:** ```bash # To build the flex template - export PROJECT=span-cloud-ck-testing-external - export BUCKET_NAME=ea-functional-tests - mvn clean package -PtemplatesStage -DskipTests -DprojectId="$PROJECT" -DbucketName="$BUCKET_NAME" -DstagePrefix="templates-" -DtemplateName="Sourcedb_to_Spanner_Flex" -pl v2/sourcedb-to-spanner -am + export PROJECT_ID="" + export BUCKET_NAME="" + mvn clean package -PtemplatesStage -DskipTests -DprojectId="$PROJECT_ID" -DbucketName="$BUCKET_NAME" -DstagePrefix="templates-" -DtemplateName="Sourcedb_to_Spanner_Flex" -pl v2/sourcedb-to-spanner -am # To run tests mvn clean test -pl v2/sourcedb-to-spanner -am From b0b2cf2aa358b99cf2cfc354e8bba7abb435a84b Mon Sep 17 00:00:00 2001 From: aasthabharill Date: Tue, 9 Jun 2026 12:10:49 +0530 Subject: [PATCH 4/6] spotless --- v2/sourcedb-to-spanner/project-context.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md index 7df261d9e3..d9ca7d35c5 100644 --- a/v2/sourcedb-to-spanner/project-context.md +++ 
b/v2/sourcedb-to-spanner/project-context.md @@ -7,7 +7,7 @@ * **Core Intent:** A bulk migration Dataflow pipeline to migrate data from various Source Databases (MySQL, PostgreSQL, Cassandra) into Cloud Spanner. It handles sharded and non-sharded databases. * **Primary Users:** Internal SREs, external customers migrating to Cloud Spanner, and users of Spanner Migration Tool. * **Critical SLOs/Guarantees:** Must effectively handle bulk data extraction and mapping to Cloud Spanner mutations while maintaining data integrity. Features a Dead Letter Queue (DLQ) for failed mutations. -* **Terminology:** +* **Terminology:** * **DLQ:** Dead Letter Queue (for failed records). * **SourceRow:** Intermediate representation of a row read from the source database. * **Mutation:** Spanner mutation to be applied. From f0c315d1eada61ffd683ab680d0a01d547eaa7fc Mon Sep 17 00:00:00 2001 From: aasthabharill Date: Tue, 9 Jun 2026 13:24:38 +0530 Subject: [PATCH 5/6] review + added more gotchas from docs --- v2/sourcedb-to-spanner/project-context.md | 69 +++++++++-------------- 1 file changed, 28 insertions(+), 41 deletions(-) diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md index d9ca7d35c5..f6271779dc 100644 --- a/v2/sourcedb-to-spanner/project-context.md +++ b/v2/sourcedb-to-spanner/project-context.md @@ -4,13 +4,16 @@ ## Overview -* **Core Intent:** A bulk migration Dataflow pipeline to migrate data from various Source Databases (MySQL, PostgreSQL, Cassandra) into Cloud Spanner. It handles sharded and non-sharded databases. -* **Primary Users:** Internal SREs, external customers migrating to Cloud Spanner, and users of Spanner Migration Tool. +* **Core Intent:** A bulk migration Dataflow pipeline to migrate data from various Source Databases (MySQL, PostgreSQL, Cassandra) into Cloud Spanner. It handles sharded and non-sharded databases.
Writes eagerly to Spanner (no intermediate buffers) and works in tandem with a CDC pipeline (like Datastream) to reach eventual consistency. +* **Primary Users:** SREs, external customers migrating to Cloud Spanner, and users of Spanner Migration Tool. * **Critical SLOs/Guarantees:** Must effectively handle bulk data extraction and mapping to Cloud Spanner mutations while maintaining data integrity. Features a Dead Letter Queue (DLQ) for failed mutations. * **Terminology:** + * **Pipeline Controller:** Central component managing the lifecycle, configuration parsing, and dependency ordering. * **DLQ:** Dead Letter Queue (for failed records). - * **SourceRow:** Intermediate representation of a row read from the source database. + * **SourceRow:** Intermediate representation of a row read from the source database, typically wrapping an Avro `GenericRecord` based on Datastream's unified type system. * **Mutation:** Spanner mutation to be applied. + * **RWUPT:** Read With Uniform Partitions. + * **UniformSourcePartitioner:** Custom partitioner used because standard Beam partitioners only split integer/datetime columns. ## Technical Details @@ -18,7 +21,7 @@ * **Languages:** Java 17 * **Frameworks/Libraries:** Apache Beam 2.73.0, Maven * **Key Google Technologies:** Cloud Spanner, Cloud Storage (GCS), Dataflow -* **Code Location:** `v2/sourcedb-to-spanner` +* **Code Location:** `/usr/local/google/home/aasthabharill/DataflowTemplates/v2/sourcedb-to-spanner` * **Data Flow:** Data is read from Source Databases (MySQL/PostgreSQL/Cassandra) using JDBC or Datastax driver -> Mapped into SourceRows -> Transformed to Spanner Mutations -> Written to Cloud Spanner. Failed mutations are logged to a GCS DLQ. 
* **Project Structure (Logical Architecture Mapping):** * `src/main/java/com/google/cloud/teleport/v2/source/reader`: Source Readers (IoWrappers for Cassandra, JDBC, etc., RowMappers) @@ -26,53 +29,37 @@ * `src/main/java/com/google/cloud/teleport/v2/writer`: Writers and error handling (`SpannerWriter`, `DeadLetterQueue`) * `src/main/java/com/google/cloud/teleport/v2/templates`: Main pipeline definition (`SourceDbToSpanner`) * **Build/Run Commands:** - ```bash - # To build the flex template - export PROJECT_ID="" - export BUCKET_NAME="" - mvn clean package -PtemplatesStage -DskipTests -DprojectId="$PROJECT_ID" -DbucketName="$BUCKET_NAME" -DstagePrefix="templates-" -DtemplateName="Sourcedb_to_Spanner_Flex" -pl v2/sourcedb-to-spanner -am - - # To run tests - mvn clean test -pl v2/sourcedb-to-spanner -am - - # To run pipeline - export JOB_NAME="bulk-migrate-to-spanner-$(date +%Y%m%d-%H%M%S)" - export OUTPUT_DIR="gs://${BUCKET_NAME}/bulk-migration" - gcloud dataflow flex-template run $JOB_NAME \ - --project=$PROJECT_ID \ - --region=$REGION \ - --template-file-gcs-location="gs://dataflow-templates-${REGION}/latest/flex/Sourcedb_to_Spanner_Flex" \ - --max-workers=2 \ - --num-workers=1 \ - --worker-machine-type=n2-highmem-8 \ - --parameters sourceConfigURL=$GCS_SHARDING_PATH,instanceId=$SPANNER_INSTANCE_NAME,databaseId=$SPANNER_DATABASE_NAME,projectId=$PROJECT_ID,outputDirectory=$OUTPUT_DIR,username=datastream_user,password=complex_password_123,schemaOverridesFilePath=$GCS_OVERRIDES_PATH,transformationJarPath=$CUSTOM_JAR_PATH,transformationClassName=com.custom.CustomTransformationFetcher - ``` - -## Project Management - -* **Buganizer Component:** [Infrastructure > Spanner > Cloud > Migrations](https://b.corp.google.com/issues?q=componentid:1008064) - (Cloud Spanner migrations component) -* **Key Contacts:** - * **Recent Contributors:** darshan-sj, aasthabharill, shreyakhajanchi, sm745052 + See the `README_Sourcedb_to_Spanner_Flex.md` file for instructions on building and 
running the pipeline. ## Documentation - -* **Key Design Docs:** - * [Bulk Migration to Spanner Design](http://go/bulk-migration-to-spanner-design) - Overall pipeline design. - * [CS Reader for Bulk Migration](http://go/cs-reader-for-bulk-migration-to-spanner) - Reader design. - * [Spanner Bulk Migration User Guide](http://go/spanner-bulk-migration-user-guide) - Usage instructions. -* **Architecture Diagram:** [architecture.svg](architecture.svg) +* **Architecture Diagram:** [architecture.svg](architecture.svg) (Source: `architecture.dot`). + * **Rule:** Always keep the `.dot` and `.svg` files in sync. If you modify the architecture, you MUST regenerate the `.svg` from the `.dot` file. ## AI Agent Tips * **Common Tasks:** Adding new JDBC dialects, fixing parsing errors, implementing new transformations or schema overrides, adding new source reader capabilities. * **Coding Standards & Best Practices:** - * Use `AutoValue` for POJOs. + * Use `AutoValue` for POJOs. Do not bypass or omit variables required by the AutoValue builder. * Strict adherence to Apache Beam paradigms (PTransforms, DoFns). Use `TupleTag` for side outputs like the DLQ. + * **Serializability:** All elements that are members of `PTransforms` and `PCollections` MUST be serializable. Implement the `Serializable` interface or register an appropriate `Coder`. Mark non-serializable IO channels or active connection clients as `transient` and instantiate them strictly within `@Setup` or `@StartBundle`. + * **Security:** NEVER log sensitive credentials or customer PII. + * **Type Handling:** Time-based fields MUST be normalized to UTC and encoded as ISO-8601 with nanosecond precision. String fields must correctly map source charsets to Java UTF-16. + * **Separation of Concerns:** The Reader must encode the highest precision possible without data loss. Scaling/rounding to fit Spanner's limits is the strict responsibility of the Transformer. * Use structured logging (`com.google.cloud.teleport.structured-logging`).
+ * **Formatting:** Always run `mvn spotless:apply -pl v2/sourcedb-to-spanner -am` before committing to adhere to project formatting standards. * **Testing Frameworks & Guidelines:** * **Frameworks:** JUnit 4, Google Truth for assertions, Mockito for mocking. - * **Rules:** Ensure tests use `@RunWith(JUnit4.class)`. Use embedded databases for testing when possible (e.g. `derby` or `embedded-cassandra`). -* **Areas to be Careful:** Cross-shard querying logic, causal ordering around the DLQ, and schema mappings parsing. -* **Example CLs:** + * **Rules:** Ensure tests use `@RunWith(JUnit4.class)`. Use embedded databases for testing when possible (e.g. `derby` or `embedded-cassandra`). Maintain a minimum unit-test code coverage of 80%. + * **Non-Destructive Refactoring:** When enhancing production classes, do not refactor or rewrite existing test methods. Make only the minimal edits needed to resolve breaking changes, and append new, dedicated test methods for new functionality. + * **100% Branch & Exception Coverage:** + * **Conditionals:** For every touched conditional (e.g., `if/else`, ternary operators), write tests covering both `true` and `false` paths. + * **Exceptions:** Assert all thrown checked and runtime exceptions explicitly via `assertThrows()` or Truth's `ThrowableSubject`. +* **Areas to be Careful (Gotchas):** + * **Integration/Load Tests:** NEVER execute `*IT.java` (Integration) or `*LT.java` (Load) test suites during local coding/machine verification. These require remote environments. Only execute `*Test.java` (Unit) locally. + * **OOM Prevention (MySQL Cursor Fetch):** Always configure `fetchSize` to prevent Out Of Memory errors. + * **Inconsistent Data Snapshots:** The reader intentionally does NOT read from a consistent snapshot. The companion CDC stream is trusted to replay updates and resolve mid-flight inconsistencies. Do not attempt to "fix" or lock tables for consistency.
+ * **Foreign Keys:** The pipeline processes parent tables before child tables, but cyclic (self-referencing) foreign keys will cause startup failures and are unsupported. + * **Pipeline Logic:** Cross-shard querying logic, causal ordering around the DLQ, and schema mappings parsing are highly complex areas. +* **Example PRs:** * [39a8ae5e0](https://github.com/GoogleCloudPlatform/DataflowTemplates/commit/39a8ae5e0) - Fix GCS Avro Export flow * [90964dca6](https://github.com/GoogleCloudPlatform/DataflowTemplates/commit/90964dca6) - Add Support for UUID-based Partitioning From 93a5ed2a64131997b559181664f7f715d3b6f552 Mon Sep 17 00:00:00 2001 From: aasthabharill <77983396+aasthabharill@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:42:29 +0530 Subject: [PATCH 6/6] Apply suggestion from @gemini-code-assist[bot] Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- v2/sourcedb-to-spanner/project-context.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v2/sourcedb-to-spanner/project-context.md b/v2/sourcedb-to-spanner/project-context.md index f6271779dc..6faa8d880d 100644 --- a/v2/sourcedb-to-spanner/project-context.md +++ b/v2/sourcedb-to-spanner/project-context.md @@ -21,7 +21,7 @@ * **Languages:** Java 17 * **Frameworks/Libraries:** Apache Beam 2.73.0, Maven * **Key Google Technologies:** Cloud Spanner, Cloud Storage (GCS), Dataflow -* **Code Location:** `/usr/local/google/home/aasthabharill/DataflowTemplates/v2/sourcedb-to-spanner` +* **Code Location:** `v2/sourcedb-to-spanner` * **Data Flow:** Data is read from Source Databases (MySQL/PostgreSQL/Cassandra) using JDBC or Datastax driver -> Mapped into SourceRows -> Transformed to Spanner Mutations -> Written to Cloud Spanner. Failed mutations are logged to a GCS DLQ. 
* **Project Structure (Logical Architecture Mapping):** * `src/main/java/com/google/cloud/teleport/v2/source/reader`: Source Readers (IoWrappers for Cassandra, JDBC, etc., RowMappers)