diff --git a/examples/atm/atm_clinvar.tsv b/examples/atm/atm_clinvar.tsv
new file mode 100644
index 0000000..42f2999
--- /dev/null
+++ b/examples/atm/atm_clinvar.tsv
@@ -0,0 +1,35 @@
+rsid	gene	chromosome	position	ref	alt	clnrevstat	clnsig	clnvc
+rs786203606	ATM	11	108227626	T	C	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs746235533	ATM	11	108227691	C	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs747855862	ATM	11	108235669	G	A	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs876658159	ATM	11	108235805	G	A	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs772821016	ATM	11	108244873	C	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs876660485	ATM	11	108250861	C	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs1555070980	ATM	11	108250907	T	G	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs772926890	ATM	11	108251073	G	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+	ATM	11	108257479	A	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs780619951	ATM	11	108259022	C	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs750663117	ATM	11	108272531	G	A	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs780240314	ATM	11	108272556	T	G	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs1060501687	ATM	11	108272782	G	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs786201957	ATM	11	108279555	C	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs587776551	ATM	11	108281168	G	A	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs1060501551	ATM	11	108289683	A	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs1131691159	ATM	11	108299886	G	C	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs587779844	ATM	11	108301698	C	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs1060501571	ATM	11	108301706	G	A	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs786204751	ATM	11	108304693	C	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs775036118	ATM	11	108307914	C	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs774925473	ATM	11	108309110	A	G	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs2136011029	ATM	11	108310159	G	C	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs1591789046	ATM	11	108317386	T	A	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs904589402	ATM	11	108319978	C	G	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs587780638	ATM	11	108325544	G	A	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs587782403	ATM	11	108327643	A	C	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs28904921	ATM	11	108329202	T	G	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs371638537	ATM	11	108335959	A	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs778269655	ATM	11	108343260	G	A	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs587781363	ATM	11	108345797	C	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs1060501700	ATM	11	108347277	A	C	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs17174393	ATM	11	108353881	G	A	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
+rs121434219	ATM	11	108365476	C	T	reviewed_by_expert_panel	Pathogenic	single_nucleotide_variant
diff --git a/examples/atm/atm_dev.ipynb b/examples/atm/atm_dev.ipynb
new file mode 100644
index 0000000..a7b1e77
--- /dev/null
+++ b/examples/atm/atm_dev.ipynb
@@ -0,0 +1,531 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# ATM\n",
+    "\n",
+    "This notebook shows how to develop a classifier with embedded tests in Jupyter."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from bioscript import optional_int, optional_str, write_tsv\n",
+    "from bioscript.classifier import GenotypeClassifier\n",
+    "from bioscript.types import VariantCall\n",
+    "from bioscript import assets_dir"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ASSETS_DIR = assets_dir()\n",
+    "RESULT_HEADERS = [\n",
+    "    \"participant_id\",\n",
+    "    \"filename\",\n",
+    "    \"gene\",\n",
+    "    \"rsid\",\n",
+    "    \"chromosome\",\n",
+    "    \"position\",\n",
+    "    \"genotype\",\n",
+    "    \"ref\",\n",
+    "    \"alt\",\n",
+    "    \"variant_type\",\n",
+    "    \"match_type\",\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_variant_calls(df: pd.DataFrame) -> list[VariantCall]:\n",
+    "    \"\"\"Generate VariantCall objects from ClinVar DataFrame.\"\"\"\n",
+    "    vcs: list[VariantCall] = []\n",
+    "    for _, row in df.iterrows():\n",
+    "        vcs.append(\n",
+    "            VariantCall(\n",
+    "                rsid=optional_str(row[\"rsid\"]),\n",
+    "                ref=optional_str(row[\"ref\"]),\n",
+    "                alt=optional_str(row[\"alt\"]),\n",
+    "                chromosome=optional_str(row[\"chromosome\"]),\n",
+    "                position=optional_int(row[\"position\"]),\n",
+    "                gene=optional_str(row.get(\"gene\"), upper=True),\n",
+    "            )\n",
+    "        )\n",
+    "    return vcs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_vcs() -> list[VariantCall]:\n",
+    "    \"\"\"Load ATM variant calls from ClinVar TSV files.\"\"\"\n",
+    "    data_files = [ASSETS_DIR / name for name in [\"atm_clinvar.tsv\"]]\n",
+    "    dfs = [pd.read_csv(f, sep=\"\\t\") for f in data_files]\n",
+    "    df = pd.concat(dfs, ignore_index=True)\n",
+    "    print(f\"Loaded {len(df)} variants from ATM and ATM\")\n",
+    "    return generate_variant_calls(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class ATMClassifier(GenotypeClassifier):\n",
+    "    def classify(self, matches):\n",
+    "        \"\"\"Classify ATM variants and write results to TSV files.\"\"\"\n",
+    "        if not matches.all_matches:\n",
+    "            print(\"No variant matches were found.\", flush=True)\n",
+    "\n",
+    "        # Get categorized matches as report rows\n",
+    "        ref_rows, var_rows, no_rows = matches.categorize_report_rows(\n",
+    "            self.participant_id, self.filename\n",
+    "        )\n",
+    "\n",
+    "        if self.debug:\n",
+    "            write_tsv(f\"{self.output_basename}_ref.tsv\", ref_rows)\n",
+    "            write_tsv(f\"{self.output_basename}_no.tsv\", no_rows)\n",
+    "\n",
+    "        write_tsv(f\"{self.output_basename}.tsv\", var_rows, headers=RESULT_HEADERS)\n",
+    "        \n",
+    "        # Return variant rows for testing\n",
+    "        return var_rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "__bioscript__ = {\n",
+    "    \"variant_calls\": get_vcs,\n",
+    "    \"classifier\": ATMClassifier,\n",
+    "    \"name\": \"ATM\",\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Tests\n",
+    "\n",
+    "Write tests using the test_* function convention:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# from bioscript import VariantFixture\n",
+    "# from bioscript.types import MatchList\n",
+    "# import os\n",
+    "\n",
+    "# # Create test fixtures for BRCA1 and BRCA2 variants\n",
+    "# fixture = VariantFixture(\n",
+    "#     [\n",
+    "#         {\"rsid\": \"rs80357336\", \"chromosome\": \"17\", \"position\": 43045711},\n",
+    "#         {\"rsid\": \"rs886040303\", \"chromosome\": \"17\", \"position\": 43045728},\n",
+    "#         {\"rsid\": \"rs397509295\", \"chromosome\": \"17\", \"position\": 43045729},\n",
+    "#         {\"rsid\": \"rs80358650\", \"chromosome\": \"13\", \"position\": 32316463},\n",
+    "#         {\"rsid\": \"rs397507571\", \"chromosome\": \"13\", \"position\": 32316470},\n",
+    "#         {\"rsid\": \"rs80358622\", \"chromosome\": \"13\", \"position\": 32316497},\n",
+    "#     ],\n",
+    "#     assembly=\"GRCh38\",\n",
+    "# )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# def test_brca1_heterozygous_variants():\n",
+    "#     \"\"\"Test detection of heterozygous BRCA1 variants.\"\"\"\n",
+    "#     # Create test data with heterozygous variants (one alt allele)\n",
+    "#     variants = fixture([\"GC\", \"GA\", \"GT\", \"GG\", \"GG\", \"GG\"])\n",
+    "    \n",
+    "#     # Create mini variant call list for testing\n",
+    "#     test_vcs = [\n",
+    "#         VariantCall(rsid=\"rs80357336\", ref=\"G\", alt=\"C\", chromosome=\"17\", position=43045711, gene=\"BRCA1\"),\n",
+    "#         VariantCall(rsid=\"rs886040303\", ref=\"G\", alt=\"A\", chromosome=\"17\", position=43045728, gene=\"BRCA1\"),\n",
+    "#         VariantCall(rsid=\"rs397509295\", ref=\"G\", alt=\"T\", chromosome=\"17\", position=43045729, gene=\"BRCA1\"),\n",
+    "#     ]\n",
+    "    \n",
+    "#     matches = MatchList(variant_calls=test_vcs).match_rows(variants)\n",
+    "#     classifier = BRCAClassifier(participant_id=\"TEST_HET\", name=\"BRCA\", filename=\"test.txt\")\n",
+    "#     result = classifier(matches)\n",
+    "    \n",
+    "#     assert len(result) == 3, f\"Expected 3 variant rows, got {len(result)}\"\n",
+    "#     assert all(row[\"gene\"] == \"BRCA1\" for row in result), \"All variants should be BRCA1\"\n",
+    "#     assert all(row[\"match_type\"] == \"VARIANT_CALL\" for row in result), \"All should be variant calls\"\n",
+    "    \n",
+    "#     # Cleanup output file\n",
+    "#     os.remove(\"result_BRCA_TEST_HET.tsv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# def test_brca2_homozygous_variant():\n",
+    "#     \"\"\"Test detection of homozygous BRCA2 variant.\"\"\"\n",
+    "#     # Create test data with one homozygous variant (two alt alleles)\n",
+    "#     variants = fixture([\"GG\", \"GG\", \"GG\", \"AA\", \"GG\", \"GG\"])\n",
+    "    \n",
+    "#     test_vcs = [\n",
+    "#         VariantCall(rsid=\"rs80358650\", ref=\"G\", alt=\"A\", chromosome=\"13\", position=32316463, gene=\"BRCA2\"),\n",
+    "#     ]\n",
+    "\n",
+    "#     matches = MatchList(variant_calls=test_vcs).match_rows(variants)\n",
+    "#     classifier = BRCAClassifier(participant_id=\"TEST_HOM\", name=\"BRCA\", filename=\"test.txt\")\n",
+    "#     result = classifier(matches)\n",
+    "    \n",
+    "#     assert len(result) == 1, f\"Expected 1 variant row, got {len(result)}\"\n",
+    "#     assert result[0][\"gene\"] == \"BRCA2\", \"Variant should be BRCA2\"\n",
+    "#     assert result[0][\"genotype\"] == \"AA\", \"Should be homozygous AA\"\n",
+    "    \n",
+    "#     # Cleanup output file\n",
+    "#     os.remove(\"result_BRCA_TEST_HOM.tsv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# def test_no_variants():\n",
+    "#     \"\"\"Test classifier with no matching variants.\"\"\"\n",
+    "#     # All reference genotypes\n",
+    "#     variants = fixture([\"GG\", \"GG\", \"GG\", \"GG\", \"GG\", \"GG\"])\n",
+    "    \n",
+    "#     test_vcs = [\n",
+    "#         VariantCall(rsid=\"rs80357336\", ref=\"G\", alt=\"C\", chromosome=\"17\", position=43045711, gene=\"BRCA1\"),\n",
+    "#     ]\n",
+    "    \n",
+    "#     matches = MatchList(variant_calls=test_vcs).match_rows(variants)\n",
+    "#     classifier = BRCAClassifier(participant_id=\"TEST_REF\", name=\"BRCA\", filename=\"test.txt\")\n",
+    "#     result = classifier(matches)\n",
+    "    \n",
+    "#     assert len(result) == 0, f\"Expected 0 variant rows, got {len(result)}\"\n",
+    "    \n",
+    "#     # Cleanup output file\n",
+    "#     os.remove(\"result_BRCA_TEST_REF.tsv\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Run Tests in Jupyter\n",
+    "\n",
+    "You can run tests directly in the notebook:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # Run tests\n",
+    "# test_brca1_heterozygous_variants()\n",
+    "# test_brca2_homozygous_variant()\n",
+    "# test_no_variants()\n",
+    "# print(\"✓ All tests passed!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Export to Python Module\n",
+    "\n",
+    "Export this notebook to a Python file:\n",
+    "\n",
+    "```bash\n",
+    "bioscript export atm_dev.ipynb -o classify_atm.py\n",
+    "```\n",
+    "\n",
+    "Or in Python:\n",
+    "\n",
+    "```python\n",
+    "from bioscript import export_from_notebook\n",
+    "export_from_notebook(\"atm_dev.ipynb\", \"classify_atm.py\")\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "PosixPath('classify_atm.py')"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from bioscript import export_from_notebook\n",
+    "export_from_notebook(\"atm_dev.ipynb\", \"classify_atm.py\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "============================================================\n",
+      "Testing: classify_atm.py\n",
+      "============================================================\n",
+      "Running tests with pytest: classify_atm.py\n",
+      "\u001b[1m============================= test session starts ==============================\u001b[0m\n",
+      "platform darwin -- Python 3.12.7, pytest-8.4.2, pluggy-1.6.0 -- /Users/madhavajay/dev/bioscript/workspace1/.venv/bin/python3\n",
+      "cachedir: .pytest_cache\n",
+      "rootdir: /Users/madhavajay/dev/bioscript/workspace1/examples/atm\n",
+      "plugins: anyio-4.11.0\n",
+      "collected 0 items                                                              \u001b[0m\n",
+      "\n",
+      "\u001b[33m============================ \u001b[33mno tests ran\u001b[0m\u001b[33m in 0.03s\u001b[0m\u001b[33m =============================\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "!bioscript test classify_atm.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BioVaultProject(name='atm-classifier', author='madhava@openmined.org', workflow='workflow.nf', template=<TemplateType.DYNAMIC_NEXTFLOW: 'dynamic-nextflow'>, version='0.1.0', assets=['classify_atm.py', 'atm_clinvar.tsv'], parameters=[], inputs=[Input(name='participants', type='List[GenotypeRecord]', description='CSV/TSV with participant_id and genotype_file columns', format='csv', path=None, mapping={'participant_id': 'participant_id', 'genotype_file': 'genotype_file'}, cli_flag=None)], outputs=[Output(name='classification_result', type='File', description='ATM variant classification (aggregated)', format='tsv', path='result_ATM.tsv', cli_flag=None)], processes=[ProcessDefinition(name='atm_classifier', script='classify_atm.py', container='ghcr.io/openmined/bioscript:0.1.4', kind='bioscript')], docker_image='ghcr.io/openmined/bioscript:0.1.4', docker_platform='linux/amd64')"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from bioscript import export_bioscript_workflow\n",
+    "\n",
+    "project = export_bioscript_workflow(\n",
+    "    script_path='./classify_atm.py',\n",
+    "    workflow_name='atm-classifier',\n",
+    "    author='madhava@openmined.org',\n",
+    "    target_dir='./',\n",
+    "    assets={\n",
+    "        \"atm_clinvar.tsv\",\n",
+    "    },\n",
+    "    inputs=[\n",
+    "        {\n",
+    "            'name': 'participants',\n",
+    "            'type': 'List[GenotypeRecord]',\n",
+    "            'description': 'CSV/TSV with participant_id and genotype_file columns',\n",
+    "            'format': 'csv',\n",
+    "            'mapping': {\n",
+    "                'participant_id': 'participant_id',\n",
+    "                'genotype_file': 'genotype_file',\n",
+    "            }\n",
+    "        }\n",
+    "    ],\n",
+    "    outputs=[\n",
+    "        {\n",
+    "            'name': 'classification_result',\n",
+    "            'type': 'File',\n",
+    "            'description': 'ATM variant classification (aggregated)',\n",
+    "            'format': 'tsv',\n",
+    "            'path': 'result_ATM.tsv',\n",
+    "        },\n",
+    "    ],\n",
+    ")\n",
+    "project\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BioVaultPipeline(name='atm-classifier', inputs={'samplesheet': 'List[GenotypeRecord]'}, steps=[PipelineStep(step_id='atm', uses='./', with_args={'participants': 'inputs.samplesheet'}, publish={'classification_result': 'File(result_ATM.tsv)'}, store={'counts_sql': SQLStore(source='classification_result', table_name='atm_{run_id}', destination='SQL()', participant_column='participant_id', key_column='participant_id')})])"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from bioscript import export_bioscript_pipeline, PipelineStep, SQLStore\n",
+    "\n",
+    "pipeline = export_bioscript_pipeline(\n",
+    "    pipeline_name='atm-classifier',\n",
+    "    target_dir='./atm-classifier',\n",
+    "    inputs={\n",
+    "        'samplesheet': 'List[GenotypeRecord]',\n",
+    "    },\n",
+    "    steps=[\n",
+    "        PipelineStep(\n",
+    "            step_id='atm',\n",
+    "            uses='./',\n",
+    "            with_args={\n",
+    "                'participants': 'inputs.samplesheet',\n",
+    "            },\n",
+    "            publish={\n",
+    "                'classification_result': 'File(result_ATM.tsv)',\n",
+    "            },\n",
+    "            store={\n",
+    "                'counts_sql': SQLStore(\n",
+    "                    source='classification_result',\n",
+    "                    table_name='atm_{run_id}',\n",
+    "                    destination='SQL()',\n",
+    "                    key_column='participant_id',\n",
+    "                ),\n",
+    "            },\n",
+    "        ),\n",
+    "    ],\n",
+    ")\n",
+    "pipeline\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[bioscript] Current working directory: /Users/madhavajay/dev/bioscript/workspace1/examples/atm\n",
+      "[bioscript] Provided SNP file argument: carika.txt\n",
+      "[bioscript] Provided path absolute? False\n",
+      "[bioscript] Resolved SNP path: /Users/madhavajay/dev/bioscript/workspace1/examples/atm/carika.txt\n",
+      "[bioscript] Resolved exists? True\n",
+      "[bioscript] CWD contents: .DS_Store, .ipynb_checkpoints, .pytest_cache, __pycache__, atm-classifier, atm_clinvar.tsv, atm_dev.ipynb, carika.txt, classify_atm.py, result_ATM_X.tsv, result_ATM_X_no.tsv, result_ATM_X_ref.tsv\n",
+      "[bioscript] Using resolved SNP path: /Users/madhavajay/dev/bioscript/workspace1/examples/atm/carika.txt\n",
+      "Loaded 34 variants from ATM and ATM\n",
+      "participant_id=X\n",
+      "ATM_count=0\n"
+     ]
+    }
+   ],
+   "source": [
+    "!bioscript classify classify_atm.py --file carika.txt --participant_id=\"X\" --debug"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[1m\u001b[36m__pycache__\u001b[m\u001b[m          \u001b[1m\u001b[36matm-classifier\u001b[m\u001b[m       result_ATM_X_no.tsv\n",
+      "atm_clinvar.tsv      carika.txt           result_ATM_X_ref.tsv\n",
+      "atm_dev.ipynb        classify_atm.py      result_ATM_X.tsv\n"
+     ]
+    }
+   ],
+   "source": [
+    "!ls"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "participant_id\tfilename\tgene\trsid\tchromosome\tposition\tgenotype\tref\talt\tvariant_type\tmatch_type\n"
+     ]
+    }
+   ],
+   "source": [
+    "!cat result_ATM_X.tsv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/2_filter.ipynb b/notebooks/2_filter.ipynb
index 2e50405..52178ee 100644
--- a/notebooks/2_filter.ipynb
+++ b/notebooks/2_filter.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "cd584ffd-b0ad-45d8-9c41-ea667b4f11bf",
    "metadata": {},
    "outputs": [],
@@ -13,7 +13,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "4985decc-0576-4762-b760-f19daad67855",
    "metadata": {},
    "outputs": [],
@@ -23,7 +23,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "f3ea7e37-bcc4-4b07-a778-679669044935",
    "metadata": {},
    "outputs": [],
@@ -33,7 +33,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "42774c1a-7fc8-414a-a7e8-04eed400329a",
    "metadata": {},
    "outputs": [],
@@ -91,7 +91,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "0299df2b-bfab-4333-98cb-007f35b94820",
    "metadata": {},
    "outputs": [],
@@ -146,10 +146,62 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "1537a3b3-c7ed-4051-915d-5a6b30fe59b7",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[W::vcf_parse] Contig '1' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '2' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '3' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '4' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '5' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '6' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '7' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '8' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '9' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '10' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '11' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '12' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '13' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '14' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '15' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '16' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '17' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '18' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '19' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '20' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '21' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig '22' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig 'X' is not defined in the header. (Quick workaround: index the file with tabix.)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CLNSIG unique values: ['Affects', 'Benign', 'Benign/Likely_benign', 'Conflicting_classifications_of_pathogenicity', 'Established_risk_allele', 'Likely_benign', 'Likely_pathogenic', 'Likely_pathogenic,_low_penetrance', 'Likely_pathogenic/Likely_pathogenic,_low_penetrance', 'Likely_pathogenic/Likely_risk_allele', 'Likely_pathogenic/Pathogenic,_low_penetrance', 'Likely_risk_allele', 'Pathogenic', 'Pathogenic,_low_penetrance', 'Pathogenic/Likely_pathogenic', 'Pathogenic/Likely_pathogenic,_low_penetrance', 'Pathogenic/Likely_pathogenic/Likely_risk_allele', 'Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance', 'Pathogenic/Likely_risk_allele', 'Pathogenic/Pathogenic,_low_penetrance', 'Uncertain_risk_allele', 'Uncertain_significance', 'Uncertain_significance/Uncertain_risk_allele', 'association', 'association_not_found', 'confers_sensitivity', 'drug_response', 'no_classification_for_the_single_variant', 'no_classifications_from_unflagged_records', 'not_provided', 'other', 'protective', 'risk_factor']\n",
+      "CLNREVSTAT unique values: ['criteria_provided,_conflicting_classifications', 'criteria_provided,_multiple_submitters,_no_conflicts', 'criteria_provided,_single_submitter', 'no_assertion_criteria_provided', 'no_classification_for_the_single_variant', 'no_classification_provided', 'no_classifications_from_unflagged_records', 'practice_guideline', 'reviewed_by_expert_panel']\n",
+      "CLNVC unique values: ['Deletion', 'Duplication', 'Indel', 'Insertion', 'Inversion', 'Microsatellite', 'Variation', 'single_nucleotide_variant']\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[W::vcf_parse] Contig 'Y' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig 'MT' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig 'NT_113889.1' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig 'NT_187633.1' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig 'NT_187661.1' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig 'NT_187693.1' is not defined in the header. (Quick workaround: index the file with tabix.)\n",
+      "[W::vcf_parse] Contig 'NW_009646201.1' is not defined in the header. (Quick workaround: index the file with tabix.)\n"
+     ]
+    }
+   ],
    "source": [
     "clnsigs, revstats, clnvc_values = collect_unique_clnsig_revstat_clnvc(f\"{DOWNLOAD_PATH}/clinvar.vcf\")\n",
     "\n",
@@ -168,7 +220,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "6d7f45d8-eb18-4afe-93d7-04d62738e4a8",
    "metadata": {},
    "outputs": [],
@@ -208,7 +260,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "4b0bf4da-cd44-4176-bca6-58ba022f461e",
    "metadata": {},
    "outputs": [],
@@ -228,7 +280,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "c815a94a-9e21-4e54-a300-6150f7f6f033",
    "metadata": {},
    "outputs": [],
@@ -304,7 +356,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "2f7eed10-8733-49fa-a321-3e3eaa390f37",
    "metadata": {},
    "outputs": [],
@@ -327,7 +379,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "id": "64703926-b7ab-43da-a154-b0d8f20ee4e9",
    "metadata": {},
    "outputs": [],
@@ -439,10 +491,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "id": "413c2131-f9d4-4899-8826-fd96776c4404",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 503 BRCA1 variants\n"
+     ]
+    }
+   ],
    "source": [
     "df_brca1 = clinvar_df_for_gene_filtered(\n",
     "    vcf_path=vcf_path,\n",
@@ -456,10 +516,276 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "id": "a8042731-20a6-4663-ab28-2802f37081de",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CHROM</th>\n",
+       "      <th>POS</th>\n",
+       "      <th>ID</th>\n",
+       "      <th>REF</th>\n",
+       "      <th>ALT</th>\n",
+       "      <th>RS</th>\n",
+       "      <th>RS_prefixed</th>\n",
+       "      <th>CLNSIG</th>\n",
+       "      <th>GENEINFO</th>\n",
+       "      <th>CLNVC</th>\n",
+       "      <th>CLNREVSTAT</th>\n",
+       "      <th>ORIGIN</th>\n",
+       "      <th>ALLELEID</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43045711</td>\n",
+       "      <td>55630</td>\n",
+       "      <td>G</td>\n",
+       "      <td>C</td>\n",
+       "      <td>80357336</td>\n",
+       "      <td>rs80357336</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70297</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43045711</td>\n",
+       "      <td>55629</td>\n",
+       "      <td>G</td>\n",
+       "      <td>T</td>\n",
+       "      <td>80357336</td>\n",
+       "      <td>rs80357336</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70296</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43045728</td>\n",
+       "      <td>266562</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>886040303</td>\n",
+       "      <td>rs886040303</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>261566</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43045729</td>\n",
+       "      <td>55622</td>\n",
+       "      <td>G</td>\n",
+       "      <td>T</td>\n",
+       "      <td>397509295</td>\n",
+       "      <td>rs397509295</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70289</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43045734</td>\n",
+       "      <td>55620</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>80356873</td>\n",
+       "      <td>rs80356873</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70287</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>498</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43124042</td>\n",
+       "      <td>55638</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>397509299</td>\n",
+       "      <td>rs397509299</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70305</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>499</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43124044</td>\n",
+       "      <td>37664</td>\n",
+       "      <td>A</td>\n",
+       "      <td>G</td>\n",
+       "      <td>80356929</td>\n",
+       "      <td>rs80356929</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>46220</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>500</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43124063</td>\n",
+       "      <td>54902</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>80357134</td>\n",
+       "      <td>rs80357134</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>69569</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>501</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43124089</td>\n",
+       "      <td>55746</td>\n",
+       "      <td>A</td>\n",
+       "      <td>C</td>\n",
+       "      <td>397509332</td>\n",
+       "      <td>rs397509332</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70413</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>502</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43124094</td>\n",
+       "      <td>55072</td>\n",
+       "      <td>C</td>\n",
+       "      <td>A</td>\n",
+       "      <td>80357475</td>\n",
+       "      <td>rs80357475</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>69739</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>503 rows × 13 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    CHROM       POS      ID REF ALT         RS  RS_prefixed      CLNSIG  \\\n",
+       "0      17  43045711   55630   G   C   80357336   rs80357336  Pathogenic   \n",
+       "1      17  43045711   55629   G   T   80357336   rs80357336  Pathogenic   \n",
+       "2      17  43045728  266562   G   A  886040303  rs886040303  Pathogenic   \n",
+       "3      17  43045729   55622   G   T  397509295  rs397509295  Pathogenic   \n",
+       "4      17  43045734   55620   G   A   80356873   rs80356873  Pathogenic   \n",
+       "..    ...       ...     ...  ..  ..        ...          ...         ...   \n",
+       "498    17  43124042   55638   G   A  397509299  rs397509299  Pathogenic   \n",
+       "499    17  43124044   37664   A   G   80356929   rs80356929  Pathogenic   \n",
+       "500    17  43124063   54902   G   A   80357134   rs80357134  Pathogenic   \n",
+       "501    17  43124089   55746   A   C  397509332  rs397509332  Pathogenic   \n",
+       "502    17  43124094   55072   C   A   80357475   rs80357475  Pathogenic   \n",
+       "\n",
+       "      GENEINFO                      CLNVC                CLNREVSTAT ORIGIN  \\\n",
+       "0    BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "1    BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "2    BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "3    BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "4    BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "..         ...                        ...                       ...    ...   \n",
+       "498  BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "499  BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "500  BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "501  BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "502  BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "\n",
+       "     ALLELEID  \n",
+       "0       70297  \n",
+       "1       70296  \n",
+       "2      261566  \n",
+       "3       70289  \n",
+       "4       70287  \n",
+       "..        ...  \n",
+       "498     70305  \n",
+       "499     46220  \n",
+       "500     69569  \n",
+       "501     70413  \n",
+       "502     69739  \n",
+       "\n",
+       "[503 rows x 13 columns]"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "df_brca1"
    ]
@@ -474,7 +800,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "id": "d3bb4475-8736-4306-91b6-103feeaf3ea1",
    "metadata": {},
    "outputs": [],
@@ -519,37 +845,172 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "id": "59746fa3-0992-4f21-b03c-8dff8c6d0ed3",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CLNREVSTAT</th>\n",
+       "      <th>count</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>503</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 CLNREVSTAT  count\n",
+       "0  reviewed_by_expert_panel    503"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "count_clnrevstat(df_brca1)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "id": "63a76827-0e22-42bb-9a7f-da227caf5597",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CLNSIG</th>\n",
+       "      <th>count</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>503</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       CLNSIG  count\n",
+       "0  Pathogenic    503"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "count_clnsig(df_brca1)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "id": "821150c0-6ee7-4950-ade6-363b2f796906",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CLNVC</th>\n",
+       "      <th>count</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>503</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                       CLNVC  count\n",
+       "0  single_nucleotide_variant    503"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "count_clnvc(df_brca1)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "id": "7cceea3b-960e-47b6-b057-526ed529182d",
    "metadata": {},
    "outputs": [],
@@ -582,17 +1043,39 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "id": "9cdf0988-c79f-45db-874c-fcb6fe0f6529",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== CLNREVSTAT (Review Status) ===\n",
+      "              CLNREVSTAT  count\n",
+      "reviewed_by_expert_panel    503\n",
+      "\n",
+      "\n",
+      "=== CLNSIG (Clinical Significance) ===\n",
+      "    CLNSIG  count\n",
+      "Pathogenic    503\n",
+      "\n",
+      "\n",
+      "=== CLNVC (Variant Class) ===\n",
+      "                    CLNVC  count\n",
+      "single_nucleotide_variant    503\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "summarize_clinvar_counts(df_brca1)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "id": "e040046b-98a0-4fc9-b4f1-b213aa10cbf9",
    "metadata": {},
    "outputs": [],
@@ -682,10 +1165,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "id": "0055838d-c80a-4a17-9688-662e75686dc3",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 503 BRCA1 variants (now including Duplications)\n"
+     ]
+    }
+   ],
    "source": [
     "df_brca1 = clinvar_df_for_gene_filtered(\n",
     "    vcf_path=vcf_path,\n",
@@ -699,40 +1190,388 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "id": "aabf5576-5f3f-408c-b5ee-13e7f6f5b7bd",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CHROM</th>\n",
+       "      <th>POS</th>\n",
+       "      <th>ID</th>\n",
+       "      <th>REF</th>\n",
+       "      <th>ALT</th>\n",
+       "      <th>RS</th>\n",
+       "      <th>RS_prefixed</th>\n",
+       "      <th>CLNSIG</th>\n",
+       "      <th>GENEINFO</th>\n",
+       "      <th>CLNVC</th>\n",
+       "      <th>CLNREVSTAT</th>\n",
+       "      <th>ORIGIN</th>\n",
+       "      <th>ALLELEID</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43045711</td>\n",
+       "      <td>55630</td>\n",
+       "      <td>G</td>\n",
+       "      <td>C</td>\n",
+       "      <td>80357336</td>\n",
+       "      <td>rs80357336</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70297</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43045711</td>\n",
+       "      <td>55629</td>\n",
+       "      <td>G</td>\n",
+       "      <td>T</td>\n",
+       "      <td>80357336</td>\n",
+       "      <td>rs80357336</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70296</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43045728</td>\n",
+       "      <td>266562</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>886040303</td>\n",
+       "      <td>rs886040303</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>261566</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43045729</td>\n",
+       "      <td>55622</td>\n",
+       "      <td>G</td>\n",
+       "      <td>T</td>\n",
+       "      <td>397509295</td>\n",
+       "      <td>rs397509295</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70289</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43045734</td>\n",
+       "      <td>55620</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>80356873</td>\n",
+       "      <td>rs80356873</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70287</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>498</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43124042</td>\n",
+       "      <td>55638</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>397509299</td>\n",
+       "      <td>rs397509299</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70305</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>499</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43124044</td>\n",
+       "      <td>37664</td>\n",
+       "      <td>A</td>\n",
+       "      <td>G</td>\n",
+       "      <td>80356929</td>\n",
+       "      <td>rs80356929</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>46220</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>500</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43124063</td>\n",
+       "      <td>54902</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>80357134</td>\n",
+       "      <td>rs80357134</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>69569</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>501</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43124089</td>\n",
+       "      <td>55746</td>\n",
+       "      <td>A</td>\n",
+       "      <td>C</td>\n",
+       "      <td>397509332</td>\n",
+       "      <td>rs397509332</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>70413</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>502</th>\n",
+       "      <td>17</td>\n",
+       "      <td>43124094</td>\n",
+       "      <td>55072</td>\n",
+       "      <td>C</td>\n",
+       "      <td>A</td>\n",
+       "      <td>80357475</td>\n",
+       "      <td>rs80357475</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA1:672</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>69739</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>503 rows × 13 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    CHROM       POS      ID REF ALT         RS  RS_prefixed      CLNSIG  \\\n",
+       "0      17  43045711   55630   G   C   80357336   rs80357336  Pathogenic   \n",
+       "1      17  43045711   55629   G   T   80357336   rs80357336  Pathogenic   \n",
+       "2      17  43045728  266562   G   A  886040303  rs886040303  Pathogenic   \n",
+       "3      17  43045729   55622   G   T  397509295  rs397509295  Pathogenic   \n",
+       "4      17  43045734   55620   G   A   80356873   rs80356873  Pathogenic   \n",
+       "..    ...       ...     ...  ..  ..        ...          ...         ...   \n",
+       "498    17  43124042   55638   G   A  397509299  rs397509299  Pathogenic   \n",
+       "499    17  43124044   37664   A   G   80356929   rs80356929  Pathogenic   \n",
+       "500    17  43124063   54902   G   A   80357134   rs80357134  Pathogenic   \n",
+       "501    17  43124089   55746   A   C  397509332  rs397509332  Pathogenic   \n",
+       "502    17  43124094   55072   C   A   80357475   rs80357475  Pathogenic   \n",
+       "\n",
+       "      GENEINFO                      CLNVC                CLNREVSTAT ORIGIN  \\\n",
+       "0    BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "1    BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "2    BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "3    BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "4    BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "..         ...                        ...                       ...    ...   \n",
+       "498  BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "499  BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "500  BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "501  BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "502  BRCA1:672  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "\n",
+       "     ALLELEID  \n",
+       "0       70297  \n",
+       "1       70296  \n",
+       "2      261566  \n",
+       "3       70289  \n",
+       "4       70287  \n",
+       "..        ...  \n",
+       "498     70305  \n",
+       "499     46220  \n",
+       "500     69569  \n",
+       "501     70413  \n",
+       "502     69739  \n",
+       "\n",
+       "[503 rows x 13 columns]"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "df_brca1"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
    "id": "6e44c741-c628-4c43-95c0-dd31a96ff754",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[clinvar] exported 503 rows to ./work/brca1_clinvar.tsv\n"
+     ]
+    }
+   ],
    "source": [
     "export_df = export_clinvar_tsv(df_brca1, \"./work/brca1_clinvar.tsv\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "id": "9ab5eeb9-571f-4eac-8ac6-86111589aecd",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== CLNREVSTAT (Review Status) ===\n",
+      "              CLNREVSTAT  count\n",
+      "reviewed_by_expert_panel    503\n",
+      "\n",
+      "\n",
+      "=== CLNSIG (Clinical Significance) ===\n",
+      "    CLNSIG  count\n",
+      "Pathogenic    503\n",
+      "\n",
+      "\n",
+      "=== CLNVC (Variant Class) ===\n",
+      "                    CLNVC  count\n",
+      "single_nucleotide_variant    503\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "summarize_clinvar_counts(df_brca1)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
    "id": "83714e50-70d1-4006-9078-248229ad8340",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CHROM</th>\n",
+       "      <th>POS</th>\n",
+       "      <th>ID</th>\n",
+       "      <th>REF</th>\n",
+       "      <th>ALT</th>\n",
+       "      <th>RS</th>\n",
+       "      <th>RS_prefixed</th>\n",
+       "      <th>CLNSIG</th>\n",
+       "      <th>GENEINFO</th>\n",
+       "      <th>CLNVC</th>\n",
+       "      <th>CLNREVSTAT</th>\n",
+       "      <th>ORIGIN</th>\n",
+       "      <th>ALLELEID</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [CHROM, POS, ID, REF, ALT, RS, RS_prefixed, CLNSIG, GENEINFO, CLNVC, CLNREVSTAT, ORIGIN, ALLELEID]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# 1) Either REF or ALT has length > 1\n",
     "df_brca1[(df_brca1['REF'].str.len().gt(1).fillna(False)) | (df_brca1['ALT'].str.len().gt(1).fillna(False))]"
@@ -740,10 +1579,62 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "id": "3decd44d-44c4-44ca-a53b-071529eae15e",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CHROM</th>\n",
+       "      <th>POS</th>\n",
+       "      <th>ID</th>\n",
+       "      <th>REF</th>\n",
+       "      <th>ALT</th>\n",
+       "      <th>RS</th>\n",
+       "      <th>RS_prefixed</th>\n",
+       "      <th>CLNSIG</th>\n",
+       "      <th>GENEINFO</th>\n",
+       "      <th>CLNVC</th>\n",
+       "      <th>CLNREVSTAT</th>\n",
+       "      <th>ORIGIN</th>\n",
+       "      <th>ALLELEID</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [CHROM, POS, ID, REF, ALT, RS, RS_prefixed, CLNSIG, GENEINFO, CLNVC, CLNREVSTAT, ORIGIN, ALLELEID]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# 2) Both > 1 AND the same length\n",
     "df_brca1[\n",
@@ -763,10 +1654,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 28,
    "id": "e18f2941-4a68-47ff-829c-57aadbf43458",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 503 BRCA1 variants (now including Duplications)\n"
+     ]
+    }
+   ],
    "source": [
     "df_brca2 = clinvar_df_for_gene_filtered(\n",
     "    vcf_path=vcf_path,\n",
@@ -780,20 +1679,338 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
    "id": "767d36a7-a3f9-431c-a9b3-5948eecb107b",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CHROM</th>\n",
+       "      <th>POS</th>\n",
+       "      <th>ID</th>\n",
+       "      <th>REF</th>\n",
+       "      <th>ALT</th>\n",
+       "      <th>RS</th>\n",
+       "      <th>RS_prefixed</th>\n",
+       "      <th>CLNSIG</th>\n",
+       "      <th>GENEINFO</th>\n",
+       "      <th>CLNVC</th>\n",
+       "      <th>CLNREVSTAT</th>\n",
+       "      <th>ORIGIN</th>\n",
+       "      <th>ALLELEID</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>13</td>\n",
+       "      <td>32316463</td>\n",
+       "      <td>51579</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>80358650</td>\n",
+       "      <td>rs80358650</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA2:675</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>66247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>13</td>\n",
+       "      <td>32316470</td>\n",
+       "      <td>51063</td>\n",
+       "      <td>G</td>\n",
+       "      <td>T</td>\n",
+       "      <td>397507571</td>\n",
+       "      <td>rs397507571</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA2:675</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>65731</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>13</td>\n",
+       "      <td>32316497</td>\n",
+       "      <td>51527</td>\n",
+       "      <td>G</td>\n",
+       "      <td>T</td>\n",
+       "      <td>80358622</td>\n",
+       "      <td>rs80358622</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA2:675</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>66195</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>13</td>\n",
+       "      <td>32316528</td>\n",
+       "      <td>52161</td>\n",
+       "      <td>G</td>\n",
+       "      <td>T</td>\n",
+       "      <td>81002796</td>\n",
+       "      <td>rs81002796</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA2:675</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>66829</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>13</td>\n",
+       "      <td>32319080</td>\n",
+       "      <td>52285</td>\n",
+       "      <td>T</td>\n",
+       "      <td>A</td>\n",
+       "      <td>397507902</td>\n",
+       "      <td>rs397507902</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA2:675</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>66953</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>619</th>\n",
+       "      <td>13</td>\n",
+       "      <td>32398252</td>\n",
+       "      <td>267170</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>886040849</td>\n",
+       "      <td>rs886040849</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA2:675</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>261555</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>620</th>\n",
+       "      <td>13</td>\n",
+       "      <td>32398349</td>\n",
+       "      <td>267174</td>\n",
+       "      <td>T</td>\n",
+       "      <td>A</td>\n",
+       "      <td>886040852</td>\n",
+       "      <td>rs886040852</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA2:675</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>261558</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>621</th>\n",
+       "      <td>13</td>\n",
+       "      <td>32398396</td>\n",
+       "      <td>52911</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>80359247</td>\n",
+       "      <td>rs80359247</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA2:675</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>3</td>\n",
+       "      <td>67579</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>622</th>\n",
+       "      <td>13</td>\n",
+       "      <td>32398437</td>\n",
+       "      <td>267177</td>\n",
+       "      <td>C</td>\n",
+       "      <td>A</td>\n",
+       "      <td>4987049</td>\n",
+       "      <td>rs4987049</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA2:675</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>261561</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>623</th>\n",
+       "      <td>13</td>\n",
+       "      <td>32398437</td>\n",
+       "      <td>52916</td>\n",
+       "      <td>C</td>\n",
+       "      <td>G</td>\n",
+       "      <td>4987049</td>\n",
+       "      <td>rs4987049</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>BRCA2:675</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>67584</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>624 rows × 13 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    CHROM       POS      ID REF ALT         RS  RS_prefixed      CLNSIG  \\\n",
+       "0      13  32316463   51579   G   A   80358650   rs80358650  Pathogenic   \n",
+       "1      13  32316470   51063   G   T  397507571  rs397507571  Pathogenic   \n",
+       "2      13  32316497   51527   G   T   80358622   rs80358622  Pathogenic   \n",
+       "3      13  32316528   52161   G   T   81002796   rs81002796  Pathogenic   \n",
+       "4      13  32319080   52285   T   A  397507902  rs397507902  Pathogenic   \n",
+       "..    ...       ...     ...  ..  ..        ...          ...         ...   \n",
+       "619    13  32398252  267170   C   T  886040849  rs886040849  Pathogenic   \n",
+       "620    13  32398349  267174   T   A  886040852  rs886040852  Pathogenic   \n",
+       "621    13  32398396   52911   C   T   80359247   rs80359247  Pathogenic   \n",
+       "622    13  32398437  267177   C   A    4987049    rs4987049  Pathogenic   \n",
+       "623    13  32398437   52916   C   G    4987049    rs4987049  Pathogenic   \n",
+       "\n",
+       "      GENEINFO                      CLNVC                CLNREVSTAT ORIGIN  \\\n",
+       "0    BRCA2:675  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "1    BRCA2:675  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "2    BRCA2:675  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "3    BRCA2:675  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "4    BRCA2:675  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "..         ...                        ...                       ...    ...   \n",
+       "619  BRCA2:675  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "620  BRCA2:675  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "621  BRCA2:675  single_nucleotide_variant  reviewed_by_expert_panel      3   \n",
+       "622  BRCA2:675  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "623  BRCA2:675  single_nucleotide_variant  reviewed_by_expert_panel      1   \n",
+       "\n",
+       "     ALLELEID  \n",
+       "0       66247  \n",
+       "1       65731  \n",
+       "2       66195  \n",
+       "3       66829  \n",
+       "4       66953  \n",
+       "..        ...  \n",
+       "619    261555  \n",
+       "620    261558  \n",
+       "621     67579  \n",
+       "622    261561  \n",
+       "623     67584  \n",
+       "\n",
+       "[624 rows x 13 columns]"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "df_brca2"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 30,
    "id": "c7e6ef31-0dd9-4dfa-a456-68607b33015b",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CHROM</th>\n",
+       "      <th>POS</th>\n",
+       "      <th>ID</th>\n",
+       "      <th>REF</th>\n",
+       "      <th>ALT</th>\n",
+       "      <th>RS</th>\n",
+       "      <th>RS_prefixed</th>\n",
+       "      <th>CLNSIG</th>\n",
+       "      <th>GENEINFO</th>\n",
+       "      <th>CLNVC</th>\n",
+       "      <th>CLNREVSTAT</th>\n",
+       "      <th>ORIGIN</th>\n",
+       "      <th>ALLELEID</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [CHROM, POS, ID, REF, ALT, RS, RS_prefixed, CLNSIG, GENEINFO, CLNVC, CLNREVSTAT, ORIGIN, ALLELEID]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# 2) Both > 1 AND the same length\n",
     "df_brca2[\n",
@@ -805,20 +2022,50 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 31,
    "id": "94b2913b-979e-408b-b5f7-be93d14b0410",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "=== CLNREVSTAT (Review Status) ===\n",
+      "              CLNREVSTAT  count\n",
+      "reviewed_by_expert_panel    624\n",
+      "\n",
+      "\n",
+      "=== CLNSIG (Clinical Significance) ===\n",
+      "    CLNSIG  count\n",
+      "Pathogenic    624\n",
+      "\n",
+      "\n",
+      "=== CLNVC (Variant Class) ===\n",
+      "                    CLNVC  count\n",
+      "single_nucleotide_variant    624\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "summarize_clinvar_counts(df_brca2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 32,
    "id": "865b4784-5ea5-47b6-af60-fb12de70d60c",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[clinvar] exported 624 rows to ./work/brca2_clinvar.tsv\n"
+     ]
+    }
+   ],
    "source": [
     "export_df = export_clinvar_tsv(df_brca2, \"./work/brca2_clinvar.tsv\")"
    ]
@@ -1217,9 +2464,762 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 33,
    "id": "73856a12-509a-4eb6-a3ae-1e773e1d4989",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 503 ATM variants (now including Duplications)\n"
+     ]
+    }
+   ],
+   "source": [
+    "df_atm = clinvar_df_for_gene_filtered(\n",
+    "    vcf_path=vcf_path,\n",
+    "    gene_name=\"ATM\",\n",
+    "    allowed_types=[\"single_nucleotide_variant\"],\n",
+    "    allowed_clnsig=[\"Pathogenic\"],\n",
+    "    allowed_revstat=[\"practice_guideline\", \"reviewed_by_expert_panel\"]\n",
+    ")\n",
+    "print(f\"Found {len(df_atm)} ATM variants (now including Duplications)\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "e8a55c16-ff93-44b9-9e21-8867277e1713",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CHROM</th>\n",
+       "      <th>POS</th>\n",
+       "      <th>ID</th>\n",
+       "      <th>REF</th>\n",
+       "      <th>ALT</th>\n",
+       "      <th>RS</th>\n",
+       "      <th>RS_prefixed</th>\n",
+       "      <th>CLNSIG</th>\n",
+       "      <th>GENEINFO</th>\n",
+       "      <th>CLNVC</th>\n",
+       "      <th>CLNREVSTAT</th>\n",
+       "      <th>ORIGIN</th>\n",
+       "      <th>ALLELEID</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108227626</td>\n",
+       "      <td>187275</td>\n",
+       "      <td>T</td>\n",
+       "      <td>C</td>\n",
+       "      <td>786203606</td>\n",
+       "      <td>rs786203606</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>183068</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108227691</td>\n",
+       "      <td>232248</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>746235533</td>\n",
+       "      <td>rs746235533</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>3</td>\n",
+       "      <td>233887</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108235669</td>\n",
+       "      <td>231535</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>747855862</td>\n",
+       "      <td>rs747855862</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>233910</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108235805</td>\n",
+       "      <td>634428</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>876658159</td>\n",
+       "      <td>rs876658159</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>5</td>\n",
+       "      <td>622397</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108244873</td>\n",
+       "      <td>216024</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>772821016</td>\n",
+       "      <td>rs772821016</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>212837</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108250861</td>\n",
+       "      <td>233553</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>876660485</td>\n",
+       "      <td>rs876660485</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>233978</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108250907</td>\n",
+       "      <td>453367</td>\n",
+       "      <td>T</td>\n",
+       "      <td>G</td>\n",
+       "      <td>1555070980</td>\n",
+       "      <td>rs1555070980</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>461289</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108251073</td>\n",
+       "      <td>220555</td>\n",
+       "      <td>G</td>\n",
+       "      <td>T</td>\n",
+       "      <td>772926890</td>\n",
+       "      <td>rs772926890</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>222042</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108257479</td>\n",
+       "      <td>4056347</td>\n",
+       "      <td>A</td>\n",
+       "      <td>T</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4170285</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108259022</td>\n",
+       "      <td>216021</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>780619951</td>\n",
+       "      <td>rs780619951</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>19</td>\n",
+       "      <td>212851</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108272531</td>\n",
+       "      <td>231277</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>750663117</td>\n",
+       "      <td>rs750663117</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>234071</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108272556</td>\n",
+       "      <td>556315</td>\n",
+       "      <td>T</td>\n",
+       "      <td>G</td>\n",
+       "      <td>780240314</td>\n",
+       "      <td>rs780240314</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>545982</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108272782</td>\n",
+       "      <td>407699</td>\n",
+       "      <td>G</td>\n",
+       "      <td>T</td>\n",
+       "      <td>1060501687</td>\n",
+       "      <td>rs1060501687</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>397887</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108279555</td>\n",
+       "      <td>185137</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>786201957</td>\n",
+       "      <td>rs786201957</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>183231</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108281168</td>\n",
+       "      <td>3035</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>587776551</td>\n",
+       "      <td>rs587776551</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>18074</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108289683</td>\n",
+       "      <td>407482</td>\n",
+       "      <td>A</td>\n",
+       "      <td>T</td>\n",
+       "      <td>1060501551</td>\n",
+       "      <td>rs1060501551</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>397772</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108299886</td>\n",
+       "      <td>857860</td>\n",
+       "      <td>G</td>\n",
+       "      <td>C</td>\n",
+       "      <td>1131691159</td>\n",
+       "      <td>rs1131691159</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>852591</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108301698</td>\n",
+       "      <td>127403</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>587779844</td>\n",
+       "      <td>rs587779844</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>132860</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108301706</td>\n",
+       "      <td>407510</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>1060501571</td>\n",
+       "      <td>rs1060501571</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>398353</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108304693</td>\n",
+       "      <td>189177</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>786204751</td>\n",
+       "      <td>rs786204751</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>186801</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108307914</td>\n",
+       "      <td>482526</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>775036118</td>\n",
+       "      <td>rs775036118</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>3</td>\n",
+       "      <td>475938</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108309110</td>\n",
+       "      <td>3021</td>\n",
+       "      <td>A</td>\n",
+       "      <td>G</td>\n",
+       "      <td>774925473</td>\n",
+       "      <td>rs774925473</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>18060</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108310159</td>\n",
+       "      <td>2780229</td>\n",
+       "      <td>G</td>\n",
+       "      <td>C</td>\n",
+       "      <td>2136011029</td>\n",
+       "      <td>rs2136011029</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2943970</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108317386</td>\n",
+       "      <td>826252</td>\n",
+       "      <td>T</td>\n",
+       "      <td>A</td>\n",
+       "      <td>1591789046</td>\n",
+       "      <td>rs1591789046</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>810562</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108319978</td>\n",
+       "      <td>1713223</td>\n",
+       "      <td>C</td>\n",
+       "      <td>G</td>\n",
+       "      <td>904589402</td>\n",
+       "      <td>rs904589402</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1770735</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108325544</td>\n",
+       "      <td>135775</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>587780638</td>\n",
+       "      <td>rs587780638</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>139487</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108327643</td>\n",
+       "      <td>142355</td>\n",
+       "      <td>A</td>\n",
+       "      <td>C</td>\n",
+       "      <td>587782403</td>\n",
+       "      <td>rs587782403</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>152069</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108329202</td>\n",
+       "      <td>3023</td>\n",
+       "      <td>T</td>\n",
+       "      <td>G</td>\n",
+       "      <td>28904921</td>\n",
+       "      <td>rs28904921</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>11</td>\n",
+       "      <td>18062</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108335959</td>\n",
+       "      <td>135780</td>\n",
+       "      <td>A</td>\n",
+       "      <td>T</td>\n",
+       "      <td>371638537</td>\n",
+       "      <td>rs371638537</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>17</td>\n",
+       "      <td>139492</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108343260</td>\n",
+       "      <td>189104</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>778269655</td>\n",
+       "      <td>rs778269655</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>186804</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108345797</td>\n",
+       "      <td>140907</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>587781363</td>\n",
+       "      <td>rs587781363</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>150621</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108347277</td>\n",
+       "      <td>407718</td>\n",
+       "      <td>A</td>\n",
+       "      <td>C</td>\n",
+       "      <td>1060501700</td>\n",
+       "      <td>rs1060501700</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>398370</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108353881</td>\n",
+       "      <td>127463</td>\n",
+       "      <td>G</td>\n",
+       "      <td>A</td>\n",
+       "      <td>17174393</td>\n",
+       "      <td>rs17174393</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>1</td>\n",
+       "      <td>132920</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>11</td>\n",
+       "      <td>108365476</td>\n",
+       "      <td>3029</td>\n",
+       "      <td>C</td>\n",
+       "      <td>T</td>\n",
+       "      <td>121434219</td>\n",
+       "      <td>rs121434219</td>\n",
+       "      <td>Pathogenic</td>\n",
+       "      <td>ATM:472|C11orf65:160140</td>\n",
+       "      <td>single_nucleotide_variant</td>\n",
+       "      <td>reviewed_by_expert_panel</td>\n",
+       "      <td>19</td>\n",
+       "      <td>18068</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   CHROM        POS       ID REF ALT          RS   RS_prefixed      CLNSIG  \\\n",
+       "0     11  108227626   187275   T   C   786203606   rs786203606  Pathogenic   \n",
+       "1     11  108227691   232248   C   T   746235533   rs746235533  Pathogenic   \n",
+       "2     11  108235669   231535   G   A   747855862   rs747855862  Pathogenic   \n",
+       "3     11  108235805   634428   G   A   876658159   rs876658159  Pathogenic   \n",
+       "4     11  108244873   216024   C   T   772821016   rs772821016  Pathogenic   \n",
+       "5     11  108250861   233553   C   T   876660485   rs876660485  Pathogenic   \n",
+       "6     11  108250907   453367   T   G  1555070980  rs1555070980  Pathogenic   \n",
+       "7     11  108251073   220555   G   T   772926890   rs772926890  Pathogenic   \n",
+       "8     11  108257479  4056347   A   T        None          None  Pathogenic   \n",
+       "9     11  108259022   216021   C   T   780619951   rs780619951  Pathogenic   \n",
+       "10    11  108272531   231277   G   A   750663117   rs750663117  Pathogenic   \n",
+       "11    11  108272556   556315   T   G   780240314   rs780240314  Pathogenic   \n",
+       "12    11  108272782   407699   G   T  1060501687  rs1060501687  Pathogenic   \n",
+       "13    11  108279555   185137   C   T   786201957   rs786201957  Pathogenic   \n",
+       "14    11  108281168     3035   G   A   587776551   rs587776551  Pathogenic   \n",
+       "15    11  108289683   407482   A   T  1060501551  rs1060501551  Pathogenic   \n",
+       "16    11  108299886   857860   G   C  1131691159  rs1131691159  Pathogenic   \n",
+       "17    11  108301698   127403   C   T   587779844   rs587779844  Pathogenic   \n",
+       "18    11  108301706   407510   G   A  1060501571  rs1060501571  Pathogenic   \n",
+       "19    11  108304693   189177   C   T   786204751   rs786204751  Pathogenic   \n",
+       "20    11  108307914   482526   C   T   775036118   rs775036118  Pathogenic   \n",
+       "21    11  108309110     3021   A   G   774925473   rs774925473  Pathogenic   \n",
+       "22    11  108310159  2780229   G   C  2136011029  rs2136011029  Pathogenic   \n",
+       "23    11  108317386   826252   T   A  1591789046  rs1591789046  Pathogenic   \n",
+       "24    11  108319978  1713223   C   G   904589402   rs904589402  Pathogenic   \n",
+       "25    11  108325544   135775   G   A   587780638   rs587780638  Pathogenic   \n",
+       "26    11  108327643   142355   A   C   587782403   rs587782403  Pathogenic   \n",
+       "27    11  108329202     3023   T   G    28904921    rs28904921  Pathogenic   \n",
+       "28    11  108335959   135780   A   T   371638537   rs371638537  Pathogenic   \n",
+       "29    11  108343260   189104   G   A   778269655   rs778269655  Pathogenic   \n",
+       "30    11  108345797   140907   C   T   587781363   rs587781363  Pathogenic   \n",
+       "31    11  108347277   407718   A   C  1060501700  rs1060501700  Pathogenic   \n",
+       "32    11  108353881   127463   G   A    17174393    rs17174393  Pathogenic   \n",
+       "33    11  108365476     3029   C   T   121434219   rs121434219  Pathogenic   \n",
+       "\n",
+       "                   GENEINFO                      CLNVC  \\\n",
+       "0                   ATM:472  single_nucleotide_variant   \n",
+       "1                   ATM:472  single_nucleotide_variant   \n",
+       "2                   ATM:472  single_nucleotide_variant   \n",
+       "3                   ATM:472  single_nucleotide_variant   \n",
+       "4                   ATM:472  single_nucleotide_variant   \n",
+       "5                   ATM:472  single_nucleotide_variant   \n",
+       "6                   ATM:472  single_nucleotide_variant   \n",
+       "7                   ATM:472  single_nucleotide_variant   \n",
+       "8                   ATM:472  single_nucleotide_variant   \n",
+       "9                   ATM:472  single_nucleotide_variant   \n",
+       "10                  ATM:472  single_nucleotide_variant   \n",
+       "11                  ATM:472  single_nucleotide_variant   \n",
+       "12                  ATM:472  single_nucleotide_variant   \n",
+       "13                  ATM:472  single_nucleotide_variant   \n",
+       "14                  ATM:472  single_nucleotide_variant   \n",
+       "15                  ATM:472  single_nucleotide_variant   \n",
+       "16                  ATM:472  single_nucleotide_variant   \n",
+       "17                  ATM:472  single_nucleotide_variant   \n",
+       "18                  ATM:472  single_nucleotide_variant   \n",
+       "19                  ATM:472  single_nucleotide_variant   \n",
+       "20                  ATM:472  single_nucleotide_variant   \n",
+       "21  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "22  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "23  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "24  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "25  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "26  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "27  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "28  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "29  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "30  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "31  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "32  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "33  ATM:472|C11orf65:160140  single_nucleotide_variant   \n",
+       "\n",
+       "                  CLNREVSTAT ORIGIN  ALLELEID  \n",
+       "0   reviewed_by_expert_panel      1    183068  \n",
+       "1   reviewed_by_expert_panel      3    233887  \n",
+       "2   reviewed_by_expert_panel      1    233910  \n",
+       "3   reviewed_by_expert_panel      5    622397  \n",
+       "4   reviewed_by_expert_panel      1    212837  \n",
+       "5   reviewed_by_expert_panel      1    233978  \n",
+       "6   reviewed_by_expert_panel      1    461289  \n",
+       "7   reviewed_by_expert_panel      1    222042  \n",
+       "8   reviewed_by_expert_panel      1   4170285  \n",
+       "9   reviewed_by_expert_panel     19    212851  \n",
+       "10  reviewed_by_expert_panel      1    234071  \n",
+       "11  reviewed_by_expert_panel      1    545982  \n",
+       "12  reviewed_by_expert_panel      1    397887  \n",
+       "13  reviewed_by_expert_panel      1    183231  \n",
+       "14  reviewed_by_expert_panel      1     18074  \n",
+       "15  reviewed_by_expert_panel      1    397772  \n",
+       "16  reviewed_by_expert_panel      1    852591  \n",
+       "17  reviewed_by_expert_panel      1    132860  \n",
+       "18  reviewed_by_expert_panel      1    398353  \n",
+       "19  reviewed_by_expert_panel      1    186801  \n",
+       "20  reviewed_by_expert_panel      3    475938  \n",
+       "21  reviewed_by_expert_panel      1     18060  \n",
+       "22  reviewed_by_expert_panel      1   2943970  \n",
+       "23  reviewed_by_expert_panel      1    810562  \n",
+       "24  reviewed_by_expert_panel      1   1770735  \n",
+       "25  reviewed_by_expert_panel      1    139487  \n",
+       "26  reviewed_by_expert_panel      1    152069  \n",
+       "27  reviewed_by_expert_panel     11     18062  \n",
+       "28  reviewed_by_expert_panel     17    139492  \n",
+       "29  reviewed_by_expert_panel      1    186804  \n",
+       "30  reviewed_by_expert_panel      1    150621  \n",
+       "31  reviewed_by_expert_panel      1    398370  \n",
+       "32  reviewed_by_expert_panel      1    132920  \n",
+       "33  reviewed_by_expert_panel     19     18068  "
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_atm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "fa39484a-c4c3-4c7c-a3fe-4dc6ac1c4a3b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[clinvar] exported 34 rows to ./work/atm_clinvar.tsv\n"
+     ]
+    }
+   ],
+   "source": [
+    "export_df = export_clinvar_tsv(df_atm, \"./work/atm_clinvar.tsv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "322d2c3b-9523-4f67-bee0-c3c4398cf85b",
+   "metadata": {},
    "outputs": [],
    "source": []
   }