digitalmethodsinitiative · dale-wahl · May 5, 2026 · May 6, 2026 · May 6, 2026 · May 6, 2026
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,10 @@
 
 # Testing artefacts
 .temp-profile
+tests/.env
+tests/.env.local
+__pycache__/
+*.pyc
 
 # logs
 geckodriver.log
diff --git a/js/lib.js b/js/lib.js
@@ -57,6 +57,16 @@ class MissingMappedField {
     toString() {
         return `${this.value}`;
     }
+
+    // Mirror 4CAT's API serialization so JSON.stringify produces the same
+    // tagged form on both sides: 4CAT's /api/dataset/<key>/items/ endpoint,
+    // when called with `missing_fields=keep`, emits missing values as
+    // `{ __missing: true, value: <fallback> }`. Matching that shape here
+    // lets the map_item comparator deep-equal both sides without special
+    // handling.
+    toJSON() {
+        return { __missing: true, value: this.value };
+    }
 }
 
 /**

diff --git a/modules/_loader.js b/modules/_loader.js
@@ -1,3 +1,8 @@
+// Load-order dependency: `wrap_for_map_item` (used below) is a free global
+// defined in js/lib.js, which manifest.json loads as a plain background
+// script before this module. There is no import for it here on purpose —
+// MV2 background scripts share one global scope. If lib.js stops being
+// loaded first, the mapper wrapper below will throw a ReferenceError.
 async function load() {
     const imported_modules = [
         await import("./tiktok.js"),

diff --git a/modules/package.json b/modules/package.json
@@ -0,0 +1,3 @@
+{
+  "type": "module"
+}
diff --git a/tests/.env.example b/tests/.env.example
@@ -0,0 +1,23 @@
+# 4CAT API config for the map_item comparator (`npm run test:compare`).
+# Copy this file to .env in this directory and fill in real values.
+# .env is gitignored; .env.example is the committed template.
+
+# Base URL of the 4CAT instance to hit. No trailing slash. Default ports:
+#   :80   for nginx (production)
+#   :4000 for the Flask dev server
+FOURCAT_URL=http://localhost
+
+# API key for that 4CAT instance. Get one from the 4CAT UI; tied to your
+# user. 4CAT accepts the raw key as the Authorization header value (no
+# `Bearer ` prefix).
+FOURCAT_API_KEY=your-api-key-here
+
+# Comma-separated list of dataset keys (the 32-char ids from 4CAT dataset
+# URLs) to compare. The comparator pulls inputs from /download/<key> and
+# expected outputs from
+# /api/dataset/<key>/items/?annotations=no&missing_fields=keep&stream=true
+# for each. The datasource is read from the `X-4CAT-Dataset-Datasource`
+# response header of a HEAD request to that items endpoint.
+#
+# `npm run test:compare -- <key>` narrows a single run to one key; the key
+# must still be listed here.
+FOURCAT_DATASETS=key1,key2,key3
diff --git a/tests/README.md b/tests/README.md
@@ -1,31 +1,42 @@
 ## Tests for Zeeschuimer
 
-This folder contains **testing** code for Zeeschuimer.
+This folder contains testing code for Zeeschuimer. There are four suites,
+each with a different purpose and a different runtime environment:
 
-### Integration Tests (Selenium)
+| Suite                            | Tests                                                     | Environment        | When it runs                    | Needs                                  |
+|----------------------------------|-----------------------------------------------------------|--------------------|---------------------------------|----------------------------------------|
+| Selenium integration             | Page captures real items from each supported platform     | Real Firefox       | Reviewer-supervised, manual     | Firefox profile, sometimes a human     |
+| Duplicate-behavior unit (Jest)   | DB merge / keep / update semantics in isolation           | jsdom + fake-IDB   | `npm test` (every push)         | None                                   |
+| Module load smoke (Jest, Tier 1) | Each `modules/*.js` parses and imports cleanly            | jsdom              | `npm test` (every push)         | None                                   |
+| `map_item` comparator (Jest, Tier 2) | JS `map_item` output matches 4CAT's Python mapping per item | jsdom + cross-fetch | `npm run test:compare` (on demand) | Live 4CAT, API key, dataset key(s) |
 
-The Python + Selenium tests visit pages on supported platforms
-and see how many items are captured. If the amount of items captured is 
-unexpectedly low or high, this is flagged and may indicate that Zeeschuimer no
-longer properly captures data from the platform.
+Hermetic suites (no external dependencies) live in `npm test`. Anything that
+requires a real browser, a 4CAT server, or a human in the loop is opt-in.
 
-These tests are **supervised** i.e. they require monitoring by a human and 
+### Integration tests (Selenium)
+
+The Python + Selenium tests visit pages on supported platforms and see how
+many items are captured. If the number of items captured is unexpectedly
+low or high, this is flagged and may indicate that Zeeschuimer no longer
+properly captures data from the platform.
+
+These tests are **supervised** — they require monitoring by a human and
 cannot run fully autonomously, since some platforms (TikTok in particular)
 occasionally show CAPTCHAs that need to be completed for a test to run
 successfully. This is also why Selenium does not run a headless Firefox.
 
-The amount of items returned per page is somewhat variable for most platforms,
-so if the number is slightly lower or higher than expected this is not 
-necessarily a problem (but worth checking).
+The number of items returned per page is somewhat variable for most
+platforms, so if the number is slightly lower or higher than expected this
+is not necessarily a problem (but worth checking).
 
-Additionally, most platforms require logging in before (full) access to the UI
-is available. The testing script borrows a Firefox profile directory from 
-elsewhere on the system to do this. It will try to find one automatically but
-you can also pass one with the `--profiledir` argument. The idea is that you
-log in to the various sites (Instagram, etc) in your 'normal' Firefox, and the
-tests then borrow that login to interface with the website.
+Most platforms require logging in before (full) access to the UI is
+available. The testing script borrows a Firefox profile directory from
+elsewhere on the system to do this. It will try to find one automatically
+but you can also pass one with the `--profiledir` argument. Log in to the
+various sites (Instagram, etc) in your 'normal' Firefox, and the tests then
+borrow that login.
 
-Run `test.py` to run tests. Required non-standard libraries are in 
+Run `test.py` to run tests. Required non-standard libraries are in
 `requirements.txt`.
 
 Tests are defined in `tests.json` with the following structure:
@@ -35,49 +46,152 @@ Tests are defined in `tests.json` with the following structure:
   "platform id as in zeeschuimer (e.g. 'tiktok.com')": {
     "test case (e.g. 'Home feed')": {
       "url": {
-        "expected": 0,  # amount of items expected to be captured on this page
-        "more-after-scroll": false,  # whether scrolling is supposed to load more items (currently unsupported)
-        "wait": 10  # wait time before checking number of items (optional, default 5)
-      } # more URLS can be added per test case
+        "expected": 0,
+        "more-after-scroll": false,
+        "wait": 10
+      }
     }
   }
 }
 ```
 
-### Unit Tests (Jest)
-
-The JavaScript unit tests verify duplicate-handling logic in isolation using 
-a mocked Dexie database. These tests ensure that when the duplicate behavior 
-setting is changed, the correct existing record is selected for updates.
+### Jest suites
 
 **Prerequisites**
-- Node.js (v18 or later) and npm must be installed
+- Node.js (v18 or later) and npm
+- `cd tests && npm install`
+
+**Recommended: develop the tests inside Docker.** On Windows the global
+permission model can make `npm install` / `npm test` awkward to run from
+an arbitrary shell, and an agentic assistant working in auto-mode will
+hit deny-rules before it can do a `cross-fetch`-style dependency spike.
+Any minimal `node:20`-or-newer image with this repo mounted in is
+enough — install what you need, run `npm install`, run `npm test` and
+`npm run test:compare`. The host's `tests/.env` is picked up via the
+mount, and `FOURCAT_URL` can point at a 4CAT reachable from the
+container (`host.docker.internal` on Windows/Mac, the host IP on
+Linux).
+
+#### Duplicate-behavior unit tests
+
+Verify duplicate-handling logic in isolation using a mocked Dexie database.
+Ensures that when the duplicate behavior setting is changed, the correct
+existing record is selected for updates.
+
+Coverage:
+- Schema upgrade backfills `last_updated` from `timestamp_collected`
+- Compound index correctly selects most recent item by `last_updated`
+- Forward-looking behavior: "keep" → "update" targets newest record
+- Forward-looking behavior: "update" → "keep" creates new records
+- Merge: shallow merge preserves fields from both records
+- Skip: no modifications occur when duplicate found
+- Platform isolation: same `item_id` on different platforms are independent
+- Tie-breaker: when `last_updated` is equal, prefer higher `id`
+
+#### Module load smoke (Tier 1)
+
+For every file under `modules/*.js`, `tests/map_item.test.js` asserts the
+module parses and imports without throwing. Modules with a `map_item`
+export and modules without one both pass this tier — the goal is purely to
+catch a generator that emits a syntax error or an import-time throw.
+
+No data is run through `map_item` here; that work belongs in the
+comparator.
+
+#### `map_item` comparator (Tier 2)
+
+For every 4CAT dataset key listed in `FOURCAT_DATASETS`,
+`tests/map_item_compare.test.js`:
+
+1. sends a HEAD to the items endpoint and reads the datasource id from its
+   `X-4CAT-Dataset-Datasource` response header (no metadata-endpoint call)
+2. translates that id to a Zeeschuimer module name via
+   `zeeschuimer-to-4cat.json` (used in reverse)
+3. fetches `/download/<key>` (NDJSON inputs, already wrapped via
+   `wrap_for_map_item` by Zeeschuimer pre-upload) and
+   `/api/dataset/<key>/items/?annotations=no&missing_fields=keep&stream=true`
+   (expected outputs from 4CAT's Python `map_item`, as NDJSON — `stream=true`
+   avoids the JSON form's `limit=100` pagination)
+4. pairs items by `id` (or by index with a warning if `id` is missing on
+   either side), runs each input through the local `map_item`, and
+   field-by-field diffs against the expected output (4CAT's API-only
+   aggregate `missing_fields` key is excluded; per-field `{__missing:true}`
+   markers are still compared)
 
-**Setup**
+The comparator does **not** exercise `wrap_for_map_item` itself — Zeeschuimer
+applies it pre-storage and `/download/<key>` returns post-wrap items. This
+is an accepted gap; see `docs/map-item-test-plan.md`.
 
-1. Install Node.js dependencies:
-   ```bash
-   cd tests
-   npm install
-   ```
+**Configuration:** copy `tests/.env.example` to `tests/.env` and set:
+- `FOURCAT_URL` — base URL of the 4CAT instance (no trailing slash)
+- `FOURCAT_API_KEY` — raw API key (no `Bearer ` prefix)
+- `FOURCAT_DATASETS` — comma-separated list of dataset keys
 
-**Running tests**
+The comparator hard-errors at startup if any of these are missing.
+
+**Optional knob:** by default the comparator halts a dataset at its first
+failing item (reporting the rest as one skipped "halted" placeholder). To
+compare *every* item, pass `--all`:
 
 ```bash
-npm test
+npm run test:compare -- <dataset_key> --all
 ```
 
-For watch mode during development:
+`FAIL_FAST=0` (or `FAIL_FAST=false`) does the same, but prefer `--all`: an
+inline `FAIL_FAST=0 npm run …` does not reliably reach node when npm/node is
+the Windows binary run through WSL interop, and isn't env syntax in cmd.exe.
+A CLI flag crosses every shell.
+
+### Running
+
 ```bash
+# everything that's hermetic — duplicate-behavior unit + module load smoke
+npm test
+
+# watch mode for the same
 npm run test:watch
+
+# the comparator — every dataset key in FOURCAT_DATASETS
+npm run test:compare
+
+# the comparator narrowed to one dataset key (must still appear in
+# FOURCAT_DATASETS — protects against typos)
+npm run test:compare -- <dataset_key>
+
+# compare every item instead of halting at the first failure
+npm run test:compare -- <dataset_key> --all
 ```
 
-**Test coverage**
-- Schema upgrade backfills `last_updated` from `timestamp_collected`
-- Compound index correctly selects most recent item by `last_updated`
-- Forward-looking behavior: switching from "keep" to "update" targets newest record
-- Forward-looking behavior: switching from "update" to "keep" creates new records
-- Merge behavior: shallow merge preserves fields from both records
-- Skip behavior: no modifications occur when duplicate found
-- Platform isolation: same `item_id` on different platforms are independent
-- Tie-breaker: when `last_updated` is equal, prefer higher `id`
+### Where does a new test go?
+
+- **Pure data transformation, no live external state, runs anywhere.**
+  Duplicate-behavior unit suite (DB logic) or the Tier 1 smoke
+  (`map_item` static checks).
+- **Field-by-field correctness against 4CAT's Python `map_item`.** Tier 2
+  comparator. Add a dataset to `FOURCAT_DATASETS` that covers the case;
+  the comparator will pick it up.
+- **End-to-end user flow in the extension.** Selenium.
+
+### Why the environments differ
+
+The two Jest tiers run in **jsdom** rather than node env. The reasoning:
+
+- `map_item` bodies are pure data transformation, but four of them
+  (`gab`, `pinterest`, `rednote`, `truth`) call `strip_tags`, which
+  invokes `new DOMParser()`. jsdom provides a spec-compliant native
+  `DOMParser`; node env doesn't.
+- jsdom doesn't ship `fetch`. The standard workaround
+  (`undici`) crashes inside jsdom because it pokes at
+  `clearImmediate` / `markResourceTiming` / fast-now timers that jsdom
+  shadows. `cross-fetch` wraps `node-fetch` v2 internally and doesn't
+  hit those Node internals, so it works in jsdom — the comparator
+  imports `cross-fetch/polyfill` to assign `globalThis.fetch`.
+
+The tradeoff is parser parity. jsdom's `DOMParser` is not byte-equal to
+Firefox's Gecko `DOMParser`, which is what runs in production (and
+`cross-fetch`-via-`node-fetch` is likewise not the browser's own `fetch`).
+Whitespace handling around `<br>` and block elements is the usual suspect.
+If the comparator emits false-positive diffs on text fields for the four
+`strip_tags` modules, the right fix is to normalise whitespace in the
+comparator's `deep_equal` rather than chase parser parity. The Selenium
+tier sits above and provides the real-Gecko fidelity check.
diff --git a/tests/_module-info.js b/tests/_module-info.js
@@ -0,0 +1,59 @@
+/**
+ * Shared helper for the map_item test drivers.
+ *
+ * Pre-validates a module by:
+ *   1. Running `node --check` on its file (syntax check; avoids the
+ *      worker-killing experimental-ESM crash when a syntax error reaches
+ *      the dynamic importer).
+ *   2. Dynamically importing it and checking for a `map_item` export.
+ *
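+ * Results are cached per module name in module-level Maps, so repeated
+ * lookups through the same loaded copy of this helper don't pay the
+ * spawnSync cost twice. The cache is in-memory only and is therefore not
+ * shared between separate Jest worker processes.
+ * NOTE(review): Jest also gives each test file its own module registry, so
+ * expect one check per module per test file — confirm if that cost matters.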
+ *
+ * Returns one of four states the test driver can branch on:
+ *   { state: 'ok',           map_item: <fn> }
+ *   { state: 'no_map_item' }
+ *   { state: 'syntax_error', error: <string> }
+ *   { state: 'import_error', error: <Error> }
+ */
+
+import { spawnSync } from 'node:child_process';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const MODULES_ROOT = join(__dirname, '..', 'modules');
+
+const syntax_cache = new Map();
+const inspect_cache = new Map();
+
+function check_module_syntax(module_name) {
+    if (syntax_cache.has(module_name)) return syntax_cache.get(module_name);
+    const module_path = join(MODULES_ROOT, `${module_name}.js`);
+    const result = spawnSync(process.execPath, ['--check', module_path], { encoding: 'utf8' });
+    const out = result.status === 0
+        ? null
+        : (result.stderr || result.stdout || `exit code ${result.status}`).trim();
+    syntax_cache.set(module_name, out);
+    return out;
+}
+
+export async function inspect_module(module_name) {
+    if (inspect_cache.has(module_name)) return inspect_cache.get(module_name);
+    const syntax_error = check_module_syntax(module_name);
+    let result;
+    if (syntax_error) {
+        result = { state: 'syntax_error', error: syntax_error };
+    } else {
+        try {
+            const mod = await import(`../modules/${module_name}.js`);
+            result = typeof mod.map_item === 'function'
+                ? { state: 'ok', map_item: mod.map_item }
+                : { state: 'no_map_item' };
+        } catch (e) {
+            result = { state: 'import_error', error: e };
+        }
+    }
+    inspect_cache.set(module_name, result);
+    return result;
+}
diff --git a/tests/duplicate-behavior.test.js b/tests/duplicate-behavior.test.js
@@ -5,8 +5,9 @@
  * update or merge behaviors to duplicates across navigation boundaries.
  */
 
+import 'fake-indexeddb/auto';
+
 let Dexie;
-require('fake-indexeddb/auto');
 
 // Mock browser extension APIs
 global.browser = {

diff --git a/tests/jest.compare.config.cjs b/tests/jest.compare.config.cjs
@@ -0,0 +1,20 @@
+// Tier 2 — live comparator against a 4CAT instance.
+//
+// Runs only `map_item_compare.test.js`. Requires FOURCAT_URL,
+// FOURCAT_API_KEY, and FOURCAT_DATASETS to be set in tests/.env. Hard-errors
+// rather than silently skipping if env is missing.
+//
+// Env is jsdom so that the four modules using `strip_tags` (gab, pinterest,
+// rednote, truth) have a native DOMParser. The comparator uses cross-fetch
+// to provide a jsdom-friendly fetch (jsdom doesn't ship fetch and undici
+// crashes inside jsdom).
+module.exports = {
+  testEnvironment: 'jsdom',
+  testMatch: ['**/map_item_compare.test.js'],
+  testPathIgnorePatterns: ['/node_modules/'],
+  transform: {},
+  moduleFileExtensions: ['js', 'json'],
+  setupFiles: ['<rootDir>/setup-globals.cjs'],
+  testTimeout: 30000,
+  verbose: true
+};