From 358716a195e7c2e55692c4132564ac805ddc0468 Mon Sep 17 00:00:00 2001 From: Ronald Tse Date: Fri, 15 May 2026 07:12:59 +0800 Subject: [PATCH 1/2] feat: add domain edge support and improve graph visualization - Add domain extraction to build-edges.js with domain-nodes.json output - Add extractDomainEdges to DatasetAdapter for runtime domain detection - Update GraphEngine with domain node type and per-lang edge dedup - Update GraphPanel with domain node rendering (rounded rect, purple) - Add domain display section to ConceptDetail.vue - Update vocabulary store to load domain nodes alongside edges - Add EDGE_TYPE constant and GraphEdge.lang field to types - Update test helpers and add domain/edge tests --- scripts/build-edges.js | 85 +++++++++++++++++++++--- src/__tests__/dataset-adapter.test.ts | 93 ++++++++++++++++++++++++++ src/__tests__/graph.test.ts | 62 ++++++++++++++++++ src/__tests__/test-helpers.ts | 2 + src/adapters/DatasetAdapter.ts | 42 +++++++++++- src/adapters/types.ts | 44 +++++++++++-- src/components/ConceptDetail.vue | 44 ++++++++++++- src/components/GraphPanel.vue | 94 +++++++++++++++++++++------ src/graph/GraphEngine.ts | 11 ++-- src/stores/vocabulary.ts | 35 +++++++++- 10 files changed, 466 insertions(+), 46 deletions(-) diff --git a/scripts/build-edges.js b/scripts/build-edges.js index db84608..711535b 100644 --- a/scripts/build-edges.js +++ b/scripts/build-edges.js @@ -1,7 +1,7 @@ /** - * Pre-computes cross-reference edges for each dataset. - * Reads all concept JSON files, extracts structured and inline references, - * and writes edges.json for each dataset. + * Pre-computes cross-reference and domain edges for each dataset. + * Reads all concept JSON files, extracts structured references and + * authoritative sources (domains), and writes edges.json + domain-nodes.json. * * Usage: node scripts/build-edges.js */ @@ -13,12 +13,18 @@ const __dirname = dirname(fileURLToPath(import.meta.url)); const ROOT = process.cwd(); const DATA_DIR = join(ROOT, 'public', 'data'); -function extractEdgesFromConcept(concept, registerId) { +// --- Normalization --- + +function slugify(text) { + return text.toLowerCase().replace(/[^\w\s-]/g, '').replace(/[\s/]+/g, '-'); +} + +// --- Extractors (open/closed: add new extractors to EXTRACTORS array) --- + +function extractReferences(concept, registerId) { const edges = []; const sourceUri = concept['@id']; - - for (const [_lang, lc] of Object.entries(concept['gl:localizedConcept'] || {})) { - // Structured cross-references (gl:references array, pre-computed during data generation) + for (const [lang, lc] of Object.entries(concept['gl:localizedConcept'] || {})) { if (lc['gl:references']) { for (const ref of lc['gl:references']) { if (ref['@id'] && ref['@id'] !== sourceUri) { @@ -28,15 +34,42 @@ function extractEdgesFromConcept(concept, registerId) { type: 'references', label: ref['gl:term'] || undefined, register: registerId, + lang, }); } } } } + return edges; +} +function extractDomains(concept, registerId) { + const edges = []; + const sourceUri = concept['@id']; + for (const [lang, lc] of Object.entries(concept['gl:localizedConcept'] || {})) { + const domain = lc['gl:domain']; + if (domain) { + edges.push({ + source: sourceUri, + target: `https://glossarist.org/${registerId}/domain/${slugify(domain)}`, + type: 'domain', + label: domain, + register: registerId, + lang, + }); + } + } return edges; } +const EXTRACTORS = [extractReferences, extractDomains]; + +function extractAllEdges(concept, registerId) { + return EXTRACTORS.flatMap(fn => fn(concept, registerId)); +} + +// --- Build --- + function buildEdgesForDataset(datasetDir, registerId) { const conceptsDir = join(datasetDir, 'concepts'); if (!existsSync(conceptsDir)) { @@ -48,13 +81,20 @@ function buildEdgesForDataset(datasetDir, registerId) { console.log(` Processing ${files.length} concepts...`); const allEdges = []; + const domainConceptCount = new Map(); let processed = 0; for (const file of files) { try { const data = JSON.parse(readFileSync(join(conceptsDir, file), 'utf-8')); - const edges = extractEdgesFromConcept(data, registerId); + const edges = extractAllEdges(data, registerId); allEdges.push(...edges); + + for (const edge of edges) { + if (edge.type === 'domain') { + domainConceptCount.set(edge.target, (domainConceptCount.get(edge.target) || 0) + 1); + } + } } catch (e) { console.error(` Error processing ${file}: ${e.message}`); } @@ -64,11 +104,11 @@ function buildEdgesForDataset(datasetDir, registerId) { } } - // Deduplicate edges by source+target pair + // Deduplicate edges by source+target+type+lang const seen = new Set(); const deduped = []; for (const edge of allEdges) { - const key = `${edge.source}→${edge.target}`; + const key = `${edge.source}→${edge.target}→${edge.type}→${edge.lang || ''}`; if (!seen.has(key)) { seen.add(key); deduped.push(edge); @@ -84,6 +124,31 @@ function buildEdgesForDataset(datasetDir, registerId) { const outputPath = join(datasetDir, 'edges.json'); writeFileSync(outputPath, JSON.stringify(output, null, 2)); console.log(` Written ${deduped.length} edges to edges.json (${(JSON.stringify(output).length / 1024).toFixed(1)} KB)`); + + // Build domain-nodes.json + const domainEdgeMap = new Map(); + for (const edge of deduped) { + if (edge.type === 'domain') { + const existing = domainEdgeMap.get(edge.target); + if (existing) { + existing.labels.add(edge.label); + } else { + domainEdgeMap.set(edge.target, { uri: edge.target, labels: new Set([edge.label]), registerId }); + } + } + } + + const domainNodes = [...domainEdgeMap.values()].map(d => ({ + uri: d.uri, + label: [...d.labels][0], + registerId: d.registerId, + conceptCount: domainConceptCount.get(d.uri) || 0, + })).sort((a, b) => b.conceptCount - a.conceptCount); + + const domainOutput = { registerId, domainNodes }; + const domainPath = join(datasetDir, 'domain-nodes.json'); + writeFileSync(domainPath, JSON.stringify(domainOutput, null, 2)); + console.log(` Written ${domainNodes.length} domain nodes to domain-nodes.json`); } // Main diff --git a/src/__tests__/dataset-adapter.test.ts b/src/__tests__/dataset-adapter.test.ts index a0b8323..3e7e8c7 100644 --- a/src/__tests__/dataset-adapter.test.ts +++ b/src/__tests__/dataset-adapter.test.ts @@ -218,6 +218,24 @@ describe('DatasetAdapter', () => { expect(edges[0].label).toBe('functional'); }); + it('tags reference edges with language', () => { + const concept = { + '@id': 'https://glossarist.org/test/concept/1', + 'gl:localizedConcept': { + eng: { 'gl:references': [ + { '@id': 'https://glossarist.org/test/concept/2', 'gl:term': 'other' }, + ]}, + fra: { 'gl:references': [ + { '@id': 'https://glossarist.org/test/concept/3', 'gl:term': 'autre' }, + ]}, + }, + }; + const edges = adapter.extractEdges(concept as any); + expect(edges.length).toBe(2); + expect(edges.find(e => e.lang === 'eng')?.target).toContain('/concept/2'); + expect(edges.find(e => e.lang === 'fra')?.target).toContain('/concept/3'); + }); + it('skips self-references', () => { const concept = { '@id': 'https://glossarist.org/test/concept/102-01-01', @@ -318,6 +336,81 @@ describe('DatasetAdapter', () => { }); }); + describe('extractDomainEdges', () => { + it('extracts domain edges from gl:domain field per language', () => { + const concept = { + '@id': 'https://glossarist.org/test/concept/3', + 'gl:localizedConcept': { + eng: { 'gl:domain': 'geometry' }, + fra: { 'gl:domain': 'géométrie' }, + }, + }; + const edges = adapter.extractDomainEdges(concept as any); + expect(edges.length).toBe(2); + expect(edges.every(e => e.type === 'domain')).toBe(true); + expect(edges.find(e => e.lang === 'eng')?.target).toContain('/domain/geometry'); + expect(edges.find(e => e.lang === 'fra')?.target).toContain('/domain/gomtrie'); + expect(edges.find(e => e.lang === 'eng')?.label).toBe('geometry'); + expect(edges.find(e => e.lang === 'fra')?.label).toBe('géométrie'); + }); + + it('handles same domain across languages', () => { + const concept = { + '@id': 'https://glossarist.org/test/concept/1', + 'gl:localizedConcept': { + eng: { 'gl:domain': 'metadata' }, + fra: { 'gl:domain': 'metadata' }, + }, + }; + const edges = adapter.extractDomainEdges(concept as any); + expect(edges.length).toBe(2); + expect(edges[0].target).toBe(edges[1].target); + expect(edges[0].target).toContain('/domain/metadata'); + }); + + it('skips concepts without gl:domain', () => { + const concept = { + '@id': 'https://glossarist.org/test/concept/1', + 'gl:localizedConcept': { eng: {} }, + }; + const edges = adapter.extractDomainEdges(concept as any); + expect(edges.length).toBe(0); + }); + + it('handles empty localizedConcept', () => { + const concept = { + '@id': 'https://glossarist.org/test/concept/1', + 'gl:localizedConcept': {}, + }; + const edges = adapter.extractDomainEdges(concept as any); + expect(edges.length).toBe(0); + }); + }); + + describe('loadDomainNodes', () => { + it('loads domain nodes from domain-nodes.json', async () => { + mockFetch.mockReturnValue(mockJsonResponse({ + registerId: 'test', + domainNodes: [ + { uri: 'https://glossarist.org/test/domain/iso-19107', label: 'ISO 19107', registerId: 'test', conceptCount: 147 }, + ], + })); + const nodes = await adapter.loadDomainNodes(); + expect(nodes.length).toBe(1); + expect(nodes[0].nodeType).toBe('domain'); + expect(nodes[0].status).toBe('domain'); + expect(nodes[0].loaded).toBe(true); + expect(nodes[0].designations.eng).toBe('ISO 19107'); + expect(mockFetch).toHaveBeenCalledWith('/data/test/domain-nodes.json'); + }); + + it('returns empty array on fetch failure', async () => { + mockFetch.mockReturnValue(Promise.resolve({ ok: false, status: 404 } as Response)); + const nodes = await adapter.loadDomainNodes(); + expect(nodes).toEqual([]); + }); + }); + describe('getLanguages', () => { it('returns languages from manifest', async () => { const manifest = { diff --git a/src/__tests__/graph.test.ts b/src/__tests__/graph.test.ts index fd9f832..59bb19f 100644 --- a/src/__tests__/graph.test.ts +++ b/src/__tests__/graph.test.ts @@ -91,6 +91,37 @@ describe('GraphEngine', () => { expect(g.edgeCount).toBe(1); }); + it('keeps separate edges for different languages', () => { + const g = new GraphEngine(); + g.addEdge({ source: 'uri:a', target: 'uri:b', type: 'references', register: 'test', lang: 'eng' }); + g.addEdge({ source: 'uri:a', target: 'uri:b', type: 'references', register: 'test', lang: 'fra' }); + expect(g.edgeCount).toBe(2); + }); + + it('deduplicates edges with same source+target+type+lang', () => { + const g = new GraphEngine(); + g.addEdge({ source: 'uri:a', target: 'uri:b', type: 'references', register: 'test', lang: 'eng' }); + g.addEdge({ source: 'uri:a', target: 'uri:b', type: 'references', register: 'test', lang: 'eng' }); + expect(g.edgeCount).toBe(1); + }); + + it('creates domain stub with correct fields', () => { + const g = new GraphEngine(); + g.addEdge({ + source: 'https://glossarist.org/isotc211/concept/3', + target: 'https://glossarist.org/isotc211/domain/iso-19105', + type: 'domain', + label: 'ISO 19105', + register: 'isotc211', + lang: 'eng', + }); + const domainNode = g.getNode('https://glossarist.org/isotc211/domain/iso-19105'); + expect(domainNode?.register).toBe('isotc211'); + expect(domainNode?.nodeType).toBe('domain'); + expect(domainNode?.status).toBe('domain'); + expect(domainNode?.loaded).toBe(false); + }); + it('extracts register from URI for stub nodes', () => { const g = new GraphEngine(); g.addEdge({ @@ -172,6 +203,37 @@ describe('GraphEngine', () => { const sub = g.getSubgraph('uri:a', 5); expect(sub.nodes.length).toBe(2); }); + + it('does not traverse past domain nodes in getSubgraph', () => { + const g = new GraphEngine(); + g.addNode(makeNode('https://glossarist.org/test/concept/a', 'a')); + g.addNode(makeNode('https://glossarist.org/test/concept/b', 'b')); + g.addNode(makeNode('https://glossarist.org/test/concept/c', 'c')); + g.addNode(makeNode('https://glossarist.org/test/concept/d', 'd')); + + g.addEdge({ + source: 'https://glossarist.org/test/concept/a', + target: 'https://glossarist.org/test/domain/iso-12345', + type: 'domain', register: 'test', label: 'ISO 12345', lang: 'eng', + }); + g.addEdge({ + source: 'https://glossarist.org/test/concept/b', + target: 'https://glossarist.org/test/domain/iso-12345', + type: 'domain', register: 'test', label: 'ISO 12345', lang: 'eng', + }); + g.addEdge({ + source: 'https://glossarist.org/test/concept/c', + target: 'https://glossarist.org/test/domain/iso-12345', + type: 'domain', register: 'test', label: 'ISO 12345', lang: 'eng', + }); + + const sub = g.getSubgraph('https://glossarist.org/test/concept/a', 3); + const nodeUris = sub.nodes.map(n => n.uri); + expect(nodeUris).toContain('https://glossarist.org/test/concept/a'); + expect(nodeUris).toContain('https://glossarist.org/test/domain/iso-12345'); + expect(nodeUris).not.toContain('https://glossarist.org/test/concept/b'); + expect(nodeUris).not.toContain('https://glossarist.org/test/concept/c'); + }); }); describe('getAllNodes', () => { diff --git a/src/__tests__/test-helpers.ts b/src/__tests__/test-helpers.ts index 3b5341e..1553f9f 100644 --- a/src/__tests__/test-helpers.ts +++ b/src/__tests__/test-helpers.ts @@ -43,6 +43,7 @@ export interface AdapterStubOptions { ensureChunksForRange?: () => Promise; ensureAllChunksLoaded?: () => Promise; extractEdges?: () => any[]; + extractDomainEdges?: () => any[]; getIndexEntry?: () => any; } @@ -61,6 +62,7 @@ export function makeAdapterStub(options: AdapterStubOptions = {}): any { ensureChunksForRange: options.ensureChunksForRange ?? (() => Promise.resolve()), ensureAllChunksLoaded: options.ensureAllChunksLoaded ?? (() => Promise.resolve()), extractEdges: options.extractEdges ?? (() => []), + extractDomainEdges: options.extractDomainEdges ?? (() => []), getIndexEntry: options.getIndexEntry ?? (() => null), }; } diff --git a/src/adapters/DatasetAdapter.ts b/src/adapters/DatasetAdapter.ts index 352b2fd..d4eedd1 100644 --- a/src/adapters/DatasetAdapter.ts +++ b/src/adapters/DatasetAdapter.ts @@ -6,9 +6,14 @@ import type { ConceptDocument, SearchHit, GraphEdge, + GraphNode, } from './types'; import { UriRouter } from './UriRouter'; +function slugify(text: string): string { + return text.toLowerCase().replace(/[^\w\s-]/g, '').replace(/[\s/]+/g, '-'); +} + export class DatasetAdapter { private positionIndex = new Map(); readonly registerId: string; @@ -241,7 +246,7 @@ export class DatasetAdapter { const edges: GraphEdge[] = []; const sourceUri = concept['@id']; - for (const [_lang, lc] of Object.entries(concept['gl:localizedConcept'] || {})) { + for (const [lang, lc] of Object.entries(concept['gl:localizedConcept'] || {})) { if (lc['gl:references']) { for (const ref of lc['gl:references']) { if (ref['@id'] && ref['@id'] !== sourceUri) { @@ -252,6 +257,7 @@ export class DatasetAdapter { type: 'references', label: ref['gl:term'], register: parsed?.registerId ?? this.registerId, + lang, }); } } @@ -261,6 +267,40 @@ export class DatasetAdapter { return edges; } + extractDomainEdges(concept: ConceptDocument): GraphEdge[] { + const edges: GraphEdge[] = []; + const sourceUri = concept['@id']; + for (const [lang, lc] of Object.entries(concept['gl:localizedConcept'] || {})) { + const domain = lc['gl:domain']; + if (domain) { + edges.push({ + source: sourceUri, + target: `https://glossarist.org/${this.registerId}/domain/${slugify(domain)}`, + type: 'domain', + label: domain, + register: this.registerId, + lang, + }); + } + } + return edges; + } + + async loadDomainNodes(): Promise { + const resp = await fetch(`${this.baseUrl}/domain-nodes.json`); + if (!resp.ok) return []; + const data = await resp.json(); + return (data.domainNodes || []).map((dn: any) => ({ + uri: dn.uri, + register: dn.registerId, + conceptId: dn.uri.split('/domain/')[1] || '', + designations: { eng: dn.label }, + status: 'domain', + loaded: true, + nodeType: 'domain' as const, + })); + } + async loadEdgeIndex(): Promise { const resp = await fetch(`${this.baseUrl}/edges.json`); if (!resp.ok) return []; diff --git a/src/adapters/types.ts b/src/adapters/types.ts index 53bcafa..89f05fb 100644 --- a/src/adapters/types.ts +++ b/src/adapters/types.ts @@ -63,12 +63,17 @@ export interface LocalizedConcept { '@type': string; 'gl:languageCode': string; 'gl:entryStatus'?: string; + 'gl:classification'?: string; + 'gl:reviewType'?: string; + 'gl:script'?: string; + 'gl:system'?: string; 'gl:designation'?: Designation[]; 'gl:definition'?: DetailedDefinition[]; 'gl:notes'?: DetailedDefinition[]; 'gl:examples'?: DetailedDefinition[]; 'gl:source'?: ConceptSource[]; - 'gl:release'?: number; + 'gl:release'?: string; + 'gl:lineageSourceSimilarity'?: number; 'gl:reviewDate'?: string; 'gl:reviewDecisionDate'?: string; 'gl:reviewDecisionEvent'?: string; @@ -77,15 +82,37 @@ export interface LocalizedConcept { 'gl:reviewDecisionNotes'?: string; 'gl:dates'?: ConceptDate[]; 'gl:references'?: CrossReference[]; + 'gl:domain'?: string; +} + +export interface GrammarInfo { + 'gl:gender'?: string; + 'gl:number'?: string; + 'gl:noun'?: boolean; + 'gl:verb'?: boolean; + 'gl:adj'?: boolean; + 'gl:adverb'?: boolean; + 'gl:preposition'?: boolean; + 'gl:participle'?: boolean; } export interface Designation { '@type': string; 'gl:normativeStatus': string; 'gl:term': string; - 'gl:gender'?: string; - 'gl:plurality'?: string; + 'gl:grammarInfo'?: GrammarInfo[]; 'gl:international'?: boolean; + 'gl:termType'?: string; + 'gl:absent'?: boolean; + 'gl:geographicalArea'?: string; + 'gl:prefix'?: string; + 'gl:usageInfo'?: string; + 'gl:fieldOfApplication'?: string; + 'gl:acronym'?: boolean; + 'gl:initialism'?: boolean; + 'gl:truncation'?: boolean; + 'gl:text'?: string; + 'gl:image'?: string; } export interface DetailedDefinition { @@ -121,12 +148,18 @@ export interface DatasetRegistry { manifestUrl: string; } +export const EDGE_TYPE = { + REFERENCES: 'references', + DOMAIN: 'domain', +} as const; + export interface GraphEdge { - source: string; // concept URI - target: string; // concept URI + source: string; + target: string; type: string; label?: string; register: string; + lang?: string; } export interface GraphNode { @@ -136,6 +169,7 @@ export interface GraphNode { designations: Record; status: string; loaded: boolean; + nodeType?: 'concept' | 'domain'; } export interface SearchHit { diff --git a/src/components/ConceptDetail.vue b/src/components/ConceptDetail.vue index c9bd242..20bac54 100644 --- a/src/components/ConceptDetail.vue +++ b/src/components/ConceptDetail.vue @@ -277,6 +277,27 @@ function plainTruncate(html: string, max: number = 120): string { const text = cleanContent(html).replace(/<[^>]+>/g, '').replace(/\s+/g, ' ').trim(); return text.length <= max ? text : text.slice(0, max).trimEnd() + '\u2026'; } + +function slugify(text: string): string { + return text.toLowerCase().replace(/[^\w\s-]/g, '').replace(/[\s/]+/g, '-'); +} + +const conceptDomains = computed(() => { + const domainMap = new Map(); + for (const [lang, lc] of Object.entries(props.concept['gl:localizedConcept'] || {})) { + const domain = lc['gl:domain']; + if (domain) { + const slug = slugify(domain); + const existing = domainMap.get(slug); + if (existing) { + if (!existing.langs.includes(lang)) existing.langs.push(lang); + } else { + domainMap.set(slug, { slug, label: domain, langs: [lang] }); + } + } + } + return [...domainMap.values()].sort((a, b) => b.langs.length - a.langs.length); +});