diff --git a/lib/defaultargs.js b/lib/defaultargs.js index 1e06369..fd07b3e 100644 --- a/lib/defaultargs.js +++ b/lib/defaultargs.js @@ -9,6 +9,7 @@ // **defaultargs.coffee** when called on the argv object this // module will create reasonable defaults for options not supplied, // based on what information is provided. +import fs from 'node:fs' import path from 'node:path' import { randomBytes } from 'node:crypto' import { fileURLToPath } from 'node:url' @@ -26,6 +27,17 @@ export default argv => { argv.root ||= __dirname // the directory that contains all the packages that makeup the wiki argv.packageDir ||= path.join(argv.root, '..') + // In a standard npm install, the wiki package.json is at packageDir/wiki/package.json. + // In development, fall back to the package.json in the current working directory. + if (!argv.packageFile) { + const installed = path.join(argv.packageDir, 'wiki', 'package.json') + try { + fs.accessSync(installed, fs.constants.F_OK) + argv.packageFile = installed + } catch { + argv.packageFile = path.join(process.cwd(), 'package.json') + } + } argv.port ||= 3000 argv.home ||= 'welcome-visitors' argv.data ||= path.join(getUserHome(), '.wiki') // see also cli @@ -70,6 +82,7 @@ export default argv => { // resolve all relative paths argv.root = path.resolve(argv.root) argv.packageDir = path.resolve(argv.packageDir) + argv.packageFile = path.resolve(argv.packageFile) argv.data = path.resolve(argv.data) argv.client = path.resolve(argv.client) argv.db = path.resolve(argv.db) diff --git a/lib/errors.js b/lib/errors.js new file mode 100644 index 0000000..05fcbc0 --- /dev/null +++ b/lib/errors.js @@ -0,0 +1,16 @@ +/* + * Federated Wiki : Node Server + * + * Custom error for page-not-found conditions. + * Allows callers to distinguish 404s from unexpected errors + * via instanceof check or the .status property. 
+ */ + +export class PageNotFoundError extends Error { + constructor(slug) { + super(`Page not found: ${slug}`) + this.name = 'PageNotFoundError' + this.status = 404 + this.slug = slug + } +} diff --git a/lib/page.js b/lib/page.js index 60d549b..cdad741 100644 --- a/lib/page.js +++ b/lib/page.js @@ -5,470 +5,226 @@ * Licensed under the MIT license. * https://github.com/fedwiki/wiki-server/blob/master/LICENSE.txt */ -// **page.coffee** -// Module for interacting with pages persisted on the server. -// Everything is stored using json flat files. -// #### Requires #### -// const fs = require('fs') -// const path = require('path') -// const events = require('events') -// const glob = require('glob') -// -// const async = require('async') +// **page.js** +// Module for interacting with pages persisted on the server. +// Uses per-slug promise chaining to serialize I/O on the same page +// while allowing concurrent access to different pages. // -// // const random_id = require('./random_id.cjs') -// const synopsis = require('wiki-client/lib/synopsis') +// All public methods return promises. +// get() throws PageNotFoundError when a page does not exist. 
+import fsp from 'node:fs/promises' import fs from 'node:fs' import path from 'node:path' -import url from 'node:url' -import events from 'node:events' - +import { pipeline } from 'node:stream/promises' import { createRequire } from 'node:module' -const require = createRequire(import.meta.url) +import writeFileAtomic from 'write-file-atomic' -// Use dynamic import to load package.json from the main application's working directory -const wikiPackageImport = async () => { - let done = false - return new Promise(resolve => { - import('wiki/package.json', { with: { type: 'json' } }) - .then(imported => { - done = true - resolve(imported.default) - }) - .catch(e => { - return e - }) - .then(async () => { - if (done) return - const packageJsonPath = path.join(process.cwd(), 'package.json') - const packageJsonUrl = url.pathToFileURL(packageJsonPath).href - import(packageJsonUrl, { with: { type: 'json' } }) - .then(imported => { - resolve(imported.default) - }) - .catch(e => console.error('problems importing package', e)) - }) - }) -} +import { asSlug, lastEdit, extractPageLinks, synopsis } from './utils.js' +import { PageNotFoundError } from './errors.js' -const packageJson = await wikiPackageImport() +const require = createRequire(import.meta.url) -// import random_id from './random_id.cjs'; // Uncomment if needed -import synopsis from 'wiki-client/lib/synopsis.js' // Add correct extension if necessary +const writeAtomic = (loc, data) => + new Promise((resolve, reject) => writeFileAtomic(loc, data, err => (err ? 
reject(err) : resolve()))) -const asSlug = name => - name - .replace(/\s/g, '-') - .replace(/[^A-Za-z0-9-]/g, '') - .toLowerCase() +const copyFile = async (source, target) => { + await fsp.mkdir(path.dirname(target), { recursive: true }) + await pipeline(fs.createReadStream(source), fs.createWriteStream(target)) +} + +const exists = async loc => { + try { + await fsp.access(loc) + return true + } catch { + return false + } +} -// Export a function that generates a page handler -// when called with options object. export default argv => { const wikiName = new URL(argv.url).hostname + const packageJson = JSON.parse(fs.readFileSync(argv.packageFile, 'utf8')) + + fs.mkdirSync(argv.db, { recursive: true }) + + // Per-slug promise chain + const locks = new Map() + + const withLock = (slug, fn) => { + const key = slug.startsWith('recycler/') ? slug : `page:${slug}` + const prev = locks.get(key) || Promise.resolve() + const next = prev.then(fn, fn) + locks.set(key, next) + next.then( + () => { + if (locks.get(key) === next) locks.delete(key) + }, + () => { + if (locks.get(key) === next) locks.delete(key) + }, + ) + return next + } - fs.mkdir(argv.db, { recursive: true }, e => { - if (e) throw e - }) - - // create a list of plugin pages. + // Plugin pages const pluginPages = new Map() - Object.keys(packageJson.dependencies) - .filter(depend => depend.startsWith('wiki-plugin')) - .forEach(plugin => { - const pagesPath = path.join(path.dirname(require.resolve(`${plugin}/package`)), 'pages') - fs.readdir(pagesPath, { withFileTypes: true }, (err, entries) => { - if (err) return - entries.forEach(entry => { - if (entry.isFile() && !pluginPages.has(entry.name)) { - pluginPages.set(entry.name, { pluginName: plugin, pluginPath: entry.parentPath }) - } - }) - }) - }) - - // #### Private utility methods. 
#### - const load_parse = (loc, cb, annotations = {}) => { - let page - fs.readFile(loc, (err, data) => { - if (err) return cb(err) + const initPluginPages = async () => { + const plugins = Object.keys(packageJson.dependencies).filter(d => d.startsWith('wiki-plugin')) + for (const plugin of plugins) { try { - page = JSON.parse(data) - } catch { - const errorPage = path.basename(loc) - const errorPagePath = path.dirname(loc) - const recyclePage = path.resolve(errorPagePath, '..', 'recycle', errorPage) - fs.access(path.dirname(recyclePage), fs.constants.F_OK, err => { - if (!err) { - fs.rename(loc, recyclePage, err => { - if (err) { - console.log(`ERROR: moving problem page ${loc} to recycler`, err) - } else { - console.log(`ERROR: problem page ${loc} moved to recycler`) - } - }) - } else { - fs.mkdir(path.dirname(recyclePage), { recursive: true }, err => { - if (err) { - console.log('ERROR: creating recycler', err) - } else { - fs.rename(loc, recyclePage, err => { - if (err) { - console.log(`ERROR: moving problem page ${loc} to recycler`, err) - } else { - console.log(`ERROR: problem page ${loc} moved to recycler`) - } - }) - } + const pagesPath = path.join(path.dirname(require.resolve(`${plugin}/package`)), 'pages') + const entries = await fsp.readdir(pagesPath, { withFileTypes: true }) + for (const entry of entries) { + if (entry.isFile() && !pluginPages.has(entry.name)) { + pluginPages.set(entry.name, { + pluginName: plugin, + pluginPath: entry.parentPath, }) } - }) - - return cb(null, 'Error Parsing Page', 404) - } - for (const [key, val] of Object.entries(annotations)) { - page[key] = val + } + } catch { + // plugin has no pages directory } - cb(null, page) - }) + } } - const load_parse_copy = (defloc, file, cb) => { - fs.readFile(defloc, (err, data) => { - if (err) cb(err) - let page + const pluginPagesReady = initPluginPages() + + // ---- Private helpers ---- + + const loadParse = async (loc, annotations = {}) => { + const data = await fsp.readFile(loc, 
'utf8') + let page + try { + page = JSON.parse(data) + } catch { + const file = path.basename(loc) + const dir = path.dirname(loc) + const recycleLoc = path.resolve(dir, '..', 'recycle', file) try { - page = JSON.parse(data) - } catch (e) { - return cb(e) + await fsp.mkdir(path.dirname(recycleLoc), { recursive: true }) + await fsp.rename(loc, recycleLoc) + console.log(`ERROR: problem page ${loc} moved to recycler`) + } catch (moveErr) { + console.log(`ERROR: moving problem page ${loc} to recycler`, moveErr) } - cb(null, page) - // TODO: what is happening here?! put will never be reached??? - itself.put(file, page, err => { - if (err) cb(err) - }) - }) - } - // Reads and writes are async, but serially queued to avoid race conditions. - const queue = [] - - const tryDefaults = (file, cb) => { - const lastDefault = cb => { - const defloc = path.join(argv.root, 'default-data', 'pages', file) - fs.access(defloc, fs.constants.F_OK, err => { - if (!err) { - cb(defloc) - } else { - cb(null) - } - }) + throw new PageNotFoundError(loc) } + for (const [key, val] of Object.entries(annotations)) { + page[key] = val + } + return page + } + + const tryDefaults = async file => { if (argv.defaults) { const defloc = path.join(argv.data, '..', argv.defaults, 'pages', file) - fs.access(defloc, fs.constants.F_OK, err => { - if (!err) { - cb(defloc) - } else { - lastDefault(cb) - } - }) - } else { - lastDefault(cb) + if (await exists(defloc)) return defloc } + const defloc = path.join(argv.root, 'default-data', 'pages', file) + if (await exists(defloc)) return defloc + return null } - // Main file io function, when called without page it reads, - // when called with page it writes. - const fileio = (action, file, page, cb) => { - const loc = file.startsWith('recycler/') ? 
path.join(argv.recycler, file.split('/')[1]) : path.join(argv.db, file) - - switch (action) { - case 'delete': - if (file.startsWith('recycler/')) { - // delete from recycler - fs.access(loc, fs.constants.F_OK, err => { - if (!err) - fs.unlink(loc, err => { - cb(err) - }) - }) - } else { - // move page to recycler - fs.access(loc, fs.constants.F_OK, err => { - if (!err) { - const recycleLoc = path.join(argv.recycler, file) - fs.access(path.dirname(recycleLoc), fs.constants.F_OK, err => { - if (!err) { - fs.rename(loc, recycleLoc, err => { - cb(err) - }) - } else { - fs.mkdir(path.dirname(recycleLoc), { recursive: true }, err => { - if (err) cb(err) - fs.rename(loc, recycleLoc, err => { - cb(err) - }) - }) - } - }) - } else { - cb('page does not exist') - } - }) - } - break - case 'recycle': { - const copyFile = (source, target, cb) => { - const done = err => { - if (!cbCalled) { - cb(err) - cbCalled = true - } - return - } + const locFor = file => + file.startsWith('recycler/') ? path.join(argv.recycler, file.split('/')[1]) : path.join(argv.db, file) - let cbCalled = false - - const rd = fs.createReadStream(source) - rd.on('error', err => { - done(err) - return - }) - - const wr = fs.createWriteStream(target) - wr.on('error', err => { - done(err) - return - }) - wr.on('close', () => { - done() - return - }) - rd.pipe(wr) - return - } + // ---- Core I/O ---- - fs.access(loc, fs.constants.F_OK, err => { - if (!err) { - const recycleLoc = path.join(argv.recycler, file) - fs.access(path.dirname(recycleLoc), fs.constants.F_OK, err => { - if (!err) { - copyFile(loc, recycleLoc, err => { - cb(err) - }) - } else { - fs.mkdir(path.dirname(recycleLoc), { recursive: true }, err => { - if (err) cb(err) - copyFile(loc, recycleLoc, err => { - cb(err) - }) - }) - } - }) - } else { - cb('page does not exist') - } - }) - break - } - case 'get': - fs.access(loc, fs.constants.F_OK, err => { - if (!err) { - load_parse(loc, cb, { plugin: undefined }) - } else { - tryDefaults(file, 
defloc => { - if (defloc) { - load_parse(defloc, cb) - } else { - if (pluginPages.has(file)) { - const { pluginName, pluginPath } = pluginPages.get(file) - load_parse(path.join(pluginPath, file), cb, { plugin: pluginName.slice(12) }) - } else { - cb(null, 'Page not found', 404) - } - } - }) - } - }) - break - case 'put': - page = JSON.stringify(page, null, 2) - fs.access(path.dirname(loc), fs.constants.F_OK, err => { - if (!err) { - fs.writeFile(loc, page, err => { - if (err) { - console.log(`ERROR: write file ${loc} `, err) - } - cb(err) - }) - } else { - fs.mkdir(path.dirname(loc), { recursive: true }, err => { - if (err) cb(err) - fs.writeFile(loc, page, err => { - if (err) { - console.log(`ERROR: write file ${loc} `, err) - } - cb(err) - }) - }) - } - }) - break - default: - console.log(`pagehandler: unrecognized action ${action}`) + const doGet = async file => { + const loc = locFor(file) + if (await exists(loc)) { + return loadParse(loc, { plugin: undefined }) } - } - - // Control variable that tells if the serial queue is currently working. - // Set back to false when all jobs are complete. - let working = false - - // Keep file io working on queued jobs, but don't block the main thread. - const serial = item => { - if (item) { - itself.start() - fileio(item.action, item.file, item.page, (err, data, status) => { - process.nextTick(() => { - serial(queue.shift()) - }) - item.cb(err, data, status) + const defloc = await tryDefaults(file) + if (defloc) { + return loadParse(defloc) + } + await pluginPagesReady + if (pluginPages.has(file)) { + const { pluginName, pluginPath } = pluginPages.get(file) + return loadParse(path.join(pluginPath, file), { + plugin: pluginName.slice(12), }) - } else { - itself.stop() } + throw new PageNotFoundError(file) } - // #### Public stuff #### - // Make the exported object an instance of EventEmitter - // so other modules can tell if it is working or not. 
- const itself = new events.EventEmitter() - - itself.start = () => { - working = true - itself.emit('working') + const doPut = async (file, page) => { + const loc = locFor(file) + await fsp.mkdir(path.dirname(loc), { recursive: true }) + await writeAtomic(loc, JSON.stringify(page, null, 2)) } - itself.stop = () => { - working = false - itself.emit('finished') + const doDelete = async file => { + const loc = locFor(file) + if (file.startsWith('recycler/')) { + if (await exists(loc)) await fsp.unlink(loc) + return + } + if (!(await exists(loc))) throw new PageNotFoundError(file) + const recycleLoc = path.join(argv.recycler, file) + await fsp.mkdir(path.dirname(recycleLoc), { recursive: true }) + await fsp.rename(loc, recycleLoc) } - itself.isWorking = () => working - - // get method takes a slug and a callback, adding them to the queue, - // starting serial if it isn't already working. - itself.get = (file, cb) => { - queue.push({ action: 'get', file, page: null, cb }) - if (!working) serial(queue.shift()) + const doRecycle = async file => { + const loc = locFor(file) + if (!(await exists(loc))) throw new PageNotFoundError(file) + const recycleLoc = path.join(argv.recycler, file) + await copyFile(loc, recycleLoc) } - // put takes a slugged name, the page as a json object, and a callback. - // adds them to the queue, and starts it unless it is working. 
- itself.put = (file, page, cb) => { - queue.push({ action: 'put', file, page, cb }) - if (!working) serial(queue.shift()) - } + // ---- Public API ---- - itself.delete = (file, cb) => { - queue.push({ action: 'delete', file, page: null, cb }) - if (!working) serial(queue.shift()) - } + const itself = {} - itself.saveToRecycler = (file, cb) => { - queue.push({ action: 'recycle', file, page: null, cb }) - if (!working) serial(queue.shift()) - } + itself.get = file => withLock(file, () => doGet(file)) - const editDate = journal => { - if (!journal) return undefined - // find the last journal entry, that is not a fork, with a date. - const last = journal.findLast(action => { - return action.date && action.type != 'fork' - }) - return last ? last.date : undefined - } + itself.put = (file, page) => withLock(file, () => doPut(file, page)) - itself.pages = cb => { - const extractPageLinks = (collaborativeLinks, currentItem, currentIndex, array) => { - // extract collaborative links - // - this will need extending if we also extract the id of the item containing the link - try { - const linkRe = /\[\[([^\]]+)\]\]/g - let match = undefined - while ((match = linkRe.exec(currentItem.text)) != null) { - if (!collaborativeLinks.has(asSlug(match[1]))) { - collaborativeLinks.set(asSlug(match[1]), currentItem.id) - } - } - if ('reference' == currentItem.type) { - if (!collaborativeLinks.has(currentItem.slug)) { - collaborativeLinks.set(currentItem.slug, currentItem.id) - } - } - } catch (err) { - console.log( - `METADATA *** ${wikiName} Error extracting links from ${currentIndex} of ${JSON.stringify(array)}`, - err.message, - ) - } - return collaborativeLinks - } + itself.delete = file => withLock(file, () => doDelete(file)) - fs.readdir(argv.db, (e, files) => { - if (e) return cb(e) - const doSitemap = async file => { - return new Promise(resolve => { - itself.get(file, (e, page, status) => { - if (file.match(/^\./)) return resolve(null) - if (e || status === 404) { - 
console.log('Problem building sitemap:', file, 'e: ', e, 'status:', status) - return resolve(null) // Ignore errors in the pagehandler get. - } + itself.saveToRecycler = file => withLock(file, () => doRecycle(file)) + + itself.pages = async () => { + const files = await fsp.readdir(argv.db) + const results = await Promise.all( + files + .filter(f => !f.startsWith('.')) + .map(async file => { + try { + const page = await itself.get(file) let pageLinksMap try { pageLinksMap = page.story.reduce(extractPageLinks, new Map()) } catch (err) { console.log(`METADATA *** ${wikiName} reduce to extract links on ${file} failed`, err.message) - pageLinksMap = [] + pageLinksMap = new Map() } - // - const pageLinks = pageLinksMap.size > 0 ? Object.fromEntries(pageLinksMap) : undefined - - resolve({ + return { slug: file, title: page.title, - date: editDate(page.journal), + date: lastEdit(page.journal), synopsis: synopsis(page), - links: pageLinks, - }) - }) - }) - } - - Promise.all(files.map(doSitemap)) - .then(sitemap => { - cb( - null, - sitemap.filter(item => item != null), - ) - }) - .catch(e => cb(e)) - }) + links: pageLinksMap.size > 0 ? Object.fromEntries(pageLinksMap) : undefined, + } + } catch (err) { + if (err instanceof PageNotFoundError) return null + console.log('Problem building sitemap:', file, 'e:', err) + return null + } + }), + ) + return results.filter(Boolean) } - itself.slugs = cb => { - fs.readdir(argv.db, { withFileTypes: true }, (e, files) => { - if (e) { - console.log('Problem reading pages directory', e) - return cb(e) - } - - const onlyFiles = files.map(i => (i.isFile() ? 
i.name : null)).filter(i => i != null && !i?.startsWith('.')) - cb(null, onlyFiles) - }) + itself.slugs = async () => { + const entries = await fsp.readdir(argv.db, { withFileTypes: true }) + return entries.filter(e => e.isFile() && !e.name.startsWith('.')).map(e => e.name) } return itself diff --git a/lib/plugins.js b/lib/plugins.js index dfa7e08..0834a20 100644 --- a/lib/plugins.js +++ b/lib/plugins.js @@ -6,36 +6,29 @@ * https://github.com/fedwiki/wiki-server/blob/master/LICENSE.txt */ -// support server-side plugins +// Support server-side plugins -import fs from 'node:fs' +import fsp from 'node:fs/promises' import { pathToFileURL } from 'node:url' -// import forward from './forward.cjs'; // Uncomment if needed and adjust import style if it's not a default export export default argv => { - // NOTE: plugins are now in their own package directories alongside this one... - // Plugins are in directories of the form wiki-package-* - // those with a server component will have a server directory - const plugins = {} - // http://stackoverflow.com/questions/10914751/loading-node-js-modules-dynamically-based-on-route - - const startServer = (params, plugin) => { + const startServer = async (params, plugin) => { const server = `${argv.packageDir}/${plugin}/server/server.js` - fs.access(server, fs.constants.F_OK, err => { - if (!err) { - console.log('starting plugin', plugin) - import(pathToFileURL(server)) - .then(exported => { - plugins[plugin] = exported - plugins[plugin].startServer?.(params) - }) - .catch(e => { - console.log('failed to start plugin', plugin, e?.stack || e) - }) - } - }) + try { + await fsp.access(server) + } catch { + return + } + try { + console.log('starting plugin', plugin) + const exported = await import(pathToFileURL(server)) + plugins[plugin] = exported + plugins[plugin].startServer?.(params) + } catch (e) { + console.log('failed to start plugin', plugin, e?.stack || e) + } } const startServers = params => { diff --git a/lib/render.js 
b/lib/render.js new file mode 100644 index 0000000..3397c4d --- /dev/null +++ b/lib/render.js @@ -0,0 +1,140 @@ +/* + * Federated Wiki : Node Server + * + * Copyright Ward Cunningham and other contributors + * Licensed under the MIT license. + * https://github.com/fedwiki/wiki-server/blob/master/LICENSE.txt + */ + +// **render.js** +// Server-side rendering of wiki pages to static HTML. +// Contains link resolution, HTML escaping, sanitization, and the +// top-level render function used to produce the story HTML served +// for `.html` page requests. + +import f from 'flates' +import createDOMPurify from 'dompurify' +import { JSDOM } from 'jsdom' + +import { asSlug } from './utils.js' + +const window = new JSDOM('').window +const DOMPurify = createDOMPurify(window) + +// ---- HTML escaping ---- + +/** + * Escape HTML special characters in a string. + * + * @param {string} [string=''] - The raw text. + * @returns {string} The text with &, <, > replaced by entities. + */ +export const escape = string => (string || '').replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;') + +// ---- Link resolution ---- + +/** + * Convert wiki-style link markup in a string to HTML anchor tags. + * + * Handles two kinds of links: + * - Internal wiki links: `[[Page Name]]` → `<a class="internal" href="/page-name.html">Page Name</a>` + * - External links: `[http://example.com label text]` → `<a class="external" href="http://example.com">label text</a>` + * + * A stash/unstash mechanism protects generated HTML from being altered + * by the sanitizer pass. The Unicode markers `〖n〗` are used as + * placeholders; any pre-existing markers in the input are defused first. + * + * @param {string} [string=''] - The markup text to convert. + * @param {function} [sanitize=escape] - A text sanitizer applied after + * link extraction but before unstashing. Plugins that do their own + * markup can substitute themselves here, but must escape HTML and + * pass through `〖n〗` markers. + * @param {string[]} [resolutionContext=[]] - A stack of page names + * representing the navigation path, encoded into link title attributes. 
+ * @returns {string} HTML string with links converted to anchor tags. + */ +export const resolveLinks = (string, sanitize = escape, resolutionContext = []) => { + const stashed = [] + + const stash = text => { + const here = stashed.length + stashed.push(text) + return `〖${here}〗` + } + + const unstash = (match, digits) => stashed[+digits] + + const internal = (match, name) => { + const slug = asSlug(name) + const styling = name === name.trim() ? 'internal' : 'internal spaced' + if (slug.length) { + return stash( + `<a class="${styling}" href="/${slug}.html" data-page-name="${slug}" title="${resolutionContext.join(' => ')}">${escape(name)}</a>`, + ) + } else { + return match + } + } + + const external = (match, href, rest) => + stash( + `<a class="external" target="_blank" href="${href}" title="${href}" rel="nofollow">${escape(rest)} <img src="/images/external-link-ltr-icon.png"></a>`, + ) + + string = (string || '') + .replace(/〖(\d+)〗/g, '〖 $1 〗') + .replace(/\[\[([^\]]+)\]\]/gi, internal) + .replace(/\[((?:(?:https?|ftp):|\/).*?) (.*?)\]/gi, external) + + return sanitize(string).replace(/〖(\d+)〗/g, unstash) +} + +// ---- Page rendering ---- + +/** + * Render a wiki page's story to static HTML. + * + * Produces the twins, header, and story divs that form the page body + * served for `.html` requests. Handles paragraph, image, html, and + * generic story item types. + * + * @param {{title: string, story: Array<{type: string, text?: string, url?: string, caption?: string}>}} page + * The page object containing a title and story array. + * @returns {string} An HTML string of the rendered page content. 
+ */ +export const render = page => { + return ( + f.div({ class: 'twins' }, f.p('')) + + '\n' + + f.div( + { class: 'header' }, + f.h1( + f.a({ href: '/', style: 'text-decoration: none' }, f.img({ height: '32px', src: '/favicon.png' })) + + ' ' + + page.title, + ), + ) + + '\n' + + f.div( + { class: 'story' }, + page.story + .map(story => { + if (!story) return '' + if (story.type === 'paragraph') { + return f.div({ class: 'item paragraph' }, f.p(resolveLinks(story.text))) + } else if (story.type === 'image') { + return f.div( + { class: 'item image' }, + f.img({ class: 'thumbnail', src: story.url }), + f.p(resolveLinks(story.text || story.caption || 'uploaded image')), + ) + } else if (story.type === 'html') { + return f.div({ class: 'item html' }, f.p(resolveLinks(story.text || '', DOMPurify.sanitize))) + } else { + return f.div({ class: 'item' }, f.p(resolveLinks(story.text || ''))) + } + }) + .join('\n'), + ) + ) +} diff --git a/lib/routes/assets.js b/lib/routes/assets.js new file mode 100644 index 0000000..014f5e0 --- /dev/null +++ b/lib/routes/assets.js @@ -0,0 +1,89 @@ +/* + * Federated Wiki : Node Server + * + * Asset routes: favicon, theme, proxy. 
+ */ + +import fsp from 'node:fs/promises' +import path from 'node:path' +import { pipeline } from 'node:stream/promises' + +export default ({ app, authorized, cors, argv }) => { + // ---- Theme ---- + + app.get(/^\/theme\/(\w+\.\w+)$/, cors, (req, res) => { + res.sendFile(path.join(argv.status, 'theme', req.params[0]), { dotfiles: 'allow' }, e => { + if (e) { + if (req.path === '/theme/style.css') { + res.set('Content-Type', 'text/css') + res.send('') + } else { + res.sendStatus(404) + } + } + }) + }) + + // ---- Favicon ---- + + const favLoc = path.join(argv.status, 'favicon.png') + const defaultFavLoc = path.join(argv.root, 'default-data', 'status', 'favicon.png') + + app.get('/favicon.png', cors, async (req, res) => { + try { + await fsp.access(favLoc) + res.sendFile(favLoc, { dotfiles: 'allow' }) + } catch { + res.sendFile(defaultFavLoc, { dotfiles: 'allow' }) + } + }) + + app.post('/favicon.png', authorized, async (req, res) => { + const favicon = req.body.image.replace(/^data:image\/png;base64,/, '') + const buf = Buffer.from(favicon, 'base64') + try { + await fsp.mkdir(argv.status, { recursive: true }) + await fsp.writeFile(favLoc, buf) + res.send('Favicon Saved') + } catch (e) { + res.e(e) + } + }) + + app.get(/^\/remote\/([a-zA-Z0-9:.-]+\/favicon.png)$/, (req, res) => { + res.redirect(`http://${req.params[0]}`) + }) + + // ---- Proxy ---- + + app.get('/proxy/*splat', authorized, async (req, res) => { + const pathParts = req.originalUrl.split('/') + const remoteHost = pathParts[2] + pathParts.splice(0, 3) + const remoteResource = pathParts.join('/') + const requestURL = 'http://' + remoteHost + '/' + remoteResource + console.log('PROXY Request: ', requestURL) + if ( + requestURL.endsWith('.json') || + requestURL.endsWith('.png') || + requestURL.endsWith('.jpg') || + pathParts[0] === 'plugin' + ) { + try { + const fetchRes = await fetch(requestURL, { signal: AbortSignal.timeout(2000) }) + if (fetchRes.ok) { + res.set('content-type', 
fetchRes.headers.get('content-type')) + res.set('last-modified', fetchRes.headers.get('last-modified')) + await pipeline(fetchRes.body, res) + } else { + res.status(fetchRes.status).end() + } + } catch (err) { + console.log('ERROR: Proxy Request ', requestURL, err) + res.status(500).end() + } + } else { + res.status(400).end() + } + }) +} diff --git a/lib/routes/meta.js b/lib/routes/meta.js new file mode 100644 index 0000000..830d43f --- /dev/null +++ b/lib/routes/meta.js @@ -0,0 +1,147 @@ +/* + * Federated Wiki : Node Server + * + * Meta routes: sitemap, search index, slugs, plugins, export, version. + */ + +import fsp from 'node:fs/promises' +import path from 'node:path' + +export default ({ app, pagehandler, sitemaphandler, searchhandler, packageJson, authorized, admin, cors, argv }) => { + // ---- Slugs ---- + + app.get('/system/slugs.json', cors, async (req, res) => { + try { + const slugs = await pagehandler.slugs() + res.send(slugs) + } catch (err) { + res.status(500).send(err.message || err) + } + }) + + // ---- Plugins ---- + + app.get('/system/plugins.json', cors, (req, res) => { + try { + const pluginNames = Object.keys(packageJson.dependencies) + .filter(d => d.startsWith('wiki-plugin')) + .map(name => name.slice(12)) + res.send(pluginNames) + } catch (e) { + res.e(e) + } + }) + + app.get('/system/factories.json', async (req, res) => { + const factories = [] + const getFactory = plugin => + import(`${plugin}/factory.json`, { with: { type: 'json' } }) + .then(({ default: factory }) => factories.push(factory)) + .catch(() => {}) + + await Promise.all( + Object.keys(packageJson.dependencies) + .filter(d => d.startsWith('wiki-plugin')) + .map(getFactory), + ) + res.status(200).json(factories) + }) + + // ---- Sitemap ---- + + const sitemapLoc = path.join(argv.status, 'sitemap.json') + const xmlSitemapLoc = path.join(argv.status, 'sitemap.xml') + + app.get('/system/sitemap.json', cors, async (req, res) => { + try { + await fsp.access(sitemapLoc) + 
res.sendFile(sitemapLoc, { dotfiles: 'allow' }) + } catch { + if (!sitemaphandler.isWorking()) { + await sitemaphandler.createSitemap(pagehandler) + } else { + await new Promise(resolve => sitemaphandler.once('finished', resolve)) + } + res.sendFile(sitemapLoc, { dotfiles: 'allow' }) + } + }) + + app.get('/sitemap.xml', cors, async (req, res) => { + try { + await fsp.access(xmlSitemapLoc) + res.sendFile(xmlSitemapLoc, { dotfiles: 'allow' }) + } catch { + if (!sitemaphandler.isWorking()) { + await sitemaphandler.createSitemap(pagehandler) + } else { + await new Promise(resolve => sitemaphandler.once('finished', resolve)) + } + res.sendFile(xmlSitemapLoc, { dotfiles: 'allow' }) + } + }) + + // ---- Search index ---- + + const searchIndexLoc = path.join(argv.status, 'site-index.json') + + app.get('/system/site-index.json', cors, async (req, res) => { + try { + await fsp.access(searchIndexLoc) + res.sendFile(searchIndexLoc, { dotfiles: 'allow' }) + } catch { + if (!searchhandler.isWorking()) { + await searchhandler.createIndex(pagehandler) + } else { + await new Promise(resolve => searchhandler.once('indexed', resolve)) + } + res.sendFile(searchIndexLoc, { dotfiles: 'allow' }) + } + }) + + // ---- Export ---- + + app.get('/system/export.json', cors, async (req, res) => { + try { + const sitemap = await pagehandler.pages() + const pages = await Promise.all( + sitemap.map(async stub => { + const page = await pagehandler.get(stub.slug) + return { slug: stub.slug, page } + }), + ) + const pageExport = pages.reduce((dict, { slug, page }) => { + dict[slug] = page + return dict + }, {}) + res.json(pageExport) + } catch (e) { + res.e(e) + } + }) + + // ---- Version ---- + + app.get('/system/version.json', admin, async (req, res) => { + const getVersion = async name => { + try { + const { default: pkg } = await import(`${name}/package.json`, { with: { type: 'json' } }) + return { [name]: pkg.version } + } catch { + return { [name]: 'unknown' } + } + } + + const deps = 
Object.keys(packageJson.dependencies) + const securityVersions = await Promise.all(deps.filter(d => d.startsWith('wiki-security')).map(getVersion)) + const pluginVersions = await Promise.all(deps.filter(d => d.startsWith('wiki-plugin')).map(getVersion)) + + const versions = { + [packageJson.name]: packageJson.version, + ...(await getVersion('wiki-server')), + ...(await getVersion('wiki-client')), + security: Object.assign({}, ...securityVersions), + plugins: Object.assign({}, ...pluginVersions), + } + res.json(versions) + }) +} diff --git a/lib/routes/pages.js b/lib/routes/pages.js new file mode 100644 index 0000000..630eb4e --- /dev/null +++ b/lib/routes/pages.js @@ -0,0 +1,211 @@ +/* + * Federated Wiki : Node Server + * + * Page CRUD routes: JSON get/put/delete, action processing, recycler. + */ + +import fsp from 'node:fs/promises' +import path from 'node:path' +import { PageNotFoundError } from '../errors.js' + +export default ({ app, pagehandler, sitemaphandler, searchhandler, securityhandler, authorized, cors, log, argv }) => { + const remoteGet = async (remote, slug) => { + const remoteURL = new URL(`http://${remote}/${slug}.json`).toString() + const res = await fetch(remoteURL, { signal: AbortSignal.timeout(2000) }) + if (!res.ok) throw new Error(res.statusText) + return res.json() + } + + // ---- JSON page routes ---- + + app.get(/^\/([a-z0-9-]+)\.json$/, cors, async (req, res) => { + try { + const page = await pagehandler.get(req.params[0]) + res.status(200).send(page) + } catch (e) { + if (e instanceof PageNotFoundError) return res.status(404).send(e.message) + res.e(e) + } + }) + + app.get(/^\/remote\/([a-zA-Z0-9:.-]+)\/([a-z0-9-]+)\.json$/, async (req, res) => { + try { + const page = await remoteGet(req.params[0], req.params[1]) + res.status(200).send(page) + } catch (e) { + log('remoteGet error:', e) + res.e(e) + } + }) + + // ---- Page actions ---- + + app.put(/^\/page\/([a-z0-9-]+)\/action$/i, authorized, async (req, res) => { + const action = 
JSON.parse(req.body.action) + const slug = req.params[0] + + const applyAction = async page => { + try { + page.story = (() => { + switch (action.type) { + case 'move': + return action.order.map(id => { + const match = page.story.filter(p => id === p.id)[0] + if (!match) throw 'Ignoring move. Try reload.' + return match + }) + case 'add': { + const idx = page.story.map(p => p.id).indexOf(action.after) + 1 + page.story.splice(idx, 0, action.item) + return page.story + } + case 'remove': + return page.story.filter(p => p?.id !== action.id) + case 'edit': + return page.story.map(p => (p.id === action.id ? action.item : p)) + case 'create': + case 'fork': + return page.story || [] + default: + log('Unfamiliar action:', action) + throw 'Unfamiliar action ignored' + } + })() + } catch (e) { + return res.e(e) + } + + if (!page.journal) page.journal = [] + if (action.fork) { + page.journal.push({ type: 'fork', site: action.fork, date: action.date - 1 }) + delete action.fork + } + page.journal.push(action) + + try { + await pagehandler.put(slug, page) + res.send('ok') + } catch (e) { + return res.e(e) + } + + sitemaphandler.update(slug, page).catch(e => console.log(`sitemap update error for ${slug}:`, e)) + searchhandler.update(slug, page).catch(e => console.log(`search update error for ${slug}:`, e)) + } + + try { + if (action.fork) { + try { + await pagehandler.saveToRecycler(slug) + } catch (err) { + if (!(err instanceof PageNotFoundError)) { + console.log(`Error saving ${slug} before fork: ${err}`) + } + } + if (action.forkPage) { + const forkPageCopy = JSON.parse(JSON.stringify(action.forkPage)) + delete action.forkPage + await applyAction(forkPageCopy) + } else { + const page = await remoteGet(action.fork, slug) + await applyAction(page) + } + } else if (action.type === 'create') { + const itemCopy = JSON.parse(JSON.stringify(action.item)) + let pageExists = true + try { + await pagehandler.get(slug) + } catch (e) { + if (e instanceof PageNotFoundError) pageExists = 
false + else throw e + } + if (pageExists) { + res.status(409).send('Page already exists.') + } else { + await applyAction(itemCopy) + } + } else if (action.type === 'fork') { + try { + await pagehandler.saveToRecycler(slug) + } catch (err) { + console.log(`Error saving ${slug} before fork: ${err}`) + } + if (action.forkPage) { + const forkPageCopy = JSON.parse(JSON.stringify(action.forkPage)) + delete action.forkPage + await applyAction(forkPageCopy) + } else { + const page = await remoteGet(action.site, slug) + await applyAction(page) + } + } else { + const page = await pagehandler.get(slug) + await applyAction(page) + } + } catch (e) { + if (e instanceof PageNotFoundError) return res.e(e.message, 404) + res.e(e) + } + }) + + // ---- Delete ---- + + app.delete(/^\/([a-z0-9-]+)\.json$/, authorized, async (req, res) => { + const slug = req.params[0] + try { + await pagehandler.delete(slug) + sitemaphandler.removePage(slug).catch(e => console.log(`sitemap remove error for ${slug}:`, e)) + searchhandler.removePage(slug).catch(e => console.log(`search remove error for ${slug}:`, e)) + res.status(200).send('') + } catch (err) { + res.status(500).send(err.message || err) + } + }) + + // ---- Recycler ---- + + const recyclerFavLoc = path.join(argv.root, 'default-data', 'status', 'recycler.png') + app.get('/recycler/favicon.png', authorized, (req, res) => { + res.sendFile(recyclerFavLoc, { dotfiles: 'allow' }) + }) + + app.get('/recycler/system/slugs.json', authorized, async (req, res) => { + try { + const files = await fsp.readdir(argv.recycler) + const results = await Promise.all( + files.map(async file => { + try { + const page = await pagehandler.get('recycler/' + file) + return { slug: file, title: page.title } + } catch (e) { + if (e instanceof PageNotFoundError) return null + console.log('Problem building recycler map:', file, 'e:', e) + return null + } + }), + ) + res.send(results.filter(Boolean)) + } catch (e) { + res.e(e) + } + }) + + 
app.get(/^\/recycler\/([a-z0-9-]+)\.json$/, authorized, async (req, res) => { + try { + const page = await pagehandler.get('recycler/' + req.params[0]) + res.status(200).send(page) + } catch (e) { + if (e instanceof PageNotFoundError) return res.status(404).send(e.message) + res.e(e) + } + }) + + app.delete(/^\/recycler\/([a-z0-9-]+)\.json$/, authorized, async (req, res) => { + try { + await pagehandler.delete('recycler/' + req.params[0]) + res.status(200).send('') + } catch (err) { + res.status(500).send(err.message || err) + } + }) +} diff --git a/lib/routes/views.js b/lib/routes/views.js new file mode 100644 index 0000000..382cb40 --- /dev/null +++ b/lib/routes/views.js @@ -0,0 +1,103 @@ +/* + * Federated Wiki : Node Server + * + * View routes: HTML page rendering, multi-page view. + */ + +import fsp from 'node:fs/promises' +import path from 'node:path' +import { render } from '../render.js' +import { PageNotFoundError } from '../errors.js' + +export default ({ app, pagehandler, securityhandler, cors, log, argv, getOwner }) => { + const index = argv.home + '.html' + + // Multi-page view — links like /view/page-a/site.example.com/page-b + app.get(/^((\/[a-zA-Z0-9:.-]+\/[a-z0-9-]+(_rev\d+)?)+)\/?$/, cors, (req, res, next) => { + const urlPages = req.params[0] + .split('/') + .filter((_, i) => i % 2 === 0) + .slice(1) + const urlLocs = req.params[0] + .split('/') + .slice(1) + .filter((_, i) => i % 2 === 0) + if (['plugin', 'auth'].indexOf(urlLocs[0]) > -1) return next() + const title = urlPages.slice().pop().replace(/-+/g, ' ') + const user = securityhandler.getUser(req) + const owner = getOwner() + const info = { + title, + pages: [], + authenticated: !!user, + user, + seedNeighbors: argv.neighbors, + owned: !!owner, + isOwner: !!securityhandler.isAuthorized(req), + ownedBy: owner || '', + } + for (const [idx, page] of urlPages.entries()) { + if (urlLocs[idx] === 'view') { + info.pages.push({ page }) + } else { + info.pages.push({ page, origin: 
`data-site=${urlLocs[idx]}` }) + } + } + res.render('static.html', info) + }) + + // Single page HTML rendering + app.get(/^\/([a-z0-9-]+)\.html$/, cors, async (req, res, next) => { + const slug = req.params[0] + log(slug) + if (slug === 'runtests') return next() + try { + const page = await pagehandler.get(slug) + page.title ||= slug.replace(/-+/g, ' ') + page.story ||= [] + const user = securityhandler.getUser(req) + const owner = getOwner() + const info = { + title: page.title, + pages: [ + { + page: slug, + generated: 'data-server-generated=true', + story: render(page), + }, + ], + authenticated: !!user, + user, + seedNeighbors: argv.neighbors, + owned: !!owner, + isOwner: !!securityhandler.isAuthorized(req), + ownedBy: owner || '', + } + res.render('static.html', info) + } catch (e) { + if (e instanceof PageNotFoundError) return res.status(404).send(e.message) + res.e(e) + } + }) + + // Oops + app.get('/oops', (req, res) => { + res.statusCode = 403 + res.render('oops.html', { msg: 'This is not your wiki!' }) + }) + + // Root redirect + app.get('/', cors, async (req, res) => { + const home = path.join(argv.assets, 'home', 'index.html') + try { + const stats = await fsp.stat(home) + if (stats.isFile()) { + res.redirect('/assets/home/index.html') + } else { + res.redirect(index) + } + } catch { + res.redirect(index) + } + }) +} diff --git a/lib/search.js b/lib/search.js index 5d2cd4b..660b9f7 100644 --- a/lib/search.js +++ b/lib/search.js @@ -7,387 +7,292 @@ */ // **search.js** +// Maintains a MiniSearch full-text index of all wiki pages. +// +// In-memory mutations (update/remove) are synchronous against the +// MiniSearch instance. Persistence is debounced — rapid successive +// edits coalesce into a single file write. 
-import fs from 'node:fs' +import fsp from 'node:fs/promises' import path from 'node:path' import events from 'node:events' -import url from 'node:url' import writeFileAtomic from 'write-file-atomic' - import miniSearch from 'minisearch' +import { PageNotFoundError } from './errors.js' + +const writeAtomic = (loc, data) => + new Promise((resolve, reject) => writeFileAtomic(loc, data, err => (err ? reject(err) : resolve()))) export default argv => { const wikiName = new URL(argv.url).hostname - let siteIndex = [] - const queue = [] - let searchPageHandler = null - - // ms since last update we will remove index from memory - // orig - searchTimeoutMs = 1200000 - const searchTimeoutMs = 120000 // temp reduce to 2 minutes - let searchTimeoutHandler = null + let siteIndex = null // miniSearch instance, or null when not loaded + let pagehandlerRef = null const siteIndexLoc = path.join(argv.status, 'site-index.json') const indexUpdateFlag = path.join(argv.status, 'index-updated') + const searchTimeoutMs = 120000 + let searchTimeoutHandler = null + let working = false + let dirty = false + let saveTimer = null + const SAVE_DELAY_MS = 100 + + const itself = new events.EventEmitter() + + // ---- Text extraction ---- + + const extractItemText = text => + text + .replace(/\[([^\]]*?)\][[(].*?[\])]/g, ' $1 ') + .replace(/\[{2}|\[(?:[\S]+)|\]{1,2}/g, ' ') + .replace(/\n/g, ' ') + .replace(//g, ' ') + .replace(/<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>/g, ' ') + .replace(/<(?:[^>])+>/g, ' ') + .replace(/(https?:.*?)(?=\p{White_Space}|\p{Quotation_Mark}|$)/gu, match => { + try { + return new URL(match).hostname + } catch { + return ' ' + } + }) + .replace(/[\p{P}\p{Emoji}\p{Symbol}}]+/gu, ' ') + .replace(/[\p{White_Space}\n\t]+/gu, ' ') + + const extractableTypes = new Set([ + 'paragraph', + 'markdown', + 'html', + 'reference', + 'image', + 'pagefold', + 'math', + 'mathjax', + 'code', + ]) + + const mediaTypes = new Set(['audio', 'video', 'frame']) - const touch = (file, cb) => { - 
fs.stat(file, (err, stats) => { - if (err === null) return cb() - fs.open(file, 'w', (err, fd) => { - if (err) cb(err) - fs.close(fd, err => { - cb(err) - }) + const extractMediaText = text => + text + .split(/\r\n?|\n/) + .map(line => { + const first = line.split(/\p{White_Space}/u)[0] + if (first.startsWith('http') || first.toUpperCase() === first || first.startsWith('//')) { + return '' + } + return line }) - }) + .join(' ') + + const extractPageText = (pageText, item) => { + try { + if (!item.text) return pageText + if (extractableTypes.has(item.type)) { + return pageText + ' ' + extractItemText(item.text) + } + if (mediaTypes.has(item.type)) { + return pageText + ' ' + extractItemText(extractMediaText(item.text)) + } + } catch (err) { + console.log(`SITE INDEX *** Error extracting text from item`, err.message) + } + return pageText } - const searchPageUpdate = (slug, page, cb) => { - // to update we have to remove the page first, and then readd it - let pageText + const pageToDoc = (slug, page) => { + let content = '' try { - pageText = page.story.reduce(extractPageText, '') + content = page.story.reduce(extractPageText, '') } catch (err) { - console.log(`SITE INDEX *** ${wikiName} reduce to extract the text on ${slug} failed`, err.message) - pageText = '' + console.log(`SITE INDEX *** ${wikiName} text extraction on ${slug} failed`, err.message) } + return { id: slug, title: page.title, content } + } + + // ---- Index helpers ---- + + const newIndex = () => new miniSearch({ fields: ['title', 'content'] }) + + const applyUpdate = (slug, page) => { + if (!siteIndex) return + const doc = pageToDoc(slug, page) if (siteIndex.has(slug)) { - siteIndex.replace({ - id: slug, - title: page.title, - content: pageText, - }) + siteIndex.replace(doc) } else { - siteIndex.add({ - id: slug, - title: page.title, - content: pageText, - }) + siteIndex.add(doc) } - cb() } - const searchPageRemove = (slug, cb) => { - // remove page from index + const applyRemove = slug => { + if 
(!siteIndex) return try { siteIndex.discard(slug) } catch (err) { - // swallow error, if the page was not in index if (!err.message.includes('not in the index')) { console.log(`removing ${slug} from index ${wikiName} failed`, err) } } - cb() } - const searchSave = (siteIndex, cb) => { - // save index to file - fs.access(argv.status, fs.constants.F_OK, err => { - if (!err) { - writeFileAtomic(siteIndexLoc, JSON.stringify(siteIndex), e => { - if (e) return cb(e) - touch(indexUpdateFlag, () => { - cb() - }) - }) - } else { - fs.mkdir(argv.status, { recursive: true }, () => { - writeFileAtomic(siteIndexLoc, JSON.stringify(siteIndex), e => { - if (e) return cb(e) - touch(indexUpdateFlag, () => { - cb() - }) - }) - }) - } - }) - } + // ---- Persistence ---- - const searchRestore = cb => { - // restore index, or create if it doesn't already exist - fs.access(siteIndexLoc, fs.constants.F_OK, err => { - if (!err) { - fs.readFile(siteIndexLoc, (err, data) => { - if (err) return cb(err) - try { - siteIndex = miniSearch.loadJSON(data, { - fields: ['title', 'content'], - }) - } catch (e) { - return cb(e) - } - process.nextTick(() => { - serial(queue.shift()) - }) - }) - } - }) + const touch = async file => { + try { + await fsp.stat(file) + } catch { + const fd = await fsp.open(file, 'w') + await fd.close() + } } - const serial = item => { - if (item) { - switch (item.action) { - case 'update': - itself.start() - searchPageUpdate(item.slug, item.page, () => { - process.nextTick(() => { - serial(queue.shift()) - }) - }) - break - case 'remove': - itself.start() - searchPageRemove(item.slug, () => { - process.nextTick(() => { - serial(queue.shift()) - }) - }) - break - default: - console.log(`SITE INDEX *** unexpected action ${item.action} for ${item.page}`) - process.nextTick(() => { - serial(queue.shift) - }) - } - } else { - searchSave(siteIndex, e => { - if (e) console.log('SITE INDEX *** save failed: ' + e) - itself.stop() - }) + const save = async () => { + try { + await 
fsp.mkdir(argv.status, { recursive: true }) + await writeAtomic(siteIndexLoc, JSON.stringify(siteIndex)) + await touch(indexUpdateFlag) + } catch (e) { + console.log('SITE INDEX *** save failed:', e) } } - const extractItemText = text => { - return text - .replace(/\[([^\]]*?)\][[(].*?[\])]/g, ' $1 ') - .replace(/\[{2}|\[(?:[\S]+)|\]{1,2}/g, ' ') - .replace(/\n/g, ' ') - .replace(//g, ' ') - .replace(/<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>/g, ' ') - .replace(/<(?:[^>])+>/g, ' ') - .replace(/(https?:.*?)(?=\p{White_Space}|\p{Quotation_Mark}|$)/gu, match => { - try { - const myUrl = new URL(match) - return myUrl.hostname - } catch { - return ' ' - } - }) - .replace(/[\p{P}\p{Emoji}\p{Symbol}}]+/gu, ' ') - .replace(/[\p{White_Space}\n\t]+/gu, ' ') + const scheduleSave = () => { + dirty = true + if (saveTimer) return + saveTimer = setTimeout(async () => { + saveTimer = null + dirty = false + await save() + itself.stop() + if (dirty) scheduleSave() + }, SAVE_DELAY_MS) } - const extractPageText = (pageText, currentItem, currentIndex, array) => { - // console.log('extractPageText', pageText, currentItem, currentIndex, array) + const restore = async () => { try { - if (currentItem.text) { - switch (currentItem.type) { - case 'paragraph': - case 'markdown': - case 'html': - case 'reference': - case 'image': - case 'pagefold': - case 'math': - case 'mathjax': - case 'code': - pageText += ' ' + extractItemText(currentItem.text) - break - case 'audio': - case 'video': - case 'frame': - pageText += - ' ' + - extractItemText( - currentItem.text - .split(/\r\n?|\n/) - .map(line => { - const firstWord = line.split(/\p{White_Space}/u)[0] - if ( - firstWord.startsWith('http') || - firstWord.toUpperCase() === firstWord || - firstWord.startsWith('//') - ) { - // line is markup - return '' - } else { - return line - } - }) - .join(' '), - ) - } + const data = await fsp.readFile(siteIndexLoc, 'utf8') + siteIndex = miniSearch.loadJSON(data, { fields: ['title', 'content'] }) + } catch { 
+ siteIndex = null + } + } + + const ensureLoaded = async () => { + if (!siteIndex && !working) { + await restore() + if (!siteIndex && pagehandlerRef) { + await itself.createIndex(pagehandlerRef) } - } catch (err) { - throw new Error(`Error extracting text from ${currentIndex}, ${JSON.stringify(currentItem)} ${err}, ${err.stack}`) } - return pageText } - // #### Public stuff #### + const resetTimeout = () => { + clearTimeout(searchTimeoutHandler) + if (!argv.test) { + searchTimeoutHandler = setTimeout(() => { + console.log(`SITE INDEX ${wikiName} : removed from memory`) + siteIndex = null + }, searchTimeoutMs) + } + } + + // ---- Public API ---- - var itself = new events.EventEmitter() itself.start = () => { clearTimeout(searchTimeoutHandler) working = true - return itself.emit('indexing') + itself.emit('indexing') } + itself.stop = () => { - const clearsearch = () => { - console.log(`SITE INDEX ${wikiName} : removed from memory`) - siteIndex = [] - clearTimeout(searchTimeoutHandler) - } - searchTimeoutHandler = setTimeout(clearsearch, searchTimeoutMs) working = false - return itself.emit('indexed') + resetTimeout() + itself.emit('indexed') } - itself.isWorking = () => { - return working - } - itself.createIndex = pagehandler => { - itself.start() - - // we save the pagehandler, so we can recreate the site index if it is removed - searchPageHandler = searchPageHandler ?? pagehandler - //timeLabel = `SITE INDEX ${wikiName} : Created` - //console.time timeLabel - - pagehandler.slugs((e, slugs) => { - if (e) { - console.log(`SITE INDEX *** createIndex ${wikiName} error:`, e) - itself.stop() - return e - } - siteIndex = new miniSearch({ - fields: ['title', 'content'], - }) + itself.isWorking = () => working - const indexPromises = slugs.map(slug => { - return new Promise(resolve => { - pagehandler.get(slug, (err, page) => { - if (err) { + itself.createIndex = async pagehandler => { + itself.start() + pagehandlerRef = pagehandlerRef ?? 
pagehandler + try { + const slugs = await pagehandler.slugs() + siteIndex = newIndex() + await Promise.all( + slugs.map(async slug => { + try { + const page = await pagehandler.get(slug) + siteIndex.add(pageToDoc(slug, page)) + } catch (err) { + if (!(err instanceof PageNotFoundError)) { console.log(`SITE INDEX *** ${wikiName}: error reading page`, slug) - return - } - // page - let pageText - try { - pageText = page.story.reduce(extractPageText, '') - } catch (err) { - console.log(`SITE INDEX *** ${wikiName} reduce to extract text on ${slug} failed`, err.message) - // console.log "page", page - pageText = '' } - siteIndex.add({ - id: slug, - title: page.title, - content: pageText, - }) - resolve() - }) - }) - }) - Promise.all(indexPromises).then(() => { - // console.timeEnd timeLabel - process.nextTick(() => { - serial(queue.shift()) - }) - }) - }) + } + }), + ) + await save() + } catch (e) { + console.log(`SITE INDEX *** createIndex ${wikiName} error:`, e) + } + itself.stop() } - itself.removePage = slug => { - const action = 'remove' - queue.push({ action, slug }) - if (Array.isArray(siteIndex) && !working) { - itself.start() - searchRestore(e => { - if (e) console.log(`SITE INDEX *** Problems restoring search index ${wikiName}:` + e) - itself.createIndex(searchPageHandler) - }) - } else { - if (!working) serial(queue.shift()) - } + itself.update = async (slug, page) => { + await ensureLoaded() + itself.start() + applyUpdate(slug, page) + scheduleSave() } - itself.update = (slug, page) => { - const action = 'update' - queue.push({ action, slug, page }) - if (Array.isArray(siteIndex) && !working) { - itself.start() - searchRestore(e => { - if (e) console.log(`SITE INDEX *** Problems restoring search index ${wikiName}:` + e) - itself.createIndex(searchPageHandler) - }) - } else { - if (!working) serial(queue.shift()) - } + itself.removePage = async slug => { + await ensureLoaded() + itself.start() + applyRemove(slug) + scheduleSave() } - itself.startUp = 
pagehandler => { - // called on server startup, here we check if wiki already is index - // we only create an index if there is either no index or there have been updates since last startup + + itself.startUp = async pagehandler => { console.log(`SITE INDEX ${wikiName} : StartUp`) - fs.stat(siteIndexLoc, (err, stats) => { - if (err === null) { - // site index exists, but has it been updated? - fs.stat(indexUpdateFlag, (err, stats) => { - if (!err) { - // index has been updated, so recreate it. - itself.createIndex(pagehandler) - // remove the update flag once the index has been created - itself.once('indexed', () => { - fs.unlink(indexUpdateFlag, err => { - if (err) console.log(`+++ SITE INDEX ${wikiName} : unable to delete update flag`) - }) - }) - } else { - // not been updated, but is it the correct version? - fs.readFile(siteIndexLoc, (err, data) => { - if (!err) { - let testIndex - try { - testIndex = JSON.parse(data) - } catch (err) { - testIndex = {} - } - if (testIndex.serializationVersion != 2) - console.log(`+++ SITE INDEX ${wikiName} : updating to latest version.`) - itself.createIndex(pagehandler) - // remove the update flag once the index has been created - itself.once('indexed', () => { - fs.unlink(indexUpdateFlag, err => { - if (err) console.log(`+++ SITE INDEX ${wikiName} : unable to delete update flag`) - }) - }) - } else { - console.log(`+++ SITE INDEX ${wikiName} : error reading index - attempting creating`) - itself.createIndex(pagehandler) - // remove the update flag once the index has been created - itself.once('indexed', () => { - fs.unlink(indexUpdateFlag, err => { - if (err) console.log(`+++ SITE INDEX ${wikiName} : unable to delete update flag`) - }) - }) - } - }) + pagehandlerRef = pagehandler + + let needsRebuild = false + + try { + await fsp.stat(siteIndexLoc) + // Index file exists — check if it's been flagged for update + try { + await fsp.stat(indexUpdateFlag) + needsRebuild = true + } catch { + // No update flag — check 
serialization version + try { + const data = await fsp.readFile(siteIndexLoc, 'utf8') + const parsed = JSON.parse(data) + if (parsed.serializationVersion !== 2) { + console.log(`+++ SITE INDEX ${wikiName} : updating to latest version.`) + needsRebuild = true } - }) - } else { - // index does not exist, so create it - itself.createIndex(pagehandler) - // remove the update flag once the index has been created - itself.once('indexed', () => { - fs.unlink(indexUpdateFlag, err => { - if (err) console.log(`+++ SITE INDEX ${wikiName} : unable to delete update flag`) - }) - }) + } catch { + console.log(`+++ SITE INDEX ${wikiName} : error reading index — recreating`) + needsRebuild = true + } + } + } catch { + // Index file doesn't exist + needsRebuild = true + } + + if (needsRebuild) { + await itself.createIndex(pagehandler) + try { + await fsp.unlink(indexUpdateFlag) + } catch { + // flag didn't exist, fine } - }) + } } return itself diff --git a/lib/security.js b/lib/security.js index 8b0b342..a95c51f 100644 --- a/lib/security.js +++ b/lib/security.js @@ -5,98 +5,57 @@ * Licensed under the MIT license. * https://github.com/fedwiki/wiki-node-server/blob/master/LICENSE.txt */ + // **security.js** -// Module for default site security. +// Default site security module. // -// This module is not intented for use, but is here to catch a problem with -// configuration of security. It does not provide any authentication, but will -// allow the server to run read-only. +// Not intended for production use — exists to catch misconfiguration. +// Provides no authentication. The server runs read-only unless +// security_legacy is enabled on an unclaimed site. -// #### Requires #### -import fs from 'node:fs' +import fsp from 'node:fs/promises' -// Export a function that generates security handler -// when called with options object. export default (log, loga, argv) => { const security = {} - // #### Private utility methods. 
#### - - const user = '' - let owner = '' - // save the admin user, and location of the identity file const { admin, id: idFile } = argv - // #### Public stuff #### - - security.authenticate_session = () => { - ;(req, res, next) => { - // not possible to login, so always false - req.isAuthenticated = () => false - return next() + security.retrieveOwner = async () => { + try { + await fsp.access(idFile) + const data = await fsp.readFile(idFile, 'utf8') + owner += data + } catch { + owner = '' } } - // Retrieve owner infomation from identity file in status directory - security.retrieveOwner = cb => { - fs.access(idFile, fs.constants.F_OK, err => { - if (!err) { - fs.readFile(idFile, (err, data) => { - if (err) return cb(err) - owner += data - cb() - }) - } else { - owner = '' - cb() - } - }) - } - - // Return the owners name security.getOwner = () => { - let ownerName - if (!owner.name) { - ownerName = '' - } else { - ownerName = owner.name - } - return ownerName + return owner.name ? owner.name : '' } + security.getUser = req => { return '' } security.isAuthorized = req => { - // nobody is authorized - everything is read-only - // unless legacy support, when unclaimed sites can be editted. 
if (owner == '') { - if (argv.security_legacy) { - return true - } else { - return false - } - } else { - return false + return !!argv.security_legacy } + return false } - // Wiki server admin + security.isAdmin = () => { - // nobody is admin - unless legacy support, and test - if (argv.security_legacy) { - if (argv.test) { - return true - } else { - return false - } - } else { - return false + if (argv.security_legacy && argv.test) { + return true } + return false } + security.defineRoutes = (app, cors, updateOwner) => { - // default security does not have any routes + // default security has no routes } return security diff --git a/lib/server.js b/lib/server.js index 19dca5b..829f8bf 100644 --- a/lib/server.js +++ b/lib/server.js @@ -6,133 +6,42 @@ * https://github.com/fedwiki/wiki-server/blob/master/LICENSE.txt */ -// **server.coffee** is the main guts of the express version -// of (Smallest Federated Wiki)[https://github.com/WardCunningham/Smallest-Federated-Wiki]. -// The CLI and Farm are just front ends -// for setting arguments, and spawning servers. In a complex system -// you would probably want to replace the CLI/Farm with your own code, -// and use server.coffee directly. -// -// #### Dependencies #### -// anything not in the standard library is included in the repo, or -// can be installed with an: -// npm install - -// Standard lib -import fs from 'fs' -import path from 'path' -import url from 'url' -import { pipeline } from 'node:stream/promises' - -// From npm -import express from 'express' -import hbs from 'express-hbs' -import f from 'flates' - -import createDOMPurify from 'dompurify' -import { JSDOM } from 'jsdom' +// **server.js** — App creation, middleware, startup. +// Route logic lives in routes/*.js modules. 
-const window = new JSDOM('').window -const DOMPurify = createDOMPurify(window) +import fs from 'node:fs' +import path from 'node:path' -// Using native fetch API (available in Node.js 18+) - -// Express 4 middleware +import express from 'express' +import hbs from 'express-hbs' import logger from 'morgan' import cookieParser from 'cookie-parser' import methodOverride from 'method-override' -// session = require('express-session') // This one was commented out — uncomment if used import sessions from 'client-sessions' import bodyParser from 'body-parser' import errorHandler from 'errorhandler' -// Local files -// Make sure these files are ESM modules or compatible -// If they are CommonJS, you will need to dynamically import them (see below) import defargs from './defaultargs.js' -import resolveClient from 'wiki-client/lib/resolve.js' import pluginsFactory from './plugins.js' import sitemapFactory from './sitemap.js' import searchFactory from './search.js' -import { warn } from 'node:console' import { createRequire } from 'module' -const require = createRequire(import.meta.url) -// Use import to load package.json from the main application's working directory -//const { default: packageJson } = await import('wiki/package.json', { with: { type: 'json' } }) -const wikiPackageImport = async () => { - let done = false - return new Promise(resolve => { - import('wiki/package.json', { with: { type: 'json' } }) - .then(imported => { - done = true - resolve(imported.default) - }) - .catch(e => { - return e - }) - .then(async () => { - if (done) return - const packageJsonPath = path.join(process.cwd(), 'package.json') - const packageJsonUrl = url.pathToFileURL(packageJsonPath).href - import(packageJsonUrl, { with: { type: 'json' } }) - .then(imported => { - resolve(imported.default) - }) - .catch(e => console.error('problems importing package', e)) - }) - }) -} +import mountPageRoutes from './routes/pages.js' +import mountMetaRoutes from './routes/meta.js' +import 
mountViewRoutes from './routes/views.js' +import mountAssetRoutes from './routes/assets.js' + +const require = createRequire(import.meta.url) -const packageJson = await wikiPackageImport() - -const render = page => { - return ( - f.div({ class: 'twins' }, f.p('')) + - '\n' + - f.div( - { class: 'header' }, - f.h1( - f.a({ href: '/', style: 'text-decoration: none' }, f.img({ height: '32px', src: '/favicon.png' })) + - ' ' + - page.title, - ), - ) + - '\n' + - f.div( - { class: 'story' }, - page.story - .map(story => { - if (!story) return '' - if (story.type === 'paragraph') { - f.div({ class: 'item paragraph' }, f.p(resolveClient.resolveLinks(story.text))) - } else if (story.type === 'image') { - f.div( - { class: 'item image' }, - f.img({ class: 'thumbnail', src: story.url }), - f.p(resolveClient.resolveLinks(story.text || story.caption || 'uploaded image')), - ) - } else if (story.type === 'html') { - f.div({ class: 'item html' }, f.p(resolveClient.resolveLinks(story.text || '', DOMPurify.sanitize))) - } else f.div({ class: 'item' }, f.p(resolveClient.resolveLinks(story.text || ''))) - }) - .join('\n'), - ) - ) -} -// Set export objects for node and coffee to a function that generates a sfw server. export default async argv => { - // Create the main application object, app. const app = express() - - // remove x-powered-by header app.disable('x-powered-by') - // defaultargs.coffee exports a function that takes the argv object - // that is passed in and then does its - // best to supply sane defaults for any arguments that are missing. 
argv = defargs(argv) + const packageJson = JSON.parse(fs.readFileSync(argv.packageFile, 'utf8')) + app.startOpts = argv const wikiName = new URL(argv.url).hostname @@ -144,6 +53,8 @@ export default async argv => { console.log(stuff) } + // ---- Error handler middleware ---- + const ourErrorHandler = (req, res, next) => { let fired = false res.e = (error, status) => { @@ -159,72 +70,54 @@ export default async argv => { next() } - let pagehandler, sitemaphandler, searchhandler, securityhandler + // ---- Handlers ---- - // Dynamically import database adapter (since the module name is dynamic) const dbModule = await import(argv.database.type) - app.pagehandler = pagehandler = dbModule.default(argv) - - // Initialize sitemap handler - app.sitemaphandler = sitemaphandler = sitemapFactory(argv) - - // Initialize search handler - app.searchhandler = searchhandler = searchFactory(argv) + const pagehandler = dbModule.default(argv) + const sitemaphandler = sitemapFactory(argv) + const searchhandler = searchFactory(argv) - // Dynamically import security adapter (also dynamic) console.log('security_type', argv.security_type) const securityModule = await import(argv.security_type) - app.securityhandler = securityhandler = securityModule.default(log, loga, argv) + const securityhandler = securityModule.default(log, loga, argv) - // If the site is owned, owner will contain the name of the owner - let owner = '' + app.pagehandler = pagehandler + app.sitemaphandler = sitemaphandler + app.searchhandler = searchhandler + app.securityhandler = securityhandler - // If the user is logged in, user will contain their identity - let user = '' + let owner = '' - // Called from authentication when the site is claimed, - // to update the name of the owner held here. const updateOwner = id => { owner = id } + const getOwner = () => owner + + // ---- Middleware ---- - // #### Middleware #### - // - // Allow json to be got cross origin. 
const cors = (req, res, next) => { res.header('Access-Control-Allow-Origin', req.get('origin') || '*') next() } - const remoteGet = (remote, slug, cb) => { - // assume http, as we know no better at this point and we need to specify a protocol. - const remoteURL = new URL(`http://${remote}/${slug}.json`).toString() - // set a two second timeout - fetch(remoteURL, { signal: AbortSignal.timeout(2000) }) - .then(res => { - if (res.ok) { - return res - } - throw new Error(res.statusText) - }) - .then(res => { - return res.json() - }) - .then(json => { - cb(null, json, 200) - }) - .catch(err => { - console.error('Unable to fetch remote resource', remote, slug, err) - cb(err, 'Page not found', 404) - }) + const authorized = (req, res, next) => { + if (securityhandler.isAuthorized(req)) { + next() + } else { + console.log('rejecting', req.path) + res.sendStatus(403) + } } - // #### Express configuration #### - // Set up all the standard express server options, - // including hbs to use handlebars/mustache templates - // saved with a .html extension, and no layout. + const admin = (req, res, next) => { + if (securityhandler.isAdmin(req)) { + next() + } else { + console.log('rejecting', req.path) + res.sendStatus(403) + } + } - // const staticPathOptions = { dotfiles: 'ignore', etag: true, @@ -233,12 +126,15 @@ export default async argv => { maxAge: '1h', } + // ---- View engine ---- + app.set('views', path.join(require.resolve('wiki-client/package.json'), '..', 'views')) app.set('view engine', 'html') app.engine('html', hbs.express4()) app.set('view options', { layout: false }) - // return deterministically colored strings + // ---- Logger ---- + const colorString = str => { const colorReset = '\x1b[0m' let hash = 0 @@ -254,8 +150,6 @@ export default async argv => { return color + str + colorReset } - // use logger, at least in development, probably needs a param to configure (or turn off). - // use stream to direct to somewhere other than stdout. 
const vhost = colorString(wikiName) app.use( logger((tokens, req, res) => { @@ -272,35 +166,26 @@ export default async argv => { }), ) + // ---- Standard middleware ---- + app.use(cookieParser()) app.use(bodyParser.json({ limit: argv.uploadLimit })) app.use(bodyParser.urlencoded({ extended: true, limit: argv.uploadLimit })) app.use(methodOverride()) - const cookieValue = { - httpOnly: true, - sameSite: 'lax', - } - if (argv.wiki_domain) { - if (!argv.wiki_domain.endsWith('localhost')) { - cookieValue['domain'] = argv.wiki_domain - } - } - // use secureProxy as TLS is terminated in outside the node process - let cookieName - if (argv.secure_cookie) { - cookieName = 'wikiTlsSession' - cookieValue['secureProxy'] = true - } else { - cookieName = 'wikiSession' + + const cookieValue = { httpOnly: true, sameSite: 'lax' } + if (argv.wiki_domain && !argv.wiki_domain.endsWith('localhost')) { + cookieValue.domain = argv.wiki_domain } + const cookieName = argv.secure_cookie ? 'wikiTlsSession' : 'wikiSession' + if (argv.secure_cookie) cookieValue.secureProxy = true + app.use( sessions({ - cookieName: cookieName, + cookieName, requestKey: 'session', secret: argv.cookieSecret, - // make the session session_duration days long duration: argv.session_duration * 24 * 60 * 60 * 1000, - // add 12 hours to session if less than 12 hours to expiry activeDuration: 24 * 60 * 60 * 1000, cookie: cookieValue, }), @@ -308,707 +193,73 @@ export default async argv => { app.use(ourErrorHandler) - // Add static route to the client + // ---- Static mounts ---- + app.use(express.static(argv.client, staticPathOptions)) - // ##### Define security routes ##### securityhandler.defineRoutes(app, cors, updateOwner) - // Add static route to assets app.use('/assets', cors, express.static(argv.assets)) - // Add static routes to the plugins client. 
Object.keys(packageJson.dependencies) - .filter(depend => depend.startsWith('wiki-plugin')) + .filter(d => d.startsWith('wiki-plugin')) .forEach(plugin => { - const clientPath = path.join(path.dirname(require.resolve(`${plugin}/package`)), 'client') - const pluginPath = '/plugins/' + plugin.slice(12) - app.use(pluginPath, cors, express.static(clientPath, staticPathOptions)) + try { + const clientPath = path.join(path.dirname(require.resolve(`${plugin}/package`)), 'client') + app.use('/plugins/' + plugin.slice(12), cors, express.static(clientPath, staticPathOptions)) + } catch { + // plugin not installed + } }) - // Add static routes to the security client. if (argv.security != './security') { app.use('/security', express.static(path.join(argv.packageDir, argv.security_type, 'client'), staticPathOptions)) } - // ##### Set up standard environments. ##### - // In dev mode turn on console.log debugging as well as showing the stack on err. if ('development' == app.get('env')) { app.use(errorHandler()) argv.debug = true } - // Show all of the options a server is using. log(argv) - // #### Routes #### - // Routes currently make up the bulk of the Express port of - // Smallest Federated Wiki. Most routes use literal names, - // or regexes to match, and then access req.params directly. - - // ##### Redirects ##### - // Common redirects that may get used throughout the routes. - const index = argv.home + '.html' - const oops = '/oops' - - // ##### Get routes ##### - // Routes have mostly been kept together by http verb, with the exception - // of the openID related routes which are at the end together. - - // Main route for initial contact. Allows us to - // link into a specific set of pages, local and remote. - // Can also be handled by the client, but it also sets up - // the login status, and related footer html, which the client - // relies on to know if it is logged in or not. 
- app.get(/^((\/[a-zA-Z0-9:.-]+\/[a-z0-9-]+(_rev\d+)?)+)\/?$/, cors, (req, res, next) => { - const urlPages = req.params[0] - .split('/') - .filter((_, index) => index % 2 === 0) - .slice(1) - const urlLocs = req.params[0] - .split('/') - .slice(1) - .filter((_, index) => index % 2 === 0) - if (['plugin', 'auth'].indexOf(urlLocs[0]) > -1) { - return next() - } - const title = urlPages.slice().pop().replace(/-+/g, ' ') - user = securityhandler.getUser(req) - const info = { - title, - pages: [], - authenticated: user ? true : false, - user: user, - seedNeighbors: argv.neighbors, - owned: owner ? true : false, - isOwner: securityhandler.isAuthorized(req) ? true : false, - ownedBy: owner ? owner : '', - } - for (const [idx, page] of urlPages.entries()) { - let pageDiv - if (urlLocs[idx] === 'view') { - pageDiv = { page } - } else { - pageDiv = { page, origin: `data-site=${urlLocs[idx]}` } - } - info.pages.push(pageDiv) - } - res.render('static.html', info) - }) - - app.get(/^\/([a-z0-9-]+)\.html$/, cors, (req, res, next) => { - const slug = req.params[0] - log(slug) - if (slug === 'runtests') return next() - pagehandler.get(slug, (e, page, status) => { - if (e) { - return res.e(e) - } - if (status === 404) { - return res.status(status).send(page) - } - page.title ||= slug.replace(/-+/g, ' ') - page.story ||= [] - user = securityhandler.getUser(req) - - const info = { - title: page.title, - pages: [ - { - page: slug, - generated: 'data-server-generated=true', - story: render(page), - }, - ], - authenticated: user ? true : false, - user: user, - seedNeighbors: argv.neighbors, - owned: owner ? true : false, - isOwner: securityhandler.isAuthorized(req) ? true : false, - ownedBy: owner ? 
owner : '', - } - res.render('static.html', info) - }) - }) - - app.get('/system/factories.json', (req, res) => { - res.status(200) - res.header('Content-Type', 'application/json') - - const factories = [] - - const getPackageFactory = plugin => { - return new Promise(resolve => { - import(`${plugin}/factory.json`, { with: { type: 'json' } }) - .then(({ default: factory }) => { - resolve(factories.push(factory)) - }) - .catch(() => { - resolve() - }) - }) - } - - Promise.all( - Object.keys(packageJson.dependencies) - .filter(depend => depend.startsWith('wiki-plugin')) - .map(plugin => { - return getPackageFactory(plugin) - }), - ).then(() => res.end(JSON.stringify(factories))) - }) - - // ###### Json Routes ###### - // Handle fetching local and remote json pages. - // Local pages are handled by the pagehandler module. - app.get(/^\/([a-z0-9-]+)\.json$/, cors, (req, res) => { - const file = req.params[0] - pagehandler.get(file, (e, page, status) => { - if (e) { - return res.e(e) - } - res.status(status || 200).send(page) - }) - }) - - // Remote pages use the http client to retrieve the page - // and sends it to the client. TODO: consider caching remote pages locally. - app.get(/^\/remote\/([a-zA-Z0-9:.-]+)\/([a-z0-9-]+)\.json$/, (req, res) => { - remoteGet(req.params[0], req.params[1], (e, page, status) => { - if (e) { - log('remoteGet error:', e) - return res.e(e) - } - res.status(status || 200).send(page) - }) - }) - - // ###### Theme Routes ###### - // If themes doesn't exist send 404 and let the client - // deal with it. - app.get(/^\/theme\/(\w+\.\w+)$/, cors, (req, res) => { - res.sendFile(path.join(argv.status, 'theme', req.params[0]), { dotfiles: 'allow' }, e => { - if (e) { - // swallow the error if the theme does not exist... 
- if (req.path === '/theme/style.css') { - res.set('Content-Type', 'text/css') - res.send('') - } else { - res.sendStatus(404) - } - } - }) - }) - - // ###### Favicon Routes ###### - // If favLoc doesn't exist send the default favicon. - const favLoc = path.join(argv.status, 'favicon.png') - const defaultFavLoc = path.join(argv.root, 'default-data', 'status', 'favicon.png') - app.get('/favicon.png', cors, (req, res) => { - fs.access(favLoc, fs.constants.F_OK, err => { - if (!err) { - res.sendFile(favLoc, { dotfiles: 'allow' }) - } else { - res.sendFile(defaultFavLoc, { dotfiles: 'allow' }) - } - }) - }) - - const authorized = (req, res, next) => { - if (securityhandler.isAuthorized(req)) { - next() - } else { - console.log('rejecting', req.path) - res.sendStatus(403) - } + // ---- Mount routes ---- + + const ctx = { + app, + pagehandler, + sitemaphandler, + searchhandler, + securityhandler, + packageJson, + authorized, + admin, + cors, + log, + argv, + getOwner, } - // Accept favicon image posted to the server, and if it does not already exist - // save it. - app.post('/favicon.png', authorized, (req, res) => { - const favicon = req.body.image.replace(/^data:image\/png;base64,/, '') - const buf = Buffer.from(favicon, 'base64') - fs.access(argv.status, fs.constants.F_OK, err => { - if (!err) { - fs.writeFile(favLoc, buf, e => { - if (e) { - return res.e(e) - } - res.send('Favicon Saved') - }) - } else { - fs.mkdir(argv.status, { recursive: true }, () => { - fs.writeFile(favLoc, buf, e => { - if (e) { - return res.e(e) - } - res.send('Favicon Saved') - }) - }) - } - }) - }) - - // Redirect remote favicons to the server they are needed from. 
- app.get(/^\/remote\/([a-zA-Z0-9:.-]+\/favicon.png)$/, (req, res) => { - const remotefav = `http://${req.params[0]}` - res.redirect(remotefav) - }) - - // ###### Recycler Routes ###### - // These routes are only available to the site's owner - - // Give the recycler a standard flag - use the Taiwan symbol as the use of - // negative space outward pointing arrows nicely indicates that items can be removed - const recyclerFavLoc = path.join(argv.root, 'default-data', 'status', 'recycler.png') - app.get('/recycler/favicon.png', authorized, (req, res) => { - res.sendFile(recyclerFavLoc, { dotfiles: 'allow' }) - }) - - // Send an array of pages currently in the recycler via json - app.get('/recycler/system/slugs.json', authorized, (req, res) => { - fs.readdir(argv.recycler, (e, files) => { - if (e) { - return res.e(e) - } - const doRecyclermap = async file => { - return new Promise(resolve => { - const recycleFile = 'recycler/' + file - pagehandler.get(recycleFile, (e, page, status) => { - if (e || status === 404) { - console.log('Problem building recycler map:', file, 'e: ', e) - // this will leave an undefined/empty item in the array, which we will filter out later - return resolve(null) - } - resolve({ - slug: file, - title: page.title, - }) - }) - }) - } + mountViewRoutes(ctx) + mountMetaRoutes(ctx) + mountPageRoutes(ctx) + mountAssetRoutes(ctx) - Promise.all(files.map(doRecyclermap)) - .then(recyclermap => { - recyclermap = recyclermap.filter(el => !!el) - res.send(recyclermap) - }) - .catch(error => { - res.e(error) - }) - }) - }) - - // Fetching page from the recycler - /////^/([a-z0-9-]+)\.json$/// - app.get(/^\/recycler\/([a-z0-9-]+)\.json$/, authorized, (req, res) => { - const file = 'recycler/' + req.params[0] - pagehandler.get(file, (e, page, status) => { - if (e) { - return res.e(e) - } - res.status(status || 200).send(page) - }) - }) - - // Delete page from the recycler - app.delete(/^\/recycler\/([a-z0-9-]+)\.json$/, authorized, (req, res) => { - const 
file = 'recycler/' + req.params[0] - pagehandler.delete(file, err => { - if (err) { - res.status(500).send(err) - } - res.status(200).send('') - }) - }) - - // ###### Meta Routes ###### - // Send an array of pages in the database via json - app.get('/system/slugs.json', cors, (req, res) => { - pagehandler.slugs((err, files) => { - if (err) { - res.status(500).send(err) - } - res.send(files) - }) - }) - - // Returns a list of installed plugins. (does this get called anymore!) - app.get('/system/plugins.json', cors, (req, res) => { - try { - const pluginNames = Object.keys(require.main.require('./package').dependencies) - .filter(depend => depend.startsWith('wiki-plugin')) - .map(name => name.slice(12)) - res.send(pluginNames) - } catch (e) { - return res.e(e) - } - }) - //{ - const sitemapLoc = path.join(argv.status, 'sitemap.json') - app.get('/system/sitemap.json', cors, (req, res) => { - fs.access(sitemapLoc, fs.constants.F_OK, err => { - if (!err) { - res.sendFile(sitemapLoc, { dotfiles: 'allow' }) - } else { - // only createSitemap if we are not already creating one - if (!sitemaphandler.isWorking()) { - sitemaphandler.createSitemap(pagehandler) - } - // wait for the sitemap file to be written, before sending - sitemaphandler.once('finished', () => { - res.sendFile(sitemapLoc, { dotfiles: 'allow' }) - }) - } - }) - }) - - const xmlSitemapLoc = path.join(argv.status, 'sitemap.xml') - app.get('/sitemap.xml', cors, (req, res) => { - fs.access(sitemapLoc, fs.constants.F_OK, err => { - if (!err) { - res.sendFile(xmlSitemapLoc, { dotfiles: 'allow' }) - } else { - if (!sitemaphandler.isWorking()) { - sitemaphandler.createSitemap(pagehandler) - } - sitemaphandler.once('finished', () => { - res.sendFile(xmlSitemapLoc, { dotfiles: 'allow' }) - }) - } - }) - }) - - const searchIndexLoc = path.join(argv.status, 'site-index.json') - app.get('/system/site-index.json', cors, (req, res) => { - fs.access(searchIndexLoc, fs.constants.F_OK, err => { - if (!err) { - 
res.sendFile(searchIndexLoc, { dotfiles: 'allow' }) - } else { - // only create index if we are not already creating one - if (!searchhandler.isWorking()) { - searchhandler.createIndex(pagehandler) - } - searchhandler.once('indexed', () => { - res.sendFile(searchIndexLoc, { dotfiles: 'allow' }) - }) - } - }) - }) - - app.get('/system/export.json', cors, (req, res) => { - pagehandler.pages((e, sitemap) => { - if (e) { - return res.e(e) - } - const pagePromises = sitemap.map(stub => { - return new Promise((resolve, reject) => { - pagehandler.get(stub.slug, (error, page) => { - if (error) { - return reject(error) - } - resolve({ slug: stub.slug, page }) - }) - }) - }) - - Promise.all(pagePromises) - .then(pages => { - const pageExport = pages.reduce((dict, combined) => { - dict[combined.slug] = combined.page - return dict - }, {}) - // TODO: this fails for a very large site - res.json(pageExport) - }) - .catch(error => { - res.e(error) - }) - }) - }) + // ---- Startup ---- - const admin = (req, res, next) => { - if (securityhandler.isAdmin(req)) { - next() - } else { - console.log('rejecting', req.path) - res.sendStatus(403) - } - } - - app.get('/system/version.json', admin, (req, res) => { - const getPackageVersion = packageName => { - return new Promise(resolve => { - try { - // Use import to load package.json from the main application's working directory - import(`${packageName}/package.json`, { with: { type: 'json' } }).then(({ default: packageJson }) => { - resolve({ [packageName]: packageJson.version }) - }) - } catch (error) { - console.error(`Error reading package for ${packageName}:`, error) - resolve({ [packageName]: 'unknown' }) - } - }) - } - - const versions = {} - - const security = () => { - return new Promise(resolve => { - Promise.all( - Object.keys(packageJson.dependencies) - .filter(depend => depend.startsWith('wiki-security')) - .map(key => { - return getPackageVersion(key) - }), - ).then(values => { - resolve({ security: values.reduce((acc, cV) => 
Object.assign(acc, cV), {}) }) - }) - }) - } - - const plugins = () => { - return new Promise(resolve => { - Promise.all( - Object.keys(packageJson.dependencies) - .filter(depend => depend.startsWith('wiki-plugin')) - .map(key => { - return getPackageVersion(key) - }), - ).then(values => { - resolve({ plugins: values.reduce((acc, cV) => Object.assign(acc, cV), {}) }) - }) - }) - } - - Promise.all([getPackageVersion('wiki-server'), getPackageVersion('wiki-client'), security(), plugins()]).then(v => { - Object.assign(versions, { [packageJson.name]: packageJson.version }, ...v) - res.json(versions) - }) - }) - - // ##### Proxy routes ##### - - app.get('/proxy/*splat', authorized, (req, res) => { - const pathParts = req.originalUrl.split('/') - const remoteHost = pathParts[2] - pathParts.splice(0, 3) - const remoteResource = pathParts.join('/') - // this will fail if remote is TLS only! - const requestURL = 'http://' + remoteHost + '/' + remoteResource - console.log('PROXY Request: ', requestURL) - if ( - requestURL.endsWith('.json') || - requestURL.endsWith('.png') || - requestURL.endsWith('.jpg') || - pathParts[0] === 'plugin' - ) { - fetch(requestURL, { signal: AbortSignal.timeout(2000) }) - .then(async fetchRes => { - if (fetchRes.ok) { - res.set('content-type', fetchRes.headers.get('content-type')) - res.set('last-modified', fetchRes.headers.get('last-modified')) - await pipeline(fetchRes.body, res) - } else { - res.status(fetchRes.status).end() - } - }) - .catch(err => { - console.log('ERROR: Proxy Request ', requestURL, err) - res.status(500).end() - }) - } else { - res.status(400).end() - } - }) - - // ##### Put routes ##### - - app.put(/^\/page\/([a-z0-9-]+)\/action$/i, authorized, (req, res) => { - const action = JSON.parse(req.body.action) - // Handle all of the possible actions to be taken on a page, - const actionCB = (e, page, status) => { - //if e then return res.e e - if (status === 404) { - // res.status(status).send(page) - return res.e(page, status) 
- } - // Using Coffee-Scripts implicit returns we assign page.story to the - // result of a list comprehension by way of a switch expression. - try { - page.story = (() => { - switch (action.type) { - case 'move': - return action.order.map(id => { - const match = page.story.filter(para => id === para.id)[0] - if (!match) throw 'Ignoring move. Try reload.' - return match - }) - case 'add': { - const idx = page.story.map(para => para.id).indexOf(action.after) + 1 - page.story.splice(idx, 0, action.item) - return page.story - } - - case 'remove': - return page.story.filter(para => para?.id !== action.id) - - case 'edit': - return page.story.map(para => { - if (para.id === action.id) { - return action.item - } else { - return para - } - }) - - case 'create': - case 'fork': - return page.story || [] - - default: - log('Unfamiliar action:', action) - //page.story - throw 'Unfamiliar action ignored' - } - })() - } catch (e) { - return res.e(e) - } - // Add a blank journal if it does not exist. - // And add what happened to the journal. - if (!page.journal) { - page.journal = [] - } - if (action.fork) { - page.journal.push({ type: 'fork', site: action.fork, date: action.date - 1 }) - delete action.fork - } - page.journal.push(action) - pagehandler.put(req.params[0], page, e => { - if (e) return res.e(e) - res.send('ok') - // log 'saved' - }) - // update sitemap - sitemaphandler.update(req.params[0], page) - - // update site index - searchhandler.update(req.params[0], page) - } - // log action - - // If the action is a fork, get the page from the remote server, - // otherwise ask pagehandler for it. 
- if (action.fork) { - pagehandler.saveToRecycler(req.params[0], err => { - if (err && err !== 'page does not exist') { - console.log(`Error saving ${req.params[0]} before fork: ${err}`) - } - if (action.forkPage) { - const forkPageCopy = JSON.parse(JSON.stringify(action.forkPage)) - delete action.forkPage - actionCB(null, forkPageCopy) - } else { - // Legacy path, new clients will provide forkPage on implicit forks. - remoteGet(action.fork, req.params[0], actionCB) - } - }) - } else if (action.type === 'create') { - // Prevent attempt to write circular structure - const itemCopy = JSON.parse(JSON.stringify(action.item)) - pagehandler.get(req.params[0], (e, page, status) => { - if (e) return actionCB(e) - if (status !== 404) { - res.status(409).send('Page already exists.') - } else { - actionCB(null, itemCopy) - } - }) - } else if (action.type === 'fork') { - pagehandler.saveToRecycler(req.params[0], err => { - if (err) console.log(`Error saving ${req.params[0]} before fork: ${err}`) - if (action.forkPage) { - // push - const forkPageCopy = JSON.parse(JSON.stringify(action.forkPage)) - delete action.forkPage - actionCB(null, forkPageCopy) - } else { - // pull - remoteGet(action.site, req.params[0], actionCB) - } - }) - } else { - pagehandler.get(req.params[0], actionCB) - } - }) - - // Return the oops page when login fails. - app.get('/oops', (req, res) => { - res.statusCode = 403 - res.render('oops.html', { msg: 'This is not your wiki!' 
}) - }) - - // Traditional request to / redirects to index :) - app.get('/', cors, (req, res) => { - const home = path.join(argv.assets, 'home', 'index.html') - fs.stat(home, (err, stats) => { - if (err || !stats.isFile()) { - res.redirect(index) - } else { - res.redirect('/assets/home/index.html') - } - }) - }) - - // ##### Delete Routes ##### - - app.delete(/^\/([a-z0-9-]+)\.json$/, authorized, (req, res) => { - const pageFile = req.params[0] - // we need the original page text to remove it from the index, so get the original text before deleting it - pagehandler.get(pageFile, (e, page, status) => { - const title = page.title - pagehandler.delete(pageFile, err => { - if (err) { - res.status(500).send(err) - } else { - sitemaphandler.removePage(pageFile) - res.status(200).send('') - // update site index - searchhandler.removePage(req.params[0]) - } - }) - }) - }) - - // #### Start the server #### - // - // set a default process exitCode, so we can diferentiate between exiting as part of a reload, - // and an exit after an uncaught error. - // except when test is set, so the tests don't report a fail when closing the server process. process.exitCode = argv.test ? 0 : 1 - // Wait to make sure owner is known before listening. - securityhandler.retrieveOwner(e => { - // Throw if you can't find the initial owner - if (e) throw e - owner = securityhandler.getOwner() - console.log('owner: ' + owner) - app.emit('owner-set') - }) + await securityhandler.retrieveOwner() + owner = securityhandler.getOwner() + console.log('owner: ' + owner) + app.emit('owner-set') app.on('running-serv', server => { - // ### Plugins ### - // Should replace most WebSocketServers below. 
const plugins = pluginsFactory(argv) plugins.startServers({ argv, app, packageJson }) - // ### Sitemap ### - // create sitemap at start-up sitemaphandler.createSitemap(pagehandler) - // create site index at start-up searchhandler.startUp(pagehandler) }) - // Return app when called, so that it can be watched for events and shutdown with .close() externally. return app } diff --git a/lib/sitemap.js b/lib/sitemap.js index 5bc8a0b..dfac5d8 100644 --- a/lib/sitemap.js +++ b/lib/sitemap.js @@ -6,281 +6,193 @@ * https://github.com/fedwiki/wiki-server/blob/master/LICENSE.txt */ -// **sitemap.coffee** -import fs from 'node:fs' +// **sitemap.js** +// Maintains the JSON and XML sitemaps. +// +// In-memory mutations (update/remove) are synchronous. +// Persistence is debounced — rapid successive edits coalesce +// into a single file write. + +import fsp from 'node:fs/promises' import path from 'node:path' import events from 'node:events' import writeFileAtomic from 'write-file-atomic' import xml2js from 'xml2js' -import synopsis from 'wiki-client/lib/synopsis.js' // Add .js if needed +import { asSlug, lastEdit, extractPageLinks, synopsis } from './utils.js' -const asSlug = name => - name - .replace(/\s/g, '-') - .replace(/[^A-Za-z0-9-]/g, '') - .toLowerCase() +const writeAtomic = (loc, data) => + new Promise((resolve, reject) => writeFileAtomic(loc, data, err => (err ? 
reject(err) : resolve()))) export default argv => { const wikiName = new URL(argv.url).hostname let sitemap = [] + let pagehandlerRef = null - const queue = [] - - let sitemapPageHandler = null + const sitemapLoc = path.join(argv.status, 'sitemap.json') + const xmlSitemapLoc = path.join(argv.status, 'sitemap.xml') - // ms since last update we will remove sitemap from memory + // ms before clearing sitemap from memory after last save const sitemapTimeoutMs = 120000 let sitemapTimeoutHandler = null - const sitemapLoc = path.join(argv.status, 'sitemap.json') - const xmlSitemapLoc = path.join(argv.status, 'sitemap.xml') - let working = false + let dirty = false + let saveTimer = null + const SAVE_DELAY_MS = 100 - const lastEdit = journal => { - if (!journal) return undefined - // find the last journal entry, that is not a fork, with a date. - const last = journal.findLast(action => { - return action.date && action.type != 'fork' - }) - return last ? last.date : undefined - } + const itself = new events.EventEmitter() - const sitemapUpdate = (file, page, cb) => { - let pageLinks, pageLinksMap - const extractPageLinks = (collaborativeLinks, currentItem, currentIndex, array) => { - // extract collaborative links - // - this will need extending if we also extract the id of the item containing the link - try { - const linkRe = /\[\[([^\]]+)\]\]/g - let match = undefined - while ((match = linkRe.exec(currentItem.text)) != null) { - if (!collaborativeLinks.has(asSlug(match[1]))) { - collaborativeLinks.set(asSlug(match[1]), currentItem.id) - } - } - if ('reference' == currentItem.type) { - if (!collaborativeLinks.has(currentItem.slug)) { - collaborativeLinks.set(currentItem.slug, currentItem.id) - } - } - } catch (err) { - console.log( - `METADATA *** ${wikiName} Error extracting links from ${currentIndex} of ${JSON.stringify(array)}`, - err.message, - ) - } - return collaborativeLinks - } + // ---- Internal helpers ---- + + const extractLinks = page => { + let map try { - 
pageLinksMap = page.story.reduce(extractPageLinks, new Map()) + map = page.story.reduce(extractPageLinks, new Map()) } catch (err) { - console.log(`METADATA *** ${wikiName} reduce to extract links on ${file} failed`, err.message) - pageLinksMap = [] - } - // - if (pageLinksMap.size > 0) { - pageLinks = Object.fromEntries(pageLinksMap) - } else { - pageLinks = undefined + console.log(`METADATA *** ${wikiName} reduce to extract links failed`, err.message) + return undefined } + return map.size > 0 ? Object.fromEntries(map) : undefined + } - const entry = { - slug: file, - title: page.title, - date: lastEdit(page.journal), - synopsis: synopsis(page), - links: pageLinks, - } - - const slugs = sitemap.map(page => page.slug) - - const idx = slugs.indexOf(file) - - if (~idx) { + const sitemapEntry = (file, page) => ({ + slug: file, + title: page.title, + date: lastEdit(page.journal), + synopsis: synopsis(page), + links: extractLinks(page), + }) + + const applyUpdate = (file, page) => { + const idx = sitemap.findIndex(e => e.slug === file) + const entry = sitemapEntry(file, page) + if (idx !== -1) { sitemap[idx] = entry } else { sitemap.push(entry) } - cb() } - const sitemapRemovePage = (file, cb) => { - const slugs = sitemap.map(page => page.slug) - const idx = slugs.indexOf(file) + const applyRemove = file => { + const idx = sitemap.findIndex(e => e.slug === file) + if (idx !== -1) sitemap.splice(idx, 1) + } - if (~idx) { - sitemap.splice(idx, 1) + const buildXml = sitemap => { + const urls = sitemap.map(page => { + const entry = { loc: argv.url + '/' + page.slug + '.html' } + if (page.date) { + const d = new Date(page.date) + if (!isNaN(d.valueOf())) { + entry.lastmod = d.toISOString().substring(0, 10) + } + } + return entry + }) + const obj = { + urlset: { + $: { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' }, + url: urls, + }, } - cb() + return new xml2js.Builder().buildObject(obj) } - const sitemapSave = (sitemap, cb) => { - fs.access(argv.status, 
fs.constants.F_OK, err => { - if (!err) { - writeFileAtomic(sitemapLoc, JSON.stringify(sitemap), e => { - if (e) return cb(e) - cb() - }) - } else - fs.mkdir(argv.status, { recursive: true }, () => { - writeFileAtomic(sitemapLoc, JSON.stringify(sitemap), e => { - if (e) return cb(e) - cb() - }) - }) - }) + const save = async () => { + try { + await fsp.mkdir(argv.status, { recursive: true }) + await writeAtomic(sitemapLoc, JSON.stringify(sitemap)) + await writeAtomic(xmlSitemapLoc, buildXml(sitemap)) + } catch (e) { + console.log(`Problems saving sitemap ${wikiName}:`, e) + } } - const sitemapRestore = cb => { - fs.access(sitemapLoc, fs.constants.F_OK, err => { - if (!err) { - fs.readFile(sitemapLoc, (err, data) => { - if (err) return cb(err) - try { - sitemap = JSON.parse(data) - } catch (e) { - return cb(e) - } - process.nextTick(() => { - serial(queue.shift()) - }) - }) - } else { - // sitemap file does not exist, so needs creating - itself.createSitemap(sitemapPageHandler) - } - }) + const scheduleSave = () => { + dirty = true + if (saveTimer) return + saveTimer = setTimeout(async () => { + saveTimer = null + dirty = false + await save() + itself.stop() + // If more mutations arrived during save, save again + if (dirty) scheduleSave() + }, SAVE_DELAY_MS) } - const xmlSitemapSave = (sitemap, cb) => { - const xmlmapPages = [] - sitemap.forEach(page => { - const result = {} - result['loc'] = argv.url + '/' + page.slug + '.html' - if (page.date) { - const date = new Date(page.date) - if (!isNaN(date.valueOf())) { - result['lastmod'] = date.toISOString().substring(0, 10) - } - } - xmlmapPages.push(result) - }) - const xmlmap = { urlset: { $: { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' }, url: xmlmapPages } } - const builder = new xml2js.Builder() - const xml = builder.buildObject(xmlmap) - fs.access(argv.status, fs.constants.F_OK, err => { - if (!err) { - writeFileAtomic(xmlSitemapLoc, xml, e => { - if (e) return cb(e) - cb() - }) - } else { - 
fs.mkdir(argv.status, { recursive: true }, () => { - writeFileAtomic(xmlSitemapLoc, xml, e => { - if (e) return cb(e) - cb() - }) - }) - } - }) + const restore = async () => { + try { + await fsp.access(sitemapLoc) + const data = await fsp.readFile(sitemapLoc, 'utf8') + sitemap = JSON.parse(data) + } catch { + // File doesn't exist or is corrupt — will be rebuilt by createSitemap + sitemap = [] + } } - const serial = item => { - if (item) { - switch (item.action) { - case 'update': - itself.start() - sitemapUpdate(item.file, item.page, e => process.nextTick(() => serial(queue.shift()))) - break - case 'remove': - itself.start() - sitemapRemovePage(item.file, e => process.nextTick(() => serial(queue.shift()))) - break - default: - console.log(`Sitemap unexpected action ${item.action} for ${item.page} in ${wikiName}`) - process.nextTick(() => serial(queue.shift)) + const ensureLoaded = async () => { + if (sitemap.length === 0 && !working) { + await restore() + if (sitemap.length === 0 && pagehandlerRef) { + await itself.createSitemap(pagehandlerRef) } - } else - sitemapSave(sitemap, e => { - if (e) console.log(`Problems saving sitemap ${wikiName}: ` + e) - itself.stop() - }) - xmlSitemapSave(sitemap, e => { - if (e) console.log(`Problems saving sitemap(xml) ${wikiName}`) + e - }) + } + } + + const resetTimeout = () => { + clearTimeout(sitemapTimeoutHandler) + if (!argv.test) { + sitemapTimeoutHandler = setTimeout(() => { + console.log(`removing sitemap ${wikiName} from memory`) + sitemap.length = 0 + }, sitemapTimeoutMs) + } } - // #### Public stuff #### + // ---- Public API ---- - const itself = new events.EventEmitter() itself.start = () => { clearTimeout(sitemapTimeoutHandler) working = true itself.emit('working') } + itself.stop = () => { - const clearsitemap = () => { - console.log(`removing sitemap ${wikiName} from memory`) - sitemap.length = 0 - clearTimeout(sitemapTimeoutHandler) - } - // don't clear sitemap when in test environment. 
It just delays the tests completing. - if (!argv.test) sitemapTimeoutHandler = setTimeout(clearsitemap, sitemapTimeoutMs) working = false + resetTimeout() itself.emit('finished') } - itself.isWorking = () => { - working - } - - itself.createSitemap = pagehandler => { - itself.start() - // we save the pagehandler, so we can recreate the sitemap if it is removed - if (!sitemapPageHandler) sitemapPageHandler = pagehandler - pagehandler.pages((e, newsitemap) => { - if (e) { - console.log(`createSitemap ${wikiName} : error ` + e) - itself.stop() - return e - } - sitemap = newsitemap + itself.isWorking = () => working - process.nextTick(() => { - serial(queue.shift()) - }) - }) + itself.createSitemap = async pagehandler => { + itself.start() + pagehandlerRef = pagehandlerRef ?? pagehandler + try { + sitemap = await pagehandler.pages() + await save() + } catch (e) { + console.log(`createSitemap ${wikiName} : error`, e) + } + itself.stop() } - itself.removePage = file => { - const action = 'remove' - queue.push({ action, file }) - if (sitemap.length === 0 && !working) { - itself.start() - sitemapRestore(e => { - if (e) console.log(`Problems restoring sitemap ${wikiName} : ` + e) - itself.createSitemap(sitemapPageHandler) - }) - } else { - if (!working) serial(queue.shift()) - } + itself.update = async (file, page) => { + await ensureLoaded() + itself.start() + applyUpdate(file, page) + scheduleSave() } - itself.update = (file, page) => { - const action = 'update' - queue.push({ action, file, page }) - if (sitemap.length === 0 && !working) { - itself.start() - sitemapRestore(e => { - if (e) console.log(`Problems restoring sitemap ${wikiName} : ` + e) - itself.createSitemap(sitemapPageHandler) - }) - } else { - if (!working) serial(queue.shift()) - } + itself.removePage = async file => { + await ensureLoaded() + itself.start() + applyRemove(file) + scheduleSave() } return itself diff --git a/lib/utils.js b/lib/utils.js new file mode 100644 index 0000000..b009787 --- 
/dev/null +++ b/lib/utils.js @@ -0,0 +1,140 @@ +/* + * Federated Wiki : Node Server + * + * Copyright Ward Cunningham and other contributors + * Licensed under the MIT license. + * https://github.com/fedwiki/wiki-server/blob/master/LICENSE.txt + */ + +// **utils.js** +// Pure utility functions shared across server modules. + +/** + * Convert a page name to a URL-safe slug. + * + * Whitespace becomes hyphens, non-alphanumeric/non-hyphen characters + * are stripped, and the result is lowercased. + * + * @param {string} name - The human-readable page name. + * @returns {string} The slugified form suitable for use in URLs and filenames. + * + * @example + * asSlug('Hello World') // 'hello-world' + * asSlug('Café!') // 'caf' + */ +export const asSlug = name => + name + .replace(/\s/g, '-') + .replace(/[^A-Za-z0-9-]/g, '') + .toLowerCase() + +/** + * Find the date of the most recent meaningful journal entry. + * + * Scans the journal array in reverse for the last entry that carries a + * date and is not a fork action. Fork entries are excluded because they + * record when content was copied from another site, not when a local + * edit occurred. + * + * @param {Array<{type: string, date?: number}>} [journal] - The page journal. + * @returns {number|undefined} Epoch-ms timestamp of the last edit, or + * undefined if the journal is missing, empty, or contains only forks. + * + * @example + * lastEdit([{ type: 'edit', date: 200 }, { type: 'fork', date: 300 }]) // 200 + * lastEdit([]) // undefined + */ +export const lastEdit = journal => { + if (!journal) return undefined + const last = journal.findLast(action => { + return action.date && action.type != 'fork' + }) + return last ? last.date : undefined +} + +/** + * Extract a short summary from a page's content. + * + * Checks, in order: an explicit `synopsis` field on the page, then the + * text of the first paragraph item, then the second paragraph item, then + * the text of the first item of any type, then the second. 
Falls back to + * a count of story items or a "no story" message. + * + * The result is trimmed to the first line and capped at 560 characters. + * + * @param {{synopsis?: string, story?: Array<{type: string, text?: string}>}} page + * @returns {string} A plain-text summary of the page. + * + * @example + * synopsis({ story: [{ type: 'paragraph', text: 'Hello world' }] }) + * // 'Hello world' + */ +export const synopsis = page => { + let result = page.synopsis + if (page?.story) { + const p1 = page.story[0] + const p2 = page.story[1] + if (p1 && p1.type === 'paragraph') { + result ||= p1.text + } + if (p2 && p2.type === 'paragraph') { + result ||= p2.text + } + if (p1 && p1.text) { + result ||= p1.text + } + if (p2 && p2.text) { + result ||= p2.text + } + result ||= page.story && `A page with ${page.story.length} items.` + } else { + result = 'A page with no story.' + } + result = result.trim().split(/\r|\n/, 1)[0] + return result.substring(0, 560) +} + +/** + * Reducer that accumulates collaborative links found in a story item. + * + * Extracts two kinds of links: + * - Wiki-style links: `[[Page Name]]` found anywhere in the item's text. + * - Reference items: items with `type: 'reference'` contribute their `slug`. + * + * Each link is stored as a Map entry keyed by the slugified page name, + * with the value being the id of the first item that contains that link. + * Subsequent items linking to the same slug do not overwrite the original. + * + * Intended for use with `Array.prototype.reduce` over a page's story array: + * + * @param {Map} collaborativeLinks - Accumulator map of + * slug → item-id pairs built up across story items. + * @param {{id: string, type: string, text?: string, slug?: string}} currentItem - + * The current story item being processed. + * @param {number} currentIndex - Index of the current item in the story array. + * @param {Array} array - The full story array (used only in error reporting). + * @returns {Map} The updated accumulator. 
+ * + * @example + * const links = page.story.reduce(extractPageLinks, new Map()) + * // Map { 'some-page' => 'item-id-1', 'other-page' => 'item-id-3' } + */ +export const extractPageLinks = (collaborativeLinks, currentItem, currentIndex, array) => { + try { + const linkRe = /\[\[([^\]]+)\]\]/g + let match = undefined + while ((match = linkRe.exec(currentItem.text)) != null) { + if (!collaborativeLinks.has(asSlug(match[1]))) { + collaborativeLinks.set(asSlug(match[1]), currentItem.id) + } + } + if ('reference' == currentItem.type) { + if (!collaborativeLinks.has(currentItem.slug)) { + collaborativeLinks.set(currentItem.slug, currentItem.id) + } + } + } catch (err) { + console.log(`METADATA *** Error extracting links from ${currentIndex} of ${JSON.stringify(array)}`, err.message) + } + return collaborativeLinks +} diff --git a/package.json b/package.json index d55e354..6a56919 100644 --- a/package.json +++ b/package.json @@ -51,8 +51,8 @@ "scripts": { "prettier:format": "prettier --write './**/*.js'", "prettier:check": "prettier --check ./**/*.js", - "test": "cd test; node --test", - "watch": "cd test; node --test --watch", + "test": "node --test", + "watch": "node --test --watch", "update-authors": "node scripts/update-authors.js" }, "devDependencies": { diff --git a/start.js b/start.js new file mode 100644 index 0000000..be23655 --- /dev/null +++ b/start.js @@ -0,0 +1,23 @@ +#!/usr/bin/env node + +import path from 'node:path' +import { fileURLToPath } from 'node:url' +import server from './lib/server.js' + +const __dirname = path.dirname(fileURLToPath(import.meta.url)) + +const argv = { + root: __dirname, + port: parseInt(process.env.PORT || '3000', 10), + data: process.env.WIKI_DATA || undefined, + packageFile: path.join(__dirname, 'package.json'), +} + +const app = await server(argv) + +const { port, host } = app.startOpts + +const srv = app.listen(port, host, () => { + console.log(`wiki listening on http://${host || 'localhost'}:${port}`) + 
app.emit('running-serv', srv) +}) diff --git a/test/page.js b/test/page.js index 590d8a4..77c8bf7 100644 --- a/test/page.js +++ b/test/page.js @@ -8,16 +8,17 @@ import { fileURLToPath } from 'node:url' const __filename = fileURLToPath(import.meta.url) const __dirname = path.dirname(__filename) -// ESM module imports (assuming default exports) import random from '../lib/random_id.js' import defaultargs from '../lib/defaultargs.js' import pageFactory from '../lib/page.js' +import { PageNotFoundError } from '../lib/errors.js' const testid = random() const argv = defaultargs({ data: path.join('/tmp', 'sfwtests', testid), root: path.join(__dirname, '..'), packageDir: path.join(__dirname, '..', 'node_modules'), + packageFile: path.join(__dirname, 'package.json'), security_legacy: true, }) @@ -30,71 +31,42 @@ console.log('testid', testid) describe('page', () => { describe('#page.put()', () => { it('should save a page', async () => { - return new Promise(resolve => { - page.put('asdf', testpage, e => { - if (e) throw e - resolve() - }) - }) + await page.put('asdf', testpage) }) }) + describe('#page.get()', () => { it('should get a page if it exists', async () => { - return new Promise(resolve => { - page.get('asdf', (e, got) => { - if (e) throw e - assert.equal(got.title, 'Asdf') - resolve() - }) - }) + const got = await page.get('asdf') + assert.equal(got.title, 'Asdf') }) + it('should copy a page from default if nonexistant in db', async () => { - return new Promise(resolve => { - page.get('welcome-visitors', (e, got) => { - if (e) throw e - assert.equal(got.title, 'Welcome Visitors') - resolve() - }) - }) + const got = await page.get('welcome-visitors') + assert.equal(got.title, 'Welcome Visitors') }) - // note: here we assume the wiki-plugin-activity repo has been cloned into an adjacent directory + it('should copy a page from plugins if nonexistant in db', async () => { - return new Promise(resolve => { - page.get('recent-changes', (e, got) => { - if (e) throw e - 
assert.equal(got.title, 'Recent Changes') - resolve() - }) - }) + const got = await page.get('recent-changes') + assert.equal(got.title, 'Recent Changes') }) - // note: here we assume the wiki-plugin-activity repo has been cloned into an adjacent directory + it('should mark a page from plugins with the plugin name', async () => { - return new Promise(resolve => { - page.get('recent-changes', (e, got) => { - if (e) throw e - assert.equal(got.plugin, 'activity') - resolve() - }) - }) + const got = await page.get('recent-changes') + assert.equal(got.plugin, 'activity') }) - it('should create a page if it exists nowhere', async () => { - return new Promise(resolve => { - page.get(random(), (e, got) => { - if (e) throw e - assert.equal(got, 'Page not found') - resolve() - }) - }) + + it('should throw PageNotFoundError if it exists nowhere', async () => { + await assert.rejects( + () => page.get(random()), + err => err instanceof PageNotFoundError, + ) }) + it('should eventually write the page to disk', async () => { - return new Promise(resolve => { - page.get('asdf', (e, got) => { - if (e) throw e - const page = JSON.parse(fs.readFileSync(path.join(path.sep, 'tmp', 'sfwtests', testid, 'pages', 'asdf'))) - assert.equal(got.title, page.title) - resolve() - }) - }) + const got = await page.get('asdf') + const ondisk = JSON.parse(fs.readFileSync(path.join('/tmp', 'sfwtests', testid, 'pages', 'asdf'), 'utf8')) + assert.equal(got.title, ondisk.title) }) }) }) diff --git a/test/server.js b/test/server.js index b6ed51d..69a0ced 100644 --- a/test/server.js +++ b/test/server.js @@ -9,10 +9,8 @@ import { fileURLToPath } from 'node:url' const __filename = fileURLToPath(import.meta.url) const __dirname = path.dirname(__filename) -// CommonJS server module (.cjs) const server = await import('../index.js') -// ESM modules import random from '../lib/random_id.js' import defaultargs from '../lib/defaultargs.js' @@ -20,26 +18,26 @@ const testid = random() const argv = defaultargs({ 
data: path.join('/tmp', 'sfwtests', testid), packageDir: path.join(__dirname, '..', 'node_modules'), + packageFile: path.join(__dirname, 'package.json'), port: 55557, security_legacy: true, test: true, }) describe('server', () => { - var app = {} + let app = {} let runningServer = null - before(async done => { - // as starting the server this was does not create a sitemap file, create an empty one + + before(async () => { const sitemapLoc = path.join('/tmp', 'sfwtests', testid, 'status', 'sitemap.json') - fs.mkdirSync(path.join('/tmp', 'sfwtests', testid)) - fs.mkdirSync(path.join('/tmp', 'sfwtests', testid, 'status')) + fs.mkdirSync(path.join('/tmp', 'sfwtests', testid), { recursive: true }) + fs.mkdirSync(path.join('/tmp', 'sfwtests', testid, 'status'), { recursive: true }) fs.writeFileSync(sitemapLoc, JSON.stringify([])) - let x = await server.default(argv) - app = x - // app = server(argv) - app.once('owner-set', async () => { - runningServer = await app.listen(app.startOpts.port, app.startOpts.host, done) + app = await server.default(argv) + + await new Promise(resolve => { + runningServer = app.listen(app.startOpts.port, app.startOpts.host, resolve) }) }) @@ -48,27 +46,17 @@ describe('server', () => { }) const request = supertest('http://localhost:55557') - - // location of the test page const loc = path.join('/tmp', 'sfwtests', testid, 'pages', 'adsf-test-page') it('factories should return a list of plugin', async () => { - await request - .get('/system/factories.json') - .expect(200) - .expect('Content-Type', /json/) - .then(res => { - assert.equal(res.body[1].name, 'Video') - assert.equal(res.body[1].category, 'format') - }) + const res = await request.get('/system/factories.json').expect(200).expect('Content-Type', /json/) + assert.equal(res.body[1].name, 'Video') + assert.equal(res.body[1].category, 'format') }) it('new site should have an empty list of pages', async () => { - await request - .get('/system/slugs.json') - .expect(200) - 
.expect('Content-Type', /json/) - .then(res => assert.deepEqual(res.body, [])) + const res = await request.get('/system/slugs.json').expect(200).expect('Content-Type', /json/) + assert.deepEqual(res.body, []) }) it('should create a page', async () => { @@ -92,27 +80,18 @@ describe('server', () => { .expect(200) }) - it('should move the paragraphs to the order given ', async () => { + it('should move the paragraphs to the order given', async () => { const body = '{ "type": "move", "order": [ "a1", "a3", "a2", "a4"] }' await request .put('/page/adsf-test-page/action') .send('action=' + body) .expect(200) - .then( - () => { - const page = JSON.parse(fs.readFileSync(loc)) - assert.equal(page.story[1].id, 'a3') - assert.equal(page.story[2].id, 'a2') - assert.equal(page.journal[1].type, 'move') - }, - err => { - throw err - }, - ) - .catch(err => { - throw err - }) + + const page = JSON.parse(fs.readFileSync(loc, 'utf8')) + assert.equal(page.story[1].id, 'a3') + assert.equal(page.story[2].id, 'a2') + assert.equal(page.journal[1].type, 'move') }) it('should add a paragraph', async () => { @@ -126,15 +105,11 @@ describe('server', () => { .put('/page/adsf-test-page/action') .send('action=' + body) .expect(200) - .then(() => { - const page = JSON.parse(fs.readFileSync(loc)) - assert.equal(page.story.length, 5) - assert.equal(page.story[3].id, 'a5') - assert.equal(page.journal[2].type, 'add') - }) - .catch(err => { - throw err - }) + + const page = JSON.parse(fs.readFileSync(loc, 'utf8')) + assert.equal(page.story.length, 5) + assert.equal(page.story[3].id, 'a5') + assert.equal(page.journal[2].type, 'add') }) it('should remove a paragraph with given id', async () => { @@ -147,17 +122,13 @@ describe('server', () => { .put('/page/adsf-test-page/action') .send('action=' + body) .expect(200) - .then(() => { - const page = JSON.parse(fs.readFileSync(loc)) - assert.equal(page.story.length, 4) - assert.equal(page.story[1].id, 'a3') - assert.notEqual(page.story[2].id, 'a2') - 
assert.equal(page.story[2].id, 'a5') - assert.equal(page.journal[3].type, 'remove') - }) - .catch(err => { - throw err - }) + + const page = JSON.parse(fs.readFileSync(loc, 'utf8')) + assert.equal(page.story.length, 4) + assert.equal(page.story[1].id, 'a3') + assert.notEqual(page.story[2].id, 'a2') + assert.equal(page.story[2].id, 'a5') + assert.equal(page.journal[3].type, 'remove') }) it('should edit a paragraph in place', async () => { @@ -171,14 +142,10 @@ describe('server', () => { .put('/page/adsf-test-page/action') .send('action=' + body) .expect(200) - .then(() => { - const page = JSON.parse(fs.readFileSync(loc)) - assert.equal(page.story[1].text, 'edited') - assert.equal(page.journal[4].type, 'edit') - }) - .catch(err => { - throw err - }) + + const page = JSON.parse(fs.readFileSync(loc, 'utf8')) + assert.equal(page.story[1].text, 'edited') + assert.equal(page.journal[4].type, 'edit') }) it('should default to no change', async () => { @@ -190,17 +157,13 @@ describe('server', () => { .put('/page/adsf-test-page/action') .send('action=' + body) .expect(500) - .then(() => { - const page = JSON.parse(fs.readFileSync(loc)) - assert.equal(page.story.length, 4) - assert.equal(page.journal.length, 5) - assert.equal(page.story[0].id, 'a1') - assert.equal(page.story[3].text, 'this is the fourth paragraph') - assert.equal(page.journal[4].type, 'edit') - }) - .catch(err => { - throw err - }) + + const page = JSON.parse(fs.readFileSync(loc, 'utf8')) + assert.equal(page.story.length, 4) + assert.equal(page.journal.length, 5) + assert.equal(page.story[0].id, 'a1') + assert.equal(page.story[3].text, 'this is the fourth paragraph') + assert.equal(page.journal[4].type, 'edit') }) it('should refuse to create over a page', async () => { @@ -214,44 +177,23 @@ describe('server', () => { .put('/page/adsf-test-page/action') .send('action=' + body) .expect(409) - .then(() => { - const page = JSON.parse(fs.readFileSync(loc)) - assert.notEqual(page.title, 'Doh') - }) - .catch(err => { 
- throw err - }) + + const page = JSON.parse(fs.readFileSync(loc, 'utf8')) + assert.notEqual(page.title, 'Doh') }) it('site should now have one page', async () => { - await request - .get('/system/slugs.json') - .expect(200) - .expect('Content-Type', /json/) - .then(res => { - assert.equal(res.body.length, 1) - assert.equal(res.body[0], 'adsf-test-page') - }) - .catch(err => { - throw err - }) + const res = await request.get('/system/slugs.json').expect(200).expect('Content-Type', /json/) + assert.equal(res.body.length, 1) + assert.equal(res.body[0], 'adsf-test-page') }) - // Should be a version test, but doesn't seem it's supported in test mode yet. it.skip('server should return a version', async () => { - await request - .get('/system/version.json') - .expect(200) - .expect('Content-Type', /json/) - .then(res => { - assert.equal(res.body.wiki, '0.1') - assert.equal(res.body['wiki-server'], '0.2') - assert.equal(res.body['wiki-client'], '0.3') - assert.equal(res.body.plugins['wiki-plugin-activity'], '0.4') - assert.equal(res.body.plugins['wiki-plugin-video'], '0.5') - }) - .catch(err => { - throw err - }) + const res = await request.get('/system/version.json').expect(200).expect('Content-Type', /json/) + assert.equal(res.body.wiki, '0.1') + assert.equal(res.body['wiki-server'], '0.2') + assert.equal(res.body['wiki-client'], '0.3') + assert.equal(res.body.plugins['wiki-plugin-activity'], '0.4') + assert.equal(res.body.plugins['wiki-plugin-video'], '0.5') }) }) diff --git a/test/sitemap.js b/test/sitemap.js index adf8504..002bf82 100644 --- a/test/sitemap.js +++ b/test/sitemap.js @@ -6,20 +6,19 @@ import fs from 'node:fs' import path from 'node:path' import { fileURLToPath } from 'node:url' -// Emulate __dirname in ESM const __filename = fileURLToPath(import.meta.url) const __dirname = path.dirname(__filename) -// Dynamic import of CommonJS module const server = await import('../index.js') -// ESM imports import random from '../lib/random_id.js' import defaultargs 
from '../lib/defaultargs.js' const testid = random() const argv = defaultargs({ data: path.join('/tmp', 'sfwtests', testid), + packageDir: path.join(__dirname, '..', 'node_modules'), + packageFile: path.join(__dirname, 'package.json'), port: 55556, security_legacy: true, test: true, @@ -29,13 +28,12 @@ describe('sitemap', () => { let app = {} let runningServer = null - before(async done => { - let x = await server.default(argv) - app = x + before(async () => { + fs.mkdirSync(path.join('/tmp', 'sfwtests', testid, 'pages'), { recursive: true }) + app = await server.default(argv) - // app = server(argv) - app.once('owner-set', () => { - runningServer = app.listen(app.startOpts.port, app.startOpts.host, done) + await new Promise(resolve => { + runningServer = app.listen(app.startOpts.port, app.startOpts.host, resolve) }) }) @@ -44,19 +42,13 @@ describe('sitemap', () => { }) const request = supertest('http://localhost:55556') - fs.mkdirSync(path.join('/tmp', 'sfwtests', testid, 'pages'), { recursive: true }) - - // location of the sitemap const sitemapLoc = path.join('/tmp', 'sfwtests', testid, 'status', 'sitemap.json') + const waitForSitemap = () => new Promise(resolve => app.sitemaphandler.once('finished', resolve)) + it('new site should have an empty sitemap', async () => { - await request - .get('/system/sitemap.json') - .expect(200) - .expect('Content-Type', /json/) - .then(res => { - assert.equal(res.body.length, 0) - }) + const res = await request.get('/system/sitemap.json').expect(200).expect('Content-Type', /json/) + assert.equal(res.body.length, 0) }) it('creating a page should add it to the sitemap', async () => { @@ -78,19 +70,13 @@ describe('sitemap', () => { .put('/page/adsf-test-page/action') .send('action=' + body) .expect(200) - // sitemap update does not happen until after the put has returned, so wait for it to finish - .then(() => new Promise(resolve => app.sitemaphandler.once('finished', () => resolve()))) - .then( - () => { - const sitemap = 
JSON.parse(fs.readFileSync(sitemapLoc)) - assert.equal(sitemap[0].slug, 'adsf-test-page') - assert.equal(sitemap[0].synopsis, 'this is the first paragraph') - assert.deepEqual(sitemap[0].links, { third: 'a3' }) - }, - err => { - throw err - }, - ) + + await waitForSitemap() + + const sitemap = JSON.parse(fs.readFileSync(sitemapLoc, 'utf8')) + assert.equal(sitemap[0].slug, 'adsf-test-page') + assert.equal(sitemap[0].synopsis, 'this is the first paragraph') + assert.deepEqual(sitemap[0].links, { third: 'a3' }) }) it('synopsis should reflect edit to first paragraph', async () => { @@ -104,23 +90,20 @@ describe('sitemap', () => { .put('/page/adsf-test-page/action') .send('action=' + body) .expect(200) - .then(() => new Promise(resolve => app.sitemaphandler.once('finished', () => resolve()))) - .then(() => { - const sitemap = JSON.parse(fs.readFileSync(sitemapLoc)) - assert.equal(sitemap[0].slug, 'adsf-test-page') - assert.equal(sitemap[0].synopsis, 'edited') - }) + + await waitForSitemap() + + const sitemap = JSON.parse(fs.readFileSync(sitemapLoc, 'utf8')) + assert.equal(sitemap[0].slug, 'adsf-test-page') + assert.equal(sitemap[0].synopsis, 'edited') }) it('deleting a page should remove it from the sitemap', async () => { - await request - .delete('/adsf-test-page.json') - .send() - .expect(200) - .then(() => new Promise(resolve => app.sitemaphandler.once('finished', () => resolve()))) - .then(() => { - const sitemap = JSON.parse(fs.readFileSync(sitemapLoc)) - assert.deepEqual(sitemap, []) - }) + await request.delete('/adsf-test-page.json').send().expect(200) + + await waitForSitemap() + + const sitemap = JSON.parse(fs.readFileSync(sitemapLoc, 'utf8')) + assert.deepEqual(sitemap, []) }) }) diff --git a/test/utils.js b/test/utils.js new file mode 100644 index 0000000..ecd9d9e --- /dev/null +++ b/test/utils.js @@ -0,0 +1,188 @@ +import { describe, it } from 'node:test' +import assert from 'node:assert/strict' + +import { asSlug, lastEdit, extractPageLinks, synopsis } 
from '../lib/utils.js' +import { resolveLinks, escape } from '../lib/render.js' + +describe('utils', () => { + describe('asSlug', () => { + it('should replace spaces with hyphens', () => { + assert.equal(asSlug('Hello World'), 'hello-world') + }) + it('should lowercase the result', () => { + assert.equal(asSlug('FooBar'), 'foobar') + }) + it('should strip non-alphanumeric non-hyphen characters', () => { + assert.equal(asSlug('Hello, World!'), 'hello-world') + }) + it('should handle multiple consecutive spaces', () => { + assert.equal(asSlug('a b c'), 'a--b---c') + }) + it('should return empty string for empty input', () => { + assert.equal(asSlug(''), '') + }) + it('should handle tabs and newlines as spaces', () => { + assert.equal(asSlug('a\tb\nc'), 'a-b-c') + }) + it('should preserve digits and hyphens', () => { + assert.equal(asSlug('page-123'), 'page-123') + }) + it('should strip unicode characters', () => { + assert.equal(asSlug('café'), 'caf') + }) + }) + + describe('lastEdit', () => { + it('should return undefined for undefined journal', () => { + assert.equal(lastEdit(undefined), undefined) + }) + it('should return undefined for empty journal', () => { + assert.equal(lastEdit([]), undefined) + }) + it('should return the date of the last non-fork entry', () => { + const journal = [ + { type: 'create', date: 100 }, + { type: 'edit', date: 200 }, + { type: 'fork', date: 300 }, + ] + assert.equal(lastEdit(journal), 200) + }) + it('should return undefined if all entries are forks', () => { + const journal = [ + { type: 'fork', date: 100 }, + { type: 'fork', date: 200 }, + ] + assert.equal(lastEdit(journal), undefined) + }) + it('should skip entries without a date', () => { + const journal = [{ type: 'edit', date: 100 }, { type: 'edit' }] + assert.equal(lastEdit(journal), 100) + }) + }) + + describe('extractPageLinks', () => { + it('should extract wiki-style links from text', () => { + const item = { id: 'i1', type: 'paragraph', text: 'see [[Some Page]] for 
details' } + const links = [item].reduce(extractPageLinks, new Map()) + assert.equal(links.size, 1) + assert.equal(links.get('some-page'), 'i1') + }) + it('should extract multiple links from one item', () => { + const item = { id: 'i1', type: 'paragraph', text: '[[Alpha]] and [[Beta]]' } + const links = [item].reduce(extractPageLinks, new Map()) + assert.equal(links.size, 2) + assert.equal(links.get('alpha'), 'i1') + assert.equal(links.get('beta'), 'i1') + }) + it('should not overwrite an existing slug with a later item id', () => { + const items = [ + { id: 'i1', type: 'paragraph', text: '[[Target]]' }, + { id: 'i2', type: 'paragraph', text: '[[Target]]' }, + ] + const links = items.reduce(extractPageLinks, new Map()) + assert.equal(links.get('target'), 'i1') + }) + it('should extract slug from reference items', () => { + const item = { id: 'i1', type: 'reference', slug: 'ref-page', text: '', site: 'example.com' } + const links = [item].reduce(extractPageLinks, new Map()) + assert.equal(links.get('ref-page'), 'i1') + }) + it('should return empty map when no links present', () => { + const item = { id: 'i1', type: 'paragraph', text: 'no links here' } + const links = [item].reduce(extractPageLinks, new Map()) + assert.equal(links.size, 0) + }) + }) + + describe('synopsis', () => { + it('should use explicit synopsis field if present', () => { + const page = { synopsis: 'explicit', story: [{ type: 'paragraph', text: 'from story' }] } + assert.equal(synopsis(page), 'explicit') + }) + it('should use first paragraph text', () => { + const page = { story: [{ type: 'paragraph', text: 'first para' }] } + assert.equal(synopsis(page), 'first para') + }) + it('should fall back to second paragraph if first is not a paragraph', () => { + const page = { + story: [ + { type: 'image', text: 'img' }, + { type: 'paragraph', text: 'second para' }, + ], + } + assert.equal(synopsis(page), 'second para') + }) + it('should use first item text of any type if no paragraphs', () => { + const 
page = { story: [{ type: 'markdown', text: 'md text' }] }
+      assert.equal(synopsis(page), 'md text')
+    })
+    it('should report item count when no text available', () => {
+      const page = { story: [{ type: 'factory' }, { type: 'factory' }] }
+      assert.equal(synopsis(page), 'A page with 2 items.')
+    })
+    it('should handle page with no story', () => {
+      assert.equal(synopsis({}), 'A page with no story.')
+    })
+    it('should truncate at first line break', () => {
+      const page = { story: [{ type: 'paragraph', text: 'line one\nline two' }] }
+      assert.equal(synopsis(page), 'line one')
+    })
+    it('should cap output at 560 characters', () => {
+      const long = 'x'.repeat(600)
+      const page = { story: [{ type: 'paragraph', text: long }] }
+      assert.equal(synopsis(page).length, 560)
+    })
+  })
+
+  describe('escape', () => {
+    it('should escape ampersands', () => {
+      assert.equal(escape('a & b'), 'a &amp; b')
+    })
+    it('should escape angle brackets', () => {
+      assert.equal(escape('<div>'), '&lt;div&gt;')
+    })
+    it('should handle empty string', () => {
+      assert.equal(escape(''), '')
+    })
+    it('should handle undefined', () => {
+      assert.equal(escape(undefined), '')
+    })
+  })
+
+  describe('resolveLinks', () => {
+    it('should convert internal wiki links to anchor tags', () => {
+      const result = resolveLinks('see [[Hello World]] here')
+      assert.match(result, /class="internal"/)
+      assert.match(result, /href="\/hello-world\.html"/)
+      assert.match(result, /data-page-name="hello-world"/)
+    })
+    it('should convert external links to anchor tags', () => {
+      const result = resolveLinks('see [http://example.com Example] here')
+      assert.match(result, /class="external"/)
+      assert.match(result, /href="http:\/\/example\.com"/)
+      assert.match(result, /Example/)
+    })
+    it('should escape plain text', () => {
+      const result = resolveLinks('a < b & c > d')
+      assert.match(result, /&lt;/)
+      assert.match(result, /&amp;/)
+      assert.match(result, /&gt;/)
+    })
+    it('should pass resolution context into link titles', () => {
+      const result = resolveLinks('[[Test]]', undefined, ['page-a', 'page-b'])
+      assert.match(result, /title="page-a => page-b"/)
+    })
+    it('should handle empty string', () => {
+      assert.equal(resolveLinks(''), '')
+    })
+    it('should mark spaced internal links', () => {
+      const result = resolveLinks('[[ Hello ]]')
+      assert.match(result, /class="internal spaced"/)
+    })
+    it('should accept a custom sanitizer', () => {
+      const upper = s => s.toUpperCase()
+      const result = resolveLinks('plain text', upper)
+      assert.equal(result, 'PLAIN TEXT')
+    })
+  })
+})