diff --git a/.changeset/legal-cooks-sink.md b/.changeset/legal-cooks-sink.md new file mode 100644 index 000000000..ddddba41d --- /dev/null +++ b/.changeset/legal-cooks-sink.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db-ivm": patch +--- + +Fix bug with setWindow on ordered queries that have no limit. diff --git a/.changeset/light-phones-flash.md b/.changeset/light-phones-flash.md new file mode 100644 index 000000000..95a030b32 --- /dev/null +++ b/.changeset/light-phones-flash.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db": patch +--- + +Add predicate comparison and merging utilities (isWhereSubset, intersectWherePredicates, unionWherePredicates, and related functions) to support predicate push-down in collection sync operations, enabling efficient tracking of loaded data ranges and preventing redundant server requests. Includes performance optimizations for large primitive IN predicates and full support for Date objects in equality, range, and IN clause comparisons. diff --git a/.changeset/open-cups-lose.md b/.changeset/open-cups-lose.md new file mode 100644 index 000000000..44eee1a35 --- /dev/null +++ b/.changeset/open-cups-lose.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db": patch +--- + +Add support for orderBy and limit in currentStateAsChanges function diff --git a/packages/db-ivm/src/operators/topKWithFractionalIndex.ts b/packages/db-ivm/src/operators/topKWithFractionalIndex.ts index 3b75521e3..858503f6f 100644 --- a/packages/db-ivm/src/operators/topKWithFractionalIndex.ts +++ b/packages/db-ivm/src/operators/topKWithFractionalIndex.ts @@ -87,12 +87,25 @@ class TopKArray implements TopK { }): TopKMoveChanges { const oldOffset = this.#topKStart const oldLimit = this.#topKEnd - this.#topKStart - const oldRange: HRange = [this.#topKStart, this.#topKEnd] - this.#topKStart = offset ?? oldOffset - this.#topKEnd = this.#topKStart + (limit ?? 
oldLimit) + // `this.#topKEnd` can be `Infinity` if it has no limit + // but `diffHalfOpen` expects a finite range + // so we restrict it to the size of the topK if topKEnd is infinite + const oldRange: HRange = [ + this.#topKStart, + this.#topKEnd === Infinity ? this.#topKStart + this.size : this.#topKEnd, + ] - const newRange: HRange = [this.#topKStart, this.#topKEnd] + this.#topKStart = offset ?? oldOffset + this.#topKEnd = this.#topKStart + (limit ?? oldLimit) // can be `Infinity` if limit is `Infinity` + + // Also handle `Infinity` in the newRange + const newRange: HRange = [ + this.#topKStart, + this.#topKEnd === Infinity + ? Math.max(this.#topKStart + this.size, oldRange[1]) // since the new limit is Infinity we need to take everything (so we need to take the biggest (finite) topKEnd) + : this.#topKEnd, + ] const { onlyInA, onlyInB } = diffHalfOpen(oldRange, newRange) const moveIns: Array> = [] diff --git a/packages/db-ivm/tests/operators/topKWithFractionalIndex.test.ts b/packages/db-ivm/tests/operators/topKWithFractionalIndex.test.ts index 5739cfb36..149fffb93 100644 --- a/packages/db-ivm/tests/operators/topKWithFractionalIndex.test.ts +++ b/packages/db-ivm/tests/operators/topKWithFractionalIndex.test.ts @@ -28,8 +28,8 @@ function checkLexicographicOrder(results: Array) { // Check that indices are in the same order as the sorted values for (let i = 0; i < sortedByValue.length - 1; i++) { - const currentIndex = sortedByValue[i].index - const nextIndex = sortedByValue[i + 1].index + const currentIndex = sortedByValue[i]!.index + const nextIndex = sortedByValue[i + 1]!.index // Indices should be in lexicographic order if (!(currentIndex < nextIndex)) { @@ -1120,5 +1120,581 @@ describe(`Operators`, () => { ) expect(moveSortedValues4).toEqual([`a`, `b`]) }) + + it(`should handle moving window from infinite limit to finite limit with same offset`, () => { + const graph = new D2() + const input = graph.newInput<[number, { id: number; value: string }]>() + const 
tracker = new MessageTracker< + [number, [{ id: number; value: string }, string]] + >() + + let windowFn: + | ((options: { offset?: number; limit?: number }) => void) + | null = null + + // Start with no limit (infinite limit) + input.pipe( + topKWithFractionalIndex((a, b) => a.value.localeCompare(b.value), { + setWindowFn: (fn) => { + windowFn = fn + }, + }), + output((message) => { + tracker.addMessage(message) + }) + ) + + graph.finalize() + + // Initial data - a, b, c, d, e, f + input.sendData( + new MultiSet([ + [[1, { id: 1, value: `a` }], 1], + [[2, { id: 2, value: `b` }], 1], + [[3, { id: 3, value: `c` }], 1], + [[4, { id: 4, value: `d` }], 1], + [[5, { id: 5, value: `e` }], 1], + [[6, { id: 6, value: `f` }], 1], + ]) + ) + graph.run() + + // Initial result should have all 6 elements (no limit) + const initialResult = tracker.getResult(compareFractionalIndex) + expect(initialResult.sortedResults.length).toBe(6) + + const initialSortedValues = initialResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(initialSortedValues).toEqual([`a`, `b`, `c`, `d`, `e`, `f`]) + + // Verify windowFn was set + expect(windowFn).toBeDefined() + + // Move to finite limit of 3 (should show a, b, c) + windowFn!({ offset: 0, limit: 3 }) + graph.run() + + const moveResult = tracker.getResult(compareFractionalIndex) + + // Should now show only first 3 elements + const moveSortedValues = moveResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(moveSortedValues).toEqual([`a`, `b`, `c`]) + + // Check that we have changes (elements d, e, f should be removed) + expect(moveResult.messageCount).toBeGreaterThan(0) + }) + + it(`should handle moving window from infinite limit to finite limit while moving offset forward`, () => { + const graph = new D2() + const input = graph.newInput<[number, { id: number; value: string }]>() + const tracker = new MessageTracker< + [number, [{ id: number; value: string }, string]] + >() + + let 
windowFn: + | ((options: { offset?: number; limit?: number }) => void) + | null = null + + // Start with no limit (infinite limit) + input.pipe( + topKWithFractionalIndex((a, b) => a.value.localeCompare(b.value), { + offset: 0, + setWindowFn: (fn) => { + windowFn = fn + }, + }), + output((message) => { + tracker.addMessage(message) + }) + ) + + graph.finalize() + + // Initial data - a, b, c, d, e, f + input.sendData( + new MultiSet([ + [[1, { id: 1, value: `a` }], 1], + [[2, { id: 2, value: `b` }], 1], + [[3, { id: 3, value: `c` }], 1], + [[4, { id: 4, value: `d` }], 1], + [[5, { id: 5, value: `e` }], 1], + [[6, { id: 6, value: `f` }], 1], + ]) + ) + graph.run() + + // Initial result should have all 6 elements (no limit) + const initialResult = tracker.getResult(compareFractionalIndex) + expect(initialResult.sortedResults.length).toBe(6) + + // Move to offset 2, limit 3 (should show c, d, e) + windowFn!({ offset: 2, limit: 3 }) + graph.run() + + const moveResult = tracker.getResult(compareFractionalIndex) + + // Should now show elements c, d, e + const moveSortedValues = moveResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(moveSortedValues).toEqual([`c`, `d`, `e`]) + + // Check that we have changes + expect(moveResult.messageCount).toBeGreaterThan(0) + }) + + it(`should handle moving window from infinite limit to finite limit while moving offset backward`, () => { + const graph = new D2() + const input = graph.newInput<[number, { id: number; value: string }]>() + const tracker = new MessageTracker< + [number, [{ id: number; value: string }, string]] + >() + + let windowFn: + | ((options: { offset?: number; limit?: number }) => void) + | null = null + + // Start with no limit (infinite limit) and offset 3 + input.pipe( + topKWithFractionalIndex((a, b) => a.value.localeCompare(b.value), { + offset: 3, + setWindowFn: (fn) => { + windowFn = fn + }, + }), + output((message) => { + tracker.addMessage(message) + }) + ) + + graph.finalize() 
+ + // Initial data - a, b, c, d, e, f + input.sendData( + new MultiSet([ + [[1, { id: 1, value: `a` }], 1], + [[2, { id: 2, value: `b` }], 1], + [[3, { id: 3, value: `c` }], 1], + [[4, { id: 4, value: `d` }], 1], + [[5, { id: 5, value: `e` }], 1], + [[6, { id: 6, value: `f` }], 1], + ]) + ) + graph.run() + + // Initial result should have elements d, e, f (no limit, offset 3) + const initialResult = tracker.getResult(compareFractionalIndex) + expect(initialResult.sortedResults.length).toBe(3) + + const initialSortedValues = initialResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(initialSortedValues).toEqual([`d`, `e`, `f`]) + + // Move to finite limit of 2, moving offset backward to 1 (should show b, c) + windowFn!({ offset: 1, limit: 2 }) + graph.run() + + const moveResult = tracker.getResult(compareFractionalIndex) + + // Should now show elements b, c (offset 1, limit 2) + const moveSortedValues = moveResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(moveSortedValues).toEqual([`b`, `c`]) + + // Check that we have changes (elements d, e, f should be removed, b, c should be added) + expect(moveResult.messageCount).toBeGreaterThan(0) + }) + + it(`should handle moving window from infinite limit to infinite limit with same offset (no-op)`, () => { + const graph = new D2() + const input = graph.newInput<[number, { id: number; value: string }]>() + const tracker = new MessageTracker< + [number, [{ id: number; value: string }, string]] + >() + + let windowFn: + | ((options: { offset?: number; limit?: number }) => void) + | null = null + + // Start with no limit (infinite limit) and offset 2 + input.pipe( + topKWithFractionalIndex((a, b) => a.value.localeCompare(b.value), { + offset: 2, + setWindowFn: (fn) => { + windowFn = fn + }, + }), + output((message) => { + tracker.addMessage(message) + }) + ) + + graph.finalize() + + // Initial data - a, b, c, d, e, f + input.sendData( + new MultiSet([ + [[1, { id: 1, 
value: `a` }], 1], + [[2, { id: 2, value: `b` }], 1], + [[3, { id: 3, value: `c` }], 1], + [[4, { id: 4, value: `d` }], 1], + [[5, { id: 5, value: `e` }], 1], + [[6, { id: 6, value: `f` }], 1], + ]) + ) + graph.run() + + // Initial result should have elements c, d, e, f (no limit, offset 2) + const initialResult = tracker.getResult(compareFractionalIndex) + expect(initialResult.sortedResults.length).toBe(4) + + const initialSortedValues = initialResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(initialSortedValues).toEqual([`c`, `d`, `e`, `f`]) + + // Move to same offset, still no limit (should show same elements c, d, e, f) + windowFn!({ offset: 2 }) + graph.run() + + const moveResult = tracker.getResult(compareFractionalIndex) + + // Should show same elements c, d, e, f (offset 2, no limit) + const moveSortedValues = moveResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(moveSortedValues).toEqual([`c`, `d`, `e`, `f`]) + + // Check that we have no more changes (this should be a no-op) + expect(moveResult.messageCount).toBe(initialResult.messageCount) + }) + + it(`should handle moving window from infinite limit to infinite limit while moving offset forward`, () => { + const graph = new D2() + const input = graph.newInput<[number, { id: number; value: string }]>() + const tracker = new MessageTracker< + [number, [{ id: number; value: string }, string]] + >() + + let windowFn: + | ((options: { offset?: number; limit?: number }) => void) + | null = null + + // Start with no limit (infinite limit) and offset 0 + input.pipe( + topKWithFractionalIndex((a, b) => a.value.localeCompare(b.value), { + offset: 0, + setWindowFn: (fn) => { + windowFn = fn + }, + }), + output((message) => { + tracker.addMessage(message) + }) + ) + + graph.finalize() + + // Initial data - a, b, c, d, e, f + input.sendData( + new MultiSet([ + [[1, { id: 1, value: `a` }], 1], + [[2, { id: 2, value: `b` }], 1], + [[3, { id: 3, value: `c` 
}], 1], + [[4, { id: 4, value: `d` }], 1], + [[5, { id: 5, value: `e` }], 1], + [[6, { id: 6, value: `f` }], 1], + ]) + ) + graph.run() + + // Initial result should have all 6 elements (no limit, offset 0) + const initialResult = tracker.getResult(compareFractionalIndex) + expect(initialResult.sortedResults.length).toBe(6) + + const initialSortedValues = initialResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(initialSortedValues).toEqual([`a`, `b`, `c`, `d`, `e`, `f`]) + + // Move to offset 2, still no limit (should show c, d, e, f) + windowFn!({ offset: 2 }) + graph.run() + + const moveResult = tracker.getResult(compareFractionalIndex) + + // Should now show elements c, d, e, f (offset 2, no limit) + const moveSortedValues = moveResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(moveSortedValues).toEqual([`c`, `d`, `e`, `f`]) + + // Check that we have changes (elements a, b should be removed) + expect(moveResult.messageCount).toBeGreaterThan(0) + }) + + it(`should handle moving window from infinite limit to infinite limit while moving offset backward`, () => { + const graph = new D2() + const input = graph.newInput<[number, { id: number; value: string }]>() + const tracker = new MessageTracker< + [number, [{ id: number; value: string }, string]] + >() + + let windowFn: + | ((options: { offset?: number; limit?: number }) => void) + | null = null + + // Start with no limit (infinite limit) and offset 3 + input.pipe( + topKWithFractionalIndex((a, b) => a.value.localeCompare(b.value), { + offset: 3, + setWindowFn: (fn) => { + windowFn = fn + }, + }), + output((message) => { + tracker.addMessage(message) + }) + ) + + graph.finalize() + + // Initial data - a, b, c, d, e, f + input.sendData( + new MultiSet([ + [[1, { id: 1, value: `a` }], 1], + [[2, { id: 2, value: `b` }], 1], + [[3, { id: 3, value: `c` }], 1], + [[4, { id: 4, value: `d` }], 1], + [[5, { id: 5, value: `e` }], 1], + [[6, { id: 6, value: `f` 
}], 1], + ]) + ) + graph.run() + + // Initial result should have elements d, e, f (no limit, offset 3) + const initialResult = tracker.getResult(compareFractionalIndex) + expect(initialResult.sortedResults.length).toBe(3) + + const initialSortedValues = initialResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(initialSortedValues).toEqual([`d`, `e`, `f`]) + + // Move to offset 1, still no limit (should show b, c, d, e, f) + windowFn!({ offset: 1 }) + graph.run() + + const moveResult = tracker.getResult(compareFractionalIndex) + + // Should now show elements b, c, d, e, f (offset 1, no limit) + const moveSortedValues = moveResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(moveSortedValues).toEqual([`b`, `c`, `d`, `e`, `f`]) + + // Check that we have changes (elements b, c should be added) + expect(moveResult.messageCount).toBeGreaterThan(0) + }) + + it(`should handle moving window from finite limit to infinite limit with same offset`, () => { + const graph = new D2() + const input = graph.newInput<[number, { id: number; value: string }]>() + const tracker = new MessageTracker< + [number, [{ id: number; value: string }, string]] + >() + + let windowFn: + | ((options: { offset?: number; limit?: number }) => void) + | null = null + + // Start with finite limit of 2 and offset 2 + input.pipe( + topKWithFractionalIndex((a, b) => a.value.localeCompare(b.value), { + limit: 2, + offset: 2, + setWindowFn: (fn) => { + windowFn = fn + }, + }), + output((message) => { + tracker.addMessage(message) + }) + ) + + graph.finalize() + + // Initial data - a, b, c, d, e, f + input.sendData( + new MultiSet([ + [[1, { id: 1, value: `a` }], 1], + [[2, { id: 2, value: `b` }], 1], + [[3, { id: 3, value: `c` }], 1], + [[4, { id: 4, value: `d` }], 1], + [[5, { id: 5, value: `e` }], 1], + [[6, { id: 6, value: `f` }], 1], + ]) + ) + graph.run() + + // Initial result should have 2 elements starting from offset 2 (c, d) + const 
initialResult = tracker.getResult(compareFractionalIndex) + expect(initialResult.sortedResults.length).toBe(2) + + const initialSortedValues = initialResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(initialSortedValues).toEqual([`c`, `d`]) + + // Move to infinite limit, keeping offset 2 (should show c, d, e, f) + windowFn!({ offset: 2, limit: Infinity }) + graph.run() + + const moveResult = tracker.getResult(compareFractionalIndex) + + // Should now show elements c, d, e, f (offset 2, no limit) + const moveSortedValues = moveResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(moveSortedValues).toEqual([`c`, `d`, `e`, `f`]) + + // Check that we have changes (elements e, f should be added) + expect(moveResult.messageCount).toBeGreaterThan(0) + }) + + it(`should handle moving window from finite limit to infinite limit while moving offset forward`, () => { + const graph = new D2() + const input = graph.newInput<[number, { id: number; value: string }]>() + const tracker = new MessageTracker< + [number, [{ id: number; value: string }, string]] + >() + + let windowFn: + | ((options: { offset?: number; limit?: number }) => void) + | null = null + + // Start with finite limit of 2 and offset 1 + input.pipe( + topKWithFractionalIndex((a, b) => a.value.localeCompare(b.value), { + limit: 2, + offset: 1, + setWindowFn: (fn) => { + windowFn = fn + }, + }), + output((message) => { + tracker.addMessage(message) + }) + ) + + graph.finalize() + + // Initial data - a, b, c, d, e, f + input.sendData( + new MultiSet([ + [[1, { id: 1, value: `a` }], 1], + [[2, { id: 2, value: `b` }], 1], + [[3, { id: 3, value: `c` }], 1], + [[4, { id: 4, value: `d` }], 1], + [[5, { id: 5, value: `e` }], 1], + [[6, { id: 6, value: `f` }], 1], + ]) + ) + graph.run() + + // Initial result should have 2 elements starting from offset 1 (b, c) + const initialResult = tracker.getResult(compareFractionalIndex) + 
expect(initialResult.sortedResults.length).toBe(2) + + const initialSortedValues = initialResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(initialSortedValues).toEqual([`b`, `c`]) + + // Move to infinite limit, moving offset forward to 3 (should show d, e, f) + windowFn!({ offset: 3, limit: Infinity }) + graph.run() + + const moveResult = tracker.getResult(compareFractionalIndex) + + // Should now show elements d, e, f (offset 3, no limit) + const moveSortedValues = moveResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(moveSortedValues).toEqual([`d`, `e`, `f`]) + + // Check that we have changes (elements b, c should be removed, d, e, f should be added) + expect(moveResult.messageCount).toBeGreaterThan(0) + }) + + it(`should handle moving window from finite limit to infinite limit while moving offset backward`, () => { + const graph = new D2() + const input = graph.newInput<[number, { id: number; value: string }]>() + const tracker = new MessageTracker< + [number, [{ id: number; value: string }, string]] + >() + + let windowFn: + | ((options: { offset?: number; limit?: number }) => void) + | null = null + + // Start with finite limit of 2 and offset 3 + input.pipe( + topKWithFractionalIndex((a, b) => a.value.localeCompare(b.value), { + limit: 2, + offset: 3, + setWindowFn: (fn) => { + windowFn = fn + }, + }), + output((message) => { + tracker.addMessage(message) + }) + ) + + graph.finalize() + + // Initial data - a, b, c, d, e, f + input.sendData( + new MultiSet([ + [[1, { id: 1, value: `a` }], 1], + [[2, { id: 2, value: `b` }], 1], + [[3, { id: 3, value: `c` }], 1], + [[4, { id: 4, value: `d` }], 1], + [[5, { id: 5, value: `e` }], 1], + [[6, { id: 6, value: `f` }], 1], + ]) + ) + graph.run() + + // Initial result should have 2 elements starting from offset 3 (d, e) + const initialResult = tracker.getResult(compareFractionalIndex) + expect(initialResult.sortedResults.length).toBe(2) + + const 
initialSortedValues = initialResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(initialSortedValues).toEqual([`d`, `e`]) + + // Move to infinite limit, moving offset backward to 1 (should show b, c, d, e, f) + windowFn!({ offset: 1, limit: Infinity }) + graph.run() + + const moveResult = tracker.getResult(compareFractionalIndex) + + // Should now show elements b, c, d, e, f (offset 1, no limit) + const moveSortedValues = moveResult.sortedResults.map( + ([_key, [value, _index]]) => value.value + ) + expect(moveSortedValues).toEqual([`b`, `c`, `d`, `e`, `f`]) + + // Check that we have changes (elements b, c, f should be added, d, e should remain) + expect(moveResult.messageCount).toBeGreaterThan(0) + }) }) }) diff --git a/packages/db/src/collection/change-events.ts b/packages/db/src/collection/change-events.ts index 7a0e41acf..10c05ae71 100644 --- a/packages/db/src/collection/change-events.ts +++ b/packages/db/src/collection/change-events.ts @@ -3,15 +3,20 @@ import { toExpression, } from "../query/builder/ref-proxy" import { compileSingleRowExpression } from "../query/compiler/evaluators.js" -import { optimizeExpressionWithIndexes } from "../utils/index-optimization.js" +import { + findIndexForField, + optimizeExpressionWithIndexes, +} from "../utils/index-optimization.js" +import { ensureIndexForField } from "../indexes/auto-index.js" +import { makeComparator } from "../utils/comparison.js" import type { ChangeMessage, CurrentStateAsChangesOptions, SubscribeChangesOptions, } from "../types" -import type { Collection } from "./index.js" +import type { Collection, CollectionImpl } from "./index.js" import type { SingleRowRefProxy } from "../query/builder/ref-proxy" -import type { BasicExpression } from "../query/ir.js" +import type { BasicExpression, OrderBy } from "../query/ir.js" /** * Interface for a collection-like object that provides the necessary methods @@ -28,7 +33,7 @@ export interface CollectionLike< /** * Returns the current 
state of the collection as an array of changes * @param collection - The collection to get changes from - * @param options - Options including optional where filter + * @param options - Options including optional where filter, orderBy, and limit * @returns An array of changes * @example * // Get all items as changes @@ -41,7 +46,19 @@ export interface CollectionLike< * * // Get only items using a pre-compiled expression * const activeChanges = currentStateAsChanges(collection, { - * whereExpression: eq(row.status, 'active') + * where: eq(row.status, 'active') + * }) + * + * // Get items ordered by name with limit + * const topUsers = currentStateAsChanges(collection, { + * orderBy: [{ expression: row.name, compareOptions: { direction: 'asc' } }], + * limit: 10 + * }) + * + * // Get active users ordered by score (highest score first) + * const topActiveUsers = currentStateAsChanges(collection, { + * where: eq(row.status, 'active'), + * orderBy: [{ expression: row.score, compareOptions: { direction: 'desc' } }], * }) */ export function currentStateAsChanges< @@ -69,9 +86,48 @@ export function currentStateAsChanges< return result } - // TODO: handle orderBy and limit options - // by calling optimizeOrderedLimit + // Validate that limit without orderBy doesn't happen + if (options.limit !== undefined && !options.orderBy) { + throw new Error(`limit cannot be used without orderBy`) + } + + // First check if orderBy is present (optionally with limit) + if (options.orderBy) { + // Create where filter function if present + const whereFilter = options.where + ? 
createFilterFunctionFromExpression(options.where) + : undefined + + // Get ordered keys using index optimization when possible + const orderedKeys = getOrderedKeys( + collection, + options.orderBy, + options.limit, + whereFilter, + options.optimizedOnly + ) + + if (orderedKeys === undefined) { + // `getOrderedKeys` returned undefined because we asked for `optimizedOnly` and there was no index to use + return + } + + // Convert keys to change messages + const result: Array> = [] + for (const key of orderedKeys) { + const value = collection.get(key) + if (value !== undefined) { + result.push({ + type: `insert`, + key, + value, + }) + } + } + return result + } + // If no orderBy OR orderBy optimization failed, use where clause optimization if (!options.where) { // No filtering, return all items return collectFilteredResults() @@ -246,3 +302,121 @@ export function createFilteredCallback( } } } + +/** + * Gets ordered keys from a collection using index optimization when possible + * @param collection - The collection to get keys from + * @param orderBy - The order by clause + * @param limit - Optional limit on number of keys to return + * @param whereFilter - Optional filter function to apply while traversing + * @returns Array of keys in sorted order + */ +function getOrderedKeys( + collection: CollectionLike, + orderBy: OrderBy, + limit?: number, + whereFilter?: (item: T) => boolean, + optimizedOnly?: boolean +): Array | undefined { + // For single-column orderBy on a ref expression, try index optimization + if (orderBy.length === 1) { + const clause = orderBy[0]! 
+ const orderByExpression = clause.expression + + if (orderByExpression.type === `ref`) { + const propRef = orderByExpression + const fieldPath = propRef.path + + // Ensure index exists for this field + ensureIndexForField( + fieldPath[0]!, + fieldPath, + collection as CollectionImpl, + clause.compareOptions + ) + + // Find the index + const index = findIndexForField( + collection.indexes, + fieldPath, + clause.compareOptions + ) + + if (index && index.supports(`gt`)) { + // Use index optimization + const filterFn = (key: TKey): boolean => { + const value = collection.get(key) + if (value === undefined) { + return false + } + return whereFilter?.(value) ?? true + } + + // Take the keys that match the filter and limit + // if no limit is provided `index.keyCount` is used, + // i.e. we will take all keys that match the filter + return index.take(limit ?? index.keyCount, undefined, filterFn) + } + } + } + + if (optimizedOnly) { + return + } + + // Fallback: collect all items and sort in memory + const allItems: Array<{ key: TKey; value: T }> = [] + for (const [key, value] of collection.entries()) { + if (whereFilter?.(value) ?? 
true) { + allItems.push({ key, value }) + } + } + + // Sort using makeComparator + const compare = (a: { key: TKey; value: T }, b: { key: TKey; value: T }) => { + for (const clause of orderBy) { + const compareFn = makeComparator(clause.compareOptions) + + // Extract values for comparison + const aValue = extractValueFromItem(a.value, clause.expression) + const bValue = extractValueFromItem(b.value, clause.expression) + + const result = compareFn(aValue, bValue) + if (result !== 0) { + return result + } + } + return 0 + } + + allItems.sort(compare) + const sortedKeys = allItems.map((item) => item.key) + + // Apply limit if provided + if (limit !== undefined) { + return sortedKeys.slice(0, limit) + } + + // if no limit is provided, we will return all keys + return sortedKeys +} + +/** + * Helper function to extract a value from an item based on an expression + */ +function extractValueFromItem(item: any, expression: BasicExpression): any { + if (expression.type === `ref`) { + const propRef = expression + let value = item + for (const pathPart of propRef.path) { + value = value?.[pathPart] + } + return value + } else if (expression.type === `val`) { + return expression.value + } else { + // It must be a function + const evaluator = compileSingleRowExpression(expression) + return evaluator(item as Record) + } +} diff --git a/packages/db/src/query/index.ts b/packages/db/src/query/index.ts index 17f4dd8e7..cb0e3d9e0 100644 --- a/packages/db/src/query/index.ts +++ b/packages/db/src/query/index.ts @@ -57,3 +57,15 @@ export { export { type LiveQueryCollectionConfig } from "./live/types.js" export { type LiveQueryCollectionUtils } from "./live/collection-config-builder.js" + +// Predicate utilities for predicate push-down +export { + isWhereSubset, + unionWherePredicates, + minusWherePredicates, + isOrderBySubset, + isLimitSubset, + isPredicateSubset, +} from "./predicate-utils.js" + +export { DeduplicatedLoadSubset } from "./subset-dedupe.js" diff --git 
a/packages/db/src/query/predicate-utils.ts b/packages/db/src/query/predicate-utils.ts new file mode 100644 index 000000000..35d95b98e --- /dev/null +++ b/packages/db/src/query/predicate-utils.ts @@ -0,0 +1,1415 @@ +import { Func, Value } from "./ir.js" +import type { BasicExpression, OrderBy, PropRef } from "./ir.js" +import type { LoadSubsetOptions } from "../types.js" + +/** + * Check if one where clause is a logical subset of another. + * Returns true if the subset predicate is more restrictive than (or equal to) the superset predicate. + * + * @example + * // age > 20 is subset of age > 10 (more restrictive) + * isWhereSubset(gt(ref('age'), val(20)), gt(ref('age'), val(10))) // true + * + * @example + * // age > 10 AND name = 'X' is subset of age > 10 (more conditions) + * isWhereSubset(and(gt(ref('age'), val(10)), eq(ref('name'), val('X'))), gt(ref('age'), val(10))) // true + * + * @param subset - The potentially more restrictive predicate + * @param superset - The potentially less restrictive predicate + * @returns true if subset logically implies superset + */ +export function isWhereSubset( + subset: BasicExpression | undefined, + superset: BasicExpression | undefined +): boolean { + // undefined/missing where clause means "no filter" (all data) + // Both undefined means subset relationship holds (all data ⊆ all data) + if (subset === undefined && superset === undefined) { + return true + } + + // If subset is undefined but superset is not, we're requesting ALL data + // but have only loaded SOME data - subset relationship does NOT hold + if (subset === undefined && superset !== undefined) { + return false + } + + // If superset is undefined (no filter = all data loaded), + // then any constrained subset is contained + if (superset === undefined && subset !== undefined) { + return true + } + + return isWhereSubsetInternal(subset!, superset!) 
/**
 * Build the disjunction (OR) of the given predicates.
 *
 * - no predicates → the constant `false` (an empty OR matches nothing)
 * - one predicate → that predicate, unchanged
 * - otherwise → a single `or` function over all of them
 */
function makeDisjunction(
  preds: Array<BasicExpression<boolean>>
): BasicExpression<boolean> {
  switch (preds.length) {
    case 0:
      return new Value(false)
    case 1:
      return preds[0]!
    default:
      return new Func(`or`, preds)
  }
}

/**
 * Rewrite `x IN [A, B, C]` as `x = A OR x = B OR x = C` so that IN and OR
 * can share one code path.
 */
function convertInToOr(inField: InField) {
  return makeDisjunction(
    inField.values.map(
      (value) => new Func(`eq`, [inField.ref, new Value(value)])
    )
  )
}

/**
 * Decide whether every row matching `subset` also matches `superset`.
 *
 * The check is conservative: `true` means the relationship was proven,
 * `false` means "not a subset" or "could not be determined".
 *
 * @param subset - The predicate whose result set should be contained
 * @param superset - The predicate that should contain it
 * @returns true if `subset` is provably contained in `superset`
 */
function isWhereSubsetInternal(
  subset: BasicExpression<boolean>,
  superset: BasicExpression<boolean>
): boolean {
  // `false` requests no data, and the empty set is a subset of any set
  if (subset.type === `val` && subset.value === false) {
    return true
  }

  // Structurally identical expressions trivially contain each other
  if (areExpressionsEqual(subset, superset)) {
    return true
  }

  // Superset is (A AND B): subset must be contained in EVERY conjunct.
  // Example: (age > 20) ⊆ (age > 10 AND status = 'active') is false
  // because nothing implies the status condition.
  if (superset.type === `func` && superset.name === `and`) {
    return superset.args.every((arg) =>
      isWhereSubsetInternal(subset, arg as BasicExpression<boolean>)
    )
  }

  // Subset is (A AND B): it implies each conjunct, so it is enough that
  // ANY single conjunct is contained in the superset.
  if (subset.type === `func` && subset.name === `and`) {
    return subset.args.some((arg) =>
      isWhereSubsetInternal(arg as BasicExpression<boolean>, superset)
    )
  }

  // Fast path: `field = X` or `field IN [...]` against `field IN [...]` on
  // the same field. This gives the same answer as expanding both sides into
  // ORs of equalities, but looks values up through the Set cached by
  // extractInField instead of comparing every pair of values.
  if (
    subset.type === `func` &&
    superset.type === `func` &&
    superset.name === `in`
  ) {
    const supersetIn = extractInField(superset)
    if (supersetIn) {
      const isCovered = (value: any): boolean =>
        arrayIncludesWithSet(
          supersetIn.values,
          value,
          supersetIn.primitiveSet ?? null,
          supersetIn.areAllPrimitives
        )

      if (subset.name === `in`) {
        const subsetIn = extractInField(subset)
        if (subsetIn && areRefsEqual(subsetIn.ref, supersetIn.ref)) {
          return subsetIn.values.every((value) => isCovered(value))
        }
      } else if (subset.name === `eq`) {
        const subsetEq = extractEqualityField(subset)
        if (subsetEq && areRefsEqual(subsetEq.ref, supersetIn.ref)) {
          return isCovered(subsetEq.value)
        }
      }
    }
  }

  // General case: turn x IN [A, B, C] into x = A OR x = B OR x = C
  // for unified handling of IN and OR
  if (subset.type === `func` && subset.name === `in`) {
    const inField = extractInField(subset)
    if (inField) {
      return isWhereSubsetInternal(convertInToOr(inField), superset)
    }
  }

  if (superset.type === `func` && superset.name === `in`) {
    const inField = extractInField(superset)
    if (inField) {
      return isWhereSubsetInternal(subset, convertInToOr(inField))
    }
  }

  // Subset is (A OR B): both A and B must be contained in the superset
  if (subset.type === `func` && subset.name === `or`) {
    return subset.args.every((arg) =>
      isWhereSubsetInternal(arg as BasicExpression<boolean>, superset)
    )
  }

  // Superset is (A OR B): being contained in any one disjunct is enough
  if (superset.type === `func` && superset.name === `or`) {
    return superset.args.some((arg) =>
      isWhereSubsetInternal(subset, arg as BasicExpression<boolean>)
    )
  }

  // Two comparisons (eq/gt/gte/lt/lte) on the same field
  if (subset.type === `func` && superset.type === `func`) {
    const subsetFunc = subset as Func
    const supersetFunc = superset as Func

    const subsetField = extractComparisonField(subsetFunc)
    const supersetField = extractComparisonField(supersetFunc)

    if (
      subsetField &&
      supersetField &&
      areRefsEqual(subsetField.ref, supersetField.ref)
    ) {
      return isComparisonSubset(
        subsetFunc,
        subsetField.value,
        supersetFunc,
        supersetField.value
      )
    }
  }

  // Conservative: if we can't determine, return false
  return false
}
/**
 * Helper to combine where predicates with common logic for AND/OR operations.
 *
 * Steps:
 * 1. Flatten arguments that are themselves `operation` expressions.
 * 2. Group the flattened predicates by field (see groupPredicatesByField).
 * 3. Let `simplifyFn` merge each same-field group into one predicate.
 * 4. Re-combine the pieces with `operation`.
 *
 * @param predicates - The predicates to combine
 * @param operation - `and` for intersection, `or` for union
 * @param simplifyFn - Merges predicates that all target the same field;
 *   may return `null` when it cannot simplify the group
 * @returns The combined predicate. With no input this is the identity of the
 *   operation: `true` for `and`, `false` for `or`.
 */
function combineWherePredicates(
  predicates: Array<BasicExpression<boolean>>,
  operation: `and` | `or`,
  simplifyFn: (
    preds: Array<BasicExpression<boolean>>
  ) => BasicExpression<boolean> | null
): BasicExpression<boolean> {
  // Identity element: an empty AND is `true`, an empty OR is `false`
  const identity = {
    type: `val`,
    value: operation === `and`,
  } as BasicExpression<boolean>

  if (predicates.length === 0) {
    return identity
  }

  if (predicates.length === 1) {
    return predicates[0]!
  }

  // Flatten nested expressions of the same operation
  const flatPredicates: Array<BasicExpression<boolean>> = []
  for (const pred of predicates) {
    if (pred.type === `func` && pred.name === operation) {
      flatPredicates.push(...(pred.args as Array<BasicExpression<boolean>>))
    } else {
      flatPredicates.push(pred)
    }
  }

  const simplified: Array<BasicExpression<boolean>> = []
  for (const [field, preds] of groupPredicatesByField(flatPredicates)) {
    if (field === null) {
      // Complex predicates that we can't group by field
      simplified.push(...preds)
      continue
    }

    const result = simplifyFn(preds)

    if (result === null) {
      // Simplification failed. Keep the group's predicates as they are:
      // dropping them would change the meaning of the combined predicate.
      simplified.push(...preds)
      continue
    }

    // For intersection: a contradiction in one field makes the whole AND false
    if (
      operation === `and` &&
      result.type === `val` &&
      result.value === false
    ) {
      return { type: `val`, value: false } as BasicExpression<boolean>
    }

    simplified.push(result)
  }

  if (simplified.length === 0) {
    return identity
  }

  if (simplified.length === 1) {
    return simplified[0]!
  }

  return {
    type: `func`,
    name: operation,
    args: simplified,
  } as BasicExpression<boolean>
}
/**
 * Combine multiple where predicates with OR logic (union).
 * Returns a predicate that is satisfied when any input predicate is satisfied.
 * Simplifies when possible (e.g., age > 10 OR age > 20 → age > 10).
 *
 * @example
 * // Take least restrictive
 * unionWherePredicates([gt(ref('age'), val(10)), gt(ref('age'), val(20))]) // age > 10
 *
 * @example
 * // Combine equals into IN
 * unionWherePredicates([eq(ref('age'), val(5)), eq(ref('age'), val(10))]) // age IN [5, 10]
 *
 * @param predicates - Array of where predicates to union
 * @returns Combined predicate representing the union
 */
export function unionWherePredicates(
  predicates: Array<BasicExpression<boolean>>
): BasicExpression<boolean> {
  return combineWherePredicates(predicates, `or`, unionSameFieldPredicates)
}

/**
 * Compute the difference between two where predicates: `fromPredicate AND NOT(subtractPredicate)`.
 * Returns the simplified predicate, or null if the difference cannot be simplified
 * (in which case the caller should fetch the full fromPredicate).
 *
 * @example
 * // Range difference
 * minusWherePredicates(
 *   gt(ref('age'), val(10)), // age > 10
 *   gt(ref('age'), val(20))  // age > 20
 * ) // → age > 10 AND age <= 20
 *
 * @example
 * // Set difference
 * minusWherePredicates(
 *   inOp(ref('status'), ['A', 'B', 'C', 'D']), // status IN ['A','B','C','D']
 *   inOp(ref('status'), ['B', 'C'])            // status IN ['B','C']
 * ) // → status IN ['A', 'D']
 *
 * @example
 * // Common conditions
 * minusWherePredicates(
 *   and(gt(ref('age'), val(10)), eq(ref('status'), val('active'))), // age > 10 AND status = 'active'
 *   and(gt(ref('age'), val(20)), eq(ref('status'), val('active')))  // age > 20 AND status = 'active'
 * ) // → age > 10 AND age <= 20 AND status = 'active'
 *
 * @example
 * // Complete overlap - empty result
 * minusWherePredicates(
 *   gt(ref('age'), val(20)), // age > 20
 *   gt(ref('age'), val(10))  // age > 10
 * ) // → {type: 'val', value: false} (empty set)
 *
 * @param fromPredicate - The predicate to subtract from (undefined = all data)
 * @param subtractPredicate - The predicate to subtract (undefined = nothing)
 * @returns The simplified difference, or null if cannot be simplified
 */
export function minusWherePredicates(
  fromPredicate: BasicExpression<boolean> | undefined,
  subtractPredicate: BasicExpression<boolean> | undefined
): BasicExpression<boolean> | null {
  // If nothing to subtract, return the original
  if (subtractPredicate === undefined) {
    return (
      fromPredicate ??
      ({ type: `val`, value: true } as BasicExpression<boolean>)
    )
  }

  // If from is undefined then we are asking for all data
  // so we need to load all data minus what we already loaded
  // i.e. we need to load NOT(subtractPredicate)
  if (fromPredicate === undefined) {
    return {
      type: `func`,
      name: `not`,
      args: [subtractPredicate],
    } as BasicExpression<boolean>
  }

  // fromPredicate entirely contained in subtractPredicate
  // → fromPredicate AND NOT(subtractPredicate) is the empty set
  if (isWhereSubset(fromPredicate, subtractPredicate)) {
    return { type: `val`, value: false } as BasicExpression<boolean>
  }

  // Factor out conditions shared by both sides:
  // (C AND A) - (C AND B) = C AND (A - B)
  const commonConditions = findCommonConditions(
    fromPredicate,
    subtractPredicate
  )
  if (commonConditions.length > 0) {
    const fromWithoutCommon = removeConditions(fromPredicate, commonConditions)
    const subtractWithoutCommon = removeConditions(
      subtractPredicate,
      commonConditions
    )

    // Only recurse when removing the common conditions changed at least one
    // side. A condition can be reported as common while sitting inside a
    // nested AND that the removal leaves in place; recursing on two
    // unchanged predicates would then never terminate.
    const fromUnchanged =
      fromWithoutCommon !== undefined &&
      areExpressionsEqual(fromWithoutCommon, fromPredicate)
    const subtractUnchanged =
      subtractWithoutCommon !== undefined &&
      areExpressionsEqual(subtractWithoutCommon, subtractPredicate)

    if (!(fromUnchanged && subtractUnchanged)) {
      const simplifiedDifference = minusWherePredicates(
        fromWithoutCommon,
        subtractWithoutCommon
      )

      if (simplifiedDifference !== null) {
        // Combine the simplified difference with common conditions
        return combineConditions([...commonConditions, simplifiedDifference])
      }
    }
  }

  // Two function predicates: try the same-field simplifications
  if (fromPredicate.type === `func` && subtractPredicate.type === `func`) {
    const result = minusSameFieldPredicates(fromPredicate, subtractPredicate)
    if (result !== null) {
      return result
    }
  }

  // Can't simplify - return null to indicate caller should fetch full fromPredicate
  return null
}
/**
 * Compute `fromPred AND NOT(subtractPred)` for two predicates that target
 * the same field.
 *
 * Supported shapes: IN − IN, IN − equality, equality − equality, and
 * range − range (delegated to minusRangePredicates).
 *
 * @returns The simplified difference, or null when the predicates are on
 *   different fields or the combination is not handled
 */
function minusSameFieldPredicates(
  fromPred: Func,
  subtractPred: Func
): BasicExpression<boolean> | null {
  const fromField =
    extractComparisonField(fromPred) ||
    extractEqualityField(fromPred) ||
    extractInField(fromPred)
  const subtractField =
    extractComparisonField(subtractPred) ||
    extractEqualityField(subtractPred) ||
    extractInField(subtractPred)

  // Must be on the same field
  if (
    !fromField ||
    !subtractField ||
    !areRefsEqual(fromField.ref, subtractField.ref)
  ) {
    return null
  }

  // Turn the values left over from an IN list back into a predicate:
  // nothing left → false, one value → equality, otherwise → IN
  const fromRemainingValues = (
    remaining: Array<any>
  ): BasicExpression<boolean> => {
    if (remaining.length === 0) {
      return { type: `val`, value: false } as BasicExpression<boolean>
    }
    if (remaining.length === 1) {
      return {
        type: `func`,
        name: `eq`,
        args: [fromField.ref, { type: `val`, value: remaining[0] }],
      } as BasicExpression<boolean>
    }
    return {
      type: `func`,
      name: `in`,
      args: [fromField.ref, { type: `val`, value: remaining }],
    } as BasicExpression<boolean>
  }

  if (fromPred.name === `in`) {
    const fromIn = fromField as InField

    // status IN [A,B,C,D] - status IN [B,C] = status IN [A,D]
    if (subtractPred.name === `in`) {
      const subtractIn = subtractField as InField
      return fromRemainingValues(
        fromIn.values.filter(
          (candidate) =>
            !arrayIncludesWithSet(
              subtractIn.values,
              candidate,
              subtractIn.primitiveSet ?? null,
              subtractIn.areAllPrimitives
            )
        )
      )
    }

    // status IN [A,B,C] - status = B = status IN [A,C]
    if (subtractPred.name === `eq`) {
      const excluded = (subtractField as { ref: PropRef; value: any }).value
      return fromRemainingValues(
        fromIn.values.filter((candidate) => !areValuesEqual(candidate, excluded))
      )
    }
  }

  // age = 15 - age = 15 = empty, age = 15 - age = 20 = age = 15
  if (fromPred.name === `eq` && subtractPred.name === `eq`) {
    const kept = (fromField as { ref: PropRef; value: any }).value
    const excluded = (subtractField as { ref: PropRef; value: any }).value

    return areValuesEqual(kept, excluded)
      ? ({ type: `val`, value: false } as BasicExpression<boolean>)
      : (fromPred as BasicExpression<boolean>)
  }

  // age > 10 - age > 20 = age > 10 AND age <= 20
  const fromRange = extractComparisonField(fromPred)
  const subtractRange = extractComparisonField(subtractPred)
  if (
    fromRange &&
    subtractRange &&
    areRefsEqual(fromRange.ref, subtractRange.ref)
  ) {
    return minusRangePredicates(
      fromPred,
      fromRange.value,
      subtractPred,
      subtractRange.value
    )
  }

  // Can't simplify
  return null
}
/**
 * Compute `fromFunc AND NOT(subtractFunc)` for two range comparisons on the
 * same field that bound it from the same side (both gt/gte or both lt/lte).
 *
 * The remainder is `fromFunc` combined with the complement of the
 * subtracted bound:
 *
 *   age >  10 − age >  20 → age >  10 AND age <= 20
 *   age >= 10 − age >  20 → age >= 10 AND age <= 20
 *   age <  30 − age <  20 → age >= 20 AND age <  30
 *   age <= 30 − age <  20 → age >= 20 AND age <= 30
 *
 * @param fromFunc - The comparison to subtract from
 * @param fromValue - The constant `fromFunc` compares against
 * @param subtractFunc - The comparison to subtract
 * @param subtractValue - The constant `subtractFunc` compares against
 * @returns The difference; `false` when `fromFunc` lies entirely inside
 *   `subtractFunc`; `fromFunc` itself when the two values cannot be ordered;
 *   null for unsupported operator combinations
 */
function minusRangePredicates(
  fromFunc: Func,
  fromValue: any,
  subtractFunc: Func,
  subtractValue: any
): BasicExpression<boolean> | null {
  const fromOp = fromFunc.name
  const subtractOp = subtractFunc.name

  const isLowerBound = (op: string): boolean => op === `gt` || op === `gte`
  const isUpperBound = (op: string): boolean => op === `lt` || op === `lte`
  const isInclusive = (op: string): boolean => op === `gte` || op === `lte`

  const bothLower = isLowerBound(fromOp) && isLowerBound(subtractOp)
  const bothUpper = isUpperBound(fromOp) && isUpperBound(subtractOp)
  if (!bothLower && !bothUpper) {
    // Can't simplify other combinations (eq, or bounds on opposite sides)
    return null
  }

  // NOT(x > v) is x <= v, and so on
  const complementOf: Record<string, string> = {
    gt: `lte`,
    gte: `lt`,
    lt: `gte`,
    lte: `gt`,
  }

  // When `from` includes its boundary and `subtract` excludes the same
  // boundary, exactly that boundary value remains, so equality still
  // leaves a non-empty remainder.
  const equalLeavesRemainder = isInclusive(fromOp) && !isInclusive(subtractOp)

  const hasRemainder = bothLower
    ? equalLeavesRemainder
      ? fromValue <= subtractValue
      : fromValue < subtractValue
    : equalLeavesRemainder
      ? fromValue >= subtractValue
      : fromValue > subtractValue

  if (hasRemainder) {
    const ref = (extractComparisonField(fromFunc) ||
      extractEqualityField(fromFunc))!.ref
    const complement = {
      type: `func`,
      name: complementOf[subtractOp],
      args: [ref, { type: `val`, value: subtractValue }],
    } as BasicExpression<boolean>

    // Keep the lower bound first and the upper bound second
    return {
      type: `func`,
      name: `and`,
      args: bothLower
        ? [fromFunc as BasicExpression<boolean>, complement]
        : [complement, fromFunc as BasicExpression<boolean>],
    } as BasicExpression<boolean>
  }

  const fullyCovered = bothLower
    ? equalLeavesRemainder
      ? fromValue > subtractValue
      : fromValue >= subtractValue
    : equalLeavesRemainder
      ? fromValue < subtractValue
      : fromValue <= subtractValue

  if (fullyCovered) {
    // Every value matching `from` also matches `subtract`: nothing is left
    return { type: `val`, value: false } as BasicExpression<boolean>
  }

  // The values could not be ordered (e.g. NaN or mixed types).
  // `from` itself is always a safe over-approximation of the difference.
  return fromFunc as BasicExpression<boolean>
}
/**
 * Check if one orderBy clause is a subset of another.
 * The requirement is satisfied when `subset` is a prefix of `superset`:
 * clause by clause, the expressions and compare options must match.
 *
 * @example
 * // Subset is prefix of superset
 * isOrderBySubset([{expr: age, asc}], [{expr: age, asc}, {expr: name, desc}]) // true
 *
 * @param subset - The ordering requirements to check
 * @param superset - The ordering that might satisfy the requirements
 * @returns true if subset is satisfied by superset
 */
export function isOrderBySubset(
  subset: OrderBy | undefined,
  superset: OrderBy | undefined
): boolean {
  const required = subset ?? []
  const available = superset ?? []

  // No ordering requirement is always satisfied
  if (required.length === 0) {
    return true
  }

  // A required ordering cannot be a prefix of a shorter (or missing) one
  if (available.length === 0 || required.length > available.length) {
    return false
  }

  return required.every((clause, position) => {
    const candidate = available[position]!
    return (
      areExpressionsEqual(clause.expression, candidate.expression) &&
      areCompareOptionsEqual(clause.compareOptions, candidate.compareOptions)
    )
  })
}
/**
 * Check if one limit is a subset of another.
 * Returns true if the subset limit requirements are satisfied by the superset limit.
 *
 * @example
 * isLimitSubset(10, 20) // true (requesting 10 items when 20 are available)
 * isLimitSubset(20, 10) // false (requesting 20 items when only 10 are available)
 * isLimitSubset(10, undefined) // true (requesting 10 items when unlimited are available)
 *
 * @param subset - The limit requirement to check (undefined = no limit)
 * @param superset - The limit that might satisfy the requirement (undefined = no limit)
 * @returns true if subset is satisfied by superset
 */
export function isLimitSubset(
  subset: number | undefined,
  superset: number | undefined
): boolean {
  // Unlimited superset satisfies any limit requirement
  if (superset === undefined) {
    return true
  }

  // Requesting everything can't be satisfied by a limited superset
  if (subset === undefined) {
    return false
  }

  return subset <= superset
}

/**
 * Check if one predicate (where + orderBy + limit) is a subset of another.
 * Returns true if all aspects of the subset predicate are satisfied by the superset.
 *
 * A limited superset only covers the first `limit` rows of its own where
 * clause. The first rows of a narrower where clause are not guaranteed to be
 * among them, so when the superset has a limit the where clauses must be
 * equivalent (each a subset of the other), not merely narrower.
 *
 * @example
 * // Unlimited superset: a narrower where clause is covered
 * isPredicateSubset(
 *   { where: gt(ref('age'), val(20)), limit: 10 },
 *   { where: gt(ref('age'), val(10)) }
 * ) // true
 *
 * @example
 * // Limited superset: only the same where clause is covered
 * isPredicateSubset(
 *   { where: gt(ref('age'), val(10)), limit: 10 },
 *   { where: gt(ref('age'), val(10)), limit: 20 }
 * ) // true
 * isPredicateSubset(
 *   { where: gt(ref('age'), val(20)), limit: 10 },
 *   { where: gt(ref('age'), val(10)), limit: 20 }
 * ) // false - the 20 loaded rows need not include 10 rows with age > 20
 *
 * @param subset - The predicate requirements to check
 * @param superset - The predicate that might satisfy the requirements
 * @returns true if subset is satisfied by superset
 */
export function isPredicateSubset(
  subset: LoadSubsetOptions,
  superset: LoadSubsetOptions
): boolean {
  if (!isWhereSubset(subset.where, superset.where)) {
    return false
  }

  // With a limited superset the where clauses have to match in both directions
  if (
    superset.limit !== undefined &&
    !isWhereSubset(superset.where, subset.where)
  ) {
    return false
  }

  return (
    isOrderBySubset(subset.orderBy, superset.orderBy) &&
    isLimitSubset(subset.limit, superset.limit)
  )
}

// ============================================================================
// Helper functions
// ============================================================================
/**
 * Find common conditions between two predicates.
 * Both predicates are flattened with extractAllConditions first.
 *
 * @returns The conditions of `predicate1` that also appear in `predicate2`,
 *   in the order they occur in `predicate1`, without duplicates
 */
function findCommonConditions(
  predicate1: BasicExpression<boolean>,
  predicate2: BasicExpression<boolean>
): Array<BasicExpression<boolean>> {
  const conditions2 = extractAllConditions(predicate2)
  const common: Array<BasicExpression<boolean>> = []

  for (const candidate of extractAllConditions(predicate1)) {
    const isShared = conditions2.some((other) =>
      areExpressionsEqual(candidate, other)
    )
    const alreadyListed = common.some((listed) =>
      areExpressionsEqual(listed, candidate)
    )
    if (isShared && !alreadyListed) {
      common.push(candidate)
    }
  }

  return common
}

/**
 * Extract all individual conditions from a predicate, flattening AND
 * operations at any nesting depth. A non-AND predicate is its own single
 * condition.
 */
function extractAllConditions(
  predicate: BasicExpression<boolean>
): Array<BasicExpression<boolean>> {
  if (predicate.type === `func` && predicate.name === `and`) {
    const conditions: Array<BasicExpression<boolean>> = []
    for (const arg of predicate.args) {
      conditions.push(...extractAllConditions(arg as BasicExpression<boolean>))
    }
    return conditions
  }

  return [predicate]
}

/**
 * Remove specified conditions from a predicate.
 *
 * For an AND predicate, nested ANDs are flattened before filtering, so a
 * condition is removed no matter how deeply it is nested. This matches what
 * findCommonConditions reports as common. Non-AND predicates are returned
 * unchanged.
 *
 * @returns The predicate without the given conditions, or undefined if all
 *   of its conditions were removed
 */
function removeConditions(
  predicate: BasicExpression<boolean>,
  conditionsToRemove: Array<BasicExpression<boolean>>
): BasicExpression<boolean> | undefined {
  if (predicate.type === `func` && predicate.name === `and`) {
    const remaining = extractAllConditions(predicate).filter(
      (condition) =>
        !conditionsToRemove.some((toRemove) =>
          areExpressionsEqual(condition, toRemove)
        )
    )

    if (remaining.length === 0) {
      return undefined
    }
    if (remaining.length === 1) {
      return remaining[0]!
    }
    return {
      type: `func`,
      name: `and`,
      args: remaining,
    } as BasicExpression<boolean>
  }

  // For non-AND predicates, don't remove anything
  return predicate
}
/**
 * Combine multiple conditions into a single predicate using AND logic.
 * Conditions that are themselves AND operations contribute their arguments
 * directly (one level), to avoid unnecessary nesting.
 *
 * @returns `true` for no conditions, the condition itself for a single one,
 *   otherwise an `and` over the flattened conditions
 */
function combineConditions(
  conditions: Array<BasicExpression<boolean>>
): BasicExpression<boolean> {
  if (conditions.length === 0) {
    return { type: `val`, value: true } as BasicExpression<boolean>
  }
  if (conditions.length === 1) {
    return conditions[0]!
  }

  const flattened = conditions.flatMap((condition) =>
    condition.type === `func` && condition.name === `and`
      ? (condition.args as Array<BasicExpression<boolean>>)
      : [condition]
  )

  if (flattened.length === 1) {
    return flattened[0]!
  }

  return {
    type: `func`,
    name: `and`,
    args: flattened,
  } as BasicExpression<boolean>
}

/**
 * Find the first predicate that is a comparison with the given operator
 * against the given value.
 */
function findPredicateWithOperator(
  predicates: Array<BasicExpression<boolean>>,
  operator: string,
  value: any
): BasicExpression<boolean> | undefined {
  for (const candidate of predicates) {
    if (candidate.type !== `func`) {
      continue
    }
    const func = candidate as Func
    if (func.name !== operator) {
      continue
    }
    const field = extractComparisonField(func)
    if (field && areValuesEqual(field.value, value)) {
      return candidate
    }
  }
  return undefined
}

/**
 * Structural equality of two expressions: same node type and, depending on
 * the type, equal value, equal ref path, or equal function name with
 * pairwise-equal arguments (order matters).
 */
function areExpressionsEqual(a: BasicExpression, b: BasicExpression): boolean {
  if (a.type !== b.type) {
    return false
  }

  if (a.type === `val` && b.type === `val`) {
    return areValuesEqual(a.value, b.value)
  }

  if (a.type === `ref` && b.type === `ref`) {
    return areRefsEqual(a, b)
  }

  if (a.type === `func` && b.type === `func`) {
    if (a.name !== b.name || a.args.length !== b.args.length) {
      return false
    }
    for (let i = 0; i < a.args.length; i++) {
      if (!areExpressionsEqual(a.args[i]!, b.args[i]!)) {
        return false
      }
    }
    return true
  }

  return false
}
/**
 * Equality of two constant values as used inside predicates.
 *
 * Values are equal when they are strictly equal, both NaN, or both Date
 * objects with the same timestamp. Any other pair of distinct objects or
 * arrays is not equal (reference equality only, no deep comparison).
 */
function areValuesEqual(a: any, b: any): boolean {
  if (a === b) {
    return true
  }

  // NaN is the one number that is not strictly equal to itself
  const bothNumbers = typeof a === `number` && typeof b === `number`
  if (bothNumbers && Number.isNaN(a) && Number.isNaN(b)) {
    return true
  }

  // Dates compare by timestamp
  if (a instanceof Date && b instanceof Date) {
    return a.getTime() === b.getTime()
  }

  // Everything else already failed the strict equality check above
  return false
}

/**
 * Two refs are equal when their paths have the same segments in the same order.
 */
function areRefsEqual(a: PropRef, b: PropRef): boolean {
  const left = a.path
  const right = b.path
  if (left.length !== right.length) {
    return false
  }
  for (let i = 0; i < left.length; i++) {
    if (left[i] !== right[i]) {
      return false
    }
  }
  return true
}

/**
 * Check if a value is a primitive (string, number, boolean, null, undefined)
 * Primitives can use Set for fast lookups
 */
function isPrimitive(value: any): boolean {
  if (value === null || value === undefined) {
    return true
  }
  switch (typeof value) {
    case `string`:
    case `number`:
    case `boolean`:
      return true
    default:
      return false
  }
}

/**
 * Check if all values in an array are primitives
 */
function areAllPrimitives(values: Array<any>): boolean {
  for (const value of values) {
    if (!isPrimitive(value)) {
      return false
    }
  }
  return true
}
/**
 * Check if a value is in an array, with optional pre-built Set for optimization.
 *
 * @param array - The values to search
 * @param value - The value to look for
 * @param primitiveSet - Optional Set built from `array`; when given it is
 *   used for the lookup instead of scanning the array
 * @param arrayIsAllPrimitives - Pass true when `array` is known to contain
 *   only primitives, which skips the primitive check on `value`
 */
function arrayIncludesWithSet(
  array: Array<any>,
  value: any,
  primitiveSet: Set<any> | null,
  arrayIsAllPrimitives?: boolean
): boolean {
  // Fast path: use pre-built Set for O(1) lookup
  if (primitiveSet) {
    // If the array is all primitives, only a primitive can match, and
    // Set.has on a non-primitive simply reports false
    if (arrayIsAllPrimitives || isPrimitive(value)) {
      return primitiveSet.has(value)
    }
    return false // Non-primitive can't be in primitive-only set
  }

  // Fallback: use areValuesEqual for Dates and objects
  return array.some((v) => areValuesEqual(v, value))
}

/**
 * Get the maximum of two values.
 *
 * Dates are compared by timestamp and numbers with Math.max. Any other pair
 * (strings, bigints, ...) uses the `>` operator, because Math.max coerces
 * its arguments to numbers and so turns strings into NaN.
 */
function maxValue(a: any, b: any): any {
  if (a instanceof Date && b instanceof Date) {
    return a.getTime() > b.getTime() ? a : b
  }
  if (typeof a === `number` && typeof b === `number`) {
    return Math.max(a, b)
  }
  return a > b ? a : b
}

/**
 * Get the minimum of two values.
 *
 * Dates are compared by timestamp and numbers with Math.min. Any other pair
 * (strings, bigints, ...) uses the `<` operator, because Math.min coerces
 * its arguments to numbers and so turns strings into NaN.
 */
function minValue(a: any, b: any): any {
  if (a instanceof Date && b instanceof Date) {
    return a.getTime() < b.getTime() ? a : b
  }
  if (typeof a === `number` && typeof b === `number`) {
    return Math.min(a, b)
  }
  return a < b ? a : b
}
/**
 * Compare the options of two orderBy clauses.
 * Only `direction` is compared; any other option is ignored.
 */
function areCompareOptionsEqual(
  a: { direction?: `asc` | `desc`; [key: string]: any },
  b: { direction?: `asc` | `desc`; [key: string]: any }
): boolean {
  const sameDirection = a.direction === b.direction
  return sameDirection
}

/** A comparison of a field against a constant: `ref <op> value`. */
interface ComparisonField {
  ref: PropRef
  value: any
}

/**
 * Extract field and constant from a comparison (eq, gt, gte, lt, lte) whose
 * first argument is a ref and whose second argument is a value.
 *
 * @returns The ref and the constant, or null for any other shape
 */
function extractComparisonField(func: Func): ComparisonField | null {
  switch (func.name) {
    case `eq`:
    case `gt`:
    case `gte`:
    case `lt`:
    case `lte`: {
      const [target, constant] = func.args
      if (target?.type === `ref` && constant?.type === `val`) {
        return { ref: target, value: constant.value }
      }
      return null
    }
    default:
      return null
  }
}

/**
 * Same as extractComparisonField, but only for `eq`.
 */
function extractEqualityField(func: Func): ComparisonField | null {
  return func.name === `eq` ? extractComparisonField(func) : null
}

/** A membership test of a field against a list of constants: `ref IN values`. */
interface InField {
  ref: PropRef
  values: Array<any>
  // Cached optimization data (computed once, reused many times)
  areAllPrimitives?: boolean
  primitiveSet?: Set<any> | null
}

/**
 * Extract field and value list from `ref IN [values]`.
 *
 * When the list holds more than 10 values and all of them are primitives, a
 * Set of the values is built and returned as `primitiveSet`; if that exposes
 * duplicates, `values` is replaced by the deduplicated list.
 *
 * @returns The extracted field, or null when `func` is not an `in` with a
 *   ref as first argument and an array value as second argument
 */
function extractInField(func: Func): InField | null {
  if (func.name !== `in`) {
    return null
  }

  const [target, list] = func.args
  if (
    target?.type !== `ref` ||
    list?.type !== `val` ||
    !Array.isArray(list.value)
  ) {
    return null
  }

  let values: Array<any> = list.value
  const allPrimitives = areAllPrimitives(values)
  let primitiveSet: Set<any> | null = null

  if (allPrimitives && values.length > 10) {
    primitiveSet = new Set(values)
    // The Set is smaller than the list only if the list had duplicates
    if (primitiveSet.size < values.length) {
      values = Array.from(primitiveSet)
    }
  }

  return {
    ref: target,
    values,
    areAllPrimitives: allPrimitives,
    primitiveSet,
  }
}
/**
 * Decide whether one comparison on a field is contained in another
 * comparison on the same field.
 *
 * @param subsetFunc - Comparison (eq/gt/gte/lt/lte) to test
 * @param subsetValue - The constant `subsetFunc` compares against
 * @param supersetFunc - Comparison that should contain it
 * @param supersetValue - The constant `supersetFunc` compares against
 * @returns true if every value satisfying the subset comparison also
 *   satisfies the superset comparison; false for unhandled combinations
 */
function isComparisonSubset(
  subsetFunc: Func,
  subsetValue: any,
  supersetFunc: Func,
  supersetValue: any
): boolean {
  // Keyed as "<subset operator>/<superset operator>"
  switch (`${subsetFunc.name}/${supersetFunc.name}`) {
    // field = X is a subset of field = X only
    case `eq/eq`:
      // Primitives use strict equality; everything else goes through areValuesEqual
      return isPrimitive(subsetValue) && isPrimitive(supersetValue)
        ? subsetValue === supersetValue
        : areValuesEqual(subsetValue, supersetValue)

    // Lower bounds where sharing the boundary value is fine:
    // field > 20 ⊆ field > 10, field >= 20 ⊆ field >= 10,
    // field > 10 ⊆ field >= 10, field = 15 ⊆ field >= 10
    case `gt/gt`:
    case `gte/gte`:
    case `gt/gte`:
    case `eq/gte`:
      return subsetValue >= supersetValue

    // Lower bounds where the superset excludes its boundary:
    // field = 15 ⊆ field > 10, field >= 11 ⊆ field > 10
    case `eq/gt`:
    case `gte/gt`:
      return subsetValue > supersetValue

    // Upper bounds where sharing the boundary value is fine:
    // field < 10 ⊆ field < 20, field <= 10 ⊆ field <= 20,
    // field < 10 ⊆ field <= 10, field = 5 ⊆ field <= 10
    case `lt/lt`:
    case `lte/lte`:
    case `lt/lte`:
    case `eq/lte`:
      return subsetValue <= supersetValue

    // Upper bounds where the superset excludes its boundary:
    // field = 5 ⊆ field < 10, field <= 9 ⊆ field < 10
    case `eq/lt`:
    case `lte/lt`:
      return subsetValue < supersetValue

    default:
      return false
  }
}
/**
 * Group predicates by the field they constrain.
 *
 * Comparisons (eq/gt/gte/lt/lte) and IN predicates are keyed by their ref
 * path joined with `.`; everything else is collected under the `null` key.
 */
function groupPredicatesByField(
  predicates: Array<BasicExpression<boolean>>
): Map<string | null, Array<BasicExpression<boolean>>> {
  const groups = new Map<string | null, Array<BasicExpression<boolean>>>()

  for (const pred of predicates) {
    const field =
      pred.type === `func`
        ? (extractComparisonField(pred as Func) ??
          extractInField(pred as Func))
        : null
    const fieldKey = field ? field.ref.path.join(`.`) : null

    const group = groups.get(fieldKey)
    if (group) {
      group.push(pred)
    } else {
      groups.set(fieldKey, [pred])
    }
  }

  return groups
}

/**
 * Union (OR) of predicates that all constrain the same field.
 *
 * - Lower bounds (gt/gte) collapse into the least restrictive one; likewise
 *   upper bounds (lt/lte). Bounds whose values cannot be ordered are kept
 *   as separate disjuncts.
 * - Equalities and IN lists are merged into one deduplicated IN
 *   (or a single equality).
 * - Anything else is carried over unchanged.
 *
 * Every input predicate is represented in the result, so the returned
 * predicate matches exactly the rows matched by at least one input.
 *
 * @param predicates - Predicates on one field, as grouped by groupPredicatesByField
 * @returns The simplified union; `false` for an empty input
 */
function unionSameFieldPredicates(
  predicates: Array<BasicExpression<boolean>>
): BasicExpression<boolean> | null {
  if (predicates.length === 1) {
    return predicates[0]!
  }

  interface Bound {
    pred: BasicExpression<boolean>
    value: any
    inclusive: boolean
  }

  // Pick the less restrictive of two bounds on the same side,
  // or null when their values cannot be ordered
  const pickLooser = (
    current: Bound,
    candidate: Bound,
    side: `lower` | `upper`
  ): Bound | null => {
    if (areValuesEqual(current.value, candidate.value)) {
      // Same boundary: the inclusive variant admits one more value
      return current.inclusive || !candidate.inclusive ? current : candidate
    }
    if (current.value < candidate.value) {
      return side === `lower` ? current : candidate
    }
    if (current.value > candidate.value) {
      return side === `lower` ? candidate : current
    }
    return null
  }

  let lower: Bound | null = null
  let upper: Bound | null = null
  const otherPredicates: Array<BasicExpression<boolean>> = []

  // Equalities and IN lists
  const discreteSources: Array<BasicExpression<boolean>> = []
  const discreteValues: Array<any> = []
  const seenPrimitives = new Set<any>()
  let discreteRef: PropRef | null = null

  const addDiscreteValue = (value: any): void => {
    if (isPrimitive(value)) {
      if (!seenPrimitives.has(value)) {
        seenPrimitives.add(value)
        discreteValues.push(value)
      }
    } else if (!discreteValues.some((seen) => areValuesEqual(seen, value))) {
      discreteValues.push(value)
    }
  }

  for (const pred of predicates) {
    if (pred.type !== `func`) {
      otherPredicates.push(pred)
      continue
    }

    const func = pred as Func
    const comparison = extractComparisonField(func)

    if (comparison) {
      if (func.name === `eq`) {
        discreteSources.push(pred)
        if (discreteRef === null) {
          discreteRef = comparison.ref
        }
        addDiscreteValue(comparison.value)
      } else if (func.name === `gt` || func.name === `gte`) {
        const candidate: Bound = {
          pred,
          value: comparison.value,
          inclusive: func.name === `gte`,
        }
        const merged: Bound | null =
          lower === null ? candidate : pickLooser(lower, candidate, `lower`)
        if (merged) {
          lower = merged
        } else {
          otherPredicates.push(pred)
        }
      } else if (func.name === `lt` || func.name === `lte`) {
        const candidate: Bound = {
          pred,
          value: comparison.value,
          inclusive: func.name === `lte`,
        }
        const merged: Bound | null =
          upper === null ? candidate : pickLooser(upper, candidate, `upper`)
        if (merged) {
          upper = merged
        } else {
          otherPredicates.push(pred)
        }
      } else {
        otherPredicates.push(pred)
      }
      continue
    }

    const inField = extractInField(func)
    if (inField) {
      discreteSources.push(pred)
      if (discreteRef === null) {
        discreteRef = inField.ref
      }
      for (const value of inField.values) {
        addDiscreteValue(value)
      }
      continue
    }

    otherPredicates.push(pred)
  }

  const result: Array<BasicExpression<boolean>> = []

  if (lower) {
    result.push(lower.pred)
  }
  if (upper) {
    result.push(upper.pred)
  }

  if (discreteSources.length === 1) {
    // A lone equality / IN needs no rewriting
    result.push(discreteSources[0]!)
  } else if (discreteValues.length === 1) {
    result.push({
      type: `func`,
      name: `eq`,
      args: [discreteRef, { type: `val`, value: discreteValues[0] }],
    } as BasicExpression<boolean>)
  } else if (discreteValues.length > 1) {
    result.push({
      type: `func`,
      name: `in`,
      args: [discreteRef, { type: `val`, value: discreteValues }],
    } as BasicExpression<boolean>)
  }
  // (several empty IN lists contribute nothing: they match no rows)

  result.push(...otherPredicates)

  if (result.length === 0) {
    // A union over nothing matches nothing
    return { type: `val`, value: false } as BasicExpression<boolean>
  }

  if (result.length === 1) {
    return result[0]!
  }

  return {
    type: `func`,
    name: `or`,
    args: result,
  } as BasicExpression<boolean>
}
findPredicateWithOperator(predicates, `lte`, minLte) + : findPredicateWithOperator(predicates, `lt`, minLt) + if (pred) result.push(pred) + } else if (minLt !== null) { + const pred = findPredicateWithOperator(predicates, `lt`, minLt) + if (pred) result.push(pred) + } else if (minLte !== null) { + const pred = findPredicateWithOperator(predicates, `lte`, minLte) + if (pred) result.push(pred) + } + + // Add single eq value + if (eqValues.size === 1 && inValues.size === 0) { + const pred = findPredicateWithOperator(predicates, `eq`, [...eqValues][0]) + if (pred) result.push(pred) + } + + // Add IN if only IN values + if (eqValues.size === 0 && inValues.size > 0) { + result.push( + predicates.find((p) => { + if (p.type === `func`) { + return (p as Func).name === `in` + } + return false + })! + ) + } + + // Add other predicates + result.push(...otherPredicates) + + if (result.length === 0) { + return { type: `val`, value: true } as BasicExpression + } + + if (result.length === 1) { + return result[0]! + } + + return { + type: `func`, + name: `or`, + args: result, + } as BasicExpression +} diff --git a/packages/db/src/query/subset-dedupe.ts b/packages/db/src/query/subset-dedupe.ts new file mode 100644 index 000000000..fa8172559 --- /dev/null +++ b/packages/db/src/query/subset-dedupe.ts @@ -0,0 +1,228 @@ +import { + isPredicateSubset, + isWhereSubset, + minusWherePredicates, + unionWherePredicates, +} from "./predicate-utils.js" +import type { BasicExpression } from "./ir.js" +import type { LoadSubsetOptions } from "../types.js" + +/** + * Deduplicated wrapper for a loadSubset function. + * Tracks what data has been loaded and avoids redundant calls by applying + * subset logic to predicates. 
/**
 * Deduplicated wrapper for a loadSubset function.
 * Tracks what data has been loaded and avoids redundant calls by applying
 * subset logic to predicates.
 *
 * A request is answered without calling the wrapped function when it is
 * covered by (checked in this order):
 * 1. a previous unlimited call without a where clause (everything is loaded),
 * 2. the combined where predicate of all previous unlimited calls,
 * 3. a previous limited call (only consulted when the request has a limit),
 * 4. a request that is still in flight (its promise is returned).
 *
 * @example
 * const dedupe = new DeduplicatedLoadSubset(myLoadSubset)
 *
 * // First call - fetches data
 * await dedupe.loadSubset({ where: gt(ref('age'), val(10)) })
 *
 * // Second call - subset of first, returns true immediately
 * await dedupe.loadSubset({ where: gt(ref('age'), val(20)) })
 *
 * // Clear state to start fresh
 * dedupe.reset()
 */
export class DeduplicatedLoadSubset {
  // The underlying loadSubset function to wrap
  private readonly _loadSubset: (
    options: LoadSubsetOptions
  ) => true | Promise<void>

  // Combined where predicate for all unlimited calls (no limit)
  private unlimitedWhere: BasicExpression | undefined = undefined

  // Flag to track if we've loaded all data (unlimited call with no where clause)
  private hasLoadedAllData = false

  // List of all limited calls (with limit, possibly with orderBy)
  // Entries are clones of the caller's options so later caller mutations
  // cannot change the recorded history
  private limitedCalls: Array<LoadSubsetOptions> = []

  // Track in-flight calls to prevent concurrent duplicate requests
  // `options` is the predicate as REQUESTED by the caller (not the narrowed
  // predicate sent to the backend), so subset matching against concurrent
  // requests sees everything the in-flight call will make available
  private inflightCalls: Array<{
    options: LoadSubsetOptions
    promise: Promise<void>
  }> = []

  // Generation counter to invalidate in-flight requests after reset()
  // When reset() is called, this increments, and any in-flight completion handlers
  // check if their captured generation matches before updating tracking state
  private generation = 0

  /**
   * @param loadSubset - The function to wrap. It returns `true` when it
   * completed synchronously, or a Promise that settles when loading finishes.
   */
  constructor(
    loadSubset: (options: LoadSubsetOptions) => true | Promise<void>
  ) {
    this._loadSubset = loadSubset
  }

  /**
   * Load a subset of data, with automatic deduplication based on previously
   * loaded predicates and in-flight requests.
   *
   * This is an arrow-function property, so it can be safely passed as a callback
   * without losing its `this` context (e.g., `loadSubset: dedupe.loadSubset` in a
   * sync config).
   *
   * @param options - The predicate options (where, orderBy, limit)
   * @returns true if data is already loaded, or a Promise that resolves when data is loaded
   */
  loadSubset = (options: LoadSubsetOptions): true | Promise<void> => {
    // If we've loaded all data, everything is covered
    if (this.hasLoadedAllData) {
      return true
    }

    // Check against unlimited combined predicate
    // If we've loaded all data matching a where clause, we don't need to refetch subsets
    if (this.unlimitedWhere !== undefined && options.where !== undefined) {
      if (isWhereSubset(options.where, this.unlimitedWhere)) {
        return true // Data already loaded via unlimited call
      }
    }

    // Check against limited calls
    if (options.limit !== undefined) {
      const alreadyLoaded = this.limitedCalls.some((loaded) =>
        isPredicateSubset(options, loaded)
      )

      if (alreadyLoaded) {
        return true // Already loaded
      }
    }

    // Check against in-flight calls using the same subset logic as resolved calls
    // This prevents duplicate requests when concurrent calls have subset relationships
    const matchingInflight = this.inflightCalls.find((inflight) =>
      isPredicateSubset(options, inflight.options)
    )

    if (matchingInflight !== undefined) {
      // An in-flight call will load data that covers this request
      // Return the same promise so this caller waits for the data to load
      // The in-flight promise already handles tracking updates when it completes
      return matchingInflight.promise
    }

    // Not fully covered by existing data.
    // Two independent clones are needed:
    // - trackingOptions keeps the predicate exactly as requested. It is what we
    //   record as loaded and what concurrent requests are matched against.
    //   Already-loaded data plus the narrowed request below together cover it,
    //   and keeping `where === undefined` intact lets updateTracking recognise
    //   a "load everything" request.
    // - loadOptions is what is sent to the backend and may be narrowed so that
    //   only the missing data is fetched.
    const trackingOptions = cloneOptions(options)
    const loadOptions = cloneOptions(options)
    if (this.unlimitedWhere !== undefined && options.limit === undefined) {
      // Compute difference to get only the missing data
      // We can only do this for unlimited queries
      // and we can only remove data that was loaded from unlimited queries
      // because with limited queries we have no way to express that we already
      // loaded part of the matching data.
      // NOTE(review): minusWherePredicates presumably returns null when the
      // difference cannot be expressed - in that case the original predicate is sent.
      loadOptions.where =
        minusWherePredicates(loadOptions.where, this.unlimitedWhere) ??
        loadOptions.where
    }

    // Call underlying loadSubset to load the missing data
    const resultPromise = this._loadSubset(loadOptions)

    // Handle both sync (true) and async (Promise) return values
    if (resultPromise === true) {
      // Sync return - update tracking synchronously
      // trackingOptions is already a private clone, safe from caller mutation
      this.updateTracking(trackingOptions)
      return true
    }

    // Async return - track the promise and update tracking after it resolves

    // Capture the current generation - this lets us detect if reset() was called
    // while this request was in-flight, so we can skip updating tracking state
    const capturedGeneration = this.generation

    // We need a reference to the in-flight entry so we can remove it later
    const inflightEntry: {
      options: LoadSubsetOptions
      promise: Promise<void>
    } = {
      options: trackingOptions, // The requested predicate, for subset matching
      promise: resultPromise
        .then((result) => {
          // Only update tracking if this request is still from the current generation
          // If reset() was called, the generation will have incremented and we should
          // not repopulate the state that was just cleared
          if (capturedGeneration === this.generation) {
            this.updateTracking(trackingOptions)
          }
          return result
        })
        .finally(() => {
          // Always remove from in-flight array on completion OR rejection
          // This ensures failed requests can be retried instead of being cached forever
          const index = this.inflightCalls.indexOf(inflightEntry)
          if (index !== -1) {
            this.inflightCalls.splice(index, 1)
          }
        }),
    }

    // Store the in-flight entry so concurrent subset calls can wait for it
    this.inflightCalls.push(inflightEntry)
    return inflightEntry.promise
  }

  /**
   * Reset all tracking state.
   * Clears the history of loaded predicates and in-flight calls.
   * Use this when you want to start fresh, for example after clearing the underlying data store.
   *
   * Note: Any in-flight requests will still complete, but they will not update the tracking
   * state after the reset. This prevents old requests from repopulating cleared state.
   */
  reset(): void {
    this.unlimitedWhere = undefined
    this.hasLoadedAllData = false
    this.limitedCalls = []
    this.inflightCalls = []
    // Increment generation to invalidate any in-flight completion handlers
    // This ensures requests that were started before reset() don't repopulate the state
    this.generation++
  }

  /**
   * Record that the data described by `options` is now loaded.
   *
   * @param options - A private clone of the requested options; limited calls
   * store this object directly.
   */
  private updateTracking(options: LoadSubsetOptions): void {
    // Update tracking based on whether this was a limited or unlimited call
    if (options.limit === undefined) {
      // Unlimited call - update combined where predicate
      // orderBy is not recorded for unlimited calls; only the where predicate is tracked
      if (options.where === undefined) {
        // No where clause = all data loaded; finer-grained state is no longer needed
        this.hasLoadedAllData = true
        this.unlimitedWhere = undefined
        this.limitedCalls = []
        this.inflightCalls = []
      } else if (this.unlimitedWhere === undefined) {
        this.unlimitedWhere = options.where
      } else {
        this.unlimitedWhere = unionWherePredicates([
          this.unlimitedWhere,
          options.where,
        ])
      }
    } else {
      // Limited call - add to list for future subset checks
      // Options are already cloned by caller to prevent mutation issues
      this.limitedCalls.push(options)
    }
  }
}
/**
 * Creates a shallow copy of a LoadSubsetOptions object.
 *
 * Callers often reuse the same options object and mutate properties like
 * `limit` or `where` between calls. Copying the object means later
 * reassignments on the caller's object do not affect the copy.
 *
 * Every own property is carried over, not just `where`/`orderBy`/`limit`:
 * DeduplicatedLoadSubset forwards the copy to the wrapped loadSubset function,
 * so dropping a field here (e.g. `subscription`) would hide it from the backend.
 *
 * The copy is shallow: nested values such as the `where` expression or the
 * `orderBy` array are shared with the original, not duplicated.
 *
 * @param options - The options to copy
 * @returns A new object with the same property values as `options`
 */
export function cloneOptions(options: LoadSubsetOptions): LoadSubsetOptions {
  return { ...options }
}
+ }) + commit() + }) + + collection.startSyncImmediate() + await collection.stateWhenReady() + + return collection + } + + describe.each([ + [`with auto-indexing`, `eager`], + [`without auto-indexing`, `off`], + ])(`%s`, (testName, autoIndex) => { + describe(`where clause without orderBy or limit`, () => { + it(`should return all items when no where clause is provided`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection) + + expect(result).toHaveLength(5) + expect(result?.map((change) => change.value.name)).toEqual([ + `Alice`, + `Bob`, + `Charlie`, + `David`, + `Eve`, + ]) + }) + + it(`should filter items based on where clause`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + where: new Func(`eq`, [new PropRef([`status`]), new Value(`active`)]), + }) + + expect(result).toHaveLength(3) + expect(result?.map((change) => change.value.name)).toEqual([ + `Alice`, + `Charlie`, + `David`, + ]) + }) + + it(`should filter items based on numeric where clause`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + where: new Func(`gt`, [new PropRef([`age`]), new Value(25)]), + }) + + expect(result).toHaveLength(3) + expect(result?.map((change) => change.value.name)).toEqual([ + `Bob`, + `Charlie`, + `Eve`, + ]) + }) + }) + + describe(`orderBy without limit and no where clause`, () => { + it(`should return all items ordered by name ascending`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + orderBy: [ + { + expression: new PropRef([`name`]), + compareOptions: { ...DEFAULT_COMPARE_OPTIONS, direction: `asc` }, + }, + ], + }) + + 
expect(result).toHaveLength(5) + expect(result?.map((change) => change.value.name)).toEqual([ + `Alice`, + `Bob`, + `Charlie`, + `David`, + `Eve`, + ]) + }) + + it(`should return all items ordered by score descending`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + orderBy: [ + { + expression: new PropRef([`score`]), + compareOptions: { ...DEFAULT_COMPARE_OPTIONS, direction: `desc` }, + }, + ], + }) + + expect(result).toHaveLength(5) + expect(result?.map((change) => change.value.name)).toEqual([ + `Alice`, // score: 100 + `Eve`, // score: 95 + `Charlie`, // score: 90 + `Bob`, // score: 80 + `David`, // score: 70 + ]) + }) + + it(`should return all items ordered by age ascending`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + orderBy: [ + { + expression: new PropRef([`age`]), + compareOptions: { ...DEFAULT_COMPARE_OPTIONS, direction: `asc` }, + }, + ], + }) + + expect(result).toHaveLength(5) + expect(result?.map((change) => change.value.name)).toEqual([ + `David`, // age: 20 + `Alice`, // age: 25 + `Eve`, // age: 28 + `Bob`, // age: 30 + `Charlie`, // age: 35 + ]) + }) + }) + + describe(`orderBy with limit and no where clause`, () => { + it(`should return top 3 items ordered by score descending`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + orderBy: [ + { + expression: new PropRef([`score`]), + compareOptions: { ...DEFAULT_COMPARE_OPTIONS, direction: `desc` }, + }, + ], + limit: 3, + }) + + expect(result).toHaveLength(3) + expect(result?.map((change) => change.value.name)).toEqual([ + `Alice`, // score: 100 + `Eve`, // score: 95 + `Charlie`, // score: 90 + ]) + }) + }) + + describe(`orderBy with limit and where clause`, () 
=> { + it(`should return top active users ordered by score descending`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + where: new Func(`eq`, [new PropRef([`status`]), new Value(`active`)]), + orderBy: [ + { + expression: new PropRef([`score`]), + compareOptions: { ...DEFAULT_COMPARE_OPTIONS, direction: `desc` }, + }, + ], + limit: 2, + }) + + expect(result).toHaveLength(2) + expect(result?.map((change) => change.value.name)).toEqual([ + `Alice`, // score: 100, status: active + `Charlie`, // score: 90, status: active + ]) + }) + + it(`should return top users over 25 ordered by age ascending`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + where: new Func(`gt`, [new PropRef([`age`]), new Value(25)]), + orderBy: [ + { + expression: new PropRef([`age`]), + compareOptions: { ...DEFAULT_COMPARE_OPTIONS, direction: `asc` }, + }, + ], + limit: 2, + }) + + expect(result).toHaveLength(2) + expect(result?.map((change) => change.value.name)).toEqual([ + `Eve`, // age: 28 + `Bob`, // age: 30 + ]) + }) + + it(`should handle multi-column orderBy with where clause`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + where: new Func(`eq`, [new PropRef([`status`]), new Value(`active`)]), + orderBy: [ + { + expression: new PropRef([`score`]), + compareOptions: { ...DEFAULT_COMPARE_OPTIONS, direction: `desc` }, + }, + { + expression: new PropRef([`age`]), + compareOptions: { ...DEFAULT_COMPARE_OPTIONS, direction: `asc` }, + }, + ], + limit: 2, + }) + + expect(result).toHaveLength(2) + // Should be ordered by score desc, then age asc for ties + expect(result?.map((change) => change.value.name)).toEqual([ + `Alice`, // score: 100, age: 25 + `Charlie`, 
// score: 90, age: 35 + ]) + }) + }) + + describe(`error cases`, () => { + it(`should throw error when limit is provided without orderBy`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + expect(() => { + currentStateAsChanges(collection, { + limit: 5, + }) + }).toThrow(`limit cannot be used without orderBy`) + }) + + it(`should throw error when limit is provided without orderBy even with where clause`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + expect(() => { + currentStateAsChanges(collection, { + where: new Func(`eq`, [ + new PropRef([`status`]), + new Value(`active`), + ]), + limit: 3, + }) + }).toThrow(`limit cannot be used without orderBy`) + }) + }) + + describe(`optimizedOnly option`, () => { + it(`should return undefined when optimizedOnly is true and no index is available`, async () => { + // Only test this with auto-indexing disabled + if (autoIndex === `off`) { + const collection = await createAndPopulateCollection(`off`) + + const result = currentStateAsChanges(collection, { + orderBy: [ + { + expression: new PropRef([`score`]), + compareOptions: { + ...DEFAULT_COMPARE_OPTIONS, + direction: `desc`, + }, + }, + ], + limit: 1, + optimizedOnly: true, + }) + + expect(result).toBeUndefined() + } + }) + + it(`should return results when optimizedOnly is true and index is available`, async () => { + // Only test this with auto-indexing enabled + if (autoIndex === `eager`) { + const collection = await createAndPopulateCollection(`eager`) + + const result = currentStateAsChanges(collection, { + orderBy: [ + { + expression: new PropRef([`score`]), + compareOptions: { + ...DEFAULT_COMPARE_OPTIONS, + direction: `desc`, + }, + }, + ], + limit: 1, + optimizedOnly: true, + }) + + expect(result).toHaveLength(1) + expect(result?.[0]?.value.name).toBe(`Alice`) + } + }) + }) + + describe(`edge cases`, () => { + it(`should handle empty collection`, 
() => { + const collection = createCollection({ + id: `test-collection-empty-${autoIndex}`, + getKey: (user) => user.id, + autoIndex: autoIndex as `eager` | `off`, + sync: { + sync: mockSync, + }, + }) + + // Don't populate the collection + collection.startSyncImmediate() + + const result = currentStateAsChanges(collection) + + expect(result).toHaveLength(0) + }) + + it(`should handle limit larger than collection size`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + orderBy: [ + { + expression: new PropRef([`name`]), + compareOptions: { ...DEFAULT_COMPARE_OPTIONS, direction: `asc` }, + }, + ], + limit: 10, // More than the 5 items in collection + }) + + expect(result).toHaveLength(5) + expect(result?.map((change) => change.value.name)).toEqual([ + `Alice`, + `Bob`, + `Charlie`, + `David`, + `Eve`, + ]) + }) + + it(`should handle limit of 0`, async () => { + const collection = await createAndPopulateCollection( + autoIndex as `eager` | `off` + ) + + const result = currentStateAsChanges(collection, { + orderBy: [ + { + expression: new PropRef([`name`]), + compareOptions: { ...DEFAULT_COMPARE_OPTIONS, direction: `asc` }, + }, + ], + limit: 0, + }) + + expect(result).toHaveLength(0) + }) + }) + }) +}) diff --git a/packages/db/tests/predicate-utils.test.ts b/packages/db/tests/predicate-utils.test.ts new file mode 100644 index 000000000..bf973f15c --- /dev/null +++ b/packages/db/tests/predicate-utils.test.ts @@ -0,0 +1,1115 @@ +import { describe, expect, it } from "vitest" +import { + isLimitSubset, + isOrderBySubset, + isPredicateSubset, + isWhereSubset, + minusWherePredicates, + unionWherePredicates, +} from "../src/query/predicate-utils" +import { Func, PropRef, Value } from "../src/query/ir" +import type { BasicExpression, OrderBy, OrderByClause } from "../src/query/ir" +import type { LoadSubsetOptions } from "../src/types" + +// Helper functions to 
/** Builds a property reference; a bare string is treated as a one-segment path. */
function ref(path: string | Array<string>): PropRef {
  return new PropRef(typeof path === `string` ? [path] : path)
}

/** Wraps a literal in a Value expression. */
function val(value: any): Value {
  return new Value(value)
}

/** Builds a function-call expression with the given operator name and arguments. */
function func(name: string, ...args: Array<BasicExpression>): Func {
  return new Func(name, args)
}

/** `left = right` */
function eq(left: BasicExpression, right: BasicExpression): Func {
  return func(`eq`, left, right)
}

/** `left > right` */
function gt(left: BasicExpression, right: BasicExpression): Func {
  return func(`gt`, left, right)
}

/** `left >= right` */
function gte(left: BasicExpression, right: BasicExpression): Func {
  return func(`gte`, left, right)
}

/** `left < right` */
function lt(left: BasicExpression, right: BasicExpression): Func {
  return func(`lt`, left, right)
}

/** `left <= right` */
function lte(left: BasicExpression, right: BasicExpression): Func {
  return func(`lte`, left, right)
}

/** Conjunction of all arguments. */
function and(...args: Array<BasicExpression>): Func {
  return func(`and`, ...args)
}

/** Disjunction of all arguments. */
function or(...args: Array<BasicExpression>): Func {
  return func(`or`, ...args)
}

/** `left IN values`; the whole array is wrapped as a single Value argument. */
function inOp(left: BasicExpression, values: Array<unknown>): Func {
  return func(`in`, left, val(values))
}

/**
 * Builds a single orderBy clause with fixed `nulls: last` and
 * `stringSort: lexical` options; only the direction varies (default `asc`).
 */
function orderByClause(
  expression: BasicExpression,
  direction: `asc` | `desc` = `asc`
): OrderByClause {
  return {
    expression,
    compareOptions: {
      direction,
      nulls: `last`,
      stringSort: `lexical`,
    },
  }
}
for identical expressions`, () => { + const expr = gt(ref(`age`), val(10)) + expect(isWhereSubset(expr, expr)).toBe(true) + }) + + it(`should return true for structurally equal expressions`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(10)), gt(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should return true when subset is false`, () => { + // When subset is false the result will always be the empty set + // and the empty set is a subset of any set + expect(isWhereSubset(val(false), gt(ref(`age`), val(10)))).toBe(true) + }) + }) + + describe(`comparison operators`, () => { + it(`should handle gt: age > 20 is subset of age > 10`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(20)), gt(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle gt: age > 10 is NOT subset of age > 20`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(10)), gt(ref(`age`), val(20))) + ).toBe(false) + }) + + it(`should handle gte: age >= 20 is subset of age >= 10`, () => { + expect( + isWhereSubset(gte(ref(`age`), val(20)), gte(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle lt: age < 10 is subset of age < 20`, () => { + expect( + isWhereSubset(lt(ref(`age`), val(10)), lt(ref(`age`), val(20))) + ).toBe(true) + }) + + it(`should handle lt: age < 20 is NOT subset of age < 10`, () => { + expect( + isWhereSubset(lt(ref(`age`), val(20)), lt(ref(`age`), val(10))) + ).toBe(false) + }) + + it(`should handle lte: age <= 10 is subset of age <= 20`, () => { + expect( + isWhereSubset(lte(ref(`age`), val(10)), lte(ref(`age`), val(20))) + ).toBe(true) + }) + + it(`should handle eq: age = 15 is subset of age > 10`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(15)), gt(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle eq: age = 5 is NOT subset of age > 10`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(5)), gt(ref(`age`), val(10))) + ).toBe(false) + }) + + it(`should handle eq: age = 15 is subset of age >= 15`, () => { + expect( + 
isWhereSubset(eq(ref(`age`), val(15)), gte(ref(`age`), val(15))) + ).toBe(true) + }) + + it(`should handle eq: age = 15 is subset of age < 20`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(15)), lt(ref(`age`), val(20))) + ).toBe(true) + }) + + it(`should handle mixed operators: gt vs gte`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(10)), gte(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle mixed operators: gte vs gt`, () => { + expect( + isWhereSubset(gte(ref(`age`), val(11)), gt(ref(`age`), val(10))) + ).toBe(true) + expect( + isWhereSubset(gte(ref(`age`), val(10)), gt(ref(`age`), val(10))) + ).toBe(false) + }) + }) + + describe(`IN operator`, () => { + it(`should handle eq vs in: age = 5 is subset of age IN [5, 10, 15]`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(5)), inOp(ref(`age`), [5, 10, 15])) + ).toBe(true) + }) + + it(`should handle eq vs in: age = 20 is NOT subset of age IN [5, 10, 15]`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(20)), inOp(ref(`age`), [5, 10, 15])) + ).toBe(false) + }) + + it(`should handle in vs in: [5, 10] is subset of [5, 10, 15]`, () => { + expect( + isWhereSubset(inOp(ref(`age`), [5, 10]), inOp(ref(`age`), [5, 10, 15])) + ).toBe(true) + }) + + it(`should handle in vs in: [5, 20] is NOT subset of [5, 10, 15]`, () => { + expect( + isWhereSubset(inOp(ref(`age`), [5, 20]), inOp(ref(`age`), [5, 10, 15])) + ).toBe(false) + }) + + it(`should handle empty IN array: age IN [] is subset of age IN []`, () => { + expect(isWhereSubset(inOp(ref(`age`), []), inOp(ref(`age`), []))).toBe( + true + ) + }) + + it(`should handle empty IN array: age IN [] is subset of age IN [5, 10]`, () => { + expect( + isWhereSubset(inOp(ref(`age`), []), inOp(ref(`age`), [5, 10])) + ).toBe(true) + }) + + it(`should handle empty IN array: age IN [5, 10] is NOT subset of age IN []`, () => { + expect( + isWhereSubset(inOp(ref(`age`), [5, 10]), inOp(ref(`age`), [])) + ).toBe(false) + }) + + it(`should handle singleton 
IN array: age = 5 is subset of age IN [5]`, () => { + expect(isWhereSubset(eq(ref(`age`), val(5)), inOp(ref(`age`), [5]))).toBe( + true + ) + }) + + it(`should handle singleton IN array: age = 10 is NOT subset of age IN [5]`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(10)), inOp(ref(`age`), [5])) + ).toBe(false) + }) + + it(`should handle singleton IN array: age IN [5] is subset of age IN [5, 10, 15]`, () => { + expect( + isWhereSubset(inOp(ref(`age`), [5]), inOp(ref(`age`), [5, 10, 15])) + ).toBe(true) + }) + + it(`should handle singleton IN array: age IN [20] is NOT subset of age IN [5, 10, 15]`, () => { + expect( + isWhereSubset(inOp(ref(`age`), [20]), inOp(ref(`age`), [5, 10, 15])) + ).toBe(false) + }) + + it(`should handle singleton IN array: age IN [5, 10, 15] is NOT subset of age IN [5]`, () => { + expect( + isWhereSubset(inOp(ref(`age`), [5, 10, 15]), inOp(ref(`age`), [5])) + ).toBe(false) + }) + }) + + describe(`AND combinations`, () => { + it(`should handle AND in subset: (A AND B) is subset of A`, () => { + expect( + isWhereSubset( + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + gt(ref(`age`), val(10)) + ) + ).toBe(true) + }) + + it(`should handle AND in subset: (A AND B) is NOT subset of C (different field)`, () => { + expect( + isWhereSubset( + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + eq(ref(`name`), val(`John`)) + ) + ).toBe(false) + }) + + it(`should handle AND in superset: A is subset of (A AND B) is false (superset is more restrictive)`, () => { + expect( + isWhereSubset( + gt(ref(`age`), val(10)), + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(false) + }) + + it(`should handle AND in both: (age > 20 AND status = 'active') is subset of (age > 10 AND status = 'active')`, () => { + expect( + isWhereSubset( + and(gt(ref(`age`), val(20)), eq(ref(`status`), val(`active`))), + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(true) + }) + }) + 
+ describe(`OR combinations`, () => { + it(`should handle OR in superset: A is subset of (A OR B)`, () => { + expect( + isWhereSubset( + gt(ref(`age`), val(10)), + or(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(true) + }) + + it(`should return false when subset doesn't imply any branch of OR superset`, () => { + expect( + isWhereSubset( + eq(ref(`age`), val(10)), + or(gt(ref(`age`), val(10)), lt(ref(`age`), val(5))) + ) + ).toBe(false) + }) + + it(`should handle OR in subset: (A OR B) is subset of C only if both A and B are subsets of C`, () => { + expect( + isWhereSubset( + or(gt(ref(`age`), val(20)), gt(ref(`age`), val(30))), + gt(ref(`age`), val(10)) + ) + ).toBe(true) + }) + + it(`should handle OR in both: (age > 20 OR status = 'active') is subset of (age > 10 OR status = 'active')`, () => { + expect( + isWhereSubset( + or(gt(ref(`age`), val(20)), eq(ref(`status`), val(`active`))), + or(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(true) + }) + + it(`should handle OR in subset: (A OR B) is NOT subset of C if either is not a subset`, () => { + expect( + isWhereSubset( + or(gt(ref(`age`), val(20)), lt(ref(`age`), val(5))), + gt(ref(`age`), val(10)) + ) + ).toBe(false) + }) + }) + + describe(`different fields`, () => { + it(`should return false for different fields with no relationship`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(20)), gt(ref(`salary`), val(1000))) + ).toBe(false) + }) + }) + + describe(`Date support`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + it(`should handle Date equality`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date2)), + eq(ref(`createdAt`), val(date2)) + ) + ).toBe(true) + }) + + it(`should handle Date range comparisons: date > 2024-01-15 is subset of date > 2024-01-01`, () => { + expect( + isWhereSubset( + gt(ref(`createdAt`), val(date2)), + gt(ref(`createdAt`), 
val(date1)) + ) + ).toBe(true) + }) + + it(`should handle Date range comparisons: date < 2024-01-15 is subset of date < 2024-02-01`, () => { + expect( + isWhereSubset( + lt(ref(`createdAt`), val(date2)), + lt(ref(`createdAt`), val(date3)) + ) + ).toBe(true) + }) + + it(`should handle Date equality vs range: date = 2024-01-15 is subset of date > 2024-01-01`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date2)), + gt(ref(`createdAt`), val(date1)) + ) + ).toBe(true) + }) + + it(`should handle Date equality vs IN: date = 2024-01-15 is subset of date IN [2024-01-01, 2024-01-15, 2024-02-01]`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date2)), + inOp(ref(`createdAt`), [date1, date2, date3]) + ) + ).toBe(true) + }) + + it(`should handle Date IN subset: date IN [2024-01-01, 2024-01-15] is subset of date IN [2024-01-01, 2024-01-15, 2024-02-01]`, () => { + expect( + isWhereSubset( + inOp(ref(`createdAt`), [date1, date2]), + inOp(ref(`createdAt`), [date1, date2, date3]) + ) + ).toBe(true) + }) + + it(`should return false when Date not in IN set`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date1)), + inOp(ref(`createdAt`), [date2, date3]) + ) + ).toBe(false) + }) + }) +}) + +describe(`unionWherePredicates`, () => { + describe(`basic cases`, () => { + it(`should return false for empty array`, () => { + const result = unionWherePredicates([]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + + it(`should return the single predicate as-is`, () => { + const pred = gt(ref(`age`), val(10)) + const result = unionWherePredicates([pred]) + expect(result).toBe(pred) + }) + }) + + describe(`same field comparisons`, () => { + it(`should take least restrictive for gt: age > 10 OR age > 20 → age > 10`, () => { + const result = unionWherePredicates([ + gt(ref(`age`), val(10)), + gt(ref(`age`), val(20)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gt`) + const field = 
(result as Func).args[1] as Value + expect(field.value).toBe(10) + }) + + it(`should take least restrictive for gte: age >= 10 OR age >= 20 → age >= 10`, () => { + const result = unionWherePredicates([ + gte(ref(`age`), val(10)), + gte(ref(`age`), val(20)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gte`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(10) + }) + + it(`should take least restrictive for lt: age < 20 OR age < 10 → age < 20`, () => { + const result = unionWherePredicates([ + lt(ref(`age`), val(20)), + lt(ref(`age`), val(10)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`lt`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(20) + }) + + it(`should combine eq into IN: age = 5 OR age = 10 → age IN [5, 10]`, () => { + const result = unionWherePredicates([ + eq(ref(`age`), val(5)), + eq(ref(`age`), val(10)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values).toContain(5) + expect(values).toContain(10) + expect(values.length).toBe(2) + }) + + it(`should fold IN and equality into single IN: age IN [1,2] OR age = 3 → age IN [1,2,3]`, () => { + const result = unionWherePredicates([ + inOp(ref(`age`), [1, 2]), + eq(ref(`age`), val(3)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values).toContain(1) + expect(values).toContain(2) + expect(values).toContain(3) + expect(values.length).toBe(3) + }) + + it(`should handle gte and gt together: age > 10 OR age >= 15 → age > 10`, () => { + const result = unionWherePredicates([ + gt(ref(`age`), val(10)), + gte(ref(`age`), val(15)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gt`) + const field = (result as Func).args[1] as Value + 
expect(field.value).toBe(10) + }) + }) + + describe(`different fields`, () => { + it(`should combine with OR: age > 10 OR status = 'active'`, () => { + const result = unionWherePredicates([ + gt(ref(`age`), val(10)), + eq(ref(`status`), val(`active`)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`or`) + expect((result as Func).args.length).toBe(2) + }) + }) + + describe(`flatten OR`, () => { + it(`should flatten nested ORs`, () => { + const result = unionWherePredicates([ + or(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + eq(ref(`name`), val(`John`)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`or`) + expect((result as Func).args.length).toBe(3) + }) + }) + + describe(`Date support`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + it(`should combine Date equalities into IN: date = date1 OR date = date2 → date IN [date1, date2]`, () => { + const result = unionWherePredicates([ + eq(ref(`createdAt`), val(date1)), + eq(ref(`createdAt`), val(date2)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values.length).toBe(2) + expect(values).toContainEqual(date1) + expect(values).toContainEqual(date2) + }) + + it(`should fold Date IN and equality: date IN [date1,date2] OR date = date3 → date IN [date1,date2,date3]`, () => { + const result = unionWherePredicates([ + inOp(ref(`createdAt`), [date1, date2]), + eq(ref(`createdAt`), val(date3)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values.length).toBe(3) + expect(values).toContainEqual(date1) + expect(values).toContainEqual(date2) + expect(values).toContainEqual(date3) + }) + }) +}) + +describe(`isOrderBySubset`, () => { + it(`should return true for 
undefined subset`, () => { + const orderBy: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(undefined, orderBy)).toBe(true) + expect(isOrderBySubset([], orderBy)).toBe(true) + }) + + it(`should return false for undefined superset with non-empty subset`, () => { + const orderBy: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(orderBy, undefined)).toBe(false) + expect(isOrderBySubset(orderBy, [])).toBe(false) + }) + + it(`should return true for identical orderBy`, () => { + const orderBy: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(orderBy, orderBy)).toBe(true) + }) + + it(`should return true when subset is prefix of superset`, () => { + const subset: OrderBy = [orderByClause(ref(`age`), `asc`)] + const superset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ] + expect(isOrderBySubset(subset, superset)).toBe(true) + }) + + it(`should return false when subset is not a prefix`, () => { + const subset: OrderBy = [orderByClause(ref(`name`), `desc`)] + const superset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ] + expect(isOrderBySubset(subset, superset)).toBe(false) + }) + + it(`should return false when directions differ`, () => { + const subset: OrderBy = [orderByClause(ref(`age`), `desc`)] + const superset: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(subset, superset)).toBe(false) + }) + + it(`should return false when subset is longer than superset`, () => { + const subset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + orderByClause(ref(`status`), `asc`), + ] + const superset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ] + expect(isOrderBySubset(subset, superset)).toBe(false) + }) +}) + +describe(`isLimitSubset`, () => { + it(`should return false for undefined subset with limited superset (requesting all 
data but only have limited)`, () => { + expect(isLimitSubset(undefined, 10)).toBe(false) + }) + + it(`should return true for undefined subset with undefined superset (requesting all data and have all data)`, () => { + expect(isLimitSubset(undefined, undefined)).toBe(true) + }) + + it(`should return true for undefined superset`, () => { + expect(isLimitSubset(10, undefined)).toBe(true) + }) + + it(`should return true when subset <= superset`, () => { + expect(isLimitSubset(10, 20)).toBe(true) + expect(isLimitSubset(10, 10)).toBe(true) + }) + + it(`should return false when subset > superset`, () => { + expect(isLimitSubset(20, 10)).toBe(false) + }) +}) + +describe(`isPredicateSubset`, () => { + it(`should check all components`, () => { + const subset: LoadSubsetOptions = { + where: gt(ref(`age`), val(20)), + orderBy: [orderByClause(ref(`age`), `asc`)], + limit: 10, + } + const superset: LoadSubsetOptions = { + where: gt(ref(`age`), val(10)), + orderBy: [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ], + limit: 20, + } + expect(isPredicateSubset(subset, superset)).toBe(true) + }) + + it(`should return false if where is not subset`, () => { + const subset: LoadSubsetOptions = { + where: gt(ref(`age`), val(5)), + limit: 10, + } + const superset: LoadSubsetOptions = { + where: gt(ref(`age`), val(10)), + limit: 20, + } + expect(isPredicateSubset(subset, superset)).toBe(false) + }) + + it(`should return false if orderBy is not subset`, () => { + const subset: LoadSubsetOptions = { + where: gt(ref(`age`), val(20)), + orderBy: [orderByClause(ref(`name`), `desc`)], + } + const superset: LoadSubsetOptions = { + where: gt(ref(`age`), val(10)), + orderBy: [orderByClause(ref(`age`), `asc`)], + } + expect(isPredicateSubset(subset, superset)).toBe(false) + }) + + it(`should return false if limit is not subset`, () => { + const subset: LoadSubsetOptions = { + where: gt(ref(`age`), val(20)), + limit: 30, + } + const superset: LoadSubsetOptions = { + 
where: gt(ref(`age`), val(10)), + limit: 20, + } + expect(isPredicateSubset(subset, superset)).toBe(false) + }) +}) + +describe(`minusWherePredicates`, () => { + describe(`basic cases`, () => { + it(`should return original predicate when nothing to subtract`, () => { + const pred = gt(ref(`age`), val(10)) + const result = minusWherePredicates(pred, undefined) + + expect(result).toEqual(pred) + }) + + it(`should return NOT(subtract) when from is undefined (can't simplify NOT(B))`, () => { + const subtract = gt(ref(`age`), val(10)) + const result = minusWherePredicates(undefined, subtract) + + expect(result).toEqual({ + type: `func`, + name: `not`, + args: [subtract], + }) + }) + + it(`should return empty set when from is subset of subtract`, () => { + const from = gt(ref(`age`), val(20)) // age > 20 + const subtract = gt(ref(`age`), val(10)) // age > 10 + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should return null when predicates are on different fields`, () => { + const from = gt(ref(`age`), val(10)) + const subtract = eq(ref(`status`), val(`active`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toBeNull() + }) + }) + + describe(`IN minus IN`, () => { + it(`should compute set difference: IN [A,B,C,D] - IN [B,C] = IN [A,D]`, () => { + const from = inOp(ref(`status`), [`A`, `B`, `C`, `D`]) + const subtract = inOp(ref(`status`), [`B`, `C`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `in`, + args: [ref(`status`), val([`A`, `D`])], + }) + }) + + it(`should return empty set when all values are subtracted`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = inOp(ref(`status`), [`A`, `B`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should return original when no overlap`, () => { + const from = 
inOp(ref(`status`), [`A`, `B`]) + const subtract = inOp(ref(`status`), [`C`, `D`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual(from) + }) + + it(`should collapse to equality when one value remains`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = inOp(ref(`status`), [`B`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `eq`, + args: [ref(`status`), val(`A`)], + }) + }) + }) + + describe(`IN minus equality`, () => { + it(`should remove value from IN: IN [A,B,C] - eq(B) = IN [A,C]`, () => { + const from = inOp(ref(`status`), [`A`, `B`, `C`]) + const subtract = eq(ref(`status`), val(`B`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `in`, + args: [ref(`status`), val([`A`, `C`])], + }) + }) + + it(`should collapse to equality when one value remains`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = eq(ref(`status`), val(`A`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `eq`, + args: [ref(`status`), val(`B`)], + }) + }) + + it(`should return empty set when removing last value`, () => { + const from = inOp(ref(`status`), [`A`]) + const subtract = eq(ref(`status`), val(`A`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + }) + + describe(`equality minus equality`, () => { + it(`should return empty set when same value`, () => { + const from = eq(ref(`age`), val(15)) + const subtract = eq(ref(`age`), val(15)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should return original when different values`, () => { + const from = eq(ref(`age`), val(15)) + const subtract = eq(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + 
expect(result).toEqual(from) + }) + }) + + describe(`range minus range - gt/gte`, () => { + it(`should compute difference: age > 10 - age > 20 = (age > 10 AND age <= 20)`, () => { + const from = gt(ref(`age`), val(10)) + const subtract = gt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(10)), lte(ref(`age`), val(20))], + }) + }) + + it(`should return empty set when from is subset of subtract: age > 20 - age > 10`, () => { + const from = gt(ref(`age`), val(20)) + const subtract = gt(ref(`age`), val(10)) + const result = minusWherePredicates(from, subtract) + + // age > 20 is subset of age > 10, so result is empty + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should compute difference: age >= 10 - age >= 20 = (age >= 10 AND age < 20)`, () => { + const from = gte(ref(`age`), val(10)) + const subtract = gte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(10)), lt(ref(`age`), val(20))], + }) + }) + + it(`should compute difference: age > 10 - age >= 20 = (age > 10 AND age < 20)`, () => { + const from = gt(ref(`age`), val(10)) + const subtract = gte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(10)), lt(ref(`age`), val(20))], + }) + }) + + it(`should compute difference: age >= 10 - age > 20 = (age >= 10 AND age <= 20)`, () => { + const from = gte(ref(`age`), val(10)) + const subtract = gt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(10)), lte(ref(`age`), val(20))], + }) + }) + }) + + describe(`range minus range - lt/lte`, () => { + it(`should compute difference: age < 30 - age < 20 = (age >= 20 AND age < 
30)`, () => { + const from = lt(ref(`age`), val(30)) + const subtract = lt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(20)), lt(ref(`age`), val(30))], + }) + }) + + it(`should return empty set when from is subset of subtract: age < 20 - age < 30`, () => { + const from = lt(ref(`age`), val(20)) + const subtract = lt(ref(`age`), val(30)) + const result = minusWherePredicates(from, subtract) + + // age < 20 is subset of age < 30, so result is empty + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should compute difference: age <= 30 - age <= 20 = (age > 20 AND age <= 30)`, () => { + const from = lte(ref(`age`), val(30)) + const subtract = lte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(20)), lte(ref(`age`), val(30))], + }) + }) + + it(`should compute difference: age < 30 - age <= 20 = (age > 20 AND age < 30)`, () => { + const from = lt(ref(`age`), val(30)) + const subtract = lte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(20)), lt(ref(`age`), val(30))], + }) + }) + + it(`should compute difference: age <= 30 - age < 20 = (age >= 20 AND age <= 30)`, () => { + const from = lte(ref(`age`), val(30)) + const subtract = lt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(20)), lte(ref(`age`), val(30))], + }) + }) + }) + + describe(`common conditions`, () => { + it(`should handle common conditions: (age > 10 AND status = 'active') - (age > 20 AND status = 'active') = (age > 10 AND age <= 20 AND status = 'active')`, () => { + const from = and( + gt(ref(`age`), val(10)), + eq(ref(`status`), val(`active`)) + ) + 
const subtract = and( + gt(ref(`age`), val(20)), + eq(ref(`status`), val(`active`)) + ) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [ + eq(ref(`status`), val(`active`)), // common condition + gt(ref(`age`), val(10)), + lte(ref(`age`), val(20)), + ], + }) + }) + + it(`should handle multiple common conditions`, () => { + const from = and( + gt(ref(`age`), val(10)), + eq(ref(`status`), val(`active`)), + eq(ref(`department`), val(`engineering`)) + ) + const subtract = and( + gt(ref(`age`), val(20)), + eq(ref(`status`), val(`active`)), + eq(ref(`department`), val(`engineering`)) + ) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [ + eq(ref(`status`), val(`active`)), // common condition + eq(ref(`department`), val(`engineering`)), // common condition + gt(ref(`age`), val(10)), + lte(ref(`age`), val(20)), + ], + }) + }) + + it(`should handle IN with common conditions: (age IN [10,20,30] AND status = 'active') - (age IN [20,30] AND status = 'active') = (age IN [10] AND status = 'active')`, () => { + const from = and( + inOp(ref(`age`), [10, 20, 30]), + eq(ref(`status`), val(`active`)) + ) + const subtract = and( + inOp(ref(`age`), [20, 30]), + eq(ref(`status`), val(`active`)) + ) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [ + eq(ref(`status`), val(`active`)), // common condition + { + type: `func`, + name: `eq`, + args: [ref(`age`), val(10)], + }, + ], + }) + }) + + it(`should return null when common conditions exist but remaining difference cannot be simplified`, () => { + const from = and( + gt(ref(`age`), val(10)), + eq(ref(`status`), val(`active`)) + ) + const subtract = and( + gt(ref(`name`), val(`Z`)), + eq(ref(`status`), val(`active`)) + ) + const result = minusWherePredicates(from, subtract) + + // Can't simplify age > 10 - name > 'Z' 
(different fields), so returns null + expect(result).toBeNull() + }) + }) + + describe(`Date support`, () => { + it(`should handle Date IN minus Date IN`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + const from = inOp(ref(`createdAt`), [date1, date2, date3]) + const subtract = inOp(ref(`createdAt`), [date2]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `in`, + args: [ref(`createdAt`), val([date1, date3])], + }) + }) + + it(`should handle Date range difference: date > 2024-01-01 - date > 2024-01-15`, () => { + const date1 = new Date(`2024-01-01`) + const date15 = new Date(`2024-01-15`) + + const from = gt(ref(`createdAt`), val(date1)) + const subtract = gt(ref(`createdAt`), val(date15)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [ + gt(ref(`createdAt`), val(date1)), + lte(ref(`createdAt`), val(date15)), + ], + }) + }) + }) + + describe(`real-world sync scenarios`, () => { + it(`should compute missing data range: need age > 10, already have age > 20`, () => { + const requested = gt(ref(`age`), val(10)) + const alreadyLoaded = gt(ref(`age`), val(20)) + const needToFetch = minusWherePredicates(requested, alreadyLoaded) + + // Need to fetch: 10 < age <= 20 + expect(needToFetch).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(10)), lte(ref(`age`), val(20))], + }) + }) + + it(`should compute missing IDs: need IN [1..100], already have IN [50..100]`, () => { + const allIds = Array.from({ length: 100 }, (_, i) => i + 1) + const loadedIds = Array.from({ length: 51 }, (_, i) => i + 50) + + const requested = inOp(ref(`id`), allIds) + const alreadyLoaded = inOp(ref(`id`), loadedIds) + const needToFetch = minusWherePredicates(requested, alreadyLoaded) + + // Need to fetch: ids 1..49 + const expectedIds = Array.from({ length: 49 }, (_, i) 
=> i + 1) + expect(needToFetch).toEqual({ + type: `func`, + name: `in`, + args: [ref(`id`), val(expectedIds)], + }) + }) + + it(`should return empty when all requested data is already loaded`, () => { + const requested = gt(ref(`age`), val(20)) + const alreadyLoaded = gt(ref(`age`), val(10)) + const needToFetch = minusWherePredicates(requested, alreadyLoaded) + + // Requested is subset of already loaded - nothing more to fetch + expect(needToFetch).toEqual({ type: `val`, value: false }) + }) + }) +}) diff --git a/packages/db/tests/subset-dedupe.test.ts b/packages/db/tests/subset-dedupe.test.ts new file mode 100644 index 000000000..3417f95a5 --- /dev/null +++ b/packages/db/tests/subset-dedupe.test.ts @@ -0,0 +1,562 @@ +import { describe, expect, it } from "vitest" +import { + DeduplicatedLoadSubset, + cloneOptions, +} from "../src/query/subset-dedupe" +import { Func, PropRef, Value } from "../src/query/ir" +import { minusWherePredicates } from "../src/query/predicate-utils" +import type { BasicExpression, OrderBy } from "../src/query/ir" +import type { LoadSubsetOptions } from "../src/types" + +// Helper functions to build expressions more easily +function ref(path: string | Array<string>): PropRef { + return new PropRef(typeof path === `string` ? 
[path] : path) +} + +function val<T>(value: T): Value<T> { + return new Value(value) +} + +function gt(left: BasicExpression, right: BasicExpression): Func { + return new Func(`gt`, [left, right]) +} + +function lt(left: BasicExpression, right: BasicExpression): Func { + return new Func(`lt`, [left, right]) +} + +function eq(left: BasicExpression, right: BasicExpression): Func { + return new Func(`eq`, [left, right]) +} + +function and(...expressions: Array<BasicExpression<boolean>>): Func { + return new Func(`and`, expressions) +} + +function inOp(left: BasicExpression, values: Array<unknown>): Func { + return new Func(`in`, [left, new Value(values)]) +} + +function lte(left: BasicExpression, right: BasicExpression): Func { + return new Func(`lte`, [left, right]) +} + +function not(expression: BasicExpression): Func { + return new Func(`not`, [expression]) +} + +describe(`createDeduplicatedLoadSubset`, () => { + it(`should call underlying loadSubset on first call`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) + + expect(callCount).toBe(1) + }) + + it(`should return true immediately for subset unlimited calls`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 10 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(1) + + // Second call: age > 20 (subset of age > 10) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call underlying function + }) + + it(`should call underlying loadSubset for non-subset unlimited calls`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return 
Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 20 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) }) + expect(callCount).toBe(1) + + // Second call: age > 10 (NOT a subset of age > 20) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(2) // Should call underlying function + }) + + it(`should combine unlimited calls with union`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 20 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) }) + expect(callCount).toBe(1) + + // Second call: age < 10 (different range) + await deduplicated.loadSubset({ where: lt(ref(`age`), val(10)) }) + expect(callCount).toBe(2) + + // Third call: age > 25 (subset of age > 20) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(25)), + }) + expect(result).toBe(true) + expect(callCount).toBe(2) // Should not call - covered by first call + }) + + it(`should track limited calls separately`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: age > 10, orderBy age asc, limit 10 + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 10, + }) + expect(callCount).toBe(1) + + // Second call: age > 20, orderBy age asc, limit 5 (subset) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + orderBy: orderBy1, + limit: 5, + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - subset of first 
+ }) + + it(`should call underlying for non-subset limited calls`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: age > 10, orderBy age asc, limit 10 + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 10, + }) + expect(callCount).toBe(1) + + // Second call: age > 10, orderBy age asc, limit 20 (NOT a subset) + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 20, + }) + expect(callCount).toBe(2) // Should call - limit is larger + }) + + it(`should check limited calls against unlimited combined predicate`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: unlimited age > 10 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(1) + + // Second call: limited age > 20 with orderBy + limit + // Even though it has a limit, it's covered by the unlimited call + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + orderBy: orderBy1, + limit: 10, + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - covered by unlimited + }) + + it(`should ignore orderBy for unlimited calls`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ 
+ { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: unlimited with orderBy + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + }) + expect(callCount).toBe(1) + + // Second call: subset where, no orderBy, no limit + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - orderBy ignored for unlimited + }) + + it(`should handle undefined where clauses`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: no where clause (all data) + await deduplicated.loadSubset({}) + expect(callCount).toBe(1) + + // Second call: with where clause (should be covered) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - all data already loaded + }) + + it(`should handle complex real-world scenario`, async () => { + let callCount = 0 + const calls: Array<LoadSubsetOptions> = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(options) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`createdAt`), + compareOptions: { + direction: `desc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // Load all active users + await deduplicated.loadSubset({ where: eq(ref(`status`), val(`active`)) }) + expect(callCount).toBe(1) + + // Load top 10 active users by createdAt + const result1 = await deduplicated.loadSubset({ + where: eq(ref(`status`), val(`active`)), + orderBy: orderBy1, + limit: 10, + }) + expect(result1).toBe(true) // Covered by unlimited call + 
expect(callCount).toBe(1) + + // Load all inactive users + await deduplicated.loadSubset({ where: eq(ref(`status`), val(`inactive`)) }) + expect(callCount).toBe(2) + + // Load top 5 inactive users + const result2 = await deduplicated.loadSubset({ + where: eq(ref(`status`), val(`inactive`)), + orderBy: orderBy1, + limit: 5, + }) + expect(result2).toBe(true) // Covered by unlimited inactive call + expect(callCount).toBe(2) + + // Verify only 2 actual calls were made + expect(calls).toHaveLength(2) + expect(calls[0]).toEqual({ where: eq(ref(`status`), val(`active`)) }) + expect(calls[1]).toEqual({ where: eq(ref(`status`), val(`inactive`)) }) + }) + + describe(`subset deduplication with minusWherePredicates`, () => { + it(`should request only the difference for range predicates`, async () => { + let callCount = 0 + const calls: Array<LoadSubsetOptions> = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(cloneOptions(options)) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 20 (loads data for age > 20) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) }) + expect(callCount).toBe(1) + expect(calls[0]).toEqual({ where: gt(ref(`age`), val(20)) }) + + // Second call: age > 10 (should request only age > 10 AND age <= 20) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(2) + expect(calls[1]).toEqual({ + where: and(gt(ref(`age`), val(10)), lte(ref(`age`), val(20))), + }) + }) + + it(`should request only the difference for set predicates`, async () => { + let callCount = 0 + const calls: Array<LoadSubsetOptions> = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(cloneOptions(options)) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: status IN ['B', 'C'] (loads data for B and C) + await deduplicated.loadSubset({ + where: 
inOp(ref(`status`), [`B`, `C`]), + }) + expect(callCount).toBe(1) + expect(calls[0]).toEqual({ where: inOp(ref(`status`), [`B`, `C`]) }) + + // Second call: status IN ['A', 'B', 'C', 'D'] (should request only A and D) + await deduplicated.loadSubset({ + where: inOp(ref(`status`), [`A`, `B`, `C`, `D`]), + }) + expect(callCount).toBe(2) + expect(calls[1]).toEqual({ + where: inOp(ref(`status`), [`A`, `D`]), + }) + }) + + it(`should return true immediately for complete overlap`, async () => { + let callCount = 0 + const calls: Array<LoadSubsetOptions> = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(cloneOptions(options)) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 10 (loads data for age > 10) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(1) + + // Second call: age > 20 (completely covered by first call) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not make additional call + }) + + it(`should handle complex predicate differences`, async () => { + let callCount = 0 + const calls: Array<LoadSubsetOptions> = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(cloneOptions(options)) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 20 AND status = 'active' + const firstPredicate = and( + gt(ref(`age`), val(20)), + eq(ref(`status`), val(`active`)) + ) + await deduplicated.loadSubset({ where: firstPredicate }) + expect(callCount).toBe(1) + expect(calls[0]).toEqual({ where: firstPredicate }) + + // Second call: age > 10 AND status = 'active' (should request only age > 10 AND age <= 20 AND status = 'active') + const secondPredicate = and( + gt(ref(`age`), val(10)), + eq(ref(`status`), val(`active`)) + ) + + 
await deduplicated.loadSubset({ where: secondPredicate }) + expect(callCount).toBe(2) + expect(calls[1]).toEqual({ + where: and( + eq(ref(`status`), val(`active`)), + gt(ref(`age`), val(10)), + lte(ref(`age`), val(20)) + ), + }) + }) + + it(`should not apply subset logic to limited calls`, async () => { + let callCount = 0 + const calls: Array<LoadSubsetOptions> = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(cloneOptions(options)) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: unlimited age > 20 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) }) + expect(callCount).toBe(1) + + // Second call: limited age > 10 with orderBy + limit + // Should request the full predicate, not the difference, because it's limited + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 10, + }) + expect(callCount).toBe(2) + expect(calls[1]).toEqual({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 10, + }) + }) + + it(`should handle undefined where clauses in subset logic`, async () => { + let callCount = 0 + const calls: Array<LoadSubsetOptions> = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(cloneOptions(options)) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 20 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) }) + expect(callCount).toBe(1) + + // Second call: no where clause (all data) + // Should request all data except what we already loaded + // i.e. 
should request NOT (age > 20) + await deduplicated.loadSubset({}) + expect(callCount).toBe(2) + expect(calls[1]).toEqual({ where: not(gt(ref(`age`), val(20))) }) // Should request all data except what we already loaded + }) + + it(`should handle multiple overlapping unlimited calls`, async () => { + let callCount = 0 + const calls: Array<LoadSubsetOptions> = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(cloneOptions(options)) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 20 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) }) + expect(callCount).toBe(1) + + // Second call: age < 10 (different range) + await deduplicated.loadSubset({ where: lt(ref(`age`), val(10)) }) + expect(callCount).toBe(2) + + // Third call: age > 5 (should request only age >= 10 AND age <= 20, since age < 10 is already covered) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(5)) }) + expect(callCount).toBe(3) + + // Ideally it would be smart enough to optimize it to request only age >= 10 AND age <= 20, since age < 10 is already covered + // However, it doesn't do that currently, so it will not optimize and execute the original query + expect(calls[2]).toEqual({ + where: gt(ref(`age`), val(5)), + }) + + /* + expect(calls[2]).toEqual({ + where: and(gte(ref(`age`), val(10)), lte(ref(`age`), val(20))), + }) + */ + }) + }) +})