diff --git a/jena-benchmarks/jena-benchmarks-jmh/pom.xml b/jena-benchmarks/jena-benchmarks-jmh/pom.xml index 8c612c8f74e..d3dcb890122 100644 --- a/jena-benchmarks/jena-benchmarks-jmh/pom.xml +++ b/jena-benchmarks/jena-benchmarks-jmh/pom.xml @@ -25,7 +25,7 @@ org.apache.jena jena-benchmarks - 6.1.0 + 6.2.0-SNAPSHOT Apache Jena - Benchmarks JMH diff --git a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/jmh/JmhDefaultOptions.java b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/jmh/JmhDefaultOptions.java index fe97134a847..2ea94005779 100644 --- a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/jmh/JmhDefaultOptions.java +++ b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/jmh/JmhDefaultOptions.java @@ -43,8 +43,8 @@ public static ChainedOptionsBuilder getDefaults(Class c) { .mode(Mode.AverageTime) .timeUnit(TimeUnit.SECONDS) .warmupTime(TimeValue.NONE) - .warmupIterations(5) - .measurementIterations(15) + .warmupIterations(4) + .measurementIterations(8) .measurementTime(TimeValue.NONE) .threads(1) .forks(1) diff --git a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/TestGraphAdd.java b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/TestGraphAdd.java index fc8c64d547f..3c9c355a0e1 100644 --- a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/TestGraphAdd.java +++ b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/TestGraphAdd.java @@ -44,14 +44,18 @@ public class TestGraphAdd { @Param({ "GraphMemFast (current)", - "GraphMemValue (current)", +// "GraphMemValue (current)", + "GraphMemIndexedSet EAGER (current)", +// "GraphMemIndexedSet LAZY (current)", +// "GraphMemIndexedSet LAZY_PARALLEL (current)", +// "GraphMemIndexedSet MINIMAL (current)", // "GraphMemRoaring EAGER (current)", // "GraphMemRoaring LAZY (current)", // "GraphMemRoaring LAZY_PARALLEL (current)", // "GraphMemRoaring 
MINIMAL (current)", // "GraphMemValue (Jena 5.6.0)", - "GraphMemFast (Jena 5.6.0)", - "GraphMemValue (Jena 5.6.0)", +// "GraphMemFast (Jena 5.6.0)", +// "GraphMemValue (Jena 5.6.0)", }) public String param1_GraphImplementation; java.util.function.Supplier graphAdd; diff --git a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/TestGraphCopy.java b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/TestGraphCopy.java index 09a4ca1f94f..f0f1f28ac66 100644 --- a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/TestGraphCopy.java +++ b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/TestGraphCopy.java @@ -46,6 +46,10 @@ public class TestGraphCopy { @Param({ "GraphMemFast (current)", + "GraphMemIndexedSet EAGER (current)", +// "GraphMemIndexedSet LAZY (current)", +// "GraphMemIndexedSet LAZY_PARALLEL (current)", +// "GraphMemIndexedSet MINIMAL (current)", // "GraphMemRoaring EAGER (current)", // "GraphMemRoaring LAZY (current)", // "GraphMemRoaring LAZY_PARALLEL (current)", @@ -55,7 +59,7 @@ public class TestGraphCopy { @Param({ "copy", - "findAndAddAll", +// "findAndAddAll", }) public String param2_CopyOrConstruct; diff --git a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/Context.java b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/Context.java index aa6c6d61583..b00f740d4f0 100644 --- a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/Context.java +++ b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/Context.java @@ -44,6 +44,27 @@ public Context(String graphImplementation) { this.graphClass = GraphClass.GraphMemLegacy; this.jenaVersion = JenaVersion.CURRENT; break; + case "GraphMemIndexedSet (current)": + case "GraphMemIndexedSet EAGER (current)": + this.graphClass = GraphClass.GraphMemIndexedSetEager; + this.jenaVersion 
= JenaVersion.CURRENT; + break; + case "GraphMemIndexedSet LAZY (current)": + this.graphClass = GraphClass.GraphMemIndexedSetLazy; + this.jenaVersion = JenaVersion.CURRENT; + break; + case "GraphMemIndexedSet LAZY_PARALLEL (current)": + this.graphClass = GraphClass.GraphMemIndexedSetLazyParallel; + this.jenaVersion = JenaVersion.CURRENT; + break; + case "GraphMemIndexedSet MINIMAL (current)": + this.graphClass = GraphClass.GraphMemIndexedSetMinimal; + this.jenaVersion = JenaVersion.CURRENT; + break; + case "GraphMemIndexedSet MANUAL (current)": + this.graphClass = GraphClass.GraphMemIndexedSetManual; + this.jenaVersion = JenaVersion.CURRENT; + break; case "GraphMemRoaring (current)": case "GraphMemRoaring EAGER (current)": this.graphClass = GraphClass.GraphMemRoaringEager; @@ -96,6 +117,11 @@ public enum GraphClass { GraphMemValue, GraphMemFast, GraphMemLegacy, + GraphMemIndexedSetEager, + GraphMemIndexedSetLazy, + GraphMemIndexedSetLazyParallel, + GraphMemIndexedSetMinimal, + GraphMemIndexedSetManual, GraphMemRoaringEager, GraphMemRoaringLazy, GraphMemRoaringLazyParallel, diff --git a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/GraphTripleNodeHelper560.java b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/GraphTripleNodeHelper560.java index 8b4c20cc1ac..1fe48c1f5c0 100644 --- a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/GraphTripleNodeHelper560.java +++ b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/GraphTripleNodeHelper560.java @@ -48,6 +48,7 @@ public Graph createGraph(Context.GraphClass graphClass) { case GraphMemRoaringLazyParallel -> new GraphMem2Roaring(IndexingStrategy.LAZY_PARALLEL); case GraphMemRoaringMinimal -> new GraphMem2Roaring(IndexingStrategy.MINIMAL); case GraphMemRoaringManual -> new GraphMem2Roaring(IndexingStrategy.MANUAL); + default -> throw new IllegalStateException("Unexpected value: " + 
graphClass); }; } diff --git a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/GraphTripleNodeHelperCurrent.java b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/GraphTripleNodeHelperCurrent.java index 9806bb64184..f178d4b4a3d 100644 --- a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/GraphTripleNodeHelperCurrent.java +++ b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/graph/helper/GraphTripleNodeHelperCurrent.java @@ -27,10 +27,8 @@ import org.apache.jena.graph.Node; import org.apache.jena.graph.NodeFactory; import org.apache.jena.graph.Triple; -import org.apache.jena.mem.GraphMemFast; -import org.apache.jena.mem.GraphMemLegacy; -import org.apache.jena.mem.GraphMemRoaring; -import org.apache.jena.mem.IndexingStrategy; +import org.apache.jena.mem.*; +import org.apache.jena.memvalue.GraphMemValue; import org.apache.jena.riot.RDFDataMgr; public class GraphTripleNodeHelperCurrent implements GraphTripleNodeHelper { @@ -39,9 +37,14 @@ public class GraphTripleNodeHelperCurrent implements GraphTripleNodeHelper new org.apache.jena.memvalue.GraphMemValue(); + case GraphMemValue -> new GraphMemValue(); case GraphMemFast -> new GraphMemFast(); case GraphMemLegacy -> new GraphMemLegacy(); + case GraphMemIndexedSetEager -> new GraphMemIndexedSet(IndexingStrategy.EAGER); + case GraphMemIndexedSetLazy -> new GraphMemIndexedSet(IndexingStrategy.LAZY); + case GraphMemIndexedSetLazyParallel -> new GraphMemIndexedSet(IndexingStrategy.LAZY_PARALLEL); + case GraphMemIndexedSetMinimal -> new GraphMemIndexedSet(IndexingStrategy.MINIMAL); + case GraphMemIndexedSetManual -> new GraphMemIndexedSet(IndexingStrategy.MANUAL); case GraphMemRoaringEager -> new GraphMemRoaring(IndexingStrategy.EAGER); case GraphMemRoaringLazy -> new GraphMemRoaring(IndexingStrategy.LAZY); case GraphMemRoaringLazyParallel -> new GraphMemRoaring(IndexingStrategy.LAZY_PARALLEL); @@ 
-54,7 +57,7 @@ public Graph createGraph(Context.GraphClass graphClass) { public List readTriples(String graphUri) { var list = new ArrayList(); @SuppressWarnings("deprecation") - var g1 = new org.apache.jena.memvalue.GraphMemValue() { + var g1 = new GraphMemValue() { @Override public void add(Triple t) { list.add(t); @@ -66,7 +69,7 @@ public void add(Triple t) { @Override public List cloneTriples(List triples) { - var list = new java.util.ArrayList(triples.size()); + var list = new ArrayList(triples.size()); triples.forEach(triple -> list.add(cloneTriple(triple))); return list; } diff --git a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsForeachRemaining.java b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsForeachRemaining.java index 54e19b413ef..01d765a749b 100644 --- a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsForeachRemaining.java +++ b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsForeachRemaining.java @@ -28,6 +28,7 @@ import org.apache.jena.atlas.iterator.ActionCount; import org.apache.jena.jmh.JmhDefaultOptions; +import org.apache.jena.mem.collection.Sized; import org.junit.Assert; import org.junit.Test; @@ -75,12 +76,18 @@ public Spliterator createSut(Object[] arrayWithNulls, int elementsCount) if (count != elementsCount) { throw new RuntimeException("Concurrent modification detected"); } + } ; + final var sized = new Sized() { + @Override + public int size() { + return elementsCount; + } }; return switch (param1_iteratorImplementation) { case "memvalue.SparseArraySpliterator" -> new org.apache.jena.memvalue.SparseArraySpliterator<>(arrayWithNulls, count, checkForConcurrentModification); case "mem2.SparseArraySpliterator" -> - new SparseArraySpliterator<>(arrayWithNulls, checkForConcurrentModification); + 
new SparseArraySpliterator<>(arrayWithNulls, sized); default -> throw new IllegalArgumentException("Unknown spliterator implementation: " + param1_iteratorImplementation); }; diff --git a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsStreamParallel.java b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsStreamParallel.java index c28435e8f2a..62020e2468c 100644 --- a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsStreamParallel.java +++ b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsStreamParallel.java @@ -29,6 +29,7 @@ import org.apache.jena.atlas.iterator.ActionCount; import org.apache.jena.jmh.JmhDefaultOptions; +import org.apache.jena.mem.collection.Sized; import org.junit.Assert; import org.junit.Test; @@ -77,11 +78,17 @@ public Spliterator createSut(Object[] arrayWithNulls, int elementsCount) throw new RuntimeException("Concurrent modification detected"); } }; + final var sized = new Sized() { + @Override + public int size() { + return elementsCount; + } + }; return switch (param1_iteratorImplementation) { case "memvalue.SparseArraySpliterator" -> new org.apache.jena.memvalue.SparseArraySpliterator<>(arrayWithNulls, count, checkForConcurrentModification); case "mem2.SparseArraySpliterator" -> - new SparseArraySpliterator<>(arrayWithNulls, checkForConcurrentModification); + new SparseArraySpliterator<>(arrayWithNulls, sized); default -> throw new IllegalArgumentException("Unknown spliterator implementation: " + param1_iteratorImplementation); }; diff --git a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsTryAdvance.java b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsTryAdvance.java index 
d5582824587..cca4a1601fe 100644 --- a/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsTryAdvance.java +++ b/jena-benchmarks/jena-benchmarks-jmh/src/test/java/org/apache/jena/mem/spliterator/TestSparseArraySpliteratorsTryAdvance.java @@ -28,6 +28,7 @@ import org.apache.jena.atlas.iterator.ActionCount; import org.apache.jena.jmh.JmhDefaultOptions; +import org.apache.jena.mem.collection.Sized; import org.junit.Assert; import org.junit.Test; @@ -78,11 +79,17 @@ public Spliterator createSut(Object[] arrayWithNulls, int elementsCount) throw new RuntimeException("Concurrent modification detected"); } }; + final var sized = new Sized() { + @Override + public int size() { + return elementsCount; + } + }; return switch (param1_iteratorImplementation) { case "memvalue.SparseArraySpliterator" -> new org.apache.jena.memvalue.SparseArraySpliterator<>(arrayWithNulls, count, checkForConcurrentModification); case "mem2.SparseArraySpliterator" -> - new SparseArraySpliterator<>(arrayWithNulls, checkForConcurrentModification); + new SparseArraySpliterator<>(arrayWithNulls, sized); default -> throw new IllegalArgumentException("Unknown spliterator implementation: " + param1_iteratorImplementation); }; diff --git a/jena-benchmarks/jena-benchmarks-shadedJena560/pom.xml b/jena-benchmarks/jena-benchmarks-shadedJena560/pom.xml index 37918ddd1e7..1319d82d4a6 100644 --- a/jena-benchmarks/jena-benchmarks-shadedJena560/pom.xml +++ b/jena-benchmarks/jena-benchmarks-shadedJena560/pom.xml @@ -25,7 +25,7 @@ org.apache.jena jena-benchmarks - 6.1.0 + 6.2.0-SNAPSHOT Apache Jena - Benchmarks Shaded Jena 5.6.0 diff --git a/jena-benchmarks/pom.xml b/jena-benchmarks/pom.xml index 917b36ebbbe..6f9afb658d1 100644 --- a/jena-benchmarks/pom.xml +++ b/jena-benchmarks/pom.xml @@ -25,7 +25,7 @@ org.apache.jena jena - 6.1.0 + 6.2.0-SNAPSHOT Apache Jena - Benchmark Suite diff --git a/jena-core/src/main/java/org/apache/jena/graph/GraphMemFactory.java 
b/jena-core/src/main/java/org/apache/jena/graph/GraphMemFactory.java index c192280652c..76e4d469ec0 100644 --- a/jena-core/src/main/java/org/apache/jena/graph/GraphMemFactory.java +++ b/jena-core/src/main/java/org/apache/jena/graph/GraphMemFactory.java @@ -25,6 +25,7 @@ import org.apache.jena.graph.impl.GraphBase ; import org.apache.jena.mem.GraphMemFast; +import org.apache.jena.mem.GraphMemIndexedSet; import org.apache.jena.mem.GraphMemLegacy; import org.apache.jena.mem.GraphMemRoaring; import org.apache.jena.sys.JenaSystem; @@ -143,6 +144,22 @@ public static Graph createGraphMem2() { public static Graph createGraphMemRoaring() { return new GraphMemRoaring(); } + /** + * A graph that stores triples in memory. This class is not thread-safe. + *

+ *

    + *
  • This graph provides term equality.
  • + *
  • Iterators over this graph do not support Iterator.remove
  • + *
+ *

+ * {@link GraphMemIndexedSet} is intended to replace {@link GraphMemRoaring} + * in the future. + *

+ * See {@link GraphMemIndexedSet} for details. + */ + public static Graph createGraphMemIndexedSet() + { return new GraphMemIndexedSet(); } + private final static Graph emptyGraph = new GraphBase() { @Override protected ExtendedIterator graphBaseFind(Triple triplePattern) { diff --git a/jena-core/src/main/java/org/apache/jena/mem/GraphMemIndexedSet.java b/jena-core/src/main/java/org/apache/jena/mem/GraphMemIndexedSet.java new file mode 100644 index 00000000000..0c7104ac8c4 --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/GraphMemIndexedSet.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.apache.jena.mem; + +import org.apache.jena.mem.store.TripleStore; +import org.apache.jena.mem.store.indexed.IndexedSetTripleStore; + +/** + * In-memory {@link GraphMem} implementation that stores all triples in a single + * indexed set ({@link IndexedSetTripleStore}). This class is not thread-safe. + *

+ * Different {@link IndexingStrategy indexing strategies} can be selected to + * balance memory usage and lookup performance. The triples themselves always live + * in a flat set; only the auxiliary subject/predicate/object indices are + * controlled by the strategy. See {@link IndexingStrategy} for the trade-offs of + * each variant. + *

+ * While the index has not been built (e.g. with {@link IndexingStrategy#LAZY}, + * {@link IndexingStrategy#LAZY_PARALLEL}, {@link IndexingStrategy#MANUAL} or + * {@link IndexingStrategy#MINIMAL}) the memory footprint is very low and the + * following operations are particularly fast: + *

    + *
  • {@link GraphMem#add} - adds a triple to the graph
  • + *
  • {@link GraphMem#delete} - removes a triple from the graph
  • + *
+ * A typical bulk-load pattern is to start without an index, add all triples and + * then call {@link #initializeIndexParallel()} to build the index in parallel. + */ +public class GraphMemIndexedSet extends GraphMem { + + private final IndexedSetTripleStore indexedSetTripleStore; + + /** + * Creates a new graph using the {@link IndexingStrategy#EAGER} default + * indexing strategy. + */ + public GraphMemIndexedSet() { + this(IndexingStrategy.EAGER); + } + + /** + * Creates a new graph that uses the given indexing strategy. + * + * @param indexingStrategy the indexing strategy to use; controls when the + * subject/predicate/object index is built and how + * pattern lookups are evaluated + */ + public GraphMemIndexedSet(IndexingStrategy indexingStrategy) { + this(new IndexedSetTripleStore(indexingStrategy)); + } + + /** + * Internal constructor used by {@link #copy()} to wrap an already populated + * triple store. + * + * @param tripleStore the triple store to wrap (must be an + * {@link IndexedSetTripleStore}) + */ + private GraphMemIndexedSet(final IndexedSetTripleStore tripleStore) { + super(tripleStore); + this.indexedSetTripleStore = tripleStore; + } + + /** + * {@inheritDoc} + *

+ * Returns an independent copy that preserves the indexing strategy and, + * if the source has its index built, copies the index data structures + * directly to avoid rebuilding them. + */ + @Override + public GraphMemIndexedSet copy() { + return new GraphMemIndexedSet(this.indexedSetTripleStore.copy()); + } + + /** + * Convenience accessor for the typed underlying store. + * + * @return the {@link IndexedSetTripleStore} backing this graph + */ + private IndexedSetTripleStore getIndexedSetTripleStore() { + return this.indexedSetTripleStore; + } + + /** + * Returns the indexing strategy this graph was created with. + * The strategy is fixed for the lifetime of the graph; clearing or + * initializing the index does not change it. + * + * @return the indexing strategy + */ + public IndexingStrategy getIndexingStrategy() { + return indexedSetTripleStore.getIndexingStrategy(); + } + + /** + * Drops the current subject/predicate/object index and reverts to the + * initial strategy. Subsequent pattern lookups will trigger (re)building + * the index according to the configured {@link IndexingStrategy}. + */ + public void resetIndexingStrategy() { + indexedSetTripleStore.resetIndexingStrategy(); + } + + /** + * Build (or rebuild) the index sequentially. + * After this call, pattern lookups will be served by the eager strategy + * regardless of the originally configured indexing strategy. + */ + public void initializeIndex() { + indexedSetTripleStore.initializeIndex(); + } + + /** + * Build (or rebuild) the index in parallel. + * This can be substantially faster than {@link #initializeIndex()} for + * larger graphs. After this call, pattern lookups will be served by the + * eager strategy regardless of the originally configured indexing strategy. + */ + public void initializeIndexParallel() { + indexedSetTripleStore.initializeIndexParallel(); + } + + /** + * Reports whether the index is currently built and ready to serve pattern + * lookups directly. 
For graphs configured with a non-eager strategy this + * may flip from {@code false} to {@code true} as soon as the first lookup + * is performed (or when {@link #initializeIndex()} is called explicitly). + * + * @return {@code true} iff the index is initialized + */ + public boolean isIndexInitialized() { + return indexedSetTripleStore.isIndexInitialized(); + } +} \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/mem/IndexingStrategy.java b/jena-core/src/main/java/org/apache/jena/mem/IndexingStrategy.java index 311e3c1476d..10102250f43 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/IndexingStrategy.java +++ b/jena-core/src/main/java/org/apache/jena/mem/IndexingStrategy.java @@ -24,54 +24,61 @@ import org.apache.jena.graph.Graph; /** - * An enumeration that represents different indexing strategies for a graph. - * The indexing strategy determines how triples are indexed to support pattern matching. - * It is assumed that the graph contains a set of triples, and all operations that do not involve - * pattern matching are performed directly on this set, not on the indices. - *
- * Pattern matching refers to operations like {@link Graph#find}, {@link Graph#remove} or {@link Graph#contains} - * that may take a triple pattern as argument, such as "S__", "SP_", "S_O", "_P_", "_PO", or "__O", - * instead of a concrete triple "SPO". - * In the case of a concrete triple these operations should be performed directly on the set of triples - * and not rely on the indices. + * Indexing strategies supported by {@link org.apache.jena.mem.store.indexed.IndexedSetTripleStore} + * and {@link org.apache.jena.mem.store.roaring.RoaringTripleStore}. + * The indexing strategy determines how (and when) the auxiliary + * subject/predicate/object index is maintained for pattern-matching operations. + *

+ * The graph always keeps a flat set of triples. Operations that do not involve + * pattern matching (size, iterating all triples, lookup of a fully concrete + * triple, etc.) are evaluated directly against this set and are unaffected by + * the indexing strategy. + *

+ * Pattern matching refers to {@link Graph#find}, {@link Graph#remove} or + * {@link Graph#contains} called with a triple pattern such as + * {@code S__}, {@code SP_}, {@code S_O}, {@code _P_}, {@code _PO} or + * {@code __O} (where {@code _} denotes a wildcard). + * Lookups for fully concrete triples ({@code SPO}) are always answered + * directly from the triple set and never use the index. */ public enum IndexingStrategy { /** - * Starts with an index as any other in-memory graph. - * {@link Graph#add}, {@link Graph#delete} and {@link Graph#clear()} update the index immediately. - * Clearing the index just rebuilds it from the set of triples. + * The index is always present. + * {@link Graph#add}, {@link Graph#delete} and {@link Graph#clear()} update + * the index immediately. Calling {@code clearIndex} simply discards the + * existing index, which is then rebuilt from the triple set. */ EAGER, /** - * Starts with no index and builds it on demand when pattern matches are requested. - * After initialization, the index behaves like EAGER. - * Index may be cleared manually, then it is rebuilt on demand. + * The index is built on demand the first time a pattern match is requested. + * Once built, behaves like {@link #EAGER}. Calling {@code clearIndex} + * discards the index; it will be rebuilt on demand the next time a + * pattern match is performed. */ LAZY, /** - * Starts with no index and builds it on demand when pattern matches are requested. - * After initialization, the index behaves like EAGER. - * Index may be cleared manually, then it is rebuilt on demand. - * This strategy uses parallel processing to build the index. + * Like {@link #LAZY}, but the on-demand index build uses parallel + * processing for faster initialization on large graphs. */ LAZY_PARALLEL, /** - * Starts with no index and throws an exception if a pattern match is requested, - * but the index has not been initialized manually yet. - * After initialization, the index behaves like EAGER. 
- * Index may be cleared manually, then it has to be initialized again manually. + * The index is never built automatically. Pattern-match operations throw + * an {@link UnsupportedOperationException} until the index is initialized + * explicitly (e.g. via + * {@link org.apache.jena.mem.GraphMemIndexedSet#initializeIndex()}). + * After initialization, behaves like {@link #EAGER}. */ MANUAL, /** - * Starts with no index and uses filtering on the triple set, - * as long as the index has not been initialized. - * After initialization, the index behaves like EAGER. - * Index may be cleared manually, then filtering is used again until the index is initialized again. + * No index is built. Pattern-match operations are evaluated by linearly + * filtering the triple set, which is space-efficient but slower for large + * graphs. The index can be initialized explicitly to switch to eager + * behavior; calling {@code clearIndex} reverts to filtering again. */ MINIMAL -} +} \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashBase.java b/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashBase.java index 6166da81479..08245ca6d35 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashBase.java +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashBase.java @@ -24,58 +24,80 @@ import org.apache.jena.mem.spliterator.SparseArraySpliterator; import org.apache.jena.util.iterator.ExtendedIterator; -import java.util.ConcurrentModificationException; import java.util.Spliterator; import java.util.function.Predicate; /** - * This is the base class for {@link FastHashSet} and {@link FastHashSet}. - * It only grows but never shrinks. - * This map does not guarantee any order. Although due to the way it is implemented the elements have a certain order. - * This map does not allow null keys. - * This map is not thread safe. + * Base class for {@link FastHashSet} and {@link FastHashMap}. 
+ * The collection grows on demand but never shrinks. It does not guarantee any + * iteration order (although the implementation does produce a stable order + * for a given insertion/deletion history). It does not allow {@code null} + * keys and is not thread-safe. + *

Internal layout

+ *
    + *
  • positions: power-of-two sized array used as the open-addressing + * probe table (like in {@link java.util.HashMap}). It is indexed by + * {@code hashCode & (positions.length - 1)}. A value of {@code 0} marks + * an empty slot - faster to test than a {@code null} reference. Non-empty + * slots store the bitwise complement ({@code ~}) of the index of the entry + * in the {@code keys}/{@code hashCodesOrDeletedIndices} arrays, so a real + * stored index of {@code 0} encodes as {@code -1} and is therefore distinct + * from "empty".
  • + *
  • keys: dense array of keys, generally filled from index 0 up to + * {@code keysPos}. Slots emptied by deletion become {@code null} and are + * reused before the array is grown. The dense layout enables fast iteration.
  • + *
  • hashCodesOrDeletedIndices: parallel array to {@code keys}. For + * live entries it stores the cached hash code of the key. For deleted slots + * it stores the index of the previously deleted slot, forming a freelist + * whose head is {@code lastDeletedIndex} ({@code -1} if empty).
  • + *
  • keysPos / removedKeysCount: high-water mark and freelist + * length, respectively; the live size is {@code keysPos - removedKeysCount}.
  • + *
+ * The {@code keys} and {@code hashCodesOrDeletedIndices} arrays grow together + * by approximately a factor of 1.5 (similar to {@link java.util.ArrayList}). *

- * The positions array stores negative indices to the entries and hashCode arrays. - * The positions array is implemented as a power of two sized array. (like in {@link java.util.HashMap}) This allows - * to use a fast modulo operation to calculate the index. The indices of the positions array are derived from the - * hashCodes. - * Any position 0 indicates an empty element. The comparison with 0 is faster than comparing elements with null. - *

- * The keys are stored in a keys array and the hashCodesOrDeletedIndices array - * stores the hashCodes of the keys. - * hashCodesOrDeletedIndices is also used to store the indices of the deleted keys to save memory. It works like a - * linked list of deleted keys. The index of the previously deleted key is stored in the hashCodesOrDeletedIndices - * array. lastDeletedIndex is the index of the last deleted key in the hashCodesOrDeletedIndices array and serves as - * the head of the linked list of deleted keys. - * These two arrays grow together. They grow like {@link java.util.ArrayList} with a factor of 1.5. - *

- * keysPos is the index of the next free position in the keys array. - * The keys array is usually completely filled from index 0 to keysPos. Exceptions are the deleted keys. - * Indices that have been deleted are reused for new keys before the keys array is extended. - * The dense nature of the keys array enables fast iteration. - *

- * The index of a key in the keys array never changes. So the index of a key can be used as a handle to the key and - * for random access. + * Once a key is inserted, its index in the {@code keys} array never changes + * until it is removed. The index can therefore be used as a stable handle for + * O(1) random access, e.g. to coordinate parallel arrays of associated data. * * @param the type of the keys */ public abstract class FastHashBase implements JenaMapSetCommon { + /** Initial size of the {@link #positions} probe table. */ protected static final int MINIMUM_HASHES_SIZE = 16; + /** Initial size of the {@link #keys} / {@link #hashCodesOrDeletedIndices} arrays. */ protected static final int MINIMUM_ELEMENTS_SIZE = 8; + /** High-water mark in {@link #keys}; one past the largest slot ever used. */ protected int keysPos = 0; + /** Dense array of stored keys; {@code null} marks a freed slot. */ protected K[] keys; + /** + * For live entries: cached {@link Object#hashCode()} of the corresponding key. + * For freed slots: index of the previously freed slot (singly-linked freelist + * whose head is {@link #lastDeletedIndex}). + */ protected int[] hashCodesOrDeletedIndices; + /** Head of the freelist of removed slots, or {@code -1} if the freelist is empty. */ protected int lastDeletedIndex = -1; + /** Number of freelist entries (i.e. slots in {@link #keys} currently {@code null}). */ protected int removedKeysCount = 0; /** - * The negative indices to the entries and hashCode arrays. - * The indices of the positions array are derived from the hashCodes. - * Any position 0 indicates an empty element. + * Probe table mapping a hash bucket to an entry index in {@link #keys}. + * A slot's value is the bitwise complement ({@code ~}) of the entry index; + * a value of {@code 0} marks an empty slot. 
*/ protected int[] positions; - protected FastHashBase(int initialSize) { + /** + * Creates a base collection sized to hold at least {@code initialSize} + * entries before growing. + * + * @param initialSize the initial capacity of the keys array; the probe + * table is sized to the next power of two at least + * twice as large + */ + protected FastHashBase(final int initialSize) { var positionsSize = Integer.highestOneBit(initialSize << 1); if (positionsSize < initialSize << 1) { positionsSize <<= 1; @@ -85,6 +107,11 @@ protected FastHashBase(int initialSize) { this.hashCodesOrDeletedIndices = new int[initialSize]; } + /** + * Creates a base collection with the default minimum capacities + * ({@link #MINIMUM_HASHES_SIZE} for the probe table and + * {@link #MINIMUM_ELEMENTS_SIZE} for the keys array). + */ protected FastHashBase() { this.positions = new int[MINIMUM_HASHES_SIZE]; this.keys = newKeysArray(MINIMUM_ELEMENTS_SIZE); @@ -95,17 +122,17 @@ protected FastHashBase() { * Copy constructor. * The new map will contain all the same keys of the map to copy. 
* - * @param baseToCopy + * @param baseToCopy instance to copy */ - protected > FastHashBase(final T baseToCopy) { + protected > FastHashBase(final T baseToCopy) { this.positions = new int[baseToCopy.positions.length]; System.arraycopy(baseToCopy.positions, 0, this.positions, 0, baseToCopy.positions.length); this.hashCodesOrDeletedIndices = new int[baseToCopy.hashCodesOrDeletedIndices.length]; - System.arraycopy(baseToCopy.hashCodesOrDeletedIndices, 0, this.hashCodesOrDeletedIndices, 0, baseToCopy.hashCodesOrDeletedIndices.length); + System.arraycopy(baseToCopy.hashCodesOrDeletedIndices, 0, this.hashCodesOrDeletedIndices, 0, baseToCopy.keysPos); this.keys = newKeysArray(baseToCopy.keys.length); - System.arraycopy(baseToCopy.keys, 0, this.keys, 0, baseToCopy.keys.length); + System.arraycopy(baseToCopy.keys, 0, this.keys, 0, baseToCopy.keysPos); this.keysPos = baseToCopy.keysPos; this.lastDeletedIndex = baseToCopy.lastDeletedIndex; @@ -143,6 +170,17 @@ private int calcNewPositionsSize() { return -1; } + private void fillPositionsArray(int newSize) { + this.positions = new int[newSize]; + var pos = keysPos - 1; + while (-1 < pos) { + if (null != keys[pos]) { + this.positions[findEmptySlotWithoutEqualityCheck(hashCodesOrDeletedIndices[pos])] = ~pos; + } + pos--; + } + } + /** * Grows the positions array if needed. 
*/ @@ -151,13 +189,7 @@ protected final void growPositionsArrayIfNeeded() { if (newSize < 0) { return; } - final var oldPositions = this.positions; - this.positions = new int[newSize]; - for (int oldPosition : oldPositions) { - if (0 != oldPosition) { - this.positions[findEmptySlotWithoutEqualityCheck(hashCodesOrDeletedIndices[~oldPosition])] = oldPosition; - } - } + fillPositionsArray(newSize); } /** @@ -170,13 +202,7 @@ protected final boolean tryGrowPositionsArrayIfNeeded() { if (newSize < 0) { return false; } - final var oldPositions = this.positions; - this.positions = new int[newSize]; - for (int oldPosition : oldPositions) { - if (0 != oldPosition) { - this.positions[findEmptySlotWithoutEqualityCheck(hashCodesOrDeletedIndices[~oldPosition])] = oldPosition; - } - } + fillPositionsArray(newSize); return true; } @@ -245,24 +271,13 @@ public final boolean tryRemove(K e, int hashCode) { } /** - * Removes the element at the given position. + * Remove the given element and return the index it occupied before removal. * - * @param e the element - * @return the index of the removed element or -1 if the element was not found + * @param e the element to remove + * @return the former index of the element, or {@code -1} if it was not present */ public final int removeAndGetIndex(final K e) { - return removeAndGetIndex(e, e.hashCode()); - } - - /** - * Removes the element at the given position. - * - * @param e the element - * @param hashCode the hash code of the element. This is a performance optimization. - * @return the index of the removed element or -1 if the element was not found - */ - public final int removeAndGetIndex(final K e, final int hashCode) { - final var pIndex = findPosition(e, hashCode); + final var pIndex = findPosition(e, e.hashCode()); if (pIndex < 0) { return -1; } @@ -281,18 +296,19 @@ public final void removeUnchecked(K e, int hashCode) { } /** - * Removes the element at the given position. 
+ * Removes the entry referenced by the {@code positions} slot at index + * {@code here} and rehashes the affected probe chain. *

- * This is an implementation of Knuth's Algorithm R from tAoCP vol3, p 527, - * with exchanging of the roles of i and j so that they can be usefully renamed - * to here and scan. - *

- * It relies on linear probing but doesn't require a distinguished REMOVED - * value. Since we resize the table when it gets fullish, we don't worry [much] - * about the overhead of the linear probing. + * This is an implementation of Knuth's Algorithm R from The Art of + * Computer Programming, vol. 3, p. 527, with the roles of {@code i} + * and {@code j} swapped so they can be usefully renamed to here + * and scan. *

+ * It relies on linear probing but doesn't require a distinguished + * {@code REMOVED} sentinel. Since the table is resized once it gets + * fullish, the overhead of linear probing is not a concern. * - * @param here the index in the positions array + * @param here the index in the {@link #positions} array of the slot to clear */ protected void removeFrom(int here) { final var pIndex = ~positions[here]; @@ -345,9 +361,14 @@ public final boolean containsKey(K o) { } /** - * Attentions: Due to the ordering of the keys, this method may be slow - * if matching elements are at the start of the list. - * Try to use {@link #anyMatchRandomOrder(Predicate)} instead. + * {@inheritDoc} + *

+ * Iterates the keys in dense (insertion-order-ish) order. This is fast when + * matches are rare or expected near the end of the array, but can be slow + * when matches are clustered at the start of the array. For workloads + * where many matches are expected, prefer {@link #anyMatchRandomOrder(Predicate)}, + * which scans in probe-table order and tends to find matches sooner when + * they are abundant. */ @Override public final boolean anyMatch(Predicate predicate) { @@ -362,11 +383,16 @@ public final boolean anyMatch(Predicate predicate) { } /** - * This method can be faster than {@link #anyMatch(Predicate)} if one expects - * to find many matches. But it is slower if one expects to find no matches or just a single one. + * Like {@link #anyMatch(Predicate)} but scans the probe table rather than + * the dense {@code keys} array, yielding a roughly hash-based order. + *

+ * This is faster than {@link #anyMatch(Predicate)} when many matches are + * expected (the predicate is more likely to short-circuit early), but + * slower when no or only a single match exists (each iteration must + * test against an empty slot first). * - * @param predicate the predicate to apply to elements of this collection - * @return {@code true} if any element of the collection matches the predicate + * @param predicate the predicate to apply + * @return {@code true} if any element matches the predicate */ public final boolean anyMatchRandomOrder(Predicate predicate) { var pIndex = positions.length - 1; @@ -381,14 +407,22 @@ public final boolean anyMatchRandomOrder(Predicate predicate) { @Override public final ExtendedIterator keyIterator() { - final var initialSize = size(); - final Runnable checkForConcurrentModification = () -> - { - if (size() != initialSize) throw new ConcurrentModificationException(); - }; - return new SparseArrayIterator<>(keys, keysPos, checkForConcurrentModification); + return new SparseArrayIterator<>(keys, keysPos, this); } + /** + * Locates the slot in {@link #positions} that holds {@code e} (with the + * given precomputed hash code). + *

+ * If the key is present, returns the (non-negative) probe-table slot + * index. If the key is absent, returns the bitwise complement of the + * empty probe-table slot at which the key would be inserted, allowing + * insertion to proceed without a second probe walk. + * + * @param e the key to locate + * @param hashCode {@code e.hashCode()} + * @return the position index if found, or {@code ~insertionPosition} if not + */ protected final int findPosition(final K e, final int hashCode) { var pIndex = calcStartIndexByHashCode(hashCode); while (true) { @@ -405,6 +439,15 @@ protected final int findPosition(final K e, final int hashCode) { } } + /** + * Locates the next empty slot in {@link #positions} along the probe chain + * for the given hash code, without checking any existing entries for + * equality. Used after a positions-array resize, when no duplicates can + * exist in the rebuilt table. + * + * @param hashCode the hash code being placed + * @return the index of an empty slot in the probe table + */ protected final int findEmptySlotWithoutEqualityCheck(final int hashCode) { var pIndex = calcStartIndexByHashCode(hashCode); while (true) { @@ -435,11 +478,63 @@ public void clear() { @Override public final Spliterator keySpliterator() { - final var initialSize = this.size(); - final Runnable checkForConcurrentModification = () -> - { - if (this.size() != initialSize) throw new ConcurrentModificationException(); - }; - return new SparseArraySpliterator<>(keys, keysPos, checkForConcurrentModification); + return new SparseArraySpliterator<>(keys, keysPos, this); + } + + /** + * Gets the key at the given index. + * Array bounds are not checked. The caller must ensure the index is valid and corresponds to a non-null key. + * + * @param i the index + * @return the key at the given index + */ + public K getKeyAt(int i) { + return keys[i]; + } + + /** + * Returns the index of the entry holding {@code key}, or {@code -1} if not present. 
+ * + * @param key the key to look up + * @return the entry index, or {@code -1} if the key is absent + */ + public int indexOf(K key) { + final var pIndex = findPosition(key, key.hashCode()); + if (pIndex < 0) { + return -1; + } else { + return ~positions[pIndex]; + } + } + + /** + * Functional interface used by {@link #forEachKey} to receive each live + * key along with the stable index it occupies. + * + * @param the key type + */ + @FunctionalInterface + public interface KeyAndIndexConsumer { + /** + * Receive a single key and its index. + * + * @param key the key + * @param index the stable index of the key in the underlying array + */ + void accept(K key, int index); + } + + /** + * Sequentially invokes {@code consumer} for every live key with its index. + * Skips freed slots. + * + * @param consumer receives each key/index pair + */ + public void forEachKey(KeyAndIndexConsumer consumer) { + for (int i = 0; i < keysPos; i++) { + if(keys[i] != null) { + consumer.accept(keys[i], i); + } + } } } diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashMap.java b/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashMap.java index 04c2761416b..e3f741ba485 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashMap.java +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashMap.java @@ -25,39 +25,56 @@ import org.apache.jena.mem.spliterator.SparseArraySpliterator; import org.apache.jena.util.iterator.ExtendedIterator; -import java.util.ConcurrentModificationException; import java.util.Spliterator; import java.util.function.Supplier; import java.util.function.UnaryOperator; /** - * Map which grows, if needed but never shrinks. - * This map does not guarantee any order. Although due to the way it is implemented the elements have a certain order. - * This map does not allow null keys. - * This map is not thread safe. - * It´s purpose is to support fast add, remove, contains and stream / iterate operations. 
- * Only remove operations are not as fast as in {@link java.util.HashMap} - * Iterating over this map does not get much faster again after removing elements because the map is not compacted. + * Hash map specialization built on top of {@link FastHashBase}. + * Grows on demand but never shrinks, does not guarantee iteration order, + * does not allow {@code null} keys, and is not thread-safe. + *

+ * Optimized for fast {@code add} / {@code containsKey} / {@code stream} / + * iterate operations. Removal is somewhat slower than in + * {@link java.util.HashMap} because of the back-shifting performed on the + * probe table. Iteration speed does not recover after many removals because + * the dense {@code keys} array is not compacted. + * + * @param the key type + * @param the value type */ -public abstract class FastHashMap extends FastHashBase implements JenaMap { +public abstract class FastHashMap extends FastHashBase implements JenaMapIndexed { + /** + * Parallel array to {@code keys} holding the value associated with each + * stored key. {@code values[i]} is the value for {@code keys[i]} when + * {@code keys[i]} is non-null. + */ protected V[] values; + /** + * Creates a map with the given initial key-array capacity. + * + * @param initialSize the initial capacity of the keys/values arrays + */ protected FastHashMap(int initialSize) { super(initialSize); this.values = newValuesArray(keys.length); } + /** + * Creates a map with the default initial capacity. + */ protected FastHashMap() { super(); this.values = newValuesArray(keys.length); } /** - * Copy constructor. - * The new map will contain all the same keys and values of the map to copy. + * Copy constructor. The new map contains the same keys and the same + * value references as {@code mapToCopy}. * - * @param mapToCopy + * @param mapToCopy the source map */ protected FastHashMap(final FastHashMap mapToCopy) { super(mapToCopy); @@ -66,10 +83,13 @@ protected FastHashMap(final FastHashMap mapToCopy) { } /** - * Copy constructor with value processor. + * Copy constructor that transforms each value via {@code valueProcessor}. + * Useful when the values are mutable and need to be deep-copied to keep + * the new map independent from the source. 
* - * @param mapToCopy - * @param valueProcessor + * @param mapToCopy the source map + * @param valueProcessor function applied to every non-null value to obtain + * the value to put in the new map */ protected FastHashMap(final FastHashMap mapToCopy, final UnaryOperator valueProcessor) { super(mapToCopy); @@ -82,6 +102,12 @@ protected FastHashMap(final FastHashMap mapToCopy, final UnaryOperator } } + /** + * Gets a new array of values with the given size. + * + * @param size the size of the array + * @return the new array + */ protected abstract V[] newValuesArray(int size); @Override @@ -106,12 +132,10 @@ public void clear() { @Override public boolean tryPut(K key, V value) { + growPositionsArrayIfNeeded(); final var hashCode = key.hashCode(); - var pIndex = findPosition(key, hashCode); + final var pIndex = findPosition(key, hashCode); if (pIndex < 0) { - if (tryGrowPositionsArrayIfNeeded()) { - pIndex = findPosition(key, hashCode); - } final var eIndex = getFreeKeyIndex(); keys[eIndex] = key; values[eIndex] = value; @@ -126,12 +150,10 @@ public boolean tryPut(K key, V value) { @Override public void put(K key, V value) { + growPositionsArrayIfNeeded(); final var hashCode = key.hashCode(); - var pIndex = findPosition(key, hashCode); + final var pIndex = findPosition(key, hashCode); if (pIndex < 0) { - if (tryGrowPositionsArrayIfNeeded()) { - pIndex = findPosition(key, hashCode); - } final var eIndex = getFreeKeyIndex(); keys[eIndex] = key; values[eIndex] = value; @@ -142,8 +164,27 @@ public void put(K key, V value) { } } + @Override + public int putAndGetIndex(K key, V value) { + growPositionsArrayIfNeeded(); + final int hashCode = key.hashCode(); + final var pIndex = findPosition(key, hashCode); + final int eIndex; + if (pIndex < 0) { + eIndex = getFreeKeyIndex(); + keys[eIndex] = key; + hashCodesOrDeletedIndices[eIndex] = hashCode; + positions[~pIndex] = ~eIndex; + } else { + eIndex = ~positions[pIndex]; + } + values[eIndex] = value; + return eIndex; + } + /** * 
Returns the value at the given index. + * Array bounds are not checked. The caller must ensure the index is valid and corresponds to a non-null key. * * @param i index * @return value @@ -178,12 +219,12 @@ public V computeIfAbsent(K key, Supplier absentValueSupplier) { var pIndex = findPosition(key, hashCode); if (pIndex < 0) { if (tryGrowPositionsArrayIfNeeded()) { - pIndex = findPosition(key, hashCode); + pIndex = ~findEmptySlotWithoutEqualityCheck(hashCode); } + final var value = absentValueSupplier.get(); final var eIndex = getFreeKeyIndex(); keys[eIndex] = key; hashCodesOrDeletedIndices[eIndex] = hashCode; - final var value = absentValueSupplier.get(); values[eIndex] = value; positions[~pIndex] = ~eIndex; return value; @@ -194,18 +235,20 @@ public V computeIfAbsent(K key, Supplier absentValueSupplier) { @Override public void compute(K key, UnaryOperator valueProcessor) { - final int hashCode = key.hashCode(); + final var hashCode = key.hashCode(); var pIndex = findPosition(key, hashCode); if (pIndex < 0) { final var value = valueProcessor.apply(null); if (value == null) return; + if(tryGrowPositionsArrayIfNeeded()) { + pIndex = ~findEmptySlotWithoutEqualityCheck(hashCode); + } final var eIndex = getFreeKeyIndex(); keys[eIndex] = key; hashCodesOrDeletedIndices[eIndex] = hashCode; values[eIndex] = value; positions[~pIndex] = ~eIndex; - tryGrowPositionsArrayIfNeeded(); } else { var eIndex = ~positions[pIndex]; final var value = valueProcessor.apply(values[eIndex]); @@ -217,24 +260,13 @@ public void compute(K key, UnaryOperator valueProcessor) { } } - @Override public ExtendedIterator valueIterator() { - final var initialSize = size(); - final Runnable checkForConcurrentModification = () -> - { - if (size() != initialSize) throw new ConcurrentModificationException(); - }; - return new SparseArrayIterator<>(values, keysPos, checkForConcurrentModification); + return new SparseArrayIterator<>(values, keysPos, this); } @Override public Spliterator valueSpliterator() { 
- final var initialSize = this.size(); - final Runnable checkForConcurrentModification = () -> - { - if (this.size() != initialSize) throw new ConcurrentModificationException(); - }; - return new SparseArraySpliterator<>(values, keysPos, checkForConcurrentModification); + return new SparseArraySpliterator<>(values, keysPos, this); } } diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashSet.java b/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashSet.java index 134a0092e22..564c52f1265 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashSet.java +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/FastHashSet.java @@ -21,39 +21,42 @@ package org.apache.jena.mem.collection; -import org.apache.jena.mem.iterator.SparseArrayIndexedIterator; -import org.apache.jena.mem.spliterator.SparseArrayIndexedSpliterator; -import org.apache.jena.util.iterator.ExtendedIterator; - -import java.util.ConcurrentModificationException; -import java.util.Spliterator; -import java.util.stream.Stream; -import java.util.stream.StreamSupport; - /** - * Set which grows, if needed but never shrinks. - * This set does not guarantee any order. Although due to the way it is implemented the elements have a certain order. - * This set does not allow null values. - * This set is not thread safe. - * It´s purpose is to support fast add, remove, contains and stream / iterate operations. - * Only remove operations are not as fast as in {@link java.util.HashSet} - * Iterating over this set not get much faster again after removing elements because the set is not compacted. + * Hash set specialization built on top of {@link FastHashBase}. + * Grows on demand but never shrinks, does not guarantee iteration order, + * does not allow {@code null} elements, and is not thread-safe. + *

+ * Optimized for fast {@code add} / {@code containsKey} / {@code stream} / + * iterate operations. Removal is somewhat slower than in + * {@link java.util.HashSet} because of the back-shifting performed on the + * probe table. Iteration speed does not recover after many removals because + * the dense {@code keys} array is not compacted. + * + * @param the element type */ -public abstract class FastHashSet extends FastHashBase implements JenaSetHashOptimized { +public abstract class FastHashSet extends FastHashBase implements JenaSetIndexed { - protected FastHashSet(int initialSize) { + /** + * Creates a set with the given initial key-array capacity. + * + * @param initialSize the initial capacity of the keys array + */ + public FastHashSet(final int initialSize) { super(initialSize); } - protected FastHashSet() { + /** + * Creates a set with the default initial capacity. + */ + public FastHashSet() { super(); } /** - * Copy constructor. - * The new set will contain all the same keys of the set to copy. + * Copy constructor. The new set contains the same elements as + * {@code setToCopy}. * - * @param setToCopy + * @param setToCopy the source set */ protected FastHashSet(final FastHashSet setToCopy) { super(setToCopy); @@ -65,12 +68,12 @@ public boolean tryAdd(K key) { } @Override - public boolean tryAdd(K value, int hashCode) { + public boolean tryAdd(K key, int hashCode) { growPositionsArrayIfNeeded(); - var pIndex = findPosition(value, hashCode); + final var pIndex = findPosition(key, hashCode); if (pIndex < 0) { final var eIndex = getFreeKeyIndex(); - keys[eIndex] = value; + keys[eIndex] = key; hashCodesOrDeletedIndices[eIndex] = hashCode; positions[~pIndex] = ~eIndex; return true; @@ -79,28 +82,23 @@ public boolean tryAdd(K value, int hashCode) { } /** - * Add and get the index of the added element. + * Add an element and return the index it was stored at. 
+ * If the element is already present, returns the bitwise complement + * ({@code ~existingIndex}) of the existing index, so callers can + * distinguish "newly inserted" from "already present" while still + * recovering the index in both cases. * - * @param value the value to add - * @return the index of the added element or the inverse (~) index of the existing element + * @param key the element to add + * @return the new index, or {@code ~existingIndex} if already present */ - public int addAndGetIndex(K value) { - return addAndGetIndex(value, value.hashCode()); - } - - /** - * Add and get the index of the added element. - * - * @param value the value to add - * @param hashCode the hash code of the value. This is a performance optimization. - * @return the index of the added element or the inverse (~) index of the existing element - */ - public int addAndGetIndex(final K value, final int hashCode) { + @Override + public int addAndGetIndex(K key) { growPositionsArrayIfNeeded(); - final var pIndex = findPosition(value, hashCode); + final var hashCode = key.hashCode(); + final var pIndex = findPosition(key, hashCode); if (pIndex < 0) { final var eIndex = getFreeKeyIndex(); - keys[eIndex] = value; + keys[eIndex] = key; hashCodesOrDeletedIndices[eIndex] = hashCode; positions[~pIndex] = ~eIndex; return eIndex; @@ -122,72 +120,4 @@ public void addUnchecked(K value, int hashCode) { hashCodesOrDeletedIndices[eIndex] = hashCode; positions[findEmptySlotWithoutEqualityCheck(hashCode)] = ~eIndex; } - - /** - * Gets the key at the given index. - * - * @param i the index - * @return the key at the given index - */ - public K getKeyAt(int i) { - return keys[i]; - } - - /** - * Entry pairing a key with its index in the set. - * @param index index of the key in the set - * @param key the key - * @param the type of the key - */ - public record IndexedKey(int index, K key) {} - - /** - * Get an iterator over pairs of keys and their indices in the set. 
- * The iterator is not thread safe. - * - * @return an iterator over pairs of keys and their indices in the set - */ - public final ExtendedIterator> indexedKeyIterator() { - final var initialSize = size(); - final Runnable checkForConcurrentModification = () -> - { - if (size() != initialSize) throw new ConcurrentModificationException(); - }; - return new SparseArrayIndexedIterator<>(keys, keysPos, checkForConcurrentModification); - } - - /** - * Get a spliterator over pairs of keys and their indices in the set. - * The spliterator is not thread safe. - * - * @return a spliterator over pairs of keys and their indices in the set - */ - public final Spliterator> indexedKeySpliterator() { - final var initialSize = this.size(); - final Runnable checkForConcurrentModification = () -> - { - if (this.size() != initialSize) throw new ConcurrentModificationException(); - }; - return new SparseArrayIndexedSpliterator<>(keys, keysPos, checkForConcurrentModification); - } - - /** - * Get a stream over pairs of keys and their indices in the set. - * The stream is not thread safe. - * - * @return a stream over pairs of keys and their indices in the set - */ - public final Stream> indexedKeyStream() { - return StreamSupport.stream(indexedKeySpliterator(), false); - } - - /** - * Get a parallel stream over pairs of keys and their indices in the set. - * The stream is not thread safe. 
- * - * @return a parallel stream over pairs of keys and their indices in the set - */ - public final Stream> indexedKeyStreamParallel() { - return StreamSupport.stream(indexedKeySpliterator(), true); - } } diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/HashCommonBase.java b/jena-core/src/main/java/org/apache/jena/mem/collection/HashCommonBase.java index 5664a900170..b277789c717 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/collection/HashCommonBase.java +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/HashCommonBase.java @@ -25,7 +25,6 @@ import org.apache.jena.shared.JenaException; import org.apache.jena.util.iterator.ExtendedIterator; -import java.util.ConcurrentModificationException; import java.util.Spliterator; import java.util.function.Predicate; @@ -36,7 +35,7 @@ * * @param the element type */ -public abstract class HashCommonBase { +public abstract class HashCommonBase implements JenaMapSetCommon { /** * Jeremy suggests, from his experiments, that load factors more than * 0.6 leave the table too dense, and little advantage is gained below 0.4. @@ -78,7 +77,7 @@ protected HashCommonBase(int initialCapacity) { * Copy constructor. * The new table will contain all the same keys of the table to copy. 
* - * @param baseToCopy + * @param baseToCopy the table to copy */ protected HashCommonBase(final HashCommonBase baseToCopy) { this.keys = newKeysArray(baseToCopy.keys.length); @@ -209,18 +208,10 @@ public boolean anyMatch(final Predicate predicate) { } public ExtendedIterator keyIterator() { - final var initialSize = size; - final Runnable checkForConcurrentModification = () -> { - if (size != initialSize) throw new ConcurrentModificationException(); - }; - return new SparseArrayIterator<>(keys, checkForConcurrentModification); + return new SparseArrayIterator<>(keys, this); } public Spliterator keySpliterator() { - final var initialSize = size; - final Runnable checkForConcurrentModification = () -> { - if (size != initialSize) throw new ConcurrentModificationException(); - }; - return new SparseArraySpliterator<>(keys, checkForConcurrentModification); + return new SparseArraySpliterator<>(keys, this); } } diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/HashCommonMap.java b/jena-core/src/main/java/org/apache/jena/mem/collection/HashCommonMap.java index 62e7bd56733..dcdd5557654 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/collection/HashCommonMap.java +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/HashCommonMap.java @@ -24,7 +24,6 @@ import org.apache.jena.mem.spliterator.SparseArraySpliterator; import org.apache.jena.util.iterator.ExtendedIterator; -import java.util.ConcurrentModificationException; import java.util.Spliterator; import java.util.function.Supplier; import java.util.function.UnaryOperator; @@ -207,19 +206,11 @@ protected void removeFrom(int here) { @Override public ExtendedIterator valueIterator() { - final var initialSize = size; - final Runnable checkForConcurrentModification = () -> { - if (size != initialSize) throw new ConcurrentModificationException(); - }; - return new SparseArrayIterator<>(values, checkForConcurrentModification); + return new SparseArrayIterator<>(values, this); } @Override 
public Spliterator valueSpliterator() { - final var initialSize = size; - final Runnable checkForConcurrentModification = () -> { - if (size != initialSize) throw new ConcurrentModificationException(); - }; - return new SparseArraySpliterator<>(values, checkForConcurrentModification); + return new SparseArraySpliterator<>(values, this); } } diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMap.java b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMap.java index 3e13613b08f..6d2423e0097 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMap.java +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMap.java @@ -30,6 +30,7 @@ /** * A map from keys of type {@code K} to values of type {@code V}. + * Not thread-safe and does not allow {@code null} keys. * * @param the type of the keys in the map * @param the type of the values in the map diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMapIndexed.java b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMapIndexed.java new file mode 100644 index 00000000000..67c366d00eb --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMapIndexed.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.collection; + +/** + * Extension of {@link JenaMap} that exposes index-based access and lets callers + * supply a precomputed hash code for the key. Indices are stable handles to + * entries (returned by {@link #putAndGetIndex(Object, Object)}) and remain + * valid until the corresponding entry is removed. + *

+ * The hash-code overloads are a performance shortcut for callers that already + * have the hash at hand (typically because the same key is stored in several + * collections). The supplied hash code MUST equal {@code key.hashCode()}, or + * the map will misbehave. + * + * @param the type of the keys in the map + * @param the type of the values in the map + */ +public interface JenaMapIndexed extends JenaMap { + + /** + * Returns the index of the entry with the given key, or a negative value + * if no such entry exists. + * + * @param key the key to look up + * @return the index of the entry, or a negative value if absent + */ + int indexOf(K key); + + /** + * Returns the key stored at the given index. + * + * @param index the index of the entry + * @return the key at that index + */ + K getKeyAt(int index); + + /** + * Returns the value stored at the given index. + * + * @param index the index of the entry + * @return the value at that index + */ + V getValueAt(int index); + + /** + * Put a key-value pair and return the index of the affected entry. + * If the key is already present, its value is updated and the existing + * index is returned. + * + * @param key the key to put + * @param value the value to put + * @return the index of the entry holding {@code key} + */ + int putAndGetIndex(K key, V value); +} diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMapSetCommon.java b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMapSetCommon.java index 2533714ce6b..7f96baa19f9 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMapSetCommon.java +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaMapSetCommon.java @@ -28,22 +28,23 @@ import java.util.stream.StreamSupport; /** - * Common interface for {@link JenaMap} and {@link JenaSet}. * + * Operations shared between the map ({@link JenaMap}) and the set + * ({@link JenaSet}) collections used in the {@code mem} triple store + * implementations. + *

+ * These collections trade some flexibility for speed: they expose only the + * operations needed by triple-store internals (no full {@link java.util.Map} + * or {@link java.util.Set} contract). They are not thread-safe. * - * @param the type of the keys/elements in the collection + * @param the type of the keys (or elements, for sets) in the collection */ -public interface JenaMapSetCommon { +public interface JenaMapSetCommon extends Sized { /** * Clear the collection. */ void clear(); - /** - * @return the number of elements in the collection - */ - int size(); - /** * @return true if the collection is empty */ @@ -75,7 +76,10 @@ public interface JenaMapSetCommon { /** * Removes a key from the collection. - * Attention: Implementations may assume that the key is present. + *

+ * Attention: implementations may assume the key is present and may produce + * undefined behavior (including silently corrupting internal state) if it + * is not. Use {@link #tryRemove(Object)} when in doubt. * * @param key the key to remove */ diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSet.java b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSet.java index d3b8a557be9..03848073f56 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSet.java +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSet.java @@ -21,9 +21,10 @@ package org.apache.jena.mem.collection; /** - * Set interface specialized for the use cases in triple store implementations. + * Set interface specialized for the use cases in triple-store implementations. + * Not thread-safe; does not allow {@code null} elements. * - * @param <E> + * @param <E> the element type of the set */ public interface JenaSet<E> extends JenaMapSetCommon<E> { @@ -31,13 +32,16 @@ public interface JenaSet<E> extends JenaMapSetCommon<E> { * Add the key to the set if it is not already present. * * @param key the key to add - * @return true if the key was added, false if it was already present + * @return {@code true} if the key was added, {@code false} if it was already present */ boolean tryAdd(E key); /** - * Add the key to the set without checking if it is already present. - * Attention: This method must only be used if it is guaranteed that the key is not already present. + * Add the key to the set without checking whether it is already present. + *

+ * Attention: this method must only be used if the caller has ensured that + * the key is not already in the set; otherwise the set's invariants will + * break (duplicates may be inserted). * * @param key the key to add */ diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSetHashOptimized.java b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSetHashOptimized.java index 8cc8aad8daf..0e1d032b356 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSetHashOptimized.java +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSetHashOptimized.java @@ -22,17 +22,50 @@ /** - * Extension of {@link JenaSet} that allows to add and remove elements - * with a given hash code. - * This is useful if the hash code is already known. - * Attention: The hash code must be consistent with E::hashCode(). + * Extension of {@link JenaSet} that lets callers supply a precomputed hash + * code. + *

+ * Attention: any caller-supplied hash code MUST equal {@code key.hashCode()} + * of the element it accompanies; if it does not, the set will misbehave. + * + * @param <E> the element type of the set */ public interface JenaSetHashOptimized<E> extends JenaSet<E> { + + /** + * Add an element with the given precomputed hash code if it is not + * already present. + * + * @param key the element to add + * @param hashCode {@code key.hashCode()} + * @return {@code true} if added, {@code false} if already present + */ boolean tryAdd(E key, int hashCode); + /** + * Add an element with the given precomputed hash code without checking + * whether it is already present. The caller MUST ensure the key is absent. + * + * @param key the element to add + * @param hashCode {@code key.hashCode()} + */ void addUnchecked(E key, int hashCode); + /** + * Try to remove an element with the given precomputed hash code. + * + * @param key the element to remove + * @param hashCode {@code key.hashCode()} + * @return {@code true} if removed, {@code false} if it was not present + */ boolean tryRemove(E key, int hashCode); + /** + * Remove an element assumed to be present, with the given precomputed + * hash code. Behavior is undefined if the element is not in the set. + * + * @param key the element to remove + * @param hashCode {@code key.hashCode()} + */ void removeUnchecked(E key, int hashCode); } diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSetIndexed.java b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSetIndexed.java new file mode 100644 index 00000000000..3a621c3ab59 --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/JenaSetIndexed.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.collection; + + +/** + * Extension of {@link JenaSetHashOptimized} that exposes index-based access to elements. + * Indices are stable handles to entries (returned by {@link #addAndGetIndex(Object)}) and remain + * valid until the corresponding entry is removed. + * + * @param the element type of the set + */ +public interface JenaSetIndexed extends JenaSetHashOptimized { + + /** + * Add an element and return the index it was stored at. + * If the element is already present, returns the bitwise complement + * ({@code ~existingIndex}) of the existing index, so callers can + * distinguish "newly inserted" from "already present" while still + * recovering the index in both cases. + * + * @param key the element to add + * @return the new index, or {@code ~existingIndex} if already present + */ + int addAndGetIndex(final E key); + + /** + * Remove the given element and return the index it occupied before removal. + * + * @param key the element to remove + * @return the former index of the element, or {@code -1} if it was not present + */ + int removeAndGetIndex(final E key); + + /** + * Returns the element stored at the given index. 
+ * + * @param index the index to read + * @return the element at that index + */ + E getKeyAt(int index); + + /** + * Returns the index of the given element, or a negative value if it is + * not in the set. + * + * @param key the element to look up + * @return the index of {@code key}, or a negative value if absent + */ + int indexOf(E key); +} diff --git a/jena-core/src/main/java/org/apache/jena/mem/collection/Sized.java b/jena-core/src/main/java/org/apache/jena/mem/collection/Sized.java new file mode 100644 index 00000000000..237740ce8e3 --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/collection/Sized.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.apache.jena.mem.collection; + +/** + * Base interface for sized collections. + * It is typically used to detect concurrent modifications in iterators and spliterators + * by snapshotting the size at construction time and rechecking it at each advance/forEach boundary. 
+ */ +public interface Sized { + + /** + * @return the number of elements in the collection + */ + int size(); +} \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/mem/iterator/IteratorOfJenaSets.java b/jena-core/src/main/java/org/apache/jena/mem/iterator/IteratorOfJenaSets.java index b0ac6e994bb..8cfc8948a25 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/iterator/IteratorOfJenaSets.java +++ b/jena-core/src/main/java/org/apache/jena/mem/iterator/IteratorOfJenaSets.java @@ -30,16 +30,27 @@ import java.util.function.Consumer; /** - * Iterator that iterates over the entries of sets which are contained in the given iterator of sets. + * Flat-map style iterator that yields every element of every {@link JenaSet} + * produced by the given parent iterator. Empty inner sets are silently + * skipped. Equivalent in spirit to a one-level {@code flatMap} but tailored + * to the {@link JenaSet} API and to {@link NiceIterator}. * - * @param <E> the type of the elements + * @param <E> the element type of the inner sets */ public class IteratorOfJenaSets<E> extends NiceIterator<E> { - final Iterator<JenaSet<E>> parentIterator; + /** Source iterator producing the sets to flatten. */ + private final Iterator<JenaSet<E>> parentIterator; - ExtendedIterator<E> currentIterator; + /** Iterator over the keys of the set currently being consumed. */ + private ExtendedIterator<E> currentIterator; + /** + * Create a flat iterator over the elements of every set produced by + * {@code parentIterator}.
+ * + * @param parentIterator the source iterator of sets + */ public IteratorOfJenaSets(Iterator> parentIterator) { this.parentIterator = parentIterator; this.currentIterator = parentIterator.hasNext() diff --git a/jena-core/src/main/java/org/apache/jena/mem/iterator/SparseArrayIndexedIterator.java b/jena-core/src/main/java/org/apache/jena/mem/iterator/SparseArrayIndexedIterator.java deleted file mode 100644 index 37f103eae25..00000000000 --- a/jena-core/src/main/java/org/apache/jena/mem/iterator/SparseArrayIndexedIterator.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.apache.jena.mem.iterator; - -import org.apache.jena.mem.collection.FastHashSet; -import org.apache.jena.util.iterator.NiceIterator; - -import java.util.Iterator; -import java.util.NoSuchElementException; -import java.util.function.Consumer; - -/** - * An iterator over a sparse array, that skips null entries. - * This iterator returns elements as {@link FastHashSet.IndexedKey} objects, - * which contain both the index and the value of the element. 
- * - * The iterator works in ascending order, starting from index 0 up to the specified exclusive index. - * - * This iterator will check for concurrent modifications by invoking a {@link Runnable} - * - * @param the type of the array elements - */ -@SuppressWarnings("all") -public class SparseArrayIndexedIterator extends NiceIterator> implements Iterator> { - - private final E[] entries; - private final Runnable checkForConcurrentModification; - private int pos = 0; - private final int toIndexExclusive; - private boolean hasNext = false; - - public SparseArrayIndexedIterator(final E[] entries, final Runnable checkForConcurrentModification) { - this.entries = entries; - this.toIndexExclusive = entries.length; - this.checkForConcurrentModification = checkForConcurrentModification; - } - - public SparseArrayIndexedIterator(final E[] entries, int toIndexExclusive, final Runnable checkForConcurrentModification) { - this.entries = entries; - this.toIndexExclusive = toIndexExclusive; - this.checkForConcurrentModification = checkForConcurrentModification; - } - - /** - * Returns {@code true} if the iteration has more elements. - * (In other words, returns {@code true} if {@link #next} would - * return an element rather than throwing an exception.) - * - * @return {@code true} if the iteration has more elements - */ - @Override - public boolean hasNext() { - while (toIndexExclusive > pos) { - if (null != entries[pos]) { - hasNext = true; - return true; - } - pos++; - } - hasNext = false; - return false; - } - - /** - * Returns the next element in the iteration. 
- * - * @return the next element in the iteration - * @throws NoSuchElementException if the iteration has no more elements - */ - @Override - public FastHashSet.IndexedKey next() { - this.checkForConcurrentModification.run(); - if (hasNext || hasNext()) { - hasNext = false; - return new FastHashSet.IndexedKey<>(pos, entries[pos++]); - } - throw new NoSuchElementException(); - } - - @Override - public void forEachRemaining(Consumer> action) { - while (toIndexExclusive > pos) { - if (null != entries[pos]) { - action.accept(new FastHashSet.IndexedKey<>(pos, entries[pos])); - } - pos++; - } - this.checkForConcurrentModification.run(); - } -} diff --git a/jena-core/src/main/java/org/apache/jena/mem/iterator/SparseArrayIterator.java b/jena-core/src/main/java/org/apache/jena/mem/iterator/SparseArrayIterator.java index 936476a80ff..e0b79cd1ff6 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/iterator/SparseArrayIterator.java +++ b/jena-core/src/main/java/org/apache/jena/mem/iterator/SparseArrayIterator.java @@ -21,34 +21,55 @@ package org.apache.jena.mem.iterator; +import org.apache.jena.mem.collection.Sized; import org.apache.jena.util.iterator.NiceIterator; -import java.util.Iterator; +import java.util.ConcurrentModificationException; import java.util.NoSuchElementException; import java.util.function.Consumer; /** - * An iterator over a sparse array, that skips null entries. + * Iterator over a sparse array, walking from high index to low and skipping + * {@code null} entries. Detects concurrent modifications by snapshotting + * {@code set.size()} at construction time and rechecking it on each call to + * {@link #next()} / {@link #forEachRemaining(Consumer)}; throws + * {@link ConcurrentModificationException} if the size has changed. 
* * @param the type of the array elements */ -public class SparseArrayIterator extends NiceIterator implements Iterator { +public class SparseArrayIterator extends NiceIterator { private final E[] entries; - private final Runnable checkForConcurrentModification; + private final Sized set; + private final int sizeOfSetAtStart; private int pos; private boolean hasNext = false; - public SparseArrayIterator(final E[] entries, final Runnable checkForConcurrentModification) { + /** + * Iterate over the whole array. + * + * @param entries the backing array (not copied) + * @param set the owning collection used to detect concurrent modifications + */ + public SparseArrayIterator(final E[] entries, final Sized set) { this.entries = entries; this.pos = entries.length - 1; - this.checkForConcurrentModification = checkForConcurrentModification; + this.set = set; + this.sizeOfSetAtStart = set.size(); } - public SparseArrayIterator(final E[] entries, int toIndexExclusive, final Runnable checkForConcurrentModification) { + /** + * Iterate over {@code entries[0 .. toIndexExclusive)} (in reverse order). 
+ * + * @param entries the backing array (not copied) + * @param toIndexExclusive exclusive upper bound on the iterated slice + * @param set the owning collection used to detect concurrent modifications + */ + public SparseArrayIterator(final E[] entries, int toIndexExclusive, final Sized set) { this.entries = entries; this.pos = toIndexExclusive - 1; - this.checkForConcurrentModification = checkForConcurrentModification; + this.set = set; + this.sizeOfSetAtStart = set.size(); } /** @@ -62,13 +83,11 @@ public SparseArrayIterator(final E[] entries, int toIndexExclusive, final Runnab public boolean hasNext() { while (-1 < pos) { if (null != entries[pos]) { - hasNext = true; - return true; + return hasNext = true; } pos--; } - hasNext = false; - return false; + return hasNext = false; } /** @@ -79,7 +98,7 @@ public boolean hasNext() { */ @Override public E next() { - this.checkForConcurrentModification.run(); + if (sizeOfSetAtStart != set.size()) throw new ConcurrentModificationException(); if (hasNext || hasNext()) { hasNext = false; return entries[pos--]; @@ -95,6 +114,6 @@ public void forEachRemaining(Consumer action) { } pos--; } - this.checkForConcurrentModification.run(); + if (sizeOfSetAtStart != set.size()) throw new ConcurrentModificationException(); } } diff --git a/jena-core/src/main/java/org/apache/jena/mem/pattern/MatchPattern.java b/jena-core/src/main/java/org/apache/jena/mem/pattern/MatchPattern.java index 94008b155f1..d8536f56311 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/pattern/MatchPattern.java +++ b/jena-core/src/main/java/org/apache/jena/mem/pattern/MatchPattern.java @@ -22,8 +22,17 @@ package org.apache.jena.mem.pattern; /** - * A pattern for matching triples. - * The pattern is defined by the wildcard positions for the subject, predicate and object. + * Categorizes a triple-match pattern by which of the subject, predicate and + * object slots are concrete and which are wildcards (i.e. {@code Node.ANY} + * or {@code null}). + *

+ * The eight enum values cover every possible combination. Triple-store + * implementations dispatch on this enum to pick the most efficient lookup + * path for each kind of pattern (e.g. a fully concrete {@link #SUB_PRE_OBJ} + * is answered directly from the triple set, while a partially open pattern + * such as {@link #ANY_PRE_OBJ} is answered through an index intersection). + * + * @see PatternClassifier */ public enum MatchPattern { /** diff --git a/jena-core/src/main/java/org/apache/jena/mem/pattern/PatternClassifier.java b/jena-core/src/main/java/org/apache/jena/mem/pattern/PatternClassifier.java index 32a6ba182a1..e4cf5644eca 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/pattern/PatternClassifier.java +++ b/jena-core/src/main/java/org/apache/jena/mem/pattern/PatternClassifier.java @@ -25,14 +25,15 @@ import org.apache.jena.graph.Triple; /** - * Classify a triple match into one of the 8 match patterns. + * Utility class that classifies a triple match into one of the eight + * {@link MatchPattern} values. *

- * The classification is based on the concrete-ness of the subject, predicate and object. - * A concrete node is one that is not a variable. + * The classification is based on which of the subject, predicate and object + * are concrete (anything that is not a variable / wildcard / + * {@code null}) and which are wildcards. The result is used by triple-store + * implementations to dispatch to the most efficient lookup path. *

- * The classification is used to select the most efficient implementation of a triple store. - *

- * This is a utility class; there is no need to instantiate it. + * All operations are stateless; this class is not meant to be instantiated. * * @see MatchPattern */ @@ -41,8 +42,16 @@ public class PatternClassifier { private PatternClassifier() { } + /** + * Classify a triple match. + * + * @param tripleMatch the match triple, possibly containing wildcard nodes + * @return the corresponding {@link MatchPattern} + */ public static MatchPattern classify(Triple tripleMatch) { - if (tripleMatch.isConcrete()) { + if (tripleMatch.getSubject().isConcrete() + && tripleMatch.getPredicate().isConcrete() + && tripleMatch.getObject().isConcrete()) { return MatchPattern.SUB_PRE_OBJ; } else { if (tripleMatch.getSubject().isConcrete()) { @@ -73,6 +82,15 @@ public static MatchPattern classify(Triple tripleMatch) { } } + /** + * Classify a triple match given as three nodes. + * Any {@code null} or non-concrete node is treated as a wildcard. + * + * @param sm subject node, or {@code null}/wildcard + * @param pm predicate node, or {@code null}/wildcard + * @param om object node, or {@code null}/wildcard + * @return the corresponding {@link MatchPattern} + */ public static MatchPattern classify(Node sm, Node pm, Node om) { if (null != sm && sm.isConcrete()) { if (null != pm && pm.isConcrete()) { @@ -103,6 +121,5 @@ public static MatchPattern classify(Node sm, Node pm, Node om) { } } } - } } diff --git a/jena-core/src/main/java/org/apache/jena/mem/spliterator/ArraySpliterator.java b/jena-core/src/main/java/org/apache/jena/mem/spliterator/ArraySpliterator.java index 43bbfeeaea8..a5033c22cde 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/spliterator/ArraySpliterator.java +++ b/jena-core/src/main/java/org/apache/jena/mem/spliterator/ArraySpliterator.java @@ -21,52 +21,57 @@ package org.apache.jena.mem.spliterator; +import org.apache.jena.mem.collection.Sized; + +import java.util.ConcurrentModificationException; import java.util.Spliterator; import java.util.function.Consumer; 
/** - * A spliterator for arrays. This spliterator will iterate over the array - * entries within the given boundaries. - *

- * This spliterator supports splitting into sub-spliterators. + * Top-level spliterator over a contiguous array slice {@code [0, toIndex)}, + * iterating from high index to low. Supports splitting into + * {@link ArraySubSpliterator} children for parallel traversal. *

- * The spliterator will check for concurrent modifications by invoking a {@link Runnable} - * before each action. + * Detects concurrent modifications by snapshotting {@code set.size()} at + * construction time and rechecking it at each advance/forEach boundary. + * Throws {@link ConcurrentModificationException} if the size has changed. * - * @param + * @param the element type */ public class ArraySpliterator implements Spliterator { private final E[] entries; - private final Runnable checkForConcurrentModification; + private final Sized set; + private final int sizeOfSetAtStart; private int pos; /** - * Create a spliterator for the given array, with the given size. + * Create a spliterator over {@code entries[0 .. toIndex)}. * - * @param entries the array - * @param toIndex the index of the last element, exclusive - * @param checkForConcurrentModification runnable to check for concurrent modifications + * @param entries the backing array (not copied) + * @param toIndex exclusive upper bound on the iterated slice + * @param set the owning collection used to detect concurrent modifications */ - public ArraySpliterator(final E[] entries, final int toIndex, final Runnable checkForConcurrentModification) { + public ArraySpliterator(final E[] entries, final int toIndex, final Sized set) { this.entries = entries; this.pos = toIndex; - this.checkForConcurrentModification = checkForConcurrentModification; + this.set = set; + this.sizeOfSetAtStart = set.size(); } /** - * Create a spliterator for the given array, with the given size. + * Create a spliterator over the entire array. 
* - * @param entries the array - * @param checkForConcurrentModification runnable to check for concurrent modifications + * @param entries the backing array (not copied) + * @param set the owning collection used to detect concurrent modifications */ - public ArraySpliterator(final E[] entries, final Runnable checkForConcurrentModification) { - this(entries, entries.length, checkForConcurrentModification); + public ArraySpliterator(final E[] entries, final Sized set) { + this(entries, entries.length, set); } @Override public boolean tryAdvance(Consumer action) { - this.checkForConcurrentModification.run(); + if (sizeOfSetAtStart != set.size()) throw new ConcurrentModificationException(); if (-1 < --pos) { action.accept(entries[pos]); return true; @@ -79,7 +84,7 @@ public void forEachRemaining(Consumer action) { while (-1 < --pos) { action.accept(entries[pos]); } - this.checkForConcurrentModification.run(); + if (sizeOfSetAtStart != set.size()) throw new ConcurrentModificationException(); } @Override @@ -89,7 +94,7 @@ public Spliterator trySplit() { } final int toIndexOfSubIterator = this.pos; this.pos = pos >>> 1; - return new ArraySubSpliterator<>(entries, this.pos, toIndexOfSubIterator, checkForConcurrentModification); + return new ArraySubSpliterator<>(entries, this.pos, toIndexOfSubIterator, set); } @Override @@ -101,4 +106,4 @@ public long estimateSize() { public int characteristics() { return DISTINCT | SIZED | SUBSIZED | NONNULL | IMMUTABLE; } -} +} \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/mem/spliterator/ArraySubSpliterator.java b/jena-core/src/main/java/org/apache/jena/mem/spliterator/ArraySubSpliterator.java index 74994708b53..638f2bb0c9e 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/spliterator/ArraySubSpliterator.java +++ b/jena-core/src/main/java/org/apache/jena/mem/spliterator/ArraySubSpliterator.java @@ -21,55 +21,61 @@ package org.apache.jena.mem.spliterator; +import 
org.apache.jena.mem.collection.Sized; + +import java.util.ConcurrentModificationException; import java.util.Spliterator; import java.util.function.Consumer; /** - * A spliterator for arrays. This spliterator will iterate over the array - * entries within the given boundaries. - *

- * This spliterator supports splitting into sub-spliterators. + * Sub-range spliterator over a contiguous array slice {@code [fromIndex, toIndex)}, + * iterating from high index to low. Produced by splitting an + * {@link ArraySpliterator} (or another {@link ArraySubSpliterator}); supports + * further recursive splits for parallel traversal. *

- * The spliterator will check for concurrent modifications by invoking a {@link Runnable} - * before each action. + * Detects concurrent modifications by snapshotting {@code set.size()} at + * construction time and rechecking it at each advance/forEach boundary. + * Throws {@link ConcurrentModificationException} if the size has changed. * - * @param + * @param the element type */ public class ArraySubSpliterator implements Spliterator { private final E[] entries; private final int fromIndex; - private final Runnable checkForConcurrentModification; + private final Sized set; + private final int sizeOfSetAtStart; private int pos; /** - * Create a spliterator for the given array, with the given size. + * Create a spliterator over {@code entries[fromIndex .. toIndex)}. * - * @param entries the array - * @param fromIndex the index of the first element, inclusive - * @param toIndex the index of the last element, exclusive - * @param checkForConcurrentModification runnable to check for concurrent modifications + * @param entries the backing array (not copied) + * @param fromIndex inclusive lower bound on the iterated slice + * @param toIndex exclusive upper bound on the iterated slice + * @param set the owning collection used to detect concurrent modifications */ - public ArraySubSpliterator(final E[] entries, final int fromIndex, final int toIndex, final Runnable checkForConcurrentModification) { + public ArraySubSpliterator(final E[] entries, final int fromIndex, final int toIndex, final Sized set) { this.entries = entries; this.fromIndex = fromIndex; this.pos = toIndex; - this.checkForConcurrentModification = checkForConcurrentModification; + this.set = set; + this.sizeOfSetAtStart = set.size(); } /** - * Create a spliterator for the given array, with the given size. + * Create a spliterator over the entire array. 
* - * @param entries the array - * @param checkForConcurrentModification runnable to check for concurrent modifications + * @param entries the backing array (not copied) + * @param set the owning collection used to detect concurrent modifications */ - public ArraySubSpliterator(final E[] entries, final Runnable checkForConcurrentModification) { - this(entries, 0, entries.length, checkForConcurrentModification); + public ArraySubSpliterator(final E[] entries, final Sized set) { + this(entries, 0, entries.length, set); } @Override public boolean tryAdvance(Consumer action) { - this.checkForConcurrentModification.run(); + if (sizeOfSetAtStart != set.size()) throw new ConcurrentModificationException(); if (fromIndex <= --pos) { action.accept(entries[pos]); return true; @@ -82,7 +88,7 @@ public void forEachRemaining(Consumer action) { while (fromIndex <= --pos) { action.accept(entries[pos]); } - this.checkForConcurrentModification.run(); + if (sizeOfSetAtStart != set.size()) throw new ConcurrentModificationException(); } @Override @@ -93,7 +99,7 @@ public Spliterator trySplit() { } final int toIndexOfSubIterator = this.pos; this.pos = fromIndex + (entriesCount >>> 1); - return new ArraySubSpliterator<>(entries, this.pos, toIndexOfSubIterator, checkForConcurrentModification); + return new ArraySubSpliterator<>(entries, this.pos, toIndexOfSubIterator, set); } @Override @@ -105,4 +111,4 @@ public long estimateSize() { public int characteristics() { return DISTINCT | SIZED | SUBSIZED | NONNULL | IMMUTABLE; } -} +} \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArrayIndexedSpliterator.java b/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArrayIndexedSpliterator.java deleted file mode 100644 index 704c9642706..00000000000 --- a/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArrayIndexedSpliterator.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) 
under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.apache.jena.mem.spliterator; - -import java.util.Spliterator; -import java.util.function.Consumer; - -import org.apache.jena.mem.collection.FastHashSet; - -/** - * A spliterator for sparse arrays. This spliterator will iterate over the array - * skipping null entries. - * This spliterator returns elements as {@link FastHashSet.IndexedKey} objects, - * which contain both the index and the value of the element. - *

- * This spliterator works in ascending order, starting from the given start up to the specified exclusive index. - *

- * This spliterator supports splitting into sub-spliterators. - *

- * The spliterator will check for concurrent modifications by invoking a {@link Runnable} - * before each action. - * - * @param the type of the array elements - */ -@SuppressWarnings("all") -public class SparseArrayIndexedSpliterator implements Spliterator> { - - private final E[] entries; - private int currentPositionMinusOne; - private final int toIndexExclusive; - private final Runnable checkForConcurrentModification; - - /** - * Create a spliterator for the given array, with the given size. - * - * @param entries the array - * @param fromIndexInclusive the index of the first element, inclusive - * @param toIndexExclusive the index of the last element, exclusive - * @param checkForConcurrentModification runnable to check for concurrent modifications - */ - public SparseArrayIndexedSpliterator(final E[] entries, final int fromIndexInclusive, final int toIndexExclusive, final Runnable checkForConcurrentModification) { - this.entries = entries; - this.currentPositionMinusOne = fromIndexInclusive-1; // Start at fromIndexInclusive - 1, so that the first call to tryAdvance will increment pos to fromIndexInclusive - this.toIndexExclusive = toIndexExclusive; - this.checkForConcurrentModification = checkForConcurrentModification; - } - - /** - * Create a spliterator for the given array, with the given size. - * - * @param entries the array - * @param toIndexExclusive the index of the last element, exclusive - * @param checkForConcurrentModification runnable to check for concurrent modifications - */ - public SparseArrayIndexedSpliterator(final E[] entries, final int toIndexExclusive, final Runnable checkForConcurrentModification) { - this(entries, 0, toIndexExclusive, checkForConcurrentModification); - } - - /** - * Create a spliterator for the given array, with the given size. 
- * - * @param entries the array - * @param checkForConcurrentModification runnable to check for concurrent modifications - */ - public SparseArrayIndexedSpliterator(final E[] entries, final Runnable checkForConcurrentModification) { - this(entries, entries.length, checkForConcurrentModification); - } - - - @Override - public boolean tryAdvance(Consumer> action) { - this.checkForConcurrentModification.run(); - while (toIndexExclusive > ++currentPositionMinusOne) { - if (null != entries[currentPositionMinusOne]) { - action.accept(new FastHashSet.IndexedKey<>(currentPositionMinusOne, entries[currentPositionMinusOne])); - return true; - } - } - return false; - } - - @Override - public void forEachRemaining(Consumer> action) { - while (toIndexExclusive > ++currentPositionMinusOne) { - if (null != entries[currentPositionMinusOne]) { - action.accept(new FastHashSet.IndexedKey<>(currentPositionMinusOne, entries[currentPositionMinusOne])); - } - } - this.checkForConcurrentModification.run(); - } - - @Override - public Spliterator> trySplit() { - final var nextPos = currentPositionMinusOne + 1; - final var remaining = toIndexExclusive - nextPos; - if ( remaining < 2) { - return null; - } - final var mid = nextPos + ( remaining >>> 1); - final var fromIndexInclusive = nextPos; - this.currentPositionMinusOne = mid-1; - return new SparseArrayIndexedSpliterator<>(entries, fromIndexInclusive, mid, checkForConcurrentModification); - } - - @Override - public long estimateSize() { return (long) toIndexExclusive - currentPositionMinusOne; } - - @Override - public int characteristics() { - return DISTINCT | NONNULL | IMMUTABLE; - } -} diff --git a/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArraySpliterator.java b/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArraySpliterator.java index 6752cc9a1c1..add45739dc2 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArraySpliterator.java +++ 
b/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArraySpliterator.java @@ -21,17 +21,24 @@ package org.apache.jena.mem.spliterator; +import org.apache.jena.mem.collection.Sized; + +import java.util.ConcurrentModificationException; import java.util.Spliterator; import java.util.function.Consumer; /** - * A spliterator for sparse arrays. This spliterator will iterate over the array - * skipping null entries. - *

- * This spliterator supports splitting into sub-spliterators. + * Top-level spliterator over a sparse array slice {@code [0, toIndex)}, + * iterating from high index to low and skipping {@code null} entries. + * Produced for backing arrays such as those of + * {@link org.apache.jena.mem.collection.FastHashBase}, where removed slots + * are represented by {@code null}. *

- * The spliterator will check for concurrent modifications by invoking a {@link Runnable} - * before each action. + * Supports splitting into {@link SparseArraySubSpliterator} children for + * parallel traversal. Detects concurrent modifications by snapshotting + * {@code set.size()} at construction time and rechecking it before each + * {@code tryAdvance} and after {@code forEachRemaining}; throws {@link ConcurrentModificationException} + * if the size has changed. * * @param the type of the array elements */ @@ -39,35 +46,37 @@ public class SparseArraySpliterator implements Spliterator { private final E[] entries; private int pos; - private final Runnable checkForConcurrentModification; + private final Sized set; + private final int sizeOfSetAtStart; /** - * Create a spliterator for the given array, with the given size. + * Create a spliterator over {@code entries[0 .. toIndex)}, skipping nulls. * - * @param entries the array - * @param toIndex the index of the last element, exclusive - * @param checkForConcurrentModification runnable to check for concurrent modifications + * @param entries the backing array (not copied) + * @param toIndex exclusive upper bound on the iterated slice + * @param set the owning collection used to detect concurrent modifications */ - public SparseArraySpliterator(final E[] entries, final int toIndex, final Runnable checkForConcurrentModification) { + public SparseArraySpliterator(final E[] entries, final int toIndex, final Sized set) { this.entries = entries; this.pos = toIndex; - this.checkForConcurrentModification = checkForConcurrentModification; + this.set = set; + this.sizeOfSetAtStart = set.size(); } /** - * Create a spliterator for the given array, with the given size. + * Create a spliterator over the entire array, skipping nulls.
* - * @param entries the array - * @param checkForConcurrentModification runnable to check for concurrent modifications + * @param entries the backing array (not copied) + * @param set the owning collection used to detect concurrent modifications */ - public SparseArraySpliterator(final E[] entries, final Runnable checkForConcurrentModification) { - this(entries, entries.length, checkForConcurrentModification); + public SparseArraySpliterator(final E[] entries, final Sized set) { + this(entries, entries.length, set); } @Override public boolean tryAdvance(Consumer action) { - this.checkForConcurrentModification.run(); + if (sizeOfSetAtStart != set.size()) throw new ConcurrentModificationException(); while (-1 < --pos) { if (null != entries[pos]) { action.accept(entries[pos]); @@ -86,7 +95,7 @@ public void forEachRemaining(Consumer action) { } pos--; } - this.checkForConcurrentModification.run(); + if (sizeOfSetAtStart != set.size()) throw new ConcurrentModificationException(); } @Override @@ -96,7 +105,7 @@ public Spliterator trySplit() { } final int toIndexOfSubIterator = this.pos; this.pos = pos >>> 1; - return new SparseArraySubSpliterator<>(entries, this.pos, toIndexOfSubIterator, checkForConcurrentModification); + return new SparseArraySubSpliterator<>(entries, this.pos, toIndexOfSubIterator, set); } @Override diff --git a/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArraySubSpliterator.java b/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArraySubSpliterator.java index 3eb0784326f..d79242ac78c 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArraySubSpliterator.java +++ b/jena-core/src/main/java/org/apache/jena/mem/spliterator/SparseArraySubSpliterator.java @@ -21,55 +21,62 @@ package org.apache.jena.mem.spliterator; +import org.apache.jena.mem.collection.Sized; + +import java.util.ConcurrentModificationException; import java.util.Spliterator; import java.util.function.Consumer; /** - * A spliterator 
for sparse arrays. This spliterator will iterate over the array - * skipping null entries. - *

- * This spliterator supports splitting into sub-spliterators. + * Sub-range spliterator over a sparse array slice {@code [fromIndex, toIndex)}, + * iterating from high index to low and skipping {@code null} entries. + * Produced by splitting a {@link SparseArraySpliterator} (or another + * {@link SparseArraySubSpliterator}); supports further recursive splits for + * parallel traversal. *

- * The spliterator will check for concurrent modifications by invoking a {@link Runnable} - * before each action. + * Detects concurrent modifications by snapshotting {@code set.size()} at + * construction time and rechecking it before each {@code tryAdvance} and after + * {@code forEachRemaining}; throws {@link ConcurrentModificationException} if the size has changed. * - * @param + * @param the type of the array elements */ public class SparseArraySubSpliterator implements Spliterator { private final E[] entries; private final int fromIndex; - private final Runnable checkForConcurrentModification; + private final Sized set; + private final int sizeOfSetAtStart; private int pos; /** - * Create a spliterator for the given array, with the given size. + * Create a spliterator over {@code entries[fromIndex .. toIndex)}, skipping nulls. * - * @param entries the array - * @param fromIndex the index of the first element, inclusive - * @param toIndex the index of the last element, exclusive - * @param checkForConcurrentModification runnable to check for concurrent modifications + * @param entries the backing array (not copied) + * @param fromIndex inclusive lower bound on the iterated slice + * @param toIndex exclusive upper bound on the iterated slice + * @param set the owning collection used to detect concurrent modifications */ - public SparseArraySubSpliterator(final E[] entries, final int fromIndex, final int toIndex, final Runnable checkForConcurrentModification) { + public SparseArraySubSpliterator(final E[] entries, final int fromIndex, final int toIndex, final Sized set) { this.entries = entries; this.fromIndex = fromIndex; this.pos = toIndex; - this.checkForConcurrentModification = checkForConcurrentModification; + this.set = set; + this.sizeOfSetAtStart = set.size(); } /** - * Create a spliterator for the given array, with the given size. + * Create a spliterator over the entire array, skipping nulls.
* - * @param entries the array - * @param checkForConcurrentModification runnable to check for concurrent modifications + * @param entries the backing array (not copied) + * @param set the owning collection used to detect concurrent modifications */ - public SparseArraySubSpliterator(final E[] entries, final Runnable checkForConcurrentModification) { - this(entries, 0, entries.length, checkForConcurrentModification); + public SparseArraySubSpliterator(final E[] entries, final Sized set) { + this(entries, 0, entries.length, set); } @Override public boolean tryAdvance(Consumer action) { - this.checkForConcurrentModification.run(); + if (sizeOfSetAtStart != set.size()) throw new ConcurrentModificationException(); while (fromIndex <= --pos) { if (null != entries[pos]) { action.accept(entries[pos]); @@ -88,7 +95,7 @@ public void forEachRemaining(Consumer action) { } pos--; } - this.checkForConcurrentModification.run(); + if (sizeOfSetAtStart != set.size()) throw new ConcurrentModificationException(); } @Override @@ -99,7 +106,7 @@ public Spliterator trySplit() { } final int toIndexOfSubIterator = this.pos; this.pos = fromIndex + (entriesCount >>> 1); - return new SparseArraySubSpliterator<>(entries, this.pos, toIndexOfSubIterator, checkForConcurrentModification); + return new SparseArraySubSpliterator<>(entries, this.pos, toIndexOfSubIterator, set); } diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/TripleStore.java b/jena-core/src/main/java/org/apache/jena/mem/store/TripleStore.java index 207ce6fc44b..93c87f6416c 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/TripleStore.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/TripleStore.java @@ -28,76 +28,96 @@ import java.util.stream.Stream; /** - * A triple store is a collection of triples that supports access to - * triples matching a triple pattern. + * Storage abstraction used by the {@code mem2} in-memory graph implementations. 
+ * A {@code TripleStore} is a set-like collection of {@link Triple}s that also + * supports pattern-based lookup ({@link #find}, {@link #stream(Triple)}, + * {@link #contains}). Implementations are expected to be efficient for the + * lookup patterns described in + * {@link org.apache.jena.mem.pattern.MatchPattern}. + *

+ * Implementations are not required to be thread-safe. */ public interface TripleStore extends Copyable { /** - * Add a triple to the map. + * Add a triple to the store. Does nothing if the triple is already present. * - * @param triple to add + * @param triple the triple to add */ void add(final Triple triple); /** - * Remove a triple from the map. + * Remove a triple from the store. Does nothing if the triple is not present. * - * @param triple to remove + * @param triple the triple to remove */ void remove(final Triple triple); /** - * Remove all triples from the map. + * Remove all triples from the store. After this call, {@link #isEmpty()} + * returns {@code true} and any associated indices are emptied. */ void clear(); /** - * Return the number of triples in the map. + * Returns the number of triples in the store. + * + * @return the number of triples */ int countTriples(); /** - * Return true if the map is empty. + * Returns {@code true} if the store contains no triples. + * + * @return {@code true} if empty */ boolean isEmpty(); /** - * Answer true if the graph contains any triple matching t. + * Returns {@code true} if the store contains any triple matching the given + * pattern. The pattern may contain wildcards (e.g. {@code Node.ANY}). * - * @param tripleMatch triple match pattern, which may be contained + * @param tripleMatch the triple pattern to match + * @return {@code true} if at least one matching triple exists */ boolean contains(final Triple tripleMatch); /** - * Returns a {@link Stream} of all triples in the graph. - * Note: {@link Stream#parallel()} is supported. + * Returns a {@link Stream} of all triples in the store. + * The returned stream supports {@link Stream#parallel()}. * - * @return a stream of triples in this graph. + * @return a stream over every triple in this store */ Stream stream(); /** - * Returns a {@link Stream} of Triples matching the given pattern. - * Note: {@link Stream#parallel()} is supported. 
+ * Returns a {@link Stream} of every triple in the store matching the + * given pattern. The returned stream supports {@link Stream#parallel()}. * - * @param tripleMatch triple match pattern - * @return a stream of triples in this graph matching the pattern. + * @param tripleMatch the triple pattern to match (may contain wildcards) + * @return a stream over the matching triples */ Stream stream(final Triple tripleMatch); /** - * Returns an {@link ExtendedIterator} of all triples in the graph matching the given triple match. + * Returns an {@link ExtendedIterator} over every triple in the store + * matching the given pattern. + * + * @param tripleMatch the triple pattern to match (may contain wildcards) + * @return an iterator over the matching triples */ ExtendedIterator find(final Triple tripleMatch); /** - * Return a new triple store that is a copy of this one. - * Since Nodes and Triples are immutable and shared, the copy can share the same Nodes and Triples. + * Returns an independent copy of this store. + * Since {@link org.apache.jena.graph.Node}s and {@link Triple}s are + * immutable, the copy may share node and triple instances with the + * original; only the container/index data structures are duplicated so + * that mutations in either store do not affect the other. * * @return an independent copy of this store */ @Override TripleStore copy(); -} +} \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastArrayBunch.java b/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastArrayBunch.java index 07ccc9634a9..f0fba805175 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastArrayBunch.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastArrayBunch.java @@ -32,26 +32,41 @@ import java.util.function.Predicate; /** - * An ArrayBunch implements TripleBunch with a linear search of a short-ish - * array of Triples. The array grows by factor 2. 
+ * Linear-scan implementation of {@link FastTripleBunch} backed by a packed + * {@link Triple} array. Used as long as a bunch stays small; once it grows + * past the configured threshold (see {@link FastTripleStore}) it is replaced + * with a {@link FastHashedTripleBunch}. + *

+ * The array grows by a factor of two when full. Equality of triples within a + * bunch is delegated to {@link #areEqual(Triple, Triple)}, which subclasses + * specialize to compare only the two nodes that are not already + * implied by the enclosing map's key. This avoids redundant equality checks + * on the shared subject/predicate/object. + *

+ * Not thread-safe. */ public abstract class FastArrayBunch implements FastTripleBunch { private static final int INITIAL_SIZE = 4; + /** Number of valid entries in {@link #elements}. */ protected int size = 0; + /** Packed array of triples; entries from {@code 0} to {@code size-1} are live. */ protected Triple[] elements; + /** + * Creates an empty bunch with the default initial capacity. + */ protected FastArrayBunch() { elements = new Triple[INITIAL_SIZE]; } /** - * Copy constructor. - * The new bunch will contain all the same triples of the bunch to copy. - * But it will reserve only the space needed to contain them. Growing is still possible. + * Copy constructor. The new bunch contains the same triples as + * {@code bunchToCopy}; its backing array is sized to fit exactly, + * but can grow further if needed. * - * @param bunchToCopy + * @param bunchToCopy the source bunch */ protected FastArrayBunch(final FastArrayBunch bunchToCopy) { this.elements = new Triple[bunchToCopy.size]; @@ -59,7 +74,17 @@ protected FastArrayBunch(final FastArrayBunch bunchToCopy) { this.size = bunchToCopy.size; } - public abstract boolean areEqual(final Triple a, final Triple b); + /** + * Compare two triples for equality within this bunch. + *

+ * Subclasses specialize this to skip the already-shared component + * (subject, predicate or object) and compare only the remaining two. + * + * @param a first triple + * @param b second triple + * @return {@code true} if the triples are considered equal in this bunch + */ + protected abstract boolean areEqual(final Triple a, final Triple b); @Override public boolean containsKey(Triple t) { @@ -127,6 +152,7 @@ public boolean tryRemove(final Triple t) { for (int i = 0; i < size; i++) { if (areEqual(t, elements[i])) { elements[i] = elements[--size]; + elements[size] = null; return true; } } @@ -138,6 +164,7 @@ public void removeUnchecked(final Triple t) { for (int i = 0; i < size; i++) { if (areEqual(t, elements[i])) { elements[i] = elements[--size]; + elements[size] = null; return; } } @@ -174,11 +201,7 @@ public void forEachRemaining(Consumer action) { @Override public Spliterator keySpliterator() { - final var initialSize = size; - final Runnable checkForConcurrentModification = () -> { - if (size != initialSize) throw new ConcurrentModificationException(); - }; - return new ArraySpliterator<>(elements, size, checkForConcurrentModification); + return new ArraySpliterator<>(elements, size, this); } @Override diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastHashedBunchMap.java b/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastHashedBunchMap.java index b89d3312048..a49d6b54009 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastHashedBunchMap.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastHashedBunchMap.java @@ -25,21 +25,28 @@ import org.apache.jena.mem.collection.FastHashMap; /** - * Map from nodes to triple bunches. + * {@link FastHashMap} specialized to map a {@link Node} to its associated + * {@link FastTripleBunch}. Used by {@link FastTripleStore} to maintain the + * three subject/predicate/object indices. 
*/ public class FastHashedBunchMap extends FastHashMap implements Copyable { + /** + * Creates an empty bunch map with the default initial capacity. + */ public FastHashedBunchMap() { super(); } /** - * Copy constructor. - * The new map will contain all the same nodes as keys of the map to copy, but copies of the bunches as values . + * Copy constructor. The new map has the same node keys as + * {@code mapToCopy}; each value is replaced by a deep copy of the + * corresponding bunch (via {@link FastTripleBunch#copy()}) so that + * mutations of either map cannot affect the other. * - * @param mapToCopy + * @param mapToCopy the source map */ private FastHashedBunchMap(final FastHashedBunchMap mapToCopy) { super(mapToCopy, FastTripleBunch::copy); diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastHashedTripleBunch.java b/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastHashedTripleBunch.java index 459e78c8181..65c9ab70fbf 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastHashedTripleBunch.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastHashedTripleBunch.java @@ -25,13 +25,21 @@ import org.apache.jena.mem.collection.JenaSet; /** - * A set of triples - backed by {@link FastHashSet}. + * Hashed implementation of {@link FastTripleBunch} built on top of + * {@link FastHashSet}. Used by {@link FastTripleStore} once a bunch grows + * past the size threshold at which a linear-scan {@link FastArrayBunch} + * stops being faster. */ public class FastHashedTripleBunch extends FastHashSet implements FastTripleBunch { + /** - * Create a new triple bunch from the given set of triples. + * Create a new hashed bunch pre-populated from the given set of triples. + * The initial capacity is chosen at 1.5x the source size, so the new bunch + * fits the existing triples and has some headroom for growth before it + * needs to rehash. 
* - * @param set the set of triples + * @param set the source set of triples (typically the array bunch being + * promoted) */ public FastHashedTripleBunch(final JenaSet set) { super((set.size() >> 1) + set.size()); //it should not only fit but also have some space for growth @@ -39,15 +47,18 @@ public FastHashedTripleBunch(final JenaSet set) { } /** - * Copy constructor. - * The new bunch will contain all the same triples of the bunch to copy. + * Copy constructor. The new bunch contains the same triples as + * {@code bunchToCopy}. * - * @param bunchToCopy + * @param bunchToCopy the source bunch */ private FastHashedTripleBunch(final FastHashedTripleBunch bunchToCopy) { super(bunchToCopy); } + /** + * Creates an empty hashed bunch with the default initial capacity. + */ public FastHashedTripleBunch() { super(); } diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastTripleBunch.java b/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastTripleBunch.java index 68f79e72f8a..fe050283188 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastTripleBunch.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastTripleBunch.java @@ -29,27 +29,39 @@ import java.util.function.Predicate; /** - * A bunch of triples - a stripped-down set with specialized methods. A - * bunch is expected to store triples that share some useful property - * (such as having the same subject or predicate). + * Set-like container for a "bunch" of triples that share some useful + * property - typically they all have the same subject, predicate or object, + * because the bunch is the value of a node-keyed map in a + * {@link FastTripleStore}. + *

+ * The interface is a stripped-down set with a few extras tuned for the + * triple-store hot path; concrete implementations are + * {@link FastArrayBunch} (linear scan, used while the bunch is small) and + * {@link FastHashedTripleBunch} (hashed, used once the bunch grows past a + * threshold). */ public interface FastTripleBunch extends JenaSetHashOptimized, Copyable { /** - * Answer true iff this bunch is implemented as an array. - * This field is used to optimize some operations by avoiding the need for instanceOf tests. + * Answer {@code true} iff this bunch is backed by a flat array (i.e. is + * a {@link FastArrayBunch}). Exposed as an explicit method so callers can + * avoid {@code instanceof} checks on this hot path. * - * @return true iff this bunch is implemented as an arrays + * @return {@code true} if this bunch is array-backed */ boolean isArray(); /** - * This method is used to optimize _PO match operations. - * The {@link JenaMapSetCommon#anyMatch(Predicate)} method is faster if there are only a few matches. - * This method is faster if there are many matches and the set is ordered in an unfavorable way. - * _PO matches usually fall into this category. + * Predicate test that scans elements in hash-table order rather than + * dense insertion order. Tuned for {@code _PO} matches (any subject, + * given predicate and object). + *

+ * {@link JenaMapSetCommon#anyMatch(Predicate)} is faster when matches + * are rare or absent; this method is faster when many matches exist and + * the dense ordering would force scanning past clustered non-matches + * before finding a hit. Both variants short-circuit on the first match. * - * @param predicate the predicate to match - * @return true if any triple in the bunch matches the predicate + * @param predicate the predicate to test against each triple + * @return {@code true} if any triple in the bunch satisfies the predicate */ boolean anyMatchRandomOrder(Predicate predicate); } diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastTripleStore.java b/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastTripleStore.java index 8877bcffe9a..8ed81dc577b 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastTripleStore.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/fast/FastTripleStore.java @@ -68,20 +68,43 @@ */ public class FastTripleStore implements TripleStore { + /** + * Object-bunch size above which {@code _PO} matches consider a + * secondary lookup in the predicate bunch. + */ protected static final int THRESHOLD_FOR_SECONDARY_LOOKUP = 400; + /** + * Maximum size of a subject-keyed array bunch before it is promoted + * to a hashed bunch. Lower than the predicate/object threshold because + * the subject map is the primary entry point for {@code contains}. + */ protected static final int MAX_ARRAY_BUNCH_SIZE_SUBJECT = 16; + /** + * Maximum size of a predicate- or object-keyed array bunch before it is + * promoted to a hashed bunch. 
+ */ protected static final int MAX_ARRAY_BUNCH_SIZE_PREDICATE_OBJECT = 32; - final FastHashedBunchMap subjects; - final FastHashedBunchMap predicates; - final FastHashedBunchMap objects; + private final FastHashedBunchMap subjects; + private final FastHashedBunchMap predicates; + private final FastHashedBunchMap objects; private int size = 0; + /** + * Creates a new, empty fast triple store. + */ public FastTripleStore() { subjects = new FastHashedBunchMap(); predicates = new FastHashedBunchMap(); objects = new FastHashedBunchMap(); } + /** + * Copy constructor used by {@link #copy()}; produces an independent store + * by deep-copying each of the three index maps (which in turn deep-copy + * their bunches). + * + * @param tripleStoreToCopy the source store + */ private FastTripleStore(final FastTripleStore tripleStoreToCopy) { subjects = tripleStoreToCopy.subjects.copy(); predicates = tripleStoreToCopy.predicates.copy(); @@ -380,6 +403,11 @@ public FastTripleStore copy() { return new FastTripleStore(this); } + /** + * Array bunch used as the value in the subject-keyed map: every triple in + * the bunch shares the same subject, so equality only needs to compare + * predicate and object. + */ protected static class ArrayBunchWithSameSubject extends FastArrayBunch { public ArrayBunchWithSameSubject() { @@ -402,6 +430,11 @@ public boolean areEqual(final Triple a, final Triple b) { } } + /** + * Array bunch used as the value in the predicate-keyed map: every triple + * in the bunch shares the same predicate, so equality only needs to + * compare subject and object. + */ protected static class ArrayBunchWithSamePredicate extends FastArrayBunch { public ArrayBunchWithSamePredicate() { @@ -424,6 +457,11 @@ public boolean areEqual(final Triple a, final Triple b) { } } + /** + * Array bunch used as the value in the object-keyed map: every triple in + * the bunch shares the same object, so equality only needs to compare + * subject and predicate. 
+ */ protected static class ArrayBunchWithSameObject extends FastArrayBunch { public ArrayBunchWithSameObject() { diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/indexed/EagerStoreStrategy.java b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/EagerStoreStrategy.java new file mode 100644 index 00000000000..2d0e1ff4404 --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/EagerStoreStrategy.java @@ -0,0 +1,448 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Node; +import org.apache.jena.graph.Triple; +import org.apache.jena.mem.pattern.MatchPattern; +import org.apache.jena.mem.pattern.PatternClassifier; +import org.apache.jena.mem.store.strategies.StoreStrategy; +import org.apache.jena.util.iterator.ExtendedIterator; +import org.apache.jena.util.iterator.NullIterator; + +import java.util.Arrays; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +/** + * {@link StoreStrategy} that maintains a complete subject/predicate/object + * index over the triple set at all times. + *

+ * Three node-keyed index maps ({@link NodesToIndices}) hold, for every + * subject/predicate/object node, an {@link IndexList} of the triple indices + * that mention it. Three parallel reverse-index arrays + * ({@code sReverseIndices}, {@code pReverseIndices}, {@code oReverseIndices}) + * store, for every triple slot, its position inside the corresponding + * {@code IndexList}; this is what makes {@code O(1)} removal possible. + *

+ * The reverse-index arrays are kept the same length as the underlying + * {@code keys} array of the {@link TripleSet}; whenever the triple set grows + * its keys array, the {@code growReverseIndices} hook is invoked to grow the + * reverse arrays too. + */ +public class EagerStoreStrategy implements StoreStrategy { + private static final String UNSUPPORTED_PATTERN_CLASSIFIER = "Unsupported pattern classifier: %s"; + + final TripleSet triples; + final NodesToIndices sNodeToIndices; + final NodesToIndices pNodeToIndices; + final NodesToIndices oNodeToIndices; + private int[] sReverseIndices; + private int[] pReverseIndices; + private int[] oReverseIndices; + + /** + * Build a new eager strategy over the given triple set, indexing every + * triple already present. + * + * @param triples the canonical triple set + * @param parallel if {@code true}, build the three indices concurrently; + * otherwise build them sequentially + */ + public EagerStoreStrategy(final TripleSet triples, boolean parallel) { + this.triples = triples; + this.triples.setOnKeysGrowHook(this::growReverseIndices); + this.sNodeToIndices = new NodesToIndices(); + this.pNodeToIndices = new NodesToIndices(); + this.oNodeToIndices = new NodesToIndices(); + final var indexSize = triples.getInternalKeysLength(); + this.sReverseIndices = new int[indexSize]; + this.pReverseIndices = new int[indexSize]; + this.oReverseIndices = new int[indexSize]; + if (parallel) { + indexAllParallel(); + } else { + indexAll(); + } + } + + /** + * Build a new eager strategy and index the triple set sequentially. + * Equivalent to {@code EagerStoreStrategy(triples, false)}. + * + * @param triples the canonical triple set + */ + public EagerStoreStrategy(final TripleSet triples) { + this(triples, false); + } + + /** + * Copy constructor that reuses an already-built index. 
Used when copying + * an {@link IndexedSetTripleStore} whose source already has its eager + * index built, so that the copy can avoid the cost of rebuilding it. + *

+ * The {@code triples} parameter must be a copy of the original triple + * set (the indices reference triple slots by index, so the two sets + * must have identical layouts). + * + * @param triples the (already-copied) triple set the + * new strategy will operate on + * @param strategyToCopyIndicesFrom the strategy whose indices should + * be cloned + */ + public EagerStoreStrategy(final TripleSet triples, EagerStoreStrategy strategyToCopyIndicesFrom) { + this.triples = triples; + this.triples.setOnKeysGrowHook(this::growReverseIndices); + this.sNodeToIndices = strategyToCopyIndicesFrom.sNodeToIndices.copy(); + this.pNodeToIndices = strategyToCopyIndicesFrom.pNodeToIndices.copy(); + this.oNodeToIndices = strategyToCopyIndicesFrom.oNodeToIndices.copy(); + this.sReverseIndices = strategyToCopyIndicesFrom.sReverseIndices.clone(); + this.pReverseIndices = strategyToCopyIndicesFrom.pReverseIndices.clone(); + this.oReverseIndices = strategyToCopyIndicesFrom.oReverseIndices.clone(); + } + + @Override + public boolean isIndexInitialized() { + return true; + } + + /** + * Sequentially populate the three subject/predicate/object indices with + * every triple currently in {@code triples}. + */ + private void indexAll() { + // Initialize the index by adding all triples to the index + triples.forEachKey(this::addToIndex); + } + + /** + * Populate the three subject/predicate/object indices in parallel. + * Each of the three indices is touched by exactly one thread, so the + * indices themselves don't need to be thread-safe; only the read-only + * iteration over the triple set runs concurrently. 
+ */ + private void indexAllParallel() { + final var futureIndexObjects = CompletableFuture.runAsync( + () -> triples.forEachKey((t, i) + -> addOIndex(t.getObject(), i))); + + final var futureIndexSubjects = CompletableFuture.runAsync( + () -> triples.forEachKey((t, i) + -> addSIndex(t.getSubject(), i))); + + triples.forEachKey((t, i) + -> addPIndex(t.getPredicate(), i)); + + CompletableFuture.allOf(futureIndexObjects, futureIndexSubjects).join(); + } + + private void addSIndex(final Node subject, final int index) { + final var indices = sNodeToIndices.getOrNew(subject); + sReverseIndices[index] = indices.add(index); + } + + private void addPIndex(final Node predicate, final int index) { + final var indices = pNodeToIndices.getOrNew(predicate); + pReverseIndices[index] = indices.add(index); + } + + private void addOIndex(final Node object, final int index) { + final var indices = oNodeToIndices.getOrNew(object); + oReverseIndices[index] = indices.add(index); + } + + private void removeIndexS(final Node subject, final int index) { + final var indices = sNodeToIndices.get(subject); + var oldPosition = sReverseIndices[index]; + final var switched = indices.removeAt(oldPosition); + if (indices.isEmpty()) { + sNodeToIndices.removeUnchecked(subject); + } else if (-1 < switched) { + sReverseIndices[switched] = oldPosition; + } + } + + private void removeIndexP(final Node predicate, final int index) { + final var indices = pNodeToIndices.get(predicate); + var oldPosition = pReverseIndices[index]; + final var switched = indices.removeAt(oldPosition); + if (indices.isEmpty()) { + pNodeToIndices.removeUnchecked(predicate); + } else if (-1 < switched) { + pReverseIndices[switched] = oldPosition; + } + } + + private void removeIndexO(final Node object, final int index) { + final var indices = oNodeToIndices.get(object); + var oldPosition = oReverseIndices[index]; + final var switched = indices.removeAt(oldPosition); + if (indices.isEmpty()) { + 
oNodeToIndices.removeUnchecked(object); + } else if (-1 < switched) { + oReverseIndices[switched] = oldPosition; + } + } + + private void growReverseIndices(int keysLength) { + sReverseIndices = Arrays.copyOf(sReverseIndices, keysLength); + pReverseIndices = Arrays.copyOf(pReverseIndices, keysLength); + oReverseIndices = Arrays.copyOf(oReverseIndices, keysLength); + } + + @Override + public void addToIndex(final Triple triple, final int index) { + addSIndex(triple.getSubject(), index); + addPIndex(triple.getPredicate(), index); + addOIndex(triple.getObject(), index); + } + + @Override + public void removeFromIndex(final Triple triple, final int index) { + removeIndexS(triple.getSubject(), index); + removeIndexP(triple.getPredicate(), index); + removeIndexO(triple.getObject(), index); + } + + @Override + public void clearIndex() { + sNodeToIndices.clear(); + pNodeToIndices.clear(); + oNodeToIndices.clear(); + final var indexSize = triples.getInternalKeysLength(); + this.sReverseIndices = new int[indexSize]; + this.pReverseIndices = new int[indexSize]; + this.oReverseIndices = new int[indexSize]; + } + + @Override + public boolean containsMatch(final Triple tripleMatch, final MatchPattern pattern) { + switch (pattern) { + + case SUB_ANY_ANY: + return sNodeToIndices.containsKey(tripleMatch.getSubject()); + case ANY_PRE_ANY: + return pNodeToIndices.containsKey(tripleMatch.getPredicate()); + case ANY_ANY_OBJ: + return oNodeToIndices.containsKey(tripleMatch.getObject()); + + case SUB_PRE_ANY: { + final var sIndices = sNodeToIndices.get(tripleMatch.getSubject()); + if (null == sIndices) + return false; + + final var pIndices = pNodeToIndices.get(tripleMatch.getPredicate()); + if (null == pIndices) + return false; + + return IndexList.intersects(sIndices, sReverseIndices, pIndices, pReverseIndices); + } + + case ANY_PRE_OBJ: { + final var pIndices = pNodeToIndices.get(tripleMatch.getPredicate()); + if (null == pIndices) + return false; + + final var oIndices = 
oNodeToIndices.get(tripleMatch.getObject()); + if (null == oIndices) + return false; + + return IndexList.intersects(pIndices, pReverseIndices, oIndices, oReverseIndices); + } + + case SUB_ANY_OBJ: { + final var sIndices = sNodeToIndices.get(tripleMatch.getSubject()); + if (null == sIndices) + return false; + + final var oIndices = oNodeToIndices.get(tripleMatch.getObject()); + if (null == oIndices) + return false; + + return IndexList.intersects(sIndices, sReverseIndices, oIndices, oReverseIndices); + } + + default: + throw new IllegalStateException(String.format(UNSUPPORTED_PATTERN_CLASSIFIER, + PatternClassifier.classify(tripleMatch))); + } + } + + @Override + public Stream streamMatch(final Triple tripleMatch, final MatchPattern pattern) { + switch (pattern) { + + case SUB_ANY_ANY: { + final IndexList indexList = sNodeToIndices.get(tripleMatch.getSubject()); + if (indexList == null) { + return Stream.empty(); + } + return StreamSupport.stream( + new IndexListSpliterator(triples, indexList), + false); + } + case ANY_PRE_ANY: { + final IndexList indexList = pNodeToIndices.get(tripleMatch.getPredicate()); + if (indexList == null) { + return Stream.empty(); + } + return StreamSupport.stream( + new IndexListSpliterator(triples, indexList), + false); + } + case ANY_ANY_OBJ: { + final IndexList indexList = oNodeToIndices.get(tripleMatch.getObject()); + if(indexList == null) { + return Stream.empty(); + } + return StreamSupport.stream( + new IndexListSpliterator(triples, indexList), + false); + } + case SUB_PRE_ANY: { + final var sIndices = sNodeToIndices.get(tripleMatch.getSubject()); + if (null == sIndices) + return Stream.empty(); + + final var pIndices = pNodeToIndices.get(tripleMatch.getPredicate()); + if (null == pIndices) + return Stream.empty(); + + return StreamSupport.stream( + new IndexListsSpliterator(triples, + sIndices, sReverseIndices, + pIndices, pReverseIndices), + false); + } + + case ANY_PRE_OBJ: { + final var pIndices = 
pNodeToIndices.get(tripleMatch.getPredicate()); + if (null == pIndices) + return Stream.empty(); + + final var oIndices = oNodeToIndices.get(tripleMatch.getObject()); + if (null == oIndices) + return Stream.empty(); + + return StreamSupport.stream( + new IndexListsSpliterator(triples, + pIndices, pReverseIndices, + oIndices, oReverseIndices), + false); + } + + case SUB_ANY_OBJ: { + final var sIndices = sNodeToIndices.get(tripleMatch.getSubject()); + if (null == sIndices) + return Stream.empty(); + + final var oIndices = oNodeToIndices.get(tripleMatch.getObject()); + if (null == oIndices) + return Stream.empty(); + + return StreamSupport.stream( + new IndexListsSpliterator(triples, + sIndices, sReverseIndices, + oIndices, oReverseIndices), + false); + } + + default: + throw new IllegalStateException(String.format(UNSUPPORTED_PATTERN_CLASSIFIER, + PatternClassifier.classify(tripleMatch))); + } + } + + @Override + public ExtendedIterator findMatch(final Triple tripleMatch, final MatchPattern pattern) { + switch (pattern) { + + case SUB_ANY_ANY: { + final IndexList indexList = sNodeToIndices.get(tripleMatch.getSubject()); + if (indexList == null) { + return NullIterator.instance(); + } + return new IndexListIterator(triples, indexList); + } + case ANY_PRE_ANY: { + final IndexList indexList = pNodeToIndices.get(tripleMatch.getPredicate()); + if (indexList == null) { + return NullIterator.instance(); + } + return new IndexListIterator(triples, indexList); + } + case ANY_ANY_OBJ: { + final IndexList indexList = oNodeToIndices.get(tripleMatch.getObject()); + if (indexList == null) { + return NullIterator.instance(); + } + return new IndexListIterator(triples, indexList); + } + case SUB_PRE_ANY: { + final var sIndices = sNodeToIndices.get(tripleMatch.getSubject()); + if (null == sIndices) + return NullIterator.instance(); + + final var pIndices = pNodeToIndices.get(tripleMatch.getPredicate()); + if (null == pIndices) + return NullIterator.instance(); + + return new 
IndexListsIterator(triples, + sIndices, sReverseIndices, + pIndices, pReverseIndices); + } + + case ANY_PRE_OBJ: { + final var pIndices = pNodeToIndices.get(tripleMatch.getPredicate()); + if (null == pIndices) + return NullIterator.instance(); + + final var oIndices = oNodeToIndices.get(tripleMatch.getObject()); + if (null == oIndices) + return NullIterator.instance(); + + return new IndexListsIterator(triples, + pIndices, pReverseIndices, + oIndices, oReverseIndices); + } + + case SUB_ANY_OBJ: { + final var sIndices = sNodeToIndices.get(tripleMatch.getSubject()); + if (null == sIndices) + return NullIterator.instance(); + + final var oIndices = oNodeToIndices.get(tripleMatch.getObject()); + if (null == oIndices) + return NullIterator.instance(); + + return new IndexListsIterator(triples, + sIndices, sReverseIndices, + oIndices, oReverseIndices); + } + + default: + throw new IllegalStateException(String.format(UNSUPPORTED_PATTERN_CLASSIFIER, + PatternClassifier.classify(tripleMatch))); + } + } +} diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexList.java b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexList.java new file mode 100644 index 00000000000..bb906f0784b --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexList.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.atlas.lib.Copyable; + +/** + * Append-only list of {@code int} triple indices, used by the eager indexing + * strategy as the value type of the per-node index lists ("for the subject + * node N, here are the indices of all triples whose subject is N"). + *
<p>
+ * Backed by an int array that grows by factor 1.5. + * Removal is constant-time swap-with-last so callers must + * keep an external reverse-index array in sync (see + * {@link org.apache.jena.mem.store.indexed.EagerStoreStrategy}). + */ +public class IndexList implements Copyable { + + private static final int INITIAL_SIZE = 2; + + private int pos = -1; + private int[] elements; + + /** + * Creates an empty list with the default initial capacity. + */ + public IndexList() { + elements = new int[INITIAL_SIZE]; + } + + /** + * Copy constructor. The new list contains the same indices as + * {@code bunchToCopy}; its backing array is sized to fit exactly, + * but can grow further if needed. + * + * @param bunchToCopy the source list + */ + public IndexList(final IndexList bunchToCopy) { + // ensures min size of INITIAL_SIZE, so the new list can grow + this.elements = new int[Math.max(bunchToCopy.size(), INITIAL_SIZE)]; + System.arraycopy(bunchToCopy.elements, 0, this.elements, 0, bunchToCopy.size()); + this.pos = bunchToCopy.pos; + } + + /** + * @return the number of indices currently stored + */ + public int size() { + return pos + 1; + } + + /** + * @return the index of the last stored element, or {@code -1} if empty + */ + public int lastPos() { + return pos; + } + + /** + * @return {@code true} if the list contains no indices + */ + public boolean isEmpty() { + return this.pos == -1; + } + + /** + * Returns the underlying int array. Only the first {@link #size()} + * entries are valid. Exposed as a raw array to allow callers (e.g. + * iterators and intersection routines) to avoid bounds-checked accessors + * in tight loops. + * + * @return the backing array + */ + public int[] getIndices() { + return elements; + } + + /** + * @param pos a position {@code 0 ≤ pos ≤ lastPos()} + * @return the index stored at the given position + */ + public int getIndexAt(final int pos) { + return this.elements[pos]; + } + + /** + * Append the given index to the list. 
+ * + * @param element the triple index to append + * @return the position at which {@code element} was stored (i.e. its + * "reverse index"); callers track this so they can remove it + * later in O(1) + */ + public int add(final int element) { + if (++pos == elements.length) grow(); + elements[pos] = element; + return pos; + } + + /** + * Grows the backing array. Grows by factor 1.5. + * This requires a minimum size of 2 to work. + */ + private void grow() { + final var oldElements = elements; + var newSize = (elements.length >> 1) + elements.length; + if (newSize < 0) { // catches overflow + newSize = Integer.MAX_VALUE; + } + elements = new int[newSize]; + System.arraycopy(oldElements, 0, elements, 0, pos); + } + + /** + * Remove the index at the given position by swapping the last element + * into its place ("swap-with-last"). The caller is responsible for + * updating any external reverse-index that points at the moved element. + * + * @param position the position of the index to remove + * @return the triple index of the element that was moved into + * {@code position} (so the caller can update its reverse index), + * or {@code -1} if the removed element was the last one and + * nothing was moved + */ + public int removeAt(final int position) { + if(pos == position) { + pos--; + return -1; + } else { + elements[position] = elements[pos--]; + return elements[position]; + } + } + + /** + * Returns an independent copy of this list. + * + * @return a deep copy + */ + @Override + public IndexList copy() { + return new IndexList(this); + } + + /** + * Test whether two index lists share at least one common triple index. + * The lists are not assumed to be sorted; this implementation iterates + * the shorter list and checks each entry against the larger list using + * the larger list's reverse-index array, giving {@code O(min(|a|,|b|))}. 
+ * + * @param a first list + * @param reverseIndicesA reverse index for {@code a}: maps a triple index + * to its position in {@code a.getIndices()} + * @param b second list + * @param reverseIndicesB reverse index for {@code b} + * @return {@code true} if {@code a} and {@code b} share any element + */ + public static boolean intersects(final IndexList a, final int[] reverseIndicesA, final IndexList b, final int[] reverseIndicesB) { + if (a.size() < b.size()) { + return intersectsSmallerWithLarger(a, b, reverseIndicesB); + } else { + return intersectsSmallerWithLarger(b, a, reverseIndicesA); + } + } + + private static boolean intersectsSmallerWithLarger(final IndexList smaller, final IndexList larger, final int[] reverseIndicesLarger) { + final var largerSize = larger.size(); + var pos = smaller.lastPos(); + while (-1 < pos) { + final var tripleIndex = smaller.elements[pos--]; + final var potentialIndexInLarger = reverseIndicesLarger[tripleIndex]; + if(potentialIndexInLarger < largerSize) { + if(tripleIndex == larger.elements[potentialIndexInLarger]) { + return true; + } + } + } + return false; + } +} diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListIterator.java b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListIterator.java new file mode 100644 index 00000000000..e06606b6031 --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListIterator.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Triple; +import org.apache.jena.util.iterator.NiceIterator; + +import java.util.ConcurrentModificationException; +import java.util.NoSuchElementException; +import java.util.function.Consumer; + +/** + * Iterator that resolves the integer indices stored in an {@link IndexList} + * back to {@link Triple} instances by looking them up in a {@link TripleSet}. + * Walks the list from its last index to position {@code 0}. + *
<p>
+ * Detects concurrent modifications by snapshotting {@code triples.size()} at + * construction time and rechecking it on each call to {@link #next()} / + * {@link #forEachRemaining(Consumer)}; throws + * {@link ConcurrentModificationException} if the size has changed. + */ +public class IndexListIterator extends NiceIterator { + + private final TripleSet triples; + private final int sizeOfSetAtStart; + private final int[] indices; + private int pos; + + /** + * Creates an iterator over the triples whose indices are stored in + * {@code indexList}. + * + * @param triples the canonical set of triples to dereference indices against + * @param indexList the list of triple indices to walk + */ + public IndexListIterator(final TripleSet triples, final IndexList indexList) { + this.triples = triples; + indices = indexList.getIndices(); + pos = indexList.lastPos(); + this.sizeOfSetAtStart = triples.size(); + } + + @Override + public boolean hasNext() { + return -1 < pos; + } + + @Override + public Triple next() { + if (sizeOfSetAtStart != triples.size()) throw new ConcurrentModificationException(); + if(!hasNext()) { + throw new NoSuchElementException(); + } + return triples.getKeyAt(indices[pos--]); + } + + @Override + public void forEachRemaining(Consumer action) { + while (-1 < pos) { + action.accept(triples.getKeyAt(indices[pos--])); + } + if (sizeOfSetAtStart != triples.size()) throw new ConcurrentModificationException(); + } +} \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListSpliterator.java b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListSpliterator.java new file mode 100644 index 00000000000..cce5268bde3 --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListSpliterator.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Triple; + +import java.util.ConcurrentModificationException; +import java.util.Spliterator; +import java.util.function.Consumer; + +/** + * Spliterator counterpart to {@link IndexListIterator}: walks an + * {@link IndexList} in ascending order and dereferences each integer index + * against a {@link TripleSet} to yield triples. Supports recursive splitting + * for parallel traversal. + *
<p>
+ * Detects concurrent modifications by snapshotting {@code triples.size()} at + * construction time and rechecking it at each advance/forEach boundary; + * throws {@link ConcurrentModificationException} if the size has changed. + */ +public class IndexListSpliterator implements Spliterator { + + private final TripleSet triples; + private final int sizeOfSetAtStart; + private final int[] indices; + private final int toPositionExclusive; + private int pos; + + /** + * Creates a spliterator over every triple referenced by the given + * index list. + * + * @param triples the canonical triple set + * @param indexList the list of triple indices to walk + */ + public IndexListSpliterator(final TripleSet triples, final IndexList indexList) { + this(triples, + indexList.getIndices(), + 0, indexList.size()); + } + + /** + * Internal constructor used to produce sub-spliterators from + * {@link #trySplit()}. + * + * @param triples the canonical triple set + * @param indices the raw indices array + * @param from inclusive lower bound on the slice to walk + * @param toExclusive exclusive upper bound on the slice to walk + */ + public IndexListSpliterator(final TripleSet triples, final int[] indices, final int from, final int toExclusive) { + this.triples = triples; + this.sizeOfSetAtStart = triples.size(); + this.indices = indices; + this.pos = from; + this.toPositionExclusive = toExclusive; + } + + @Override + public boolean tryAdvance(Consumer action) { + if (sizeOfSetAtStart != triples.size()) throw new ConcurrentModificationException(); + if (pos < toPositionExclusive) { + action.accept(triples.getKeyAt(indices[pos++])); + return true; + } + return false; + } + + @Override + public void forEachRemaining(Consumer action) { + while (pos < toPositionExclusive) { + action.accept(triples.getKeyAt(indices[pos++])); + } + if (sizeOfSetAtStart != triples.size()) throw new ConcurrentModificationException(); + } + + @Override + public Spliterator trySplit() { + final var remaining = 
toPositionExclusive - pos; + if (remaining < 2) { + return null; + } + final var oldPos = pos; + this.pos = pos + (remaining >>> 1); + return new IndexListSpliterator(triples, indices, + oldPos, this.pos); + } + + @Override + public long estimateSize() { + return toPositionExclusive - pos; + } + + @Override + public long getExactSizeIfKnown() { + return toPositionExclusive - pos; + } + + @Override + public int characteristics() { + return DISTINCT | SIZED | SUBSIZED | NONNULL | IMMUTABLE; + } +} \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListsIterator.java b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListsIterator.java new file mode 100644 index 00000000000..6b3aa8b4436 --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListsIterator.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Triple; +import org.apache.jena.util.iterator.NiceIterator; + +import java.util.ConcurrentModificationException; +import java.util.NoSuchElementException; +import java.util.function.Consumer; + +/** + * Iterator over the intersection of two {@link IndexList}s, used by the + * eager indexing strategy to answer two-key patterns + * (e.g. subject-and-predicate or predicate-and-object). + *
<p>
+ * The intersection is computed lazily: the iterator walks the shorter of + * the two lists and probes each candidate triple index against the larger + * list's reverse-index array. This gives expected + * {@code O(min(|A|, |B|))} cost without any explicit set-allocation. + *
<p>
+ * Detects concurrent modifications by snapshotting {@code triples.size()} at + * construction time and rechecking it on each call to {@link #next()} / + * {@link #forEachRemaining(Consumer)}; throws + * {@link ConcurrentModificationException} if the size has changed. + */ +public class IndexListsIterator extends NiceIterator { + + private final TripleSet triples; + private final int sizeOfSetAtStart; + private final int[] indicesSmaller; + private final int[] indicesLarger; + private final int[] reverseIndicesLarger; + private int pos; + private int tripleIndex; + final int indicesLargerSize; + private boolean hasNext = false; + + /** + * Creates an iterator over the triples whose indices appear in both + * {@code indexListA} and {@code indexListB}. + * + * @param triples the canonical triple set to dereference indices against + * @param indexListA one of the index lists to intersect + * @param reverseIndicesA reverse-index array for {@code indexListA} + * @param indexListB the other index list to intersect + * @param reverseIndicesB reverse-index array for {@code indexListB} + */ + public IndexListsIterator(final TripleSet triples, + final IndexList indexListA, final int[] reverseIndicesA, + final IndexList indexListB, final int[] reverseIndicesB) { + this.triples = triples; + this.sizeOfSetAtStart = triples.size(); + if(indexListA.size() < indexListB.size()) { + indicesSmaller = indexListA.getIndices(); + indicesLarger = indexListB.getIndices(); + reverseIndicesLarger = reverseIndicesB; + pos = indexListA.lastPos(); + indicesLargerSize = indexListB.size(); + } else { + indicesSmaller = indexListB.getIndices(); + indicesLarger = indexListA.getIndices(); + reverseIndicesLarger = reverseIndicesA; + pos = indexListB.lastPos(); + indicesLargerSize = indexListA.size(); + } + } + + @Override + public boolean hasNext() { + if(hasNext) + return true; + + while(-1 < pos) { + tripleIndex = indicesSmaller[pos--]; + final var posLarger = reverseIndicesLarger[tripleIndex]; + 
+ if(posLarger < indicesLargerSize + && indicesLarger[posLarger] == tripleIndex) { + return hasNext = true; + } + } + return false; + } + + @Override + public Triple next() { + if (sizeOfSetAtStart != triples.size()) throw new ConcurrentModificationException(); + if(hasNext || hasNext()) { + hasNext = false; + return triples.getKeyAt(tripleIndex); + } + throw new NoSuchElementException(); + } + + @Override + public void forEachRemaining(Consumer action) { + if(hasNext) { + action.accept(next()); + hasNext = false; + } + while (-1 < pos) { + tripleIndex = indicesSmaller[pos--]; + final var posLarger = reverseIndicesLarger[tripleIndex]; + if(posLarger < indicesLargerSize + && indicesLarger[posLarger] == tripleIndex) { + action.accept(triples.getKeyAt(tripleIndex)); + } + } + if (sizeOfSetAtStart != triples.size()) throw new ConcurrentModificationException(); + } +} \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListsSpliterator.java b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListsSpliterator.java new file mode 100644 index 00000000000..090b241b675 --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexListsSpliterator.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Triple; + +import java.util.ConcurrentModificationException; +import java.util.Spliterator; +import java.util.function.Consumer; + +/** + * Spliterator counterpart to {@link IndexListsIterator}: walks the + * intersection of two {@link IndexList}s, dereferencing each surviving + * triple index against a {@link TripleSet}. Supports recursive splitting + * for parallel traversal; the split happens on the (smaller) list being + * scanned, never on the larger list (which is only probed via its + * reverse-index array). + *
<p>
+ * Detects concurrent modifications by snapshotting {@code triples.size()} at + * construction time and rechecking it at each advance/forEach boundary; + * throws {@link ConcurrentModificationException} if the size has changed. + */ +public class IndexListsSpliterator implements Spliterator { + + private final TripleSet triples; + private final int sizeOfSetAtStart; + private final int[] indicesSmaller; + private final int[] indicesLarger; + private final int[] reverseIndicesLarger; + private final int toPositionExclusive; + private int pos; + final int indicesLargerSize; + + /** + * Creates a spliterator over the triples whose indices appear in both + * {@code indexListA} and {@code indexListB}. + * + * @param triples the canonical triple set + * @param indexListA one of the index lists to intersect + * @param reverseIndicesA reverse-index array for {@code indexListA} + * @param indexListB the other index list to intersect + * @param reverseIndicesB reverse-index array for {@code indexListB} + */ + public IndexListsSpliterator(final TripleSet triples, + final IndexList indexListA, final int[] reverseIndicesA, + final IndexList indexListB, final int[] reverseIndicesB) { + this.triples = triples; + this.sizeOfSetAtStart = triples.size(); + if(indexListA.size() < indexListB.size()) { + indicesSmaller = indexListA.getIndices(); + indicesLarger = indexListB.getIndices(); + reverseIndicesLarger = reverseIndicesB; + toPositionExclusive = indexListA.size(); + pos = 0; + indicesLargerSize = indexListB.size(); + } else { + indicesSmaller = indexListB.getIndices(); + indicesLarger = indexListA.getIndices(); + reverseIndicesLarger = reverseIndicesA; + toPositionExclusive = indexListB.size(); + pos = 0; + indicesLargerSize = indexListA.size(); + } + } + + private IndexListsSpliterator(final TripleSet triples, + final int sizeOfSetAtStart, + final int[] indicesSmaller, + final int[] indicesLarger, final int indicesLargerSize, + final int[] reverseIndicesLarger, + final int 
from, final int toExclusive) { + this.triples = triples; + this.sizeOfSetAtStart = sizeOfSetAtStart; + this.indicesSmaller = indicesSmaller; + this.indicesLarger = indicesLarger; + this.reverseIndicesLarger = reverseIndicesLarger; + this.pos = from; + this.toPositionExclusive = toExclusive; + this.indicesLargerSize = indicesLargerSize; + } + + + @Override + public boolean tryAdvance(Consumer action) { + if (sizeOfSetAtStart != triples.size()) throw new ConcurrentModificationException(); + while (pos < toPositionExclusive) { + final var tripleIndex = indicesSmaller[pos++]; + final var posLarger = reverseIndicesLarger[tripleIndex]; + if(posLarger < indicesLargerSize + && indicesLarger[posLarger] == tripleIndex) { + action.accept(triples.getKeyAt(tripleIndex)); + return true; + } + } + return false; + } + + @Override + public void forEachRemaining(Consumer action) { + while (pos < toPositionExclusive) { + final var tripleIndex = indicesSmaller[pos++]; + final var posLarger = reverseIndicesLarger[tripleIndex]; + if(posLarger < indicesLargerSize + && indicesLarger[posLarger] == tripleIndex) { + action.accept(triples.getKeyAt(tripleIndex)); + } + } + if (sizeOfSetAtStart != triples.size()) throw new ConcurrentModificationException(); + } + + @Override + public Spliterator trySplit() { + final var remaining = toPositionExclusive - pos; + if (remaining < 2) { + return null; + } + final var oldPos = pos; + this.pos = pos + (remaining >>> 1); + return new IndexListsSpliterator(triples, sizeOfSetAtStart, + indicesSmaller, indicesLarger, indicesLargerSize, + reverseIndicesLarger, + oldPos, this.pos); + } + + @Override + public long estimateSize() { + return toPositionExclusive - pos; + } + + @Override + public long getExactSizeIfKnown() { + return -1; + } + + @Override + public int characteristics() { + return DISTINCT | NONNULL | IMMUTABLE; + } +} \ No newline at end of file diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexedSetTripleStore.java 
b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexedSetTripleStore.java
new file mode 100644
index 00000000000..6653bfddc8f
--- /dev/null
+++ b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/IndexedSetTripleStore.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.apache.jena.mem.store.indexed;
+
+import org.apache.jena.graph.Triple;
+import org.apache.jena.mem.IndexingStrategy;
+import org.apache.jena.mem.pattern.PatternClassifier;
+import org.apache.jena.mem.store.TripleStore;
+import org.apache.jena.mem.store.strategies.*;
+import org.apache.jena.util.iterator.ExtendedIterator;
+import org.apache.jena.util.iterator.NiceIterator;
+import org.apache.jena.util.iterator.SingletonIterator;
+
+import java.util.stream.Stream;
+
+/**
+ * {@link TripleStore} that stores all triples in a single
+ * {@link TripleSet} and delegates pattern-matching to a configurable
+ * {@link StoreStrategy}. The strategy is selected via an
+ * {@link IndexingStrategy} and may swap itself out at runtime (e.g. a
+ * {@link LazyStoreStrategy} replaces itself with an
+ * {@link EagerStoreStrategy} as soon as the first pattern lookup is
+ * performed).
+ * <p>
+ * The triples themselves are kept in {@code triples}; each triple has a
+ * stable index in that set, which the strategy uses to maintain
+ * subject/predicate/object indices of integer indices rather than triple
+ * references.
+ */
+public class IndexedSetTripleStore implements TripleStore {
+
+    /** The flat set of stored triples. Each element has a stable integer index. */
+    private final TripleSet triples;
+    private StoreStrategy currentStrategy; // not final: replaced when an index is built or reset
+    private final IndexingStrategy indexingStrategy;
+
+    /**
+     * Creates an indexed store with the {@link IndexingStrategy#EAGER}
+     * default indexing strategy.
+     */
+    public IndexedSetTripleStore() {
+        this(IndexingStrategy.EAGER);
+    }
+
+    /**
+     * Creates an indexed store using the given indexing strategy.
+     *
+     * @param indexingStrategy the indexing strategy to use
+     */
+    public IndexedSetTripleStore(final IndexingStrategy indexingStrategy) {
+        this.triples = new TripleSet();
+        this.indexingStrategy = indexingStrategy;
+        this.currentStrategy = createStoreStrategy(indexingStrategy);
+    }
+
+    /**
+     * Copy constructor used by {@link #copy()}. If the source store has its
+     * eager index built, the copy reuses the index data structures (without
+     * rebuilding them); otherwise the copy starts from the configured
+     * indexing strategy.
+     *
+     * @param storeToCopy the source store
+     */
+    private IndexedSetTripleStore(final IndexedSetTripleStore storeToCopy) {
+        this.triples = storeToCopy.triples.copy();
+        this.indexingStrategy = storeToCopy.indexingStrategy;
+        if(storeToCopy.currentStrategy instanceof EagerStoreStrategy eagerStoreStrategy) {
+            currentStrategy = new EagerStoreStrategy(triples, eagerStoreStrategy); // Copy the indices from the original strategy
+        } else {
+            currentStrategy = createStoreStrategy(indexingStrategy);
+        }
+    }
+
+
+    private StoreStrategy createStoreStrategy(final IndexingStrategy indexingStrategy) {
+        return switch (indexingStrategy) {
+            case EAGER
+                    -> new EagerStoreStrategy(triples);
+            case LAZY
+                    -> new LazyStoreStrategy(this::setCurrentStrategyToNewEagerStoreStrategy);
+            case LAZY_PARALLEL
+                    -> new LazyStoreStrategy(this::setCurrentStrategyToNewEagerStoreStrategyParallel);
+            case MANUAL
+                    -> new ManualStoreStrategy();
+            case MINIMAL
+                    -> new MinimalStoreStrategy(triples);
+        };
+    }
+
+    private EagerStoreStrategy setCurrentStrategyToNewEagerStoreStrategy() { // callback handed to LazyStoreStrategy (LAZY)
+        final var eagerStoreStrategy = new EagerStoreStrategy(triples, false);
+        this.currentStrategy = eagerStoreStrategy;
+        return eagerStoreStrategy;
+    }
+
+    private EagerStoreStrategy setCurrentStrategyToNewEagerStoreStrategyParallel() { // callback handed to LazyStoreStrategy (LAZY_PARALLEL)
+        final var eagerStoreStrategy = new EagerStoreStrategy(triples, true);
+        this.currentStrategy = eagerStoreStrategy;
+        return eagerStoreStrategy;
+    }
+
+    /**
+     * Check if the index has been initialized and all triples are indexed.
+     *
+     * @return true if the index is initialized, false otherwise
+     */
+    public boolean isIndexInitialized() {
+        return currentStrategy.isIndexInitialized();
+    }
+
+    /**
+     * Get the indexing strategy this store was configured with (it does not change when the current strategy is swapped).
+     *
+     * @return the indexing strategy
+     */
+    public IndexingStrategy getIndexingStrategy() {
+        return indexingStrategy;
+    }
+
+    /**
+     * Replace the current strategy with a fresh one created from the configured indexing strategy; the previous strategy object is dropped.
+     */
+    public void resetIndexingStrategy() {
+        this.currentStrategy = createStoreStrategy(indexingStrategy);
+    }
+
+    /**
+     * Replace the current strategy with a new {@link EagerStoreStrategy} built sequentially from the triple set.
+     */
+    public void initializeIndex() {
+        currentStrategy = new EagerStoreStrategy(this.triples, false);
+    }
+
+    /**
+     * Replace the current strategy with a new {@link EagerStoreStrategy} built in parallel from the triple set.
+     * This will index all triples in parallel, which can be faster for large datasets.
+     */
+    public void initializeIndexParallel() {
+        currentStrategy = new EagerStoreStrategy(this.triples, true);
+    }
+
+    @Override
+    public void add(final Triple triple) {
+        final var index = triples.addAndGetIndex(triple);
+        if (index < 0) { /*triple already exists*/
+            return;
+        }
+        currentStrategy.addToIndex(triple, index);
+    }
+
+    @Override
+    public void remove(final Triple triple) {
+        final var index = triples.removeAndGetIndex(triple);
+        if (index < 0) { /*triple does not exist*/
+            return;
+        }
+        currentStrategy.removeFromIndex(triple, index);
+    }
+
+    @Override
+    public void clear() {
+        this.triples.clear();
+        this.currentStrategy.clearIndex();
+    }
+
+    @Override
+    public int countTriples() {
+        return this.triples.size();
+    }
+
+    @Override
+    public boolean isEmpty() {
+        return this.triples.isEmpty();
+    }
+
+    @Override
+    public boolean contains(Triple tripleMatch) {
+        final var matchPattern = PatternClassifier.classify(tripleMatch);
+        return switch (matchPattern) {
+            case SUB_ANY_ANY, ANY_PRE_ANY, ANY_ANY_OBJ, SUB_PRE_ANY, ANY_PRE_OBJ, SUB_ANY_OBJ ->
+                    currentStrategy.containsMatch(tripleMatch, matchPattern);
+            case SUB_PRE_OBJ -> this.triples.containsKey(tripleMatch);
+            case ANY_ANY_ANY -> !this.isEmpty();
+        };
+    }
+
+    @Override
+    public Stream<Triple> stream() {
+        return this.triples.keyStream();
+    }
+
+    @Override
+    public Stream<Triple> stream(Triple tripleMatch) {
+        var pattern = PatternClassifier.classify(tripleMatch);
+        return switch (pattern) {
+            case SUB_PRE_OBJ -> this.triples.containsKey(tripleMatch) ? Stream.of(tripleMatch) : Stream.empty();
+            case SUB_PRE_ANY, SUB_ANY_OBJ, SUB_ANY_ANY, ANY_PRE_OBJ, ANY_PRE_ANY, ANY_ANY_OBJ ->
+                    this.currentStrategy.streamMatch(tripleMatch, pattern);
+            case ANY_ANY_ANY -> this.stream();
+        };
+    }
+
+    @Override
+    public ExtendedIterator<Triple> find(Triple tripleMatch) {
+        var pattern = PatternClassifier.classify(tripleMatch);
+        return switch (pattern) {
+            case SUB_PRE_OBJ ->
+                    this.triples.containsKey(tripleMatch) ? new SingletonIterator<>(tripleMatch) : NiceIterator.emptyIterator();
+            case SUB_PRE_ANY, SUB_ANY_OBJ, SUB_ANY_ANY, ANY_PRE_OBJ, ANY_PRE_ANY, ANY_ANY_OBJ ->
+                    currentStrategy.findMatch(tripleMatch, pattern);
+            case ANY_ANY_ANY -> this.triples.keyIterator();
+        };
+    }
+
+    @Override
+    public IndexedSetTripleStore copy() {
+        return new IndexedSetTripleStore(this);
+    }
+}
diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/indexed/NodesToIndices.java b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/NodesToIndices.java
new file mode 100644
index 00000000000..ced07c91e0f
--- /dev/null
+++ b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/NodesToIndices.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.apache.jena.mem.store.indexed;
+
+import org.apache.jena.atlas.lib.Copyable;
+import org.apache.jena.graph.Node;
+import org.apache.jena.mem.collection.FastHashMap;
+
+/**
+ * {@link FastHashMap} from {@link Node} to {@link IndexList}, used by the
+ * eager indexing strategy as one of the three subject/predicate/object
+ * indices ("for this node, here are the indices of all triples that mention
+ * it in the corresponding slot").
+ */
+public class NodesToIndices
+        extends FastHashMap<Node, IndexList>
+        implements Copyable<NodesToIndices> {
+
+    /**
+     * Creates an empty map with the default initial capacity.
+     */
+    public NodesToIndices() {
+        super();
+    }
+
+    /**
+     * Copy constructor. Each value in the new map is an independent clone
+     * of the corresponding {@link IndexList} in {@code mapToCopy}.
+     *
+     * @param mapToCopy the source map
+     */
+    public NodesToIndices(final NodesToIndices mapToCopy) {
+        super(mapToCopy, IndexList::copy);
+    }
+
+    @Override
+    protected Node[] newKeysArray(int size) {
+        return new Node[size];
+    }
+
+    @Override
+    protected IndexList[] newValuesArray(int size) {
+        return new IndexList[size];
+    }
+
+    /**
+     * Returns an independent copy of this map. Keys are shared (nodes are
+     * immutable), values are cloned.
+     *
+     * @return a deep copy of this map
+     */
+    @Override
+    public NodesToIndices copy() {
+        return new NodesToIndices(this);
+    }
+
+    public IndexList getOrNew(Node key) { // returns the list mapped to key, inserting a new empty IndexList if absent
+        final var hashCode = key.hashCode();
+        var pIndex = findPosition(key, hashCode);
+        if (pIndex < 0) { // negative result: key is absent
+            if (tryGrowPositionsArrayIfNeeded()) {
+                pIndex = ~findEmptySlotWithoutEqualityCheck(hashCode); // table grew, so re-locate an empty slot for this hash
+            }
+            final var value = new IndexList();
+            final var eIndex = getFreeKeyIndex();
+            keys[eIndex] = key;
+            hashCodesOrDeletedIndices[eIndex] = hashCode;
+            values[eIndex] = value;
+            positions[~pIndex] = ~eIndex;
+            return value;
+        } else {
+            return values[~positions[pIndex]];
+        }
+    }
+}
\ No newline at end of file
diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/indexed/TripleSet.java b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/TripleSet.java
new file mode 100644
index 00000000000..50233e500fe
--- /dev/null
+++ b/jena-core/src/main/java/org/apache/jena/mem/store/indexed/TripleSet.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.apache.jena.mem.store.indexed;
+
+import org.apache.jena.atlas.lib.Copyable;
+import org.apache.jena.graph.Triple;
+import org.apache.jena.mem.collection.FastHashSet;
+
+import java.util.function.IntConsumer;
+
+/**
+ * {@link FastHashSet} of {@link Triple}s used as the canonical triple
+ * collection inside {@link IndexedSetTripleStore}. Adds a hook that fires
+ * whenever the underlying keys array grows, so that indexes built on top of
+ * this set (such as the reverse-index arrays in
+ * {@link org.apache.jena.mem.store.indexed.EagerStoreStrategy})
+ * can resize their parallel data structures in lock-step.
+ */
+public class TripleSet
+        extends FastHashSet<Triple>
+        implements Copyable<TripleSet> {
+
+    private IntConsumer onKeysGrowHook = null; // optional callback; null means nobody is notified
+
+    /**
+     * Register a callback that is invoked after the keys array grows; the
+     * callback receives the new array length. Setting this to {@code null}
+     * disables notifications.
+     *
+     * @param onKeysGrowHook callback receiving the new {@code keys.length}
+     */
+    public void setOnKeysGrowHook(IntConsumer onKeysGrowHook) {
+        this.onKeysGrowHook = onKeysGrowHook;
+    }
+
+    /**
+     * Creates an empty triple set.
+     */
+    public TripleSet() {
+        super();
+    }
+
+    /**
+     * Copy constructor.
+     *
+     * @param setToCopy the source set
+     */
+    private TripleSet(final TripleSet setToCopy) {
+        super(setToCopy);
+    }
+
+    @Override
+    protected Triple[] newKeysArray(int size) {
+        return new Triple[size];
+    }
+
+    @Override
+    protected void growKeysAndHashCodeArrays() {
+        super.growKeysAndHashCodeArrays();
+        if(onKeysGrowHook != null) {
+            onKeysGrowHook.accept(keys.length); // report the length after the grow
+        }
+    }
+
+    /**
+     * Returns an independent copy of this set. The grow-hook from the source
+     * is not propagated to the copy.
+     *
+     * @return a new {@link TripleSet} with the same triples
+     */
+    @Override
+    public TripleSet copy() {
+        return new TripleSet(this);
+    }
+
+    /**
+     * Returns the current length of the underlying {@code keys} array.
+     * This is the upper bound on the indices that may currently be valid;
+     * useful for callers that maintain parallel arrays keyed by entry index.
+     *
+     * @return the current capacity of the {@code keys} array
+     */
+    public int getInternalKeysLength() {
+        return keys.length;
+    }
+}
\ No newline at end of file
diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/legacy/ArrayBunch.java b/jena-core/src/main/java/org/apache/jena/mem/store/legacy/ArrayBunch.java
index 0a8ad7bf7ef..097760b3358 100644
--- a/jena-core/src/main/java/org/apache/jena/mem/store/legacy/ArrayBunch.java
+++ b/jena-core/src/main/java/org/apache/jena/mem/store/legacy/ArrayBunch.java
@@ -54,7 +54,7 @@ public ArrayBunch() {
      * The new bunch will contain all the same triples of the bunch to copy.
      * But it will reserve only the space needed to contain them. Growing is still possible.
* - * @param bunchToCopy + * @param bunchToCopy the bunch to copy */ private ArrayBunch(final ArrayBunch bunchToCopy) { this.elements = new Triple[bunchToCopy.size]; @@ -168,11 +168,7 @@ public void forEachRemaining(Consumer action) { @Override public Spliterator keySpliterator() { - final var initialSize = size; - final Runnable checkForConcurrentModification = () -> { - if (size != initialSize) throw new ConcurrentModificationException(); - }; - return new ArraySpliterator<>(elements, size, checkForConcurrentModification); + return new ArraySpliterator<>(elements, size, this); } @Override diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/EagerStoreStrategy.java b/jena-core/src/main/java/org/apache/jena/mem/store/roaring/EagerStoreStrategy.java similarity index 92% rename from jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/EagerStoreStrategy.java rename to jena-core/src/main/java/org/apache/jena/mem/store/roaring/EagerStoreStrategy.java index b201c6cfaa0..a9264fa5362 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/EagerStoreStrategy.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/roaring/EagerStoreStrategy.java @@ -19,15 +19,13 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.apache.jena.mem.store.roaring.strategies; +package org.apache.jena.mem.store.roaring; import org.apache.jena.graph.Node; import org.apache.jena.graph.Triple; import org.apache.jena.mem.pattern.MatchPattern; import org.apache.jena.mem.pattern.PatternClassifier; -import org.apache.jena.mem.store.roaring.NodesToBitmapsMap; -import org.apache.jena.mem.store.roaring.RoaringBitmapTripleIterator; -import org.apache.jena.mem.store.roaring.TripleSet; +import org.apache.jena.mem.store.strategies.StoreStrategy; import org.apache.jena.util.iterator.ExtendedIterator; import org.roaringbitmap.FastAggregation; import org.roaringbitmap.ImmutableBitmapDataProvider; @@ -97,8 +95,7 @@ public 
EagerStoreStrategy(final TripleSet triples, EagerStoreStrategy strategyTo */ private void indexAll() { // Initialize the index by adding all triples to the index - triples.indexedKeyIterator().forEachRemaining(entry -> - addToIndex(entry.key(), entry.index())); + triples.forEachKey(this::addToIndex); } /** @@ -108,15 +105,15 @@ private void indexAll() { */ private void indexAllParallel() { final var futureIndexSubjects = CompletableFuture.runAsync(() -> - triples.indexedKeyIterator().forEachRemaining(entry -> - addIndex(spoBitmaps[0], entry.key().getSubject(), entry.index()))); + triples.forEachKey((triple, index) -> + addIndex(spoBitmaps[0], triple.getSubject(), index))); final var futureIndexPredicates = CompletableFuture.runAsync(() -> - triples.indexedKeyIterator().forEachRemaining(entry -> - addIndex(spoBitmaps[1], entry.key().getPredicate(), entry.index()))); + triples.forEachKey((triple, index) -> + addIndex(spoBitmaps[1], triple.getPredicate(), index))); - triples.indexedKeyIterator().forEachRemaining(entry -> - addIndex(spoBitmaps[2], entry.key().getObject(), entry.index())); + triples.forEachKey((triple, index) -> + addIndex(spoBitmaps[2], triple.getObject(), index)); CompletableFuture.allOf(futureIndexSubjects, futureIndexPredicates).join(); } diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/RoaringTripleStore.java b/jena-core/src/main/java/org/apache/jena/mem/store/roaring/RoaringTripleStore.java index 336694e5e4f..25ad65e1852 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/RoaringTripleStore.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/roaring/RoaringTripleStore.java @@ -25,7 +25,7 @@ import org.apache.jena.mem.IndexingStrategy; import org.apache.jena.mem.pattern.PatternClassifier; import org.apache.jena.mem.store.TripleStore; -import org.apache.jena.mem.store.roaring.strategies.*; +import org.apache.jena.mem.store.strategies.*; import org.apache.jena.util.iterator.ExtendedIterator; import 
org.apache.jena.util.iterator.NiceIterator; import org.apache.jena.util.iterator.SingletonIterator; diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/StoreStrategy.java b/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/StoreStrategy.java deleted file mode 100644 index 6d31cbc2537..00000000000 --- a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/StoreStrategy.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.apache.jena.mem.store.roaring.strategies; - -import org.apache.jena.graph.Triple; -import org.apache.jena.mem.pattern.MatchPattern; -import org.apache.jena.util.iterator.ExtendedIterator; - -import java.util.stream.Stream; - -/** - * The store strategy defines how triples are indexed and how matches are found. - * It is used to implement different indexing strategies like Eager, Lazy, Manual, and Minimal. - * For the matching operations, only matches for the patterns SUB_ANY_ANY, ANY_PRE_ANY, ANY_ANY_OBJ, - * SUB_PRE_ANY, ANY_PRE_OBJ, and SUB_ANY_OBJ are supported. 
- * The patterns SUB_PRE_OBJ and ANY_ANY_ANY are not supported by the store strategies. - */ -public interface StoreStrategy { - /** - * Add a triple to the index if the cuurent strategy supports indexing. - * - * @param triple the triple to add - * @param index the index of the triple in the store - */ - void addToIndex(Triple triple, int index); - - /** - * Remove a triple from the index if the current strategy supports indexing. - * - * @param triple the triple to remove - * @param index the index of the triple in the store - */ - void removeFromIndex(Triple triple, int index); - - /** - * Clear the index of this store if the current strategy supports indexing. - * This will remove all triples from the index. - */ - void clearIndex(); - - /** - * Check if the index contains a match for the given triple and pattern. - * This is used to quickly check if a triple matches a given pattern without retrieving the triples. - * - * @param tripleMatch the triple to match - * @param pattern the pattern to match against - * @return true if there is a match, false otherwise - */ - boolean containsMatch(Triple tripleMatch, MatchPattern pattern); - - /** - * Stream the triples that match the given triple and pattern. - * This is used to retrieve the triples that match a given pattern. - * - * @param tripleMatch the triple to match - * @param pattern the pattern to match against - * @return a stream of triples that match the given pattern - */ - Stream streamMatch(Triple tripleMatch, MatchPattern pattern); - - /** - * Find the triples that match the given triple and pattern. - * This is used to retrieve the triples that match a given pattern as an iterator. 
- * - * @param tripleMatch the triple to match - * @param pattern the pattern to match against - * @return an iterator over the triples that match the given pattern - */ - ExtendedIterator findMatch(Triple tripleMatch, MatchPattern pattern); -} diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/LazyStoreStrategy.java b/jena-core/src/main/java/org/apache/jena/mem/store/strategies/LazyStoreStrategy.java similarity index 63% rename from jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/LazyStoreStrategy.java rename to jena-core/src/main/java/org/apache/jena/mem/store/strategies/LazyStoreStrategy.java index 4a31bb2469a..d7d59318a71 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/LazyStoreStrategy.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/strategies/LazyStoreStrategy.java @@ -19,7 +19,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.apache.jena.mem.store.roaring.strategies; +package org.apache.jena.mem.store.strategies; import org.apache.jena.graph.Triple; import org.apache.jena.mem.pattern.MatchPattern; @@ -29,15 +29,31 @@ import java.util.stream.Stream; /** - * A lazy store strategy that defers the initialization of the index until it is needed. - * This strategy is useful when the index is not always required, allowing for more efficient memory usage. - * It uses a supplier to create a new instance of {@link EagerStoreStrategy} when needed. + * {@link StoreStrategy} that defers index construction until the first + * pattern lookup. Add/remove are no-ops while the index is absent (the + * triples are still maintained in the enclosing + * {@link org.apache.jena.mem.store.TripleStore} but no + * subject/predicate/object index is updated). 
On the first + * {@code containsMatch}/{@code streamMatch}/{@code findMatch} call, the + * supplied callback is invoked to build (and install) an + * {@link org.apache.jena.mem.IndexingStrategy#EAGER} implementation; + * the lookup is then forwarded to it. + *

+ * Used to back both {@link org.apache.jena.mem.IndexingStrategy#LAZY} and + * {@link org.apache.jena.mem.IndexingStrategy#LAZY_PARALLEL}; the + * sequential / parallel choice is encoded in the supplied callback. */ public class LazyStoreStrategy implements StoreStrategy { - private final Supplier setCurrentStrategyToNewEagerStoreStrategy; + private final Supplier setCurrentStrategyToNewEagerStoreStrategy; - public LazyStoreStrategy(final Supplier setCurrentStrategyToNewEagerStoreStrategy) { + /** + * @param setCurrentStrategyToNewEagerStoreStrategy callback that builds + * an {@link org.apache.jena.mem.IndexingStrategy#EAGER} strategy, installs it as the enclosing + * store's current strategy, and returns it so this strategy can + * delegate the triggering lookup to it + */ + public LazyStoreStrategy(final Supplier setCurrentStrategyToNewEagerStoreStrategy) { this.setCurrentStrategyToNewEagerStoreStrategy = setCurrentStrategyToNewEagerStoreStrategy; } diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/ManualStoreStrategy.java b/jena-core/src/main/java/org/apache/jena/mem/store/strategies/ManualStoreStrategy.java similarity index 78% rename from jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/ManualStoreStrategy.java rename to jena-core/src/main/java/org/apache/jena/mem/store/strategies/ManualStoreStrategy.java index 83de312190c..44836b47554 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/ManualStoreStrategy.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/strategies/ManualStoreStrategy.java @@ -19,7 +19,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.apache.jena.mem.store.roaring.strategies; +package org.apache.jena.mem.store.strategies; import org.apache.jena.graph.Triple; import org.apache.jena.mem.pattern.MatchPattern; @@ -28,9 +28,15 @@ import java.util.stream.Stream; /** - * A manual store strategy that does not maintain an index. 
- * This strategy is used when no indexing is required, and all operations are no-ops. - * It throws an exception if any match operation is attempted before the index is initialized. + * {@link StoreStrategy} that never builds an index automatically. + * Add/remove/clear are no-ops on the index side; pattern-match operations + * throw {@link UnsupportedOperationException} until the user explicitly + * initializes the index (typically via + * {@link org.apache.jena.mem.GraphMemIndexedSet#initializeIndex()} or + * {@link org.apache.jena.mem.GraphMemIndexedSet#initializeIndexParallel()}), + * which swaps this strategy out for an {@link org.apache.jena.mem.IndexingStrategy#EAGER} strategy. + *

+ * Used to back {@link org.apache.jena.mem.IndexingStrategy#MANUAL}. */ public class ManualStoreStrategy implements StoreStrategy { @Override diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/MinimalStoreStrategy.java b/jena-core/src/main/java/org/apache/jena/mem/store/strategies/MinimalStoreStrategy.java similarity index 67% rename from jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/MinimalStoreStrategy.java rename to jena-core/src/main/java/org/apache/jena/mem/store/strategies/MinimalStoreStrategy.java index 7cfce2f9f20..65c65b82c35 100644 --- a/jena-core/src/main/java/org/apache/jena/mem/store/roaring/strategies/MinimalStoreStrategy.java +++ b/jena-core/src/main/java/org/apache/jena/mem/store/strategies/MinimalStoreStrategy.java @@ -19,42 +19,49 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.apache.jena.mem.store.roaring.strategies; +package org.apache.jena.mem.store.strategies; import org.apache.jena.graph.Triple; +import org.apache.jena.mem.collection.FastHashSet; import org.apache.jena.mem.pattern.MatchPattern; -import org.apache.jena.mem.store.roaring.TripleSet; import org.apache.jena.util.iterator.ExtendedIterator; import java.util.stream.Stream; /** - * A minimal store strategy that does not maintain any bitmaps or indexes. - * This strategy is used when no indexing is required. - * The matching operations are performed directly on the set of triples. - * This strategy is useful for scenarios where the overhead of maintaining an index is not justified, - * such as when the dataset is small or when the performance of match operations is not critical. + * {@link StoreStrategy} that never builds an index but still answers + * pattern-match operations - by linearly filtering the triple set. Useful + * when the dataset is small or when memory is more precious than match-time + * performance. + *

+ * Used to back {@link org.apache.jena.mem.IndexingStrategy#MINIMAL}. The + * user can switch to eager indexing at any time by calling + * {@link org.apache.jena.mem.GraphMemIndexedSet#initializeIndex()}; calling + * {@code clearIndex} reverts to filtering again. */ public class MinimalStoreStrategy implements StoreStrategy { - private final TripleSet triples; + private final FastHashSet triples; - public MinimalStoreStrategy(final TripleSet triples) { + /** + * @param triples the canonical triple set to filter against + */ + public MinimalStoreStrategy(final FastHashSet triples) { this.triples = triples; } @Override public void addToIndex(final Triple triple, final int index) { - // No-op, as we do not store any bitmaps + // No-op, as we do not store any indices } @Override public void removeFromIndex(final Triple triple, final int index) { - // No-op, as we do not store any bitmaps + // No-op, as we do not store any indices } @Override public void clearIndex() { - // No-op, as we do not store any bitmaps + // No-op, as we do not store any indices } @Override diff --git a/jena-core/src/main/java/org/apache/jena/mem/store/strategies/StoreStrategy.java b/jena-core/src/main/java/org/apache/jena/mem/store/strategies/StoreStrategy.java new file mode 100644 index 00000000000..43f7dbc93b2 --- /dev/null +++ b/jena-core/src/main/java/org/apache/jena/mem/store/strategies/StoreStrategy.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.apache.jena.mem.store.strategies; + +import org.apache.jena.graph.Triple; +import org.apache.jena.mem.pattern.MatchPattern; +import org.apache.jena.util.iterator.ExtendedIterator; + +import java.util.stream.Stream; + +/** + * Plug-in interface that controls how the auxiliary subject/predicate/object + * index of a {@link org.apache.jena.mem.store.TripleStore} which supports strategies + * is maintained and how partial-pattern matches are evaluated. + *

+ * The match methods only need to handle the partial-pattern cases: + * {@link MatchPattern#SUB_ANY_ANY}, {@link MatchPattern#ANY_PRE_ANY}, + * {@link MatchPattern#ANY_ANY_OBJ}, {@link MatchPattern#SUB_PRE_ANY}, + * {@link MatchPattern#ANY_PRE_OBJ} and {@link MatchPattern#SUB_ANY_OBJ}. + * The fully concrete pattern {@link MatchPattern#SUB_PRE_OBJ} and the + * fully open pattern {@link MatchPattern#ANY_ANY_ANY} are answered directly + * from the triple set by the enclosing store and never reach the strategy. + */ +public interface StoreStrategy { + /** + * Notify the strategy that a triple was added to the underlying triple + * set at the given index. Implementations that maintain an index must + * update it; implementations without an index are free to no-op. + * + * @param triple the newly added triple + * @param index the stable index it now occupies in the triple set + */ + void addToIndex(final Triple triple, final int index); + + /** + * Notify the strategy that the triple at the given index has been + * removed from the underlying triple set. Implementations that maintain + * an index must remove the triple from it; implementations without an + * index are free to no-op. + * + * @param triple the removed triple + * @param index the index it occupied immediately before removal + */ + void removeFromIndex(final Triple triple, final int index); + + /** + * Discard any auxiliary index data held by the strategy. Implementations + * without an index may no-op. + */ + void clearIndex(); + + /** + * Check if the index contains a match for the given triple and pattern. + * This is used to quickly check if a triple matches a given pattern without retrieving the triples. 
+ * + * @param tripleMatch the triple to match + * @param pattern the pattern to match against + * @return true if there is a match, false otherwise + */ + boolean containsMatch(final Triple tripleMatch, final MatchPattern pattern); + + /** + * Stream the triples that match the given triple and pattern. + * This is used to retrieve the triples that match a given pattern. + * + * @param tripleMatch the triple to match + * @param pattern the pattern to match against + * @return a stream of triples that match the given pattern + */ + Stream streamMatch(final Triple tripleMatch, final MatchPattern pattern); + + /** + * Find the triples that match the given triple and pattern. + * This is used to retrieve the triples that match a given pattern as an iterator. + * + * @param tripleMatch the triple to match + * @param pattern the pattern to match against + * @return an iterator over the triples that match the given pattern + */ + ExtendedIterator findMatch(final Triple tripleMatch, final MatchPattern pattern); + + /** + * Check if the index has been initialized and all triples are indexed. 
+ * + * @return true if the index is initialized, false otherwise + */ + default boolean isIndexInitialized() { + return false; + } +} \ No newline at end of file diff --git a/jena-core/src/test/java/org/apache/jena/mem/AbstractGraphMemTest.java b/jena-core/src/test/java/org/apache/jena/mem/AbstractGraphMemTest.java index 5659e6a20c8..b75b60c6e98 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/AbstractGraphMemTest.java +++ b/jena-core/src/test/java/org/apache/jena/mem/AbstractGraphMemTest.java @@ -37,6 +37,8 @@ import org.hamcrest.collection.IsEmptyCollection; import org.hamcrest.collection.IsIterableContainingInAnyOrder; +import java.util.ArrayList; + public abstract class AbstractGraphMemTest { protected GraphMem sut; @@ -1044,4 +1046,34 @@ public void testCopyHasNoSideEffects() { assertFalse(sut.contains(triple("s3 p3 o3"))); } + @Test + public void testDeleteAll() { + for(var subjects=1; subjects <= 8 ; subjects++) { + for(var predicates=1; predicates <= 8 ; predicates++) { + for(var objects=1; objects <= 8 ; objects++) { + sut = createGraph(); + var triples = new ArrayList(); + for(var s=0; s < subjects ; s++) { + for(var p=0; p < predicates ; p++) { + for(var o=0; o < objects ; o++) { + var t = triple("s" + s + " p" + p + " o" + o); + triples.add(t); + sut.add(t); + assertTrue(sut.contains(t)); + } + } + } + assertEquals(subjects*predicates*objects, sut.size()); + // print subjects, predicates, objects and size + // System.out.println(subjects + " - " + predicates + " - " + objects + " : " + sut.size()); + for (var triple : triples) { + assertTrue(sut.contains(triple)); + sut.delete(triple); + assertFalse(sut.contains(triple)); + } + assertEquals(0, sut.size()); + } + } + } + } } diff --git a/jena-core/src/test/java/org/apache/jena/mem/GraphMemFastTest.java b/jena-core/src/test/java/org/apache/jena/mem/GraphMemFastTest.java index 868a79a34d8..95546c3f4b8 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/GraphMemFastTest.java +++ 
b/jena-core/src/test/java/org/apache/jena/mem/GraphMemFastTest.java @@ -21,10 +21,33 @@ package org.apache.jena.mem; +import org.junit.Test; + +import static org.apache.jena.testing_framework.GraphHelper.triple; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertTrue; + +/** + * Concrete instantiation of {@link AbstractGraphMemTest} that exercises + * {@link GraphMemFast} (a {@link GraphMem} backed by a + * {@link org.apache.jena.mem.store.fast.FastTripleStore}). The shared + * contract assertions live in the abstract base; this class only adds tests + * that are specific to the {@code GraphMemFast} variant. + */ public class GraphMemFastTest extends AbstractGraphMemTest { @Override protected GraphMem createGraph() { return new GraphMemFast(); } + + @Test + public void copyReturnsAGraphMemFastInstance() { + sut.add(triple("s p o")); + final var copy = sut.copy(); + // The override on GraphMemFast must preserve the runtime type so + // callers don't lose subclass-specific functionality through copy(). + assertTrue("copy() must return a GraphMemFast", copy instanceof GraphMemFast); + assertNotSame(sut, copy); + } } \ No newline at end of file diff --git a/jena-core/src/test/java/org/apache/jena/mem/GraphMemIndexedSetTest.java b/jena-core/src/test/java/org/apache/jena/mem/GraphMemIndexedSetTest.java new file mode 100644 index 00000000000..3622e49d7f3 --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/mem/GraphMemIndexedSetTest.java @@ -0,0 +1,332 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem; + +import org.apache.jena.graph.Node; +import org.apache.jena.graph.Triple; +import org.apache.jena.mem.pattern.PatternClassifier; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import java.util.Arrays; +import java.util.Collection; + +import static org.apache.jena.testing_framework.GraphHelper.triple; +import static org.junit.Assert.*; + +@RunWith(Parameterized.class) +public class GraphMemIndexedSetTest extends AbstractGraphMemTest { + + @Parameterized.Parameter + public IndexingStrategy indexingStrategy; + + @Parameterized.Parameters(name = "{0}") + public static Collection data() { + return Arrays.stream(IndexingStrategy.values()) + .map(strategy -> new Object[]{strategy}) + .toList(); + } + + @Override + public void testDeleteAll() { + // the delete test takes almost a minute with MANUAL indexing + if(indexingStrategy == IndexingStrategy.MANUAL) + return; + + super.testDeleteAll(); + } + + @Override + protected GraphMem createGraph() { + switch (indexingStrategy) { + case EAGER, LAZY, LAZY_PARALLEL, MINIMAL: + return new GraphMemIndexedSet(indexingStrategy); + case MANUAL: + return setupGraphWithSpyForSpecialManualStrategy(); + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + indexingStrategy); + } + } + + private static boolean isPatternRequiringIndexing(final Triple tripleMatch) { + switch(PatternClassifier.classify(tripleMatch)) { + case 
SUB_PRE_ANY, SUB_ANY_OBJ, SUB_ANY_ANY, ANY_PRE_OBJ, ANY_PRE_ANY, ANY_ANY_OBJ: + return true; + case ANY_ANY_ANY, SUB_PRE_OBJ: + return false; + default: + throw new IllegalArgumentException("Unknown pattern classification: " + PatternClassifier.classify(tripleMatch)); + } + } + + /** + * Builds a {@link GraphMemIndexedSet} with the {@link IndexingStrategy#MANUAL} + * strategy, wrapped in a Mockito spy that exercises the documented + * "fail-then-initialize-then-clear" cycle on every pattern lookup. Without + * the wrapper the inherited tests in {@link AbstractGraphMemTest} would + * fail outright, because a freshly-created MANUAL graph throws + * {@link UnsupportedOperationException} on any pattern operation until + * {@link GraphMemIndexedSet#initializeIndex()} has been called. + */ + private GraphMemIndexedSet setupGraphWithSpyForSpecialManualStrategy() { + final var realGraph = new GraphMemIndexedSet(IndexingStrategy.MANUAL); + final var spyGraph = Mockito.spy(realGraph); + + // Mock {@link Graph#contains(Triple)} + Mockito.doAnswer(invocation -> { + final Triple tripleMatch = invocation.getArgument(0); + // If the triple match is a pattern that requires indexing, throw an exception + assertThrows(UnsupportedOperationException.class, () -> realGraph.contains(tripleMatch)); + + // now initialize the index + realGraph.initializeIndex(); + // determine the result with the index + final var result = realGraph.contains(tripleMatch); + // Reset the indexing strategy for the next call + realGraph.resetIndexingStrategy(); + // Return the result of the store with the index + return result; + }).when(spyGraph).contains(Mockito.argThat(t -> isPatternRequiringIndexing(t))); + + // Mock {@link Graph#find(Triple)} + Mockito.doAnswer(invocation -> { + final Triple tripleMatch = invocation.getArgument(0); + // If the triple match is a pattern that requires indexing, throw an exception + assertThrows(UnsupportedOperationException.class, () -> realGraph.find(tripleMatch)); + + // 
now initialize the index + realGraph.initializeIndex(); + // determine the result with the index + final var result = realGraph.find(tripleMatch); + // Reset the indexing strategy for the next call + realGraph.resetIndexingStrategy(); + // Return the result of the store with the index + return result; + }).when(spyGraph).find(Mockito.argThat(t -> isPatternRequiringIndexing(t))); + + var triplePatternMatcher = new TriplePatternArgumentCollectMatcher(); + + // Mock {@link Graph#stream(Node, Node, Node)} + Mockito.doAnswer(invocation -> { + final Node sm = invocation.getArgument(0); + final Node pm = invocation.getArgument(1); + final Node om = invocation.getArgument(2); + + // If the triple match is a pattern that requires indexing, throw an exception + assertThrows(UnsupportedOperationException.class, () -> realGraph.stream(sm, pm, om)); + + // now initialize the index + realGraph.initializeIndex(); + // determine the result with the index + final var result = realGraph.stream(sm, pm, om); + // Reset the indexing strategy for the next call + realGraph.resetIndexingStrategy(); + // Return the result of the store with the index + return result; + }).when(spyGraph) + .stream(Mockito.argThat(triplePatternMatcher::matches), + Mockito.argThat(triplePatternMatcher::matches), + Mockito.argThat(triplePatternMatcher::matches)); + + return spyGraph; + } + + /** + * Matcher to collect the arguments of a triple pattern (Node, Node, Node) + * and check if the pattern requires indexing. + * This matcher is used to mock the behavior of methods that take a triple pattern as argument. 
+ */ + private class TriplePatternArgumentCollectMatcher implements org.mockito.ArgumentMatcher { + final Node[] nodes = new Node[3]; + int index = 0; + + @Override + public boolean matches(Node node) { + switch (index) { + case 0: + nodes[0] = node; + index++; + return true; + case 1: + nodes[1] = node; + index++; + return true; + case 2: + nodes[2] = node; + index = 0; // Reset for next match + return isPatternRequiringIndexing(Triple.createMatch(nodes[0], nodes[1], nodes[2])); + default: + return false; // Should not happen + } + } + } + + private GraphMemIndexedSet getSutAsGraphMemIndexedSet() { + return (GraphMemIndexedSet) super.sut; + } + + @Test + public void testGetIndexingStrategy() { + // Given + final var sut = getSutAsGraphMemIndexedSet(); + + // Then + assertEquals(indexingStrategy, sut.getIndexingStrategy()); + } + + @Test + public void testIsIndexInitialized() { + // Given + final var sut = getSutAsGraphMemIndexedSet(); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER: + assertTrue(sut.isIndexInitialized()); + break; + case LAZY, LAZY_PARALLEL, MANUAL, MINIMAL: + assertFalse(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + + // When + sut.add(triple("s p o")); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER: + assertTrue(sut.isIndexInitialized()); + break; + case LAZY, LAZY_PARALLEL, MANUAL, MINIMAL: + assertFalse(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + } + + @Test + public void testLazyInitiallization() { + // Given + final var sut = getSutAsGraphMemIndexedSet(); + sut.add(triple("s p o")); + + // When + sut.contains(triple("s ?? 
o")); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER, LAZY, LAZY_PARALLEL: + assertTrue(sut.isIndexInitialized()); + break; + case MANUAL, MINIMAL: + assertFalse(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + } + + @Test + public void testManualInitialization() { + // Given + final var sut = getSutAsGraphMemIndexedSet(); + + // When + sut.initializeIndex(); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER, LAZY, LAZY_PARALLEL, MANUAL, MINIMAL: + assertTrue(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + } + + @Test + public void testManualInitializationParallel() { + // Given + final var sut = getSutAsGraphMemIndexedSet(); + + // When + sut.initializeIndexParallel(); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER, LAZY, LAZY_PARALLEL, MANUAL, MINIMAL: + assertTrue(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + } + + @Test + public void testResetIndexingStrategy() { + // Given + final var sut = getSutAsGraphMemIndexedSet(); + sut.initializeIndex(); + + // When + sut.resetIndexingStrategy(); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER: + assertTrue(sut.isIndexInitialized()); + break; + case LAZY, LAZY_PARALLEL, MANUAL, MINIMAL: + assertFalse(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + } + + @Test + public void testCopyPreservesStrategyAndType() { + // The MANUAL case here is the spy-wrapped graph, not a plain + // GraphMemIndexedSet; copy() doesn't go through the spy so we limit + // this test to the strategies that build their graph directly. 
+ if (indexingStrategy == IndexingStrategy.MANUAL) return; + + final var sut = getSutAsGraphMemIndexedSet(); + sut.add(triple("s p o")); + + final var copy = sut.copy(); + assertNotNull(copy); + // The override on GraphMemIndexedSet must preserve the runtime type + // and the indexing strategy. + assertEquals(indexingStrategy, copy.getIndexingStrategy()); + assertTrue(copy.contains(triple("s p o"))); + + // Mutations in the copy must not affect the source + copy.add(triple("s2 p2 o2")); + assertFalse(sut.contains(triple("s2 p2 o2"))); + assertTrue(copy.contains(triple("s2 p2 o2"))); + } +} \ No newline at end of file diff --git a/jena-core/src/test/java/org/apache/jena/mem/IndexingStrategyTest.java b/jena-core/src/test/java/org/apache/jena/mem/IndexingStrategyTest.java new file mode 100644 index 00000000000..643922a650b --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/mem/IndexingStrategyTest.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem; + +import org.apache.jena.mem.store.indexed.IndexedSetTripleStore; +import org.junit.Test; + +import static org.apache.jena.testing_framework.GraphHelper.triple; +import static org.junit.Assert.*; + +/** + * Unit tests for the {@link IndexingStrategy} enum: verifies the five + * documented strategies are present and that an {@link IndexedSetTripleStore} + * can be constructed with each of them. + */ +public class IndexingStrategyTest { + + @Test + public void allDocumentedStrategiesArePresent() { + // If any of these names changes, GraphMemIndexedSet, the strategy + // factory and a number of switch statements in the codebase will + // need to be updated in lock-step. + assertNotNull(IndexingStrategy.valueOf("EAGER")); + assertNotNull(IndexingStrategy.valueOf("LAZY")); + assertNotNull(IndexingStrategy.valueOf("LAZY_PARALLEL")); + assertNotNull(IndexingStrategy.valueOf("MANUAL")); + assertNotNull(IndexingStrategy.valueOf("MINIMAL")); + } + + @Test + public void valueOfRoundTripsForEveryConstant() { + for (IndexingStrategy s : IndexingStrategy.values()) { + assertEquals(s, IndexingStrategy.valueOf(s.name())); + } + } + + @Test(expected = IllegalArgumentException.class) + public void valueOfRejectsUnknownNames() { + IndexingStrategy.valueOf("NOT_A_STRATEGY"); + } + + @Test + public void everyStrategyConstructsAValidIndexedSetTripleStore() { + // Sanity check: the enum is the public configuration knob for + // IndexedSetTripleStore. Every value must lead to a usable store. 
+ for (IndexingStrategy s : IndexingStrategy.values()) { + final var store = new IndexedSetTripleStore(s); + assertEquals(s, store.getIndexingStrategy()); + assertEquals(0, store.countTriples()); + } + } + + @Test + public void graphMemIndexedSetExposesTheSelectedStrategy() { + for (IndexingStrategy s : IndexingStrategy.values()) { + final var graph = new GraphMemIndexedSet(s); + assertEquals(s, graph.getIndexingStrategy()); + } + } + + @Test + public void eagerStrategyMaintainsIndexAfterAdd() { + final var graph = new GraphMemIndexedSet(IndexingStrategy.EAGER); + graph.add(triple("s p o")); + assertTrue(graph.isIndexInitialized()); + } + + @Test + public void lazyStrategyDelaysIndexBuildUntilFirstPatternLookup() { + final var graph = new GraphMemIndexedSet(IndexingStrategy.LAZY); + graph.add(triple("s p o")); + assertFalse(graph.isIndexInitialized()); + // Pattern lookup triggers the build + graph.contains(triple("s ?? ??")); + assertTrue(graph.isIndexInitialized()); + } + + @Test + public void manualStrategyRefusesPatternLookupUntilIndexInitialized() { + final var graph = new GraphMemIndexedSet(IndexingStrategy.MANUAL); + graph.add(triple("s p o")); + assertThrows(UnsupportedOperationException.class, + () -> graph.contains(triple("s ?? ??"))); + graph.initializeIndex(); + // After explicit initialization, lookups succeed. + assertTrue(graph.contains(triple("s ?? ??"))); + } + + @Test + public void minimalStrategyAnswersPatternLookupsWithoutBuildingIndex() { + final var graph = new GraphMemIndexedSet(IndexingStrategy.MINIMAL); + graph.add(triple("s p o")); + assertFalse(graph.isIndexInitialized()); + // MINIMAL filters linearly instead of building an index. + assertTrue(graph.contains(triple("s ?? 
??"))); + assertFalse(graph.isIndexInitialized()); + } +} diff --git a/jena-core/src/test/java/org/apache/jena/mem/TS4_GraphMem.java b/jena-core/src/test/java/org/apache/jena/mem/TS4_GraphMem.java index 4fecf61d8a6..09aa5836a17 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/TS4_GraphMem.java +++ b/jena-core/src/test/java/org/apache/jena/mem/TS4_GraphMem.java @@ -30,8 +30,10 @@ import org.apache.jena.mem.spliterator.SparseArraySpliteratorTest; import org.apache.jena.mem.spliterator.SparseArraySubSpliteratorTest; import org.apache.jena.mem.store.fast.FastArrayBunchTest; +import org.apache.jena.mem.store.fast.FastHashedBunchMapTest; import org.apache.jena.mem.store.fast.FastHashedTripleBunchTest; import org.apache.jena.mem.store.fast.FastTripleStoreTest; +import org.apache.jena.mem.store.indexed.*; import org.apache.jena.mem.store.legacy.*; import org.apache.jena.mem.store.roaring.RoaringBitmapTripleIteratorTest; import org.apache.jena.mem.store.roaring.RoaringTripleStoreTest; @@ -62,8 +64,19 @@ // store/fast FastTripleStoreTest.class, FastArrayBunchTest.class, + FastHashedBunchMapTest.class, FastHashedTripleBunchTest.class, + // store/indexed + IndexedSetTripleStoreTest.class, + IndexListTest.class, + IndexListIteratorTest.class, + IndexListSpliteratorTest.class, + IndexListsIteratorTest.class, + IndexListsSpliteratorTest.class, + NodesToIndicesTest.class, + TripleSetTest.class, + // store/roaring RoaringTripleStoreTest.class, RoaringBitmapTripleIteratorTest.class, @@ -81,7 +94,9 @@ // -- GraphMemLegacyTest.class, GraphMemFastTest.class, + GraphMemIndexedSetTest.class, GraphMemRoaringTest.class, - GraphMemTest.class + GraphMemTest.class, + IndexingStrategyTest.class } ) public class TS4_GraphMem {} diff --git a/jena-core/src/test/java/org/apache/jena/mem/collection/AbstractJenaMapNodeTest.java b/jena-core/src/test/java/org/apache/jena/mem/collection/AbstractJenaMapNodeTest.java index ba9d9c15b09..c3ae6f94a20 100644 --- 
a/jena-core/src/test/java/org/apache/jena/mem/collection/AbstractJenaMapNodeTest.java +++ b/jena-core/src/test/java/org/apache/jena/mem/collection/AbstractJenaMapNodeTest.java @@ -171,14 +171,14 @@ public void testContainKey() { public void testKeyIteratorEmpty() { var iter = sut.keyIterator(); assertFalse(iter.hasNext()); - assertThrows(NoSuchElementException.class, () -> iter.next()); + assertThrows(NoSuchElementException.class, iter::next); } @Test public void testValueIteratorEmpty2() { var iter = sut.valueIterator(); assertFalse(iter.hasNext()); - assertThrows(NoSuchElementException.class, () -> iter.next()); + assertThrows(NoSuchElementException.class, iter::next); } @Test @@ -577,7 +577,7 @@ public void tryPut1000Nodes() { @Test public void computeIfAbsend1000Nodes() { for (int i = 0; i < 1000; i++) { - sut.computeIfAbsent(node("s" + i), () -> new Object()); + sut.computeIfAbsent(node("s" + i), Object::new); } assertEquals(1000, sut.size()); } @@ -613,38 +613,4 @@ public void tryPutAndTryRemove1000Triples() { } assertTrue(sut.isEmpty()); } - - - private static class HashCommonNodeMap extends HashCommonMap { - public HashCommonNodeMap() { - super(10); - } - - @Override - protected Node[] newKeysArray(int size) { - return new Node[size]; - } - - @Override - public void clear() { - super.clear(10); - } - - @Override - protected Object[] newValuesArray(int size) { - return new Object[size]; - } - } - - private static class FastNodeHashMap extends FastHashMap { - @Override - protected Node[] newKeysArray(int size) { - return new Node[size]; - } - - @Override - protected Object[] newValuesArray(int size) { - return new Object[size]; - } - } } \ No newline at end of file diff --git a/jena-core/src/test/java/org/apache/jena/mem/collection/AbstractJenaSetTripleTest.java b/jena-core/src/test/java/org/apache/jena/mem/collection/AbstractJenaSetTripleTest.java index 1cd962e2d56..edaf60e26e2 100644 --- 
a/jena-core/src/test/java/org/apache/jena/mem/collection/AbstractJenaSetTripleTest.java +++ b/jena-core/src/test/java/org/apache/jena/mem/collection/AbstractJenaSetTripleTest.java @@ -106,7 +106,7 @@ public void testContainKey() { public void testKeyIteratorEmpty() { var iter = sut.keyIterator(); assertFalse(iter.hasNext()); - assertThrows(NoSuchElementException.class, () -> iter.next()); + assertThrows(NoSuchElementException.class, iter::next); } @Test @@ -114,7 +114,7 @@ public void testKeyIteratorNextThrowsConcurrentModificationException() { sut.tryAdd(triple("s o p")); var iter = sut.keyIterator(); sut.tryAdd(triple("s o p2")); - assertThrows(ConcurrentModificationException.class, () -> iter.next()); + assertThrows(ConcurrentModificationException.class, iter::next); } @Test @@ -362,29 +362,4 @@ public void addAndRemove1000Triples() { } assertTrue(sut.isEmpty()); } - - - private static class HashCommonTripleSet extends HashCommonSet { - public HashCommonTripleSet() { - super(10); - } - - @Override - protected Triple[] newKeysArray(int size) { - return new Triple[size]; - } - - @Override - public void clear() { - super.clear(10); - } - } - - private static class FastTripleHashSet extends FastHashSet { - @Override - protected Triple[] newKeysArray(int size) { - return new Triple[size]; - } - } - } \ No newline at end of file diff --git a/jena-core/src/test/java/org/apache/jena/mem/collection/FastHashMapTest2.java b/jena-core/src/test/java/org/apache/jena/mem/collection/FastHashMapTest2.java index f098548b3b3..d932ce23208 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/collection/FastHashMapTest2.java +++ b/jena-core/src/test/java/org/apache/jena/mem/collection/FastHashMapTest2.java @@ -24,9 +24,11 @@ import org.junit.Test; import java.util.function.UnaryOperator; +import java.util.HashMap; +import java.util.HashSet; import static org.apache.jena.testing_framework.GraphHelper.node; -import static org.junit.Assert.assertEquals; +import static 
org.junit.Assert.*; public class FastHashMapTest2 { @@ -113,6 +115,69 @@ public void testCopyConstructorAddAndDeleteHasNoSideEffects() { assertEquals(2, (int) original.get(node("s2"))); } + @Test + public void testPutAndGetIndexAssignsSequentialIndicesAndReturnsExistingForRepeats() { + var sut = new FastNodeHashMap(); + // First-time puts assign new indices. + final int i0 = sut.putAndGetIndex(node("s"), 100); + final int i1 = sut.putAndGetIndex(node("s1"), 200); + final int i2 = sut.putAndGetIndex(node("s2"), 300); + assertEquals(0, i0); + assertEquals(1, i1); + assertEquals(2, i2); + assertEquals(100, (int) sut.getValueAt(i0)); + assertEquals(200, (int) sut.getValueAt(i1)); + assertEquals(300, (int) sut.getValueAt(i2)); + } + + @Test + public void testPutAndGetIndexOverwritesValueForExistingKey() { + var sut = new FastNodeHashMap(); + final int i0 = sut.putAndGetIndex(node("s"), 100); + // Re-putting the same key returns the SAME index but with the new value. + final int i0Again = sut.putAndGetIndex(node("s"), 999); + assertEquals(i0, i0Again); + assertEquals(999, (int) sut.get(node("s"))); + assertEquals(1, sut.size()); + } + + @Test + public void testForEachKeyVisitsEveryEntryWithItsIndex() { + var sut = new FastNodeHashMap(); + sut.putAndGetIndex(node("a"), 0); + sut.putAndGetIndex(node("b"), 1); + sut.putAndGetIndex(node("c"), 2); + + final HashMap seen = new HashMap<>(); + sut.forEachKey(seen::put); + + assertEquals(3, seen.size()); + assertEquals(Integer.valueOf(0), seen.get(node("a"))); + assertEquals(Integer.valueOf(1), seen.get(node("b"))); + assertEquals(Integer.valueOf(2), seen.get(node("c"))); + } + + @Test + public void testForEachKeySkipsRemovedSlots() { + var sut = new FastNodeHashMap(); + sut.putAndGetIndex(node("a"), 0); + sut.putAndGetIndex(node("b"), 1); + sut.putAndGetIndex(node("c"), 2); + sut.tryRemove(node("b")); + + final HashSet visited = new HashSet<>(); + sut.forEachKey((k, i) -> visited.add(k)); + assertEquals(2, visited.size()); + 
assertTrue(visited.contains(node("a"))); + assertTrue(visited.contains(node("c"))); + } + + @Test + public void testForEachKeyOnEmptyMapIsNoOp() { + var sut = new FastNodeHashMap(); + sut.forEachKey((k, i) -> fail("consumer must not be called on an empty map")); + } + private static class FastNodeHashMap extends FastHashMap { public FastNodeHashMap() { diff --git a/jena-core/src/test/java/org/apache/jena/mem/collection/FastHashSetTest2.java b/jena-core/src/test/java/org/apache/jena/mem/collection/FastHashSetTest2.java index 5841491b892..1fc61f1a578 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/collection/FastHashSetTest2.java +++ b/jena-core/src/test/java/org/apache/jena/mem/collection/FastHashSetTest2.java @@ -23,8 +23,6 @@ import org.junit.Before; import org.junit.Test; -import java.util.ArrayList; -import java.util.ConcurrentModificationException; import java.util.List; import static org.apache.jena.testing_framework.GraphHelper.node; @@ -55,13 +53,33 @@ public void testAddAndGetIndex() { @Test public void testAddAndGetIndexWithSameHashCode() { - assertEquals(0, sut.addAndGetIndex("a", 0)); - assertEquals(1, sut.addAndGetIndex("b", 0)); - assertEquals(2, sut.addAndGetIndex("c", 0)); + final var a = new Object() { + @Override + public int hashCode() { + return 0; + } + }; + final var b = new Object() { + @Override + public int hashCode() { + return 0; + } + }; + final var c = new Object() { + @Override + public int hashCode() { + return 0; + } + }; + + var objectHashSet = new FastObjectHashSet(); + assertEquals(0, objectHashSet.addAndGetIndex(a)); + assertEquals(1, objectHashSet.addAndGetIndex(b)); + assertEquals(2, objectHashSet.addAndGetIndex(c)); - assertEquals(~0, sut.addAndGetIndex("a", 0)); - assertEquals(~1, sut.addAndGetIndex("b", 0)); - assertEquals(~2, sut.addAndGetIndex("c", 0)); + assertEquals(~0, objectHashSet.addAndGetIndex(a)); + assertEquals(~1, objectHashSet.addAndGetIndex(b)); + assertEquals(~2, objectHashSet.addAndGetIndex(c)); } 
@Test @@ -188,127 +206,44 @@ public void testCopyConstructorAddAndDeleteHasNoSideEffects() { } @Test - public void testindexedKeyIterator() { + public void testForEachKeyVisitsEveryKeyWithItsIndex() { var items = List.of("a", "b", "c", "d", "e"); - sut = new FastStringHashSet(3); for (String item : items) { sut.addAndGetIndex(item); } - var iterator = sut.indexedKeyIterator(); - for (var i=0; i seen = new java.util.HashMap<>(); + sut.forEachKey(seen::put); - sut = new FastStringHashSet(3); - for (String item : items) { - sut.addAndGetIndex(item); - } - - var iterator = sut.indexedKeySpliterator(); - for (var i=0; i { - assertEquals(items.get(index), indexedKey.key()); - assertEquals(index, indexedKey.index()); - })); - } - assertFalse(iterator.tryAdvance(indexedKey -> { - fail("There should be no more elements in the iterator"); - })); - } - - @Test - public void testIndexedKeyStream() { - var items = List.of("a", "b", "c", "d", "e"); - - sut = new FastStringHashSet(3); - for (String item : items) { - sut.addAndGetIndex(item); - } - - var indexedKeys = sut.indexedKeyStream().toList(); - assertEquals(items.size(), indexedKeys.size()); - for (var i=0; i(); - for (var i = 0; i < 1000; i++) { - items.add(i); - checkSum+= i; - } - - sut = new FastStringHashSet(); - for (var value : items) { - sut.addAndGetIndex(value.toString()); - } - - final var sum = sut.indexedKeyStreamParallel() - .map(pair -> Integer.parseInt(pair.key())) - .reduce(0, Integer::sum); - assertEquals(checkSum, sum); - } - - @Test - public void testIndexedKeySpliteratorAdvanceThrowsConcurrentModificationException() { - sut = new FastStringHashSet(3); - sut.tryAdd("a"); - var spliterator = sut.indexedKeySpliterator(); - sut.tryAdd("b"); - assertThrows(ConcurrentModificationException.class, () -> spliterator.tryAdvance(t -> { - })); - } - - @Test - public void testIndexedKeySpliteratorForEachRemainingThrowsConcurrentModificationException() { - sut = new FastStringHashSet(3); - sut.tryAdd("a"); - var 
spliterator = sut.indexedKeySpliterator(); - sut.tryAdd("b"); - assertThrows(ConcurrentModificationException.class, () -> spliterator.forEachRemaining(t -> { - })); - } - - @Test - public void testIndexedKeyIteratorForEachRemainingThrowsConcurrentModificationException() { + public void testForEachKeyOnEmptySetIsNoOp() { sut = new FastStringHashSet(3); - sut.tryAdd("a"); - var spliterator = sut.indexedKeyIterator(); - sut.tryAdd("b"); - assertThrows(ConcurrentModificationException.class, () -> spliterator.forEachRemaining(t -> { - })); + sut.forEachKey((k, i) -> fail("consumer must not be called on empty set")); } @Test - public void testIndexedKeyIteratorNextThrowsConcurrentModificationException() { + public void testForEachKeySkipsRemovedSlots() { sut = new FastStringHashSet(3); - sut.tryAdd("a"); - var spliterator = sut.indexedKeyIterator(); - sut.tryAdd("b"); - assertThrows(ConcurrentModificationException.class, spliterator::next); + sut.addAndGetIndex("a"); + sut.addAndGetIndex("b"); + sut.addAndGetIndex("c"); + // Remove the middle entry; forEachKey must not visit the freed slot. + sut.tryRemove("b"); + + final var keys = new java.util.ArrayList(); + sut.forEachKey((k, i) -> keys.add(k)); + assertEquals(2, keys.size()); + assertTrue(keys.contains("a")); + assertTrue(keys.contains("c")); + assertFalse(keys.contains("b")); } private static class FastObjectHashSet extends FastHashSet { diff --git a/jena-core/src/test/java/org/apache/jena/mem/iterator/SparseArrayIndexedIteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/iterator/SparseArrayIndexedIteratorTest.java deleted file mode 100644 index cfffcf93904..00000000000 --- a/jena-core/src/test/java/org/apache/jena/mem/iterator/SparseArrayIndexedIteratorTest.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ -package org.apache.jena.mem.iterator; - -import org.junit.Test; - -import java.util.NoSuchElementException; - -import static org.junit.Assert.*; - -public class SparseArrayIndexedIteratorTest { - - private SparseArrayIndexedIterator iterator; - - @Test - public void testHasNextAndNextWithNonNullEntries() { - String[] entries = new String[]{"first", "second", "third"}; - iterator = new SparseArrayIndexedIterator<>(entries, () -> { - }); - - assertTrue(iterator.hasNext()); - var entry = iterator.next(); - assertEquals(0, entry.index()); - assertEquals("first", entry.key()); - - assertTrue(iterator.hasNext()); - entry = iterator.next(); - assertEquals(1, entry.index()); - assertEquals("second", entry.key()); - - assertTrue(iterator.hasNext()); - entry = iterator.next(); - assertEquals(2, entry.index()); - assertEquals("third", entry.key()); - - assertFalse(iterator.hasNext()); - } - - @Test - public void testConstrucorWithToIndexConstraint3() { - String[] entries = new String[]{"first", "second", "third"}; - iterator = new SparseArrayIndexedIterator<>(entries, 3, () -> { - }); - - assertTrue(iterator.hasNext()); - var entry = iterator.next(); - assertEquals(0, entry.index()); - assertEquals("first", entry.key()); - - 
assertTrue(iterator.hasNext()); - entry = iterator.next(); - assertEquals(1, entry.index()); - assertEquals("second", entry.key()); - - assertTrue(iterator.hasNext()); - entry = iterator.next(); - assertEquals(2, entry.index()); - assertEquals("third", entry.key()); - - assertFalse(iterator.hasNext()); - } - - @Test - public void testConstrucorWithToIndexConstraint2() { - String[] entries = new String[]{"first", "second", "third"}; - iterator = new SparseArrayIndexedIterator<>(entries, 2, () -> { - }); - - assertTrue(iterator.hasNext()); - var entry = iterator.next(); - assertEquals(0, entry.index()); - assertEquals("first", entry.key()); - - assertTrue(iterator.hasNext()); - entry = iterator.next(); - assertEquals(1, entry.index()); - assertEquals("second", entry.key()); - - assertFalse(iterator.hasNext()); - } - - @Test - public void testConstrucorWithToIndexConstraint1() { - String[] entries = new String[]{"first", "second", "third"}; - iterator = new SparseArrayIndexedIterator<>(entries, 1, () -> { - }); - - assertTrue(iterator.hasNext()); - var entry = iterator.next(); - assertEquals(0, entry.index()); - assertEquals("first", entry.key()); - - assertFalse(iterator.hasNext()); - } - - @Test - public void testConstrucorWithToIndexConstraint0() { - String[] entries = new String[]{"first", "second", "third"}; - iterator = new SparseArrayIndexedIterator<>(entries, 0, () -> { - }); - - assertFalse(iterator.hasNext()); - assertThrows(NoSuchElementException.class, () -> iterator.next()); - } - - @Test - public void testHasNextAndNextWithNullEntries() { - String[] entries = new String[]{"first", null, "third", null, "fifth"}; - iterator = new SparseArrayIndexedIterator<>(entries, () -> { - }); - - assertTrue(iterator.hasNext()); - var entry = iterator.next(); - assertEquals(0, entry.index()); - assertEquals("first", entry.key()); - - assertTrue(iterator.hasNext()); - entry = iterator.next(); - assertEquals(2, entry.index()); - assertEquals("third", entry.key()); - - 
assertTrue(iterator.hasNext()); - entry = iterator.next(); - assertEquals(4, entry.index()); - assertEquals("fifth", entry.key()); - - assertFalse(iterator.hasNext()); - } - - @Test - public void testHasNextAndNextWithNoElements() { - String[] entries = new String[]{}; - iterator = new SparseArrayIndexedIterator<>(entries, () -> { - }); - - assertFalse(iterator.hasNext()); - assertThrows(NoSuchElementException.class, () -> iterator.next()); - } - - @Test - public void testForEachRemaining() { - String[] entries = new String[]{"first", null, "third", null, "fifth"}; - iterator = new SparseArrayIndexedIterator<>(entries, () -> { - }); - int[] count = new int[]{0}; - iterator.forEachRemaining(entry -> { - assertNotNull(entry); - count[0]++; - }); - assertEquals(3, count[0]); - } -} - diff --git a/jena-core/src/test/java/org/apache/jena/mem/iterator/SparseArrayIteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/iterator/SparseArrayIteratorTest.java index d93d8a3beb2..393e3019cb2 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/iterator/SparseArrayIteratorTest.java +++ b/jena-core/src/test/java/org/apache/jena/mem/iterator/SparseArrayIteratorTest.java @@ -20,6 +20,8 @@ */ package org.apache.jena.mem.iterator; +import org.apache.jena.mem.collection.FastHashSet; +import org.apache.jena.mem.collection.JenaSet; import org.junit.Test; import java.util.NoSuchElementException; @@ -28,13 +30,19 @@ public class SparseArrayIteratorTest { + private static final JenaSet dummySetForConcurrencyCheck = new FastHashSet<>() { + @Override + protected Object[] newKeysArray(int size) { + return new Object[size]; + } + }; + private SparseArrayIterator iterator; @Test public void testHasNextAndNextWithNonNullEntries() { String[] entries = new String[]{"first", "second", "third"}; - iterator = new SparseArrayIterator<>(entries, () -> { - }); + iterator = new SparseArrayIterator<>(entries, dummySetForConcurrencyCheck); assertTrue(iterator.hasNext()); assertEquals("third", 
iterator.next()); @@ -48,8 +56,7 @@ public void testHasNextAndNextWithNonNullEntries() { @Test public void testConstrucorWithToIndexConstraint3() { String[] entries = new String[]{"first", "second", "third"}; - iterator = new SparseArrayIterator<>(entries, 3, () -> { - }); + iterator = new SparseArrayIterator<>(entries, 3, dummySetForConcurrencyCheck); assertTrue(iterator.hasNext()); assertEquals("third", iterator.next()); @@ -63,8 +70,7 @@ public void testConstrucorWithToIndexConstraint3() { @Test public void testConstrucorWithToIndexConstraint2() { String[] entries = new String[]{"first", "second", "third"}; - iterator = new SparseArrayIterator<>(entries, 2, () -> { - }); + iterator = new SparseArrayIterator<>(entries, 2, dummySetForConcurrencyCheck); assertTrue(iterator.hasNext()); assertEquals("second", iterator.next()); @@ -76,8 +82,7 @@ public void testConstrucorWithToIndexConstraint2() { @Test public void testConstrucorWithToIndexConstraint1() { String[] entries = new String[]{"first", "second", "third"}; - iterator = new SparseArrayIterator<>(entries, 1, () -> { - }); + iterator = new SparseArrayIterator<>(entries, 1, dummySetForConcurrencyCheck); assertTrue(iterator.hasNext()); assertEquals("first", iterator.next()); @@ -87,8 +92,7 @@ public void testConstrucorWithToIndexConstraint1() { @Test public void testConstrucorWithToIndexConstraint0() { String[] entries = new String[]{"first", "second", "third"}; - iterator = new SparseArrayIterator<>(entries, 0, () -> { - }); + iterator = new SparseArrayIterator<>(entries, 0, dummySetForConcurrencyCheck); assertFalse(iterator.hasNext()); assertThrows(NoSuchElementException.class, () -> iterator.next()); @@ -97,8 +101,7 @@ public void testConstrucorWithToIndexConstraint0() { @Test public void testHasNextAndNextWithNullEntries() { String[] entries = new String[]{"first", null, "third", null, "fifth"}; - iterator = new SparseArrayIterator<>(entries, () -> { - }); + iterator = new SparseArrayIterator<>(entries, 
dummySetForConcurrencyCheck); assertTrue(iterator.hasNext()); assertEquals("fifth", iterator.next()); @@ -112,8 +115,7 @@ public void testHasNextAndNextWithNullEntries() { @Test public void testHasNextAndNextWithNoElements() { String[] entries = new String[]{}; - iterator = new SparseArrayIterator<>(entries, () -> { - }); + iterator = new SparseArrayIterator<>(entries, dummySetForConcurrencyCheck); assertFalse(iterator.hasNext()); assertThrows(NoSuchElementException.class, () -> iterator.next()); @@ -122,8 +124,7 @@ public void testHasNextAndNextWithNoElements() { @Test public void testForEachRemaining() { String[] entries = new String[]{"first", null, "third", null, "fifth"}; - iterator = new SparseArrayIterator<>(entries, () -> { - }); + iterator = new SparseArrayIterator<>(entries, dummySetForConcurrencyCheck); int[] count = new int[]{0}; iterator.forEachRemaining(entry -> { assertNotNull(entry); diff --git a/jena-core/src/test/java/org/apache/jena/mem/pattern/PatternClassifierTest.java b/jena-core/src/test/java/org/apache/jena/mem/pattern/PatternClassifierTest.java index 23643c6da4f..99e1562d530 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/pattern/PatternClassifierTest.java +++ b/jena-core/src/test/java/org/apache/jena/mem/pattern/PatternClassifierTest.java @@ -20,16 +20,29 @@ */ package org.apache.jena.mem.pattern; +import org.apache.jena.graph.Node; import org.junit.Test; import static org.apache.jena.testing_framework.GraphHelper.node; import static org.apache.jena.testing_framework.GraphHelper.triple; import static org.junit.Assert.assertEquals; +/** + * Unit tests for {@link PatternClassifier}: maps a triple match into one of + * the eight {@link MatchPattern} buckets based on which of the subject, + * predicate and object slots are concrete and which are wildcards. + *

+ * Both classification overloads are tested for every combination of + * concrete/wildcard slots, and the {@code (Node, Node, Node)} overload is + * additionally tested with explicit {@code null} arguments and + * {@link Node#ANY}, both of which must be treated as wildcards. + */ public class PatternClassifierTest { @Test - public void testClassifyTriple() { + public void classifyTripleCoversAllEightCombinations() { + // The wildcard "??" is parsed as a non-concrete (variable) node by + // the test helper. assertEquals(MatchPattern.SUB_PRE_OBJ, PatternClassifier.classify(triple("s p o"))); assertEquals(MatchPattern.SUB_PRE_ANY, PatternClassifier.classify(triple("s p ??"))); assertEquals(MatchPattern.SUB_ANY_OBJ, PatternClassifier.classify(triple("s ?? o"))); @@ -41,7 +54,7 @@ public void testClassifyTriple() { } @Test - public void testClassifyNodes() { + public void classifyNodesCoversAllEightCombinations() { assertEquals(MatchPattern.SUB_PRE_OBJ, PatternClassifier.classify(node("s"), node("p"), node("o"))); assertEquals(MatchPattern.SUB_PRE_ANY, PatternClassifier.classify(node("s"), node("p"), node("??"))); assertEquals(MatchPattern.SUB_ANY_OBJ, PatternClassifier.classify(node("s"), node("??"), node("o"))); @@ -52,4 +65,26 @@ public void testClassifyNodes() { assertEquals(MatchPattern.ANY_ANY_ANY, PatternClassifier.classify(node("??"), node("??"), node("??"))); } + @Test + public void classifyNodesTreatsNullAsWildcard() { + // The graph-find contract allows callers to pass null for a slot + // they don't care about; the classifier must handle that without NPE. 
+ assertEquals(MatchPattern.SUB_PRE_OBJ, PatternClassifier.classify(node("s"), node("p"), node("o"))); + assertEquals(MatchPattern.SUB_PRE_ANY, PatternClassifier.classify(node("s"), node("p"), null)); + assertEquals(MatchPattern.SUB_ANY_OBJ, PatternClassifier.classify(node("s"), null, node("o"))); + assertEquals(MatchPattern.SUB_ANY_ANY, PatternClassifier.classify(node("s"), null, null)); + assertEquals(MatchPattern.ANY_PRE_OBJ, PatternClassifier.classify(null, node("p"), node("o"))); + assertEquals(MatchPattern.ANY_PRE_ANY, PatternClassifier.classify(null, node("p"), null)); + assertEquals(MatchPattern.ANY_ANY_OBJ, PatternClassifier.classify(null, null, node("o"))); + assertEquals(MatchPattern.ANY_ANY_ANY, PatternClassifier.classify(null, null, null)); + } + + @Test + public void classifyNodesTreatsNodeAnyAsWildcard() { + // Node.ANY is the standard wildcard sentinel used by Graph.find. + assertEquals(MatchPattern.SUB_PRE_ANY, PatternClassifier.classify(node("s"), node("p"), Node.ANY)); + assertEquals(MatchPattern.SUB_ANY_OBJ, PatternClassifier.classify(node("s"), Node.ANY, node("o"))); + assertEquals(MatchPattern.ANY_PRE_OBJ, PatternClassifier.classify(Node.ANY, node("p"), node("o"))); + assertEquals(MatchPattern.ANY_ANY_ANY, PatternClassifier.classify(Node.ANY, Node.ANY, Node.ANY)); + } } \ No newline at end of file diff --git a/jena-core/src/test/java/org/apache/jena/mem/spliterator/ArraySpliteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/spliterator/ArraySpliteratorTest.java index 4fe0d7382f2..ae2afc7fd47 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/spliterator/ArraySpliteratorTest.java +++ b/jena-core/src/test/java/org/apache/jena/mem/spliterator/ArraySpliteratorTest.java @@ -20,6 +20,9 @@ */ package org.apache.jena.mem.spliterator; + +import org.apache.jena.mem.collection.FastHashSet; +import org.apache.jena.mem.collection.JenaSet; import org.junit.Test; import java.util.ArrayList; @@ -30,149 +33,113 @@ public class 
ArraySpliteratorTest { + private static final JenaSet dummySetForConcurrencyCheck = new FastHashSet<>() { + @Override + protected Object[] newKeysArray(int size) { + return new Object[size]; + } + }; + @Test public void tryAdvanceEmpty() { - { - Integer[] array = new Integer[0]; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); - assertFalse(spliterator.tryAdvance((i) -> { - fail("Should not have advanced"); - })); - } + Integer[] array = new Integer[0]; + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); + assertFalse(spliterator.tryAdvance((i) -> fail("Should not have advanced"))); } @Test public void tryAdvanceOne() { - { - Integer[] array = new Integer[]{1}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - while (spliterator.tryAdvance((i) -> { - itemsFound.add(1); - })); - assertEquals(1, itemsFound.size()); - itemsFound.contains(1); - } + Integer[] array = new Integer[]{1}; + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + while (spliterator.tryAdvance((i) -> itemsFound.add(1))); + assertEquals(1, itemsFound.size()); + assertTrue(itemsFound.contains(1)); } @Test public void tryAdvanceTwo() { - { - Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - while (spliterator.tryAdvance((i) -> { - itemsFound.add(i); - })); - assertEquals(2, itemsFound.size()); - itemsFound.contains(1); - itemsFound.contains(2); - } + Integer[] array = new Integer[]{1, 2}; + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + while (spliterator.tryAdvance(itemsFound::add)); + assertEquals(2, itemsFound.size()); + assertTrue(itemsFound.contains(1)); + assertTrue(itemsFound.contains(2)); } @Test public void 
tryAdvanceThree() { - { - Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - while (spliterator.tryAdvance((i) -> { - itemsFound.add(i); - })); - assertEquals(3, itemsFound.size()); - itemsFound.contains(1); - itemsFound.contains(2); - itemsFound.contains(3); - } + Integer[] array = new Integer[]{1, 2, 3}; + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + while (spliterator.tryAdvance(itemsFound::add)); + assertEquals(3, itemsFound.size()); + assertTrue(itemsFound.contains(1)); + assertTrue(itemsFound.contains(2)); + assertTrue(itemsFound.contains(3)); } @Test public void forEachRemainingEmpty() { - { - Integer[] array = new Integer[]{}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - spliterator.forEachRemaining((i) -> { - itemsFound.add(i); - }); - assertEquals(0, itemsFound.size()); - } + Integer[] array = new Integer[]{}; + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + spliterator.forEachRemaining(itemsFound::add); + assertEquals(0, itemsFound.size()); } @Test public void forEachRemainingOne() { - { - Integer[] array = new Integer[]{1}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - spliterator.forEachRemaining((i) -> { - itemsFound.add(i); - }); - assertEquals(1, itemsFound.size()); - itemsFound.contains(1); - } + Integer[] array = new Integer[]{1}; + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + spliterator.forEachRemaining(itemsFound::add); + assertEquals(1, itemsFound.size()); + assertTrue(itemsFound.contains(1)); } @Test public void forEachRemainingTwo() { - { - Integer[] array = new 
Integer[]{1, 2}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - spliterator.forEachRemaining((i) -> { - itemsFound.add(i); - }); - assertEquals(2, itemsFound.size()); - itemsFound.contains(1); - itemsFound.contains(2); - } + Integer[] array = new Integer[]{1, 2}; + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + spliterator.forEachRemaining(itemsFound::add); + assertEquals(2, itemsFound.size()); + assertTrue(itemsFound.contains(1)); + assertTrue(itemsFound.contains(2)); } @Test public void forEachRemainingThree() { - { - Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - spliterator.forEachRemaining((i) -> { - itemsFound.add(i); - }); - assertEquals(3, itemsFound.size()); - itemsFound.contains(1); - itemsFound.contains(2); - itemsFound.contains(3); - } + Integer[] array = new Integer[]{1, 2, 3}; + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + spliterator.forEachRemaining(itemsFound::add); + assertEquals(3, itemsFound.size()); + assertTrue(itemsFound.contains(1)); + assertTrue(itemsFound.contains(2)); + assertTrue(itemsFound.contains(3)); } @Test public void trySplitEmpty() { Integer[] array = new Integer[]{}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void trySplitOne() { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void trySplitTwo() { Integer[] array = new Integer[]{1, 2}; - 
Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(2, 3, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -183,8 +150,7 @@ public void trySplitTwo() { @Test public void trySplitThree() { Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(3, 4, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -195,8 +161,7 @@ public void trySplitThree() { @Test public void trySplitFour() { Integer[] array = new Integer[]{1, 2, 3, 4}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(4, 5, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -207,8 +172,7 @@ public void trySplitFour() { @Test public void trySplitFive() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(5, 6, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -224,8 +188,7 @@ public void trySplitOneHundred() { array[i] = i; } } - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertEquals(array.length, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -241,56 +204,49 @@ private void assertBetween(long min, long max, long estimateSize) { @Test public void 
estimateSizeZero() { Integer[] array = new Integer[]{}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(0, 1, spliterator.estimateSize()); } @Test public void estimateSizeOne() { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(1, 2, spliterator.estimateSize()); } @Test public void estimateSizeTwo() { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(2, 3, spliterator.estimateSize()); } @Test public void estimateSizeFive() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(5, 6, spliterator.estimateSize()); } @Test public void characteristics() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); assertEquals(DISTINCT | SIZED | SUBSIZED | NONNULL | IMMUTABLE, spliterator.characteristics()); } @Test public void splitWithOneElementNull() { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void splitWithOneRemainingElementNull() { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new ArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySpliterator<>(array, 
dummySetForConcurrencyCheck); spliterator.tryAdvance((i) -> { }); assertNull(spliterator.trySplit()); diff --git a/jena-core/src/test/java/org/apache/jena/mem/spliterator/ArraySubSpliteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/spliterator/ArraySubSpliteratorTest.java index f8d44700da9..696b6be7be9 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/spliterator/ArraySubSpliteratorTest.java +++ b/jena-core/src/test/java/org/apache/jena/mem/spliterator/ArraySubSpliteratorTest.java @@ -20,6 +20,8 @@ */ package org.apache.jena.mem.spliterator; +import org.apache.jena.mem.collection.FastHashSet; +import org.apache.jena.mem.collection.JenaSet; import org.junit.Test; import java.util.ArrayList; @@ -30,149 +32,113 @@ public class ArraySubSpliteratorTest { + private static final JenaSet dummySetForConcurrencyCheck = new FastHashSet<>() { + @Override + protected Object[] newKeysArray(int size) { + return new Object[size]; + } + }; + @Test public void tryAdvanceEmpty() { - { - Integer[] array = new Integer[0]; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); - assertFalse(spliterator.tryAdvance((i) -> { - fail("Should not have advanced"); - })); - } + Integer[] array = new Integer[0]; + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); + assertFalse(spliterator.tryAdvance((i) -> fail("Should not have advanced"))); } @Test public void tryAdvanceOne() { - { - Integer[] array = new Integer[]{1}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - while (spliterator.tryAdvance((i) -> { - itemsFound.add(1); - })); - assertEquals(1, itemsFound.size()); - itemsFound.contains(1); - } + Integer[] array = new Integer[]{1}; + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + while (spliterator.tryAdvance((i) -> itemsFound.add(1))); + assertEquals(1, 
itemsFound.size()); + assertTrue(itemsFound.contains(1)); } @Test public void tryAdvanceTwo() { - { - Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - while (spliterator.tryAdvance((i) -> { - itemsFound.add(i); - })); - assertEquals(2, itemsFound.size()); - itemsFound.contains(1); - itemsFound.contains(2); - } + Integer[] array = new Integer[]{1, 2}; + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + while (spliterator.tryAdvance(itemsFound::add)); + assertEquals(2, itemsFound.size()); + assertTrue(itemsFound.contains(1)); + assertTrue(itemsFound.contains(2)); } @Test public void tryAdvanceThree() { - { - Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - while (spliterator.tryAdvance((i) -> { - itemsFound.add(i); - })); - assertEquals(3, itemsFound.size()); - itemsFound.contains(1); - itemsFound.contains(2); - itemsFound.contains(3); - } + Integer[] array = new Integer[]{1, 2, 3}; + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + while (spliterator.tryAdvance(itemsFound::add)); + assertEquals(3, itemsFound.size()); + assertTrue(itemsFound.contains(1)); + assertTrue(itemsFound.contains(2)); + assertTrue(itemsFound.contains(3)); } @Test public void forEachRemainingEmpty() { - { - Integer[] array = new Integer[]{}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - spliterator.forEachRemaining((i) -> { - itemsFound.add(i); - }); - assertEquals(0, itemsFound.size()); - } + Integer[] array = new Integer[]{}; + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + 
spliterator.forEachRemaining(itemsFound::add); + assertEquals(0, itemsFound.size()); } @Test public void forEachRemainingOne() { - { - Integer[] array = new Integer[]{1}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - spliterator.forEachRemaining((i) -> { - itemsFound.add(i); - }); - assertEquals(1, itemsFound.size()); - itemsFound.contains(1); - } + Integer[] array = new Integer[]{1}; + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + spliterator.forEachRemaining(itemsFound::add); + assertEquals(1, itemsFound.size()); + assertTrue(itemsFound.contains(1)); } @Test public void forEachRemainingTwo() { - { - Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - spliterator.forEachRemaining((i) -> { - itemsFound.add(i); - }); - assertEquals(2, itemsFound.size()); - itemsFound.contains(1); - itemsFound.contains(2); - } + Integer[] array = new Integer[]{1, 2}; + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + spliterator.forEachRemaining(itemsFound::add); + assertEquals(2, itemsFound.size()); + assertTrue(itemsFound.contains(1)); + assertTrue(itemsFound.contains(2)); } @Test public void forEachRemainingThree() { - { - Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - spliterator.forEachRemaining((i) -> { - itemsFound.add(i); - }); - assertEquals(3, itemsFound.size()); - itemsFound.contains(1); - itemsFound.contains(2); - itemsFound.contains(3); - } + Integer[] array = new Integer[]{1, 2, 3}; + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); + var itemsFound = new ArrayList<>(); + 
spliterator.forEachRemaining(itemsFound::add); + assertEquals(3, itemsFound.size()); + assertTrue(itemsFound.contains(1)); + assertTrue(itemsFound.contains(2)); + assertTrue(itemsFound.contains(3)); } @Test public void trySplitEmpty() { Integer[] array = new Integer[]{}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void trySplitOne() { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void trySplitTwo() { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(2, 3, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -183,8 +149,7 @@ public void trySplitTwo() { @Test public void trySplitThree() { Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(3, 4, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -195,8 +160,7 @@ public void trySplitThree() { @Test public void trySplitFour() { Integer[] array = new Integer[]{1, 2, 3, 4}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(4, 5, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -207,8 +171,7 @@ public void trySplitFour() { 
@Test public void trySplitFive() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(5, 6, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -224,8 +187,7 @@ public void trySplitOneHundred() { array[i] = i; } } - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertEquals(array.length, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -241,56 +203,49 @@ private void assertBetween(long min, long max, long estimateSize) { @Test public void estimateSizeZero() { Integer[] array = new Integer[]{}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(0, 1, spliterator.estimateSize()); } @Test public void estimateSizeOne() { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(1, 2, spliterator.estimateSize()); } @Test public void estimateSizeTwo() { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(2, 3, spliterator.estimateSize()); } @Test public void estimateSizeFive() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(5, 6, 
spliterator.estimateSize()); } @Test public void characteristics() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertEquals(DISTINCT | SIZED | SUBSIZED | NONNULL | IMMUTABLE, spliterator.characteristics()); } @Test public void splitWithOneElementNull() { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void splitWithOneRemainingElementNull() { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new ArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new ArraySubSpliterator<>(array, dummySetForConcurrencyCheck); spliterator.tryAdvance((i) -> { }); assertNull(spliterator.trySplit()); diff --git a/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArrayIndexedSpliteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArrayIndexedSpliteratorTest.java deleted file mode 100644 index 11e4fb8a4e8..00000000000 --- a/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArrayIndexedSpliteratorTest.java +++ /dev/null @@ -1,675 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * - * SPDX-License-Identifier: Apache-2.0 - */ -package org.apache.jena.mem.spliterator; - -import org.junit.Test; - -import org.apache.jena.mem.collection.FastHashSet; -import java.util.ArrayList; - -import static java.util.Spliterator.*; -import static org.junit.Assert.*; - -public class SparseArrayIndexedSpliteratorTest { - - @Test - public void tryAdvanceEmpty() { - { - final var array = new String[0]; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertFalse(spliterator.tryAdvance(i -> fail("Should not have advanced"))); - } - { - final var array = new String[1]; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertFalse(spliterator.tryAdvance(i -> fail("Should not have advanced"))); - } - } - - @Test - public void tryAdvanceOne() { - { - final var array = new String[]{"a"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - for(int i=0; i { - assertEquals(index, entry.index()); - assertEquals(array[index], entry.key()); - })); - } - assertFalse(spliterator.tryAdvance(i -> fail("Should not have advanced"))); - } - { - final var array = new String[]{"a", null}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var itemsFound = new ArrayList<>(); - while (true) { - if (!spliterator.tryAdvance(entry -> itemsFound.add(entry.key()))) { - break; - } - } - assertEquals(1, itemsFound.size()); - assertTrue(itemsFound.contains("a")); - } - { - final var array = new String[]{null, "a"}; - final var 
spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertTrue(spliterator.tryAdvance(entry -> { - assertEquals(1, entry.index()); - assertEquals("a", entry.key()); - })); - assertFalse(spliterator.tryAdvance(i -> fail("Should not have advanced"))); - } - } - - @Test - public void tryAdvanceTwo() { - { - final var array = new String[]{"a", "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - while (spliterator.tryAdvance(i -> { - keysFound.add(i.key()); - indicesFound.add(i.index()); - })); - assertEquals(2, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals(0, indicesFound.get(0)); - assertEquals(1, indicesFound.get(1)); - } - { - final var array = new String[]{"a", null, "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - while (spliterator.tryAdvance(i -> { - keysFound.add(i.key()); - indicesFound.add(i.index()); - })); - assertEquals(2, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals(0, indicesFound.get(0)); - assertEquals(2, indicesFound.get(1)); - } - { - final var array = new String[]{"a", null, null, "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - while (spliterator.tryAdvance(i -> { - keysFound.add(i.key()); - indicesFound.add(i.index()); - })); - assertEquals(2, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals(0, indicesFound.get(0)); - assertEquals(3, indicesFound.get(1)); - } - { - final var array = new String[]{null, "a", null, "b"}; - final var spliterator = new 
SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - while (spliterator.tryAdvance(i -> { - keysFound.add(i.key()); - indicesFound.add(i.index()); - })); - assertEquals(2, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals(1, indicesFound.get(0)); - assertEquals(3, indicesFound.get(1)); - } - { - final var array = new String[]{null, "a", null, null, "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - while (spliterator.tryAdvance(i -> { - keysFound.add(i.key()); - indicesFound.add(i.index()); - })); - assertEquals(2, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals(1, indicesFound.get(0)); - assertEquals(4, indicesFound.get(1)); - } - } - - @Test - public void tryAdvanceThree() { - { - final var array = new String[]{"a", "b", "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - while (spliterator.tryAdvance(i -> { - keysFound.add(i.key()); - indicesFound.add(i.index()); - })); - assertEquals(3, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals("c", keysFound.get(2)); - assertEquals(0, indicesFound.get(0)); - assertEquals(1, indicesFound.get(1)); - assertEquals(2, indicesFound.get(2)); - } - { - final var array = new String[]{"a", null, "b", "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - while (spliterator.tryAdvance(i -> { - keysFound.add(i.key()); - indicesFound.add(i.index()); - })); - assertEquals(3, 
keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals("c", keysFound.get(2)); - assertEquals(0, indicesFound.get(0)); - assertEquals(2, indicesFound.get(1)); - assertEquals(3, indicesFound.get(2)); - } - { - final var array = new String[]{"a", null, null, "b", "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - while (spliterator.tryAdvance(i -> { - keysFound.add(i.key()); - indicesFound.add(i.index()); - })); - assertEquals(3, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals("c", keysFound.get(2)); - assertEquals(0, indicesFound.get(0)); - assertEquals(3, indicesFound.get(1)); - assertEquals(4, indicesFound.get(2)); - } - { - final var array = new String[]{null, "a", null, "b", null, "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - while (spliterator.tryAdvance(i -> { - keysFound.add(i.key()); - indicesFound.add(i.index()); - })); - assertEquals(3, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals("c", keysFound.get(2)); - assertEquals(1, indicesFound.get(0)); - assertEquals(3, indicesFound.get(1)); - assertEquals(5, indicesFound.get(2)); - } - { - final var array = new String[]{null, "a", null, null, "b", null, "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - while (spliterator.tryAdvance(i -> { - keysFound.add(i.key()); - indicesFound.add(i.index()); - })); - assertEquals(3, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals("c", 
keysFound.get(2)); - assertEquals(1, indicesFound.get(0)); - assertEquals(4, indicesFound.get(1)); - assertEquals(6, indicesFound.get(2)); - } - } - - @Test - public void forEachRemainingEmpty() { - { - final var array = new String[]{}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList>(); - spliterator.forEachRemaining(itemsFound::add); - assertEquals(0, itemsFound.size()); - } - { - final var array = new String[]{null}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - var itemsFound = new ArrayList<>(); - spliterator.forEachRemaining(itemsFound::add); - assertEquals(0, itemsFound.size()); - } - } - - @Test - public void forEachRemainingOne() { - { - final var array = new String[]{"a"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(1, keysFound.size()); - assertTrue(keysFound.contains("a")); - assertTrue(indicesFound.contains(0)); - } - { - final var array = new String[]{null, "a"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(1, keysFound.size()); - assertTrue(keysFound.contains("a")); - assertTrue(indicesFound.contains(1)); - } - { - final var array = new String[]{"a", null}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - 
assertEquals(1, keysFound.size()); - assertTrue(keysFound.contains("a")); - assertTrue(indicesFound.contains(0)); - } - } - - @Test - public void forEachRemainingTwo() { - { - final var array = new String[]{"a", "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(2, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals(0, indicesFound.get(0)); - assertEquals(1, indicesFound.get(1)); - } - { - final var array = new String[]{"a", null, "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(2, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals(0, indicesFound.get(0)); - assertEquals(2, indicesFound.get(1)); - } - { - final var array = new String[]{"a", null, null, "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(2, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals(0, indicesFound.get(0)); - assertEquals(3, indicesFound.get(1)); - } - { - final var array = new String[]{null, "a", null, "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = 
new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(2, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals(1, indicesFound.get(0)); - assertEquals(3, indicesFound.get(1)); - } - { - final var array = new String[]{null, "a", null, null, "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(2, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals(1, indicesFound.get(0)); - assertEquals(4, indicesFound.get(1)); - } - } - - @Test - public void forEachRemainingThree() { - { - final var array = new String[]{"a", "b", "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(3, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals("c", keysFound.get(2)); - assertEquals(0, indicesFound.get(0)); - assertEquals(1, indicesFound.get(1)); - assertEquals(2, indicesFound.get(2)); - } - { - final var array = new String[]{"a", null, "b", "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(3, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", 
keysFound.get(1)); - assertEquals("c", keysFound.get(2)); - assertEquals(0, indicesFound.get(0)); - assertEquals(2, indicesFound.get(1)); - assertEquals(3, indicesFound.get(2)); - } - { - final var array = new String[]{"a", null, null, "b", "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(3, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals("c", keysFound.get(2)); - assertEquals(0, indicesFound.get(0)); - assertEquals(3, indicesFound.get(1)); - assertEquals(4, indicesFound.get(2)); - } - { - final var array = new String[]{null, "a", null, "b", "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(3, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals("c", keysFound.get(2)); - assertEquals(1, indicesFound.get(0)); - assertEquals(3, indicesFound.get(1)); - assertEquals(4, indicesFound.get(2)); - } - { - final var array = new String[]{null, "a", null, null, "b", "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(3, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals("c", keysFound.get(2)); - assertEquals(1, indicesFound.get(0)); - 
assertEquals(4, indicesFound.get(1)); - assertEquals(5, indicesFound.get(2)); - } - } - - @Test - public void trySplitEmpty() { - final var array = new String[]{}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertNull(spliterator.trySplit()); - } - - @Test - public void trySplitOne() { - final var array = new String[]{"a"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertNull(spliterator.trySplit()); - } - - @Test - public void trySplitTwo() { - final var array = new String[]{"a", "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - // Estimated size is not exact - assertBetween(2, 3, spliterator.estimateSize()); - final var split = spliterator.trySplit(); - assertBetween(1, 2, spliterator.estimateSize()); - assertBetween(1, 3, split.estimateSize()); - } - - @Test - public void trySplitThree() { - final var array = new String[]{"a", "b", "c"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - // Estimated size is not exact - assertBetween(3, 4, spliterator.estimateSize()); - final var split = spliterator.trySplit(); - assertBetween(1, 3, spliterator.estimateSize()); - assertBetween(2, 3, split.estimateSize()); - } - - @Test - public void trySplitFour() { - final var array = new String[]{"a", "b", "c", "d"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - // Estimated size is not exact - assertBetween(4, 5, spliterator.estimateSize()); - final var split = spliterator.trySplit(); - assertBetween(2, 3, spliterator.estimateSize()); - assertBetween(2, 4, split.estimateSize()); - } - - @Test - public void trySplitFive() { - final var array = new String[]{"a", "b", "c", "d", "e"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - // Estimated size is not exact - assertBetween(5, 6, spliterator.estimateSize()); - final var split = 
spliterator.trySplit(); - assertBetween(2, 4, spliterator.estimateSize()); - assertBetween(2, 4, split.estimateSize()); - - final var keysFound = new ArrayList<>(); - final var indicesFound = new ArrayList<>(); - split.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - spliterator.forEachRemaining(entry -> { - keysFound.add(entry.key()); - indicesFound.add(entry.index()); - }); - assertEquals(5, keysFound.size()); - assertEquals("a", keysFound.get(0)); - assertEquals("b", keysFound.get(1)); - assertEquals("c", keysFound.get(2)); - assertEquals("d", keysFound.get(3)); - assertEquals("e", keysFound.get(4)); - assertEquals(0, indicesFound.get(0)); - assertEquals(1, indicesFound.get(1)); - assertEquals(2, indicesFound.get(2)); - assertEquals(3, indicesFound.get(3)); - assertEquals(4, indicesFound.get(4)); - } - - @Test - public void trySplitOneHundred() { - Integer[] array = new Integer[200]; - for (int i = 0; i < array.length; i++) { - if (i % 2 == 0) { - array[i] = i; - } - } - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - // Estimated size is not exact - assertBetween(array.length, array.length+1, spliterator.estimateSize()); - final var split = spliterator.trySplit(); - assertBetween(array.length / 2, (array.length / 2) + 1, spliterator.estimateSize()); - assertBetween(array.length / 2, (array.length / 2) + 1, split.estimateSize()); - } - - private void assertBetween(long min, long max, long estimateSize) { - assertTrue("estimateSize=" + estimateSize + " min=" + min + " max=" + max, estimateSize >= min); - assertTrue("estimateSize=" + estimateSize + " min=" + min + " max=" + max, estimateSize <= max); - } - - @Test - public void estimateSizeZero() { - final var array = new String[]{}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertBetween(0, 1, spliterator.estimateSize()); - } - - @Test - public void estimateSizeOne() { - final var 
array = new String[]{"a"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertBetween(1, 2, spliterator.estimateSize()); - } - - @Test - public void estimateSizeTwo() { - final var array = new String[]{"a", "b"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertBetween(2, 3, spliterator.estimateSize()); - } - - @Test - public void estimateSizeFive() { - final var array = new String[]{"a", "b", "c", "d", "e"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertBetween(5, 6, spliterator.estimateSize()); - } - - @Test - public void characteristics() { - final var array = new String[]{"a", "b", "c", "d", "e"}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertEquals(DISTINCT | NONNULL | IMMUTABLE, spliterator.characteristics()); - } - - @Test - public void splitWithOneElementNull() { - final var array = new String[]{null}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - assertNull(spliterator.trySplit()); - } - - @Test - public void splitWithOneRemainingElementNull() { - final var array = new String[]{"a", null}; - final var spliterator = new SparseArrayIndexedSpliterator<>(array, () -> { - }); - spliterator.tryAdvance(i -> {}); - assertNull(spliterator.trySplit()); - } -} \ No newline at end of file diff --git a/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArraySpliteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArraySpliteratorTest.java index c40114b1769..bef17791849 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArraySpliteratorTest.java +++ b/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArraySpliteratorTest.java @@ -20,6 +20,8 @@ */ package org.apache.jena.mem.spliterator; +import org.apache.jena.mem.collection.FastHashSet; +import org.apache.jena.mem.collection.JenaSet; 
import org.junit.Test; import java.util.ArrayList; @@ -30,18 +32,23 @@ public class SparseArraySpliteratorTest { + private static final JenaSet dummySetForConcurrencyCheck = new FastHashSet<>() { + @Override + protected Object[] newKeysArray(int size) { + return new Object[size]; + } + }; + @Test public void tryAdvanceEmpty() { { Integer[] array = new Integer[0]; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); assertFalse(spliterator.tryAdvance(i -> fail("Should not have advanced"))); } { Integer[] array = new Integer[1]; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); assertFalse(spliterator.tryAdvance(i -> fail("Should not have advanced"))); } } @@ -50,8 +57,7 @@ public void tryAdvanceEmpty() { public void tryAdvanceOne() { { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance( itemsFound::add)) { /*empty*/ } assertEquals(1, itemsFound.size()); @@ -59,8 +65,7 @@ public void tryAdvanceOne() { } { Integer[] array = new Integer[]{1, null}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(1, itemsFound.size()); @@ -68,8 +73,7 @@ public void tryAdvanceOne() { } { Integer[] array = new Integer[]{null, 1}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var 
itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(1, itemsFound.size()); @@ -81,8 +85,7 @@ public void tryAdvanceOne() { public void tryAdvanceTwo() { { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(2, itemsFound.size()); @@ -91,8 +94,7 @@ public void tryAdvanceTwo() { } { Integer[] array = new Integer[]{1, null, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(2, itemsFound.size()); @@ -101,8 +103,7 @@ public void tryAdvanceTwo() { } { Integer[] array = new Integer[]{1, null, null, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(2, itemsFound.size()); @@ -111,8 +112,7 @@ public void tryAdvanceTwo() { } { Integer[] array = new Integer[]{null, 1, null, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(2, itemsFound.size()); @@ -121,8 +121,7 @@ public void tryAdvanceTwo() { } { Integer[] array = new Integer[]{null, 1, null, null, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = 
new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(2, itemsFound.size()); @@ -135,8 +134,7 @@ public void tryAdvanceTwo() { public void tryAdvanceThree() { { Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(3, itemsFound.size()); @@ -146,8 +144,7 @@ public void tryAdvanceThree() { } { Integer[] array = new Integer[]{1, null, 2, 3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(3, itemsFound.size()); @@ -157,8 +154,7 @@ public void tryAdvanceThree() { } { Integer[] array = new Integer[]{1, null, null, 2, 3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(3, itemsFound.size()); @@ -168,8 +164,7 @@ public void tryAdvanceThree() { } { Integer[] array = new Integer[]{null, 1, null, 2, null, 3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(3, itemsFound.size()); @@ -179,8 +174,7 @@ public void tryAdvanceThree() { } { Integer[] array = new Integer[]{null, 1, null, null, 2, null, 
3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(3, itemsFound.size()); @@ -194,16 +188,14 @@ public void tryAdvanceThree() { public void forEachRemainingEmpty() { { Integer[] array = new Integer[]{}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(0, itemsFound.size()); } { Integer[] array = new Integer[]{null}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(0, itemsFound.size()); @@ -214,8 +206,7 @@ public void forEachRemainingEmpty() { public void forEachRemainingOne() { { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(1, itemsFound.size()); @@ -223,8 +214,7 @@ public void forEachRemainingOne() { } { Integer[] array = new Integer[]{null, 1}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(1, itemsFound.size()); @@ -232,8 +222,7 @@ public void forEachRemainingOne() { } { Integer[] array = new Integer[]{1, null}; - Spliterator 
spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(1, itemsFound.size()); @@ -245,8 +234,7 @@ public void forEachRemainingOne() { public void forEachRemainingTwo() { { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(2, itemsFound.size()); @@ -255,8 +243,7 @@ public void forEachRemainingTwo() { } { Integer[] array = new Integer[]{1, null, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(2, itemsFound.size()); @@ -265,8 +252,7 @@ public void forEachRemainingTwo() { } { Integer[] array = new Integer[]{1, null, null, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(2, itemsFound.size()); @@ -275,8 +261,7 @@ public void forEachRemainingTwo() { } { Integer[] array = new Integer[]{null, 1, null, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(2, itemsFound.size()); @@ -285,8 +270,7 @@ public void forEachRemainingTwo() { } { Integer[] array = new Integer[]{null, 
1, null, null, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(2, itemsFound.size()); @@ -299,8 +283,7 @@ public void forEachRemainingTwo() { public void forEachRemainingThree() { { Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(3, itemsFound.size()); @@ -310,8 +293,7 @@ public void forEachRemainingThree() { } { Integer[] array = new Integer[]{1, null, 2, 3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(3, itemsFound.size()); @@ -321,8 +303,7 @@ public void forEachRemainingThree() { } { Integer[] array = new Integer[]{1, null, null, 2, 3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(3, itemsFound.size()); @@ -332,8 +313,7 @@ public void forEachRemainingThree() { } { Integer[] array = new Integer[]{null, 1, null, 2, 3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(3, itemsFound.size()); @@ -343,8 +323,7 @@ public void 
forEachRemainingThree() { } { Integer[] array = new Integer[]{null, 1, null, null, 2, 3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(3, itemsFound.size()); @@ -354,24 +333,21 @@ public void forEachRemainingThree() { @Test public void trySplitEmpty() { Integer[] array = new Integer[]{}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void trySplitOne() { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void trySplitTwo() { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(2, 3, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -382,8 +358,7 @@ public void trySplitTwo() { @Test public void trySplitThree() { Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(3, 4, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -394,8 +369,7 @@ public void trySplitThree() { @Test public void trySplitFour() { Integer[] array = new Integer[]{1, 2, 3, 4}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - 
}); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(4, 5, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -406,8 +380,7 @@ public void trySplitFour() { @Test public void trySplitFive() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(5, 6, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -423,8 +396,7 @@ public void trySplitOneHundred() { array[i] = i; } } - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertEquals(array.length, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -440,56 +412,49 @@ private void assertBetween(long min, long max, long estimateSize) { @Test public void estimateSizeZero() { Integer[] array = new Integer[]{}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(0, 1, spliterator.estimateSize()); } @Test public void estimateSizeOne() { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(1, 2, spliterator.estimateSize()); } @Test public void estimateSizeTwo() { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(2, 3, 
spliterator.estimateSize()); } @Test public void estimateSizeFive() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(5, 6, spliterator.estimateSize()); } @Test public void characteristics() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); assertEquals(DISTINCT | NONNULL | IMMUTABLE, spliterator.characteristics()); } @Test public void splitWithOneElementNull() { Integer[] array = new Integer[]{null}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void splitWithOneRemainingElementNull() { Integer[] array = new Integer[]{1, null}; - Spliterator spliterator = new SparseArraySpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySpliterator<>(array, dummySetForConcurrencyCheck); spliterator.tryAdvance(i -> { }); assertNull(spliterator.trySplit()); diff --git a/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArraySubSpliteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArraySubSpliteratorTest.java index 5354297c4b4..8b83aeb373d 100644 --- a/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArraySubSpliteratorTest.java +++ b/jena-core/src/test/java/org/apache/jena/mem/spliterator/SparseArraySubSpliteratorTest.java @@ -20,6 +20,8 @@ */ package org.apache.jena.mem.spliterator; +import org.apache.jena.mem.collection.FastHashSet; +import org.apache.jena.mem.collection.JenaSet; import org.junit.Test; import java.util.ArrayList; @@ -30,18 +32,23 @@ public class 
SparseArraySubSpliteratorTest { + private static final JenaSet dummySetForConcurrencyCheck = new FastHashSet<>() { + @Override + protected Object[] newKeysArray(int size) { + return new Object[size]; + } + }; + @Test public void tryAdvanceEmpty() { { Integer[] array = new Integer[0]; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertFalse(spliterator.tryAdvance(i -> fail("Should not have advanced"))); } { Integer[] array = new Integer[1]; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertFalse(spliterator.tryAdvance(i -> fail("Should not have advanced"))); } } @@ -50,8 +57,7 @@ public void tryAdvanceEmpty() { public void tryAdvanceOne() { { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(1, itemsFound.size()); @@ -59,8 +65,7 @@ public void tryAdvanceOne() { } { Integer[] array = new Integer[]{1, null}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(1, itemsFound.size()); @@ -68,8 +73,7 @@ public void tryAdvanceOne() { } { Integer[] array = new Integer[]{null, 1}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while 
(spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(1, itemsFound.size()); @@ -81,8 +85,7 @@ public void tryAdvanceOne() { public void tryAdvanceTwo() { { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(2, itemsFound.size()); @@ -91,8 +94,7 @@ public void tryAdvanceTwo() { } { Integer[] array = new Integer[]{1, null, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(2, itemsFound.size()); @@ -101,8 +103,7 @@ public void tryAdvanceTwo() { } { Integer[] array = new Integer[]{1, null, null, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(2, itemsFound.size()); @@ -111,8 +112,7 @@ public void tryAdvanceTwo() { } { Integer[] array = new Integer[]{null, 1, null, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(2, itemsFound.size()); @@ -121,8 +121,7 @@ public void tryAdvanceTwo() { } { Integer[] array = new Integer[]{null, 1, null, null, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new 
SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(2, itemsFound.size()); @@ -135,8 +134,7 @@ public void tryAdvanceTwo() { public void tryAdvanceThree() { { Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(3, itemsFound.size()); @@ -146,8 +144,7 @@ public void tryAdvanceThree() { } { Integer[] array = new Integer[]{1, null, 2, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(3, itemsFound.size()); @@ -157,8 +154,7 @@ public void tryAdvanceThree() { } { Integer[] array = new Integer[]{1, null, null, 2, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(3, itemsFound.size()); @@ -168,8 +164,7 @@ public void tryAdvanceThree() { } { Integer[] array = new Integer[]{null, 1, null, 2, null, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(3, itemsFound.size()); @@ -179,8 +174,7 @@ public void tryAdvanceThree() { } { Integer[] array = new Integer[]{null, 1, 
null, null, 2, null, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); while (spliterator.tryAdvance(itemsFound::add)) { /*empty*/ } assertEquals(3, itemsFound.size()); @@ -194,16 +188,14 @@ public void tryAdvanceThree() { public void forEachRemainingEmpty() { { Integer[] array = new Integer[]{}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(0, itemsFound.size()); } { Integer[] array = new Integer[]{null}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(0, itemsFound.size()); @@ -214,8 +206,7 @@ public void forEachRemainingEmpty() { public void forEachRemainingOne() { { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(1, itemsFound.size()); @@ -223,8 +214,7 @@ public void forEachRemainingOne() { } { Integer[] array = new Integer[]{null, 1}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(1, itemsFound.size()); @@ -232,8 +222,7 @@ public void forEachRemainingOne() { } { 
Integer[] array = new Integer[]{1, null}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(1, itemsFound.size()); @@ -245,8 +234,7 @@ public void forEachRemainingOne() { public void forEachRemainingTwo() { { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(2, itemsFound.size()); @@ -255,8 +243,7 @@ public void forEachRemainingTwo() { } { Integer[] array = new Integer[]{1, null, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(2, itemsFound.size()); @@ -265,8 +252,7 @@ public void forEachRemainingTwo() { } { Integer[] array = new Integer[]{1, null, null, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(2, itemsFound.size()); @@ -275,8 +261,7 @@ public void forEachRemainingTwo() { } { Integer[] array = new Integer[]{null, 1, null, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(2, itemsFound.size()); @@ -285,8 
+270,7 @@ public void forEachRemainingTwo() { } { Integer[] array = new Integer[]{null, 1, null, null, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(2, itemsFound.size()); @@ -299,8 +283,7 @@ public void forEachRemainingTwo() { public void forEachRemainingThree() { { Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(3, itemsFound.size()); @@ -310,8 +293,7 @@ public void forEachRemainingThree() { } { Integer[] array = new Integer[]{1, null, 2, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(3, itemsFound.size()); @@ -321,8 +303,7 @@ public void forEachRemainingThree() { } { Integer[] array = new Integer[]{1, null, null, 2, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(3, itemsFound.size()); @@ -332,8 +313,7 @@ public void forEachRemainingThree() { } { Integer[] array = new Integer[]{null, 1, null, 2, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); 
spliterator.forEachRemaining(itemsFound::add); assertEquals(3, itemsFound.size()); @@ -343,8 +323,7 @@ public void forEachRemainingThree() { } { Integer[] array = new Integer[]{null, 1, null, null, 2, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); var itemsFound = new ArrayList<>(); spliterator.forEachRemaining(itemsFound::add); assertEquals(3, itemsFound.size()); @@ -357,24 +336,21 @@ public void forEachRemainingThree() { @Test public void trySplitEmpty() { Integer[] array = new Integer[]{}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void trySplitOne() { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void trySplitTwo() { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(2, 3, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -385,8 +361,7 @@ public void trySplitTwo() { @Test public void trySplitThree() { Integer[] array = new Integer[]{1, 2, 3}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(3, 4, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -397,8 +372,7 @@ public void trySplitThree() { @Test 
public void trySplitFour() { Integer[] array = new Integer[]{1, 2, 3, 4}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(4, 5, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -409,8 +383,7 @@ public void trySplitFour() { @Test public void trySplitFive() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertBetween(5, 6, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -426,8 +399,7 @@ public void trySplitOneHundred() { array[i] = i; } } - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); // Estimated size is not exact assertEquals(array.length, spliterator.estimateSize()); Spliterator split = spliterator.trySplit(); @@ -443,56 +415,49 @@ private void assertBetween(long min, long max, long estimateSize) { @Test public void estimateSizeZero() { Integer[] array = new Integer[]{}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(0, 1, spliterator.estimateSize()); } @Test public void estimateSizeOne() { Integer[] array = new Integer[]{1}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(1, 2, spliterator.estimateSize()); } @Test public void estimateSizeTwo() { Integer[] array = new Integer[]{1, 2}; - Spliterator spliterator = new 
SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(2, 3, spliterator.estimateSize()); } @Test public void estimateSizeFive() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertBetween(5, 6, spliterator.estimateSize()); } @Test public void characteristics() { Integer[] array = new Integer[]{1, 2, 3, 4, 5}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertEquals(DISTINCT | NONNULL | IMMUTABLE, spliterator.characteristics()); } @Test public void splitWithOneElementNull() { Integer[] array = new Integer[]{null}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); assertNull(spliterator.trySplit()); } @Test public void splitWithOneRemainingElementNull() { Integer[] array = new Integer[]{1, null}; - Spliterator spliterator = new SparseArraySubSpliterator<>(array, () -> { - }); + Spliterator spliterator = new SparseArraySubSpliterator<>(array, dummySetForConcurrencyCheck); spliterator.tryAdvance(i -> {}); assertNull(spliterator.trySplit()); } diff --git a/jena-core/src/test/java/org/apache/jena/mem/store/fast/FastHashedBunchMapTest.java b/jena-core/src/test/java/org/apache/jena/mem/store/fast/FastHashedBunchMapTest.java new file mode 100644 index 00000000000..b4b6e97eba5 --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/mem/store/fast/FastHashedBunchMapTest.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.store.fast; + +import org.apache.jena.graph.Node; +import org.junit.Test; + +import static org.apache.jena.testing_framework.GraphHelper.node; +import static org.apache.jena.testing_framework.GraphHelper.triple; +import static org.junit.Assert.*; + +/** + * Unit tests for {@link FastHashedBunchMap}: a {@link Node}-keyed map of + * {@link FastTripleBunch}es, with a deep-copying constructor. 
+ */ +public class FastHashedBunchMapTest { + + @Test + public void newMapIsEmpty() { + final var map = new FastHashedBunchMap(); + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + } + + @Test + public void putAndGetReturnSameBunch() { + final var map = new FastHashedBunchMap(); + final Node key = node("s"); + final FastTripleBunch bunch = new FastHashedTripleBunch(); + bunch.tryAdd(triple("s p o")); + bunch.tryAdd(triple("s p o2")); + + map.put(key, bunch); + assertEquals(1, map.size()); + assertSame(bunch, map.get(key)); + } + + @Test + public void getReturnsNullForMissingKey() { + final var map = new FastHashedBunchMap(); + assertNull(map.get(node("missing"))); + } + + @Test + public void copyClonesEveryBunch() { + final var src = new FastHashedBunchMap(); + final var bunchA = new FastHashedTripleBunch(); + bunchA.tryAdd(triple("a p1 o1")); + bunchA.tryAdd(triple("a p2 o2")); + final var bunchB = new FastHashedTripleBunch(); + bunchB.tryAdd(triple("b p1 o1")); + src.put(node("a"), bunchA); + src.put(node("b"), bunchB); + + final var copy = src.copy(); + assertEquals(2, copy.size()); + + final var copiedA = copy.get(node("a")); + final var copiedB = copy.get(node("b")); + assertNotNull(copiedA); + assertNotNull(copiedB); + + // Bunches are deep-copied: same content, different identity. 
+ assertNotSame(bunchA, copiedA); + assertNotSame(bunchB, copiedB); + assertEquals(bunchA.size(), copiedA.size()); + assertEquals(bunchB.size(), copiedB.size()); + assertTrue(copiedA.containsKey(triple("a p1 o1"))); + assertTrue(copiedA.containsKey(triple("a p2 o2"))); + assertTrue(copiedB.containsKey(triple("b p1 o1"))); + } + + @Test + public void copyIsIndependentOfSource() { + final var src = new FastHashedBunchMap(); + final var bunch = new FastHashedTripleBunch(); + bunch.tryAdd(triple("s p o")); + src.put(node("s"), bunch); + + final var copy = src.copy(); + // mutating the source bunch must not be visible in the copy + bunch.tryAdd(triple("s p o2")); + assertEquals(2, src.get(node("s")).size()); + assertEquals(1, copy.get(node("s")).size()); + assertFalse(copy.get(node("s")).containsKey(triple("s p o2"))); + } + + @Test + public void computeIfAbsentInsertsAndReusesValues() { + final var map = new FastHashedBunchMap(); + final var produced = new FastHashedTripleBunch(); + final FastTripleBunch first = map.computeIfAbsent(node("k"), () -> produced); + assertSame(produced, first); + + // calling again must return the same bunch without invoking the supplier + final FastTripleBunch second = map.computeIfAbsent(node("k"), + () -> { throw new AssertionError("supplier must not be called when key is present"); }); + assertSame(produced, second); + } + + @Test + public void putReplacesExistingBunch() { + final var map = new FastHashedBunchMap(); + final var b1 = new FastHashedTripleBunch(); + b1.tryAdd(triple("s p o")); + final var b2 = new FastHashedTripleBunch(); + b2.tryAdd(triple("s p o2")); + + map.put(node("k"), b1); + map.put(node("k"), b2); + assertEquals(1, map.size()); + assertSame(b2, map.get(node("k"))); + } + + @Test + public void removeRemovesEntry() { + final var map = new FastHashedBunchMap(); + map.put(node("k"), new FastHashedTripleBunch()); + assertTrue(map.tryRemove(node("k"))); + assertEquals(0, map.size()); + assertNull(map.get(node("k"))); + 
// removing again is a no-op + assertFalse(map.tryRemove(node("k"))); + } +} diff --git a/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListIteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListIteratorTest.java new file mode 100644 index 00000000000..d9f4e1eecb7 --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListIteratorTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Triple; +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.ConcurrentModificationException; +import java.util.HashSet; +import java.util.NoSuchElementException; + +import static org.apache.jena.testing_framework.GraphHelper.triple; +import static org.junit.Assert.*; + +/** + * Unit tests for {@link IndexListIterator}: iterates an {@link IndexList} of + * triple indices, dereferencing each index against a {@link TripleSet}. 
+ */ +public class IndexListIteratorTest { + + private TripleSet triples; + private IndexList list; + private Triple t1; + private Triple t2; + private Triple t3; + private int idx1; + private int idx2; + private int idx3; + + @Before + public void setUp() { + triples = new TripleSet(); + list = new IndexList(); + t1 = triple("s1 p1 o1"); + t2 = triple("s2 p2 o2"); + t3 = triple("s3 p3 o3"); + idx1 = triples.addAndGetIndex(t1); + idx2 = triples.addAndGetIndex(t2); + idx3 = triples.addAndGetIndex(t3); + list.add(idx1); + list.add(idx2); + list.add(idx3); + } + + @Test + public void iteratesAllTriplesInReverseInsertionOrder() { + final var it = new IndexListIterator(triples, list); + // The iterator walks from lastPos back to position 0 + assertTrue(it.hasNext()); + assertEquals(t3, it.next()); + assertTrue(it.hasNext()); + assertEquals(t2, it.next()); + assertTrue(it.hasNext()); + assertEquals(t1, it.next()); + assertFalse(it.hasNext()); + } + + @Test + public void iteratorOverEmptyListYieldsNothing() { + final var emptyList = new IndexList(); + final var it = new IndexListIterator(triples, emptyList); + assertFalse(it.hasNext()); + } + + @Test(expected = NoSuchElementException.class) + public void nextThrowsWhenExhausted() { + final var emptyList = new IndexList(); + final var it = new IndexListIterator(triples, emptyList); + it.next(); + } + + @Test + public void forEachRemainingVisitsEveryTriple() { + final var collected = new HashSet(); + final var it = new IndexListIterator(triples, list); + it.forEachRemaining(collected::add); + final var expected = new HashSet(); + expected.add(t1); + expected.add(t2); + expected.add(t3); + assertEquals(expected, collected); + } + + @Test(expected = ConcurrentModificationException.class) + public void nextDetectsConcurrentModification() { + final var it = new IndexListIterator(triples, list); + // Adding a new triple to the canonical set after constructing the + // iterator must invalidate it. 
+ triples.addAndGetIndex(triple("s4 p4 o4")); + it.next(); + } + + @Test(expected = ConcurrentModificationException.class) + public void forEachRemainingDetectsConcurrentModification() { + final var it = new IndexListIterator(triples, list); + triples.addAndGetIndex(triple("s5 p5 o5")); + it.forEachRemaining(t -> {}); + } + + @Test + public void singleElementListIteratesExactlyOnce() { + final var single = new IndexList(); + single.add(idx2); + final var it = new IndexListIterator(triples, single); + final var collected = new ArrayList(); + while (it.hasNext()) { + collected.add(it.next()); + } + assertEquals(1, collected.size()); + assertEquals(t2, collected.get(0)); + } +} diff --git a/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListSpliteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListSpliteratorTest.java new file mode 100644 index 00000000000..9515e0c1d46 --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListSpliteratorTest.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Triple; +import org.junit.Before; +import org.junit.Test; + +import java.util.*; +import java.util.stream.StreamSupport; + +import static org.apache.jena.testing_framework.GraphHelper.triple; +import static org.junit.Assert.*; + +/** + * Unit tests for {@link IndexListSpliterator}: walks an {@link IndexList}, + * dereferences each index against a {@link TripleSet}, and supports + * recursive splitting for parallel traversal. + */ +public class IndexListSpliteratorTest { + + private TripleSet triples; + private IndexList list; + private List expected; + + @Before + public void setUp() { + triples = new TripleSet(); + list = new IndexList(); + expected = new ArrayList<>(); + for (int i = 0; i < 8; i++) { + final var t = triple("s" + i + " p o"); + expected.add(t); + list.add(triples.addAndGetIndex(t)); + } + } + + @Test + public void tryAdvanceVisitsEveryTriple() { + final var sp = new IndexListSpliterator(triples, list); + final var collected = new ArrayList(); + while (sp.tryAdvance(collected::add)) { /* noop */ } + assertEquals(new HashSet<>(expected), new HashSet<>(collected)); + assertEquals(expected.size(), collected.size()); + } + + @Test + public void forEachRemainingVisitsEveryTriple() { + final var sp = new IndexListSpliterator(triples, list); + final var collected = new ArrayList(); + sp.forEachRemaining(collected::add); + assertEquals(new HashSet<>(expected), new HashSet<>(collected)); + } + + @Test + public void streamYieldsAllTriples() { + final var sp = new IndexListSpliterator(triples, list); + final var collected = StreamSupport.stream(sp, false).toList(); + assertEquals(new HashSet<>(expected), new HashSet<>(collected)); + } + + @Test + public void parallelStreamYieldsAllTriples() { + final var sp = new IndexListSpliterator(triples, list); + final var collected = StreamSupport.stream(sp, true).toList(); + assertEquals(new 
HashSet<>(expected), new HashSet<>(collected)); + } + + @Test + public void trySplitProducesNonOverlappingHalves() { + final var sp = new IndexListSpliterator(triples, list); + final var prefix = sp.trySplit(); + assertNotNull("a list of 8 must split", prefix); + + final var firstHalf = new ArrayList(); + final var secondHalf = new ArrayList(); + prefix.forEachRemaining(firstHalf::add); + sp.forEachRemaining(secondHalf::add); + + // Sums must equal the full set + final var combined = new ArrayList<>(firstHalf); + combined.addAll(secondHalf); + assertEquals(expected.size(), combined.size()); + assertEquals(new HashSet<>(expected), new HashSet<>(combined)); + + // The two halves are disjoint + final var asSetA = new HashSet<>(firstHalf); + for (final var t : secondHalf) { + assertFalse("split must be disjoint", asSetA.contains(t)); + } + } + + @Test + public void trySplitReturnsNullWhenNothingLeftToSplit() { + final var single = new IndexList(); + single.add(triples.addAndGetIndex(triple("only s p o"))); + final var sp = new IndexListSpliterator(triples, single); + // Single element cannot be split + assertNull(sp.trySplit()); + } + + @Test + public void characteristicsAdvertiseDistinctSizedNonNullImmutable() { + final var sp = new IndexListSpliterator(triples, list); + final int chars = sp.characteristics(); + assertTrue((chars & Spliterator.DISTINCT) != 0); + assertTrue((chars & Spliterator.SIZED) != 0); + assertTrue((chars & Spliterator.SUBSIZED) != 0); + assertTrue((chars & Spliterator.NONNULL) != 0); + assertTrue((chars & Spliterator.IMMUTABLE) != 0); + } + + @Test + public void estimateSizeAndExactSizeMatchRemainingCount() { + final var sp = new IndexListSpliterator(triples, list); + assertEquals(expected.size(), sp.estimateSize()); + assertEquals(expected.size(), sp.getExactSizeIfKnown()); + + // After consuming one element, both must drop + sp.tryAdvance(t -> {}); + assertEquals(expected.size() - 1L, sp.estimateSize()); + assertEquals(expected.size() - 1L, 
sp.getExactSizeIfKnown()); + } + + @Test(expected = ConcurrentModificationException.class) + public void tryAdvanceDetectsConcurrentModification() { + final var sp = new IndexListSpliterator(triples, list); + triples.addAndGetIndex(triple("s99 p o")); + sp.tryAdvance(t -> {}); + } + + @Test(expected = ConcurrentModificationException.class) + public void forEachRemainingDetectsConcurrentModification() { + final var sp = new IndexListSpliterator(triples, list); + triples.addAndGetIndex(triple("s99 p o")); + sp.forEachRemaining(t -> {}); + } +} diff --git a/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListTest.java b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListTest.java new file mode 100644 index 00000000000..7bb68b50e48 --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListTest.java @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.store.indexed; + +import org.junit.Test; + +import static org.junit.Assert.*; + +/** + * Unit tests for {@link IndexList}: append-only int list with O(1) + * swap-with-last removal, used as the value type of the per-node index + * lists in the eager indexing strategy. + */ +public class IndexListTest { + + @Test + public void newListIsEmpty() { + final var list = new IndexList(); + assertTrue(list.isEmpty()); + assertEquals(0, list.size()); + assertEquals(-1, list.lastPos()); + } + + @Test + public void addReturnsSequentialPositions() { + final var list = new IndexList(); + assertEquals(0, list.add(10)); + assertEquals(1, list.add(20)); + assertEquals(2, list.add(30)); + + assertFalse(list.isEmpty()); + assertEquals(3, list.size()); + assertEquals(2, list.lastPos()); + assertEquals(10, list.getIndexAt(0)); + assertEquals(20, list.getIndexAt(1)); + assertEquals(30, list.getIndexAt(2)); + } + + @Test + public void addGrowsBackingArrayBeyondInitialCapacity() { + final var list = new IndexList(); + // initial backing array has length 2 (INITIAL_SIZE), so adding more + // than two elements forces at least one grow(). 
+ for (int i = 0; i < 100; i++) { + assertEquals(i, list.add(i * 7)); + } + assertEquals(100, list.size()); + for (int i = 0; i < 100; i++) { + assertEquals(i * 7, list.getIndexAt(i)); + } + assertTrue("backing array must have grown", list.getIndices().length >= 100); + } + + @Test + public void getIndicesExposesRawArray() { + final var list = new IndexList(); + list.add(7); + list.add(11); + final var raw = list.getIndices(); + // first list.size() entries must contain the values in insertion order + assertEquals(7, raw[0]); + assertEquals(11, raw[1]); + } + + @Test + public void removeAtLastReturnsMinusOne() { + final var list = new IndexList(); + list.add(10); + list.add(20); + + // removing the last element does not move anything + assertEquals(-1, list.removeAt(1)); + assertEquals(1, list.size()); + assertEquals(10, list.getIndexAt(0)); + } + + @Test + public void removeAtMiddleSwapsWithLast() { + final var list = new IndexList(); + list.add(10); + list.add(20); + list.add(30); + + // removing position 0 swaps the last element (30) into position 0. + // The return value tells the caller that the triple-index 30 moved. 
+ assertEquals(30, list.removeAt(0)); + assertEquals(2, list.size()); + assertEquals(30, list.getIndexAt(0)); + assertEquals(20, list.getIndexAt(1)); + } + + @Test + public void removeUntilEmpty() { + final var list = new IndexList(); + list.add(1); + list.add(2); + list.add(3); + + list.removeAt(2); + list.removeAt(1); + list.removeAt(0); + + assertTrue(list.isEmpty()); + assertEquals(0, list.size()); + assertEquals(-1, list.lastPos()); + } + + @Test + public void copyConstructorIsIndependent() { + final var src = new IndexList(); + src.add(1); + src.add(2); + src.add(3); + + final var copy = new IndexList(src); + assertEquals(src.size(), copy.size()); + for (int i = 0; i < src.size(); i++) { + assertEquals(src.getIndexAt(i), copy.getIndexAt(i)); + } + + // mutating the copy must not affect the source + copy.add(99); + assertEquals(4, copy.size()); + assertEquals(3, src.size()); + } + + @Test + public void copyEqualsCopyConstructor() { + final var src = new IndexList(); + src.add(11); + src.add(22); + src.add(33); + + final var clone = src.copy(); + assertNotSame(src, clone); + assertEquals(src.size(), clone.size()); + for (int i = 0; i < src.size(); i++) { + assertEquals(src.getIndexAt(i), clone.getIndexAt(i)); + } + } + + @Test + public void growAfterCopy() { + final var src = new IndexList(); + + { // empty source list: clone should be empty but independent + final var clone = src.copy(); + assertNotSame(src, clone); + assertEquals(src.size(), clone.size()); + + clone.add(11); + clone.add(22); + clone.add(33); + assertEquals(3, clone.size()); + assertTrue(src.isEmpty()); + } + + src.add(111); // add first element + + { // non-empty source list: clone should have same content but be independent + final var srcSizeBefore = src.size(); + final var clone = src.copy(); + assertNotSame(src, clone); + assertEquals(src.size(), clone.size()); + + clone.add(11); + clone.add(22); + clone.add(33); + assertEquals(3+srcSizeBefore, clone.size()); + assertEquals(srcSizeBefore, 
src.size()); + } + + src.add(222); // add second element + + { // non-empty source list: clone should have same content but be independent + final var srcSizeBefore = src.size(); + final var clone = src.copy(); + assertNotSame(src, clone); + assertEquals(src.size(), clone.size()); + + clone.add(11); + clone.add(22); + clone.add(33); + assertEquals(3+srcSizeBefore, clone.size()); + assertEquals(srcSizeBefore, src.size()); + } + } + + @Test + public void intersectsReturnsTrueForCommonElement() { + final var a = new IndexList(); + final var b = new IndexList(); + // Triple indices 0..9 used; reverseIndices arrays must be at least + // that long. + final int universe = 16; + final int[] reverseA = new int[universe]; + final int[] reverseB = new int[universe]; + + addToList(a, reverseA, 1); + addToList(a, reverseA, 4); + addToList(a, reverseA, 7); + + addToList(b, reverseB, 7); + addToList(b, reverseB, 9); + + assertTrue(IndexList.intersects(a, reverseA, b, reverseB)); + // symmetric + assertTrue(IndexList.intersects(b, reverseB, a, reverseA)); + } + + @Test + public void intersectsReturnsFalseForDisjointLists() { + final var a = new IndexList(); + final var b = new IndexList(); + final int universe = 16; + final int[] reverseA = new int[universe]; + final int[] reverseB = new int[universe]; + + addToList(a, reverseA, 1); + addToList(a, reverseA, 2); + addToList(a, reverseA, 3); + + addToList(b, reverseB, 4); + addToList(b, reverseB, 5); + addToList(b, reverseB, 6); + + assertFalse(IndexList.intersects(a, reverseA, b, reverseB)); + assertFalse(IndexList.intersects(b, reverseB, a, reverseA)); + } + + @Test + public void intersectsHandlesEmptyList() { + final var a = new IndexList(); + final var b = new IndexList(); + final int[] reverseA = new int[8]; + final int[] reverseB = new int[8]; + + addToList(b, reverseB, 1); + addToList(b, reverseB, 2); + + assertFalse(IndexList.intersects(a, reverseA, b, reverseB)); + assertFalse(IndexList.intersects(b, reverseB, a, 
reverseA)); + } + + /** Helper that mirrors how EagerStoreStrategy keeps its reverse-index in sync. */ + private static void addToList(final IndexList list, final int[] reverseIndices, final int tripleIndex) { + final int pos = list.add(tripleIndex); + reverseIndices[tripleIndex] = pos; + } +} diff --git a/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListsIteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListsIteratorTest.java new file mode 100644 index 00000000000..3b204851e71 --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListsIteratorTest.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Triple; +import org.junit.Before; +import org.junit.Test; + +import java.util.*; + +import static org.apache.jena.testing_framework.GraphHelper.triple; +import static org.junit.Assert.*; + +/** + * Unit tests for {@link IndexListsIterator}: lazily walks the intersection + * of two {@link IndexList}s using their reverse-index arrays. 
+ */ +public class IndexListsIteratorTest { + + private TripleSet triples; + private int[] reverseA; + private int[] reverseB; + + @Before + public void setUp() { + triples = new TripleSet(); + // pre-size reverse arrays generously; the test triples will get + // small indices. + reverseA = new int[64]; + reverseB = new int[64]; + } + + private int addTriple(final String spec) { + return triples.addAndGetIndex(triple(spec)); + } + + private static void appendTo(final IndexList list, final int[] reverse, final int tripleIndex) { + final int pos = list.add(tripleIndex); + reverse[tripleIndex] = pos; + } + + @Test + public void iteratesIntersectionOfTwoLists() { + final int idx1 = addTriple("s p1 o1"); + final int idx2 = addTriple("s p2 o2"); + final int idx3 = addTriple("s p3 o3"); + final int idx4 = addTriple("x p4 o4"); + final int idx5 = addTriple("y p5 o5"); + + final var listA = new IndexList(); + appendTo(listA, reverseA, idx1); + appendTo(listA, reverseA, idx2); + appendTo(listA, reverseA, idx3); + + final var listB = new IndexList(); + appendTo(listB, reverseB, idx2); + appendTo(listB, reverseB, idx3); + appendTo(listB, reverseB, idx4); + appendTo(listB, reverseB, idx5); + + final var it = new IndexListsIterator(triples, + listA, reverseA, listB, reverseB); + final var collected = new HashSet(); + while (it.hasNext()) { + collected.add(it.next()); + } + final var expected = new HashSet(); + expected.add(triple("s p2 o2")); + expected.add(triple("s p3 o3")); + assertEquals(expected, collected); + } + + @Test + public void emptyIntersectionYieldsNoElements() { + final int idx1 = addTriple("a b c"); + final int idx2 = addTriple("d e f"); + final int idx3 = addTriple("g h i"); + + final var listA = new IndexList(); + appendTo(listA, reverseA, idx1); + + final var listB = new IndexList(); + appendTo(listB, reverseB, idx2); + appendTo(listB, reverseB, idx3); + + final var it = new IndexListsIterator(triples, + listA, reverseA, listB, reverseB); + 
assertFalse(it.hasNext()); + } + + @Test(expected = NoSuchElementException.class) + public void nextThrowsWhenIntersectionExhausted() { + final int idx1 = addTriple("a b c"); + final int idx2 = addTriple("d e f"); + + final var listA = new IndexList(); + appendTo(listA, reverseA, idx1); + final var listB = new IndexList(); + appendTo(listB, reverseB, idx2); + + final var it = new IndexListsIterator(triples, + listA, reverseA, listB, reverseB); + it.next(); + } + + @Test + public void hasNextIsIdempotent() { + final int idx1 = addTriple("a b c"); + + final var listA = new IndexList(); + appendTo(listA, reverseA, idx1); + final var listB = new IndexList(); + appendTo(listB, reverseB, idx1); + + final var it = new IndexListsIterator(triples, + listA, reverseA, listB, reverseB); + assertTrue(it.hasNext()); + // calling hasNext repeatedly must not advance + assertTrue(it.hasNext()); + assertTrue(it.hasNext()); + assertEquals(triple("a b c"), it.next()); + assertFalse(it.hasNext()); + } + + @Test + public void forEachRemainingVisitsIntersectionOnly() { + final int idx1 = addTriple("a b c"); + final int idx2 = addTriple("d e f"); + final int idx3 = addTriple("g h i"); + final int idx4 = addTriple("j k l"); + + final var listA = new IndexList(); + appendTo(listA, reverseA, idx1); + appendTo(listA, reverseA, idx2); + appendTo(listA, reverseA, idx3); + + final var listB = new IndexList(); + appendTo(listB, reverseB, idx2); + appendTo(listB, reverseB, idx3); + appendTo(listB, reverseB, idx4); + + final var it = new IndexListsIterator(triples, + listA, reverseA, listB, reverseB); + final var collected = new ArrayList(); + it.forEachRemaining(collected::add); + final var expected = new HashSet(); + expected.add(triple("d e f")); + expected.add(triple("g h i")); + assertEquals(expected, new HashSet<>(collected)); + } + + @Test + public void forEachRemainingFlushesPrefetchedHasNextElement() { + final int idx1 = addTriple("a b c"); + final int idx2 = addTriple("d e f"); + + final 
var listA = new IndexList(); + appendTo(listA, reverseA, idx1); + appendTo(listA, reverseA, idx2); + + final var listB = new IndexList(); + appendTo(listB, reverseB, idx1); + appendTo(listB, reverseB, idx2); + + final var it = new IndexListsIterator(triples, + listA, reverseA, listB, reverseB); + // prime the look-ahead buffer + assertTrue(it.hasNext()); + + final var collected = new ArrayList(); + it.forEachRemaining(collected::add); + // Both intersected triples must be reported, including the pre-fetched one. + assertEquals(new HashSet<>(Arrays.asList(triple("a b c"), triple("d e f"))), + new HashSet<>(collected)); + } + + @Test(expected = ConcurrentModificationException.class) + public void nextDetectsConcurrentModification() { + final int idx1 = addTriple("a b c"); + + final var listA = new IndexList(); + appendTo(listA, reverseA, idx1); + final var listB = new IndexList(); + appendTo(listB, reverseB, idx1); + + final var it = new IndexListsIterator(triples, + listA, reverseA, listB, reverseB); + triples.addAndGetIndex(triple("z z z")); + it.next(); + } +} diff --git a/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListsSpliteratorTest.java b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListsSpliteratorTest.java new file mode 100644 index 00000000000..683bf619a91 --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexListsSpliteratorTest.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Triple; +import org.junit.Before; +import org.junit.Test; + +import java.util.*; +import java.util.stream.StreamSupport; + +import static org.apache.jena.testing_framework.GraphHelper.triple; +import static org.junit.Assert.*; + +/** + * Unit tests for {@link IndexListsSpliterator}: walks the intersection of + * two {@link IndexList}s and supports recursive splitting. + */ +public class IndexListsSpliteratorTest { + + private TripleSet triples; + private int[] reverseA; + private int[] reverseB; + private IndexList listA; + private IndexList listB; + private List commonTriples; + + @Before + public void setUp() { + triples = new TripleSet(); + reverseA = new int[64]; + reverseB = new int[64]; + + listA = new IndexList(); + listB = new IndexList(); + commonTriples = new ArrayList<>(); + + // Eight triples in A, last six also in B. 
+ for (int i = 0; i < 8; i++) { + final var t = triple("a" + i + " p o"); + final int idx = triples.addAndGetIndex(t); + appendTo(listA, reverseA, idx); + if (i >= 2) { + appendTo(listB, reverseB, idx); + commonTriples.add(t); + } + } + // Two extra triples in B only + for (int i = 0; i < 2; i++) { + final var t = triple("b" + i + " p o"); + final int idx = triples.addAndGetIndex(t); + appendTo(listB, reverseB, idx); + } + } + + private static void appendTo(final IndexList list, final int[] reverse, final int tripleIndex) { + final int pos = list.add(tripleIndex); + reverse[tripleIndex] = pos; + } + + @Test + public void tryAdvanceVisitsExactlyTheIntersection() { + final var sp = new IndexListsSpliterator(triples, listA, reverseA, listB, reverseB); + final var collected = new HashSet(); + while (sp.tryAdvance(collected::add)) { /* noop */ } + assertEquals(new HashSet<>(commonTriples), collected); + } + + @Test + public void forEachRemainingVisitsIntersectionOnly() { + final var sp = new IndexListsSpliterator(triples, listA, reverseA, listB, reverseB); + final var collected = new HashSet(); + sp.forEachRemaining(collected::add); + assertEquals(new HashSet<>(commonTriples), collected); + } + + @Test + public void streamYieldsIntersection() { + final var sp = new IndexListsSpliterator(triples, listA, reverseA, listB, reverseB); + final var collected = StreamSupport.stream(sp, false).toList(); + assertEquals(new HashSet<>(commonTriples), new HashSet<>(collected)); + assertEquals(commonTriples.size(), collected.size()); + } + + @Test + public void parallelStreamYieldsIntersection() { + final var sp = new IndexListsSpliterator(triples, listA, reverseA, listB, reverseB); + final var collected = StreamSupport.stream(sp, true).toList(); + assertEquals(new HashSet<>(commonTriples), new HashSet<>(collected)); + } + + @Test + public void trySplitDividesIntersectionAcrossHalves() { + final var sp = new IndexListsSpliterator(triples, listA, reverseA, listB, reverseB); + final 
var prefix = sp.trySplit(); + assertNotNull(prefix); + + final var firstHalf = new ArrayList(); + final var secondHalf = new ArrayList(); + prefix.forEachRemaining(firstHalf::add); + sp.forEachRemaining(secondHalf::add); + + final var combined = new ArrayList<>(firstHalf); + combined.addAll(secondHalf); + assertEquals(new HashSet<>(commonTriples), new HashSet<>(combined)); + assertEquals(commonTriples.size(), combined.size()); + + final var asSetA = new HashSet<>(firstHalf); + for (final var t : secondHalf) { + assertFalse("split must be disjoint", asSetA.contains(t)); + } + } + + @Test + public void trySplitReturnsNullWhenSmallerListCannotBeSplit() { + // Use a tiny smaller list (size 1) to force trySplit to return null. + final var triplesLocal = new TripleSet(); + final int[] revA = new int[8]; + final int[] revB = new int[8]; + final var a = new IndexList(); + final var b = new IndexList(); + final int idx = triplesLocal.addAndGetIndex(triple("only s p o")); + appendTo(a, revA, idx); + appendTo(b, revB, idx); + final var sp = new IndexListsSpliterator(triplesLocal, a, revA, b, revB); + assertNull(sp.trySplit()); + } + + @Test + public void characteristicsAdvertiseDistinctNonNullImmutableButNotSized() { + final var sp = new IndexListsSpliterator(triples, listA, reverseA, listB, reverseB); + final int chars = sp.characteristics(); + assertTrue((chars & Spliterator.DISTINCT) != 0); + assertTrue((chars & Spliterator.NONNULL) != 0); + assertTrue((chars & Spliterator.IMMUTABLE) != 0); + // Intersection size is not known up front, so SIZED must NOT be set. 
+ assertEquals(0, chars & Spliterator.SIZED); + } + + @Test + public void getExactSizeIfKnownReportsUnknown() { + final var sp = new IndexListsSpliterator(triples, listA, reverseA, listB, reverseB); + assertEquals(-1, sp.getExactSizeIfKnown()); + } + + @Test + public void estimateSizeIsAtMostSmallerListSize() { + final var sp = new IndexListsSpliterator(triples, listA, reverseA, listB, reverseB); + // estimateSize is the remaining range of the smaller list, which is + // an upper bound on the intersection. + final long est = sp.estimateSize(); + assertTrue("estimate must bound actual intersection", + est >= commonTriples.size()); + } + + @Test(expected = ConcurrentModificationException.class) + public void tryAdvanceDetectsConcurrentModification() { + final var sp = new IndexListsSpliterator(triples, listA, reverseA, listB, reverseB); + triples.addAndGetIndex(triple("z z z")); + sp.tryAdvance(t -> {}); + } + + @Test(expected = ConcurrentModificationException.class) + public void forEachRemainingDetectsConcurrentModification() { + final var sp = new IndexListsSpliterator(triples, listA, reverseA, listB, reverseB); + triples.addAndGetIndex(triple("z z z")); + sp.forEachRemaining(t -> {}); + } +} diff --git a/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexedSetTripleStoreTest.java b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexedSetTripleStoreTest.java new file mode 100644 index 00000000000..fb0b8323541 --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/IndexedSetTripleStoreTest.java @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Triple; +import org.apache.jena.mem.IndexingStrategy; +import org.apache.jena.mem.pattern.PatternClassifier; +import org.apache.jena.mem.store.AbstractTripleStoreTest; +import org.apache.jena.mem.store.TripleStore; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import java.util.Arrays; +import java.util.Collection; + +import static org.apache.jena.testing_framework.GraphHelper.triple; +import static org.junit.Assert.*; + +@RunWith(Parameterized.class) +public class IndexedSetTripleStoreTest extends AbstractTripleStoreTest { + + @Parameterized.Parameter + public IndexingStrategy indexingStrategy; + + @Parameterized.Parameters(name = "{0}") + public static Collection data() { + return Arrays.stream(IndexingStrategy.values()) + .map(strategy -> new Object[]{strategy}) + .toList(); + } + + @Override + protected TripleStore createTripleStore() { + switch (indexingStrategy) { + case EAGER, LAZY, LAZY_PARALLEL, MINIMAL: + return new IndexedSetTripleStore(indexingStrategy); + case MANUAL: + return setupStoreWithSpyForSpecialManualStrategy(); + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + indexingStrategy); + } + } + + private static boolean isPatternRequiringIndexing(final Triple tripleMatch) { + switch(PatternClassifier.classify(tripleMatch)) { + case SUB_PRE_ANY, SUB_ANY_OBJ, SUB_ANY_ANY, ANY_PRE_OBJ, 
ANY_PRE_ANY, ANY_ANY_OBJ: + return true; + case ANY_ANY_ANY, SUB_PRE_OBJ: + return false; + default: + throw new IllegalArgumentException("Unknown pattern classification: " + PatternClassifier.classify(tripleMatch)); + } + } + + private IndexedSetTripleStore setupStoreWithSpyForSpecialManualStrategy() { + final var realStore = new IndexedSetTripleStore(IndexingStrategy.MANUAL); + // Spy setup for the manual strategy + final var spyStore = Mockito.spy(realStore); + + // Mock {@link TripleStore#contains(Triple)} + Mockito.doAnswer(invocation -> { + final Triple tripleMatch = invocation.getArgument(0); + // If the triple match is a pattern that requires indexing, throw an exception + assertThrows(UnsupportedOperationException.class, () -> realStore.contains(tripleMatch)); + + // now initialize the index + realStore.initializeIndex(); + // determine the result with the index + final var result = realStore.contains(tripleMatch); + // Reset the indexing strategy for the next call + realStore.resetIndexingStrategy(); + // Return the result of the store with the index + return result; + }).when(spyStore).contains(Mockito.argThat(t -> isPatternRequiringIndexing(t))); + + // Mock {@link TripleStore#find(Triple)} + Mockito.doAnswer(invocation -> { + final Triple tripleMatch = invocation.getArgument(0); + // If the triple match is a pattern that requires indexing, throw an exception + assertThrows(UnsupportedOperationException.class, () -> realStore.find(tripleMatch)); + + // now initialize the index + realStore.initializeIndex(); + // determine the result with the index + final var result = realStore.find(tripleMatch); + // Reset the indexing strategy for the next call + realStore.resetIndexingStrategy(); + // Return the result of the store with the index + return result; + }).when(spyStore).find(Mockito.argThat(t -> isPatternRequiringIndexing(t))); + + // Mock {@link TripleStore#stream(Triple)} + Mockito.doAnswer(invocation -> { + final Triple tripleMatch = 
invocation.getArgument(0); + // If the triple match is a pattern that requires indexing, throw an exception + assertThrows(UnsupportedOperationException.class, () -> realStore.stream(tripleMatch)); + + // now initialize the index + realStore.initializeIndex(); + // determine the result with the index + final var result = realStore.stream(tripleMatch); + // Reset the indexing strategy for the next call + realStore.resetIndexingStrategy(); + // Return the result of the store with the index + return result; + }).when(spyStore).stream(Mockito.argThat(t -> isPatternRequiringIndexing(t))); + + return spyStore; + } + + private IndexedSetTripleStore getSutAsIndexedSetTripleStore() { + return (IndexedSetTripleStore) super.sut; + } + + @Test + public void testGetIndexingStrategy() { + // Given + final var sut = getSutAsIndexedSetTripleStore(); + + // Then + assertEquals(indexingStrategy, sut.getIndexingStrategy()); + } + + @Test + public void testIsIndexInitialized() { + // Given + final var sut = getSutAsIndexedSetTripleStore(); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER: + assertTrue(sut.isIndexInitialized()); + break; + case LAZY, LAZY_PARALLEL, MANUAL, MINIMAL: + assertFalse(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + + // When + sut.add(triple("s p o")); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER: + assertTrue(sut.isIndexInitialized()); + break; + case LAZY, LAZY_PARALLEL, MANUAL, MINIMAL: + assertFalse(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + } + + @Test + public void testLazyInitiallization() { + // Given + final var sut = getSutAsIndexedSetTripleStore(); + sut.add(triple("s p o")); + + // When + sut.contains(triple("s ?? 
o")); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER, LAZY, LAZY_PARALLEL: + assertTrue(sut.isIndexInitialized()); + break; + case MANUAL, MINIMAL: + assertFalse(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + } + + @Test + public void testManualInitialization() { + // Given + final var sut = getSutAsIndexedSetTripleStore(); + + // When + sut.initializeIndex(); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER, LAZY, LAZY_PARALLEL, MANUAL, MINIMAL: + assertTrue(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + } + + @Test + public void testManualInitializationParallel() { + // Given + final var sut = getSutAsIndexedSetTripleStore(); + + // When + sut.initializeIndexParallel(); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER, LAZY, LAZY_PARALLEL, MANUAL, MINIMAL: + assertTrue(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + } + + @Test + public void testResetIndexingStrategy() { + // Given + final var sut = getSutAsIndexedSetTripleStore(); + sut.initializeIndex(); + + // When + sut.resetIndexingStrategy(); + + // Then + switch (sut.getIndexingStrategy()) { + case EAGER: + assertTrue(sut.isIndexInitialized()); + break; + case LAZY, LAZY_PARALLEL, MANUAL, MINIMAL: + assertFalse(sut.isIndexInitialized()); + break; + default: + throw new IllegalArgumentException("Unsupported indexing strategy: " + sut.getIndexingStrategy()); + } + } +} \ No newline at end of file diff --git a/jena-core/src/test/java/org/apache/jena/mem/store/indexed/NodesToIndicesTest.java b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/NodesToIndicesTest.java new file mode 100644 index 00000000000..31662a7a137 --- /dev/null +++ 
b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/NodesToIndicesTest.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +package org.apache.jena.mem.store.indexed; + +import org.apache.jena.graph.Node; +import org.junit.Test; + +import static org.apache.jena.testing_framework.GraphHelper.node; +import static org.junit.Assert.*; + +/** + * Unit tests for {@link NodesToIndices}: {@link Node}-keyed + * {@link org.apache.jena.mem.collection.FastHashMap} of {@link IndexList}s, + * with a deep-cloning copy constructor. 
+ */ +public class NodesToIndicesTest { + + @Test + public void newMapIsEmpty() { + final var map = new NodesToIndices(); + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + } + + @Test + public void putAndGetReturnSameList() { + final var map = new NodesToIndices(); + final Node n = node("s"); + final var list = new IndexList(); + list.add(7); + list.add(11); + + map.put(n, list); + assertEquals(1, map.size()); + assertSame(list, map.get(n)); + } + + @Test + public void getReturnsNullForMissingKey() { + final var map = new NodesToIndices(); + assertNull(map.get(node("missing"))); + } + + @Test + public void copyClonesEveryIndexList() { + final var src = new NodesToIndices(); + final var listA = new IndexList(); + listA.add(1); + listA.add(2); + final var listB = new IndexList(); + listB.add(3); + src.put(node("a"), listA); + src.put(node("b"), listB); + + final var copy = src.copy(); + assertEquals(2, copy.size()); + + final var copiedA = copy.get(node("a")); + final var copiedB = copy.get(node("b")); + assertNotNull(copiedA); + assertNotNull(copiedB); + + // Lists must be independent: same content, different identity + assertNotSame(listA, copiedA); + assertNotSame(listB, copiedB); + assertEquals(listA.size(), copiedA.size()); + assertEquals(listB.size(), copiedB.size()); + assertEquals(1, copiedA.getIndexAt(0)); + assertEquals(2, copiedA.getIndexAt(1)); + assertEquals(3, copiedB.getIndexAt(0)); + } + + @Test + public void copyIsIndependentOfSource() { + final var src = new NodesToIndices(); + final var list = new IndexList(); + list.add(42); + src.put(node("k"), list); + + final var copy = src.copy(); + // Mutate the source's list — copy must be unaffected. + list.add(99); + assertEquals(2, src.get(node("k")).size()); + assertEquals(1, copy.get(node("k")).size()); + assertEquals(42, copy.get(node("k")).getIndexAt(0)); + + // Add a key only to the copy: src must be unaffected. 
+ copy.put(node("only-in-copy"), new IndexList()); + assertNull(src.get(node("only-in-copy"))); + } + + @Test + public void computeIfAbsentInsertsAndReusesValues() { + final var map = new NodesToIndices(); + final Node n = node("x"); + final var created = map.computeIfAbsent(n, IndexList::new); + assertNotNull(created); + // Second call with the same key must return the SAME list. + final var second = map.computeIfAbsent(n, IndexList::new); + assertSame(created, second); + } +} diff --git a/jena-core/src/test/java/org/apache/jena/mem/store/indexed/TripleSetTest.java b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/TripleSetTest.java new file mode 100644 index 00000000000..cfec08b9f57 --- /dev/null +++ b/jena-core/src/test/java/org/apache/jena/mem/store/indexed/TripleSetTest.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*
 * SPDX-License-Identifier: Apache-2.0
 */
package org.apache.jena.mem.store.indexed;

import org.junit.Test;

import java.util.concurrent.atomic.AtomicInteger;

import static org.apache.jena.testing_framework.GraphHelper.triple;
import static org.junit.Assert.*;

/**
 * Unit tests for {@link TripleSet}, the triple collection used inside
 * {@link IndexedSetTripleStore}. Besides the usual set operations the tests
 * cover index assignment via {@code addAndGetIndex} and the optional hook
 * that is notified when the internal keys array grows.
 */
public class TripleSetTest {

    /** Text form of the i-th generated triple, as understood by {@code triple(String)}. */
    private static String numbered(final int i) {
        return "s" + i + " p o";
    }

    /** A freshly constructed set has size zero and reports itself empty. */
    @Test
    public void newSetIsEmpty() {
        final var sut = new TripleSet();
        assertTrue(sut.isEmpty());
        assertEquals(0, sut.size());
    }

    /**
     * Distinct triples receive distinct indices that can be resolved in both
     * directions; adding a triple again returns the complement of its index.
     */
    @Test
    public void addAndGetIndexAssignsStableIndices() {
        final var sut = new TripleSet();
        final var first = triple("a b c");
        final var second = triple("d e f");

        final int firstIndex = sut.addAndGetIndex(first);
        final int secondIndex = sut.addAndGetIndex(second);
        assertNotEquals(firstIndex, secondIndex);

        // index -> triple
        assertEquals(first, sut.getKeyAt(firstIndex));
        assertEquals(second, sut.getKeyAt(secondIndex));

        // triple -> index, with -1 for an unknown triple
        assertEquals(firstIndex, sut.indexOf(first));
        assertEquals(secondIndex, sut.indexOf(second));
        assertEquals(-1, sut.indexOf(triple("NOT_A NOT_B NOT_C")));

        // A duplicate add signals "already present" with ~index and leaves
        // the size untouched.
        final int duplicateResult = sut.addAndGetIndex(first);
        assertTrue("re-add must return ~existingIndex", duplicateResult < 0);
        assertEquals(firstIndex, ~duplicateResult);
        assertEquals(2, sut.size());
    }

    /** {@code containsKey} is true for an added triple and false for another one. */
    @Test
    public void containsKeyReturnsTrueForAddedTriple() {
        final var sut = new TripleSet();
        final var present = triple("x y z");
        sut.tryAdd(present);

        assertFalse(sut.containsKey(triple("x y NOT_Z")));
        assertTrue(sut.containsKey(present));
    }

    /** Adding 200 triples must trigger the grow hook, reporting a positive size. */
    @Test
    public void onKeysGrowHookFiresWhenInternalArrayResizes() {
        final var sut = new TripleSet();
        final AtomicInteger invocations = new AtomicInteger();
        final AtomicInteger latestSize = new AtomicInteger();
        sut.setOnKeysGrowHook(newSize -> {
            invocations.incrementAndGet();
            latestSize.set(newSize);
        });

        // Enough insertions to make the keys array grow at least once.
        int i = 0;
        while (i < 200) {
            sut.addAndGetIndex(triple(numbered(i)));
            i++;
        }

        assertTrue("hook must fire at least once when growing", invocations.get() > 0);
        assertTrue("reported size must be a positive integer (the new array length)",
                latestSize.get() > 0);
    }

    /** Replacing the hook with {@code null} stops all notifications. */
    @Test
    public void hookCanBeDisabledByPassingNull() {
        final var sut = new TripleSet();
        final AtomicInteger invocations = new AtomicInteger();
        sut.setOnKeysGrowHook(newSize -> invocations.incrementAndGet());
        // switch the hook off again before anything is added
        sut.setOnKeysGrowHook(null);

        int i = 0;
        while (i < 200) {
            sut.addAndGetIndex(triple(numbered(i)));
            i++;
        }
        assertEquals(0, invocations.get());
    }

    /** A copy holds the same triples; additions to the copy do not reach the source. */
    @Test
    public void copyContainsSameTriplesAndIsIndependent() {
        final var original = new TripleSet();
        for (int i = 0; i < 5; i++) {
            original.tryAdd(triple(numbered(i)));
        }

        final var duplicate = original.copy();
        assertNotSame(original, duplicate);
        assertEquals(original.size(), duplicate.size());
        for (int i = 0; i < 5; i++) {
            assertTrue(duplicate.containsKey(triple(numbered(i))));
        }

        // Only the copy may change when it receives a further triple.
        duplicate.tryAdd(triple("extra p o"));
        assertEquals(6, duplicate.size());
        assertEquals(5, original.size());
        assertFalse(original.containsKey(triple("extra p o")));
    }

    /** Growth of a copy must not invoke the hook registered on the source. */
    @Test
    public void copyDoesNotPropagateGrowHook() {
        final var original = new TripleSet();
        final AtomicInteger sourceHookCalls = new AtomicInteger();
        original.setOnKeysGrowHook(n -> sourceHookCalls.incrementAndGet());

        // Fill the source (which may fire the hook), then start counting from zero.
        for (int i = 0; i < 100; i++) {
            original.tryAdd(triple(numbered(i)));
        }
        sourceHookCalls.set(0);

        // Push many more triples into the copy only; the source's hook must
        // stay silent.
        final var duplicate = original.copy();
        for (int i = 100; i < 300; i++) {
            duplicate.tryAdd(triple(numbered(i)));
        }
        assertEquals(0, sourceHookCalls.get());
    }

    /** Removing a triple shrinks the set; removing it a second time reports false. */
    @Test
    public void removeUpdatesSize() {
        final var sut = new TripleSet();
        final var removed = triple("a b c");
        final var kept = triple("d e f");
        sut.tryAdd(removed);
        sut.tryAdd(kept);

        assertTrue(sut.tryRemove(removed));
        assertEquals(1, sut.size());
        assertTrue(sut.containsKey(kept));
        assertFalse(sut.containsKey(removed));

        // a second removal finds nothing to remove
        assertFalse(sut.tryRemove(removed));
    }

    /** {@code keyStream()} delivers one element per stored triple. */
    @Test
    public void streamProducesAllTriples() {
        final var sut = new TripleSet();
        for (int i = 0; i < 10; i++) {
            sut.tryAdd(triple(numbered(i)));
        }
        assertEquals(10, sut.keyStream().count());
    }

    /** {@code clear()} leaves the set empty with size zero. */
    @Test
    public void clearResetsToEmpty() {
        final var sut = new TripleSet();
        sut.tryAdd(triple("a b c"));
        sut.tryAdd(triple("d e f"));

        sut.clear();

        assertEquals(0, sut.size());
        assertTrue(sut.isEmpty());
    }
}