Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion jena-benchmarks/jena-benchmarks-jmh/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
<parent>
<groupId>org.apache.jena</groupId>
<artifactId>jena-benchmarks</artifactId>
<version>6.1.0</version>
<version>6.2.0-SNAPSHOT</version>
</parent>

<name>Apache Jena - Benchmarks JMH</name>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.jena.atlas.iterator.ActionCount;
import org.apache.jena.jmh.JmhDefaultOptions;

import org.apache.jena.mem.collection.Sized;
import org.junit.Assert;
import org.junit.Test;

Expand Down Expand Up @@ -75,12 +76,18 @@ public Spliterator<Object> createSut(Object[] arrayWithNulls, int elementsCount)
if (count != elementsCount) {
throw new RuntimeException("Concurrent modification detected");
}
} ;
final var sized = new Sized() {
@Override
public int size() {
return elementsCount;
}
};
return switch (param1_iteratorImplementation) {
case "memvalue.SparseArraySpliterator" ->
new org.apache.jena.memvalue.SparseArraySpliterator<>(arrayWithNulls, count, checkForConcurrentModification);
case "mem2.SparseArraySpliterator" ->
new SparseArraySpliterator<>(arrayWithNulls, checkForConcurrentModification);
new SparseArraySpliterator<>(arrayWithNulls, sized);
default ->
throw new IllegalArgumentException("Unknown spliterator implementation: " + param1_iteratorImplementation);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.jena.atlas.iterator.ActionCount;
import org.apache.jena.jmh.JmhDefaultOptions;

import org.apache.jena.mem.collection.Sized;
import org.junit.Assert;
import org.junit.Test;

Expand Down Expand Up @@ -77,11 +78,17 @@ public Spliterator<Object> createSut(Object[] arrayWithNulls, int elementsCount)
throw new RuntimeException("Concurrent modification detected");
}
};
final var sized = new Sized() {
@Override
public int size() {
return elementsCount;
}
};
return switch (param1_iteratorImplementation) {
case "memvalue.SparseArraySpliterator" ->
new org.apache.jena.memvalue.SparseArraySpliterator<>(arrayWithNulls, count, checkForConcurrentModification);
case "mem2.SparseArraySpliterator" ->
new SparseArraySpliterator<>(arrayWithNulls, checkForConcurrentModification);
new SparseArraySpliterator<>(arrayWithNulls, sized);
default ->
throw new IllegalArgumentException("Unknown spliterator implementation: " + param1_iteratorImplementation);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.jena.atlas.iterator.ActionCount;
import org.apache.jena.jmh.JmhDefaultOptions;

import org.apache.jena.mem.collection.Sized;
import org.junit.Assert;
import org.junit.Test;

Expand Down Expand Up @@ -78,11 +79,17 @@ public Spliterator<Object> createSut(Object[] arrayWithNulls, int elementsCount)
throw new RuntimeException("Concurrent modification detected");
}
};
final var sized = new Sized() {
@Override
public int size() {
return elementsCount;
}
};
return switch (param1_iteratorImplementation) {
case "memvalue.SparseArraySpliterator" ->
new org.apache.jena.memvalue.SparseArraySpliterator<>(arrayWithNulls, count, checkForConcurrentModification);
case "mem2.SparseArraySpliterator" ->
new SparseArraySpliterator<>(arrayWithNulls, checkForConcurrentModification);
new SparseArraySpliterator<>(arrayWithNulls, sized);
default ->
throw new IllegalArgumentException("Unknown spliterator implementation: " + param1_iteratorImplementation);
};
Expand Down
2 changes: 1 addition & 1 deletion jena-benchmarks/jena-benchmarks-shadedJena560/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
<parent>
<groupId>org.apache.jena</groupId>
<artifactId>jena-benchmarks</artifactId>
<version>6.1.0</version>
<version>6.2.0-SNAPSHOT</version>
</parent>

<name>Apache Jena - Benchmarks Shaded Jena 5.6.0</name>
Expand Down
2 changes: 1 addition & 1 deletion jena-benchmarks/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
<parent>
<groupId>org.apache.jena</groupId>
<artifactId>jena</artifactId>
<version>6.1.0</version>
<version>6.2.0-SNAPSHOT</version>
</parent>

<name>Apache Jena - Benchmark Suite</name>
Expand Down
151 changes: 151 additions & 0 deletions jena-core/src/main/java/org/apache/jena/mem/GraphMemIndexedSet.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
*/

package org.apache.jena.mem;

import org.apache.jena.mem.store.TripleStore;
import org.apache.jena.mem.store.indexed.IndexedSetTripleStore;

/**
* In-memory {@link GraphMem} implementation that stores all triples in a single
* indexed set ({@link IndexedSetTripleStore}). This class is not thread-safe.
* <p>
* Different {@link IndexingStrategy indexing strategies} can be selected to
* balance memory usage and lookup performance. The triples themselves always live
* in a flat set; only the auxiliary subject/predicate/object indices are
* controlled by the strategy. See {@link IndexingStrategy} for the trade-offs of
* each variant.
* <p>
* While the index has not been built (e.g. with {@link IndexingStrategy#LAZY},
* {@link IndexingStrategy#LAZY_PARALLEL}, {@link IndexingStrategy#MANUAL} or
* {@link IndexingStrategy#MINIMAL}) the memory footprint is very low and the
* following operations are particularly fast:
* <ul>
* <li>{@link GraphMem#add} - adds a triple to the graph</li>
* <li>{@link GraphMem#delete} - removes a triple from the graph</li>
* </ul>
* A typical bulk-load pattern is to start without an index, add all triples and
* then call {@link #initializeIndexParallel()} to build the index in parallel.
*/
public class GraphMemIndexedSet extends GraphMem {

/**
* Creates a new graph that uses {@link IndexingStrategy#EAGER} as its
* indexing strategy. Equivalent to
* {@code new GraphMemIndexedSet(IndexingStrategy.EAGER)}.
*/
public GraphMemIndexedSet() {
this(IndexingStrategy.EAGER);
}

/**
* Creates a new, empty graph backed by a fresh {@link IndexedSetTripleStore}
* that is configured with the given indexing strategy.
*
* @param indexingStrategy the indexing strategy to use; controls when the
* subject/predicate/object index is built and how
* pattern lookups are evaluated. It is passed
* unchanged to the {@link IndexedSetTripleStore}
* constructor; no validation is performed here.
*/
public GraphMemIndexedSet(IndexingStrategy indexingStrategy) {
super(new IndexedSetTripleStore(indexingStrategy));
}

/**
* Internal constructor used by {@link #copy()} to wrap an already populated
* triple store.
*
* @param tripleStore the triple store to wrap. It must be an
* {@link IndexedSetTripleStore}: the type is not checked
* here, but every index-related method of this class
* downcasts the store and would fail with a
* {@link ClassCastException} for any other implementation.
*/
private GraphMemIndexedSet(final TripleStore tripleStore) {
super(tripleStore);
}

/**
* {@inheritDoc}
* <p>
* The new graph wraps the result of calling {@code copy()} on the underlying
* triple store, so what exactly is duplicated is decided by the store.
* NOTE(review): the store copy is expected to preserve the indexing strategy
* and to copy an already built index instead of rebuilding it - confirm
* against {@link IndexedSetTripleStore}.
*/
@Override
public GraphMemIndexedSet copy() {
return new GraphMemIndexedSet(this.tripleStore.copy());
}

/**
* Convenience accessor for the typed underlying store.
* <p>
* The downcast is unchecked. The public constructors always install an
* {@link IndexedSetTripleStore}; for graphs created by {@link #copy()} this
* assumes that copying an {@link IndexedSetTripleStore} yields another
* {@link IndexedSetTripleStore} (TODO confirm).
*
* @return the {@link IndexedSetTripleStore} backing this graph
*/
private IndexedSetTripleStore getIndexedSetTripleStore() {
return (IndexedSetTripleStore) this.tripleStore;
}

/**
* Returns the indexing strategy reported by the underlying store.
* The strategy is fixed for the lifetime of the graph; clearing or
* initializing the index does not change it.
*
* @return the indexing strategy
*/
public IndexingStrategy getIndexingStrategy() {
return getIndexedSetTripleStore().getIndexingStrategy();
}

/**
* Drops the current subject/predicate/object index and reverts to the
* behavior of the configured {@link IndexingStrategy}. What happens on
* subsequent pattern lookups therefore depends on that strategy, as
* documented on {@link IndexingStrategy}:
* <ul>
* <li>{@link IndexingStrategy#EAGER} - the index is rebuilt from the triple set</li>
* <li>{@link IndexingStrategy#LAZY} and {@link IndexingStrategy#LAZY_PARALLEL} -
* the index is rebuilt on demand by the next pattern match</li>
* <li>{@link IndexingStrategy#MANUAL} - pattern matches throw until the index
* is initialized explicitly again</li>
* <li>{@link IndexingStrategy#MINIMAL} - pattern matches fall back to filtering
* the triple set</li>
* </ul>
*/
public void clearIndex() {
getIndexedSetTripleStore().clearIndex();
}

/**
* Builds (or rebuilds) the index sequentially.
* After this call, pattern lookups will be served by the eager strategy
* regardless of the originally configured indexing strategy.
*
* @see #initializeIndexParallel()
*/
public void initializeIndex() {
getIndexedSetTripleStore().initializeIndex();
}

/**
* Builds (or rebuilds) the index in parallel.
* This can be substantially faster than {@link #initializeIndex()} for
* larger graphs. After this call, pattern lookups will be served by the
* eager strategy regardless of the originally configured indexing strategy.
*
* @see #initializeIndex()
*/
public void initializeIndexParallel() {
getIndexedSetTripleStore().initializeIndexParallel();
}

/**
* Reports whether the index is currently built and ready to serve pattern
* lookups directly. For graphs configured with {@link IndexingStrategy#LAZY}
* or {@link IndexingStrategy#LAZY_PARALLEL} this may flip from {@code false}
* to {@code true} as soon as the first pattern lookup is performed. For
* {@link IndexingStrategy#MANUAL} and {@link IndexingStrategy#MINIMAL} the
* index is never built automatically, so this only becomes {@code true}
* after {@link #initializeIndex()} or {@link #initializeIndexParallel()}
* has been called explicitly.
*
* @return {@code true} iff the index is initialized
*/
public boolean isIndexInitialized() {
return getIndexedSetTripleStore().isIndexInitialized();
}
}
65 changes: 36 additions & 29 deletions jena-core/src/main/java/org/apache/jena/mem/IndexingStrategy.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,54 +24,61 @@
import org.apache.jena.graph.Graph;

/**
* Indexing strategies supported by {@link org.apache.jena.mem.store.indexed.IndexedSetTripleStore}
* and {@link org.apache.jena.mem.store.roaring.RoaringTripleStore}.
* The indexing strategy determines how (and when) the auxiliary
* subject/predicate/object index is maintained for pattern-matching operations.
* <p>
* The graph always keeps a flat set of triples. Operations that do not involve
* pattern matching (size, iterating all triples, lookup of a fully concrete
* triple, etc.) are evaluated directly against this set and are unaffected by
* the indexing strategy.
* <p>
* Pattern matching refers to {@link Graph#find}, {@link Graph#remove} or
* {@link Graph#contains} called with a triple pattern such as
* {@code S__}, {@code SP_}, {@code S_O}, {@code _P_}, {@code _PO} or
* {@code __O} (where {@code _} denotes a wildcard) instead of a concrete
* triple {@code SPO}.
* Lookups for fully concrete triples ({@code SPO}) are always answered
* directly from the triple set and never use the index.
*/
public enum IndexingStrategy {

/**
* The index is always present: the graph starts with an index, like any
* other in-memory graph.
* {@link Graph#add}, {@link Graph#delete} and {@link Graph#clear()} update
* the index immediately. Calling {@code clearIndex} simply discards the
* existing index, which is then rebuilt from the triple set.
*/
EAGER,

/**
* Starts with no index. The index is built on demand the first time a
* pattern match is requested.
* Once built, behaves like {@link #EAGER}. Calling {@code clearIndex}
* discards the index; it will be rebuilt on demand the next time a
* pattern match is performed.
*/
LAZY,

/**
* Like {@link #LAZY}: starts with no index, builds it on demand when a
* pattern match is requested, behaves like {@link #EAGER} afterwards and
* rebuilds on demand after {@code clearIndex}. The difference is that the
* on-demand index build uses parallel processing for faster
* initialization on large graphs.
*/
LAZY_PARALLEL,

/**
* Starts with no index; the index is never built automatically.
* Pattern-match operations throw an {@link UnsupportedOperationException}
* until the index is initialized explicitly (e.g. via
* {@link org.apache.jena.mem.GraphMemIndexedSet#initializeIndex()}).
* After initialization, behaves like {@link #EAGER}.
* The index may be cleared manually; it then has to be initialized
* manually again before pattern matches are possible.
*/
MANUAL,

/**
* Starts with no index and none is built automatically. As long as the
* index has not been initialized, pattern-match operations are evaluated
* by linearly filtering the triple set, which is space-efficient but
* slower for large graphs. The index can be initialized explicitly to
* switch to eager behavior; calling {@code clearIndex} reverts to
* filtering again until the index is initialized again.
*/
MINIMAL
}
}
Loading