Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7e93737
feat: support GraphSAGE
kitalkuyo-gita Nov 17, 2025
3866aa7
enhance: add feature select
kitalkuyo-gita Nov 17, 2025
22edacd
test: add test
kitalkuyo-gita Nov 17, 2025
67c1fb9
enhance: add test case
kitalkuyo-gita Nov 17, 2025
3f22f9f
enhance: add GQL support
kitalkuyo-gita Nov 17, 2025
86b4822
enhance: add cuda device && adjust dimssion
kitalkuyo-gita Nov 26, 2025
c2280b6
chore: add license
kitalkuyo-gita Nov 26, 2025
55e42b6
bugfix: add conda url
kitalkuyo-gita Nov 26, 2025
c8120ee
enhance: add user custom sys python path
kitalkuyo-gita Nov 26, 2025
726fc3a
rerfactor: fill original dimssion
kitalkuyo-gita Nov 26, 2025
5b4dd8a
refactor: update agg collect dimssion
kitalkuyo-gita Nov 26, 2025
f4a87d4
refactor: adjust dimension
kitalkuyo-gita Nov 26, 2025
a5de492
enhance: solve resource lack while boot
kitalkuyo-gita Nov 26, 2025
8de7b49
refactor: cython deps copy
kitalkuyo-gita Nov 26, 2025
bc86864
chore:remove useless code
kitalkuyo-gita Nov 27, 2025
9b6921d
fix: Replace var keyword with explicit type for JDK 8 compatibility
kitalkuyo-gita Mar 6, 2026
fadd0f8
fix: Replace FileWriter constructor with OutputStreamWriter for JDK 8…
kitalkuyo-gita Mar 6, 2026
c4c5480
ci: Install Python dependencies including PyTorch for GraphSAGE tests
kitalkuyo-gita Mar 6, 2026
3c1c656
ci: Trigger CI build to verify Python dependencies installation
kitalkuyo-gita Mar 6, 2026
bbe5900
ci: Install Python dependencies in JDK 11 workflow for GraphSAGE tests
kitalkuyo-gita Mar 7, 2026
0992714
Merge remote-tracking branch 'upstream/master' into issue-677
kitalkuyo-gita Mar 7, 2026
fe761c8
style: Remove unused imports in BuildInSqlFunctionTable to fix checks…
kitalkuyo-gita Mar 7, 2026
2bd227f
fix: Re-add ConnectedComponents to SQL function table registration
kitalkuyo-gita Mar 7, 2026
fe709e6
fix: Add LabelPropagation to SQL function table registration
kitalkuyo-gita Mar 7, 2026
8e4477e
fix: Add Louvain algorithm to SQL function table registration
kitalkuyo-gita Mar 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/ci-jdk11.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ jobs:
with:
version: "21.7"

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.9'
cache: 'pip'

- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install -r geaflow/geaflow-dsl/geaflow-dsl-plan/src/main/resources/requirements.txt
pip list | grep -i torch

# Current hive connector is incompatible with jdk11, implement 4.0.0+ hive version in later.
- name: Build and Test On JDK 11
run: |
Expand Down
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,18 @@ jobs:
with:
version: "21.7"

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.9'
cache: 'pip'

- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install -r geaflow/geaflow-dsl/geaflow-dsl-plan/src/main/resources/requirements.txt
pip list | grep -i torch

- name: Build and Test On JDK 8
run: mvn -B -e clean test -Pjdk8 -pl !geaflow/geaflow-plugins/geaflow-store/geaflow-store-vector
-Duser.timezone=Asia/Shanghai -Dlog4j.configuration="log4j .rootLogger=WARN, stdout"
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,16 @@ public class FrameworkConfigKeys implements Serializable {
.noDefaultValue()
.description("infer env conda url");

public static final ConfigKey INFER_ENV_USE_SYSTEM_PYTHON = ConfigKeys
.key("geaflow.infer.env.use.system.python")
.defaultValue(false)
.description("use system Python instead of creating virtual environment");

public static final ConfigKey INFER_ENV_SYSTEM_PYTHON_PATH = ConfigKeys
.key("geaflow.infer.env.system.python.path")
.noDefaultValue()
.description("path to system Python executable (e.g., /usr/bin/python3 or /opt/homebrew/bin/python3)");

public static final ConfigKey ASP_ENABLE = ConfigKeys
.key("geaflow.iteration.asp.enable")
.defaultValue(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.geaflow.common.config.keys.FrameworkConfigKeys;
import org.apache.geaflow.common.exception.GeaflowRuntimeException;
import org.apache.geaflow.infer.InferContext;
import org.apache.geaflow.infer.InferContextPool;
import org.apache.geaflow.model.graph.message.DefaultGraphMessage;
import org.apache.geaflow.model.graph.vertex.IVertex;
import org.apache.geaflow.model.record.RecordArgs.GraphRecordNames;
Expand Down Expand Up @@ -164,11 +165,17 @@ class IncGraphInferComputeContextImpl<OUT> extends IncGraphComputeContextImpl im
public IncGraphInferComputeContextImpl() {
if (clientLocal.get() == null) {
try {
inferContext = new InferContext<>(runtimeContext.getConfiguration());
// Use InferContextPool instead of direct instantiation
// This ensures efficient reuse of InferContext instances
inferContext = InferContextPool.getOrCreate(runtimeContext.getConfiguration());
clientLocal.set(inferContext);
LOGGER.debug("InferContext obtained from pool: {}",
InferContextPool.getStatus());
} catch (Exception e) {
throw new GeaflowRuntimeException(e);
LOGGER.error("Failed to obtain InferContext from pool", e);
throw new GeaflowRuntimeException(
"InferContext initialization failed: " + e.getMessage(), e);
}
clientLocal.set(inferContext);
} else {
inferContext = clientLocal.get();
}
Expand All @@ -186,7 +193,9 @@ public OUT infer(Object... modelInputs) {
@Override
public void close() throws IOException {
if (clientLocal.get() != null) {
clientLocal.get().close();
// Do NOT close the InferContext here since it's managed by the pool
// The pool handles lifecycle management
LOGGER.debug("Detaching from pooled InferContext");
clientLocal.remove();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.apache.geaflow.dsl.udf.graph.ClusterCoefficient;
import org.apache.geaflow.dsl.udf.graph.CommonNeighbors;
import org.apache.geaflow.dsl.udf.graph.ConnectedComponents;
import org.apache.geaflow.dsl.udf.graph.GraphSAGE;
import org.apache.geaflow.dsl.udf.graph.IncKHopAlgorithm;
import org.apache.geaflow.dsl.udf.graph.IncMinimumSpanningTree;
import org.apache.geaflow.dsl.udf.graph.IncWeakConnectedComponents;
Expand Down Expand Up @@ -232,15 +233,16 @@ public class BuildInSqlFunctionTable extends ListSqlOperatorTable {
.add(GeaFlowFunction.of(IncMinimumSpanningTree.class))
.add(GeaFlowFunction.of(ClosenessCentrality.class))
.add(GeaFlowFunction.of(WeakConnectedComponents.class))
.add(GeaFlowFunction.of(ConnectedComponents.class))
.add(GeaFlowFunction.of(LabelPropagation.class))
.add(GeaFlowFunction.of(Louvain.class))
.add(GeaFlowFunction.of(TriangleCount.class))
.add(GeaFlowFunction.of(ClusterCoefficient.class))
.add(GeaFlowFunction.of(IncWeakConnectedComponents.class))
.add(GeaFlowFunction.of(CommonNeighbors.class))
.add(GeaFlowFunction.of(JaccardSimilarity.class))
.add(GeaFlowFunction.of(IncKHopAlgorithm.class))
.add(GeaFlowFunction.of(LabelPropagation.class))
.add(GeaFlowFunction.of(ConnectedComponents.class))
.add(GeaFlowFunction.of(Louvain.class))
.add(GeaFlowFunction.of(GraphSAGE.class))
.build();

public BuildInSqlFunctionTable(GQLJavaTypeFactory typeFactory) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.geaflow.dsl.udf.graph;

import java.util.List;

/**
* Feature reducer for selecting important feature dimensions to reduce transmission overhead.
*
* <p>This class implements feature selection by keeping only the most important dimensions
* from the full feature vector. This significantly reduces the amount of data transferred
* between Java and Python processes, improving performance for large feature vectors.
*
* <p>Usage:
* <pre>
* // Select first 64 dimensions
* int[] selectedDims = new int[64];
* for (int i = 0; i < 64; i++) {
* selectedDims[i] = i;
* }
* FeatureReducer reducer = new FeatureReducer(selectedDims);
* double[] reduced = reducer.reduceFeatures(fullFeatures);
* </pre>
*
* <p>Benefits:
* - Reduces memory usage for feature storage
* - Reduces network/IO overhead in Java-Python communication
* - Improves inference speed by processing smaller feature vectors
* - Maintains model accuracy if important dimensions are selected correctly
*/
public class FeatureReducer {

private final int[] selectedDimensions;

/**
* Creates a feature reducer with specified dimension indices.
*
* @param selectedDimensions Array of dimension indices to keep.
* Indices should be valid for the full feature vector.
* Duplicate indices are allowed but not recommended.
*/
public FeatureReducer(int[] selectedDimensions) {
if (selectedDimensions == null || selectedDimensions.length == 0) {
throw new IllegalArgumentException(
"Selected dimensions array cannot be null or empty");
}
this.selectedDimensions = selectedDimensions.clone(); // Defensive copy
}

/**
* Reduces a full feature vector to selected dimensions.
*
* @param fullFeatures The complete feature vector
* @return Reduced feature vector containing only selected dimensions
* @throws IllegalArgumentException if fullFeatures is null or too short
*/
public double[] reduceFeatures(double[] fullFeatures) {
if (fullFeatures == null) {
throw new IllegalArgumentException("Full features array cannot be null");
}

double[] reducedFeatures = new double[selectedDimensions.length];
int maxDim = getMaxDimension();

if (maxDim >= fullFeatures.length) {
throw new IllegalArgumentException(
String.format("Feature vector length (%d) is too short for selected dimensions (max: %d)",
fullFeatures.length, maxDim + 1));
}

for (int i = 0; i < selectedDimensions.length; i++) {
int dimIndex = selectedDimensions[i];
reducedFeatures[i] = fullFeatures[dimIndex];
}

return reducedFeatures;
}

/**
* Reduces a feature list to selected dimensions.
*
* @param fullFeatures The complete feature list
* @return Reduced feature array containing only selected dimensions
*/
public double[] reduceFeatures(List<Double> fullFeatures) {
if (fullFeatures == null) {
throw new IllegalArgumentException("Full features list cannot be null");
}

double[] fullArray = new double[fullFeatures.size()];
for (int i = 0; i < fullFeatures.size(); i++) {
Double value = fullFeatures.get(i);
fullArray[i] = value != null ? value : 0.0;
}

return reduceFeatures(fullArray);
}

/**
* Reduces multiple feature vectors in batch.
*
* @param fullFeaturesList List of full feature vectors
* @return Array of reduced feature vectors
*/
public double[][] reduceFeaturesBatch(List<double[]> fullFeaturesList) {
if (fullFeaturesList == null) {
throw new IllegalArgumentException("Full features list cannot be null");
}

double[][] reducedFeatures = new double[fullFeaturesList.size()][];
for (int i = 0; i < fullFeaturesList.size(); i++) {
reducedFeatures[i] = reduceFeatures(fullFeaturesList.get(i));
}

return reducedFeatures;
}

/**
* Gets the maximum dimension index in the selected dimensions.
*
* @return Maximum dimension index
*/
private int getMaxDimension() {
int max = selectedDimensions[0];
for (int dim : selectedDimensions) {
if (dim > max) {
max = dim;
}
}
return max;
}

/**
* Gets the number of selected dimensions.
*
* @return Number of dimensions in the reduced feature vector
*/
public int getReducedDimension() {
return selectedDimensions.length;
}

/**
* Gets the selected dimension indices.
*
* @return Copy of the selected dimension indices array
*/
public int[] getSelectedDimensions() {
return selectedDimensions.clone(); // Defensive copy
}

/**
* Creates a feature reducer that selects the first N dimensions.
*
* <p>This is a convenience method for the common case of selecting
* the first N dimensions from a feature vector.
*
* @param numDimensions Number of dimensions to select from the beginning
* @return FeatureReducer instance
*/
public static FeatureReducer selectFirst(int numDimensions) {
if (numDimensions <= 0) {
throw new IllegalArgumentException(
"Number of dimensions must be positive, got: " + numDimensions);
}

int[] dims = new int[numDimensions];
for (int i = 0; i < numDimensions; i++) {
dims[i] = i;
}

return new FeatureReducer(dims);
}

/**
* Creates a feature reducer that selects evenly spaced dimensions.
*
* <p>This method selects dimensions at regular intervals, which can be useful
* for uniform sampling across the feature space.
*
* @param numDimensions Number of dimensions to select
* @param totalDimensions Total number of dimensions in the full feature vector
* @return FeatureReducer instance
*/
public static FeatureReducer selectEvenlySpaced(int numDimensions, int totalDimensions) {
if (numDimensions <= 0) {
throw new IllegalArgumentException(
"Number of dimensions must be positive, got: " + numDimensions);
}
if (totalDimensions <= 0) {
throw new IllegalArgumentException(
"Total dimensions must be positive, got: " + totalDimensions);
}
if (numDimensions > totalDimensions) {
throw new IllegalArgumentException(
String.format("Cannot select %d dimensions from %d total dimensions",
numDimensions, totalDimensions));
}

int[] dims = new int[numDimensions];
double step = (double) totalDimensions / numDimensions;
for (int i = 0; i < numDimensions; i++) {
dims[i] = (int) Math.floor(i * step);
}

return new FeatureReducer(dims);
}
}

Loading