Skip to content

Commit c7ac103

Browse files
committed
[AMORO-4099] Add table-summary metric collection option for non-optimizing tables
- Allow collecting table_summary metrics when self-optimizing is disabled by setting table-summary.enabled=true - Fix periodic collection bug: remove optimizingNotNecessary() call in summary-only branch to prevent snapshot gate from blocking subsequent collections - Separate property key from self-optimizing prefix: self-optimizing.table-summary.enabled -> table-summary.enabled - Add debug logging for table summary collection path - Add comprehensive test coverage for summary-only mode Signed-off-by: Jiwon Park <jpark92@outlook.kr>
1 parent bb14041 commit c7ac103

5 files changed

Lines changed: 468 additions & 11 deletions

File tree

amoro-ams/src/main/java/org/apache/amoro/server/scheduler/inline/TableRuntimeRefreshExecutor.java

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,10 @@ protected long getNextExecutingTime(TableRuntime tableRuntime) {
7070
}
7171

7272
private boolean tryEvaluatingPendingInput(DefaultTableRuntime tableRuntime, MixedTable table) {
73-
// only evaluate pending input when optimizing is enabled and in idle state
7473
OptimizingConfig optimizingConfig = tableRuntime.getOptimizingConfig();
7574
boolean optimizingEnabled = optimizingConfig.isEnabled();
7675
if (optimizingEnabled && tableRuntime.getOptimizingStatus().equals(OptimizingStatus.IDLE)) {
77-
76+
// Evaluate pending input and collect table summary when optimizing is enabled and idle
7877
if (optimizingConfig.isMetadataBasedTriggerEnabled()
7978
&& !MetadataBasedEvaluationEvent.isEvaluatingNecessary(
8079
optimizingConfig, table, tableRuntime.getLastPlanTime())) {
@@ -102,11 +101,21 @@ private boolean tryEvaluatingPendingInput(DefaultTableRuntime tableRuntime, Mixe
102101

103102
tableRuntime.setTableSummary(evaluator.getPendingInput());
104103
return evaluatorIsNecessary;
104+
} else if (!optimizingEnabled && optimizingConfig.isTableSummaryEnabled()) {
105+
// Collect table summary metrics even when optimizing is disabled
106+
logger.debug(
107+
"{} collecting table summary (optimizing disabled, tableSummary enabled)",
108+
tableRuntime.getTableIdentifier());
109+
AbstractOptimizingEvaluator evaluator =
110+
IcebergTableUtil.createOptimizingEvaluator(tableRuntime, table, maxPendingPartitions);
111+
AbstractOptimizingEvaluator.PendingInput summary = evaluator.getPendingInput();
112+
logger.debug("{} table summary collected: {}", tableRuntime.getTableIdentifier(), summary);
113+
tableRuntime.setTableSummary(summary);
114+
return false;
105115
} else if (!optimizingEnabled) {
106116
logger.debug(
107117
"{} optimizing is not enabled, skip evaluating pending input",
108118
tableRuntime.getTableIdentifier());
109-
// indicates no optimization demand now
110119
return false;
111120
} else {
112121
logger.debug(
@@ -147,21 +156,27 @@ public void execute(TableRuntime tableRuntime) {
147156
AmoroTable<?> table = loadTable(tableRuntime);
148157
defaultTableRuntime.refresh(table);
149158
MixedTable mixedTable = (MixedTable) table.originalTable();
150-
// Check if there is any optimizing demand now.
159+
boolean snapshotChanged =
160+
(mixedTable.isKeyedTable()
161+
&& (lastOptimizedSnapshotId != defaultTableRuntime.getCurrentSnapshotId()
162+
|| lastOptimizedChangeSnapshotId
163+
!= defaultTableRuntime.getCurrentChangeSnapshotId()))
164+
|| (mixedTable.isUnkeyedTable()
165+
&& lastOptimizedSnapshotId != defaultTableRuntime.getCurrentSnapshotId());
166+
OptimizingConfig optimizingConfig = defaultTableRuntime.getOptimizingConfig();
167+
boolean tableSummaryOnly =
168+
!optimizingConfig.isEnabled() && optimizingConfig.isTableSummaryEnabled();
151169
boolean hasOptimizingDemand = false;
152-
if ((mixedTable.isKeyedTable()
153-
&& (lastOptimizedSnapshotId != defaultTableRuntime.getCurrentSnapshotId()
154-
|| lastOptimizedChangeSnapshotId
155-
!= defaultTableRuntime.getCurrentChangeSnapshotId()))
156-
|| (mixedTable.isUnkeyedTable()
157-
&& lastOptimizedSnapshotId != defaultTableRuntime.getCurrentSnapshotId())) {
170+
if (snapshotChanged || tableSummaryOnly) {
158171
hasOptimizingDemand = tryEvaluatingPendingInput(defaultTableRuntime, mixedTable);
159172
} else {
160173
logger.debug("{} optimizing is not necessary", defaultTableRuntime.getTableIdentifier());
161174
}
162175

163176
// Update adaptive interval according to evaluated result.
164-
if (defaultTableRuntime.getOptimizingConfig().isRefreshTableAdaptiveEnabled(interval)) {
177+
// Skip adaptive interval for table-summary-only mode to maintain fixed collection interval.
178+
if (!tableSummaryOnly
179+
&& defaultTableRuntime.getOptimizingConfig().isRefreshTableAdaptiveEnabled(interval)) {
165180
defaultTableRuntime.setLatestEvaluatedNeedOptimizing(hasOptimizingDemand);
166181
long newInterval = getAdaptiveExecutingInterval(defaultTableRuntime);
167182
defaultTableRuntime.setLatestRefreshInterval(newInterval);

amoro-ams/src/main/java/org/apache/amoro/server/table/TableConfigurations.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,11 @@ public static OptimizingConfig parseOptimizingConfig(Map<String, String> propert
232232
properties,
233233
TableProperties.ENABLE_SELF_OPTIMIZING,
234234
TableProperties.ENABLE_SELF_OPTIMIZING_DEFAULT))
235+
.setTableSummaryEnabled(
236+
CompatiblePropertyUtil.propertyAsBoolean(
237+
properties,
238+
TableProperties.TABLE_SUMMARY_ENABLED,
239+
TableProperties.TABLE_SUMMARY_ENABLED_DEFAULT))
235240
.setAllowPartialCommit(
236241
CompatiblePropertyUtil.propertyAsBoolean(
237242
properties,

0 commit comments

Comments
 (0)