diff --git a/cmd/reconciler/main.go b/cmd/reconciler/main.go new file mode 100644 index 0000000..4d41d97 --- /dev/null +++ b/cmd/reconciler/main.go @@ -0,0 +1,400 @@ +// Mempool reconciler — closes the loop on persisted predictions. +// +// Reads `mempool_predictions` written by the Rust mempool writer (PR #133) +// and produces one `mempool_reconciliation` row per prediction once the +// outcome is known. Together the two tables answer "did the tx land where +// we said it would, in the order we said it would, hitting the pool we +// said it would?" — entirely in SQL. +// +// Architecture (per-block loop): +// 1. ethclient.SubscribeNewHead → chan *types.Header +// 2. For each new header: BlockByHash → iterate Transactions() +// 3. Per tx: LookupPredictionByTxHash; if found, fetch receipt, write +// `outcome='confirmed'` with block_delta + pool_path_correct +// 4. Every staleSweepInterval: MarkStaleAsDropped(currentHead) → bulk +// INSERT `outcome='dropped'` for predictions where target+12 ≤ head +// +// Receipt fetch is per-prediction-hit (not per-block tx) so a block of +// 200 txs with 1 prediction hit costs one receipt RPC, not 200. +// +// Run with: +// +// MEMPOOL_LEDGER_DSN=postgres://aether:aether@localhost:5433/aether \ +// ETH_RPC_URL=wss://eth-mainnet.g.alchemy.com/v2/ \ +// RECONCILER_METRICS_ADDR=:9094 \ +// ./aether-reconciler + +package main + +import ( + "context" + "errors" + "log/slog" + "net/http" + "os" + "os/signal" + "sync" + "syscall" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethclient" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + + "github.com/aether-arb/aether/internal/db" +) + +const ( + // staleSweepInterval is the cadence of the dropped-outcome batch + // query. 
Twice the average block time so a 24-block prediction + // reaches the dropped state within ~12 s of its window closing. + staleSweepInterval = 6 * time.Second + + // receiptFetchTimeout caps how long the reconciler waits for a single + // `eth_getTransactionReceipt` round-trip. Sized for the p99 mainnet + // receipt latency from major providers (~1.5 s); if the call wedges + // past this, the reconciliation row lands without `pool_path_correct` + // rather than block the per-block loop. + receiptFetchTimeout = 3 * time.Second + + // blockFetchTimeout caps the `eth_getBlockByHash` call. Generous + // because a single failure stalls every prediction in that block, + // not just one. + blockFetchTimeout = 5 * time.Second +) + +func main() { + slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, nil))) + + rpcURL := os.Getenv("ETH_RPC_URL") + if rpcURL == "" { + slog.Error("ETH_RPC_URL not set") + os.Exit(1) + } + dsn := os.Getenv("MEMPOOL_LEDGER_DSN") + if dsn == "" { + slog.Error("MEMPOOL_LEDGER_DSN not set") + os.Exit(1) + } + metricsAddr := os.Getenv("RECONCILER_METRICS_ADDR") + if metricsAddr == "" { + metricsAddr = ":9094" + } + + rootCtx, rootCancel := context.WithCancel(context.Background()) + defer rootCancel() + installSignalHandler(rootCancel) + + dialCtx, dialCancel := context.WithTimeout(rootCtx, 10*time.Second) + defer dialCancel() + ethClient, err := ethclient.DialContext(dialCtx, rpcURL) + if err != nil { + slog.Error("dial ETH_RPC_URL failed", "err", err) + os.Exit(1) + } + slog.Info("connected to ethereum node") + + registry := prometheus.NewRegistry() + dbMetrics := db.NewMempoolReconciliationMetrics(registry) + loopMetrics := newLoopMetrics(registry) + + pgRecon, err := db.NewPgMempoolReconciliation(rootCtx, dsn, dbMetrics) + if err != nil { + slog.Error("PgMempoolReconciliation connect failed", "err", err) + os.Exit(1) + } + defer pgRecon.Close() + + // /metrics endpoint runs on a background server so the binary is + // scrapeable by Prometheus 
without coupling to the engine's existing + // :9092 endpoint. + metricsServer := startMetricsServer(metricsAddr, registry) + defer func() { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + _ = metricsServer.Shutdown(shutdownCtx) + }() + + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + runHeaderLoop(rootCtx, ethClient, pgRecon, loopMetrics) + }() + go func() { + defer wg.Done() + runStaleSweepLoop(rootCtx, ethClient, pgRecon) + }() + + <-rootCtx.Done() + slog.Info("shutdown signalled; waiting for loops to exit") + // Give the loops a few seconds to drain in-flight reconciliations. + doneCh := make(chan struct{}) + go func() { + wg.Wait() + close(doneCh) + }() + select { + case <-doneCh: + case <-time.After(10 * time.Second): + slog.Warn("loops did not exit within 10s; tearing down anyway") + } +} + +func installSignalHandler(cancel context.CancelFunc) { + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + go func() { + sig := <-sigCh + slog.Info("signal received", "sig", sig) + cancel() + }() +} + +func startMetricsServer(addr string, registry *prometheus.Registry) *http.Server { + mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{Registry: registry})) + srv := &http.Server{Addr: addr, Handler: mux, ReadHeaderTimeout: 3 * time.Second} + go func() { + slog.Info("metrics server listening", "addr", addr) + if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { + slog.Error("metrics server failed", "err", err) + } + }() + return srv +} + +// runHeaderLoop is the hot path. SubscribeNewHead provides a steady stream +// of *types.Header (one per ~12 s on mainnet); each header drives one +// block-resolution pass. +// +// SubscribeNewHead errors trigger a single reconnect + retry. 
A second +// failure exits the loop so the orchestrator (systemd / k8s) can restart +// the binary cleanly — a long-lived subscriber that silently stalls is +// worse than a binary that exits and gets restarted. +func runHeaderLoop( + ctx context.Context, + client *ethclient.Client, + recon *db.PgMempoolReconciliation, + metrics *loopMetrics, +) { + headers := make(chan *types.Header, 8) + sub, err := client.SubscribeNewHead(ctx, headers) + if err != nil { + slog.Error("SubscribeNewHead failed", "err", err) + return + } + defer sub.Unsubscribe() + slog.Info("subscribed to newHeads") + + for { + select { + case <-ctx.Done(): + return + case err := <-sub.Err(): + slog.Error("newHeads subscription error", "err", err) + return + case header := <-headers: + handleHeader(ctx, client, recon, metrics, header) + } + } +} + +// handleHeader resolves every prediction whose pending_tx_hash appears in +// this block. Per-block cost is one block-by-hash + one receipt-by-hash +// per prediction hit. Predictions are the rare case (a few per block on a +// good day) so the receipt fetches do not dominate. 
+func handleHeader( + ctx context.Context, + client *ethclient.Client, + recon *db.PgMempoolReconciliation, + metrics *loopMetrics, + header *types.Header, +) { + metrics.HeadersProcessed.Inc() + + blockCtx, cancel := context.WithTimeout(ctx, blockFetchTimeout) + defer cancel() + block, err := client.BlockByHash(blockCtx, header.Hash()) + if err != nil { + slog.Warn("BlockByHash failed; skipping reconciliation for this block", + "block_hash", header.Hash().Hex(), + "err", err) + metrics.HeaderFetchErrors.Inc() + return + } + + resolvedAt := time.Now().UTC() + blockNumber := block.NumberU64() + + for txIdx, tx := range block.Transactions() { + var txHash [32]byte + copy(txHash[:], tx.Hash().Bytes()) + + lookupCtx, lookupCancel := context.WithTimeout(ctx, blockFetchTimeout) + pred, found, err := recon.LookupPredictionByTxHash(lookupCtx, txHash) + lookupCancel() + if err != nil { + slog.Warn("LookupPredictionByTxHash failed", + "tx_hash", tx.Hash().Hex(), + "err", err) + metrics.LookupErrors.Inc() + continue + } + if !found { + continue + } + + actualBlock := blockNumber + actualIdx := txIdx + blockDelta := int(int64(actualBlock) - int64(pred.PredictedTargetBlock)) + + var poolPathCorrect *bool + if pred.PoolAddress != nil { + result, err := receiptHitsPool(ctx, client, tx.Hash(), *pred.PoolAddress) + if err != nil { + // Receipt fetch failure leaves pool_path_correct NULL so + // the row still lands. The TransactionReceiptErrors + // counter is the alert signal. 
+ slog.Debug("TransactionReceipt failed; pool_path_correct=NULL", + "tx_hash", tx.Hash().Hex(), + "err", err) + metrics.ReceiptFetchErrors.Inc() + } else { + poolPathCorrect = &result + metrics.PoolPathChecks.WithLabelValues(pred.Protocol, boolLabel(result)).Inc() + } + } + + metrics.BlockDelta.Observe(float64(blockDelta)) + + recon.InsertReconciliation(db.NewReconciliation{ + PredictionID: pred.PredictionID, + ResolutionTs: resolvedAt, + Outcome: db.OutcomeConfirmed, + ActualTargetBlock: &actualBlock, + ActualTxIndex: &actualIdx, + BlockDelta: &blockDelta, + PoolPathCorrect: poolPathCorrect, + }) + } +} + +// receiptHitsPool fetches the tx's receipt and returns true iff any log +// entry's `Address` matches `poolAddr`. The predicted swap is expected to +// emit a `Swap`/`Sync`/`TokensTraded` event from the pool contract, so the +// address match alone is sufficient — decoding the event topic would +// confirm "yes it was a swap" but adds protocol-specific decode tables +// without changing the answer to "did we route to the pool we expected". +func receiptHitsPool( + ctx context.Context, + client *ethclient.Client, + txHash common.Hash, + poolAddr [20]byte, +) (bool, error) { + receiptCtx, cancel := context.WithTimeout(ctx, receiptFetchTimeout) + defer cancel() + receipt, err := client.TransactionReceipt(receiptCtx, txHash) + if err != nil { + return false, err + } + want := common.BytesToAddress(poolAddr[:]) + for _, log := range receipt.Logs { + if log.Address == want { + return true, nil + } + } + return false, nil +} + +// runStaleSweepLoop runs the periodic dropped-outcome batch. Reads the +// chain head from the eth client on every tick (rather than caching the +// header from runHeaderLoop) so the two loops stay independent — a stalled +// WS subscription does not freeze the dropped sweep. 
+func runStaleSweepLoop( + ctx context.Context, + client *ethclient.Client, + recon *db.PgMempoolReconciliation, +) { + ticker := time.NewTicker(staleSweepInterval) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + headCtx, cancel := context.WithTimeout(ctx, blockFetchTimeout) + head, err := client.BlockNumber(headCtx) + cancel() + if err != nil { + slog.Warn("BlockNumber failed; skipping stale sweep", "err", err) + continue + } + rows, err := recon.MarkStaleAsDropped(ctx, head) + if err != nil { + slog.Warn("MarkStaleAsDropped failed", "err", err) + continue + } + if rows > 0 { + slog.Info("stale sweep marked predictions as dropped", + "rows", rows, "head", head) + } + } + } +} + +func boolLabel(b bool) string { + if b { + return "true" + } + return "false" +} + +// loopMetrics groups the per-loop Prometheus families that are computed +// in-process by the header / sweep loops. The DB-layer metrics live with +// PgMempoolReconciliation. +type loopMetrics struct { + HeadersProcessed prometheus.Counter + HeaderFetchErrors prometheus.Counter + LookupErrors prometheus.Counter + ReceiptFetchErrors prometheus.Counter + BlockDelta prometheus.Histogram + PoolPathChecks *prometheus.CounterVec +} + +func newLoopMetrics(reg prometheus.Registerer) *loopMetrics { + m := &loopMetrics{ + HeadersProcessed: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "aether_mempool_reconciler_headers_processed_total", + Help: "Block headers received from the WS newHeads subscription and processed", + }), + HeaderFetchErrors: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "aether_mempool_reconciler_header_fetch_errors_total", + Help: "BlockByHash failures (per-header)", + }), + LookupErrors: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "aether_mempool_reconciler_lookup_errors_total", + Help: "LookupPredictionByTxHash failures (per-tx)", + }), + ReceiptFetchErrors: prometheus.NewCounter(prometheus.CounterOpts{ + Name: 
"aether_mempool_reconciler_receipt_fetch_errors_total", + Help: "TransactionReceipt failures; reconciliation row still lands with pool_path_correct=NULL", + }), + BlockDelta: prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "aether_mempool_block_delta", + Help: "Confirmed prediction's actual_target_block minus predicted_target_block. PromQL: 1h-window accuracy = histogram_quantile(0.5, …) over time.", + Buckets: []float64{-2, -1, 0, 1, 2, 3, 5, 8, 12, 20}, + }), + PoolPathChecks: prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "aether_mempool_pool_path_total", + Help: "Confirmed predictions whose receipt logs were checked against the predicted pool, by protocol and correctness", + }, []string{"protocol", "correct"}), + } + reg.MustRegister( + m.HeadersProcessed, m.HeaderFetchErrors, m.LookupErrors, + m.ReceiptFetchErrors, m.BlockDelta, m.PoolPathChecks, + ) + return m +} diff --git a/crates/grpc-server/Cargo.toml b/crates/grpc-server/Cargo.toml index bd2766a..ae5eea0 100644 --- a/crates/grpc-server/Cargo.toml +++ b/crates/grpc-server/Cargo.toml @@ -15,6 +15,10 @@ path = "src/main.rs" name = "aether-replay" path = "src/bin/aether_replay.rs" +[[bin]] +name = "aether-profit-scorer" +path = "src/bin/aether_profit_scorer.rs" + [dependencies] aether-common = { path = "../common" } aether-ingestion = { path = "../ingestion" } @@ -48,6 +52,8 @@ serde_json = { workspace = true } uuid = { workspace = true } chrono = { workspace = true } sha2 = "0.10" +sqlx = { workspace = true } +bigdecimal = { workspace = true } [build-dependencies] tonic-build = { workspace = true } prost-build = { workspace = true } diff --git a/crates/grpc-server/src/bin/aether_profit_scorer.rs b/crates/grpc-server/src/bin/aether_profit_scorer.rs new file mode 100644 index 0000000..4c08e40 --- /dev/null +++ b/crates/grpc-server/src/bin/aether_profit_scorer.rs @@ -0,0 +1,2085 @@ +//! Mempool profit scorer — issue #132. +//! +//! 
Closes the value loop on PR #133 (predictions) + PR #134 (reconciliation): +//! for every prediction that confirmed, computes what our analytical arb +//! cycle would have realised against the **actual** post-state of the pool +//! at the block where the victim swap landed. The headline answer is +//! `SUM(net_profit_wei) WHERE decision='profitable'` over the soak window. +//! +//! Architecture: +//! +//! Bootstrap: load pools.toml → fetch all reserves at latest block → +//! build PriceGraph + TokenIndex. Held in `ScorerState` as the reference +//! graph; refreshed every `GRAPH_REFRESH_INTERVAL` so the "rest of the +//! graph" baseline stays close to current chain state. +//! +//! Poll loop: every `POLL_INTERVAL` SELECTs confirmed predictions that +//! have no profitability row yet. For each one we fetch the affected +//! pool's reserves at `actual_target_block` (one `eth_call` with a +//! historical BlockId), clone the reference graph and replace the +//! affected edge's reserves with the actual-block values, run +//! `BellmanFord::detect_from_affected` on the clone, and if a profitable +//! cycle is found we run the same ternary-search optimiser the engine +//! uses. The optimiser returns net_profit_wei (gross minus per-protocol +//! gas estimate); we INSERT the row with +//! `decision = profitable / unprofitable / no_path`. +//! +//! Approximation note: the "rest of the graph" reflects the latest fetched +//! reserves, not the actual_target_block. Properly fetching all 76 pools' +//! reserves at the prediction's block would cost 76 RPC calls per scoring +//! and is deferred. For most cycles (top pools shift slowly) the +//! approximation is acceptable; cases where it matters surface as +//! `decision=unprofitable` rows that PR-3 v2 (with full-block fetch) could +//! re-score upward. +//! +//! Inlined helpers (fetch_pool_state_at, build_graph, u256_to_f64, sol! +//! getReserves / slot0) are deliberate duplicates of the equivalents in +//! `bin/aether_replay.rs`. 
Extracting them into a shared module would +//! touch the merged replay file (2200+ lines) and inflate this PR's +//! review burden. Follow-up: deduplicate after the mempool phase lands. +//! +//! Run with: +//! +//! MEMPOOL_LEDGER_DSN=postgres://aether:aether@localhost:5433/aether \ +//! ETH_RPC_URL=wss://eth-mainnet.g.alchemy.com/v2/ \ +//! AETHER_POOLS_CONFIG=$(pwd)/config/pools.toml \ +//! PROFIT_SCORER_METRICS_ADDR=:9095 \ +//! ./aether-profit-scorer + +use std::collections::{HashMap, HashSet}; +use std::net::SocketAddr; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use alloy::eips::{BlockId, BlockNumberOrTag}; +use alloy::network::Ethereum; +use alloy::primitives::{address, Address, U256}; +use alloy::providers::{DynProvider, Provider, ProviderBuilder}; +use alloy::rpc::types::TransactionRequest; +use alloy::sol; +use alloy::sol_types::{SolCall, SolValue}; +use anyhow::{Context, Result}; +use chrono::Utc; +use clap::Parser; +use prometheus::{Encoder, Registry, TextEncoder}; +use serde::Deserialize; +use sqlx::postgres::PgPoolOptions; +use tokio::time::{interval, MissedTickBehavior}; +use tracing::{debug, error, info, warn}; +use tracing_subscriber::EnvFilter; + +use aether_common::types::{PoolId, ProtocolType, SwapStep}; +use aether_detector::bellman_ford::BellmanFord; +use aether_detector::gas as gas_model; +use aether_detector::opportunity::DetectedCycle; +use aether_detector::optimizer::ternary_search_optimal_input; +use aether_grpc_server::profitability_writer::{ + profit_writer_from_env, NewProfitabilityScore, PgProfitabilityWriter, ProfitabilitySink, + ProfitabilityWriterMetrics, UnscoredConfirmedPrediction, DECISION_NO_PATH, + DECISION_PROFITABLE, DECISION_REVERTED, DECISION_UNPROFITABLE, +}; +use aether_simulator::calldata::{ + build_execute_arb_calldata, build_univ2_swap_calldata, build_univ3_swap_calldata, +}; +use aether_simulator::fork::{RpcForkedState, SimConfig}; +use aether_simulator::EvmSimulator; +use 
aether_state::price_graph::PriceGraph; +use aether_state::token_index::TokenIndex; + +/// Cadence of the unscored-prediction SQL poll. 30 s matches the +/// acceptance criterion in #132: "scorer processes every confirmed +/// prediction within 30 s of its reconciliation row". +const POLL_INTERVAL: Duration = Duration::from_secs(30); + +/// How often the reference graph is refreshed from latest-block reserves. +/// 5 min balances RPC budget against staleness; the per-scoring fetch +/// still hits the affected pool at actual_target_block so the affected +/// edge is always exact. +const GRAPH_REFRESH_INTERVAL: Duration = Duration::from_secs(300); + +/// Maximum predictions scored per poll tick. Bounds memory + RPC fan-out +/// when the scorer starts with a backlog (e.g. it was offline for an hour +/// and 100+ unscored predictions are waiting). +const SCORE_BATCH_LIMIT: i64 = 25; + +/// Maximum hops in a candidate cycle. Matches the engine's default so +/// the scorer reproduces the same paths the engine would have considered +/// at decode time. +const MAX_HOPS: usize = 4; + +/// Bellman-Ford time budget per detection pass, in microseconds. Same +/// envelope as the engine's hot-path detection so the scorer's cycle +/// search is apples-to-apples with the production predictor. +const DETECT_BUDGET_US: u64 = 3_000; + +/// 2^96 as f64. Used to convert UniswapV3 `sqrtPriceX96` into a +/// floating-point price. +const Q96: f64 = 79_228_162_514_264_337_593_543_950_336.0; + +/// Default base fee assumption (wei) when `eth_getBlock(latest)` is +/// unavailable. 30 gwei matches the engine's typical assumption in +/// quiet markets; replaced by the actual base fee on every refresh. +const DEFAULT_BASE_FEE_WEI: u128 = 30_000_000_000; + +/// Upper bound on the number of pools augmented from `mempool_predictions`. +/// Bounds memory + bootstrap RPC fan-out (one `eth_call` per pool to fetch +/// reserves). 
The current production registry has ~55 pools; allow 5x +/// headroom while still containing pathological cases (e.g. a misbehaving +/// engine writing thousands of bogus pool addresses). +const MAX_DB_PREDICTED_POOLS: i64 = 256; + +/// Default fee in basis points for DB-augmented pools whose protocol is +/// V2-style. Uniswap V2, SushiSwap, and almost every V2 fork charge 30 bps; +/// the 0.05% (5 bps) and 1% (100 bps) outliers exist but are rare enough on +/// V2 forks that the default is good enough for the f64 rate weight here. +/// The U256 verifier only uses fee_bps for V2/Sushi hops, where it's exact. +const DEFAULT_V2_FEE_BPS: u32 = 30; + +/// Default fee for DB-augmented Uniswap V3 pools. V3's actual fee comes +/// from `pool.fee()` and lives in one of (1, 5, 30, 100) bps; we can't +/// know it without an extra RPC and the U256 verifier returns `None` for +/// V3 hops anyway, so this only affects the f64 rate path's graph weight +/// — a small error swamped by the rate magnitude itself. +const DEFAULT_V3_FEE_BPS: u32 = 5; + +/// Safety floor for f64 fallback verdicts. The U256 verifier returns +/// `None` for any cycle it can't resolve exactly — V3 hops, drained +/// pools, edge-selection picking a pool whose state is missing, etc. +/// In those cases the score falls back to the f64 optimiser's number, +/// which is exactly the precision-biased path this PR set out to +/// contain. So: cap the trust. Any f64-only verdict claiming net +/// profit above this floor is downgraded to `DECISION_REVERTED` because +/// a 1+ ETH arb on mainnet would be captured intra-block by faster +/// searchers and never reach our scorer. The threshold is denominated +/// in the starting token's base units, which matches `net_profit_wei`. +const MAX_PLAUSIBLE_F64_NET_WEI: i128 = 1_000_000_000_000_000_000; // 1 ETH worth + +// ── revm V3 verifier constants ───────────────────────────────────── + +/// Mainnet infra addresses — constructor args for AetherExecutor. 
+const AAVE_POOL: Address = address!("87870Bca3F3fD6335C3F4ce8392D69350B4fA4E2"); +const BALANCER_VAULT: Address = address!("BA12222222228d8Ba445958a75a0704d566BF2C8"); +const BANCOR_NETWORK: Address = address!("eEF417e1D5CC832e619ae18D2F140De2999dD4fB"); +const WETH_ADDR: Address = address!("C02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2"); +const USDC_ADDR: Address = address!("A0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48"); +const DAI_ADDR: Address = address!("6B175474E89094C44Da98b954EedeAC495271d0F"); +const USDT_ADDR: Address = address!("dAC17F958D2ee523a2206206994597C13D831ec7"); + +/// Deterministic deployer/owner for the scorer's in-revm executor. +const SIM_OWNER: Address = address!("1111111111111111111111111111111111111111"); + +/// Default executor artifact path (relative to CWD). +const DEFAULT_EXECUTOR_ARTIFACT: &str = + "contracts/out/AetherExecutor.sol/AetherExecutor.json"; + +sol! { + function getReserves() external view returns (uint112 reserve0, uint112 reserve1, uint32 blockTimestampLast); + function slot0() external view returns (uint160 sqrtPriceX96, int24 tick, uint16 observationIndex, uint16 observationCardinality, uint16 observationCardinalityNext, uint8 feeProtocol, bool unlocked); +} + +#[derive(Parser, Debug)] +#[command(name = "aether-profit-scorer", about = "Compute realised P&L per confirmed mempool prediction")] +struct Args { + /// Path to the pool registry TOML. Defaults to ./config/pools.toml. + #[arg(long, default_value = "config/pools.toml")] + pools_config: PathBuf, + + /// Path to the forge-compiled AetherExecutor JSON artifact. Used by the + /// revm verifier to deploy the executor inside pure-revm simulation for + /// V3-touching cycles. If absent or unreadable, the revm path is + /// disabled and V3 cycles fall back to the f64 absurdity floor. 
+ #[arg(long, default_value = DEFAULT_EXECUTOR_ARTIFACT)] + executor_artifact: PathBuf, +} + +#[tokio::main] +async fn main() -> Result<()> { + let _ = dotenvy::dotenv(); + tracing_subscriber::fmt() + .with_env_filter(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"))) + .init(); + + let args = Args::parse(); + + let dsn = std::env::var("MEMPOOL_LEDGER_DSN") + .context("MEMPOOL_LEDGER_DSN required")?; + let rpc_url = std::env::var("ETH_RPC_URL").context("ETH_RPC_URL required")?; + let metrics_addr: SocketAddr = std::env::var("PROFIT_SCORER_METRICS_ADDR") + .unwrap_or_else(|_| "0.0.0.0:9095".to_string()) + .parse() + .context("PROFIT_SCORER_METRICS_ADDR must be host:port")?; + let git_sha = std::env::var("AETHER_GIT_SHA").ok(); + + // Load executor init bytecode for the revm V3 verifier. If the artifact + // doesn't exist (e.g. forge not run, or scorer deployed without contracts/), + // we log and continue — V3 cycles will fall back to the f64 absurdity floor. + let executor_bytecode: Option>> = match load_executor_init_bytecode(&args.executor_artifact) { + Ok(bc) => { + info!( + artifact = %args.executor_artifact.display(), + bytecode_len = bc.len(), + "Loaded executor init bytecode for revm V3 verifier" + ); + Some(Arc::new(bc)) + } + Err(e) => { + warn!( + artifact = %args.executor_artifact.display(), + error = %e, + "Could not load executor artifact; revm V3 verifier disabled (f64 fallback only)" + ); + None + } + }; + + info!("Loading pool config from {}", args.pools_config.display()); + let mut pools = load_pools(&args.pools_config)?; + info!(pool_count = pools.len(), "Pools loaded from config"); + + let registry = Registry::new(); + let writer_metrics = ProfitabilityWriterMetrics::register(®istry); + let sink = profit_writer_from_env(Arc::clone(&writer_metrics)).await; + + // Separate PgPool for the read side: the writer's pool is for INSERTs + // (small, bounded) and we keep reads off it so a write backlog can't + // serialise the 
SELECT. + let read_pool = PgPoolOptions::new() + .max_connections(2) + .acquire_timeout(Duration::from_secs(2)) + .connect(&dsn) + .await + .context("connect read pool")?; + + // Augment the registry with every distinct pool the engine has + // already written a prediction for. The engine's runtime pair-index + // extends past `pools.toml` whenever the mempool decoder spots a new + // pool, but pre-#137 the scorer only loaded the static config — so + // most predictions resolved as `no_path` even when the engine could + // perfectly well graph them. This bootstrap pull closes that gap. + let config_addresses: HashSet
= pools.iter().map(|p| p.address).collect(); + match load_predicted_pools(&read_pool, &config_addresses).await { + Ok(extra) => { + info!(added_from_db = extra.len(), "DB-augmented pool registry"); + pools.extend(extra); + } + Err(e) => warn!(error = %e, "could not augment pools from DB; continuing with config only"), + } + + // Convert WS RPC URL to HTTPS for the alloy HTTP provider. The fork + // path in aether-replay does the same rewrite; replicated here so + // the scorer accepts the same env var as the engine. + let http_url = rewrite_ws_to_http(&rpc_url); + let provider = ProviderBuilder::new() + .connect_http(http_url.parse().context("parse RPC URL")?); + // Type-erased provider for the revm verifier (requires DynProvider). + let dyn_provider: DynProvider = DynProvider::new(provider.clone()); + + info!("Bootstrapping reference graph (this fetches reserves for every pool at latest block)"); + let initial_state = bootstrap_state(&pools, &provider).await?; + info!( + graph_edges = initial_state.graph.num_edges(), + base_fee_gwei = initial_state.base_fee_wei as f64 / 1e9, + "Reference graph ready" + ); + + start_metrics_server(metrics_addr, registry.clone()); + + let mut state = initial_state; + let mut poll_ticker = interval(POLL_INTERVAL); + poll_ticker.set_missed_tick_behavior(MissedTickBehavior::Skip); + let mut refresh_ticker = interval(GRAPH_REFRESH_INTERVAL); + refresh_ticker.set_missed_tick_behavior(MissedTickBehavior::Skip); + // Skip the first tick since we just bootstrapped. + refresh_ticker.tick().await; + + info!("Scorer started; polling every {:?}", POLL_INTERVAL); + loop { + tokio::select! { + _ = poll_ticker.tick() => { + if let Err(e) = score_batch( + &read_pool, &provider, &pools, &state, sink.as_ref(), + git_sha.as_deref(), + executor_bytecode.as_ref(), + &dyn_provider, + ).await { + warn!(error = %e, "score batch failed"); + } + } + _ = refresh_ticker.tick() => { + // Pick up pools the engine has discovered since startup. 
+ // We re-run the same DB-augmentation as bootstrap, scoped + // to addresses we don't already have. Failure here is + // non-fatal — we keep the existing pool set if the SELECT + // fails — because losing one refresh cycle is better than + // killing the scorer over a transient DB blip. + let known: HashSet
= pools.iter().map(|p| p.address).collect(); + match load_predicted_pools(&read_pool, &known).await { + Ok(extra) if !extra.is_empty() => { + info!(added_from_db = extra.len(), "registry grew via mempool_predictions"); + pools.extend(extra); + } + Ok(_) => {} + Err(e) => warn!(error = %e, "DB-augmented pool refresh failed"), + } + match bootstrap_state(&pools, &provider).await { + Ok(fresh) => { + info!( + base_fee_gwei = fresh.base_fee_wei as f64 / 1e9, + pool_count = pools.len(), + "reference graph refreshed" + ); + state = fresh; + } + Err(e) => warn!(error = %e, "graph refresh failed; reusing previous reference"), + } + } + _ = tokio::signal::ctrl_c() => { + info!("Ctrl-C received; exiting"); + break; + } + } + } + Ok(()) +} + +/// Single tick of the score loop: pull a batch of unscored confirmed +/// predictions and score each one. +#[allow(clippy::too_many_arguments)] +async fn score_batch( + read_pool: &sqlx::PgPool, + provider: &impl Provider, + pools: &[LoadedPool], + state: &ScorerState, + sink: &dyn ProfitabilitySink, + git_sha: Option<&str>, + executor_bytecode: Option<&Arc>>, + dyn_provider: &DynProvider, +) -> Result<()> { + let batch = PgProfitabilityWriter::fetch_unscored_confirmed(read_pool, SCORE_BATCH_LIMIT) + .await + .context("fetch unscored confirmed")?; + if batch.is_empty() { + debug!("no unscored confirmed predictions"); + return Ok(()); + } + info!(count = batch.len(), "scoring batch"); + for pred in batch { + match score_one(provider, pools, state, &pred, executor_bytecode, dyn_provider).await { + Ok(score) => sink.insert_score(NewProfitabilityScore { + prediction_id: pred.prediction_id, + scored_at: Utc::now(), + cycle_path: score.cycle_path, + realized_profit_wei: score.realized_profit_wei, + gas_estimate_wei: score.gas_estimate_wei, + net_profit_wei: score.net_profit_wei, + decision: score.decision, + scoring_engine_git_sha: git_sha.map(str::to_string), + }), + Err(e) => warn!( + prediction_id = %pred.prediction_id, + error = %e, + 
"score_one failed; skipping this prediction (will retry next tick)" + ), + } + } + Ok(()) +} + +/// Result of scoring a single prediction. +struct ScoreOutcome { + cycle_path: serde_json::Value, + realized_profit_wei: U256, + gas_estimate_wei: U256, + net_profit_wei: i128, + decision: &'static str, +} + +async fn score_one( + provider: &impl Provider, + pools: &[LoadedPool], + state: &ScorerState, + pred: &UnscoredConfirmedPrediction, + executor_bytecode: Option<&Arc>>, + dyn_provider: &DynProvider, +) -> Result { + // Locate the prediction's pool in the registry. A prediction whose + // pool is no longer in the registry (rare; registry change between + // prediction time and scoring time) lands as `no_path` so the row + // still gets written and the dashboard sees the case. + let Some(pool_idx) = pools.iter().position(|p| p.address == pred.pool_address) else { + warn!( + prediction_id = %pred.prediction_id, + pool = %pred.pool_address, + "pool absent from registry; emitting no_path" + ); + return Ok(no_path_outcome(None)); + }; + let pool_entry = &pools[pool_idx]; + + // Fetch actual reserves at the prediction's confirmed block. + let actual_state = fetch_pool_state_at(provider, pool_entry, pred.actual_target_block) + .await + .context("fetch_pool_state_at")?; + let Some(actual_state) = actual_state else { + warn!( + prediction_id = %pred.prediction_id, + block = pred.actual_target_block, + "eth_call returned no state; emitting no_path" + ); + return Ok(no_path_outcome(None)); + }; + + // Clone the reference graph, then overwrite the affected edge with + // the actual-block reserves. 
+ let mut graph = state.graph.clone(); + let token_index = &state.token_index; + let Some(t0) = token_index.get_index(&pool_entry.token0) else { + return Ok(no_path_outcome(None)); + }; + let Some(t1) = token_index.get_index(&pool_entry.token1) else { + return Ok(no_path_outcome(None)); + }; + let pool_id = PoolId { + address: pool_entry.address, + protocol: pool_entry.protocol, + }; + let fee_factor = (10_000u32 - pool_entry.fee_bps) as f64 / 10_000.0; + let (post0, post1) = state_to_graph_reserves(&actual_state); + if post0 <= 0.0 || post1 <= 0.0 { + return Ok(no_path_outcome(None)); + } + graph.update_edge_from_reserves(t0, t1, pool_id, post0, post1, fee_factor); + graph.update_edge_from_reserves(t1, t0, pool_id, post1, post0, fee_factor); + + // Run the same Bellman-Ford the engine uses at decode time. We restrict + // to cycles through the affected tokens (detect_from_affected) so the + // scorer doesn't burn time enumerating unrelated cycles. + let detector = BellmanFord::new(MAX_HOPS, DETECT_BUDGET_US); + let cycles = detector.detect_from_affected(&graph, &[t0, t1]); + let profitable: Vec = cycles.into_iter().filter(|c| c.is_profitable()).collect(); + if profitable.is_empty() { + let gas = gas_estimate_for_protocols(&[pool_entry.protocol], state.base_fee_wei); + return Ok(no_path_outcome(Some(gas))); + } + + // Optimise the best cycle. The optimiser walks the cycle, applies the + // post-state reserves to every V2 hop, and ternary-searches for the + // input amount that maximises (output - input - gas). 
+ let best = &profitable[0]; + let running_states = collect_running_states(pools, &state.latest_states, pool_idx, actual_state); + let Some(optimisation) = optimise_cycle(best, &graph, token_index, pools, &running_states, state.base_fee_wei) else { + let gas = gas_estimate_for_protocols(&[pool_entry.protocol], state.base_fee_wei); + return Ok(no_path_outcome(Some(gas))); + }; + + let gas_wei = optimisation.gas_cost_wei; + let optimal_input_wei = optimisation.optimal_input_wei; + + // Decide: V2-only cycles get exact U256 math; V3-touching cycles route + // through the revm verifier (deploy AetherExecutor + executeArb in pure + // revm). Cycles that neither path can resolve fall back to the f64 + // absurdity floor. + let v3_touching = is_v3_touching_cycle(best, &graph, token_index, pools, &running_states); + + let (net, realized_wei_i128, decision) = if !v3_touching { + // V2-only: exact U256 getAmountOut walk (unchanged from pre-V3 scorer). + let verified_gross = verify_cycle_u256( + best, + &graph, + token_index, + pools, + &running_states, + optimal_input_wei, + ); + match verified_gross { + Some(gross_out) => { + let gross_i128 = u256_to_i128_saturating(gross_out) + .saturating_sub(u256_to_i128_saturating(optimal_input_wei)); + let exact_net = gross_i128.saturating_sub(gas_wei as i128); + let realised = gross_i128.max(0); + let decision = if gross_out < optimal_input_wei { + DECISION_REVERTED + } else if exact_net > 0 { + DECISION_PROFITABLE + } else { + DECISION_UNPROFITABLE + }; + (exact_net, realised, decision) + } + None => f64_fallback_verdict(optimisation.net_profit_wei, gas_wei), + } + } else if let Some(executor_bc) = executor_bytecode { + // V3-touching: deploy+simulate via pure revm. 
+ let verdict = verify_cycle_revm( + best, + &graph, + token_index, + pools, + &running_states, + optimal_input_wei, + dyn_provider, + executor_bc, + state.block_number, + state.block_timestamp, + state.base_fee_wei as u64, + ); + match verdict { + Some(rv) => revm_verdict_to_decision(rv, gas_wei), + // revm couldn't resolve (unsupported token, Curve hop, etc.) + None => f64_fallback_verdict(optimisation.net_profit_wei, gas_wei), + } + } else { + // No executor bytecode available — pure f64 fallback. + f64_fallback_verdict(optimisation.net_profit_wei, gas_wei) + }; + + let realized_wei = U256::from(realized_wei_i128 as u128); + let gas_estimate_wei = U256::from(gas_wei); + + let cycle_json = cycle_to_json(best, &graph, token_index, pools); + + Ok(ScoreOutcome { + cycle_path: cycle_json, + realized_profit_wei: realized_wei, + gas_estimate_wei, + net_profit_wei: net, + decision, + }) +} + +fn no_path_outcome(gas: Option) -> ScoreOutcome { + let gas_wei = gas.unwrap_or(0); + ScoreOutcome { + cycle_path: serde_json::Value::Array(vec![]), + realized_profit_wei: U256::ZERO, + gas_estimate_wei: U256::from(gas_wei), + net_profit_wei: -(gas_wei as i128), + decision: DECISION_NO_PATH, + } +} + +/// Convert `PoolState` to graph-edge reserves matching how the engine +/// seeds them: V2 keeps `(r0, r1)`; V3 uses a synthetic `(1.0, +/// spot_price)` pair so Bellman-Ford treats the two families +/// identically (the engine's mempool pipeline does the same mapping). +fn state_to_graph_reserves(state: &PoolState) -> (f64, f64) { + match state { + PoolState::V2 { r0, r1 } => (u256_to_f64(*r0), u256_to_f64(*r1)), + PoolState::V3 { sqrt_price_x96 } => { + let sqrt_f = u256_to_f64(*sqrt_price_x96); + if sqrt_f == 0.0 { + return (0.0, 0.0); + } + let root = sqrt_f / Q96; + (1.0, root * root) + } + } +} + +/// Merge the latest per-pool states (refreshed by the bootstrap loop) +/// with the affected pool's actual-block state. 
This is the map the +/// optimiser consults when walking each cycle hop. +fn collect_running_states( + pools: &[LoadedPool], + latest_states: &HashMap, + affected_idx: usize, + affected_state: PoolState, +) -> HashMap { + let mut out = HashMap::with_capacity(pools.len()); + for (idx, state) in latest_states.iter() { + out.insert(*idx, *state); + } + out.insert(affected_idx, affected_state); + out +} + +struct OptimiserSuccess { + net_profit_wei: i128, + gas_cost_wei: u128, + /// Input amount the ternary search converged on. Exposed so the + /// post-optimiser U256 verifier (`verify_cycle_u256`) can re-walk + /// the cycle with exact integer math at the same input the f64 + /// optimiser scored, and either confirm the profit or downgrade the + /// row to `DECISION_REVERTED` when f64 precision overstated reserves. + optimal_input_wei: U256, +} + +fn optimise_cycle( + cycle: &DetectedCycle, + graph: &PriceGraph, + token_index: &TokenIndex, + pools: &[LoadedPool], + running_states: &HashMap, + base_fee_wei: u128, +) -> Option { + if cycle.path.len() < 2 { + return None; + } + let mut hop_reserves: Vec<(f64, f64)> = Vec::with_capacity(cycle.path.len() - 1); + let mut hop_fee_factors: Vec = Vec::with_capacity(cycle.path.len() - 1); + let mut hop_rates: Vec = Vec::with_capacity(cycle.path.len() - 1); + let mut protocols: Vec = Vec::with_capacity(cycle.path.len() - 1); + let mut min_liquidity_wei: Option = None; + + for pair in cycle.path.windows(2) { + let [from_v, to_v] = [pair[0], pair[1]]; + let edge = graph + .edges_from(from_v) + .iter() + .filter(|e| e.to == to_v) + .min_by(|a, b| a.weight.partial_cmp(&b.weight).unwrap_or(std::cmp::Ordering::Equal))?; + + let token_in = *token_index.get_address(from_v)?; + let (pool_idx, pool_entry) = pools + .iter() + .enumerate() + .find(|(_, p)| p.address == edge.pool_address)?; + let fee_multiplier = (10_000u32 - pool_entry.fee_bps) as f64 / 10_000.0; + let rate = (-edge.weight).exp(); + + let (rin, rout) = match 
running_states.get(&pool_idx).copied() { + Some(PoolState::V2 { r0, r1 }) => { + let (ri, ro) = if token_in == pool_entry.token0 { + (r0, r1) + } else { + (r1, r0) + }; + let ri_f = u256_to_f64(ri); + if min_liquidity_wei.is_none_or(|prev| prev > ri_f) { + min_liquidity_wei = Some(ri_f); + } + (ri_f, u256_to_f64(ro)) + } + // V3 / unknown: optimiser falls back to rate-only path. + Some(PoolState::V3 { .. }) | None => (0.0, 0.0), + }; + + hop_reserves.push((rin, rout)); + hop_fee_factors.push(fee_multiplier); + hop_rates.push(rate); + protocols.push(pool_entry.protocol); + } + + let min_input = U256::from(10_000_000_000_000_000u128); // 0.01 ETH + let hard_max = U256::from(50_000_000_000_000_000_000u128); // 50 ETH + let max_input = match min_liquidity_wei { + Some(liq) if liq > 0.0 => { + let liq_u256 = U256::from(liq as u128); + if liq_u256 < hard_max { + liq_u256 + } else { + hard_max + } + } + _ => hard_max, + }; + + let ticks = vec![0u32; protocols.len()]; + let gas_units = gas_model::estimate_total_gas(&protocols, &ticks); + let base_fee_gwei = base_fee_wei as f64 / 1e9; + let gas_cost_wei = gas_model::gas_cost_wei(gas_units, base_fee_gwei); + + let profit_fn = |input: U256| -> i128 { + let mut current = u256_to_f64(input); + for i in 0..hop_reserves.len() { + let (x, y) = hop_reserves[i]; + let fee = hop_fee_factors[i]; + if x > 0.0 && y > 0.0 { + current = (current * fee * y) / (x + current * fee); + } else { + current *= hop_rates[i]; + } + } + let output = current as i128; + let input_i128 = u256_to_f64(input) as i128; + output + .saturating_sub(input_i128) + .saturating_sub(gas_cost_wei as i128) + }; + + let (optimal_input_wei, net_profit_wei) = if min_input < max_input { + ternary_search_optimal_input(min_input, max_input, 80, profit_fn) + } else { + let p = profit_fn(min_input); + (min_input, p) + }; + + Some(OptimiserSuccess { + net_profit_wei, + gas_cost_wei, + optimal_input_wei, + }) +} + +/// Re-walk the optimiser's chosen cycle with exact U256 V2 
math and return +/// the gross output amount in the cycle's starting token, or `None` when +/// the cycle isn't fully V2-decidable. +/// +/// Returns `None` (inconclusive — caller falls back to the f64 optimiser's +/// verdict) when: +/// - any hop's running state is missing +/// - any hop is V3 (`PoolState::V3`) — V3 amount-out needs tick traversal; +/// replicating that here is out of scope for the precision fix +/// - any hop has zero-or-degenerate reserves +/// - the graph edge doesn't resolve cleanly back to a registry pool +/// +/// Returns `Some(gross_wei)` when every hop resolves to a V2/Sushi pool +/// with positive reserves. The caller compares `gross_wei` against the +/// starting input: `gross < input` ⇒ `DECISION_REVERTED` (f64 bias), +/// otherwise the exact net = gross − input − gas drives the decision. +fn verify_cycle_u256( + cycle: &DetectedCycle, + graph: &PriceGraph, + token_index: &TokenIndex, + pools: &[LoadedPool], + running_states: &HashMap, + optimal_input_wei: U256, +) -> Option { + if cycle.path.len() < 2 || optimal_input_wei.is_zero() { + return None; + } + // Per-pool reserve copy that we mutate as the cycle progresses. When + // a multi-hop cycle revisits the same pool (e.g. A→B→A self-loops the + // Bellman-Ford detector can emit whenever both edge directions exist + // on a single pool), the second hop MUST see reserves shifted by hop + // 1's swap; otherwise the verifier double-uses the pre-swap reserves + // and lets the second hop "regenerate" the input out of thin air, + // producing ETH-scale ghost profit identical in shape to the f64 + // precision bias this PR set out to remove. + // + // Keyed by `pool_idx` so address-collision is impossible. Entries are + // only ever V2 `(r0, r1)` pairs — V3 hops short-circuit to `None` on + // first encounter, so any present entry is guaranteed V2. 
+ let mut local_reserves: HashMap = HashMap::new(); + + let mut current_amount = optimal_input_wei; + for pair in cycle.path.windows(2) { + let [from_v, to_v] = [pair[0], pair[1]]; + let edge = graph + .edges_from(from_v) + .iter() + .filter(|e| e.to == to_v) + .min_by(|a, b| a.weight.partial_cmp(&b.weight).unwrap_or(std::cmp::Ordering::Equal))?; + let token_in = *token_index.get_address(from_v)?; + let (pool_idx, pool_entry) = pools + .iter() + .enumerate() + .find(|(_, p)| p.address == edge.pool_address)?; + + let (r0, r1) = match local_reserves.get(&pool_idx).copied() { + Some(rs) => rs, + None => match running_states.get(&pool_idx).copied()? { + PoolState::V2 { r0, r1 } => (r0, r1), + // V3 hop: out of scope for the U256 verifier; signal + // caller to keep the f64 verdict. + PoolState::V3 { .. } => return None, + }, + }; + let zero_for_one = token_in == pool_entry.token0; + let (r_in, r_out) = if zero_for_one { (r0, r1) } else { (r1, r0) }; + let amount_out = + uniswap_v2_get_amount_out(current_amount, r_in, r_out, pool_entry.fee_bps)?; + if amount_out.is_zero() { + return None; + } + + // Apply the swap to the local copy so subsequent hops on this + // pool see the post-swap reserves. V2 invariant + // (`r_in_new * r_out_new ≥ r_in * r_out`) is preserved exactly by + // construction since `uniswap_v2_get_amount_out` returns the + // largest `amount_out` consistent with the curve. + let r_in_new = r_in.checked_add(current_amount)?; + let r_out_new = r_out.checked_sub(amount_out)?; + let new_state = if zero_for_one { + (r_in_new, r_out_new) + } else { + (r_out_new, r_in_new) + }; + local_reserves.insert(pool_idx, new_state); + + current_amount = amount_out; + } + Some(current_amount) +} + +/// UniswapV2 `getAmountOut` — exact U256 math, no rounding. Same formula +/// the pool's `swap()` invariant check enforces on-chain, so the verifier +/// here is byte-identical to what would actually execute. 
Returns `None` +/// when any leg has zero reserves / zero input (drained-pool guard) or any +/// intermediate multiplication overflows U256. +fn uniswap_v2_get_amount_out( + amount_in: U256, + reserve_in: U256, + reserve_out: U256, + fee_bps: u32, +) -> Option { + if reserve_in.is_zero() || reserve_out.is_zero() || amount_in.is_zero() { + return None; + } + // fee_bps = 30 (0.30%) → multiplier 9970/10000. The 10_000 - fee_bps + // form matches the contract's hard-coded numerator for the default 30 + // bps pool and generalises to lower-fee Uni V2 forks. + let fee_multiplier = U256::from(10_000u64.saturating_sub(fee_bps as u64)); + let amount_in_with_fee = amount_in.checked_mul(fee_multiplier)?; + let numerator = amount_in_with_fee.checked_mul(reserve_out)?; + let denominator = reserve_in + .checked_mul(U256::from(10_000u64))? + .checked_add(amount_in_with_fee)?; + if denominator.is_zero() { + return None; + } + Some(numerator / denominator) +} + +/// U256 → i128 with saturating overflow. The scorer's `net_profit_wei` +/// column is i128; profits beyond i128::MAX wei (≈170 quadrillion ETH — +/// numerically unreachable on Ethereum) saturate rather than wrap. The +/// guard exists for the precision-bias path where an unbounded f64 may +/// have proposed an input larger than i128 can hold. +fn u256_to_i128_saturating(v: U256) -> i128 { + let limbs = v.as_limbs(); + // i128 fits in limbs[0] + limbs[1] (each limb is u64). Anything beyond + // limbs[1]'s sign bit overflows. 
+ if limbs[2] != 0 || limbs[3] != 0 || (limbs[1] >> 63) == 1 { + return i128::MAX; + } + ((limbs[1] as i128) << 64) | (limbs[0] as i128) +} + +fn gas_estimate_for_protocols(protocols: &[ProtocolType], base_fee_wei: u128) -> u128 { + let ticks = vec![0u32; protocols.len()]; + let units = gas_model::estimate_total_gas(protocols, &ticks); + gas_model::gas_cost_wei(units, base_fee_wei as f64 / 1e9) +} + +// ── V3 revm verifier ────────────────────────────────────────────── + +/// Result from the revm deploy+simulate verifier. +#[derive(Debug, Clone, Copy)] +struct RevmVerdict { + /// Gross profit in the cycle's starting token (ERC20 balance delta on + /// SIM_OWNER after executeArb). Zero on revert. + gross_profit_wei: U256, + /// Gas consumed by the executeArb CALL (excludes CREATE overhead). + /// Currently informational only — the decision mapping uses the + /// scorer's static `gas_estimate_for_protocols` rather than revm's + /// measured cost, so this field is populated but not yet read by + /// the decision path. Kept for forthcoming gas-model calibration. + #[allow(dead_code)] + gas_used: u64, + /// True if the executeArb CALL reverted or halted. + reverted: bool, +} + +/// Map a `RevmVerdict` into `(net, realised_i128, decision)`. +fn revm_verdict_to_decision(rv: RevmVerdict, gas_cost_wei: u128) -> (i128, i128, &'static str) { + if rv.reverted { + let gas_i128 = gas_cost_wei as i128; + (-(gas_i128), 0, DECISION_REVERTED) + } else { + let gross_i128 = u256_to_i128_saturating(rv.gross_profit_wei); + let net = gross_i128.saturating_sub(gas_cost_wei as i128); + let realised = gross_i128.max(0); + let decision = if net > 0 { + DECISION_PROFITABLE + } else { + DECISION_UNPROFITABLE + }; + (net, realised, decision) + } +} + +/// Fallback for cycles that neither the U256 walker nor revm can resolve. +/// Applies the absurdity floor: f64 nets above 1 ETH are downgraded to +/// REVERTED (precision-bias artefact). 
+fn f64_fallback_verdict(f64_net: i128, gas_cost_wei: u128) -> (i128, i128, &'static str) { + let realised = f64_net.saturating_add(gas_cost_wei as i128).max(0); + let decision = if f64_net > MAX_PLAUSIBLE_F64_NET_WEI { + DECISION_REVERTED + } else if f64_net > 0 { + DECISION_PROFITABLE + } else { + DECISION_UNPROFITABLE + }; + (f64_net, realised, decision) +} + +/// Walk the cycle's hops and return `true` if any hop's pool state is +/// `PoolState::V3`. O(hops) — typically 2-4 iterations. +fn is_v3_touching_cycle( + cycle: &DetectedCycle, + graph: &PriceGraph, + token_index: &TokenIndex, + pools: &[LoadedPool], + running_states: &HashMap, +) -> bool { + for pair in cycle.path.windows(2) { + let [from_v, to_v] = [pair[0], pair[1]]; + let edge = match graph + .edges_from(from_v) + .iter() + .filter(|e| e.to == to_v) + .min_by(|a, b| a.weight.partial_cmp(&b.weight).unwrap_or(std::cmp::Ordering::Equal)) + { + Some(e) => e, + None => continue, + }; + // Resolve to a pool, check if it has a V3 state. + if token_index.get_address(from_v).is_none() { + continue; + } + let pool_idx = match pools.iter().position(|p| p.address == edge.pool_address) { + Some(i) => i, + None => continue, + }; + if matches!(running_states.get(&pool_idx), Some(PoolState::V3 { .. })) { + return true; + } + } + false +} + +/// Return the ERC20 `_balances` mapping storage slot for well-known mainnet +/// tokens. Returns `None` for tokens without a known slot — the revm verifier +/// returns `None` (f64 fallback) for those cycles. +fn balance_slot_for_token(token: Address) -> Option { + if token == WETH_ADDR { + Some(U256::from(3u64)) + } else if token == USDC_ADDR { + Some(U256::from(9u64)) + } else if token == DAI_ADDR || token == USDT_ADDR { + Some(U256::from(2u64)) + } else { + None + } +} + +/// Load AetherExecutor init-bytecode from the forge-compiled JSON artifact. 
+fn load_executor_init_bytecode(artifact_path: &PathBuf) -> Result> { + let raw = std::fs::read_to_string(artifact_path) + .with_context(|| format!("read executor artifact {}", artifact_path.display()))?; + let v: serde_json::Value = serde_json::from_str(&raw).context("parse executor artifact JSON")?; + let hex_str = v + .pointer("/bytecode/object") + .and_then(|x| x.as_str()) + .ok_or_else(|| anyhow::anyhow!("missing /bytecode/object in artifact"))?; + let stripped = hex_str.strip_prefix("0x").unwrap_or(hex_str); + let bytes = alloy::hex::decode(stripped).context("decode bytecode hex")?; + if bytes.is_empty() { + anyhow::bail!("executor bytecode is empty"); + } + Ok(bytes) +} + +/// Build `Vec` from a detected cycle using pre-fetched running +/// states (synchronous — no RPC calls). Ported from aether_replay's +/// `build_steps_from_cycle` but sync and fed from `running_states`. +/// +/// Returns `None` if any hop touches Curve/Balancer/Bancor, has missing +/// state, or produces zero output. 
+fn build_steps_from_cycle_sync( + cycle: &DetectedCycle, + graph: &PriceGraph, + token_index: &TokenIndex, + pools: &[LoadedPool], + running_states: &HashMap, + executor_addr: Address, + flashloan_amount: U256, +) -> Option> { + if cycle.path.len() < 2 { + return None; + } + let mut current_amount = flashloan_amount; + let mut steps: Vec = Vec::with_capacity(cycle.path.len() - 1); + + for pair in cycle.path.windows(2) { + let [from_v, to_v] = [pair[0], pair[1]]; + let edge = graph + .edges_from(from_v) + .iter() + .filter(|e| e.to == to_v) + .min_by(|a, b| a.weight.partial_cmp(&b.weight).unwrap_or(std::cmp::Ordering::Equal))?; + + let token_in = *token_index.get_address(from_v)?; + let token_out = *token_index.get_address(to_v)?; + let (pool_idx, pool_entry) = pools + .iter() + .enumerate() + .find(|(_, p)| p.address == edge.pool_address)?; + + let state = running_states.get(&pool_idx).copied()?; + let (amount_out, inner_calldata) = match (pool_entry.protocol, state) { + (ProtocolType::UniswapV2 | ProtocolType::SushiSwap, PoolState::V2 { r0, r1 }) => { + let (reserve_in, reserve_out, zero_for_one) = if token_in == pool_entry.token0 { + (r0, r1, true) + } else { + (r1, r0, false) + }; + let out = uniswap_v2_get_amount_out(current_amount, reserve_in, reserve_out, pool_entry.fee_bps)?; + if out.is_zero() { + return None; + } + let (amount0_out, amount1_out) = if zero_for_one { + (U256::ZERO, out) + } else { + (out, U256::ZERO) + }; + let cd = build_univ2_swap_calldata(amount0_out, amount1_out, executor_addr); + (out, cd) + } + (ProtocolType::UniswapV3, PoolState::V3 { .. }) => { + // V3: approximate output from graph edge rate; the revm sim + // produces the real executable amount via tick traversal. 
+ let rate = (-edge.weight).exp(); + let approx_out = U256::from((u256_to_f64(current_amount) * rate).max(0.0) as u128); + if approx_out.is_zero() { + return None; + } + let zero_for_one = token_in == pool_entry.token0; + let sqrt_limit = if zero_for_one { + U256::from(4_295_128_740u64) // MIN_SQRT_RATIO + 1 + } else { + (U256::from(1u8) << 160) - U256::from(2u8) // MAX_SQRT_RATIO - 1 + }; + let amt_i128 = i128::try_from(current_amount.saturating_to::()).ok()?; + let cd = build_univ3_swap_calldata(executor_addr, zero_for_one, amt_i128, sqrt_limit); + (approx_out, cd) + } + // Curve / Balancer / Bancor: out of scope for V3 verifier. + _ => return None, + }; + + steps.push(SwapStep { + protocol: pool_entry.protocol, + pool_address: pool_entry.address, + token_in, + token_out, + amount_in: current_amount, + min_amount_out: U256::ZERO, + calldata: inner_calldata, + }); + + current_amount = amount_out; + } + + Some(steps) +} + +/// Verify a V3-touching cycle by deploying AetherExecutor inside pure revm +/// and calling `executeArb`. Returns `None` when the cycle can't be resolved +/// (unsupported token for balance-slot, Curve/Balancer hop, build failure). +/// +/// Runs synchronously — callers should wrap in `spawn_blocking` if on an +/// async context (the scorer's `score_one` is already async but the revm +/// transact calls `block_in_place` internally via AlloyDB). +#[allow(clippy::too_many_arguments)] +fn verify_cycle_revm( + cycle: &DetectedCycle, + graph: &PriceGraph, + token_index: &TokenIndex, + pools: &[LoadedPool], + running_states: &HashMap, + optimal_input_wei: U256, + provider: &DynProvider, + executor_init_bytecode: &[u8], + block_number: u64, + block_timestamp: u64, + base_fee: u64, +) -> Option { + if cycle.path.len() < 2 || optimal_input_wei.is_zero() { + return None; + } + // The cycle's starting token = flashloan asset = profit token. 
+ let start_token = *token_index.get_address(cycle.path[0])?; + let balance_slot = balance_slot_for_token(start_token)?; + + // We need a temporary executor address for inner-calldata recipients. + // Since we don't know the deployed address yet, we pre-compute it: + // CREATE from SIM_OWNER at nonce 0 → deterministic address. + let executor_addr = SIM_OWNER.create(0); + + let steps = build_steps_from_cycle_sync( + cycle, + graph, + token_index, + pools, + running_states, + executor_addr, + optimal_input_wei, + )?; + + if steps.is_empty() { + return None; + } + + let calldata = build_execute_arb_calldata( + &steps, + start_token, + optimal_input_wei, + U256::from(u64::MAX), // deadline + U256::ZERO, // minProfitOut + U256::ZERO, // tipBps + ); + + let ctor_args = (AAVE_POOL, BALANCER_VAULT, BANCOR_NETWORK).abi_encode_params(); + + let fork_state = RpcForkedState::new( + provider.clone(), + block_number, + block_timestamp, + base_fee, + )?; + + let sim = EvmSimulator::new(SimConfig { + gas_limit: 8_000_000, + chain_id: 1, + caller: SIM_OWNER, + value: U256::ZERO, + }); + + let result = sim.deploy_and_simulate_with_erc20_profit( + fork_state, + SIM_OWNER, + executor_init_bytecode, + &ctor_args, + calldata, + start_token, + SIM_OWNER, + balance_slot, + ); + + Some(RevmVerdict { + gross_profit_wei: result.profit_wei, + gas_used: result.gas_used, + reverted: !result.success, + }) +} + +/// Serialise a DetectedCycle into the JSONB shape the dashboard reads. +/// Each hop carries `pool`, `token_in`, `token_out`, `protocol`. 
+fn cycle_to_json( + cycle: &DetectedCycle, + graph: &PriceGraph, + token_index: &TokenIndex, + pools: &[LoadedPool], +) -> serde_json::Value { + let mut hops = Vec::with_capacity(cycle.path.len().saturating_sub(1)); + for pair in cycle.path.windows(2) { + let [from_v, to_v] = [pair[0], pair[1]]; + let Some(edge) = graph.edges_from(from_v).iter().find(|e| e.to == to_v) else { + continue; + }; + let Some(token_in) = token_index.get_address(from_v) else { + continue; + }; + let Some(token_out) = token_index.get_address(to_v) else { + continue; + }; + let proto_label = pools + .iter() + .find(|p| p.address == edge.pool_address) + .map(|p| protocol_label(p.protocol)) + .unwrap_or("unknown"); + hops.push(serde_json::json!({ + "pool": format!("{:#x}", edge.pool_address), + "token_in": format!("{:#x}", token_in), + "token_out": format!("{:#x}", token_out), + "protocol": proto_label, + })); + } + serde_json::Value::Array(hops) +} + +fn protocol_label(p: ProtocolType) -> &'static str { + match p { + ProtocolType::UniswapV2 => "uni_v2", + ProtocolType::UniswapV3 => "uni_v3", + ProtocolType::SushiSwap => "sushi", + ProtocolType::Curve => "curve", + ProtocolType::BalancerV2 => "balancer", + ProtocolType::BancorV3 => "bancor", + } +} + +// ----- inlined helpers (duplicate of aether_replay.rs; see module docstring) ----- + +#[derive(Clone, Copy, Debug)] +enum PoolState { + V2 { r0: U256, r1: U256 }, + V3 { sqrt_price_x96: U256 }, +} + +#[derive(Clone, Debug)] +struct LoadedPool { + address: Address, + token0: Address, + token1: Address, + protocol: ProtocolType, + fee_bps: u32, +} + +#[derive(Deserialize)] +struct PoolsConfig { + pools: Vec, +} + +#[derive(Deserialize)] +struct PoolEntry { + address: String, + token0: String, + token1: String, + protocol: String, + fee_bps: u32, +} + +fn parse_protocol(s: &str) -> Option { + match s { + "uniswap_v2" => Some(ProtocolType::UniswapV2), + "sushiswap" => Some(ProtocolType::SushiSwap), + "uniswap_v3" => Some(ProtocolType::UniswapV3), 
+ "curve" => Some(ProtocolType::Curve), + "balancer_v2" => Some(ProtocolType::BalancerV2), + "bancor_v3" => Some(ProtocolType::BancorV3), + _ => None, + } +} + +/// Map the short-form protocol strings the engine writes into +/// `mempool_predictions.protocol` (see `aether_grpc_server::mempool_writer` +/// `PROTOCOL_*` constants) to `ProtocolType`. Distinct from +/// [`parse_protocol`], which reads the long-form names used in +/// `config/pools.toml`. Kept narrow on purpose: only the protocols the +/// scorer can actually score are returned; Balancer / Curve / Bancor +/// fall through to `None` so we don't add edges for hops the engine +/// can't compute reserves for at present. +fn parse_db_protocol(s: &str) -> Option { + match s { + "uni_v2" => Some(ProtocolType::UniswapV2), + "uni_v3" => Some(ProtocolType::UniswapV3), + "sushi" => Some(ProtocolType::SushiSwap), + _ => None, + } +} + +/// Augment the static `config/pools.toml` registry with every distinct +/// pool the engine has actually written a prediction for, but doesn't +/// appear in the config. The engine's runtime pair-index extends as +/// mempool decoding discovers new pools; the scorer's old behaviour of +/// loading only the TOML config meant ~88% of confirmed predictions +/// resolved as `decision='no_path'` even when their pool existed in the +/// engine's graph at decode time. +/// +/// `known` is the set of addresses already present from the config; pools +/// in `known` are skipped so we don't double-register them. +/// +/// Returns up to `MAX_DB_PREDICTED_POOLS` distinct LoadedPool entries. +/// The cap exists so a runaway engine writing thousands of pool +/// addresses can't blow the bootstrap's RPC fan-out (one eth_call per +/// pool) or memory. The query orders by pool_address so the truncation +/// is deterministic — same set across restarts unless the underlying +/// table changes. +async fn load_predicted_pools( + pg_pool: &sqlx::PgPool, + known: &HashSet
, +) -> Result> { + // Pull (pool, protocol, sample token_in, sample token_out) for every + // distinct pool address. token_in/token_out come from one arbitrary + // prediction row per pool; we use them only to derive the canonical + // (token0, token1) ordering, which is direction-agnostic by V2/V3 + // invariant (token0 = min(addr), token1 = max(addr)). + // `(pool_address, protocol, token_in, token_out)` — all bytea fields + // come back as `Vec` from sqlx. Aliased so clippy doesn't flag + // the nested generic. + type DbPoolRow = (Vec, String, Vec, Vec); + let rows: Vec = sqlx::query_as( + "SELECT DISTINCT ON (pool_address) pool_address, protocol, token_in, token_out \ + FROM mempool_predictions \ + WHERE pool_address IS NOT NULL \ + ORDER BY pool_address, decoded_at DESC \ + LIMIT $1", + ) + .bind(MAX_DB_PREDICTED_POOLS) + .fetch_all(pg_pool) + .await + .context("SELECT DISTINCT pool_address FROM mempool_predictions")?; + + let mut out = Vec::with_capacity(rows.len()); + for (pool_bytes, proto_str, tin_bytes, tout_bytes) in rows { + if pool_bytes.len() != 20 || tin_bytes.len() != 20 || tout_bytes.len() != 20 { + warn!( + pool_len = pool_bytes.len(), + tin_len = tin_bytes.len(), + tout_len = tout_bytes.len(), + "skipping db pool with non-20-byte address fields" + ); + continue; + } + let addr = Address::from_slice(&pool_bytes); + if known.contains(&addr) { + continue; + } + let Some(protocol) = parse_db_protocol(&proto_str) else { + // Balancer / Curve / Bancor / unknown — out of scope for the + // current scoring path. Tracked as future work. 
+ debug!(protocol = %proto_str, pool = %addr, "skipping db pool with unsupported protocol"); + continue; + }; + let tin = Address::from_slice(&tin_bytes); + let tout = Address::from_slice(&tout_bytes); + let (token0, token1) = if tin < tout { (tin, tout) } else { (tout, tin) }; + let fee_bps = match protocol { + ProtocolType::UniswapV3 => DEFAULT_V3_FEE_BPS, + _ => DEFAULT_V2_FEE_BPS, + }; + out.push(LoadedPool { + address: addr, + token0, + token1, + protocol, + fee_bps, + }); + } + Ok(out) +} + +fn load_pools(path: &PathBuf) -> Result> { + let raw = std::fs::read_to_string(path) + .with_context(|| format!("read pool config {}", path.display()))?; + let cfg: PoolsConfig = toml::from_str(&raw).context("parse pool config")?; + let mut out = Vec::new(); + for entry in cfg.pools { + let Some(protocol) = parse_protocol(&entry.protocol) else { + continue; + }; + // v1 scorer supports the same protocols aether-replay supports. + if !matches!( + protocol, + ProtocolType::UniswapV2 | ProtocolType::SushiSwap | ProtocolType::UniswapV3 + ) { + continue; + } + out.push(LoadedPool { + address: entry.address.parse().context("pool address")?, + token0: entry.token0.parse().context("token0")?, + token1: entry.token1.parse().context("token1")?, + protocol, + fee_bps: entry.fee_bps, + }); + } + Ok(out) +} + +async fn fetch_pool_state_at( + provider: &impl Provider, + pool: &LoadedPool, + block: u64, +) -> Result> { + let block_id = BlockId::Number(BlockNumberOrTag::Number(block)); + let state = match pool.protocol { + ProtocolType::UniswapV2 | ProtocolType::SushiSwap => { + let calldata = getReservesCall {}.abi_encode(); + let tx = TransactionRequest::default() + .to(pool.address) + .input(calldata.into()); + let out = provider.call(tx).block(block_id).await?; + if out.len() >= 64 { + Some(PoolState::V2 { + r0: U256::from_be_slice(&out[0..32]), + r1: U256::from_be_slice(&out[32..64]), + }) + } else { + None + } + } + ProtocolType::UniswapV3 => { + let calldata = slot0Call 
{}.abi_encode(); + let tx = TransactionRequest::default() + .to(pool.address) + .input(calldata.into()); + let out = provider.call(tx).block(block_id).await?; + if out.len() >= 32 { + Some(PoolState::V3 { + sqrt_price_x96: U256::from_be_slice(&out[0..32]), + }) + } else { + None + } + } + _ => None, + }; + Ok(state) +} + +fn u256_to_f64(v: U256) -> f64 { + let limbs = v.as_limbs(); + let mut acc = 0.0f64; + for (i, &limb) in limbs.iter().enumerate() { + acc += (limb as f64) * (2f64).powi((64 * i) as i32); + } + acc +} + +struct ScorerState { + graph: PriceGraph, + token_index: TokenIndex, + /// Per-pool reserves at the latest fetched block. Keyed by index into + /// the `pools` slice so the optimiser can look up by pool-registry + /// position rather than by address. + latest_states: HashMap, + base_fee_wei: u128, + /// Block number the reference graph was bootstrapped at. Used by the + /// revm verifier to pin `RpcForkedState` to a specific block. + block_number: u64, + /// Block timestamp at the reference-graph block. + block_timestamp: u64, +} + +async fn bootstrap_state( + pools: &[LoadedPool], + provider: &impl Provider, +) -> Result { + let head = provider.get_block_number().await.context("get_block_number")?; + // Pull the full block header for base fee + timestamp (revm verifier + // needs both for accurate simulation). + let head_block = provider + .get_block(BlockId::Number(BlockNumberOrTag::Number(head))) + .await + .ok() + .flatten(); + let base_fee_wei = head_block + .as_ref() + .and_then(|b| b.header.base_fee_per_gas) + .map(u128::from) + .unwrap_or(DEFAULT_BASE_FEE_WEI); + let block_timestamp = head_block + .as_ref() + .map(|b| b.header.timestamp) + .unwrap_or(0); + + let mut latest_states: HashMap = HashMap::new(); + for (idx, pool) in pools.iter().enumerate() { + match fetch_pool_state_at(provider, pool, head).await? 
{ + Some(state) => { + latest_states.insert(idx, state); + } + None => { + debug!( + pool = %pool.address, + "no state returned at head; skipping" + ); + } + } + } + + let mut token_index = TokenIndex::new(); + let mut graph = PriceGraph::new(10); + for (idx, pool) in pools.iter().enumerate() { + let Some(state) = latest_states.get(&idx).copied() else { + continue; + }; + let t0 = token_index.get_or_insert(pool.token0); + let t1 = token_index.get_or_insert(pool.token1); + graph.resize(token_index.len()); + + let rate_0to1 = match state { + PoolState::V2 { r0, r1 } => { + let r0f = u256_to_f64(r0); + let r1f = u256_to_f64(r1); + if r0f == 0.0 || r1f == 0.0 { + continue; + } + r1f / r0f + } + PoolState::V3 { sqrt_price_x96 } => { + let s = u256_to_f64(sqrt_price_x96); + if s == 0.0 { + continue; + } + let root = s / Q96; + root * root + } + }; + if !rate_0to1.is_finite() || rate_0to1 <= 0.0 { + continue; + } + let fee = (10_000 - pool.fee_bps) as f64 / 10_000.0; + let pool_id = PoolId { + address: pool.address, + protocol: pool.protocol, + }; + graph.add_edge(t0, t1, rate_0to1 * fee, pool_id, pool.address, pool.protocol, U256::ZERO); + graph.add_edge(t1, t0, (1.0 / rate_0to1) * fee, pool_id, pool.address, pool.protocol, U256::ZERO); + } + + Ok(ScorerState { + graph, + token_index, + latest_states, + base_fee_wei, + block_number: head, + block_timestamp, + }) +} + +/// Rewrite a `wss://...` URL to `https://...` so the alloy HTTP provider +/// can use it. No-op for already-HTTP URLs. 
+fn rewrite_ws_to_http(url: &str) -> String {
+    // Secure WebSocket maps to HTTPS, plain WebSocket to HTTP; anything
+    // else is handed back untouched.
+    if let Some(rest) = url.strip_prefix("wss://") {
+        format!("https://{rest}")
+    } else if let Some(rest) = url.strip_prefix("ws://") {
+        format!("http://{rest}")
+    } else {
+        url.to_string()
+    }
+}
+
+/// Spawn a background tokio task that serves `registry` in the Prometheus
+/// text exposition format on `addr`.
+///
+/// Returns immediately. If the listener cannot be bound the failure is
+/// logged at error level and the task exits; the caller is not notified.
+///
+/// The server is deliberately minimal: it never reads the request, answers
+/// every accepted connection with a `200 OK` carrying the current metrics,
+/// and handles connections one at a time inside the accept loop.
+fn start_metrics_server(addr: SocketAddr, registry: Registry) {
+    use tokio::io::AsyncWriteExt;
+
+    tokio::spawn(async move {
+        // Encode a fresh snapshot of the registry for each scrape. An encode
+        // failure is logged and whatever was written to the buffer so far is
+        // still returned, so a scrape never fails outright.
+        let render_metrics = move || {
+            let encoder = TextEncoder::new();
+            let mut buf = Vec::new();
+            if let Err(e) = encoder.encode(&registry.gather(), &mut buf) {
+                debug!(error = %e, "metrics encode failed");
+            }
+            buf
+        };
+
+        let listener = match tokio::net::TcpListener::bind(addr).await {
+            Ok(l) => l,
+            Err(e) => {
+                error!(error = %e, "metrics listener bind failed");
+                return;
+            }
+        };
+        info!("metrics server listening at {addr}");
+
+        loop {
+            let mut socket = match listener.accept().await {
+                Ok((socket, _)) => socket,
+                Err(e) => {
+                    debug!(error = %e, "metrics accept failed");
+                    continue;
+                }
+            };
+
+            let body = render_metrics();
+            let response = format!(
+                "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nContent-Length: {}\r\n\r\n",
+                body.len()
+            );
+            if let Err(e) = socket.write_all(response.as_bytes()).await {
+                debug!(error = %e, "metrics write header failed");
+                continue;
+            }
+            if let Err(e) = socket.write_all(&body).await {
+                debug!(error = %e, "metrics write body failed");
+                continue;
+            }
+            // `socket` is dropped here, which closes the connection.
+        }
+    });
+}
+
+// Silence the unused-but-imported warning for default-but-not-needed
+// addresses pulled in via alloy::primitives::address. Removing the import
+// would break the inlined helpers if they're ever expanded to include
+// well-known mainnet token labels.
+#[allow(dead_code)] +const _DUMMY_WETH: Address = address!("C02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2"); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rewrite_ws_to_http_handles_both_schemes() { + assert_eq!( + rewrite_ws_to_http("wss://eth.example/v2/key"), + "https://eth.example/v2/key" + ); + assert_eq!( + rewrite_ws_to_http("ws://eth.example/v2/key"), + "http://eth.example/v2/key" + ); + assert_eq!( + rewrite_ws_to_http("https://eth.example/v2/key"), + "https://eth.example/v2/key" + ); + } + + #[test] + fn state_to_graph_reserves_v2_passes_through() { + let s = PoolState::V2 { + r0: U256::from(1_000_000u64), + r1: U256::from(2_000_000u64), + }; + let (r0, r1) = state_to_graph_reserves(&s); + assert!((r0 - 1_000_000.0).abs() < 1.0); + assert!((r1 - 2_000_000.0).abs() < 1.0); + } + + #[test] + fn state_to_graph_reserves_v3_uses_synthetic_pair() { + // sqrtPriceX96 = 2^96 → rate_0to1 = 1.0; synthetic (1.0, 1.0). + let s = PoolState::V3 { + sqrt_price_x96: U256::from_be_slice(&{ + let mut buf = [0u8; 32]; + buf[31 - 12] = 1; + buf + }), + }; + let (r0, r1) = state_to_graph_reserves(&s); + assert_eq!(r0, 1.0); + assert!(r1 > 0.0 && r1 < 2.0); + } + + #[test] + fn protocol_label_covers_supported_variants() { + for (p, expected) in [ + (ProtocolType::UniswapV2, "uni_v2"), + (ProtocolType::UniswapV3, "uni_v3"), + (ProtocolType::SushiSwap, "sushi"), + (ProtocolType::Curve, "curve"), + (ProtocolType::BalancerV2, "balancer"), + (ProtocolType::BancorV3, "bancor"), + ] { + assert_eq!(protocol_label(p), expected); + } + } + + #[test] + fn no_path_outcome_carries_negative_net_when_gas_given() { + let out = no_path_outcome(Some(50_000)); + assert_eq!(out.decision, DECISION_NO_PATH); + assert_eq!(out.net_profit_wei, -50_000); + assert_eq!(out.gas_estimate_wei, U256::from(50_000u64)); + } + + #[test] + fn uniswap_v2_get_amount_out_matches_constant_product() { + // 1 WETH in, 100 WETH / 200_000 USDC pool, 30 bps fee. 
+ // Exact math: amount_in_with_fee = 1e18 * 9970 = 9.97e21 + // numerator = 9.97e21 * 200e9 = 1.994e33 + // denominator = 100e18 * 10_000 + 9.97e21 ≈ 1.00997e24 + // out = 1.994e33 / 1.00997e24 ≈ 1.974e9 USDC (input is ~1% of pool + // depth so slippage compounds with the fee). Range below brackets + // the exact value while keeping wiggle room for unrelated changes + // to the formula. + let amount_in = U256::from(1_000_000_000_000_000_000u128); // 1 WETH (18 dec) + let reserve_in = U256::from(100_000_000_000_000_000_000u128); // 100 WETH + let reserve_out = U256::from(200_000_000_000u128); // 200_000 USDC (6 dec) + let out = uniswap_v2_get_amount_out(amount_in, reserve_in, reserve_out, 30).unwrap(); + let out_u64 = out.try_into().unwrap_or(u64::MAX); + assert!( + (1_970_000_000..=1_980_000_000).contains(&out_u64), + "expected ~1974 USDC, got {out_u64}" + ); + } + + #[test] + fn uniswap_v2_get_amount_out_rejects_zero_inputs() { + let r = U256::from(1_000_000u64); + assert!(uniswap_v2_get_amount_out(U256::ZERO, r, r, 30).is_none()); + assert!(uniswap_v2_get_amount_out(r, U256::ZERO, r, 30).is_none()); + assert!(uniswap_v2_get_amount_out(r, r, U256::ZERO, 30).is_none()); + } + + #[test] + fn u256_to_i128_saturating_handles_full_range() { + assert_eq!(u256_to_i128_saturating(U256::ZERO), 0); + assert_eq!(u256_to_i128_saturating(U256::from(42u64)), 42); + // i128::MAX fits exactly: high limb = i64::MAX, low limb = u64::MAX + let max_i128_as_u256 = U256::from(i128::MAX as u128); + assert_eq!(u256_to_i128_saturating(max_i128_as_u256), i128::MAX); + // Anything beyond i128::MAX saturates rather than wrapping. + let too_big = U256::from(1u128) << 127; // 2^127 — first value over i128::MAX + assert_eq!(u256_to_i128_saturating(too_big), i128::MAX); + // 2^192 lives in limb 3 — must saturate, not panic. 
+ let huge = U256::from(1u128) << 192; + assert_eq!(u256_to_i128_saturating(huge), i128::MAX); + } + + fn make_token_index() -> (TokenIndex, [usize; 3]) { + let a = address!("AAaaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaaaa"); + let b = address!("BBbbBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBbbbb"); + let c = address!("CCccCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCcccc"); + let mut idx = TokenIndex::new(); + let ia = idx.get_or_insert(a); + let ib = idx.get_or_insert(b); + let ic = idx.get_or_insert(c); + (idx, [ia, ib, ic]) + } + + fn loaded(addr_byte: u8, token0: Address, token1: Address) -> LoadedPool { + // Construct a unique address by repeating addr_byte across all 20 bytes. + let mut raw = [0u8; 20]; + raw.fill(addr_byte); + LoadedPool { + address: Address::from(raw), + token0, + token1, + protocol: ProtocolType::UniswapV2, + fee_bps: 30, + } + } + + #[test] + fn verify_cycle_u256_returns_none_when_v3_hop_present() { + // A two-hop cycle with a V3 hop in the middle must return None so + // the caller falls back to the f64 optimiser verdict. + let (token_index, [ta, tb, _tc]) = make_token_index(); + let a = *token_index.get_address(ta).unwrap(); + let b = *token_index.get_address(tb).unwrap(); + let mut pool_v3 = loaded(0x33, a, b); + pool_v3.protocol = ProtocolType::UniswapV3; + let pools = vec![pool_v3]; + + let mut graph = PriceGraph::new(token_index.len()); + graph.resize(token_index.len()); + graph.add_edge( + ta, + tb, + 1.0, + PoolId { address: pools[0].address, protocol: pools[0].protocol }, + pools[0].address, + pools[0].protocol, + U256::ZERO, + ); + + let mut states = HashMap::new(); + states.insert(0, PoolState::V3 { sqrt_price_x96: U256::from(1u64) }); + + let cycle = DetectedCycle { + path: vec![ta, tb], + total_weight: 0.0, + }; + assert!( + verify_cycle_u256(&cycle, &graph, &token_index, &pools, &states, U256::from(1u64)) + .is_none() + ); + } + + #[test] + fn verify_cycle_u256_walks_balanced_triangle() { + // Three V2 pools forming a balanced triangle. 
With balanced + // reserves and 30bps fee on each hop, an input of 1e18 should + // round-trip back to ~(1 - 3*0.003) * 1e18 ≈ 9.91e17 (lossy: the + // arb is unprofitable, which is the correct expected behaviour + // for a flat, no-edge triangle). + let (token_index, [ta, tb, tc]) = make_token_index(); + let a = *token_index.get_address(ta).unwrap(); + let b = *token_index.get_address(tb).unwrap(); + let c = *token_index.get_address(tc).unwrap(); + + let pools = vec![ + loaded(0x11, a, b), + loaded(0x22, b, c), + loaded(0x33, a, c), + ]; + + let mut graph = PriceGraph::new(token_index.len()); + graph.resize(token_index.len()); + // Three balanced edges at rate=1.0; only the U256 walk matters + // for the verifier's behaviour, so we don't bother making the + // weights realistic. + for (i, (from, to)) in [(ta, tb), (tb, tc), (tc, ta)].iter().enumerate() { + graph.add_edge( + *from, + *to, + 0.0, + PoolId { address: pools[i].address, protocol: pools[i].protocol }, + pools[i].address, + pools[i].protocol, + U256::ZERO, + ); + } + + let mut states = HashMap::new(); + // Balanced reserves: every pool 1e21 / 1e21 (no inter-pool edge). + let r = U256::from(1_000_000_000_000_000_000_000u128); + for i in 0..3 { + states.insert(i, PoolState::V2 { r0: r, r1: r }); + } + + let cycle = DetectedCycle { + path: vec![ta, tb, tc, ta], + total_weight: 0.0, + }; + let input = U256::from(1_000_000_000_000_000_000u128); // 1.0 + let out = verify_cycle_u256(&cycle, &graph, &token_index, &pools, &states, input).unwrap(); + assert!(out < input); + } + + #[test] + fn verify_cycle_u256_rejects_self_loop_with_shifted_reserves() { + // A→B→A on a single V2 pool. Without per-hop reserve evolution, + // the verifier returns gross_out >> input for large inputs + // because hop 2 sees the pre-swap reserves and "regenerates" the + // input. With evolution, gross_out < input for *every* input + // (double 30 bps fee is always lossy on a self-loop, regardless + // of input size). 
This is the exact bug that fabricated 80B ETH + // ghost profit on the soak's DAI/USDC self-loop row. + let (token_index, [ta, tb, _tc]) = make_token_index(); + let a = *token_index.get_address(ta).unwrap(); + let b = *token_index.get_address(tb).unwrap(); + let pools = vec![loaded(0x55, a, b)]; + + let mut graph = PriceGraph::new(token_index.len()); + graph.resize(token_index.len()); + let pid = PoolId { address: pools[0].address, protocol: pools[0].protocol }; + graph.add_edge(ta, tb, 1.0, pid, pools[0].address, pools[0].protocol, U256::ZERO); + graph.add_edge(tb, ta, 1.0, pid, pools[0].address, pools[0].protocol, U256::ZERO); + + // DAI/USDC-shaped reserves: 5M DAI (1e25 base units) / 5M USDC + // (5e12 base units). Mainnet-scale where the f64 precision bias + // would otherwise bite. + let r_a = U256::from(5_000_000u128) * U256::from(10u128).pow(U256::from(18u64)); + let r_b = U256::from(5_000_000u128) * U256::from(10u128).pow(U256::from(6u64)); + let mut states = HashMap::new(); + states.insert(0, PoolState::V2 { r0: r_a, r1: r_b }); + + let cycle = DetectedCycle { path: vec![ta, tb, ta], total_weight: 0.0 }; + + // Sweep inputs across four orders of magnitude — small inputs, + // pool-fraction inputs, and oversized inputs all must come back + // strictly less than input (double fee + slippage compound). 
+ for &exp in &[16u32, 18, 21, 24] { + let input = U256::from(10u128).pow(U256::from(exp)); + let out = verify_cycle_u256(&cycle, &graph, &token_index, &pools, &states, input) + .expect("self-loop should resolve"); + assert!( + out < input, + "self-loop at input 10^{exp} returned {out} >= {input} — reserve evolution missing", + ); + } + } + + #[test] + fn parse_db_protocol_maps_short_form() { + assert_eq!(parse_db_protocol("uni_v2"), Some(ProtocolType::UniswapV2)); + assert_eq!(parse_db_protocol("uni_v3"), Some(ProtocolType::UniswapV3)); + assert_eq!(parse_db_protocol("sushi"), Some(ProtocolType::SushiSwap)); + // Long forms are config-only; load_predicted_pools should + // reject them so we never accidentally route a config row + // through the DB path. + assert_eq!(parse_db_protocol("uniswap_v2"), None); + assert_eq!(parse_db_protocol("sushiswap"), None); + // Balancer / Curve / Bancor are valid engine protocols but the + // scorer can't compute reserves for them yet — they MUST be + // refused here so an unsupported pool doesn't sneak in with + // wrong fee_bps + nonexistent state. + assert_eq!(parse_db_protocol("balancer"), None); + assert_eq!(parse_db_protocol("curve"), None); + assert_eq!(parse_db_protocol("bancor"), None); + assert_eq!(parse_db_protocol(""), None); + } + + #[test] + fn default_fee_bps_constants_match_spec() { + // Treat as a behavioural contract: changing either default + // changes graph weight for every DB-augmented pool, which + // shifts cycle rankings. Force the change to come through code + // review by surfacing here. + assert_eq!(DEFAULT_V2_FEE_BPS, 30); + assert_eq!(DEFAULT_V3_FEE_BPS, 5); + } + + #[test] + fn max_db_predicted_pools_is_bounded() { + // Sanity floor: needs to be both positive and below the RPC + // fan-out budget (one eth_call per pool at bootstrap; ~256 is + // the production-tested ceiling). Surfaced as a behavioural + // contract so retunes go through review. 
+ const _: () = { + assert!(MAX_DB_PREDICTED_POOLS > 0); + assert!(MAX_DB_PREDICTED_POOLS <= 1024); + }; + assert_eq!(MAX_DB_PREDICTED_POOLS, 256); + } + + #[test] + fn absurdity_floor_is_set_at_one_eth() { + // The constant gates "verifier inconclusive but f64 says huge" + // → REVERTED. If anyone retunes it, this test reminds them to + // re-read the comment block and re-run the soak. + assert_eq!(MAX_PLAUSIBLE_F64_NET_WEI, 1_000_000_000_000_000_000i128); + } + + // ── V3 verifier tests ───────────────────────────────────────── + + fn loaded_v3(addr_byte: u8, token0: Address, token1: Address) -> LoadedPool { + let mut raw = [0u8; 20]; + raw.fill(addr_byte); + LoadedPool { + address: Address::from(raw), + token0, + token1, + protocol: ProtocolType::UniswapV3, + fee_bps: 5, + } + } + + fn loaded_curve(addr_byte: u8, token0: Address, token1: Address) -> LoadedPool { + let mut raw = [0u8; 20]; + raw.fill(addr_byte); + LoadedPool { + address: Address::from(raw), + token0, + token1, + protocol: ProtocolType::Curve, + fee_bps: 4, + } + } + + #[test] + fn is_v3_touching_cycle_v2_only_returns_false() { + let (token_index, [ta, tb, tc]) = make_token_index(); + let a = *token_index.get_address(ta).unwrap(); + let b = *token_index.get_address(tb).unwrap(); + let c = *token_index.get_address(tc).unwrap(); + let pools = vec![loaded(0x11, a, b), loaded(0x22, b, c), loaded(0x33, a, c)]; + let mut graph = PriceGraph::new(token_index.len()); + graph.resize(token_index.len()); + for (i, &(from, to)) in [(ta, tb), (tb, tc), (tc, ta)].iter().enumerate() { + graph.add_edge( + from, to, 0.0, + PoolId { address: pools[i].address, protocol: pools[i].protocol }, + pools[i].address, pools[i].protocol, U256::ZERO, + ); + } + let r = U256::from(1_000_000u64); + let mut states = HashMap::new(); + for i in 0..3 { states.insert(i, PoolState::V2 { r0: r, r1: r }); } + let cycle = DetectedCycle { path: vec![ta, tb, tc, ta], total_weight: 0.0 }; + assert!(!is_v3_touching_cycle(&cycle, &graph, 
&token_index, &pools, &states)); + } + + #[test] + fn is_v3_touching_cycle_mixed_returns_true() { + let (token_index, [ta, tb, _tc]) = make_token_index(); + let a = *token_index.get_address(ta).unwrap(); + let b = *token_index.get_address(tb).unwrap(); + // Pool 0 is V2, pool 1 is V3 — mixed cycle. + let pools = vec![loaded(0x11, a, b), loaded_v3(0x22, a, b)]; + let mut graph = PriceGraph::new(token_index.len()); + graph.resize(token_index.len()); + graph.add_edge( + ta, tb, 0.0, + PoolId { address: pools[0].address, protocol: pools[0].protocol }, + pools[0].address, pools[0].protocol, U256::ZERO, + ); + graph.add_edge( + tb, ta, 0.0, + PoolId { address: pools[1].address, protocol: pools[1].protocol }, + pools[1].address, pools[1].protocol, U256::ZERO, + ); + let mut states = HashMap::new(); + states.insert(0, PoolState::V2 { r0: U256::from(1u64), r1: U256::from(1u64) }); + states.insert(1, PoolState::V3 { sqrt_price_x96: U256::from(1u64) }); + let cycle = DetectedCycle { path: vec![ta, tb, ta], total_weight: 0.0 }; + assert!(is_v3_touching_cycle(&cycle, &graph, &token_index, &pools, &states)); + } + + #[test] + fn is_v3_touching_cycle_v3_only_returns_true() { + let (token_index, [ta, tb, _tc]) = make_token_index(); + let a = *token_index.get_address(ta).unwrap(); + let b = *token_index.get_address(tb).unwrap(); + let pools = vec![loaded_v3(0x44, a, b)]; + let mut graph = PriceGraph::new(token_index.len()); + graph.resize(token_index.len()); + let pid = PoolId { address: pools[0].address, protocol: pools[0].protocol }; + graph.add_edge(ta, tb, 0.0, pid, pools[0].address, pools[0].protocol, U256::ZERO); + graph.add_edge(tb, ta, 0.0, pid, pools[0].address, pools[0].protocol, U256::ZERO); + let mut states = HashMap::new(); + states.insert(0, PoolState::V3 { sqrt_price_x96: U256::from(1u64) }); + let cycle = DetectedCycle { path: vec![ta, tb, ta], total_weight: 0.0 }; + assert!(is_v3_touching_cycle(&cycle, &graph, &token_index, &pools, &states)); + } + + #[test] + fn 
build_steps_returns_none_for_curve_hop() { + let (token_index, [ta, tb, _tc]) = make_token_index(); + let a = *token_index.get_address(ta).unwrap(); + let b = *token_index.get_address(tb).unwrap(); + let pools = vec![loaded_curve(0x77, a, b)]; + let mut graph = PriceGraph::new(token_index.len()); + graph.resize(token_index.len()); + let pid = PoolId { address: pools[0].address, protocol: pools[0].protocol }; + graph.add_edge(ta, tb, 0.0, pid, pools[0].address, pools[0].protocol, U256::ZERO); + let mut states = HashMap::new(); + states.insert(0, PoolState::V2 { r0: U256::from(1_000_000u64), r1: U256::from(1_000_000u64) }); + let cycle = DetectedCycle { path: vec![ta, tb], total_weight: 0.0 }; + let executor_addr = address!("1111111111111111111111111111111111111111"); + assert!(build_steps_from_cycle_sync( + &cycle, &graph, &token_index, &pools, &states, executor_addr, U256::from(1_000u64), + ).is_none()); + } + + #[test] + fn revm_verdict_decision_mapping_reverted() { + let rv = RevmVerdict { gross_profit_wei: U256::ZERO, gas_used: 100_000, reverted: true }; + let (net, realised, dec) = revm_verdict_to_decision(rv, 50_000); + assert_eq!(dec, DECISION_REVERTED); + assert!(net < 0); + assert_eq!(realised, 0); + } + + #[test] + fn revm_verdict_decision_mapping_profitable() { + let rv = RevmVerdict { + gross_profit_wei: U256::from(200_000u64), + gas_used: 100_000, + reverted: false, + }; + let (net, _realised, dec) = revm_verdict_to_decision(rv, 50_000); + assert_eq!(dec, DECISION_PROFITABLE); + assert!(net > 0); + } + + #[test] + fn revm_verdict_decision_mapping_unprofitable() { + let rv = RevmVerdict { + gross_profit_wei: U256::from(10_000u64), + gas_used: 100_000, + reverted: false, + }; + let (net, _realised, dec) = revm_verdict_to_decision(rv, 50_000); + assert_eq!(dec, DECISION_UNPROFITABLE); + assert!(net <= 0); + } + + #[test] + fn f64_fallback_verdict_above_floor_reverted() { + let big_net = MAX_PLAUSIBLE_F64_NET_WEI + 1; + let (_net, _realised, dec) = 
f64_fallback_verdict(big_net, 50_000); + assert_eq!(dec, DECISION_REVERTED); + } + + #[test] + fn f64_fallback_verdict_below_floor_profitable() { + let small_net = 1_000_000i128; + let (_net, _realised, dec) = f64_fallback_verdict(small_net, 50_000); + assert_eq!(dec, DECISION_PROFITABLE); + } + + #[test] + fn f64_fallback_verdict_negative_unprofitable() { + let neg = -500_000i128; + let (net, _realised, dec) = f64_fallback_verdict(neg, 50_000); + assert_eq!(dec, DECISION_UNPROFITABLE); + assert!(net < 0); + } + + #[test] + fn balance_slot_for_known_tokens() { + assert_eq!(balance_slot_for_token(WETH_ADDR), Some(U256::from(3u64))); + assert_eq!(balance_slot_for_token(USDC_ADDR), Some(U256::from(9u64))); + assert_eq!(balance_slot_for_token(DAI_ADDR), Some(U256::from(2u64))); + assert_eq!(balance_slot_for_token(USDT_ADDR), Some(U256::from(2u64))); + // Unknown token → None. + assert_eq!( + balance_slot_for_token(address!("0000000000000000000000000000000000000042")), + None, + ); + } +} diff --git a/crates/grpc-server/src/engine.rs b/crates/grpc-server/src/engine.rs index e636e7e..589301f 100644 --- a/crates/grpc-server/src/engine.rs +++ b/crates/grpc-server/src/engine.rs @@ -991,6 +991,24 @@ impl AetherEngine { (1.0 / price) * fee, meta.pool_id, pool_addr, meta.protocol, liq, ); + // Seed the synthetic `(1.0, spot_price)` reserve + // pair on the edge. `add_edge` only sets weight; + // without this follow-up the edge stays at + // `reserve_in = reserve_out = 0.0` and the + // `reserves_zero` guard in + // `mempool_pipeline::try_post_state_scan` drops + // every V3 swap before it reaches the post-state + // predictor. The convention `(1.0, spot_price)` + // matches the scorer's `state_to_graph_reserves` + // V3 branch so the two sides stay in lockstep. 
+ graph.update_edge_from_reserves( + meta.token0_idx, meta.token1_idx, + meta.pool_id, 1.0, price, fee, + ); + graph.update_edge_from_reserves( + meta.token1_idx, meta.token0_idx, + meta.pool_id, 1.0, 1.0 / price, fee, + ); // Seed the V3 pool-state cache. Liquidity is set // to zero here because slot0 does not expose it — // a separate `liquidity()` RPC would be required @@ -1240,6 +1258,28 @@ impl AetherEngine { meta.protocol, liq, ); + // Refresh the synthetic `(1.0, spot_price)` reserve + // pair on the edge so live V3 sqrtPrice updates flow + // through to `mempool_pipeline::try_post_state_scan`'s + // `reserves_zero` guard. Same convention used by the + // bootstrap branch and the scorer's + // `state_to_graph_reserves`. + graph.update_edge_from_reserves( + meta.token0_idx, + meta.token1_idx, + meta.pool_id, + 1.0, + price, + fee, + ); + graph.update_edge_from_reserves( + meta.token1_idx, + meta.token0_idx, + meta.pool_id, + 1.0, + 1.0 / price, + fee, + ); // Snapshot is published once per detection cycle, not per event. // Refresh the V3 pool-state cache entry. The event // carries everything `predict_post_state` needs @@ -2570,6 +2610,60 @@ mod tests { assert!(graph.has_dirty_edges()); } + /// V3 graph edges must carry the synthetic `(1.0, spot_price)` reserve + /// pair after a V3Update event. Regression guard for the bug where + /// `add_edge` set the weight but left `reserve_in == reserve_out == 0.0`, + /// causing `mempool_pipeline::try_post_state_scan`'s `reserves_zero` + /// guard to drop every V3 swap before reaching the post-state predictor. 
+ #[tokio::test] + async fn test_v3_update_seeds_synthetic_reserves() { + let (tx, _rx) = broadcast::channel(100); + let engine = AetherEngine::new(EngineConfig::default(), tx); + + let pool = Address::repeat_byte(0xCD); + let token0 = Address::repeat_byte(0x31); + let token1 = Address::repeat_byte(0x41); + + engine + .register_pool(pool, token0, token1, ProtocolType::UniswapV3, 5) + .await; + + // sqrt_price_x96 = 2 * 2^96 → price = 4.0. Asymmetric value catches + // any direction-swap bug between forward and reverse edges. + let sqrt_x96 = U256::from(2u128) * (U256::from(1u128) << 96); + let event = PoolEvent::V3Update { + pool, + sqrt_price_x96: sqrt_x96, + liquidity: 1_000_000, + tick: 0, + }; + engine.handle_pool_update(event).await; + + let reg = engine.pool_registry.load(); + let meta = reg.get(&pool).expect("V3 pool registered"); + let t0 = meta.token0_idx; + let t1 = meta.token1_idx; + let pool_id = meta.pool_id; + + let graph = engine.working_graph.lock().await; + let fwd = graph + .edges_from(t0) + .iter() + .find(|e| e.to == t1 && e.pool_id == pool_id) + .expect("V3 forward edge present"); + let rev = graph + .edges_from(t1) + .iter() + .find(|e| e.to == t0 && e.pool_id == pool_id) + .expect("V3 reverse edge present"); + + // price = (sqrt/2^96)^2 = 2^2 = 4.0 + assert!((fwd.reserve_in - 1.0).abs() < 1e-9, "fwd reserve_in {}", fwd.reserve_in); + assert!((fwd.reserve_out - 4.0).abs() < 1e-6, "fwd reserve_out {}", fwd.reserve_out); + assert!((rev.reserve_in - 1.0).abs() < 1e-9, "rev reserve_in {}", rev.reserve_in); + assert!((rev.reserve_out - 0.25).abs() < 1e-9, "rev reserve_out {}", rev.reserve_out); + } + #[tokio::test] async fn test_pool_created_auto_registers() { let (tx, _rx) = broadcast::channel(100); diff --git a/crates/grpc-server/src/lib.rs b/crates/grpc-server/src/lib.rs index 7d8c174..19ef24d 100644 --- a/crates/grpc-server/src/lib.rs +++ b/crates/grpc-server/src/lib.rs @@ -6,6 +6,7 @@ /// crate-private; only the two types the binary and 
integration tests /// actually need are re-exported publicly. pub(crate) mod metrics; +pub mod profitability_writer; pub mod provider; pub use metrics::{start_metrics_server, EngineMetrics}; diff --git a/crates/grpc-server/src/main.rs b/crates/grpc-server/src/main.rs index 315803e..1ce30d7 100644 --- a/crates/grpc-server/src/main.rs +++ b/crates/grpc-server/src/main.rs @@ -14,6 +14,7 @@ use tokio_stream::wrappers::UnixListenerStream; mod cycle_gating; mod engine; mod mempool_pipeline; +mod mempool_writer; mod pipeline; mod service; mod tracing_init; @@ -146,6 +147,15 @@ async fn main() -> Result<(), Box> { // the engine's BellmanFord config so the analytical scan // honours the same hop / latency budget as the main path. let engine_cfg = EngineConfig::default(); + // Mempool prediction writer: optional persistence to a separate + // Postgres DSN. MEMPOOL_LEDGER_DSN unset → NoopMempoolSink, no + // DB writes, behaviour identical to today. Distinct from the + // trade ledger's DATABASE_URL so an operator can enable mempool + // observability without provisioning the executor schema. 
+ let writer_metrics = + mempool_writer::MempoolWriterMetrics::register(metrics.registry()); + let prediction_sink = mempool_writer::mempool_writer_from_env(writer_metrics).await; + let engine_git_sha = std::env::var("AETHER_GIT_SHA").ok(); let sim_ctx = Arc::new(mempool_pipeline::SimContext::new( Arc::clone(engine.pool_registry()), Arc::clone(engine.token_index()), @@ -155,6 +165,8 @@ async fn main() -> Result<(), Box> { engine_cfg.detection_time_budget_us, ), Arc::clone(engine.pool_states()), + prediction_sink, + engine_git_sha, )); let pipeline_handle = mempool_pipeline::spawn_mempool_pipeline( Arc::clone(engine.event_channels()), diff --git a/crates/grpc-server/src/mempool_pipeline.rs b/crates/grpc-server/src/mempool_pipeline.rs index 1b2a8ba..e509132 100644 --- a/crates/grpc-server/src/mempool_pipeline.rs +++ b/crates/grpc-server/src/mempool_pipeline.rs @@ -32,10 +32,16 @@ use aether_state::snapshot::SnapshotManager; use aether_state::token_index::TokenIndex; use alloy::primitives::{Address, U256}; use arc_swap::ArcSwap; +use chrono::Utc; use tokio::sync::watch; use tracing::{debug, info, warn}; +use uuid::Uuid; use crate::engine::PoolMetadata; +use crate::mempool_writer::{ + MempoolPredictionSink, NewMempoolPrediction, PredictedPostState, PROTOCOL_BALANCER, + PROTOCOL_SUSHI, PROTOCOL_UNI_V2, PROTOCOL_UNI_V3, +}; use crate::EngineMetrics; /// Pair-keyed pool index built from the live pool registry. Lookup is O(1) @@ -80,6 +86,15 @@ pub struct SimContext { /// Balancer mempool sim path to call `predict_post_state_with_fallback` /// without round-tripping through the pool registry RPC. pub pool_states: PoolStateCache, + /// Optional persistence sink for mempool predictions. `Arc` + /// when `MEMPOOL_LEDGER_DSN` is unset (no DB writes, no behaviour + /// change); `Arc` when set. Always present so the + /// post-state path can call `insert_prediction` unconditionally. 
+ pub prediction_sink: Arc, + /// Engine build's git sha, copied onto every persisted prediction so + /// the reconciler / scorer can correlate row outcomes with the engine + /// version that produced them. `None` when the env var is unset. + pub engine_git_sha: Option, /// Cached `(registry_ptr, PairIndex)` so the second and following pending /// swaps under the same registry generation lookup in O(1). The Mutex /// guards rebuild only — the steady-state path is `lock + ptr_eq + read`. @@ -87,12 +102,15 @@ pub struct SimContext { } impl SimContext { + #[allow(clippy::too_many_arguments)] pub fn new( pool_registry: Arc>>, token_index: Arc>, snapshot_manager: Arc, detector: BellmanFord, pool_states: PoolStateCache, + prediction_sink: Arc, + engine_git_sha: Option, ) -> Self { Self { pool_registry, @@ -100,6 +118,8 @@ impl SimContext { snapshot_manager, detector, pool_states, + prediction_sink, + engine_git_sha, pair_index_cache: Mutex::new(None), } } @@ -215,8 +235,9 @@ fn handle_event( let ctx = Arc::clone(ctx); let swap = swap.clone(); let router_label = router_label.clone(); + let tx_hash = event.tx_hash; tokio::task::spawn_blocking(move || { - try_post_state_scan(&metrics, &ctx, &router_label, &swap); + try_post_state_scan(&metrics, &ctx, &router_label, &swap, tx_hash, to); }); } } @@ -352,6 +373,8 @@ fn try_post_state_scan( ctx: &SimContext, router_label: &str, swap: &DecodedSwap, + event_tx_hash: alloy::primitives::B256, + event_to: Address, ) { let target_protocol = match swap.protocol { Protocol::UniswapV2 => ProtocolType::UniswapV2, @@ -453,6 +476,47 @@ fn try_post_state_scan( .detect_from_affected(&graph, &[in_idx, out_idx]); let profitable: Vec<_> = cycles.into_iter().filter(|c| c.is_profitable()).collect(); + // Persist the prediction unconditionally — both profitable and + // unprofitable swaps are useful signal for the reconciler (issue #131 + // Go half), which needs the full population of decoded mempool swaps + // to compute block / ordering / 
pool-path accuracy. The + // `profit_factor_predicted` column is the SQL signal that the engine + // would have considered acting on the swap. + let post_state_json = match swap.protocol { + Protocol::UniswapV2 | Protocol::SushiSwap => PredictedPostState::V2 { + reserve_in: post_in, + reserve_out: post_out, + }, + Protocol::UniswapV3 => PredictedPostState::V3 { + reserve_in: post_in, + reserve_out: post_out, + }, + Protocol::BalancerV2 => PredictedPostState::Balancer { + reserve_in: post_in, + reserve_out: post_out, + }, + } + .into_json(); + let prediction = NewMempoolPrediction { + prediction_id: Uuid::new_v4(), + decoded_at: Utc::now(), + pending_tx_hash: event_tx_hash, + router_address: event_to, + protocol: decoder_protocol_label(swap.protocol), + token_in: swap.token_in, + token_out: swap.token_out, + amount_in: swap.amount_in, + pool_address: Some(meta.pool_id.address), + predicted_target_block: snapshot.block_number.saturating_add(1), + predicted_post_state: post_state_json, + profit_factor_predicted: profitable.first().map(|c| c.profit_factor()), + // Reserved for the MEV-Share SSE path; Alchemy WS pendings carry + // no builder-side timestamp today. + detection_lead_ms: None, + engine_git_sha: ctx.engine_git_sha.clone(), + }; + ctx.prediction_sink.insert_prediction(prediction); + if profitable.is_empty() { metrics.inc_pending_arb_sim_skipped("no_profitable_cycle"); return; @@ -476,6 +540,18 @@ fn try_post_state_scan( ); } +/// Wire label for the `protocol` column on `mempool_predictions`. Pinned to +/// the strings declared in [`crate::mempool_writer`] so the writer and the +/// pipeline cannot drift. Matches issue #131's schema body. 
+fn decoder_protocol_label(p: Protocol) -> &'static str { + match p { + Protocol::UniswapV2 => PROTOCOL_UNI_V2, + Protocol::SushiSwap => PROTOCOL_SUSHI, + Protocol::UniswapV3 => PROTOCOL_UNI_V3, + Protocol::BalancerV2 => PROTOCOL_BALANCER, + } +} + /// Map a V3 / Balancer post-state into the (post_in, post_out) reserves the /// price graph stores per edge. Curve cannot reach here — the router /// decoder rejects every Curve calldata shape with `CurveUnsupported` @@ -758,6 +834,7 @@ mod tests { /// empty, snapshot has a zero-vertex graph. Any `lookup_pool` returns /// `None`, which is what the `not_in_registry` test wants anyway. fn empty_sim_ctx() -> Arc { + use crate::mempool_writer::NoopMempoolSink; use aether_pools::new_pool_state_cache; use aether_state::price_graph::PriceGraph; Arc::new(SimContext::new( @@ -766,6 +843,8 @@ mod tests { Arc::new(SnapshotManager::new(PriceGraph::new(0))), BellmanFord::new(3, 1_000), new_pool_state_cache(), + Arc::new(NoopMempoolSink::new()), + None, )) } diff --git a/crates/grpc-server/src/mempool_writer.rs b/crates/grpc-server/src/mempool_writer.rs new file mode 100644 index 0000000..5f22b2c --- /dev/null +++ b/crates/grpc-server/src/mempool_writer.rs @@ -0,0 +1,584 @@ +//! Mempool prediction writer. +//! +//! Sibling pattern to `aether_common::db::PgLedger`: the hot path enqueues +//! a [`NewMempoolPrediction`] onto a bounded mpsc and returns; a dedicated +//! writer task drains the channel and runs `INSERT`s through `sqlx::PgPool`. +//! Channel saturation drops the row (with metric) so a slow Postgres can +//! never exert unbounded backpressure on the mempool decode pipeline. +//! +//! Independent of the trade ledger by design — distinct DSN +//! (`MEMPOOL_LEDGER_DSN` vs `DATABASE_URL`), distinct connection pool, +//! distinct metric namespace. An operator can enable mempool observability +//! without provisioning the executor schema and vice versa. +//! +//! Observability surface (registered against the engine's +//! 
`prometheus::Registry` so a single `/metrics` endpoint emits everything): +//! +//! | Metric | Type | Labels | +//! |---|---|---| +//! | `aether_mempool_predictions_persisted_total` | Counter | `protocol` | +//! | `aether_mempool_writer_drops_total` | Counter | — | +//! | `aether_mempool_writer_queue_depth` | Gauge | — | +//! | `aether_mempool_writer_write_latency_ms` | Histogram | `result` (`ok`/`err`) | +//! +//! See `migrations/0003_mempool_predictions.sql` for the schema. + +use std::str::FromStr; +use std::sync::Arc; +use std::time::Instant; + +use alloy::primitives::{Address, B256, U256}; +use bigdecimal::BigDecimal; +use chrono::{DateTime, Utc}; +use prometheus::{HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, Opts, Registry}; +use serde::{Deserialize, Serialize}; +use sqlx::postgres::{PgPool, PgPoolOptions}; +use tokio::sync::mpsc; +use uuid::Uuid; + +/// Channel depth between the decode pipeline and the writer task. Sized for +/// ~5 s of bursty pending-tx decodes at peak rate (~100 swaps/s sustained +/// during high mempool activity → 512 / 100 ≈ 5 s). Breached only when +/// Postgres stalls; the drops counter is the alert signal. +const WRITER_CHANNEL_CAPACITY: usize = 512; + +/// sqlx connection pool size for the mempool writer. Sized below the trade +/// ledger's pool (4 vs 8) because the prediction insert is a smaller, fixed +/// shape with no per-arb cross-table writes — four connections saturate the +/// writer task without leaving the pool idle. +const WRITER_POOL_SIZE: u32 = 4; + +/// Wire labels for the `protocol` column. Matches the rendering in issue +/// #131's schema body so SQL `WHERE protocol = 'uni_v2'` works without a +/// reverse mapping table. Kept in sync with [`decoder_protocol_label`] in +/// `mempool_pipeline.rs` — both must produce the same string per decoded +/// protocol. 
+pub const PROTOCOL_UNI_V2: &str = "uni_v2"; +pub const PROTOCOL_SUSHI: &str = "sushi"; +pub const PROTOCOL_UNI_V3: &str = "uni_v3"; +/// Reserved for a future Curve decoder path. The router decoder rejects +/// every Curve calldata shape with `CurveUnsupported` today, so no writer +/// call ever lands here — but the constant documents the schema's +/// `protocol` TEXT domain so a future decoder addition does not introduce +/// a new wire label. +#[allow(dead_code)] +pub const PROTOCOL_CURVE: &str = "curve"; +pub const PROTOCOL_BALANCER: &str = "balancer"; + +/// Insert payload for the `mempool_predictions` table. Field shapes mirror +/// the SQL schema 1:1 so a sqlx bind is a straight enumeration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NewMempoolPrediction { + pub prediction_id: Uuid, + /// Event time — when the decode pipeline accepted the pending tx. + /// Per the migration's clock-authority policy this is CLIENT-SET and + /// the writer MUST populate it; the schema's `DEFAULT now()` is a + /// psql-level safety net only. + pub decoded_at: DateTime, + pub pending_tx_hash: B256, + pub router_address: Address, + /// One of [`PROTOCOL_UNI_V2`] / [`PROTOCOL_SUSHI`] / [`PROTOCOL_UNI_V3`] / + /// [`PROTOCOL_CURVE`] / [`PROTOCOL_BALANCER`]. Bound to `&'static str` + /// (not [`String`]) so callers cannot invent values the reconciler is + /// unprepared for. + pub protocol: &'static str, + pub token_in: Address, + pub token_out: Address, + pub amount_in: U256, + pub pool_address: Option
, + pub predicted_target_block: u64, + /// JSONB payload describing the post-state the analytical sim produced. + /// Shape varies by protocol; consumers should match on `kind` first. + /// See [`PredictedPostState`] for the writer-side helpers. + pub predicted_post_state: serde_json::Value, + /// `Some(f)` when the post-state Bellman-Ford scan found a profitable + /// cycle; `None` when the scan ran but the result was unprofitable. + pub profit_factor_predicted: Option, + /// Reserved for the MEV-Share SSE path (issue #126) — Alchemy WS does + /// not expose a builder-side timestamp today, so this is always `None` + /// in the current pipeline. Kept on the payload so the schema and + /// writer stay forward-compatible. + pub detection_lead_ms: Option, + pub engine_git_sha: Option, +} + +/// Convenience builder for the `predicted_post_state` JSONB column. The +/// reconciler (issue #131 Go half) and the profitability scorer (#132) +/// inspect `kind` first; per-variant fields then carry the protocol-specific +/// state. Kept here, in the writer crate, because every consumer is on the +/// Rust side today — emitting the JSON via [`serde_json::Value`] avoids a +/// generic enum dance on the read side. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum PredictedPostState { + /// V2 / Sushi: constant-product post-state mapped onto graph reserves. + /// `reserve_in` and `reserve_out` are the post-swap reserves in the + /// affected pool, expressed as `f64` so the JSONB row matches what the + /// price graph holds — the profitability scorer pulls these directly + /// without unit conversion. + V2 { + reserve_in: f64, + reserve_out: f64, + }, + /// V3: analytical predictor result mapped onto the synthetic + /// `(1.0, spot_price_post)` pair the price graph stores. 
The raw + /// `new_sqrt_price_x96` is reserved for the scorer (PR-3) — emitting + /// `reserve_in/out` matches the V2 case and keeps reconciler SQL simple. + V3 { + reserve_in: f64, + reserve_out: f64, + }, + /// Balancer equal-weight 2-token: balances map directly to graph + /// reserves with the pool's fee factor applied at the graph layer. + Balancer { + reserve_in: f64, + reserve_out: f64, + }, +} + +impl PredictedPostState { + pub fn into_json(self) -> serde_json::Value { + serde_json::to_value(self).expect("PredictedPostState is always serialisable") + } +} + +/// Persistence boundary for mempool predictions. +/// +/// `Send + Sync` so a single `Arc` can fan out +/// to every decode task without further locking. Methods take `&self` and +/// are infallible from the caller's perspective — a connection blip must +/// never bring down the decode pipeline. Implementations log and drop. +pub trait MempoolPredictionSink: Send + Sync { + fn insert_prediction(&self, prediction: NewMempoolPrediction); +} + +/// Prometheus surface for the writer. Registered once at startup against +/// the engine's shared `Registry`. +pub struct MempoolWriterMetrics { + persisted_total: IntCounterVec, + drops_total: IntCounter, + queue_depth: IntGauge, + write_latency_ms: HistogramVec, +} + +impl MempoolWriterMetrics { + /// Register all writer metrics on the provided `Registry`. + /// + /// Panics on duplicate registration — this is startup code and a + /// duplicate indicates a programmer error, not a runtime condition. 
+ pub fn register(registry: &Registry) -> Arc { + let persisted_total = IntCounterVec::new( + Opts::new( + "aether_mempool_predictions_persisted_total", + "Mempool predictions accepted by the writer task and queued for insert, by protocol", + ), + &["protocol"], + ) + .expect("aether_mempool_predictions_persisted_total counter vec"); + let drops_total = IntCounter::new( + "aether_mempool_writer_drops_total", + "Mempool predictions dropped because the writer channel was full", + ) + .expect("aether_mempool_writer_drops_total counter"); + let queue_depth = IntGauge::new( + "aether_mempool_writer_queue_depth", + "Pending mempool predictions sitting in the writer-task channel", + ) + .expect("aether_mempool_writer_queue_depth gauge"); + let write_latency_ms = HistogramVec::new( + HistogramOpts::new( + "aether_mempool_writer_write_latency_ms", + "Per-write latency of mempool prediction inserts from dequeue to query completion", + ) + .buckets(vec![0.5, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]), + &["result"], + ) + .expect("aether_mempool_writer_write_latency_ms histogram vec"); + + registry + .register(Box::new(persisted_total.clone())) + .expect("register aether_mempool_predictions_persisted_total"); + registry + .register(Box::new(drops_total.clone())) + .expect("register aether_mempool_writer_drops_total"); + registry + .register(Box::new(queue_depth.clone())) + .expect("register aether_mempool_writer_queue_depth"); + registry + .register(Box::new(write_latency_ms.clone())) + .expect("register aether_mempool_writer_write_latency_ms"); + + Arc::new(Self { + persisted_total, + drops_total, + queue_depth, + write_latency_ms, + }) + } +} + +/// Default sink: discards every prediction. Used when `MEMPOOL_LEDGER_DSN` +/// is unset so the engine's mempool path is fully functional without +/// Postgres. Logs once on construction so operators can grep startup output +/// and rule out persistence as the reason rows are missing. 
+pub struct NoopMempoolSink; + +impl NoopMempoolSink { + pub fn new() -> Self { + tracing::info!( + target: "aether::mempool_writer", + "MEMPOOL_LEDGER_DSN unset — mempool prediction writes disabled (no-op)" + ); + Self + } +} + +impl Default for NoopMempoolSink { + fn default() -> Self { + Self::new() + } +} + +impl MempoolPredictionSink for NoopMempoolSink { + fn insert_prediction(&self, _prediction: NewMempoolPrediction) {} +} + +/// Postgres-backed [`MempoolPredictionSink`]. +/// +/// The hot path enqueues onto a bounded channel; a single dedicated writer +/// task drains and executes inserts. Channel saturation drops the row (with +/// metric) rather than blocking the decoder. The connection pool is bounded +/// so a slow Postgres still cannot fan out unbounded backpressure even when +/// every connection is busy. +#[derive(Clone)] +pub struct PgMempoolWriter { + tx: mpsc::Sender, + metrics: Arc, +} + +impl PgMempoolWriter { + /// Connect to Postgres and spawn the writer task. Returns once the pool + /// is ready. The writer task exits when every clone of the `Sender` is + /// dropped (typically at process shutdown). + pub async fn connect( + database_url: &str, + metrics: Arc, + ) -> Result { + let pool = PgPoolOptions::new() + .max_connections(WRITER_POOL_SIZE) + // Short acquire timeout: misconfigured DSN should fail boot in + // seconds, not block the decoder while we wait. The + // `mempool_writer_from_env` wrapper falls back to NoopSink on + // this error so a slow Postgres degrades gracefully. 
+ .acquire_timeout(std::time::Duration::from_secs(2)) + .connect(database_url) + .await?; + + let (tx, rx) = mpsc::channel::(WRITER_CHANNEL_CAPACITY); + spawn_writer_task(pool, rx, Arc::clone(&metrics)); + + tracing::info!( + target: "aether::mempool_writer", + channel_capacity = WRITER_CHANNEL_CAPACITY, + pool_size = WRITER_POOL_SIZE, + "PgMempoolWriter connected — mempool prediction writes enabled" + ); + Ok(Self { tx, metrics }) + } +} + +impl MempoolPredictionSink for PgMempoolWriter { + /// Non-blocking enqueue onto the writer channel. A full or closed + /// channel drops the prediction; the caller never sees an error. + fn insert_prediction(&self, prediction: NewMempoolPrediction) { + let protocol = prediction.protocol; + // Bump the gauge BEFORE the send: the writer task decrements on + // dequeue, so incrementing afterwards lets the gauge dip below zero + // whenever the task wins the race. Rolled back if the send fails. + self.metrics.queue_depth.inc(); + match self.tx.try_send(prediction) { + Ok(()) => { + self.metrics + .persisted_total + .with_label_values(&[protocol]) + .inc(); + } + Err(mpsc::error::TrySendError::Full(_)) => { + self.metrics.queue_depth.dec(); + self.metrics.drops_total.inc(); + tracing::warn!( + target: "aether::mempool_writer", + capacity = WRITER_CHANNEL_CAPACITY, + "mempool writer channel full — dropping prediction" + ); + } + Err(mpsc::error::TrySendError::Closed(_)) => { + self.metrics.queue_depth.dec(); + // Writer task has exited; only happens at shutdown. + tracing::debug!( + target: "aether::mempool_writer", + "mempool writer channel closed; dropping prediction" + ); + } + } + } +} + +/// Build a [`MempoolPredictionSink`] from `MEMPOOL_LEDGER_DSN`. Returns +/// [`NoopMempoolSink`] when the var is unset, empty, or the connection +/// fails — the decoder stays runnable in dev / CI without Postgres. 
+pub async fn mempool_writer_from_env( + metrics: Arc, +) -> Arc { + match std::env::var("MEMPOOL_LEDGER_DSN") { + Ok(url) if !url.is_empty() => match PgMempoolWriter::connect(&url, metrics).await { + Ok(w) => Arc::new(w) as Arc, + Err(e) => { + tracing::error!( + target: "aether::mempool_writer", + error = %e, + "PgMempoolWriter connect failed; falling back to NoopMempoolSink" + ); + Arc::new(NoopMempoolSink::new()) + } + }, + _ => Arc::new(NoopMempoolSink::new()), + } +} + +/// Spawn the writer dispatcher. 
Sequential by design — the prediction +/// insert is a single-table `ON CONFLICT DO NOTHING` and the pool's +/// natural per-connection serialisation matches the per-pending-tx +/// ordering, so a semaphore-fanned-out variant (like the trade ledger +/// uses) would add machinery without throughput gain at the writer's +/// expected rate. +fn spawn_writer_task( + pool: PgPool, + mut rx: mpsc::Receiver, + metrics: Arc, +) { + tokio::spawn(async move { + while let Some(prediction) = rx.recv().await { + metrics.queue_depth.dec(); + let timer = Instant::now(); + let result = insert_prediction_inner(&pool, &prediction).await; + let elapsed_ms = timer.elapsed().as_secs_f64() * 1_000.0; + let label = if result.is_ok() { "ok" } else { "err" }; + metrics + .write_latency_ms + .with_label_values(&[label]) + .observe(elapsed_ms); + if let Err(e) = result { + tracing::warn!( + target: "aether::mempool_writer", + error = %e, + elapsed_ms, + tx_hash = %prediction.pending_tx_hash, + "mempool prediction insert failed; row dropped" + ); + } + } + tracing::info!( + target: "aether::mempool_writer", + "PgMempoolWriter dispatcher exiting" + ); + }); +} + +async fn insert_prediction_inner( + pool: &PgPool, + p: &NewMempoolPrediction, +) -> Result<(), sqlx::Error> { + let predicted_target_block = i64::try_from(p.predicted_target_block).unwrap_or(i64::MAX); + let amount_in = u256_to_decimal(p.amount_in); + let pool_address_bytes = p.pool_address.as_ref().map(|a| a.as_slice()); + + sqlx::query( + r#" + INSERT INTO mempool_predictions ( + prediction_id, decoded_at, pending_tx_hash, router_address, protocol, + token_in, token_out, amount_in, pool_address, + predicted_target_block, predicted_post_state, profit_factor_predicted, + detection_lead_ms, engine_git_sha + ) VALUES ( + $1, $2, $3, $4, $5, + $6, $7, $8, $9, + $10, $11, $12, + $13, $14 + ) + ON CONFLICT (pending_tx_hash) DO NOTHING + "#, + ) + .bind(p.prediction_id) + .bind(p.decoded_at) + .bind(p.pending_tx_hash.as_slice()) + 
.bind(p.router_address.as_slice()) + .bind(p.protocol) + .bind(p.token_in.as_slice()) + .bind(p.token_out.as_slice()) + .bind(&amount_in) + .bind(pool_address_bytes) + .bind(predicted_target_block) + .bind(&p.predicted_post_state) + .bind(p.profit_factor_predicted) + .bind(p.detection_lead_ms) + .bind(p.engine_git_sha.as_deref()) + .execute(pool) + .await?; + Ok(()) +} + +/// Map a U256 to the `NUMERIC(78,0)` representation sqlx accepts via +/// [`BigDecimal`]. Identical to the trade-ledger helper; pinned here to +/// keep the writer self-contained. +fn u256_to_decimal(v: U256) -> BigDecimal { + let s = v.to_string(); + BigDecimal::from_str(&s) + .expect("U256::to_string is always a valid base-10 BigDecimal input") +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Mutex; + + /// In-memory sink used by the pipeline test to assert that a row was + /// produced without standing up Postgres. + pub(crate) struct CapturingSink { + pub seen: Mutex>, + } + + impl CapturingSink { + pub fn new() -> Self { + Self { + seen: Mutex::new(Vec::new()), + } + } + } + + impl MempoolPredictionSink for CapturingSink { + fn insert_prediction(&self, prediction: NewMempoolPrediction) { + self.seen.lock().expect("capturing sink poisoned").push(prediction); + } + } + + fn sample_prediction() -> NewMempoolPrediction { + NewMempoolPrediction { + prediction_id: Uuid::new_v4(), + decoded_at: Utc::now(), + pending_tx_hash: B256::ZERO, + router_address: Address::ZERO, + protocol: PROTOCOL_UNI_V2, + token_in: Address::ZERO, + token_out: Address::ZERO, + amount_in: U256::from(1_000_000u64), + pool_address: Some(Address::ZERO), + predicted_target_block: 19_000_001, + predicted_post_state: PredictedPostState::V2 { + reserve_in: 1_000.0, + reserve_out: 2_000.0, + } + .into_json(), + profit_factor_predicted: Some(0.0042), + detection_lead_ms: None, + engine_git_sha: Some("deadbeef".to_string()), + } + } + + #[test] + fn noop_sink_accepts_writes_silently() { + let sink = 
NoopMempoolSink::new(); + sink.insert_prediction(sample_prediction()); + } + + #[test] + fn noop_sink_is_object_safe() { + let _: Box = Box::new(NoopMempoolSink::new()); + } + + #[test] + fn predicted_post_state_round_trips_through_json() { + for original in [ + PredictedPostState::V2 { + reserve_in: 1.5, + reserve_out: 2.5, + }, + PredictedPostState::V3 { + reserve_in: 1.0, + reserve_out: 1.234e18, + }, + PredictedPostState::Balancer { + reserve_in: 10.0, + reserve_out: 20.0, + }, + ] { + let json = serde_json::to_value(&original).expect("serialize"); + let kind = json.get("kind").and_then(|v| v.as_str()).expect("kind present"); + // `kind` lives under `#[serde(rename_all = "snake_case")]` so a + // future refactor that drops the rename surfaces here. + assert!( + ["v2", "v3", "balancer"].contains(&kind), + "unexpected kind {kind}" + ); + let parsed: PredictedPostState = serde_json::from_value(json).expect("deserialize"); + // Re-serialise both and compare strings — partial_eq via f64 is + // brittle but the JSON form is stable. 
+ assert_eq!( + serde_json::to_string(&parsed).expect("re-serialize"), + serde_json::to_string(&original).expect("re-serialize-original"), + ); + } + } + + #[test] + fn capturing_sink_records_every_insert() { + let sink = CapturingSink::new(); + sink.insert_prediction(sample_prediction()); + sink.insert_prediction(sample_prediction()); + assert_eq!(sink.seen.lock().expect("capturing sink poisoned").len(), 2); + } + + /// Touches every writer metric on a fresh registry, then checks that + /// all four metric families show up in `gather()`. + #[test] + fn metrics_register_round_trips() { + let registry = Registry::new(); + let m = MempoolWriterMetrics::register(&registry); + m.persisted_total.with_label_values(&[PROTOCOL_UNI_V2]).inc(); + m.drops_total.inc(); + m.queue_depth.set(3); + m.write_latency_ms.with_label_values(&["ok"]).observe(1.5); + + let names: Vec<_> = registry + .gather() + .iter() + .map(|f| f.get_name().to_string()) + .collect(); + for required in [ + "aether_mempool_predictions_persisted_total", + "aether_mempool_writer_drops_total", + "aether_mempool_writer_queue_depth", + "aether_mempool_writer_write_latency_ms", + ] { + assert!( + names.iter().any(|n| n == required), + "missing metric family {required}" + ); + } + } + + #[tokio::test] + async fn mempool_writer_from_env_falls_back_when_dsn_unset() { + // Save/restore so the test does not leak state into siblings. + let prev = std::env::var("MEMPOOL_LEDGER_DSN").ok(); + // SAFETY: tests in this crate run single-threaded against + // `MEMPOOL_LEDGER_DSN`; no concurrent reader can observe the unset. + // NOTE(review): the default test harness runs tests on several + // threads — confirm no sibling test touches this variable. 
+ unsafe { + std::env::remove_var("MEMPOOL_LEDGER_DSN"); + } + + let registry = Registry::new(); + let metrics = MempoolWriterMetrics::register(&registry); + let sink = mempool_writer_from_env(metrics).await; + // Should not panic; should not write. + sink.insert_prediction(sample_prediction()); + + if let Some(v) = prev { + // SAFETY: restored in the same single-threaded test scope. 
+ unsafe { + std::env::set_var("MEMPOOL_LEDGER_DSN", v); + } + } + } +} diff --git a/crates/grpc-server/src/profitability_writer.rs b/crates/grpc-server/src/profitability_writer.rs new file mode 100644 index 0000000..cab80e0 --- /dev/null +++ b/crates/grpc-server/src/profitability_writer.rs @@ -0,0 +1,506 @@ +//! Mempool profitability writer. +//! +//! Sibling of [`crate::mempool_writer`] (the predictions writer from PR +//! #133). Same shape: bounded mpsc → dedicated writer task → `sqlx::PgPool`, +//! drop-on-saturation, separate metric namespace. The two writers run in +//! distinct processes (engine vs scorer binary) so collapsing them into +//! one type would force the engine to link in scorer-only code. +//! +//! Reuses the trade-ledger DSN convention by reading `MEMPOOL_LEDGER_DSN` +//! — the profitability table lives in the same Postgres as predictions +//! and reconciliation, so a separate DSN would force operators to keep +//! three DSNs in sync for no benefit. + +use std::str::FromStr; +use std::sync::Arc; +use std::time::Instant; + +use alloy::primitives::U256; +use bigdecimal::BigDecimal; +use chrono::{DateTime, Utc}; +use prometheus::{HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, Opts, Registry}; +use serde::{Deserialize, Serialize}; +use sqlx::postgres::{PgPool, PgPoolOptions}; +use tokio::sync::mpsc; +use uuid::Uuid; + +const WRITER_CHANNEL_CAPACITY: usize = 256; +const WRITER_POOL_SIZE: u32 = 4; + +/// Wire labels for the `decision` column. Matches the CHECK constraint in +/// `migrations/0005_mempool_profitability.sql`. +pub const DECISION_PROFITABLE: &str = "profitable"; +pub const DECISION_UNPROFITABLE: &str = "unprofitable"; +/// Reserved for the revm-fork-verify path (planned follow-up). Not emitted +/// by the v1 scorer; the constant is here so a future code path produces +/// the same wire label without re-typing it. 
+#[allow(dead_code)] +pub const DECISION_REVERTED: &str = "reverted"; +pub const DECISION_NO_PATH: &str = "no_path"; + +/// Insert payload for the `mempool_profitability` table. +/// +/// `realized_profit_eth` is derived from `realized_profit_wei` at write +/// time inside the SQL bind, not carried separately on the payload, so +/// callers can't accidentally hand the writer mismatched wei + eth +/// values. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NewProfitabilityScore { + pub prediction_id: Uuid, + /// Event time — when the scorer finished computing this row. + pub scored_at: DateTime, + /// JSONB cycle: `[{"pool":"0x..","token_in":"0x..","token_out":"0x..","protocol":"uni_v2"}, ...]`. + pub cycle_path: serde_json::Value, + pub realized_profit_wei: U256, + pub gas_estimate_wei: U256, + /// `realized - gas`. The caller computes this once and passes both + /// halves so the writer does not need a signed-arithmetic helper. + /// Signed: a negative value means the gas estimate exceeded the + /// realized profit. There is no separate loss flag on this payload. + pub net_profit_wei: i128, + /// One of the `DECISION_*` wire labels declared in this module. + pub decision: &'static str, + pub scoring_engine_git_sha: Option, +} + +/// Sink trait. Object-safe so a single `Arc<dyn ProfitabilitySink>` can +/// fan out to multiple scoring tasks (currently only one runs at a time, +/// but the trait shape leaves room for a parallel batch scorer). +pub trait ProfitabilitySink: Send + Sync { + fn insert_score(&self, score: NewProfitabilityScore); +} + +/// Prometheus surface. Four families in three groups: +/// - `aether_mempool_profit_scored_total{decision}` — the headline +/// counter the dashboard pivots on. +/// - drops / queue_depth — writer-internal health. +/// - write_latency_ms — per-write latency by result. 
+pub struct ProfitabilityWriterMetrics { + pub scored_total: IntCounterVec, + pub drops_total: IntCounter, + pub queue_depth: IntGauge, + pub write_latency_ms: HistogramVec, +} + +impl ProfitabilityWriterMetrics { + pub fn register(registry: &Registry) -> Arc { + let scored_total = IntCounterVec::new( + Opts::new( + "aether_mempool_profit_scored_total", + "Confirmed predictions scored by the profitability scorer, by decision", + ), + &["decision"], + ) + .expect("aether_mempool_profit_scored_total counter vec"); + let drops_total = IntCounter::new( + "aether_mempool_profit_writer_drops_total", + "Profitability writes dropped because the bounded channel was full", + ) + .expect("aether_mempool_profit_writer_drops_total counter"); + let queue_depth = IntGauge::new( + "aether_mempool_profit_writer_queue_depth", + "Pending profitability writes sitting in the writer-task channel", + ) + .expect("aether_mempool_profit_writer_queue_depth gauge"); + let write_latency_ms = HistogramVec::new( + HistogramOpts::new( + "aether_mempool_profit_writer_write_latency_ms", + "Per-write latency of profitability inserts from dequeue to query completion", + ) + .buckets(vec![0.5, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]), + &["result"], + ) + .expect("aether_mempool_profit_writer_write_latency_ms histogram vec"); + + registry + .register(Box::new(scored_total.clone())) + .expect("register aether_mempool_profit_scored_total"); + registry + .register(Box::new(drops_total.clone())) + .expect("register aether_mempool_profit_writer_drops_total"); + registry + .register(Box::new(queue_depth.clone())) + .expect("register aether_mempool_profit_writer_queue_depth"); + registry + .register(Box::new(write_latency_ms.clone())) + .expect("register aether_mempool_profit_writer_write_latency_ms"); + + Arc::new(Self { + scored_total, + drops_total, + queue_depth, + write_latency_ms, + }) + } +} + +/// Default sink: discards every write. Logs once on construction. 
+pub struct NoopProfitabilitySink; + +impl NoopProfitabilitySink { + pub fn new() -> Self { + tracing::info!( + target: "aether::profit_writer", + "MEMPOOL_LEDGER_DSN unset — profitability writes disabled (no-op)" + ); + Self + } +} + +impl Default for NoopProfitabilitySink { + fn default() -> Self { + Self::new() + } +} + +impl ProfitabilitySink for NoopProfitabilitySink { + fn insert_score(&self, _score: NewProfitabilityScore) {} +} + +/// Postgres-backed sink. Bounded mpsc + dedicated writer task; saturation +/// drops the row rather than blocking the scoring loop. Slow Postgres +/// cannot exert unbounded backpressure on the scorer. +#[derive(Clone)] +pub struct PgProfitabilityWriter { + tx: mpsc::Sender, + metrics: Arc, +} + +impl PgProfitabilityWriter { + /// Open a pool of at most `WRITER_POOL_SIZE` connections and spawn the + /// writer task, which takes ownership of the pool and of the channel's + /// receiving end. Returns an error if the initial connection fails. + pub async fn connect( + database_url: &str, + metrics: Arc, + ) -> Result { + let pool = PgPoolOptions::new() + .max_connections(WRITER_POOL_SIZE) + .acquire_timeout(std::time::Duration::from_secs(2)) + .connect(database_url) + .await?; + + let (tx, rx) = mpsc::channel::(WRITER_CHANNEL_CAPACITY); + spawn_writer_task(pool, rx, Arc::clone(&metrics)); + + tracing::info!( + target: "aether::profit_writer", + channel_capacity = WRITER_CHANNEL_CAPACITY, + pool_size = WRITER_POOL_SIZE, + "PgProfitabilityWriter connected — profitability writes enabled" + ); + Ok(Self { tx, metrics }) + } + + /// Read API for the scorer's poll loop. Returns confirmed predictions + /// that have no profitability row yet. Bounded to `limit` so a backlog + /// burst does not blow the scorer's memory; the loop drains a page + /// per tick and the next tick picks up the rest. 
+ /// + /// This is a separate concern from the write path: the query runs on + /// the scoring loop, not the writer task, and the pool created in + /// `connect` is moved into that task. The caller therefore passes its + /// own `PgPool` here; the writer does not expose one. 
+ pub async fn fetch_unscored_confirmed( + pool: &PgPool, + limit: i64, + ) -> Result, sqlx::Error> { + let rows = sqlx::query_as::<_, RawUnscored>( + r#" + SELECT + p.prediction_id AS prediction_id, + p.protocol AS protocol, + p.pool_address AS pool_address, + p.token_in AS token_in, + p.token_out AS token_out, + p.amount_in AS amount_in, + r.actual_target_block AS actual_target_block + FROM mempool_predictions p + JOIN mempool_reconciliation r USING (prediction_id) + LEFT JOIN mempool_profitability sc USING (prediction_id) + WHERE r.outcome = 'confirmed' + AND r.actual_target_block IS NOT NULL + AND p.pool_address IS NOT NULL + AND sc.prediction_id IS NULL + ORDER BY r.actual_target_block ASC + LIMIT $1 + "#, + ) + .bind(limit) + .fetch_all(pool) + .await?; + Ok(rows.into_iter().map(UnscoredConfirmedPrediction::from).collect()) + } +} + +impl ProfitabilitySink for PgProfitabilityWriter { + /// Non-blocking enqueue onto the writer channel. A full or closed + /// channel drops the score; the caller never sees an error. + fn insert_score(&self, score: NewProfitabilityScore) { + let decision = score.decision; + // Bump the gauge BEFORE the send: the writer task decrements on + // dequeue, so incrementing afterwards lets the gauge dip below zero + // whenever the task wins the race. Rolled back if the send fails. + self.metrics.queue_depth.inc(); + match self.tx.try_send(score) { + Ok(()) => { + self.metrics + .scored_total + .with_label_values(&[decision]) + .inc(); + } + Err(mpsc::error::TrySendError::Full(_)) => { + self.metrics.queue_depth.dec(); + self.metrics.drops_total.inc(); + tracing::warn!( + target: "aether::profit_writer", + capacity = WRITER_CHANNEL_CAPACITY, + "profitability writer channel full — dropping score" + ); + } + Err(mpsc::error::TrySendError::Closed(_)) => { + self.metrics.queue_depth.dec(); + tracing::debug!( + target: "aether::profit_writer", + "profitability writer channel closed; dropping score" + ); + } + } + } +} + +/// Build a [`ProfitabilitySink`] from `MEMPOOL_LEDGER_DSN`. Returns +/// [`NoopProfitabilitySink`] when the var is unset or the connection +/// fails. 
+pub async fn profit_writer_from_env( + metrics: Arc, +) -> Arc { + match std::env::var("MEMPOOL_LEDGER_DSN") { + Ok(url) if !url.is_empty() => match PgProfitabilityWriter::connect(&url, metrics).await { + Ok(w) => Arc::new(w) as Arc, + Err(e) => { + tracing::error!( + target: "aether::profit_writer", + error = %e, + "PgProfitabilityWriter connect failed; falling back to NoopProfitabilitySink" + ); + Arc::new(NoopProfitabilitySink::new()) + } + }, + _ => Arc::new(NoopProfitabilitySink::new()), + } +} + +fn spawn_writer_task( + pool: PgPool, + mut rx: mpsc::Receiver, + metrics: Arc, +) { + tokio::spawn(async move { + while let Some(score) = rx.recv().await { + metrics.queue_depth.dec(); + let timer = Instant::now(); + let result = insert_score_inner(&pool, &score).await; + let elapsed_ms = timer.elapsed().as_secs_f64() * 1_000.0; + let label = if result.is_ok() { "ok" } else { "err" }; + metrics + .write_latency_ms + .with_label_values(&[label]) + .observe(elapsed_ms); + if let Err(e) = result { + tracing::warn!( + target: "aether::profit_writer", + error = %e, + elapsed_ms, + prediction_id = %score.prediction_id, + "profitability insert failed; row dropped" + ); + } + } + tracing::info!( + target: "aether::profit_writer", + "PgProfitabilityWriter dispatcher exiting" + ); + }); +} + +async fn insert_score_inner( + pool: &PgPool, + s: &NewProfitabilityScore, +) -> Result<(), sqlx::Error> { + let realized_wei = u256_to_decimal(s.realized_profit_wei); + let gas_wei = u256_to_decimal(s.gas_estimate_wei); + // net can be negative. BigDecimal supports signed values natively. + let net_wei = BigDecimal::from(s.net_profit_wei); + // realized_eth = realized_wei / 1e18 with full precision. BigDecimal + // division at NUMERIC(38,18) precision is exact for inputs <= 1e60 + // wei, which is many orders of magnitude beyond ETH total supply. 
+ let realized_eth = BigDecimal::from_str(&s.realized_profit_wei.to_string()) + .expect("U256::to_string always parses as BigDecimal") + / BigDecimal::from(1_000_000_000_000_000_000u64); + + sqlx::query( + r#" + INSERT INTO mempool_profitability ( + prediction_id, scored_at, cycle_path, + realized_profit_wei, realized_profit_eth, + gas_estimate_wei, net_profit_wei, + decision, scoring_engine_git_sha + ) VALUES ( + $1, $2, $3, + $4, $5, + $6, $7, + $8, $9 + ) + ON CONFLICT (prediction_id) DO NOTHING + "#, + ) + .bind(s.prediction_id) + .bind(s.scored_at) + .bind(&s.cycle_path) + .bind(&realized_wei) + .bind(&realized_eth) + .bind(&gas_wei) + .bind(&net_wei) + .bind(s.decision) + .bind(s.scoring_engine_git_sha.as_deref()) + .execute(pool) + .await?; + Ok(()) +} + +fn u256_to_decimal(v: U256) -> BigDecimal { + let s = v.to_string(); + BigDecimal::from_str(&s).expect("U256::to_string is always a valid BigDecimal input") +} + +/// One row from `fetch_unscored_confirmed`. Carries enough state for the +/// scoring loop to fetch the pool's actual reserves at the prediction's +/// confirmed block and re-run the detector. 
+#[derive(Debug, Clone)] +pub struct UnscoredConfirmedPrediction { + pub prediction_id: Uuid, + pub protocol: String, + pub pool_address: alloy::primitives::Address, + pub token_in: alloy::primitives::Address, + pub token_out: alloy::primitives::Address, + pub amount_in: U256, + pub actual_target_block: u64, +} + +#[derive(sqlx::FromRow)] +struct RawUnscored { + prediction_id: Uuid, + protocol: String, + pool_address: Vec, + token_in: Vec, + token_out: Vec, + amount_in: BigDecimal, + actual_target_block: i64, +} + +impl From for UnscoredConfirmedPrediction { + fn from(r: RawUnscored) -> Self { + use alloy::primitives::Address; + let to_addr = |b: &[u8]| -> Address { + let mut arr = [0u8; 20]; + if b.len() == 20 { + arr.copy_from_slice(b); + } + Address::from(arr) + }; + let amount_in = U256::from_str(&r.amount_in.to_string()).unwrap_or(U256::ZERO); + Self { + prediction_id: r.prediction_id, + protocol: r.protocol, + pool_address: to_addr(&r.pool_address), + token_in: to_addr(&r.token_in), + token_out: to_addr(&r.token_out), + amount_in, + actual_target_block: r.actual_target_block.max(0) as u64, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use alloy::primitives::Address; + + #[test] + fn noop_sink_silently_accepts_writes() { + let sink = NoopProfitabilitySink::new(); + sink.insert_score(sample_score()); + } + + #[test] + fn noop_sink_is_object_safe() { + let _: Box = Box::new(NoopProfitabilitySink::new()); + } + + #[test] + fn decision_constants_match_check_constraint() { + // Pinned to the CHECK constraint in + // migrations/0005_mempool_profitability.sql. A rename here without + // a matching migration would make every insert fail with + // SQLSTATE 23514. 
+ assert_eq!(DECISION_PROFITABLE, "profitable"); + assert_eq!(DECISION_UNPROFITABLE, "unprofitable"); + assert_eq!(DECISION_REVERTED, "reverted"); + assert_eq!(DECISION_NO_PATH, "no_path"); + } + + #[test] + fn metrics_register_round_trips() { + let registry = Registry::new(); + let m = ProfitabilityWriterMetrics::register(&registry); + m.scored_total.with_label_values(&[DECISION_PROFITABLE]).inc(); + m.scored_total.with_label_values(&[DECISION_NO_PATH]).inc(); + m.drops_total.inc(); + m.queue_depth.set(2); + m.write_latency_ms.with_label_values(&["ok"]).observe(1.0); + + let names: Vec<_> = registry + .gather() + .iter() + .map(|f| f.get_name().to_string()) + .collect(); + for required in [ + "aether_mempool_profit_scored_total", + "aether_mempool_profit_writer_drops_total", + "aether_mempool_profit_writer_queue_depth", + "aether_mempool_profit_writer_write_latency_ms", + ] { + assert!( + names.iter().any(|n| n == required), + "missing metric family {required}" + ); + } + } + + fn sample_score() -> NewProfitabilityScore { + NewProfitabilityScore { + prediction_id: Uuid::new_v4(), + scored_at: Utc::now(), + cycle_path: serde_json::json!([ + {"pool":"0x0000000000000000000000000000000000000001","token_in":"0x0","token_out":"0x0","protocol":"uni_v2"} + ]), + realized_profit_wei: U256::from(1_000_000_000_000_000u64), + gas_estimate_wei: U256::from(50_000_000_000_000u64), + net_profit_wei: 950_000_000_000_000, + decision: DECISION_PROFITABLE, + scoring_engine_git_sha: Some("deadbeef".to_string()), + } + } + + #[test] + fn unscored_from_raw_handles_bytea_widths() { + // RawUnscored.pool_address etc. are Vec<u8> from sqlx; the From + // impl must handle the 20-byte case and gracefully fall back on + // anything else without panicking (a defensive guard against a + // future schema migration that widens / narrows the bytea + // columns). 
+ let raw = RawUnscored { + prediction_id: Uuid::new_v4(), + protocol: "uni_v2".to_string(), + pool_address: vec![0xab; 20], + token_in: vec![0xcd; 20], + token_out: vec![0xef; 20], + amount_in: BigDecimal::from(123_456u64), + actual_target_block: 100, + }; + let conv: UnscoredConfirmedPrediction = raw.into(); + assert_eq!(conv.actual_target_block, 100); + assert_eq!(conv.amount_in, U256::from(123_456u64)); + let expected_pool = Address::from([0xab; 20]); + assert_eq!(conv.pool_address, expected_pool); + } +} diff --git a/crates/simulator/src/lib.rs b/crates/simulator/src/lib.rs index ecce999..a14c8e3 100644 --- a/crates/simulator/src/lib.rs +++ b/crates/simulator/src/lib.rs @@ -9,6 +9,7 @@ use revm::context::{BlockEnv, TxEnv}; use revm::database::{CacheDB, EmptyDBTyped}; use revm::handler::{ExecuteEvm, MainBuilder}; use revm::primitives::hardfork::SpecId; +use revm::state::AccountInfo; use revm::Context; use tracing::{debug, error, info}; @@ -322,6 +323,250 @@ impl EvmSimulator { } } + /// Deploy a contract via CREATE and then CALL it, measuring the ERC20 + /// balance delta on `profit_recipient` for `profit_token` as profit. + /// + /// The CREATE and the CALL run in separate revm `Context`s; the CREATE's + /// state diff is committed to the DB in between, so the deployed contract's + /// runtime bytecode is visible to the CALL. This lets us deploy AetherExecutor + /// and immediately invoke `executeArb` without an external provider or Anvil. + /// + /// `deployer` does not need an ETH balance in the provided `state`: + /// the method funds the deployer internally with 100 ETH before the + /// CREATE as a convenience. + /// + /// Returns `SimulationResult` where `profit_wei` is the post-call + /// balance of `profit_recipient` minus the pre-call balance (in + /// `profit_token` ERC20, read via `balance_slot`). On any failure + /// (CREATE revert, CALL revert), returns `success: false`. 
+ #[allow(clippy::too_many_arguments)] + pub fn deploy_and_simulate_with_erc20_profit( + &self, + mut state: RpcForkedState, + deployer: Address, + init_bytecode: &[u8], + constructor_args: &[u8], + calldata: Vec, + profit_token: Address, + profit_recipient: Address, + balance_slot: U256, + ) -> SimulationResult { + use revm::context::result::Output; + use revm::database::DatabaseRef; + + // Fund the deployer so CREATE and CALL can proceed without + // balance-check failures even when `disable_balance_check` is set. + let hundred_eth = U256::from(100u64) * U256::from(10u64).pow(U256::from(18u64)); + state.insert_account_balance(deployer, hundred_eth); + + // Concatenate init bytecode + constructor args. + let mut deploy_data = Vec::with_capacity(init_bytecode.len() + constructor_args.len()); + deploy_data.extend_from_slice(init_bytecode); + deploy_data.extend_from_slice(constructor_args); + + // Destructure state for revm consumption. + let RpcForkedState { + db, + block_number, + block_timestamp, + base_fee, + chain_id, + } = state; + + let block = BlockEnv { + number: U256::from(block_number), + timestamp: U256::from(block_timestamp), + basefee: base_fee, + ..Default::default() + }; + + // === Transact 1: CREATE === + let create_tx = TxEnv::builder() + .caller(deployer) + .kind(revm::primitives::TxKind::Create) + .data(revm::primitives::Bytes::copy_from_slice(&deploy_data)) + .value(U256::ZERO) + .gas_limit(8_000_000) + .gas_price(base_fee as u128) + .nonce(0) + .chain_id(Some(chain_id)) + .build_fill(); + + let ctx = Context::, ()>::new( + db, SpecId::CANCUN, + ) + .with_block(block.clone()) + .modify_cfg_chained(|cfg| { + cfg.chain_id = chain_id; + cfg.disable_nonce_check = true; + cfg.disable_balance_check = true; + cfg.disable_base_fee = true; + }); + + let mut evm = ctx.build_mainnet(); + + let create_result = match evm.transact(create_tx) { + Ok(r) => r, + Err(e) => { + error!(error = %e, "CREATE transact error"); + return SimulationResult { + success: 
false, + profit_wei: U256::ZERO, + gas_used: 0, + revert_reason: Some(format!("CREATE EVM error: {e}")), + }; + } + }; + + let deployed_addr = match create_result.result { + ExecutionResult::Success { output: Output::Create(_, Some(addr)), .. } => addr, + ExecutionResult::Success { output: Output::Create(_, None), .. } => { + return SimulationResult { + success: false, + profit_wei: U256::ZERO, + gas_used: 0, + revert_reason: Some("CREATE succeeded but no address returned".into()), + }; + } + ExecutionResult::Revert { gas_used, output } => { + let reason = format!("0x{}", alloy::hex::encode(&output)); + return SimulationResult { + success: false, + profit_wei: U256::ZERO, + gas_used, + revert_reason: Some(format!("CREATE reverted: {reason}")), + }; + } + ExecutionResult::Halt { reason, gas_used } => { + return SimulationResult { + success: false, + profit_wei: U256::ZERO, + gas_used, + revert_reason: Some(format!("CREATE halted: {reason:?}")), + }; + } + _ => { + return SimulationResult { + success: false, + profit_wei: U256::ZERO, + gas_used: 0, + revert_reason: Some("CREATE returned unexpected output variant".into()), + }; + } + }; + + debug!(%deployed_addr, "CREATE succeeded, proceeding to CALL"); + + // Commit CREATE state diffs into the DB so the CALL sees them. 
+ let mut db = evm.ctx.journaled_state.database; + for (addr, account) in create_result.state.iter() { + if account.is_selfdestructed() { + continue; + } + let info = &account.info; + db.insert_account_info( + *addr, + AccountInfo { + balance: info.balance, + nonce: info.nonce, + code_hash: info.code_hash, + code: info.code.clone(), + ..Default::default() + }, + ); + for (slot, slot_val) in account.storage.iter() { + let _ = db.insert_account_storage(*addr, *slot, slot_val.present_value); + } + } + + // === Pre-call balance read === + let mut key_input = [0u8; 64]; + key_input[12..32].copy_from_slice(profit_recipient.as_slice()); + key_input[32..64].copy_from_slice(&balance_slot.to_be_bytes::<32>()); + let storage_key = U256::from_be_slice( + alloy::primitives::keccak256(key_input).as_slice(), + ); + let pre_balance = db + .storage_ref(profit_token, storage_key) + .unwrap_or_default(); + + // === Transact 2: CALL the deployed contract === + let call_tx = TxEnv::builder() + .caller(deployer) + .kind(revm::primitives::TxKind::Call(deployed_addr)) + .data(revm::primitives::Bytes::copy_from_slice(&calldata)) + .value(U256::ZERO) + .gas_limit(self.config.gas_limit) + .gas_price(base_fee as u128) + .nonce(1) + .chain_id(Some(chain_id)) + .build_fill(); + + let ctx2 = Context::, ()>::new( + db, SpecId::CANCUN, + ) + .with_block(block) + .modify_cfg_chained(|cfg| { + cfg.chain_id = chain_id; + cfg.disable_nonce_check = true; + cfg.disable_balance_check = true; + cfg.disable_base_fee = true; + }); + + let mut evm2 = ctx2.build_mainnet(); + + match evm2.transact(call_tx) { + Ok(result_and_state) => match result_and_state.result { + ExecutionResult::Success { gas_used, .. 
} => { + let post_balance = result_and_state + .state + .get(&profit_token) + .and_then(|acc| acc.storage.get(&storage_key)) + .map(|slot| slot.present_value) + .unwrap_or(pre_balance); + + let profit = post_balance.saturating_sub(pre_balance); + debug!(gas_used, %profit, "deploy+call simulation succeeded"); + SimulationResult { + success: true, + profit_wei: profit, + gas_used, + revert_reason: None, + } + } + ExecutionResult::Revert { gas_used, output } => { + let reason = format!("0x{}", alloy::hex::encode(&output)); + debug!(gas_used, reason = %reason, "CALL reverted"); + SimulationResult { + success: false, + profit_wei: U256::ZERO, + gas_used, + revert_reason: Some(reason), + } + } + ExecutionResult::Halt { reason, gas_used } => { + let reason_str = format!("{reason:?}"); + debug!(gas_used, reason = %reason_str, "CALL halted"); + SimulationResult { + success: false, + profit_wei: U256::ZERO, + gas_used, + revert_reason: Some(reason_str), + } + } + }, + Err(e) => { + error!(error = %e, "CALL transact error"); + SimulationResult { + success: false, + profit_wei: U256::ZERO, + gas_used: 0, + revert_reason: Some(format!("CALL EVM error: {e}")), + } + } + } + } + /// Simulate a transaction against RPC-backed state and measure the ERC20 /// balance delta of `profit_recipient` for `profit_token` as profit. /// diff --git a/deploy/docker/grafana/dashboards/mempool.json b/deploy/docker/grafana/dashboards/mempool.json index 785c33e..ce48efa 100644 --- a/deploy/docker/grafana/dashboards/mempool.json +++ b/deploy/docker/grafana/dashboards/mempool.json @@ -195,6 +195,466 @@ "w": 9, "h": 9 } + }, + { + "id": 100, + "type": "row", + "title": "Reconciler (PR #134)", + "collapsed": false, + "gridPos": { + "x": 0, + "y": 33, + "w": 24, + "h": 1 + } + }, + { + "id": 11, + "type": "stat", + "title": "Block accuracy (1h) — Δ ≤ 0", + "description": "Fraction of confirmed predictions whose actual_target_block landed at or before the predicted block. 
Computed from the aether_mempool_block_delta histogram's le=0 bucket over a 1h window.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "100 * sum(rate(aether_mempool_block_delta_bucket{job=\"aether-host-reconciler\",le=\"0\"}[1h])) / clamp_min(sum(rate(aether_mempool_block_delta_count{job=\"aether-host-reconciler\"}[1h])), 1e-9)", + "legendFormat": "Δ≤0" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "orange", "value": 60}, + {"color": "green", "value": 85} + ] + } + } + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "" + }, + "colorMode": "value", + "graphMode": "area" + }, + "gridPos": { + "x": 0, + "y": 34, + "w": 6, + "h": 6 + } + }, + { + "id": 12, + "type": "stat", + "title": "Pool-path accuracy (1h)", + "description": "Fraction of reconciled predictions whose pool address appeared in the confirmed tx's receipt logs. 
Source: aether_mempool_pool_path_total{correct=true/false}.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "100 * sum(rate(aether_mempool_pool_path_total{job=\"aether-host-reconciler\",correct=\"true\"}[1h])) / clamp_min(sum(rate(aether_mempool_pool_path_total{job=\"aether-host-reconciler\"}[1h])), 1e-9)", + "legendFormat": "correct" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": null}, + {"color": "orange", "value": 70}, + {"color": "green", "value": 90} + ] + } + } + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "" + }, + "colorMode": "value", + "graphMode": "area" + }, + "gridPos": { + "x": 6, + "y": 34, + "w": 6, + "h": 6 + } + }, + { + "id": 13, + "type": "stat", + "title": "Reconciler queue depth", + "description": "Pending reconciliation writes sitting in the writer-goroutine channel. Saturation → drops increment.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "aether_mempool_reconciler_queue_depth{job=\"aether-host-reconciler\"}", + "legendFormat": "queue" + } + ], + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "" + }, + "colorMode": "value", + "graphMode": "area" + }, + "gridPos": { + "x": 12, + "y": 34, + "w": 6, + "h": 6 + } + }, + { + "id": 14, + "type": "stat", + "title": "Reconciler drops (5m rate)", + "description": "Reconciliation writes dropped because the bounded channel was full. 
Sustained non-zero = writer is slower than confirm rate.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(aether_mempool_reconciler_drops_total{job=\"aether-host-reconciler\"}[5m]))", + "legendFormat": "drops/s" + } + ], + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "red", "value": 0.01} + ] + } + } + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "" + }, + "colorMode": "background", + "graphMode": "area" + }, + "gridPos": { + "x": 18, + "y": 34, + "w": 6, + "h": 6 + } + }, + { + "id": 15, + "type": "timeseries", + "title": "Block-delta quantiles", + "description": "p50 / p90 of the predicted-vs-actual target-block delta over a 5m window. Negative = engine predicted too far in the future; positive = engine was too eager.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.5, sum by (le) (rate(aether_mempool_block_delta_bucket{job=\"aether-host-reconciler\"}[5m])))", + "legendFormat": "p50" + }, + { + "refId": "B", + "expr": "histogram_quantile(0.9, sum by (le) (rate(aether_mempool_block_delta_bucket{job=\"aether-host-reconciler\"}[5m])))", + "legendFormat": "p90" + } + ], + "gridPos": { + "x": 0, + "y": 40, + "w": 12, + "h": 8 + } + }, + { + "id": 16, + "type": "piechart", + "title": "Reconciliation outcomes", + "description": "Cumulative confirmed-vs-dropped split for predictions that have been resolved.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "sum by (outcome) (aether_mempool_reconciled_total{job=\"aether-host-reconciler\"})", + "legendFormat": "{{outcome}}" + } + ], + "options": { + "legend": {"displayMode": "list", "placement": "right", "values": ["value", "percent"]} + }, + "gridPos": { + "x": 12, + "y": 
40, + "w": 12, + "h": 8 + } + }, + { + "id": 17, + "type": "timeseries", + "title": "Reconciler error rates", + "description": "Per-source RPC failure rates inside the reconciler. Header-fetch fails drop the whole header batch; lookup and receipt-fetch fails are per-tx and merely degrade pool_path_correct fidelity.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(aether_mempool_reconciler_header_fetch_errors_total{job=\"aether-host-reconciler\"}[5m]))", + "legendFormat": "header_fetch" + }, + { + "refId": "B", + "expr": "sum(rate(aether_mempool_reconciler_lookup_errors_total{job=\"aether-host-reconciler\"}[5m]))", + "legendFormat": "lookup" + }, + { + "refId": "C", + "expr": "sum(rate(aether_mempool_reconciler_receipt_fetch_errors_total{job=\"aether-host-reconciler\"}[5m]))", + "legendFormat": "receipt_fetch" + } + ], + "gridPos": { + "x": 0, + "y": 48, + "w": 12, + "h": 8 + } + }, + { + "id": 18, + "type": "timeseries", + "title": "Reconciler write latency (p50 / p95)", + "description": "Per-write latency of reconciliation inserts from dequeue to query completion.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.5, sum by (le) (rate(aether_mempool_reconciler_write_latency_ms_bucket{job=\"aether-host-reconciler\"}[5m])))", + "legendFormat": "p50" + }, + { + "refId": "B", + "expr": "histogram_quantile(0.95, sum by (le) (rate(aether_mempool_reconciler_write_latency_ms_bucket{job=\"aether-host-reconciler\"}[5m])))", + "legendFormat": "p95" + } + ], + "fieldConfig": { + "defaults": {"unit": "ms"} + }, + "gridPos": { + "x": 12, + "y": 48, + "w": 12, + "h": 8 + } + }, + { + "id": 200, + "type": "row", + "title": "Profitability scorer (PR #135–#137)", + "collapsed": false, + "gridPos": { + "x": 0, + "y": 56, + "w": 24, + "h": 1 + } + }, + { + "id": 19, + "type": "piechart", + "title": "Decision breakdown", + 
"description": "Cumulative scored-prediction split across profitable / unprofitable / reverted / no_path. PR #136's absurdity floor and PR #144's revm V3 verifier both feed the `reverted` bucket today; splitting them needs a sub-label on the counter (follow-up PR).", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "sum by (decision) (aether_mempool_profit_scored_total{job=\"aether-host-scorer\"})", + "legendFormat": "{{decision}}" + } + ], + "options": { + "legend": {"displayMode": "list", "placement": "right", "values": ["value", "percent"]} + }, + "gridPos": { + "x": 0, + "y": 57, + "w": 12, + "h": 8 + } + }, + { + "id": 20, + "type": "timeseries", + "title": "Scored rate by decision", + "description": "Per-decision scoring throughput. Stable non-zero `profitable` or `unprofitable` after the PR #145 engine restart is the V3-verifier end-to-end signal.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "sum by (decision) (rate(aether_mempool_profit_scored_total{job=\"aether-host-scorer\"}[5m]))", + "legendFormat": "{{decision}}" + } + ], + "gridPos": { + "x": 12, + "y": 57, + "w": 12, + "h": 8 + } + }, + { + "id": 21, + "type": "stat", + "title": "Scorer queue depth", + "description": "Pending profitability writes sitting in the writer-task channel.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "aether_mempool_profit_writer_queue_depth{job=\"aether-host-scorer\"}", + "legendFormat": "queue" + } + ], + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "" + }, + "colorMode": "value", + "graphMode": "area" + }, + "gridPos": { + "x": 0, + "y": 65, + "w": 6, + "h": 6 + } + }, + { + "id": 22, + "type": "stat", + "title": "Scorer drops (5m rate)", + "description": "Profitability writes dropped because the bounded channel was full.", + "datasource": { 
+ "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(aether_mempool_profit_writer_drops_total{job=\"aether-host-scorer\"}[5m]))", + "legendFormat": "drops/s" + } + ], + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [ + {"color": "green", "value": null}, + {"color": "red", "value": 0.01} + ] + } + } + }, + "options": { + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "" + }, + "colorMode": "background", + "graphMode": "area" + }, + "gridPos": { + "x": 6, + "y": 65, + "w": 6, + "h": 6 + } + }, + { + "id": 23, + "type": "timeseries", + "title": "Scorer write latency (p50 / p95)", + "description": "Per-write latency of profitability inserts from dequeue to query completion, split by result label.", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.5, sum by (le, result) (rate(aether_mempool_profit_writer_write_latency_ms_bucket{job=\"aether-host-scorer\"}[5m])))", + "legendFormat": "p50 {{result}}" + }, + { + "refId": "B", + "expr": "histogram_quantile(0.95, sum by (le, result) (rate(aether_mempool_profit_writer_write_latency_ms_bucket{job=\"aether-host-scorer\"}[5m])))", + "legendFormat": "p95 {{result}}" + } + ], + "fieldConfig": { + "defaults": {"unit": "ms"} + }, + "gridPos": { + "x": 12, + "y": 65, + "w": 12, + "h": 6 + } } ] } \ No newline at end of file diff --git a/deploy/docker/prometheus.yml b/deploy/docker/prometheus.yml index 07b5393..fae8a9d 100644 --- a/deploy/docker/prometheus.yml +++ b/deploy/docker/prometheus.yml @@ -35,3 +35,18 @@ scrape_configs: - job_name: "aether-host-monitor" static_configs: - targets: ["host.docker.internal:9090"] + # Mempool reconciler (Go) — confirmed/dropped outcomes per prediction + + # block-delta + pool-path accuracy histograms (PR #134). Defaults to + # :9094 per `cmd/reconciler/main.go`; override with `RECONCILER_METRICS_ADDR`. 
+ - job_name: "aether-host-reconciler" + static_configs: + - targets: ["host.docker.internal:9094"] + # Profitability scorer (Rust) — per-decision counter + writer health + # gauges (PR #135). Defaults to :9095; soak ops override to :9097 via + # `PROFIT_SCORER_METRICS_ADDR` so the scrape lists both targets and + # tolerates whichever is in use. + - job_name: "aether-host-scorer" + static_configs: + - targets: + - "host.docker.internal:9095" + - "host.docker.internal:9097" diff --git a/internal/db/mempool_reconciliation_metrics.go b/internal/db/mempool_reconciliation_metrics.go new file mode 100644 index 0000000..c13be1d --- /dev/null +++ b/internal/db/mempool_reconciliation_metrics.go @@ -0,0 +1,61 @@ +// Prometheus surface for the mempool reconciliation writer. +// +// Kept in the `db` package alongside LedgerMetrics so the two reconciliation +// halves (DB + reconciler binary) can register their counters against a +// single shared registry. The reconciler-loop metrics (`block_accuracy`, +// `pool_path_accuracy`) live with the binary in `cmd/reconciler/` because +// they are computed from in-process counters and never touch the DB. + +package db + +import "github.com/prometheus/client_golang/prometheus" + +// MempoolReconciliationMetrics groups the families the +// PgMempoolReconciliation writer goroutine updates. Names mirror the +// `aether_ledger_*` namespace shape (`aether_mempool_reconciler_*`) so +// dashboards can apply a single template. +type MempoolReconciliationMetrics struct { + // Bumped on every successful reconciliation insert (or on every row + // returned by MarkStaleAsDropped). `outcome` is one of the + // OutcomeConfirmed / OutcomeDropped / OutcomeReplaced / + // OutcomeStillPending constants. + ReconciledTotal *prometheus.CounterVec + // Reconciliation writes the bounded channel rejected because it was + // full. Single-labelled (no `op`) because this writer only does one + // kind of insert. 
+ DropsTotal prometheus.Counter + QueueDepth prometheus.Gauge + // Per-write latency from dequeue to query completion. `result` = + // "ok"|"err" so an alert can fire on a sudden `err` spike. + WriteLatencyMs *prometheus.HistogramVec +} + +// NewMempoolReconciliationMetrics constructs the families and registers +// them with the supplied Prometheus registerer. A separate registerer +// argument (vs the default `prometheus.MustRegister`) makes the binary's +// /metrics endpoint composable — the reconciler can publish under its own +// process registry while the engine publishes under its own, and a future +// joint binary can pass the same registry to both halves. +func NewMempoolReconciliationMetrics(reg prometheus.Registerer) *MempoolReconciliationMetrics { + m := &MempoolReconciliationMetrics{ + ReconciledTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "aether_mempool_reconciled_total", + Help: "Mempool predictions resolved by the reconciler, by outcome", + }, []string{"outcome"}), + DropsTotal: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "aether_mempool_reconciler_drops_total", + Help: "Reconciliation writes dropped because the bounded channel was full", + }), + QueueDepth: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "aether_mempool_reconciler_queue_depth", + Help: "Pending reconciliation writes sitting in the writer-goroutine channel", + }), + WriteLatencyMs: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Name: "aether_mempool_reconciler_write_latency_ms", + Help: "Per-write latency of reconciliation inserts from dequeue to query completion", + Buckets: []float64{0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0}, + }, []string{"result"}), + } + reg.MustRegister(m.ReconciledTotal, m.DropsTotal, m.QueueDepth, m.WriteLatencyMs) + return m +} diff --git a/internal/db/mempool_reconciliation_pg.go b/internal/db/mempool_reconciliation_pg.go new file mode 100644 index 0000000..d0c7b01 --- /dev/null +++ 
b/internal/db/mempool_reconciliation_pg.go @@ -0,0 +1,349 @@ +// Mempool reconciliation persistence layer. +// +// Separate from the trade-ledger PgLedger by design: distinct DSN +// (MEMPOOL_LEDGER_DSN), distinct pgx pool. The reconciler binary is opt-in +// (the executor and engine don't link this code path) so its DB +// configuration must not collide with the trade-ledger's DATABASE_URL. +// +// API surface: +// - LookupPredictionByTxHash: synchronous single-row read on the +// `pending_tx_hash` unique index. Called once per landed block tx; the +// reconciler hot path stays simple by awaiting this result inline. +// - InsertReconciliation: fire-and-forget through the existing PgLedger +// pattern — bounded channel, drop-on-saturation, separate writer +// goroutine. +// - MarkStaleAsDropped: batch SQL that inserts `outcome='dropped'` rows +// for every prediction past its 12-block grace window without a +// reconciliation row. +// +// See migrations/0004_mempool_reconciliation.sql for the schema. + +package db + +import ( + "context" + "errors" + "fmt" + "log/slog" + "sync" + "time" + + "github.com/google/uuid" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" +) + +const ( + // reconChannelCapacity bounds the writer queue. Sized smaller than the + // trade-ledger ledger's 1024 because reconciliation throughput equals + // "predictions that hit the same block" — at peak ~10/block on busy + // mainnet — and a 256-deep buffer gives ~25 blocks of headroom before + // drops at peak. + reconChannelCapacity = 256 + + // reconPoolSize sizes the underlying pgxpool. Smaller than the trade + // ledger (4 vs 8) because the reconciler's write rate is bounded by + // the per-block batch instead of the per-arb stream. + reconPoolSize = 4 + + // reconConnectTimeout fails boot fast on misconfigured + // MEMPOOL_LEDGER_DSN so the binary degrades cleanly to "metric-only, + // no DB writes" instead of stalling startup. 
+ reconConnectTimeout = 2 * time.Second + + // reconCloseDrainTimeout caps how long Close() waits for in-flight + // writes. Mirrors PgLedger's policy. + reconCloseDrainTimeout = 5 * time.Second + + // StaleConfirmationWindow is the number of blocks the reconciler waits + // past a prediction's predicted_target_block before declaring it + // dropped. 12 ≈ Flashbots' empirical "tx never landed" heuristic + // (12 slots ≈ 144 s on post-merge Ethereum, not a full 32-slot epoch); shorter windows surface false + // drops on busy fee markets where pendings wait several blocks for + // inclusion. + StaleConfirmationWindow = 12 +) + +// PendingPrediction is the subset of `mempool_predictions` columns the +// reconciler needs to score an outcome. Returned by LookupPredictionByTxHash. +// Kept tight on purpose — adding columns means widening the read path's hot +// SELECT and is gated by an actual reconciler-side use. +type PendingPrediction struct { + PredictionID uuid.UUID + Protocol string + PoolAddress *[20]byte // nil when registry miss recorded NULL + PredictedTargetBlock uint64 +} + +// NewReconciliation is the insert payload for `mempool_reconciliation`. The +// outcome enum is constrained at the SQL CHECK level, but mirrored here as +// public constants so callers can switch on a stable identifier instead of +// re-typing the literal each time. +type NewReconciliation struct { + PredictionID uuid.UUID + ResolutionTs time.Time + Outcome string + ActualTargetBlock *uint64 + ActualTxIndex *int + BlockDelta *int + OrderingCorrect *bool + PoolPathCorrect *bool + ReplacedByTxHash *[32]byte + FailureReason *string +} + +const ( + OutcomeConfirmed = "confirmed" + OutcomeDropped = "dropped" + OutcomeReplaced = "replaced" + OutcomeStillPending = "still_pending" +) + +// PgMempoolReconciliation owns the pgxpool and writer goroutine pair. 
+// Functionally a sibling of PgLedger; intentionally not collapsed into the +// same type because (a) the two run in separate process address spaces +// (engine vs reconciler binary), and (b) collapsing would force the engine +// to pull a pgx reconciliation-table runtime even when the reconciler is +// not in use. +type PgMempoolReconciliation struct { + pool *pgxpool.Pool + ch chan NewReconciliation + metrics *MempoolReconciliationMetrics + wg sync.WaitGroup + dispatcherCancel context.CancelFunc +} + +// NewPgMempoolReconciliation connects to Postgres and spawns the dispatcher. +// Mirrors NewPgLedger's lifecycle so a future joint shutdown coordinator +// can call Close on both without special-casing either. +func NewPgMempoolReconciliation( + ctx context.Context, + databaseURL string, + metrics *MempoolReconciliationMetrics, +) (*PgMempoolReconciliation, error) { + cfg, err := pgxpool.ParseConfig(databaseURL) + if err != nil { + return nil, fmt.Errorf("parse MEMPOOL_LEDGER_DSN: %w", err) + } + cfg.MaxConns = reconPoolSize + cfg.ConnConfig.ConnectTimeout = reconConnectTimeout + + connectCtx, cancel := context.WithTimeout(ctx, reconConnectTimeout) + defer cancel() + pool, err := pgxpool.NewWithConfig(connectCtx, cfg) + if err != nil { + return nil, fmt.Errorf("connect mempool pgxpool: %w", err) + } + if err := pool.Ping(connectCtx); err != nil { + pool.Close() + return nil, fmt.Errorf("ping mempool postgres: %w", err) + } + + dispatcherCtx, dispatcherCancel := context.WithCancel(context.Background()) + r := &PgMempoolReconciliation{ + pool: pool, + ch: make(chan NewReconciliation, reconChannelCapacity), + metrics: metrics, + dispatcherCancel: dispatcherCancel, + } + r.wg.Add(1) + go r.dispatch(dispatcherCtx) + + slog.Info("PgMempoolReconciliation connected — reconciliation writes enabled", + "component", "reconciler", + "channel_capacity", reconChannelCapacity, + "pool_size", reconPoolSize) + return r, nil +} + +// Close drains in-flight writes and shuts the pool 
down. Same bounded +// drain policy as PgLedger.Close: a wedged Postgres cannot hang the +// reconciler shutdown forever. +func (r *PgMempoolReconciliation) Close() { + close(r.ch) + done := make(chan struct{}) + go func() { + r.wg.Wait() + close(done) + }() + select { + case <-done: + // Clean drain. + case <-time.After(reconCloseDrainTimeout): + slog.Warn("PgMempoolReconciliation Close() drain timed out", + "component", "reconciler", + "timeout", reconCloseDrainTimeout) + r.dispatcherCancel() + select { + case <-done: + case <-time.After(time.Second): + } + } + r.pool.Close() +} + +// LookupPredictionByTxHash returns the prediction row keyed by +// `pending_tx_hash`. The second return is false (with nil error) when no +// row matches — i.e. the tx hash was never a prediction. Callers MUST +// distinguish "missing" from "error" because the per-block hot path treats +// the two cases differently: missing is the expected dominant case (most +// tx hashes are not predictions) and is silent. +func (r *PgMempoolReconciliation) LookupPredictionByTxHash( + ctx context.Context, + txHash [32]byte, +) (PendingPrediction, bool, error) { + row := r.pool.QueryRow(ctx, ` + SELECT prediction_id, protocol, pool_address, predicted_target_block + FROM mempool_predictions + WHERE pending_tx_hash = $1 + `, txHash[:]) + + var ( + pred PendingPrediction + poolBytes []byte + targetBlk int64 + ) + if err := row.Scan(&pred.PredictionID, &pred.Protocol, &poolBytes, &targetBlk); err != nil { + if errors.Is(err, pgx.ErrNoRows) { + return PendingPrediction{}, false, nil + } + return PendingPrediction{}, false, fmt.Errorf("lookup prediction: %w", err) + } + pred.PredictedTargetBlock = uint64(targetBlk) + if len(poolBytes) == 20 { + var arr [20]byte + copy(arr[:], poolBytes) + pred.PoolAddress = &arr + } + return pred, true, nil +} + +// InsertReconciliation enqueues a write. Fire-and-forget. Saturation drops +// the row and bumps `aether_mempool_reconciler_drops_total`. 
+func (r *PgMempoolReconciliation) InsertReconciliation(rec NewReconciliation) { + r.metrics.QueueDepth.Inc() + select { + case r.ch <- rec: + default: + r.metrics.QueueDepth.Dec() + r.metrics.DropsTotal.Inc() + slog.Warn("reconciliation channel full — dropping row", + "component", "reconciler", + "capacity", reconChannelCapacity, + "outcome", rec.Outcome) + } +} + +// MarkStaleAsDropped runs the periodic sweep that closes the loop for +// predictions where the 12-block window elapsed without a matching landed +// tx. Returns the number of rows inserted. +// +// The query is a single INSERT … SELECT with NOT EXISTS guard so multiple +// concurrent reconcilers (e.g. blue/green deploy overlap) never produce +// duplicate dropped rows — the prediction_id PK on +// mempool_reconciliation makes the second writer's row a no-op via +// ON CONFLICT DO NOTHING. +func (r *PgMempoolReconciliation) MarkStaleAsDropped( + ctx context.Context, + currentHead uint64, +) (int64, error) { + cutoff := int64(currentHead) - int64(StaleConfirmationWindow) + if cutoff < 0 { + return 0, nil + } + // The failure_reason is built in Go (rather than via SQL concat) so + // pgx encodes a single $1 as TEXT and a single $2 as BIGINT — the + // concat form pgx<->driver-encoder did not recognise the int64 + // argument when the column context was TEXT. 
+ failureReason := fmt.Sprintf("12-block window elapsed (head=%d)", currentHead) + tag, err := r.pool.Exec(ctx, ` + INSERT INTO mempool_reconciliation ( + prediction_id, resolution_ts, outcome, failure_reason + ) + SELECT p.prediction_id, now(), 'dropped', $1 + FROM mempool_predictions p + WHERE p.predicted_target_block <= $2 + AND NOT EXISTS ( + SELECT 1 FROM mempool_reconciliation r + WHERE r.prediction_id = p.prediction_id + ) + ON CONFLICT (prediction_id) DO NOTHING + `, failureReason, cutoff) + if err != nil { + return 0, fmt.Errorf("mark stale dropped: %w", err) + } + rows := tag.RowsAffected() + if rows > 0 { + r.metrics.ReconciledTotal.WithLabelValues(OutcomeDropped).Add(float64(rows)) + } + return rows, nil +} + +func (r *PgMempoolReconciliation) dispatch(ctx context.Context) { + defer r.wg.Done() + for rec := range r.ch { + r.metrics.QueueDepth.Dec() + if ctx.Err() != nil { + // Drain remaining ops to keep the channel reader live; the + // dispatcherCancel path is reserved for the wedged-PG case. 
+ continue + } + timer := time.Now() + err := r.insertReconciliationInner(ctx, &rec) + elapsedMs := float64(time.Since(timer).Microseconds()) / 1000.0 + result := "ok" + if err != nil { + result = "err" + slog.Warn("reconciliation insert failed; row dropped", + "component", "reconciler", + "outcome", rec.Outcome, + "prediction_id", rec.PredictionID, + "error", err.Error()) + } else { + r.metrics.ReconciledTotal.WithLabelValues(rec.Outcome).Inc() + } + r.metrics.WriteLatencyMs.WithLabelValues(result).Observe(elapsedMs) + } + slog.Info("PgMempoolReconciliation dispatcher exiting", "component", "reconciler") +} + +func (r *PgMempoolReconciliation) insertReconciliationInner( + ctx context.Context, + rec *NewReconciliation, +) error { + var ( + actualBlock *int64 + blockDelta *int + replaced []byte + ) + if rec.ActualTargetBlock != nil { + v := int64(*rec.ActualTargetBlock) + actualBlock = &v + } + if rec.BlockDelta != nil { + blockDelta = rec.BlockDelta + } + if rec.ReplacedByTxHash != nil { + replaced = rec.ReplacedByTxHash[:] + } + _, err := r.pool.Exec(ctx, ` + INSERT INTO mempool_reconciliation ( + prediction_id, resolution_ts, outcome, + actual_target_block, actual_tx_index, block_delta, + ordering_correct, pool_path_correct, + replaced_by_tx_hash, failure_reason + ) VALUES ( + $1, $2, $3, + $4, $5, $6, + $7, $8, + $9, $10 + ) + ON CONFLICT (prediction_id) DO NOTHING + `, + rec.PredictionID, rec.ResolutionTs, rec.Outcome, + actualBlock, rec.ActualTxIndex, blockDelta, + rec.OrderingCorrect, rec.PoolPathCorrect, + replaced, rec.FailureReason, + ) + return err +} diff --git a/internal/db/mempool_reconciliation_test.go b/internal/db/mempool_reconciliation_test.go new file mode 100644 index 0000000..9384a03 --- /dev/null +++ b/internal/db/mempool_reconciliation_test.go @@ -0,0 +1,254 @@ +package db + +import ( + "context" + "os" + "testing" + "time" + + "github.com/google/uuid" + "github.com/prometheus/client_golang/prometheus" +) + +// TestOutcomeConstantsMatchSchema 
pins the outcome string constants to the +// CHECK constraint in `migrations/0004_mempool_reconciliation.sql`. A drift +// here (e.g. renaming "confirmed" to "landed") would make every write fail +// with a CHECK violation; this test catches it without touching the DB. +func TestOutcomeConstantsMatchSchema(t *testing.T) { + wantConfirmed := "confirmed" + wantDropped := "dropped" + wantReplaced := "replaced" + wantStillPending := "still_pending" + if OutcomeConfirmed != wantConfirmed { + t.Fatalf("OutcomeConfirmed = %q, want %q (migration 0004 CHECK constraint)", + OutcomeConfirmed, wantConfirmed) + } + if OutcomeDropped != wantDropped { + t.Fatalf("OutcomeDropped = %q, want %q", OutcomeDropped, wantDropped) + } + if OutcomeReplaced != wantReplaced { + t.Fatalf("OutcomeReplaced = %q, want %q", OutcomeReplaced, wantReplaced) + } + if OutcomeStillPending != wantStillPending { + t.Fatalf("OutcomeStillPending = %q, want %q", OutcomeStillPending, wantStillPending) + } +} + +// TestStaleConfirmationWindow pins the dropped-grace constant. Lowering it +// would surface false drops on busy fee markets; raising it would delay +// the "dropped" outcome past usefulness. A drift bug is far more likely +// than an intentional change, so the test fails noisily when the constant +// moves. +func TestStaleConfirmationWindow(t *testing.T) { + if StaleConfirmationWindow != 12 { + t.Fatalf("StaleConfirmationWindow = %d, want 12 (Flashbots-aligned heuristic)", + StaleConfirmationWindow) + } +} + +// TestMempoolReconciliationMetricsRegister exercises every path on the +// metrics struct so a typo in a Name / Help / label triggers in CI. 
+func TestMempoolReconciliationMetricsRegister(t *testing.T) { + reg := prometheus.NewRegistry() + m := NewMempoolReconciliationMetrics(reg) + + m.ReconciledTotal.WithLabelValues(OutcomeConfirmed).Inc() + m.ReconciledTotal.WithLabelValues(OutcomeDropped).Add(3) + m.DropsTotal.Inc() + m.QueueDepth.Set(7) + m.WriteLatencyMs.WithLabelValues("ok").Observe(1.5) + + families, err := reg.Gather() + if err != nil { + t.Fatalf("registry.Gather: %v", err) + } + names := map[string]bool{} + for _, f := range families { + names[f.GetName()] = true + } + for _, required := range []string{ + "aether_mempool_reconciled_total", + "aether_mempool_reconciler_drops_total", + "aether_mempool_reconciler_queue_depth", + "aether_mempool_reconciler_write_latency_ms", + } { + if !names[required] { + t.Fatalf("missing metric family %s", required) + } + } +} + +// TestNewReconciliationDefaults documents the zero-value behaviour for the +// optional pointer fields. Without these defaults, a caller who forgets to +// populate ActualTargetBlock for a `dropped` outcome would still produce +// a row whose NULLs match the schema's expectations — this test pins that. +func TestNewReconciliationDefaults(t *testing.T) { + rec := NewReconciliation{ + PredictionID: uuid.New(), + ResolutionTs: time.Now().UTC(), + Outcome: OutcomeDropped, + } + if rec.ActualTargetBlock != nil { + t.Errorf("ActualTargetBlock should default to nil; got %v", rec.ActualTargetBlock) + } + if rec.ActualTxIndex != nil { + t.Errorf("ActualTxIndex should default to nil; got %v", rec.ActualTxIndex) + } + if rec.PoolPathCorrect != nil { + t.Errorf("PoolPathCorrect should default to nil; got %v", rec.PoolPathCorrect) + } +} + +// ------- Integration test, gated by MEMPOOL_LEDGER_TEST_DSN ------- + +// TestPgMempoolReconciliationRoundTrip exercises the writer against a live +// Postgres reachable via MEMPOOL_LEDGER_TEST_DSN. Skipped when the env var +// is unset so `go test ./...` works on machines without Postgres. 
+// +// Pre-condition: migrations 0001 → 0004 applied. The test inserts one +// prediction via raw SQL (mirroring what the Rust writer would emit), +// invokes LookupPredictionByTxHash + InsertReconciliation, then verifies +// the join. Cleanup truncates the rows it added; it does not touch +// pre-existing data. +func TestPgMempoolReconciliationRoundTrip(t *testing.T) { + dsn := os.Getenv("MEMPOOL_LEDGER_TEST_DSN") + if dsn == "" { + t.Skip("MEMPOOL_LEDGER_TEST_DSN unset — skipping live PG integration test") + } + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + reg := prometheus.NewRegistry() + metrics := NewMempoolReconciliationMetrics(reg) + recon, err := NewPgMempoolReconciliation(ctx, dsn, metrics) + if err != nil { + t.Fatalf("NewPgMempoolReconciliation: %v", err) + } + defer recon.Close() + + // Insert a prediction we own. The tx hash is derived from two fresh + // UUIDs so re-running the test against the same DB does not collide + // with an orphan row from a previous failed run (the `pending_tx_hash` + // UNIQUE index would otherwise short-circuit our INSERT and we'd + // read back the stale prediction_id). 
+ predID := uuid.New() + var txHash [32]byte + low := uuid.New() + high := uuid.New() + copy(txHash[0:16], low[:]) + copy(txHash[16:32], high[:]) + router := [20]byte{0x7a, 0x25, 0x0d, 0x56, 0x30, 0xb4, 0xcf, 0x53, 0x97, 0x39, + 0xdf, 0x2c, 0x5d, 0xac, 0xb4, 0xc6, 0x59, 0xf2, 0x48, 0x8d} + tokenIn := [20]byte{0xc0, 0x2a, 0xaa, 0x39, 0xb2, 0x23, 0xfe, 0x8d, 0x0a, 0x0e, + 0x5c, 0x4f, 0x27, 0xea, 0xd9, 0x08, 0x3c, 0x75, 0x6c, 0xc2} + tokenOut := [20]byte{0xa0, 0xb8, 0x69, 0x91, 0xc6, 0x21, 0x8b, 0x36, 0xc1, 0xd1, + 0x9d, 0x4a, 0x2e, 0x9e, 0xb0, 0xce, 0x36, 0x06, 0xeb, 0x48} + poolAddr := [20]byte{0xB4, 0xe1, 0x6d, 0x01, 0x68, 0xe5, 0x2d, 0x35, 0xCa, 0xCD, + 0x2c, 0x61, 0x85, 0xb4, 0x42, 0x81, 0xEc, 0x28, 0xC9, 0xDc} + + _, err = recon.pool.Exec(ctx, ` + INSERT INTO mempool_predictions ( + prediction_id, decoded_at, pending_tx_hash, router_address, protocol, + token_in, token_out, amount_in, pool_address, + predicted_target_block, predicted_post_state + ) VALUES ( + $1, now(), $2, $3, 'uni_v2', $4, $5, 1000000, $6, + 100, '{"kind":"v2","reserve_in":1000,"reserve_out":2000}'::jsonb + ) + ON CONFLICT (pending_tx_hash) DO NOTHING + `, predID, txHash[:], router[:], tokenIn[:], tokenOut[:], poolAddr[:]) + if err != nil { + t.Fatalf("seed prediction: %v", err) + } + t.Cleanup(func() { + // Cascade clears both rows when we delete the prediction. + _, _ = recon.pool.Exec(context.Background(), + `DELETE FROM mempool_predictions WHERE prediction_id = $1`, predID) + }) + + // Read it back through the public API. 
+ pred, found, err := recon.LookupPredictionByTxHash(ctx, txHash) + if err != nil { + t.Fatalf("LookupPredictionByTxHash: %v", err) + } + if !found { + t.Fatalf("expected prediction to be found by tx hash") + } + if pred.PredictionID != predID { + t.Fatalf("PredictionID = %v, want %v", pred.PredictionID, predID) + } + if pred.PoolAddress == nil || *pred.PoolAddress != poolAddr { + t.Fatalf("PoolAddress mismatch: %v vs %v", pred.PoolAddress, poolAddr) + } + if pred.PredictedTargetBlock != 100 { + t.Fatalf("PredictedTargetBlock = %d, want 100", pred.PredictedTargetBlock) + } + + // Write a confirmed reconciliation row. Use Close() at end to drain. + actualBlock := uint64(101) + actualIdx := 5 + blockDelta := 1 + poolPathCorrect := true + recon.InsertReconciliation(NewReconciliation{ + PredictionID: predID, + ResolutionTs: time.Now().UTC(), + Outcome: OutcomeConfirmed, + ActualTargetBlock: &actualBlock, + ActualTxIndex: &actualIdx, + BlockDelta: &blockDelta, + PoolPathCorrect: &poolPathCorrect, + }) + + // Allow the dispatcher to drain. Poll the row up to 2 s so we don't + // race on a slow CI Postgres. + deadline := time.Now().Add(2 * time.Second) + var landed bool + for time.Now().Before(deadline) { + var outcome string + err := recon.pool.QueryRow(ctx, + `SELECT outcome FROM mempool_reconciliation WHERE prediction_id = $1`, + predID, + ).Scan(&outcome) + if err == nil && outcome == OutcomeConfirmed { + landed = true + break + } + time.Sleep(50 * time.Millisecond) + } + if !landed { + t.Fatalf("reconciliation row did not land within 2s") + } +} + +// TestPgMempoolReconciliationLookupMiss verifies the (false, nil err) case +// for a hash that is not in `mempool_predictions`. Important because the +// per-block loop treats `(false, nil)` as "tx hash is not a prediction" +// (the dominant case) without logging. 
+func TestPgMempoolReconciliationLookupMiss(t *testing.T) { + dsn := os.Getenv("MEMPOOL_LEDGER_TEST_DSN") + if dsn == "" { + t.Skip("MEMPOOL_LEDGER_TEST_DSN unset — skipping live PG integration test") + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + reg := prometheus.NewRegistry() + metrics := NewMempoolReconciliationMetrics(reg) + recon, err := NewPgMempoolReconciliation(ctx, dsn, metrics) + if err != nil { + t.Fatalf("NewPgMempoolReconciliation: %v", err) + } + defer recon.Close() + + // A hash that no prediction ever uses (high bytes set). + missing := [32]byte{0xff, 0xff, 0xff, 0xff} + _, found, err := recon.LookupPredictionByTxHash(ctx, missing) + if err != nil { + t.Fatalf("LookupPredictionByTxHash error on miss: %v", err) + } + if found { + t.Fatalf("found=true for non-existent hash") + } +} diff --git a/migrations/0003_mempool_predictions.sql b/migrations/0003_mempool_predictions.sql new file mode 100644 index 0000000..bf63119 --- /dev/null +++ b/migrations/0003_mempool_predictions.sql @@ -0,0 +1,79 @@ +-- Mempool predictions — public-flow observability ledger. +-- +-- Records every pending-tx swap the engine decoded + analytically simulated, +-- so a follow-up reconciler (issue #131 Go half) can compare against +-- confirmed blocks and answer "did the tx land where we predicted, in the +-- order we predicted, hitting the pool we predicted?" — all in SQL. +-- +-- Independent of `arbs` / `bundles`: this side never submits, so there is no +-- foreign key into the trade-ledger tables. The two ledgers can be enabled +-- separately via distinct DSNs (DATABASE_URL = trade ledger, +-- MEMPOOL_LEDGER_DSN = mempool ledger), so an operator can run mempool +-- observability without provisioning the executor schema and vice versa. +-- +-- U256 economics → NUMERIC(78,0) (max 2^256 has 78 digits). +-- Variable-shape state → JSONB (V3 sqrt + tick; Curve A + balances; +-- Balancer balances + weights). 
+-- All timestamps → TIMESTAMPTZ. +-- +-- Clock-authority policy (matches 0001_trade_ledger.sql): +-- * `decoded_at` is CLIENT-SET. Writers MUST populate it at the moment +-- the pending-tx event lands in the decoder; the `DEFAULT now()` +-- fallback exists only for ad-hoc inserts and must not be relied on by +-- application paths. The gap between "tx hit the mempool subscription" +-- and "row landed in Postgres" can be tens of ms under load; trusting +-- DB time would skew the detection-lead-vs-confirmation analysis the +-- follow-up reconciler builds on. +-- +-- See issue #131 for the broader observability plan, and CLAUDE.md for the +-- 7-layer architecture context. + +CREATE TABLE IF NOT EXISTS mempool_predictions ( + prediction_id UUID PRIMARY KEY, + -- Client-set; instant of decode in the Rust pipeline. + decoded_at TIMESTAMPTZ NOT NULL DEFAULT now(), + -- 32-byte tx hash. UNIQUE so a re-broadcast of the same pending tx + -- (Alchemy WS can replay on reconnect) does not insert a duplicate + -- prediction row; the writer uses ON CONFLICT DO NOTHING. + pending_tx_hash BYTEA NOT NULL UNIQUE, + router_address BYTEA NOT NULL, + -- Wire label matches the decoder's `Protocol` debug rendering: + -- uni_v2 / sushi / uni_v3 / curve / balancer. Bound to TEXT (not an + -- enum type) so adding a new decoded protocol does not require a + -- migration; values are validated at the Rust boundary. + protocol TEXT NOT NULL, + token_in BYTEA NOT NULL, + token_out BYTEA NOT NULL, + amount_in NUMERIC(78,0) NOT NULL, + -- NULL when the (token_in, token_out, protocol) triple missed the live + -- pool registry — the pre-sim filter drops these before the writer is + -- called, but the column stays nullable so a future "filtered" code + -- path can emit a stub row for completeness. + -- When present it must be exactly 20 bytes: the Go reconciler's + -- LookupPredictionByTxHash reads any other length back as "no pool". + pool_address BYTEA, + -- current_head + 1 at decode time; the reconciler compares against + -- the actual landed block to produce `block_delta`. + -- Expected to be non-negative: the Go reconciler converts it to uint64.
+ predicted_target_block BIGINT NOT NULL, + -- Shape varies by protocol: + -- uni_v2 / sushi → {"reserve_in": "..", "reserve_out": ".."} + -- uni_v3 → {"sqrt_price_x96_post": "..", "tick_post": ..} + -- balancer → {"balance_in": "..", "balance_out": ".."} + -- curve → {"balances_post": ["..", ".."], "amp": ".."} + predicted_post_state JSONB NOT NULL, + -- Populated when the post-state Bellman-Ford scan surfaced a profitable + -- cycle; NULL when the scan ran but found nothing. A NOT NULL profit + -- factor is the SQL signal "we would have considered acting on this". + profit_factor_predicted DOUBLE PRECISION, + -- Engine-side measurement of how far ahead of confirmation we saw the + -- pending tx (decoded_at - earliest builder-side timestamp). NULL when + -- the builder timestamp is unknown (Alchemy WS doesn't expose one + -- today; reserved for the MEV-Share SSE path). + detection_lead_ms BIGINT, + engine_git_sha TEXT +); + +CREATE INDEX IF NOT EXISTS mempool_predictions_target_block_idx ON mempool_predictions (predicted_target_block); +CREATE INDEX IF NOT EXISTS mempool_predictions_decoded_at_idx ON mempool_predictions (decoded_at DESC); + +-- mempool_reconciliation lands in PR-2 (issue #131 Go half). Defining it +-- here, rather than in a separate migration, would couple the two PRs; +-- the reconciler ships its own 0004_mempool_reconciliation.sql. diff --git a/migrations/0004_mempool_reconciliation.sql b/migrations/0004_mempool_reconciliation.sql new file mode 100644 index 0000000..0272f18 --- /dev/null +++ b/migrations/0004_mempool_reconciliation.sql @@ -0,0 +1,77 @@ +-- Mempool reconciliation — close the loop on persisted predictions. +-- +-- Joins one-to-one with `mempool_predictions` (issue #131 first half / +-- PR #133): for every prediction, the reconciler poll loop writes exactly +-- one row here once the outcome is known. The two tables together answer +-- "did the tx land where we said it would, in the order we said it would, +-- hitting the pool we said it would?"
— entirely in SQL. +-- +-- Outcome lifecycle: +-- confirmed - tx landed in a block (expected at or after +-- predicted_target_block). `actual_target_block`, +-- `actual_tx_index` and `block_delta` are populated; +-- `pool_path_correct` may be NULL and `ordering_correct` +-- is always NULL today (see the column comments below). +-- dropped - prediction is now older than head - 12 blocks and no +-- matching tx has surfaced. Mirrors the Flashbots-side +-- "12-block dropped" heuristic. Only `resolution_ts` and +-- `failure_reason` carry meaning. +-- replaced - a later pending tx from the same sender + nonce landed +-- first (same-nonce replacement). `replaced_by_tx_hash` +-- carries the replacement's hash; the other "actual" +-- columns are NULL because the prediction itself never +-- confirmed. +-- still_pending - reserved for the case where the reconciler shuts down +-- with predictions still in-flight; the next start-up +-- resumes from this state. Not emitted under steady-state +-- operation. +-- +-- Clock-authority policy matches 0001_trade_ledger.sql / 0003_mempool_predictions: +-- * `resolution_ts` is CLIENT-SET (writer populates it the moment the +-- resolution is computed in code; the `DEFAULT now()` is a psql-level +-- safety net only). +-- +-- The FK to `mempool_predictions(prediction_id)` uses ON DELETE CASCADE so +-- truncating predictions for a re-soak also clears reconciliation. The +-- reverse direction is enforced by the writer (insert prediction first, +-- then reconciliation), not by a CHECK constraint, so the writer can +-- batch-insert reconciliations without locking against concurrent +-- prediction inserts. + +CREATE TABLE IF NOT EXISTS mempool_reconciliation ( + prediction_id UUID PRIMARY KEY + REFERENCES mempool_predictions(prediction_id) ON DELETE CASCADE, + -- Client-set; instant of resolution in the reconciler loop. + resolution_ts TIMESTAMPTZ NOT NULL DEFAULT now(), + outcome TEXT NOT NULL + CHECK (outcome IN ('confirmed','dropped','replaced','still_pending')), + -- NULL for `dropped` / `replaced` / `still_pending`.
For `confirmed`: + -- the block the prediction's pending tx actually landed in. + actual_target_block BIGINT, + -- Position within `actual_target_block`. NULL when outcome ≠ confirmed. + actual_tx_index INTEGER, + -- `actual_target_block - predicted_target_block`. Negative = landed + -- earlier than predicted; positive = landed later. NULL when outcome + -- ≠ confirmed. + block_delta INTEGER, + -- The mempool predictor records `predicted_target_block` only — it does + -- not predict tx ordering within the block. Until a predicted-index is + -- recorded by the engine (future work), `ordering_correct` is left + -- NULL on confirmed rows so the column stays a no-op rather than + -- a misleading TRUE. + ordering_correct BOOLEAN, + -- TRUE when the receipt's logs contain an entry whose `address` matches + -- `mempool_predictions.pool_address`. FALSE when the receipt landed but + -- no log touched the predicted pool (router routed elsewhere, or the + -- predicted pool was wrong). NULL when outcome ≠ confirmed, when + -- `pool_address` was NULL on the prediction, or when the receipt + -- could not be fetched in time. + pool_path_correct BOOLEAN, + -- Hash of the same-nonce replacement tx. Set only when outcome = + -- replaced; NULL otherwise. + replaced_by_tx_hash BYTEA, + -- Free-form reason for non-confirmed outcomes (e.g. the + -- "12-block window elapsed" / "same-nonce replacement" labels the + -- reconciler emits). NULL on `confirmed`. + failure_reason TEXT +); + +CREATE INDEX IF NOT EXISTS mempool_reconciliation_actual_target_block_idx + ON mempool_reconciliation (actual_target_block); +CREATE INDEX IF NOT EXISTS mempool_reconciliation_outcome_idx + ON mempool_reconciliation (outcome); diff --git a/migrations/0005_mempool_profitability.sql b/migrations/0005_mempool_profitability.sql new file mode 100644 index 0000000..bfe53c0 --- /dev/null +++ b/migrations/0005_mempool_profitability.sql @@ -0,0 +1,75 @@ +-- Mempool profitability — realized P&L per reconciled prediction.
+-- +-- Joins one-to-one with `mempool_predictions` (and indirectly through +-- `mempool_reconciliation`): for every prediction whose outcome was +-- `confirmed`, the scorer writes one row here with what our analytical +-- arb cycle would have *actually* realized against the post-state of +-- the pool at the block where the victim swap landed. +-- +-- The point is to learn whether the predictor is finding *profitable* +-- real-world opportunities — without ever submitting a bundle. The +-- headline answer is SUM(net_profit_wei) WHERE decision='profitable' +-- over the soak window. +-- +-- Outcome lifecycle (`decision`): +-- profitable - Bellman-Ford on the actual-block graph found a +-- cycle whose gross profit exceeds the gas estimate. +-- `realized_profit_*` and `net_profit_wei` are positive. +-- unprofitable - Cycle was found but gross profit < gas estimate. +-- `net_profit_wei` is negative; `realized_profit_*` +-- may still be non-zero (gross can be positive while +-- net is negative). +-- reverted - Reserved for the revm-fork-verify path (planned +-- follow-up). The detector found a cycle but a +-- forked-EVM replay would have reverted. Not emitted +-- by the v1 scorer; the column carries the value for +-- forward compatibility. +-- no_path - Bellman-Ford on the actual-block graph found no +-- negative cycle through the affected pool. The +-- analytical predictor surfaced a path at decode +-- time but the real-block post-state had no path. +-- +-- Clock-authority policy matches 0001_trade_ledger.sql / 0003 / 0004: +-- * `scored_at` is CLIENT-SET. Writers MUST populate it the moment +-- the scoring is computed in code; the `DEFAULT now()` is a psql +-- safety net only. +-- +-- Cascade FK to `mempool_predictions(prediction_id)` so a re-soak that +-- truncates predictions also clears the profitability rows. 
+ +CREATE TABLE IF NOT EXISTS mempool_profitability ( + prediction_id UUID PRIMARY KEY + REFERENCES mempool_predictions(prediction_id) ON DELETE CASCADE, + -- Client-set; instant the scorer finished computing this row. + scored_at TIMESTAMPTZ NOT NULL DEFAULT now(), + -- JSONB-encoded cycle as a list of {pool_address, token_in, + -- token_out, protocol} hops. Variable length so the schema + -- accommodates 2-hop and longer cycles without a separate table. + cycle_path JSONB NOT NULL, + -- Gross profit from replaying the cycle against the actual-block + -- reserves. NUMERIC(78,0) keeps U256 economics lossless. + realized_profit_wei NUMERIC(78,0) NOT NULL, + -- Convenience copy in ETH units. NUMERIC(38,18) holds 38 digits in + -- total, 18 of them fractional, which leaves 20 integer digits + -- (values below 1e20 ETH). That is more than the total supply, so + -- a realistic profit cannot overflow. NUMERIC is exact; DOUBLE + -- PRECISION would lose lower digits. + realized_profit_eth NUMERIC(38,18) NOT NULL, + -- Gas estimate in wei (gas_units × gas_price_wei). Both factors + -- come from the existing per-protocol gas model + the chain's + -- current base fee at scoring time. Stored separately from + -- `realized_profit_wei` so the scorer can be re-run with a + -- different gas model without losing the gross signal. + gas_estimate_wei NUMERIC(78,0) NOT NULL, + -- realized_profit_wei - gas_estimate_wei. May be negative. + -- Sign on this column is the SQL signal "would we have made + -- money" — the headline answer the dashboard exposes. + net_profit_wei NUMERIC(78,0) NOT NULL, + decision TEXT NOT NULL + CHECK (decision IN ('profitable','unprofitable','reverted','no_path')), + scoring_engine_git_sha TEXT +); + +CREATE INDEX IF NOT EXISTS mempool_profitability_decision_idx + ON mempool_profitability (decision); +CREATE INDEX IF NOT EXISTS mempool_profitability_scored_at_idx + ON mempool_profitability (scored_at DESC);