diff --git a/Cargo.lock b/Cargo.lock index deb0fc74..22a72cda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1398,6 +1398,7 @@ dependencies = [ "smoltcp", "tabwriter", "usdt", + "uuid", "version_check", "zerocopy 0.8.26", ] @@ -1412,6 +1413,7 @@ dependencies = [ "postcard", "serde", "smoltcp", + "uuid", ] [[package]] @@ -1455,6 +1457,7 @@ dependencies = [ "postcard", "serde", "thiserror 2.0.12", + "uuid", ] [[package]] @@ -1483,6 +1486,7 @@ dependencies = [ "serde", "tabwriter", "thiserror 2.0.12", + "uuid", ] [[package]] diff --git a/bench/src/packet.rs b/bench/src/packet.rs index b555f946..9054ce66 100644 --- a/bench/src/packet.rs +++ b/bench/src/packet.rs @@ -26,6 +26,7 @@ use opte_test_utils::icmp::gen_icmp_echo; use opte_test_utils::icmp::gen_icmpv6_echo; use opte_test_utils::icmp::generate_ndisc; use opte_test_utils::*; +use oxide_vpc::api::Route; pub type TestCase = (MsgBlk, Direction); @@ -294,18 +295,24 @@ impl BenchPacketInstance for UlpProcessInstance { router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip6("::/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip6("::/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -314,7 +321,7 @@ impl BenchPacketInstance for UlpProcessInstance { let any_in = "dir=in action=allow priority=1000 protocol=any"; firewall::set_fw_rules( &g1.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g1.port.name().to_string(), rules: vec![any_in.parse().unwrap()], }, diff --git a/bin/opteadm/Cargo.toml 
b/bin/opteadm/Cargo.toml index c0e5ac19..d11a7fd6 100644 --- a/bin/opteadm/Cargo.toml +++ b/bin/opteadm/Cargo.toml @@ -23,6 +23,7 @@ postcard.workspace = true serde.workspace = true tabwriter.workspace = true thiserror.workspace = true +uuid.workspace = true [build-dependencies] anyhow.workspace = true diff --git a/bin/opteadm/src/bin/opteadm.rs b/bin/opteadm/src/bin/opteadm.rs index 219bf555..0dee15f3 100644 --- a/bin/opteadm/src/bin/opteadm.rs +++ b/bin/opteadm/src/bin/opteadm.rs @@ -10,6 +10,7 @@ use clap::Parser; use opte::api::API_VERSION; use opte::api::Direction; use opte::api::DomainName; +use opte::api::FlowPair; use opte::api::IpAddr; use opte::api::IpCidr; use opte::api::Ipv4Addr; @@ -45,6 +46,7 @@ use oxide_vpc::api::Ports; use oxide_vpc::api::ProtoFilter; use oxide_vpc::api::RemFwRuleReq; use oxide_vpc::api::RemoveCidrResp; +use oxide_vpc::api::Route; use oxide_vpc::api::RouterClass; use oxide_vpc::api::RouterTarget; use oxide_vpc::api::SNat4Cfg; @@ -276,6 +278,42 @@ enum Command { #[arg(long = "dir")] direction: Option, }, + + /// Return the IDs of all registered stat objects. + ListRootStats { + /// The OPTE port to query. + #[arg(short)] + port: String, + }, + + /// Return the IDs of all current flows. + ListFlowStats { + /// The OPTE port to query. + #[arg(short)] + port: String, + }, + + /// Request the current state of root stats contained in a port. + DumpRootStats { + /// The OPTE port to query. + #[arg(short)] + port: String, + // /// A comma-separated list of stat UUIDs of interest. If omitted, + // /// request all available stats. + // #[arg(long)] + // ids: Uuid, + }, + + /// Request the current state of flow stats contained in a port. + DumpFlowStats { + /// The OPTE port to query. + #[arg(short)] + port: String, + // /// A comma-separated list of flowkeys of interest. If omitted, + // /// request all available stats. 
+ // #[arg(long)] + // ids: Vec, + }, } #[derive(Debug, Parser)] @@ -632,6 +670,7 @@ fn main() -> anyhow::Result<()> { filters: filters.into(), action, priority, + stat_id: None, }; hdl.add_firewall_rule(&AddFwRuleReq { port_name: port, rule })?; } @@ -767,16 +806,16 @@ fn main() -> anyhow::Result<()> { Command::AddRouterEntry { route: RouterRule { port, dest, target, class }, } => { - let req = - AddRouterEntryReq { port_name: port, dest, target, class }; + let route = Route { dest, target, class, stat_id: None }; + let req = AddRouterEntryReq { port_name: port, route }; hdl.add_router_entry(&req)?; } Command::DelRouterEntry { route: RouterRule { port, dest, target, class }, } => { - let req = - DelRouterEntryReq { port_name: port, dest, target, class }; + let route = Route { dest, target, class, stat_id: None }; + let req = DelRouterEntryReq { port_name: port, route }; if let DelRouterEntryResp::NotFound = hdl.del_router_entry(&req)? { anyhow::bail!( "could not delete entry -- no matching rule found" @@ -859,6 +898,37 @@ fn main() -> anyhow::Result<()> { })?; } } + + Command::ListRootStats { port } => { + let vals = hdl.list_root_stats(&port)?; + + for val in vals.root_ids { + println!("{val}"); + } + } + + Command::ListFlowStats { port } => { + let vals = hdl.list_flow_stats(&port)?; + + println!("Inbound -> Outbound"); + for FlowPair { inbound, outbound } in vals.flow_ids { + println!("{inbound} -> {outbound}"); + } + } + + Command::DumpRootStats { port } => { + let vals = hdl.dump_root_stats(&port, [])?; + for (id, stat) in vals.root_stats { + println!("{id}:\n\t{stat:?}"); + } + } + + Command::DumpFlowStats { port } => { + let vals = hdl.dump_flow_stats(&port, [])?; + for (id, stat) in vals.flow_stats { + println!("{id}:\n\t{stat:?}"); + } + } } Ok(()) diff --git a/crates/opte-api/Cargo.toml b/crates/opte-api/Cargo.toml index 7c4d2e60..34c47a95 100644 --- a/crates/opte-api/Cargo.toml +++ b/crates/opte-api/Cargo.toml @@ -17,6 +17,7 @@ ingot.workspace = true 
ipnetwork = { workspace = true, optional = true } postcard.workspace = true serde.workspace = true +uuid.workspace = true [dependencies.smoltcp] workspace = true diff --git a/crates/opte-api/src/cmd.rs b/crates/opte-api/src/cmd.rs index 5c0f9986..b01c5e44 100644 --- a/crates/opte-api/src/cmd.rs +++ b/crates/opte-api/src/cmd.rs @@ -5,11 +5,15 @@ // Copyright 2025 Oxide Computer Company use super::API_VERSION; +use super::FlowStat; +use super::FullCounter; use super::RuleId; use super::TcpState; use super::encap::Vni; use super::ip::IpCidr; use super::mac::MacAddr; +use alloc::collections::BTreeMap; +use alloc::collections::BTreeSet; use alloc::string::String; use alloc::string::ToString; use alloc::vec::Vec; @@ -18,6 +22,7 @@ use illumos_sys_hdrs::c_int; use illumos_sys_hdrs::size_t; use serde::Deserialize; use serde::Serialize; +use uuid::Uuid; pub const XDE_IOC: u32 = 0xde777700; pub const XDE_IOC_OPTE_CMD: i32 = XDE_IOC as i32 | 0x01; @@ -50,6 +55,10 @@ pub enum OpteCmd { SetExternalIps = 80, // set xde external IPs for a port AllowCidr = 90, // allow ip block through gateway tx/rx RemoveCidr = 91, // deny ip block through gateway tx/rx + ListRootStat = 100, // list the ids of all registered root stats + ListFlowStat = 101, // list the flow-keys of all current flows + DumpRootStat = 102, // request current counter set(s) with a given ID + DumpFlowStat = 103, // request flow stats for one or more flows } impl TryFrom for OpteCmd { @@ -82,6 +91,10 @@ impl TryFrom for OpteCmd { 80 => Ok(Self::SetExternalIps), 90 => Ok(Self::AllowCidr), 91 => Ok(Self::RemoveCidr), + 100 => Ok(Self::ListRootStat), + 101 => Ok(Self::ListFlowStat), + 102 => Ok(Self::DumpRootStat), + 103 => Ok(Self::DumpFlowStat), _ => Err(()), } } @@ -261,6 +274,13 @@ pub struct NoResp { impl CmdOk for NoResp {} +/// Arbitrary request directed at a port which requires no additional +/// selectors. 
+#[derive(Debug, Deserialize, Serialize)] +pub struct PortReq { + pub port_name: String, +} + /// Dump various information about a layer, for use in debugging or /// administrative purposes. #[derive(Debug, Deserialize, Serialize)] @@ -296,10 +316,7 @@ pub struct DumpLayerResp { impl CmdOk for DumpLayerResp {} -#[derive(Debug, Deserialize, Serialize)] -pub struct ListLayersReq { - pub port_name: String, -} +pub type ListLayersReq = PortReq; #[derive(Debug, Deserialize, Serialize)] pub struct LayerDesc { @@ -324,10 +341,7 @@ pub struct ListLayersResp { impl CmdOk for ListLayersResp {} -#[derive(Debug, Deserialize, Serialize)] -pub struct ClearUftReq { - pub port_name: String, -} +pub type ClearUftReq = PortReq; #[derive(Debug, Deserialize, Serialize)] pub struct ClearLftReq { @@ -335,10 +349,7 @@ pub struct ClearLftReq { pub layer_name: String, } -#[derive(Debug, Deserialize, Serialize)] -pub struct DumpUftReq { - pub port_name: String, -} +pub type DumpUftReq = PortReq; #[derive(Debug, Deserialize, Serialize)] pub struct DumpUftResp { @@ -358,10 +369,7 @@ pub struct UftEntryDump { pub summary: String, } -#[derive(Debug, Deserialize, Serialize)] -pub struct DumpTcpFlowsReq { - pub port_name: String, -} +pub type DumpTcpFlowsReq = PortReq; #[derive(Debug, Deserialize, Serialize)] pub struct DumpTcpFlowsResp { @@ -373,10 +381,6 @@ pub struct TcpFlowEntryDump { pub hits: u64, pub inbound_ufid: Option, pub tcp_state: TcpFlowStateDump, - pub segs_in: u64, - pub segs_out: u64, - pub bytes_in: u64, - pub bytes_out: u64, } #[derive(Debug, Deserialize, Serialize)] @@ -410,3 +414,69 @@ pub struct RuleDump { pub data_predicates: Vec, pub action: String, } + +pub type ListRootStatReq = PortReq; + +#[derive(Debug, Deserialize, Serialize)] +pub struct ListRootStatResp { + pub root_ids: Vec, +} + +impl CmdOk for ListRootStatResp {} + +pub type ListFlowStatReq = PortReq; + +#[derive(Debug, Deserialize, Serialize, Ord, Eq, PartialEq, PartialOrd)] +pub struct FlowPair { + pub 
inbound: Flow, + pub outbound: Flow, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ListFlowStatResp { + pub flow_ids: BTreeSet>, +} + +impl CmdOk for ListFlowStatResp {} + +/// Request the current state of some (or all) root stats contained +/// in a port. +#[derive(Debug, Deserialize, Serialize)] +pub struct DumpRootStatReq { + /// The name of the target port. + pub port_name: String, + /// The set of root stat IDs to query. + /// + /// If empty, collect the state of all stats. + pub root_ids: BTreeSet, +} + +/// The current state of queried root stats. +#[derive(Debug, Deserialize, Serialize)] +pub struct DumpRootStatResp { + /// The set of queried root stats. + pub root_stats: BTreeMap, +} + +impl CmdOk for DumpRootStatResp {} + +/// Request the current state of some (or all) flow stats contained +/// in a port. +#[derive(Debug, Deserialize, Serialize)] +pub struct DumpFlowStatReq { + /// The name of the target port. + pub port_name: String, + /// The set of flow-keys to query. + /// + /// If empty, collect the state of all flows. + pub flow_ids: BTreeSet, +} + +/// The current state of queried flow stats. +#[derive(Debug, Deserialize, Serialize)] +pub struct DumpFlowStatResp { + /// The set of queried flow stats. + pub flow_stats: BTreeMap>, +} + +impl CmdOk for DumpFlowStatResp {} diff --git a/crates/opte-api/src/lib.rs b/crates/opte-api/src/lib.rs index 7176e7a5..a2032de7 100644 --- a/crates/opte-api/src/lib.rs +++ b/crates/opte-api/src/lib.rs @@ -28,6 +28,7 @@ pub mod encap; pub mod ip; pub mod mac; pub mod ndp; +pub mod stat; pub mod tcp; pub mod ulp; @@ -38,6 +39,7 @@ pub use encap::*; pub use ip::*; pub use mac::*; pub use ndp::*; +pub use stat::*; pub use tcp::*; pub use ulp::*; @@ -51,7 +53,7 @@ pub use ulp::*; /// /// We rely on CI and the check-api-version.sh script to verify that /// this number is incremented anytime the oxide-api code changes. 
-pub const API_VERSION: u64 = 37; +pub const API_VERSION: u64 = 38; /// Major version of the OPTE package. pub const MAJOR_VERSION: u64 = 0; diff --git a/crates/opte-api/src/stat.rs b/crates/opte-api/src/stat.rs new file mode 100644 index 00000000..2c71a3b4 --- /dev/null +++ b/crates/opte-api/src/stat.rs @@ -0,0 +1,40 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2025 Oxide Computer Company + +//! Types for handling flow stats from the ioctl API. + +use crate::Direction; +use alloc::vec::Vec; +use serde::Deserialize; +use serde::Serialize; +use uuid::Uuid; + +#[derive(Deserialize, Serialize, Debug, Clone, Eq, PartialEq)] +pub struct FlowStat { + pub partner: FlowId, + pub dir: Direction, + pub first_dir: Direction, + pub bases: Vec, + pub stats: PacketCounter, +} + +#[derive(Deserialize, Serialize, Debug, Clone, Copy, Eq, PartialEq)] +pub struct PacketCounter { + pub created_at: u64, + pub pkts_in: u64, + pub bytes_in: u64, + pub pkts_out: u64, + pub bytes_out: u64, +} + +#[derive(Deserialize, Serialize, Debug, Clone, Copy, Eq, PartialEq)] +pub struct FullCounter { + pub allow: u64, + pub deny: u64, + pub hairpin: u64, + pub error: u64, + pub packets: PacketCounter, +} diff --git a/lib/opte-ioctl/Cargo.toml b/lib/opte-ioctl/Cargo.toml index 3e410cc1..4c2f734d 100644 --- a/lib/opte-ioctl/Cargo.toml +++ b/lib/opte-ioctl/Cargo.toml @@ -15,3 +15,4 @@ postcard.workspace = true serde.workspace = true thiserror.workspace = true libnet.workspace = true +uuid.workspace = true diff --git a/lib/opte-ioctl/src/lib.rs b/lib/opte-ioctl/src/lib.rs index c896ce4b..34235048 100644 --- a/lib/opte-ioctl/src/lib.rs +++ b/lib/opte-ioctl/src/lib.rs @@ -9,15 +9,23 @@ use opte::api::ClearLftReq; use opte::api::ClearUftReq; use opte::api::CmdOk; use opte::api::Direction; +use opte::api::DumpFlowStatReq; +use 
opte::api::DumpFlowStatResp; use opte::api::DumpLayerReq; use opte::api::DumpLayerResp; +use opte::api::DumpRootStatReq; +use opte::api::DumpRootStatResp; use opte::api::DumpTcpFlowsReq; use opte::api::DumpTcpFlowsResp; use opte::api::DumpUftReq; use opte::api::DumpUftResp; pub use opte::api::InnerFlowId; +use opte::api::ListFlowStatReq; +use opte::api::ListFlowStatResp; use opte::api::ListLayersReq; use opte::api::ListLayersResp; +use opte::api::ListRootStatReq; +use opte::api::ListRootStatResp; use opte::api::NoResp; use opte::api::OpteCmd; use opte::api::OpteCmdIoctl; @@ -52,6 +60,7 @@ use std::fs::File; use std::fs::OpenOptions; use std::os::unix::io::AsRawFd; use thiserror::Error; +use uuid::Uuid; /// Errors related to administering the OPTE driver. #[derive(Debug, Error)] @@ -368,6 +377,68 @@ impl OpteHdl { Some(&DumpUftReq { port_name: port_name.to_string() }), ) } + + /// Return the IDs of all registered stat objects. + pub fn list_root_stats( + &self, + port_name: &str, + ) -> Result { + run_cmd_ioctl( + self.device.as_raw_fd(), + OpteCmd::ListRootStat, + Some(&ListRootStatReq { port_name: port_name.to_string() }), + ) + } + + /// Return the IDs of all current flows. + pub fn list_flow_stats( + &self, + port_name: &str, + ) -> Result, Error> { + run_cmd_ioctl( + self.device.as_raw_fd(), + OpteCmd::ListFlowStat, + Some(&ListFlowStatReq { port_name: port_name.to_string() }), + ) + } + + /// Request the current state of some (or all) root stats contained + /// in a port. + /// + /// An empty `stat_ids` will request all present stats. + pub fn dump_root_stats( + &self, + port_name: &str, + stat_ids: impl IntoIterator, + ) -> Result { + run_cmd_ioctl( + self.device.as_raw_fd(), + OpteCmd::DumpRootStat, + Some(&DumpRootStatReq { + port_name: port_name.to_string(), + root_ids: stat_ids.into_iter().collect(), + }), + ) + } + + /// Request the current state of some (or all) flow stats contained + /// in a port. 
+ /// + /// An empty `flow_keys` will request all present flows. + pub fn dump_flow_stats( + &self, + port_name: &str, + flow_keys: impl IntoIterator, + ) -> Result, Error> { + run_cmd_ioctl( + self.device.as_raw_fd(), + OpteCmd::DumpFlowStat, + Some(&DumpFlowStatReq { + port_name: port_name.to_string(), + flow_ids: flow_keys.into_iter().collect(), + }), + ) + } } pub fn run_cmd_ioctl( diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index f3eac140..9ab989fd 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -16,7 +16,6 @@ pub mod pcap; pub mod port_state; // Let's make our lives easier and pub use a bunch of stuff. -pub use opte::ExecCtx; pub use opte::api::Direction::*; pub use opte::api::MacAddr; pub use opte::ddi::mblk::MsgBlk; @@ -63,6 +62,7 @@ pub use opte::ingot::types::Emit; pub use opte::ingot::types::EmitDoesNotRelyOnBufContents; pub use opte::ingot::types::HeaderLen; pub use opte::ingot::udp::Udp; +pub use opte::provider::Providers; pub use oxide_vpc::api::AddFwRuleReq; pub use oxide_vpc::api::BOUNDARY_SERVICES_VNI; pub use oxide_vpc::api::DhcpCfg; @@ -72,6 +72,7 @@ pub use oxide_vpc::api::IpCfg; pub use oxide_vpc::api::Ipv4Cfg; pub use oxide_vpc::api::Ipv6Cfg; pub use oxide_vpc::api::PhysNet; +use oxide_vpc::api::Route; pub use oxide_vpc::api::RouterClass; pub use oxide_vpc::api::RouterTarget; pub use oxide_vpc::api::SNat4Cfg; @@ -257,8 +258,8 @@ fn oxide_net_builder( v2p: Arc, v2b: Arc, ) -> PortBuilder { - #[allow(clippy::arc_with_non_send_sync)] - let ectx = Arc::new(ExecCtx { log: Box::new(opte::PrintlnLog {}) }); + let ectx = + Arc::new(Providers { log: Box::new(opte::provider::PrintlnLog) }); let name_cstr = std::ffi::CString::new(name).unwrap(); let mut pb = PortBuilder::new(name, name_cstr, cfg.guest_mac, ectx); @@ -269,11 +270,11 @@ fn oxide_net_builder( let dhcp = base_dhcp_config(); firewall::setup(&mut pb, fw_limit).expect("failed to add firewall layer"); - gateway::setup(&pb, cfg, 
vpc_map, fw_limit, &dhcp) + gateway::setup(&mut pb, cfg, vpc_map, fw_limit, &dhcp) .expect("failed to setup gateway layer"); - router::setup(&pb, cfg, one_limit).expect("failed to add router layer"); + router::setup(&mut pb, cfg, one_limit).expect("failed to add router layer"); nat::setup(&mut pb, cfg, snat_limit).expect("failed to add nat layer"); - overlay::setup(&pb, cfg, v2p, v2b, one_limit) + overlay::setup(&mut pb, cfg, v2p, v2b, one_limit) .expect("failed to add overlay layer"); pb } @@ -372,9 +373,12 @@ pub fn oxide_net_setup2( // on same subnet. router::add_entry( &port, - IpCidr::Ip4(cfg.ipv4().vpc_subnet), - RouterTarget::VpcSubnet(IpCidr::Ip4(cfg.ipv4().vpc_subnet)), - RouterClass::System, + Route { + dest: IpCidr::Ip4(cfg.ipv4().vpc_subnet), + target: RouterTarget::VpcSubnet(IpCidr::Ip4(cfg.ipv4().vpc_subnet)), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); @@ -457,7 +461,7 @@ fn set_default_fw_rules(pav: &mut PortAndVps, cfg: &VpcCfg) { format!("dir=in action=allow priority=65534 hosts=vni={}", cfg.vni,); firewall::set_fw_rules( &pav.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: pav.port.name().to_string(), rules: vec![ vpc_in.parse().unwrap(), diff --git a/lib/opte-test-utils/src/pcap.rs b/lib/opte-test-utils/src/pcap.rs index cfe00103..67523d46 100644 --- a/lib/opte-test-utils/src/pcap.rs +++ b/lib/opte-test-utils/src/pcap.rs @@ -37,6 +37,8 @@ fn next_block(offset: &[u8]) -> (&[u8], LegacyPcapBlock) { } /// Build a packet capture file from a series of [`Packet`]. 
+/// +/// [`Packet`]: opte::engine::packet::Packet pub struct PcapBuilder { file: File, } diff --git a/lib/opte-test-utils/src/port_state.rs b/lib/opte-test-utils/src/port_state.rs index 3a233f58..c39bafb4 100644 --- a/lib/opte-test-utils/src/port_state.rs +++ b/lib/opte-test-utils/src/port_state.rs @@ -86,6 +86,11 @@ pub fn print_port( write_hr(&mut out)?; writeln!(&mut out, "{:#?}", port.stats_snap())?; + // ================================================================ + // Print the Better Stats + // ================================================================ + writeln!(&mut out, "{}", port.dump_flow_stats().unwrap())?; + write_hrb(&mut out)?; writeln!(&mut out)?; diff --git a/lib/opte/Cargo.toml b/lib/opte/Cargo.toml index 35ad5540..6400ea61 100644 --- a/lib/opte/Cargo.toml +++ b/lib/opte/Cargo.toml @@ -47,6 +47,7 @@ itertools = { workspace = true, optional = true } postcard.workspace = true serde.workspace = true tabwriter = { workspace = true, optional = true } +uuid.workspace = true usdt = { workspace = true, optional = true } zerocopy = { workspace = true, optional = true } diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 3a80ad0c..5da86643 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -8,7 +8,7 @@ use super::ether::Ethernet; use super::ip::v4::*; -use super::packet::MblkPacketData; +use super::packet::MblkPacketDataView; use super::predicate::DataPredicate; use super::predicate::EtherAddrMatch; use super::predicate::IpProtoMatch; @@ -478,7 +478,7 @@ impl HairpinAction for DhcpAction { (hdr_preds, data_preds) } - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { let body = meta.copy_remaining(); let client_pkt = DhcpPacket::new_checked(&body)?; let client_dhcp = DhcpRepr::parse(&client_pkt)?; diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 
358f322c..1c4af4db 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -25,7 +25,7 @@ use crate::engine::dhcpv6::options::StatusCode; use crate::engine::ether::Ethernet; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; -use crate::engine::packet::MblkPacketData; +use crate::engine::packet::MblkPacketDataView; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; @@ -582,10 +582,9 @@ fn process_confirm_message<'a>( } } -// Process a DHCPv6 message from the a client. +// Process a DHCPv6 message from a client. fn process_client_message<'a>( action: &'a Dhcpv6Action, - _meta: &'a MblkPacketData, client_msg: &'a Message<'a>, ) -> Option> { match client_msg.typ { @@ -607,7 +606,7 @@ fn process_client_message<'a>( // the request and the actual DHCPv6 message to send out. fn generate_packet<'a>( action: &Dhcpv6Action, - meta: &MblkPacketData, + meta: MblkPacketDataView, msg: &'a Message<'a>, ) -> GenPacketResult { let udp = Udp { @@ -621,7 +620,7 @@ fn generate_packet<'a>( source: Ipv6Addr::from_eui64(&action.server_mac), // Safety: We're only here if the predicates match, one of which is // IPv6. - destination: meta.inner_ip6().unwrap().source(), + destination: meta.headers.inner_ip6().unwrap().source(), next_header: IngotIpProto::UDP, payload_len: udp.length, ..Default::default() @@ -667,11 +666,10 @@ impl HairpinAction for Dhcpv6Action { // Rather than put this logic into DataPredicates, we just parse the packet // here and reply accordingly. So the `Dhcpv6Action` is really a full // server, to the extent we emulate one. 
- fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { let body = meta.copy_remaining(); if let Some(client_msg) = Message::from_bytes(&body) { - if let Some(reply) = process_client_message(self, meta, &client_msg) - { + if let Some(reply) = process_client_message(self, &client_msg) { generate_packet(self, meta, &reply) } else { Ok(AllowOrDeny::Deny) @@ -727,13 +725,12 @@ mod test { let pkt = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); - let pmeta = pkt.meta(); let ameta = ActionMeta::new(); let client_mac = MacAddr::from_const([0xa8, 0x40, 0x25, 0xfa, 0xdd, 0x0b]); for pred in dhcpv6_server_predicates(&client_mac) { assert!( - pred.is_match(pmeta, &ameta), + pred.is_match(&pkt, &ameta), "Expected predicate to match snooped Solicit test packet: {pred}", ); } diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 93aa1e52..1ab0f9c4 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -11,7 +11,7 @@ use crate::ddi::mblk::MsgBlk; use crate::engine::checksum::HeaderChecksum; use crate::engine::ether::Ethernet; use crate::engine::ip::v4::Ipv4; -use crate::engine::packet::MblkPacketData; +use crate::engine::packet::MblkPacketDataView; use crate::engine::predicate::Ipv4AddrMatch; use ingot::ethernet::Ethertype; use ingot::icmp::IcmpV4; @@ -50,8 +50,8 @@ impl HairpinAction for IcmpEchoReply { (hdr_preds, vec![]) } - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { - let Some(icmp) = meta.inner_icmp() else { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { + let Some(icmp) = meta.headers.inner_icmp() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMP packet. 
That // should be impossible, but we avoid panicking given the kernel diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 5562c19c..eb5d09d1 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -12,7 +12,7 @@ use crate::engine::checksum::HeaderChecksum; use crate::engine::ether::Ethernet; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; -use crate::engine::packet::MblkPacketData; +use crate::engine::packet::MblkPacketDataView; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; use ingot::ethernet::Ethertype; @@ -109,8 +109,8 @@ impl HairpinAction for Icmpv6EchoReply { (hdr_preds, vec![]) } - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { - let Some(icmp6) = meta.inner_icmp6() else { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { + let Some(icmp6) = meta.headers.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That // should be impossible, but we avoid panicking given the kernel @@ -234,11 +234,11 @@ impl HairpinAction for RouterAdvertisement { (hdr_preds, vec![]) } - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { use smoltcp::time::Duration; use smoltcp::wire::NdiscRouterFlags; - let Some(icmp6) = meta.inner_icmp6() else { + let Some(icmp6) = meta.headers.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That // should be impossible, but we avoid panicking given the kernel @@ -250,7 +250,7 @@ impl HairpinAction for RouterAdvertisement { // Collect the src / dst IP addresses, which are needed to emit the // resulting ICMPv6 packet using `smoltcp`. 
- let Some(ip6) = meta.inner_ip6() else { + let Some(ip6) = meta.headers.inner_ip6() else { // We got the ICMPv6 metadata above but no IPv6 somehow? return Err(GenErr::Unexpected(format!( "Expected IPv6 packet metadata, but found: {meta:?}", @@ -350,7 +350,7 @@ impl HairpinAction for RouterAdvertisement { let ip6 = Ipv6 { source: *self.ip(), // Safety: We match on this being Some(_) above, so unwrap is safe. - destination: meta.inner_ip6().unwrap().source(), + destination: meta.headers.inner_ip6().unwrap().source(), next_header: IngotIpProto::ICMP_V6, payload_len: reply_len as u16, @@ -552,8 +552,8 @@ impl HairpinAction for NeighborAdvertisement { (hdr_preds, vec![]) } - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { - let Some(icmp6) = meta.inner_icmp6() else { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { + let Some(icmp6) = meta.headers.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That // should be impossible, but we avoid panicking given the kernel @@ -564,7 +564,7 @@ impl HairpinAction for NeighborAdvertisement { }; // Sanity check that this is actually in IPv6 packet. - let metadata = meta.inner_ip6().ok_or_else(|| { + let metadata = meta.headers.inner_ip6().ok_or_else(|| { // We got the ICMPv6 metadata above but no IPv6 somehow? 
GenErr::Unexpected(format!( "Expected IPv6 packet metadata, but found: {meta:?}", diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index d6413e27..d8c09d83 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -15,8 +15,8 @@ use super::packet::BodyTransformError; use super::packet::FLOW_ID_DEFAULT; use super::packet::InnerFlowId; use super::packet::MblkFullParsed; -use super::packet::MblkPacketData; use super::packet::Packet; +use super::port::PortBuilder; use super::port::Transforms; use super::port::meta::ActionMeta; use super::rule; @@ -28,8 +28,9 @@ use super::rule::GenBtError; use super::rule::HdrTransformError; use super::rule::Rule; use super::rule::ht_probe; -use crate::ExecCtx; -use crate::LogLevel; +use super::stat::InternalStat; +use super::stat::RootStat; +use super::stat::StatTree; use crate::api::DumpLayerResp; use crate::d_error::DError; #[cfg(all(not(feature = "std"), not(test)))] @@ -39,6 +40,9 @@ use crate::ddi::kstat::KStatProvider; use crate::ddi::kstat::KStatU64; use crate::ddi::mblk::MsgBlk; use crate::ddi::time::Moment; +use crate::engine::ExecCtx; +use crate::provider::LogLevel; +use crate::provider::Providers; use alloc::ffi::CString; use alloc::string::String; use alloc::string::ToString; @@ -56,6 +60,7 @@ use opte_api::Direction; use opte_api::RuleDump; use opte_api::RuleId; use opte_api::RuleTableEntryDump; +use uuid::Uuid; #[derive(Debug)] pub enum LayerError { @@ -156,10 +161,31 @@ pub enum LftError { MaxCapacity, } +#[derive(Clone, Debug)] +struct LftInEntry { + action_desc: ActionDescEntry, + stat: Arc, +} + +impl Display for LftInEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.action_desc) + } +} + +impl Dump for LftInEntry { + type DumpVal = ActionDescEntryDump; + + fn dump(&self, hits: u64) -> Self::DumpVal { + ActionDescEntryDump { hits, summary: self.to_string() } + } +} + #[derive(Clone, Debug)] struct LftOutEntry { in_flow_pair: 
InnerFlowId, action_desc: ActionDescEntry, + stat: Arc, } impl LftOutEntry { @@ -185,7 +211,7 @@ impl Dump for LftOutEntry { struct LayerFlowTable { limit: NonZeroU32, count: u32, - ft_in: FlowTable, + ft_in: FlowTable, ft_out: FlowTable, } @@ -201,11 +227,17 @@ impl LayerFlowTable { action_desc: ActionDescEntry, in_flow: InnerFlowId, out_flow: InnerFlowId, + stat: Arc, ) { - // We add unchekced because the limit is now enforced by + // We add unchecked because the limit is now enforced by // LayerFlowTable, not the individual flow tables. - self.ft_in.add_unchecked(in_flow, action_desc.clone()); - let out_entry = LftOutEntry { in_flow_pair: in_flow, action_desc }; + let in_entry = LftInEntry { + action_desc: action_desc.clone(), + stat: Arc::clone(&stat), + }; + self.ft_in.add_unchecked(in_flow, in_entry); + let out_entry = + LftOutEntry { in_flow_pair: in_flow, action_desc, stat }; self.ft_out.add_unchecked(out_flow, out_entry); self.count += 1; } @@ -246,10 +278,12 @@ impl LayerFlowTable { match self.ft_in.get(flow) { Some(entry) => { entry.hit(); + let action = entry.state().action_desc.clone(); + let stat = Arc::clone(&entry.state().stat); if entry.is_dirty() { - EntryState::Dirty(entry.state().clone()) + EntryState::Dirty(action, stat) } else { - EntryState::Clean(entry.state().clone()) + EntryState::Clean(action, stat) } } @@ -262,10 +296,11 @@ impl LayerFlowTable { Some(entry) => { entry.hit(); let action = entry.state().action_desc.clone(); + let stat = Arc::clone(&entry.state().stat); if entry.is_dirty() { - EntryState::Dirty(action) + EntryState::Dirty(action, stat) } else { - EntryState::Clean(action) + EntryState::Clean(action, stat) } } @@ -276,7 +311,7 @@ impl LayerFlowTable { fn remove_in( &mut self, flow: &InnerFlowId, - ) -> Option>> { + ) -> Option>> { self.ft_in.remove(flow) } @@ -331,14 +366,14 @@ impl LayerFlowTable { } /// The result of a flowtable lookup. 
-pub enum EntryState { +enum EntryState { /// No flow entry was found matching a given flowid. None, /// An existing flow table entry was found. - Clean(ActionDescEntry), + Clean(ActionDescEntry, Arc), /// An existing flow table entry was found, but rule processing must be rerun /// to use the original action or invalidate the underlying entry. - Dirty(ActionDescEntry), + Dirty(ActionDescEntry, Arc), } /// The default action of a layer. @@ -348,8 +383,9 @@ pub enum EntryState { /// reasonable to open this up to be any [`Action`], if such a use /// case were to present itself. For now, we stay conservative, and /// supply only what the current consumers need. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Default)] pub enum DefaultAction { + #[default] Allow, StatefulAllow, Deny, @@ -402,7 +438,7 @@ impl Display for ActionDescEntry { /// /// This describes the actions a layer's rules can take as well as the /// [`DefaultAction`] to take when a rule doesn't match. -#[derive(Debug)] +#[derive(Debug, Default)] pub struct LayerActions { /// The list of actions shared among the layer's rules. An action /// doesn't have to be shared, each rule is free to create its @@ -414,9 +450,15 @@ pub struct LayerActions { /// direction. pub default_in: DefaultAction, + /// The stats ID to attach to the default-in action. + pub default_in_stat_id: Option, + /// The default action to take if no rule matches in the outbound /// direction. pub default_out: DefaultAction, + + /// The stats ID to attach to the default-out action. 
+ pub default_out_stat_id: Option, } #[derive(KStatProvider)] @@ -498,8 +540,10 @@ pub struct Layer { actions: Vec, default_in: DefaultAction, default_in_hits: u64, + default_in_stat: Arc, default_out: DefaultAction, default_out_hits: u64, + default_out_stat: Arc, ft: LayerFlowTable, ft_cstr: CString, rules_in: RuleTable, @@ -513,15 +557,20 @@ impl Layer { self.actions.get(idx).cloned() } - pub fn add_rule(&mut self, dir: Direction, rule: Rule) { + pub fn add_rule( + &mut self, + dir: Direction, + rule: Rule, + stats: &mut StatTree, + ) { match dir { Direction::Out => { - self.rules_out.add(rule); + self.rules_out.add(rule, stats); self.stats.vals.out_rules += 1; } Direction::In => { - self.rules_in.add(rule); + self.rules_in.add(rule, stats); self.stats.vals.in_rules += 1; } } @@ -732,18 +781,24 @@ impl Layer { pub fn new( name: &'static str, - port: &str, + port: &mut PortBuilder, actions: LayerActions, ft_limit: NonZeroU32, ) -> Self { - let port_c = CString::new(port).unwrap(); + let stats = port.stats_mut(); + let default_in_stat = stats.new_root(actions.default_in_stat_id); + let default_out_stat = stats.new_root(actions.default_out_stat_id); + + let port_name = port.name(); + + let port_c = CString::new(port_name).unwrap(); let name_c = CString::new(name).unwrap(); // Unwrap: We know this is fine because the stat names are // generated from the LayerStats structure. 
let stats = KStatNamed::new( "xde", - &format!("{port}_{name}"), + &format!("{port_name}_{name}"), LayerStats::new(), ) .unwrap(); @@ -754,15 +809,17 @@ impl Layer { actions: actions.actions, default_in: actions.default_in, default_in_hits: 0, + default_in_stat, default_out: actions.default_out, default_out_hits: 0, + default_out_stat, name, name_c, port_c, - ft: LayerFlowTable::new(port, name, ft_limit), + ft: LayerFlowTable::new(port_name, name, ft_limit), ft_cstr: CString::new(format!("ft-{name}")).unwrap(), - rules_in: RuleTable::new(port, name, Direction::In), - rules_out: RuleTable::new(port, name, Direction::Out), + rules_in: RuleTable::new(port_name, name, Direction::In), + rules_out: RuleTable::new(port_name, name, Direction::Out), rt_cstr: CString::new(format!("rt-{name}")).unwrap(), stats, } @@ -784,18 +841,18 @@ impl Layer { pub(crate) fn process( &mut self, - ectx: &ExecCtx, + ectx: &mut ExecCtx, dir: Direction, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { - use Direction::*; let flow_before = *pkt.flow(); self.layer_process_entry_probe(dir, pkt.flow()); + pkt.meta_internal_mut().stats.new_layer(); let res = match dir { - Out => self.process_out(ectx, pkt, xforms, ameta), - In => self.process_in(ectx, pkt, xforms, ameta), + Direction::Out => self.process_out(ectx, pkt, xforms, ameta), + Direction::In => self.process_in(ectx, pkt, xforms, ameta), }; self.layer_process_return_probe(dir, &flow_before, pkt.flow(), &res); res @@ -803,35 +860,39 @@ impl Layer { fn process_in( &mut self, - ectx: &ExecCtx, + ectx: &mut ExecCtx, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { // We have no FlowId, thus there can be no FlowTable entry. - if *pkt.flow() == FLOW_ID_DEFAULT { + if pkt.flow() == &FLOW_ID_DEFAULT { return self.process_in_rules(ectx, pkt, xforms, ameta); } // Do we have a FlowTable entry? If so, use it. 
- let flow = *pkt.flow(); - let action = match self.ft.get_in(&flow) { - EntryState::Dirty(ActionDescEntry::Desc(action)) + let flow = pkt.flow(); + let (action, stat) = match self.ft.get_in(flow) { + EntryState::Dirty(ActionDescEntry::Desc(action), stat) if action.is_valid() => { - self.ft.mark_clean(Direction::In, &flow); - Some(ActionDescEntry::Desc(action)) + self.ft.mark_clean(Direction::In, flow); + (Some(ActionDescEntry::Desc(action)), Some(stat)) } - EntryState::Dirty(_) => { + EntryState::Dirty(_, _) => { // NoOps are included in this case as we can't ask the actor whether // it remains valid: the simplest method to do so is to rerun lookup. - self.ft.remove_in(&flow); - None + self.ft.remove_in(flow); + (None, None) } - EntryState::Clean(action) => Some(action), - EntryState::None => None, + EntryState::Clean(action, stat) => (Some(action), Some(stat)), + EntryState::None => (None, None), }; + if let Some(stat) = stat { + pkt.meta_internal_mut().stats.push(stat.into()); + } + match action { Some(ActionDescEntry::NoOp) => { self.stats.vals.in_lft_hit += 1; @@ -842,6 +903,8 @@ impl Layer { self.stats.vals.in_lft_hit += 1; let flow_before = *pkt.flow(); let ht = desc.gen_ht(Direction::In); + let bt = desc.gen_bt(Direction::In, pkt.meta())?; + pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); ht_probe( @@ -852,10 +915,7 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body() - && let Some(bt) = - desc.gen_bt(Direction::In, pkt.meta(), body_segs)? 
- { + if let Some(bt) = bt { pkt.body_transform(Direction::In, &*bt)?; xforms.body.push(bt); } @@ -872,7 +932,7 @@ impl Layer { fn process_in_rules( &mut self, - ectx: &ExecCtx, + ectx: &mut ExecCtx, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, @@ -880,17 +940,20 @@ impl Layer { use Direction::In; self.stats.vals.in_lft_miss += 1; - let rule = self.rules_in.find_match(pkt.flow(), pkt.meta(), ameta); + let rule = self.rules_in.find_match(pkt.flow(), pkt, ameta); - let action = if let Some(rule) = rule { + let (action, stat) = if let Some(rule) = rule { self.stats.vals.in_rule_match += 1; - rule.action() + (rule.rule.action(), Arc::clone(&rule.stat)) } else { self.stats.vals.in_rule_nomatch += 1; self.default_in_hits += 1; - self.default_in.into() + (self.default_in.into(), Arc::clone(&self.default_in_stat)) }; + pkt.meta_internal_mut().stats.push(stat.into()); + let flow_before = *pkt.flow(); + match action { Action::Allow => Ok(LayerResult::Allow), @@ -903,13 +966,23 @@ impl Layer { }); } + let stat = + pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); + // The outbound flow ID mirrors the inbound. Remember, // the "top" of layer represents how the client sees // the traffic, and the "bottom" of the layer // represents how the network sees the traffic. - let flow_out = pkt.flow().mirror(); - let desc = ActionDescEntry::NoOp; - self.ft.add_pair(desc, *pkt.flow(), flow_out); + // + // No transformation occurs in a `StatefulAllow`, unlike + // `Stateful(x)`. The mirror flow is computed from the + // initial state. 
+ self.ft.add_pair( + ActionDescEntry::NoOp, + flow_before, + flow_before.mirror(), + stat, + ); self.stats.vals.flows += 1; Ok(LayerResult::Allow) } @@ -917,7 +990,7 @@ impl Layer { Action::Deny => { self.stats.vals.in_deny += 1; let reason = if rule.is_some() { - self.rule_deny_probe(In, pkt.flow()); + self.rule_deny_probe(In, &flow_before); DenyReason::Rule } else { DenyReason::Default @@ -926,42 +999,48 @@ impl Layer { Ok(LayerResult::Deny { name: self.name, reason }) } - Action::Meta(action) => match action.mod_meta(pkt.flow(), ameta) { - Ok(res) => match res { - AllowOrDeny::Allow(_) => Ok(LayerResult::Allow), + Action::Meta(action) => { + match action.mod_meta(&flow_before, ameta) { + Ok(res) => match res { + AllowOrDeny::Allow(_) => Ok(LayerResult::Allow), - AllowOrDeny::Deny => Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }), - }, + AllowOrDeny::Deny => Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }), + }, - Err(msg) => Err(LayerError::ModMeta(msg)), - }, + Err(msg) => Err(LayerError::ModMeta(msg)), + } + } Action::Static(action) => { - let ht = match action.gen_ht(In, pkt.flow(), pkt.meta(), ameta) - { - Ok(aord) => match aord { - AllowOrDeny::Allow(ht) => ht, - AllowOrDeny::Deny => { - return Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, + let ht = + match action.gen_ht(In, &flow_before, pkt.meta(), ameta) { + Ok(aord) => match aord { + AllowOrDeny::Allow(ht) => ht, + AllowOrDeny::Deny => { + return Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }); + } + }, + + Err(e) => { + self.record_gen_ht_failure( + ectx.user_ctx, + In, + &flow_before, + &e, + ); + return Err(LayerError::GenHdrTransform { + layer: self.name, + err: e, }); } - }, + }; - Err(e) => { - self.record_gen_ht_failure(ectx, In, pkt.flow(), &e); - return Err(LayerError::GenHdrTransform { - layer: self.name, - err: e, - }); - } - }; - - let flow_before = *pkt.flow(); 
pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); ht_probe( @@ -1013,26 +1092,39 @@ impl Layer { }); } - let desc = match action.gen_desc(pkt.flow(), pkt, ameta) { - Ok(aord) => match aord { - AllowOrDeny::Allow(desc) => desc, - - AllowOrDeny::Deny => { - return Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }); + let desc = + match action.gen_desc(&flow_before, pkt.meta(), ameta) { + Ok(aord) => match aord { + AllowOrDeny::Allow(desc) => desc, + + AllowOrDeny::Deny => { + return Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }); + } + }, + + Err(e) => { + self.record_gen_desc_failure( + ectx.user_ctx, + In, + &flow_before, + &e, + ); + return Err(LayerError::GenDesc(e)); } - }, + }; - Err(e) => { - self.record_gen_desc_failure(ectx, In, pkt.flow(), &e); - return Err(LayerError::GenDesc(e)); - } - }; - - let flow_before = *pkt.flow(); + // Generate the transforms, and then roll up our stats into an + // internal node. This allows for correct accounting in the event + // of an error. let ht_in = desc.gen_ht(In); + let bt = desc.gen_bt(In, pkt.meta())?; + + let stat = + pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); + pkt.hdr_transform(&ht_in)?; xforms.hdr.push(ht_in); ht_probe( @@ -1043,9 +1135,7 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body() - && let Some(bt) = desc.gen_bt(In, pkt.meta(), body_segs)? - { + if let Some(bt) = bt { pkt.body_transform(In, &*bt)?; xforms.body.push(bt); } @@ -1062,6 +1152,7 @@ impl Layer { ActionDescEntry::Desc(desc), flow_before, flow_out, + stat, ); self.stats.vals.flows += 1; Ok(LayerResult::Allow) @@ -1089,35 +1180,39 @@ impl Layer { fn process_out( &mut self, - ectx: &ExecCtx, + ectx: &mut ExecCtx, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { // We have no FlowId, thus there can be no FlowTable entry. 
- if *pkt.flow() == FLOW_ID_DEFAULT { + if pkt.flow() == &FLOW_ID_DEFAULT { return self.process_out_rules(ectx, pkt, xforms, ameta); } // Do we have a FlowTable entry? If so, use it. - let flow = *pkt.flow(); - let action = match self.ft.get_out(&flow) { - EntryState::Dirty(ActionDescEntry::Desc(action)) + let flow = pkt.flow(); + let (action, stat) = match self.ft.get_out(flow) { + EntryState::Dirty(ActionDescEntry::Desc(action), stat) if action.is_valid() => { - self.ft.mark_clean(Direction::Out, &flow); - Some(ActionDescEntry::Desc(action)) + self.ft.mark_clean(Direction::Out, flow); + (Some(ActionDescEntry::Desc(action)), Some(stat)) } - EntryState::Dirty(_) => { + EntryState::Dirty(_, _) => { // NoOps are included in this case as we can't ask the actor whether // it remains valid: the simplest method to do so is to rerun lookup. - self.ft.remove_out(&flow); - None + self.ft.remove_out(flow); + (None, None) } - EntryState::Clean(action) => Some(action), - EntryState::None => None, + EntryState::Clean(action, stat) => (Some(action), Some(stat)), + EntryState::None => (None, None), }; + if let Some(stat) = stat { + pkt.meta_internal_mut().stats.push(stat.into()); + } + match action { Some(ActionDescEntry::NoOp) => { self.stats.vals.out_lft_hit += 1; @@ -1128,6 +1223,8 @@ impl Layer { self.stats.vals.out_lft_hit += 1; let flow_before = *pkt.flow(); let ht = desc.gen_ht(Direction::Out); + let bt = desc.gen_bt(Direction::Out, pkt.meta())?; + pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); ht_probe( @@ -1138,10 +1235,7 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body() - && let Some(bt) = - desc.gen_bt(Direction::Out, pkt.meta(), body_segs)? 
- { + if let Some(bt) = bt { pkt.body_transform(Direction::Out, &*bt)?; xforms.body.push(bt); } @@ -1158,7 +1252,7 @@ impl Layer { fn process_out_rules( &mut self, - ectx: &ExecCtx, + ectx: &mut ExecCtx, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, @@ -1166,17 +1260,20 @@ impl Layer { use Direction::Out; self.stats.vals.out_lft_miss += 1; - let rule = self.rules_out.find_match(pkt.flow(), pkt.meta(), ameta); + let rule = self.rules_out.find_match(pkt.flow(), pkt, ameta); - let action = if let Some(rule) = rule { + let (action, stat) = if let Some(rule) = rule { self.stats.vals.out_rule_match += 1; - rule.action() + (rule.rule.action(), Arc::clone(&rule.stat)) } else { self.stats.vals.out_rule_nomatch += 1; self.default_out_hits += 1; - self.default_out.into() + (self.default_out.into(), Arc::clone(&self.default_out_stat)) }; + pkt.meta_internal_mut().stats.push(stat.into()); + let flow_before = *pkt.flow(); + match action { Action::Allow => Ok(LayerResult::Allow), @@ -1189,15 +1286,18 @@ impl Layer { }); } - // The inbound flow ID must be calculated _after_ the - // header transformation. Remember, the "top" - // (outbound) of layer represents how the client sees - // the traffic, and the "bottom" (inbound) of the - // layer represents how the network sees the traffic. - // The final step is to mirror the IPs and ports to - // reflect the traffic direction change. - let flow_in = pkt.flow().mirror(); - self.ft.add_pair(ActionDescEntry::NoOp, flow_in, *pkt.flow()); + let stat = + pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); + + // No transformation occurs in a `StatefulAllow`, unlike + // `Stateful(x)`. The mirror flow is computed from the + // initial state. 
+ self.ft.add_pair( + ActionDescEntry::NoOp, + flow_before.mirror(), + flow_before, + stat, + ); self.stats.vals.flows += 1; Ok(LayerResult::Allow) } @@ -1205,7 +1305,7 @@ impl Layer { Action::Deny => { self.stats.vals.out_deny += 1; let reason = if rule.is_some() { - self.rule_deny_probe(Out, pkt.flow()); + self.rule_deny_probe(Out, &flow_before); DenyReason::Rule } else { DenyReason::Default @@ -1214,42 +1314,48 @@ impl Layer { Ok(LayerResult::Deny { name: self.name, reason }) } - Action::Meta(action) => match action.mod_meta(pkt.flow(), ameta) { - Ok(res) => match res { - AllowOrDeny::Allow(_) => Ok(LayerResult::Allow), + Action::Meta(action) => { + match action.mod_meta(&flow_before, ameta) { + Ok(res) => match res { + AllowOrDeny::Allow(_) => Ok(LayerResult::Allow), - AllowOrDeny::Deny => Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }), - }, + AllowOrDeny::Deny => Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }), + }, - Err(msg) => Err(LayerError::ModMeta(msg)), - }, + Err(msg) => Err(LayerError::ModMeta(msg)), + } + } Action::Static(action) => { - let ht = match action.gen_ht(Out, pkt.flow(), pkt.meta(), ameta) - { - Ok(aord) => match aord { - AllowOrDeny::Allow(ht) => ht, - AllowOrDeny::Deny => { - return Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, + let ht = + match action.gen_ht(Out, &flow_before, pkt.meta(), ameta) { + Ok(aord) => match aord { + AllowOrDeny::Allow(ht) => ht, + AllowOrDeny::Deny => { + return Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }); + } + }, + + Err(e) => { + self.record_gen_ht_failure( + ectx.user_ctx, + Out, + &flow_before, + &e, + ); + return Err(LayerError::GenHdrTransform { + layer: self.name, + err: e, }); } - }, + }; - Err(e) => { - self.record_gen_ht_failure(ectx, Out, pkt.flow(), &e); - return Err(LayerError::GenHdrTransform { - layer: self.name, - err: e, - }); - } - }; - - let flow_before = *pkt.flow(); 
pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); ht_probe( @@ -1301,26 +1407,39 @@ impl Layer { }); } - let desc = match action.gen_desc(pkt.flow(), pkt, ameta) { - Ok(aord) => match aord { - AllowOrDeny::Allow(desc) => desc, - - AllowOrDeny::Deny => { - return Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }); + let desc = + match action.gen_desc(&flow_before, pkt.meta(), ameta) { + Ok(aord) => match aord { + AllowOrDeny::Allow(desc) => desc, + + AllowOrDeny::Deny => { + return Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }); + } + }, + + Err(e) => { + self.record_gen_desc_failure( + ectx.user_ctx, + Out, + &flow_before, + &e, + ); + return Err(LayerError::GenDesc(e)); } - }, - - Err(e) => { - self.record_gen_desc_failure(ectx, Out, pkt.flow(), &e); - return Err(LayerError::GenDesc(e)); - } - }; + }; - let flow_before = *pkt.flow(); + // Generate the transforms, and then roll up our stats into an + // internal node. This allows for correct accounting in the event + // of an error. let ht_out = desc.gen_ht(Out); + let bt = desc.gen_bt(Out, pkt.meta())?; + + let stat = + pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); + pkt.hdr_transform(&ht_out)?; xforms.hdr.push(ht_out); ht_probe( @@ -1331,9 +1450,7 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body() - && let Some(bt) = desc.gen_bt(Out, pkt.meta(), body_segs)? 
- { + if let Some(bt) = bt { pkt.body_transform(Out, &*bt)?; xforms.body.push(bt); } @@ -1351,6 +1468,7 @@ impl Layer { ActionDescEntry::Desc(desc), flow_in, flow_before, + stat, ); self.stats.vals.flows += 1; Ok(LayerResult::Allow) @@ -1378,7 +1496,7 @@ impl Layer { fn record_gen_desc_failure( &self, - ectx: &ExecCtx, + ectx: &Providers, dir: Direction, flow: &InnerFlowId, err: &rule::GenDescError, @@ -1395,7 +1513,7 @@ impl Layer { fn record_gen_ht_failure( &self, - ectx: &ExecCtx, + ectx: &Providers, dir: Direction, flow: &InnerFlowId, err: &rule::GenHtError, @@ -1483,9 +1601,10 @@ impl Layer { &mut self, in_rules: Vec>, out_rules: Vec>, + stats: &mut StatTree, ) { self.ft.clear(); - self.set_rules_core(in_rules, out_rules); + self.set_rules_core(in_rules, out_rules, stats); } /// Set all rules at once without clearing the flow table. @@ -1496,18 +1615,20 @@ impl Layer { &mut self, in_rules: Vec>, out_rules: Vec>, + stats: &mut StatTree, ) { self.ft.mark_dirty(); - self.set_rules_core(in_rules, out_rules); + self.set_rules_core(in_rules, out_rules, stats); } fn set_rules_core( &mut self, in_rules: Vec>, out_rules: Vec>, + stats: &mut StatTree, ) { - self.rules_in.set_rules(in_rules); - self.rules_out.set_rules(out_rules); + self.rules_in.set_rules(in_rules, stats); + self.rules_out.set_rules(out_rules, stats); self.stats.vals.set_rules_called += 1; self.stats.vals.in_rules.set(self.rules_in.num_rules() as u64); self.stats.vals.out_rules.set(self.rules_out.num_rules() as u64); @@ -1523,6 +1644,7 @@ struct RuleTableEntry { id: RuleId, hits: u64, rule: Rule, + stat: Arc, } impl From<&RuleTableEntry> for RuleTableEntryDump { @@ -1552,15 +1674,18 @@ pub enum RuleRemoveErr { } impl RuleTable { - fn add(&mut self, rule: Rule) { + fn add(&mut self, rule: Rule, stats: &mut StatTree) { + let stat = stats.new_root(rule.stat_id().copied()); match self.find_pos(&rule) { RulePlace::End => { - let rte = RuleTableEntry { id: self.next_id, hits: 0, rule }; + let rte = + 
RuleTableEntry { id: self.next_id, hits: 0, rule, stat }; self.rules.push(rte); } RulePlace::Insert(idx) => { - let rte = RuleTableEntry { id: self.next_id, hits: 0, rule }; + let rte = + RuleTableEntry { id: self.next_id, hits: 0, rule, stat }; self.rules.insert(idx, rte); } } @@ -1578,11 +1703,11 @@ impl RuleTable { fn find_match( &mut self, ifid: &InnerFlowId, - pmeta: &MblkPacketData, + pkt: &Packet, ameta: &ActionMeta, - ) -> Option<&Rule> { + ) -> Option<&RuleTableEntry> { for rte in self.rules.iter_mut() { - if rte.rule.is_match(pmeta, ameta) { + if rte.rule.is_match(pkt, ameta) { rte.hits += 1; Self::rule_match_probe( self.port_c.as_c_str(), @@ -1591,7 +1716,7 @@ impl RuleTable { ifid, &rte.rule, ); - return Some(&rte.rule); + return Some(rte); } } @@ -1729,10 +1854,14 @@ impl RuleTable { } } - pub fn set_rules(&mut self, new_rules: Vec>) { + pub fn set_rules( + &mut self, + new_rules: Vec>, + stats: &mut StatTree, + ) { self.rules.clear(); for r in new_rules { - self.add(r); + self.add(r, stats); } } } @@ -1816,6 +1945,7 @@ mod test { use crate::engine::predicate::Predicate; use crate::engine::rule; + let mut stats = StatTree::default(); let mut rule_table = RuleTable::new("port", "test", Direction::Out); let mut rule = Rule::new( 1, @@ -1826,7 +1956,7 @@ mod test { Ipv4AddrMatch::Prefix(cidr), ])); - rule_table.add(rule.finalize()); + rule_table.add(rule.finalize(), &mut stats); let mut test_pkt = MsgBlk::new_ethernet_pkt(( Ethernet { ethertype: Ethertype::IPV4, ..Default::default() }, @@ -1853,7 +1983,7 @@ mod test { // The pkt/rdr aren't actually used in this case. 
let ameta = ActionMeta::new(); let ifid = *pmeta.flow(); - assert!(rule_table.find_match(&ifid, pmeta.meta(), &ameta).is_some()); + assert!(rule_table.find_match(&ifid, &pmeta, &ameta).is_some()); } } // TODO Reinstate diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 603f51b3..13128860 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -5,8 +5,7 @@ // Copyright 2025 Oxide Computer Company //! The engine in OPTE. -//! -//! All code under this namespace is guarded by the `engine` feature flag. + pub mod arp; pub mod checksum; pub mod dhcp; @@ -28,6 +27,7 @@ pub mod port; pub mod predicate; pub mod rule; pub mod snat; +pub mod stat; #[macro_use] pub mod tcp; pub mod tcp_state; @@ -35,6 +35,7 @@ pub mod tcp_state; pub mod udp; use crate::ddi::mblk::MsgBlk; +use crate::provider::Providers; use checksum::Checksum; use ingot::tcp::TcpRef; use ingot::types::IntoBufPointer; @@ -47,6 +48,7 @@ use packet::Packet; use packet::Pullup; use parse::ValidNoEncap; use rule::CompiledTransform; +use stat::StatTree; use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; @@ -140,6 +142,13 @@ use crate::engine::packet::InnerFlowId; use crate::engine::packet::ParseError; use crate::engine::port::UftEntry; +/// Context containing platform-specific providers and shared elements from a +/// [`port::Port`], used within layer and action execution. +pub struct ExecCtx<'a> { + pub user_ctx: &'a Providers, + pub stats: &'a mut StatTree, +} + /// The action to take for a single packet, based on the processing of /// the [`NetworkImpl::handle_pkt()`] callback. pub enum HdlPktAction { @@ -190,7 +199,7 @@ pub struct HdlPktError(pub &'static str); /// handling of the packet at an individual level, instead of /// treating it as a flow. This is useful for packets that do not /// easily map to the flow model. -pub trait NetworkImpl { +pub trait NetworkImpl: Send + Sync { /// The packet parser for this network implementation. 
type Parser: NetworkParser; diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index e8f7e190..990160dd 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -17,8 +17,7 @@ use super::ip::v6::ValidIpv6; use super::packet::BodyTransform; use super::packet::BodyTransformError; use super::packet::InnerFlowId; -use super::packet::MblkFullParsed; -use super::packet::Packet; +use super::packet::MblkPacketDataView; use super::parse::Ulp; use super::parse::UlpRepr; use super::port::meta::ActionMeta; @@ -105,7 +104,7 @@ impl StatefulAction for OutboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet, + _pkt: MblkPacketDataView, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // When we have several external IPs at our disposal, we are @@ -168,7 +167,7 @@ impl StatefulAction for InboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet, + _pkt: MblkPacketDataView, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // We rely on the attached predicates to filter out IPs which are *not* @@ -234,13 +233,12 @@ impl ActionDesc for NatDesc { fn gen_bt( &self, _dir: Direction, - meta: &super::packet::MblkPacketData, - _payload_seg: &[u8], + meta: MblkPacketDataView, ) -> Result>, rule::GenBtError> { // ICMPv4/v6 traffic can carry frames which they were generated // in response to. We need to also apply our NAT transform to // these. - match (meta.inner_ulp(), self.priv_ip, self.external_ip) { + match (&meta.headers.inner_ulp, self.priv_ip, self.external_ip) { ( Some(Ulp::IcmpV4(_)), IpAddr::Ip4(priv_ip), @@ -395,6 +393,7 @@ mod test { use crate::engine::ether::EthernetRef; use crate::engine::ip::v4::Ipv4; use crate::engine::ip::v4::Ipv4Ref; + use crate::engine::packet::Packet; use ingot::ethernet::Ethertype; use ingot::ip::IpProtocol; use ingot::tcp::Tcp; @@ -458,8 +457,8 @@ mod test { // ================================================================ // Verify descriptor generation. 
// ================================================================ - let flow_out = InnerFlowId::from(pkt.meta()); - let desc = match nat.gen_desc(&flow_out, &pkt, &mut ameta) { + let flow_out = InnerFlowId::from(pkt.headers()); + let desc = match nat.gen_desc(&flow_out, pkt.meta(), &mut ameta) { Ok(AllowOrDeny::Allow(desc)) => desc, _ => panic!("expected AllowOrDeny::Allow(desc) result"), }; @@ -468,26 +467,20 @@ mod test { // Verify outbound header transformation // ================================================================ let out_ht = desc.gen_ht(Direction::Out); - let pmo = pkt.meta_mut(); - out_ht.run(pmo).unwrap(); + out_ht.run(&mut pkt).unwrap(); + let pmo = pkt.headers(); - let ether_meta = pmo.inner_ether(); + let ether_meta = &pmo.inner_eth; assert_eq!(ether_meta.source(), priv_mac); assert_eq!(ether_meta.destination(), dest_mac); - let ip4_meta = match pmo.inner_ip4() { - Some(v) => v, - _ => panic!("expect Ipv4Meta"), - }; + let ip4_meta = pmo.inner_ip4().unwrap(); assert_eq!(ip4_meta.source(), pub_ip); assert_eq!(ip4_meta.destination(), outside_ip); assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmo.inner_tcp() { - Some(v) => v, - _ => panic!("expect TcpMeta"), - }; + let tcp_meta = pmo.inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), priv_port); assert_eq!(tcp_meta.destination(), outside_port); @@ -523,27 +516,21 @@ mod test { .unwrap() .to_full_meta(); - let pmi = pkt.meta_mut(); let in_ht = desc.gen_ht(Direction::In); - in_ht.run(pmi).unwrap(); + in_ht.run(&mut pkt).unwrap(); + let pmi = pkt.headers(); - let ether_meta = pmi.inner_ether(); + let ether_meta = &pmi.inner_eth; assert_eq!(ether_meta.source(), dest_mac); assert_eq!(ether_meta.destination(), priv_mac); - let ip4_meta = match pmi.inner_ip4() { - Some(v) => v, - _ => panic!("expect Ipv4Meta"), - }; + let ip4_meta = pmi.inner_ip4().unwrap(); assert_eq!(ip4_meta.source(), outside_ip); assert_eq!(ip4_meta.destination(), priv_ip); 
assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmi.inner_tcp() { - Some(v) => v, - _ => panic!("expect TcpMeta"), - }; + let tcp_meta = pmi.inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), outside_port); assert_eq!(tcp_meta.destination(), priv_port); diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index bac3e28a..954a6ef6 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -34,6 +34,8 @@ use super::rule::CompiledEncap; use super::rule::CompiledTransform; use super::rule::HdrTransform; use super::rule::HdrTransformError; +use super::stat::FlowStatBuilder; +use super::stat::RootStat; pub use crate::api::AddrPair; pub use crate::api::FLOW_ID_DEFAULT; use crate::api::IcmpInfo; @@ -291,6 +293,149 @@ pub struct OpteMeta { pub inner_ulp: Option>, } +impl OpteMeta { + /// Returns whether this packet is sourced from outside the rack, + /// in addition to its VNI. + pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { + match &self.outer_encap { + Some(InlineHeader::Repr(EncapMeta::Geneve(g))) => { + Some((g.vni, g.oxide_external_pkt)) + } + Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { + Some((g.vni(), valid_geneve_has_oxide_external(g))) + } + None => None, + } + } + + pub fn inner_ip4(&self) -> Option<&Ipv4Packet> { + self.inner_l3.as_ref().and_then(|v| match v { + L3::Ipv4(v) => Some(v), + _ => None, + }) + } + + pub fn inner_ip6(&self) -> Option<&Ipv6Packet> { + self.inner_l3.as_ref().and_then(|v| match v { + L3::Ipv6(v) => Some(v), + _ => None, + }) + } + + pub fn inner_icmp(&self) -> Option<&IcmpV4Packet> { + self.inner_ulp.as_ref().and_then(|v| match v { + Ulp::IcmpV4(v) => Some(v), + _ => None, + }) + } + + pub fn inner_icmp6(&self) -> Option<&IcmpV6Packet> { + self.inner_ulp.as_ref().and_then(|v| match v { + Ulp::IcmpV6(v) => Some(v), + _ => None, + }) + } + + pub fn inner_tcp(&self) -> Option<&TcpPacket> { + self.inner_ulp.as_ref().and_then(|v| 
match v { + Ulp::Tcp(v) => Some(v), + _ => None, + }) + } + + pub fn inner_udp(&self) -> Option<&UdpPacket> { + self.inner_ulp.as_ref().and_then(|v| match v { + Ulp::Udp(v) => Some(v), + _ => None, + }) + } + + pub fn is_inner_tcp(&self) -> bool { + matches!(self.inner_ulp, Some(Ulp::Tcp(_))) + } + + /// Return whether the IP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). + pub fn has_ip_csum(&self) -> bool { + match &self.inner_l3 { + Some(L3::Ipv4(v4)) => v4.checksum() != 0, + Some(L3::Ipv6(_)) => false, + None => false, + } + } + + /// Return whether the ULP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). + pub fn has_ulp_csum(&self) -> bool { + let csum = match &self.inner_ulp { + Some(Ulp::Tcp(t)) => t.checksum(), + Some(Ulp::Udp(u)) => u.checksum(), + Some(Ulp::IcmpV4(i4)) => i4.checksum(), + Some(Ulp::IcmpV6(i6)) => i6.checksum(), + None => return false, + }; + + csum != 0 + } +} + +impl From> for OpteMeta { + #[inline] + fn from(value: NoEncap) -> Self { + OpteMeta { + outer_eth: None, + outer_l3: None, + outer_encap: None, + inner_eth: value.inner_eth, + inner_l3: value.inner_l3, + inner_ulp: value.inner_ulp, + } + } +} + +impl From<&OpteMeta> for InnerFlowId { + #[inline] + fn from(meta: &OpteMeta) -> Self { + let (proto, addrs) = match &meta.inner_l3 { + Some(L3::Ipv4(pkt)) => ( + pkt.protocol().0, + AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, + ), + Some(L3::Ipv6(pkt)) => ( + pkt.next_layer().unwrap_or_default().0, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, + ), + None => (255, FLOW_ID_DEFAULT.addrs), + }; + + let proto_info = match &meta.inner_ulp { + Some(Ulp::Tcp(t)) => { + PortInfo { src_port: t.source(), dst_port: t.destination() } + .into() + } + Some(Ulp::Udp(u)) => { + PortInfo { src_port: u.source(), dst_port: u.destination() } + .into() + } + Some(Ulp::IcmpV4(v4)) => IcmpInfo { + ty: v4.ty().0, + code: v4.code(), + id: 
v4.echo_id().unwrap_or_default(), + } + .into(), + Some(Ulp::IcmpV6(v6)) => IcmpInfo { + ty: v6.ty().0, + code: v6.code(), + id: v6.echo_id().unwrap_or_default(), + } + .into(), + _ => Default::default(), + }; + + InnerFlowId { proto, addrs, proto_info } + } +} + /// Helper for conditionally pulling up a packet when required, /// to provide safe read/write access to the packet body. /// @@ -455,128 +600,33 @@ impl Drop for PktBodyWalker { } } -/// Packet state for the standard ULP path, or a full table walk over the slowpath. -pub struct PacketData { - pub(crate) headers: OpteMeta, - initial_lens: Option>, - body: PktBodyWalker, -} - -impl From> for OpteMeta { - #[inline] - fn from(value: NoEncap) -> Self { - OpteMeta { - outer_eth: None, - outer_l3: None, - outer_encap: None, - inner_eth: value.inner_eth, - inner_l3: value.inner_l3, - inner_ulp: value.inner_ulp, - } - } +/// Per-packet context for use within (stateful) actions. +/// +/// This view type provides read-only access to the packet's headers and body, +/// which allow for an action to determine in more detail how a packet should be +/// modified. Additionally, this allows for an action to insert particular +/// [`RootStat`] objects into the packet trace. 
+pub struct PacketDataView<'a, T: Read + Pullup> { + pub headers: &'a OpteMeta, + pub initial_lens: &'a InitialLayerLens, + body: &'a PktBodyWalker, + stats: &'a mut FlowStatBuilder, } -impl core::fmt::Debug for PacketData { +impl core::fmt::Debug for PacketDataView<'_, T> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.write_str("PacketHeaders(..)") } } -impl PacketData { - pub fn initial_lens(&self) -> Option<&InitialLayerLens> { - self.initial_lens.as_deref() - } - - pub fn outer_ether( - &self, - ) -> Option<&InlineHeader>> { - self.headers.outer_eth.as_ref() - } - - pub fn outer_ip(&self) -> Option<&L3> { - self.headers.outer_l3.as_ref() - } - - /// Returns whether this packet is sourced from outside the rack, - /// in addition to its VNI. - pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { - match &self.headers.outer_encap { - Some(InlineHeader::Repr(EncapMeta::Geneve(g))) => { - Some((g.vni, g.oxide_external_pkt)) - } - Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { - Some((g.vni(), valid_geneve_has_oxide_external(g))) - } - None => None, - } - } - - pub fn inner_ether(&self) -> &EthernetPacket { - &self.headers.inner_eth - } - - pub fn inner_l3(&self) -> Option<&L3> { - self.headers.inner_l3.as_ref() - } - - pub fn inner_ulp(&self) -> Option<&Ulp> { - self.headers.inner_ulp.as_ref() - } - - pub fn inner_ip4(&self) -> Option<&Ipv4Packet> { - self.inner_l3().and_then(|v| match v { - L3::Ipv4(v) => Some(v), - _ => None, - }) - } - - pub fn inner_ip6(&self) -> Option<&Ipv6Packet> { - self.inner_l3().and_then(|v| match v { - L3::Ipv6(v) => Some(v), - _ => None, - }) - } - - pub fn inner_icmp(&self) -> Option<&IcmpV4Packet> { - self.inner_ulp().and_then(|v| match v { - Ulp::IcmpV4(v) => Some(v), - _ => None, - }) - } - - pub fn inner_icmp6(&self) -> Option<&IcmpV6Packet> { - self.inner_ulp().and_then(|v| match v { - Ulp::IcmpV6(v) => Some(v), - _ => None, - }) - } - - pub fn inner_tcp(&self) -> 
Option<&TcpPacket> { - self.inner_ulp().and_then(|v| match v { - Ulp::Tcp(v) => Some(v), - _ => None, - }) - } - - pub fn inner_udp(&self) -> Option<&UdpPacket> { - self.inner_ulp().and_then(|v| match v { - Ulp::Udp(v) => Some(v), - _ => None, - }) - } - - pub fn is_inner_tcp(&self) -> bool { - matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) - } - - pub fn prep_body(&mut self) - where - T::Chunk: ByteSliceMut, - T: Pullup, - { - self.body.prepare() - } - +impl PacketDataView<'_, T> { + /// Examine a packet's body, beginning after the last parsed layer in + /// `headers`. + /// + /// This should be avoided unless required -- if a packet's body is split + /// over several segments or has a shared refcount, then the packet body + /// will be pulled up into a single segment. This cost is paid at most + /// once per packet. pub fn body(&self) -> &[u8] where T::Chunk: ByteSliceMut, @@ -585,6 +635,9 @@ impl PacketData { self.body.body() } + /// Copy the packet's body into a new `Vec`. + /// + /// Comes with the same performance caveats as [`Self::body`]. pub fn copy_remaining(&self) -> Vec where T::Chunk: ByteSliceMut, @@ -594,6 +647,9 @@ impl PacketData { base.to_vec() } + /// Append the packet's body to an existing `Vec`. + /// + /// Comes with the same performance caveats as [`Self::body`]. pub fn append_remaining(&self, buf: &mut Vec) where T::Chunk: ByteSliceMut, @@ -603,79 +659,68 @@ impl PacketData { buf.extend_from_slice(base); } - pub fn body_mut(&mut self) -> &mut [u8] - where - T::Chunk: ByteSliceMut, - T: Pullup, - { - self.body.body_mut() - } - - /// Return whether the IP layer has a checksum both structurally - /// and that it is non-zero (i.e., not offloaded). - pub fn has_ip_csum(&self) -> bool { - match &self.headers.inner_l3 { - Some(L3::Ipv4(v4)) => v4.checksum() != 0, - Some(L3::Ipv6(_)) => false, - None => false, - } + /// Push a stat object for this layer of packet processing, in addition to + /// that of the current rule. 
This allows one rule to be associated with + /// several control-plane level objects, and to associate states with each + /// as needed. + /// + /// ## Ensuring exact counting + /// If an LFT entry is created, all [`RootStat`]s from the current layer are + /// collected and assigned a new internal stat node as a child. + /// + /// For stats to be measured exactly (i.e., without any nondeterministic + /// double/triple-counting) you must ensure that your [`NetworkImpl`] is designed + /// so that each [`RootStat`] you define is only reachable by at most one path + /// in a flow. Duplicate root stats (within a flow or internal node) are + /// trivially filtered out, but reusing a [`RootStat`] in, e.g., a layer which + /// generates an LFT entry and then as the rule-stat in a stateless layer poses + /// problems. + /// + /// I.e., consider the below case: + /// ```text + /// flow(abcd)[ RootStat(0), RootStat(1), InternalStat(2), RootStat(3) ] + /// ^ + /// | + /// [ RootStat(1), RootStat(4), ... ] + /// ``` + /// `InternalNode(2)` could expire at a *later time* than `flow(abcd)`, + /// which means that it and `RootStat(1)` will inherit the flow stats on + /// its closure, and then RootStat(1) will inherit these *again* once + /// `InternalNode(2)` expires. + /// + /// [`NetworkImpl`]: super::NetworkImpl + pub fn push_stat(&mut self, stat: Arc) { + self.stats.push(stat.into()); } +} - /// Return whether the ULP layer has a checksum both structurally - /// and that it is non-zero (i.e., not offloaded). - pub fn has_ulp_csum(&self) -> bool { - let csum = match &self.headers.inner_ulp { - Some(Ulp::Tcp(t)) => t.checksum(), - Some(Ulp::Udp(u)) => u.checksum(), - Some(Ulp::IcmpV4(i4)) => i4.checksum(), - Some(Ulp::IcmpV6(i6)) => i6.checksum(), - None => return false, - }; +/// Packet state for the standard ULP path, or a full table walk over the slowpath. 
+/// +/// This type should not be used in or handed to OPTE actions, as its fields have +/// different intended levels of mutability when generating an action. For instance, +/// stats can be created and pushed at will, but packet fields/lengths/body contents +/// should be immutable outside of constructed header/body transforms. +pub(crate) struct PacketData { + pub(crate) headers: OpteMeta, + pub(crate) initial_lens: InitialLayerLens, + body: PktBodyWalker, + pub(crate) stats: FlowStatBuilder, +} - csum != 0 +impl core::fmt::Debug for PacketData { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("PacketHeaders(..)") } } -impl From<&PacketData> for InnerFlowId { - #[inline] - fn from(meta: &PacketData) -> Self { - let (proto, addrs) = match meta.inner_l3() { - Some(L3::Ipv4(pkt)) => ( - pkt.protocol().0, - AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, - ), - Some(L3::Ipv6(pkt)) => ( - pkt.next_layer().unwrap_or_default().0, - AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, - ), - None => (255, FLOW_ID_DEFAULT.addrs), - }; - - let proto_info = match meta.inner_ulp() { - Some(Ulp::Tcp(t)) => { - PortInfo { src_port: t.source(), dst_port: t.destination() } - .into() - } - Some(Ulp::Udp(u)) => { - PortInfo { src_port: u.source(), dst_port: u.destination() } - .into() - } - Some(Ulp::IcmpV4(v4)) => IcmpInfo { - ty: v4.ty().0, - code: v4.code(), - id: v4.echo_id().unwrap_or_default(), - } - .into(), - Some(Ulp::IcmpV6(v6)) => IcmpInfo { - ty: v6.ty().0, - code: v6.code(), - id: v6.echo_id().unwrap_or_default(), - } - .into(), - _ => Default::default(), - }; - - InnerFlowId { proto, addrs, proto_info } +impl PacketData { + pub fn view(&mut self) -> PacketDataView<'_, T> { + PacketDataView { + headers: &self.headers, + initial_lens: &self.initial_lens, + body: &self.body, + stats: &mut self.stats, + } } } @@ -769,19 +814,21 @@ where let flow = headers.flow(); let headers: OpteMeta<_> = headers.into(); - let 
initial_lens = Some( - InitialLayerLens { - outer_eth: headers.outer_eth.packet_length(), - outer_l3: headers.outer_l3.packet_length(), - outer_encap: headers.outer_encap.packet_length(), - inner_eth: headers.inner_eth.packet_length(), - inner_l3: headers.inner_l3.packet_length(), - inner_ulp: headers.inner_ulp.packet_length(), - } - .into(), - ); + let initial_lens = InitialLayerLens { + outer_eth: headers.outer_eth.packet_length(), + outer_l3: headers.outer_l3.packet_length(), + outer_encap: headers.outer_encap.packet_length(), + inner_eth: headers.inner_eth.packet_length(), + inner_l3: headers.inner_l3.packet_length(), + inner_ulp: headers.inner_ulp.packet_length(), + }; let body = PktBodyWalker::new(last_chunk, data); - let meta = Box::new(PacketData { headers, initial_lens, body }); + let meta = Box::new(PacketData { + headers, + initial_lens, + body, + stats: FlowStatBuilder::new(), + }); Packet { state: FullParsed { @@ -798,12 +845,12 @@ where } #[inline] - pub fn meta(&self) -> &M { + pub fn headers(&self) -> &M { &self.state.meta.headers } #[inline] - pub fn meta_mut(&mut self) -> &mut M { + pub fn headers_mut(&mut self) -> &mut M { &mut self.state.meta.headers } @@ -819,16 +866,24 @@ where #[inline] pub fn flow(&self) -> InnerFlowId { - self.meta().flow() + self.headers().flow() } } impl Packet> { - pub fn meta(&self) -> &PacketData { + pub fn meta(&mut self) -> PacketDataView<'_, T> { + self.state.meta.view() + } + + pub fn headers(&self) -> &OpteMeta { + &self.state.meta.headers + } + + pub(crate) fn meta_internal(&self) -> &PacketData { &self.state.meta } - pub fn meta_mut(&mut self) -> &mut PacketData { + pub(crate) fn meta_internal_mut(&mut self) -> &mut PacketData { &mut self.state.meta } @@ -854,7 +909,7 @@ impl Packet> { // pkt space. 
let l4_hash = self.l4_hash(); let state = &mut self.state; - let init_lens = state.meta.initial_lens.as_ref().unwrap(); + let init_lens = &state.meta.initial_lens; let headers = &state.meta.headers; let payload_len = state.len - init_lens.hdr_len(); let mut encapped_len = payload_len; @@ -1069,7 +1124,7 @@ impl Packet> { where T::Chunk: ByteSliceMut, { - self.state.inner_csum_dirty |= xform.run(&mut self.state.meta)?; + self.state.inner_csum_dirty |= xform.run(self)?; // Recomputing this is a little bit wasteful, since we're moving // rebuilding a static repr from packet fields. This is a necessary @@ -1078,7 +1133,7 @@ impl Packet> { // // We *could* elide this on non-compiled UFT transforms, but we do not // need those today. - self.state.flow = InnerFlowId::from(self.meta()); + self.state.flow = InnerFlowId::from(self.headers()); Ok(()) } @@ -1101,7 +1156,7 @@ impl Packet> { self.state.body_modified = true; self.state.meta.body.prepare(); - let ulp = self.state.meta.inner_ulp().map(|v| v.repr()); + let ulp = self.state.meta.headers.inner_ulp.as_ref().map(|v| v.repr()); match self.body_mut() { Some(body_segs) => xform.run(dir, ulp.as_ref(), body_segs), @@ -1118,7 +1173,7 @@ impl Packet> { T::Chunk: ByteSliceMut, T: Pullup, { - let out = self.state.meta.body(); + let out = self.state.meta.body.body(); if out.is_empty() { None } else { Some(out) } } @@ -1128,10 +1183,32 @@ impl Packet> { T::Chunk: ByteSliceMut, T: Pullup, { - let out = self.state.meta.body_mut(); + let out = self.state.meta.body.body_mut(); if out.is_empty() { None } else { Some(out) } } + #[cfg(any(test, feature = "std"))] + pub fn append_remaining(&self, buf: &mut Vec) + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let base = self.body(); + if let Some(base) = base { + buf.extend_from_slice(base); + } + } + + #[cfg(any(test, feature = "std"))] + pub fn copy_remaining(&self) -> Vec + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let base = self.body(); + if let Some(base) = base { 
base.to_vec() } else { vec![] } + } + #[inline] pub fn mblk_addr(&self) -> uintptr_t { self.state.base_ptr @@ -1260,8 +1337,8 @@ impl Packet> { // provided. If the checksum is zero, it's assumed heardware // checksum offload is being used, and OPTE should not update // the checksum. - let update_ip = self.state.meta.has_ip_csum(); - let update_ulp = self.state.meta.has_ulp_csum(); + let update_ip = self.state.meta.headers.has_ip_csum(); + let update_ulp = self.state.meta.headers.has_ulp_csum(); // We expect that any body transform will necessarily invalidate // the body_csum. Recompute from scratch. @@ -1415,7 +1492,8 @@ impl> LiteParsed {} // These are needed for now to account for not wanting to redesign // ActionDescs to be generic over T (trait object safety rules, etc.), // in addition to needing to rework Hairpin actions. -pub type MblkPacketData<'a> = PacketData>; +pub(crate) type MblkPacketData<'a> = PacketData>; +pub type MblkPacketDataView<'a, 'b> = PacketDataView<'a, MsgBlkIterMut<'b>>; pub type MblkFullParsed<'a> = FullParsed>; pub type MblkLiteParsed<'a, M> = LiteParsed, M>; @@ -1797,17 +1875,19 @@ mod test { .unwrap() .to_full_meta(); - let eth_meta = parsed.meta().inner_ether(); + let headers = parsed.headers(); + + let eth_meta = &headers.inner_eth; assert_eq!(eth_meta.destination(), DST_MAC); assert_eq!(eth_meta.source(), SRC_MAC); assert_eq!(eth_meta.ethertype(), Ethertype::IPV4); - let ip4_meta = parsed.meta().inner_ip4().unwrap(); + let ip4_meta = headers.inner_ip4().unwrap(); assert_eq!(ip4_meta.source(), SRC_IP4); assert_eq!(ip4_meta.destination(), DST_IP4); assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = parsed.meta().inner_tcp().unwrap(); + let tcp_meta = headers.inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), 3839); assert_eq!(tcp_meta.destination(), 80); assert_eq!(tcp_meta.flags(), TcpFlags::SYN); @@ -1847,17 +1927,17 @@ mod test { .unwrap() .to_full_meta(); - let eth_parsed = pkt.meta().inner_ether(); + let 
eth_parsed = &pkt.headers().inner_eth; assert_eq!(eth_parsed.destination(), DST_MAC); assert_eq!(eth_parsed.source(), SRC_MAC); assert_eq!(eth_parsed.ethertype(), Ethertype::IPV4); - let ip4_parsed = pkt.meta().inner_ip4().unwrap(); + let ip4_parsed = pkt.headers().inner_ip4().unwrap(); assert_eq!(ip4_parsed.source(), SRC_IP4); assert_eq!(ip4_parsed.destination(), DST_IP4); assert_eq!(ip4_parsed.protocol(), IpProtocol::TCP); - let tcp_parsed = pkt.meta().inner_tcp().unwrap(); + let tcp_parsed = pkt.headers().inner_tcp().unwrap(); assert_eq!(tcp_parsed.source(), 3839); assert_eq!(tcp_parsed.destination(), 80); assert_eq!(tcp_parsed.flags(), TcpFlags::SYN); @@ -1940,10 +2020,14 @@ mod test { // Assert that the packet parses back out, and we can reach // the TCP meta no matter which permutation of EHs we have. assert_eq!( - pkt.meta().inner_ip6().unwrap().v6ext_ref().packet_length(), + pkt.headers() + .inner_ip6() + .unwrap() + .v6ext_ref() + .packet_length(), ipv6_header_size - Ipv6::MINIMUM_LENGTH ); - let tcp_meta = pkt.meta().inner_tcp().unwrap(); + let tcp_meta = pkt.headers().inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), 3839); assert_eq!(tcp_meta.destination(), 80); assert_eq!(tcp_meta.sequence(), 4224936861); @@ -1987,8 +2071,8 @@ mod test { .to_full_meta(); // Grab parsed metadata - let ip4_meta = parsed.meta().inner_ip4().unwrap(); - let tcp_meta = parsed.meta().inner_tcp().unwrap(); + let ip4_meta = parsed.headers().inner_ip4().unwrap(); + let tcp_meta = parsed.headers().inner_tcp().unwrap(); // Length in packet headers shouldn't reflect include padding // This should not fail even though there are more bytes in @@ -2046,8 +2130,8 @@ mod test { .to_full_meta(); // Grab parsed metadata - let ip6_meta = pkt.meta().inner_ip6().unwrap(); - let udp_meta = pkt.meta().inner_udp().unwrap(); + let ip6_meta = pkt.headers().inner_ip6().unwrap(); + let udp_meta = pkt.headers().inner_udp().unwrap(); // Length in packet headers shouldn't reflect include padding 
assert_eq!( diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index d02eb707..c03bd209 100644 --- a/lib/opte/src/engine/port/mod.rs +++ b/lib/opte/src/engine/port/mod.rs @@ -6,6 +6,7 @@ //! A virtual switch port. +use super::ExecCtx; use super::HdlPktAction; use super::LightweightMeta; use super::NetworkImpl; @@ -44,11 +45,13 @@ use super::rule::HdrTransform; use super::rule::HdrTransformError; use super::rule::Rule; use super::rule::TransformFlags; +use super::stat::Action as StatAction; +use super::stat::FlowStat; +use super::stat::StatTree; use super::tcp::KEEPALIVE_EXPIRE_TTL; use super::tcp::TIME_WAIT_EXPIRE_TTL; use super::tcp_state::TcpFlowState; use super::tcp_state::TcpFlowStateError; -use crate::ExecCtx; use crate::api::DumpLayerResp; use crate::api::DumpTcpFlowsResp; use crate::api::DumpUftResp; @@ -69,6 +72,7 @@ use crate::engine::flow_table::ExpiryPolicy; use crate::engine::packet::EmitSpec; use crate::engine::packet::PushSpec; use crate::engine::rule::CompiledEncap; +use crate::provider::Providers; use alloc::boxed::Box; use alloc::ffi::CString; use alloc::string::String; @@ -190,6 +194,16 @@ enum InternalProcessResult { Hairpin(MsgBlk), } +impl InternalProcessResult { + fn stat_action(&self) -> StatAction { + match self { + Self::Modified => StatAction::Allow, + Self::Drop { .. } => StatAction::Deny, + Self::Hairpin(..) => StatAction::Hairpin, + } + } +} + impl From for InternalProcessResult { fn from(hpa: HdlPktAction) -> Self { match hpa { @@ -218,13 +232,14 @@ pub enum DropReason { /// Only the port builder may add or remove layers. Once you have a /// [`Port`] the list of layers is immutable. pub struct PortBuilder { - ectx: Arc, + ectx: Arc, name: String, // Cache the CString version of the name for use with DTrace // probes. name_cstr: CString, mac: MacAddr, - layers: KMutex>, + layers: Vec, + flow_stats: StatTree, } #[derive(Clone, Debug)] @@ -259,36 +274,34 @@ impl PortBuilder { /// a packet from the guest. 
The last is the last to see a packet /// before it is delivered to the guest. pub fn add_layer( - &self, + &mut self, new_layer: Layer, pos: Pos, ) -> result::Result<(), OpteError> { - let mut lock = self.layers.lock(); - match pos { Pos::Last => { - lock.push(new_layer); + self.layers.push(new_layer); return Ok(()); } Pos::First => { - lock.insert(0, new_layer); + self.layers.insert(0, new_layer); return Ok(()); } Pos::Before(name) => { - for (i, layer) in lock.iter().enumerate() { + for (i, layer) in self.layers.iter().enumerate() { if layer.name() == name { - lock.insert(i, new_layer); + self.layers.insert(i, new_layer); return Ok(()); } } } Pos::After(name) => { - for (i, layer) in lock.iter().enumerate() { + for (i, layer) in self.layers.iter().enumerate() { if layer.name() == name { - lock.insert(i + 1, new_layer); + self.layers.insert(i + 1, new_layer); return Ok(()); } } @@ -304,14 +317,14 @@ impl PortBuilder { /// Add a new `Rule` to the layer named by `layer`, if such a /// layer exists. Otherwise, return an error. pub fn add_rule( - &self, + &mut self, layer_name: &str, dir: Direction, rule: Rule, ) -> result::Result<(), OpteError> { - for layer in &mut *self.layers.lock() { + for layer in &mut self.layers { if layer.name() == layer_name { - layer.add_rule(dir, rule); + layer.add_rule(dir, rule, &mut self.flow_stats); return Ok(()); } } @@ -327,9 +340,7 @@ impl PortBuilder { ) -> result::Result, PortCreateError> { let data = PortData { state: PortState::Ready, - // At this point the layer pipeline is immutable, thus we - // move the layers out of the mutex. 
- layers: self.layers.into_inner(), + layers: self.layers, uft_in: FlowTable::new(&self.name, "uft_in", uft_limit, None), uft_out: FlowTable::new(&self.name, "uft_out", uft_limit, None), tcp_flows: FlowTable::new( @@ -338,6 +349,7 @@ impl PortBuilder { tcp_limit, Some(Box::::default()), ), + flow_stats: self.flow_stats, }; Ok(Port { @@ -356,7 +368,7 @@ impl PortBuilder { /// [`Layer`] at the given index. If the layer does not exist, or /// has no action at that index, then `None` is returned. pub fn layer_action(&self, layer: &str, idx: usize) -> Option { - for l in &*self.layers.lock() { + for l in &self.layers { if l.name() == layer { return l.action(idx); } @@ -368,9 +380,8 @@ impl PortBuilder { /// List each [`Layer`] under this port. pub fn list_layers(&self) -> ListLayersResp { let mut tmp = vec![]; - let lock = self.layers.lock(); - for layer in lock.iter() { + for layer in self.layers.iter() { tmp.push(LayerDesc { name: layer.name().to_string(), rules_in: layer.num_rules(Direction::In), @@ -393,29 +404,33 @@ impl PortBuilder { name: &str, name_cstr: CString, mac: MacAddr, - ectx: Arc, + ectx: Arc, ) -> Self { PortBuilder { name: name.to_string(), name_cstr, mac, ectx, - layers: KMutex::new(Vec::new()), + layers: vec![], + flow_stats: Default::default(), } } /// Remove the [`Layer`] registered under `name`, if such a layer /// exists. - pub fn remove_layer(&self, name: &str) { - let mut lock = self.layers.lock(); - - for (i, layer) in lock.iter().enumerate() { + pub fn remove_layer(&mut self, name: &str) { + for (i, layer) in self.layers.iter().enumerate() { if layer.name() == name { - let _ = lock.remove(i); + let _ = self.layers.remove(i); return; } } } + + /// Provide access to the inner [`StatTree`]. + pub fn stats_mut(&mut self) -> &mut StatTree { + &mut self.flow_stats + } } /// The current state of the [`Port`]. @@ -540,6 +555,8 @@ pub struct UftEntry { /// Cached reference to a flow's TCP state, if applicable. 
/// This allows us to maintain up-to-date TCP flow table info tcp_flow: Option>>, + + stat: Arc, } impl Dump for UftEntry { @@ -572,7 +589,8 @@ impl Display for UftEntry { impl fmt::Debug for UftEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let UftEntry { pair: _pair, xforms, l4_hash, epoch, tcp_flow } = self; + let UftEntry { pair: _pair, xforms, l4_hash, epoch, tcp_flow, stat } = + self; f.debug_struct("UftEntry") .field("pair", &"") @@ -580,6 +598,10 @@ impl fmt::Debug for UftEntry { .field("l4_hash", l4_hash) .field("epoch", epoch) .field("tcp_flow", tcp_flow) + .field( + "stats", + &crate::api::FlowStat::::from(stat.as_ref()), + ) .finish() } } @@ -669,6 +691,8 @@ struct PortData { // that we know which inbound UFT/FT entries to retire upon // connection termination. tcp_flows: FlowTable, + + flow_stats: StatTree, } /// A virtual switch port. @@ -723,11 +747,13 @@ struct PortData { /// /// ### Execution Context /// -/// The `ExecCtx` provides implementations of specific features that -/// are valid for the given context the port is running in. +/// The `Providers` struct offers implementations of specific features that +/// are valid for the given context the port is running in (kernel, userland, ...). +/// This is combined with views of port specific fields in `ExecCtx`, which allows +/// layer/rule execution to access shared stats. pub struct Port { epoch: AtomicU64, - ectx: Arc, + ectx: Arc, name: String, // Cache the CString version of the name for use with DTrace // probes. @@ -866,10 +892,12 @@ impl Port { let mut data = self.data.write(); check_state!(data.state, [PortState::Ready, PortState::Running])?; - for layer in &mut data.layers { + let PortData { layers, flow_stats, .. 
} = &mut (*data); + + for layer in layers { if layer.name() == layer_name { self.epoch.fetch_add(1, SeqCst); - layer.add_rule(dir, rule); + layer.add_rule(dir, rule, flow_stats); return Ok(()); } } @@ -904,7 +932,7 @@ impl Port { if unsafe { super::opte_panic_debug != 0 } { super::err!("mblk: {}", pkt.mblk_addr()); super::err!("flow: {}", pkt.flow()); - super::err!("meta: {:?}", pkt.meta()); + super::err!("meta: {:?}", pkt.meta_internal()); super::err!("flows: {:?}", data); todo!("bad packet: {}", msg); } else { @@ -979,6 +1007,17 @@ impl Port { Ok(DumpTcpFlowsResp { flows: data.tcp_flows.dump() }) } + #[cfg(any(test, feature = "std"))] + pub fn dump_flow_stats(&self) -> Result { + let data = self.data.read(); + check_state!( + data.state, + [PortState::Running, PortState::Paused, PortState::Restored] + )?; + + Ok(data.flow_stats.dump()) + } + /// Clear all entries from the Unified Flow Table (UFT). /// /// # States @@ -1090,6 +1129,8 @@ impl Port { // set TIME_WAIT_EXPIRE_TTL or another state-specific timer lower // than 60s, we'll need to specifically expire the matching UFTs. let _ = data.tcp_flows.expire_flows(now, |_| FLOW_ID_DEFAULT); + + data.flow_stats.expire(now); Ok(()) } @@ -1218,6 +1259,7 @@ impl Port { let process_start = Moment::now(); let flow_before = pkt.flow(); let mblk_addr = pkt.mblk_addr(); + let pkt_len = pkt.len() as u64; // Packet processing is split into a few mechanisms based on // expected speed, based on actions and the size of required metadata: @@ -1319,6 +1361,8 @@ impl Port { // The Fast Path. 
drop(lock.take()); let xforms = &entry.state().xforms; + entry.state().stat.hit_at(pkt_len, process_start); + let out = if xforms.compiled.is_some() { FastPathDecision::CompiledUft(entry) } else { @@ -1371,7 +1415,7 @@ impl Port { tcp_flow.hit_at(process_start); let tcp = pkt - .meta() + .headers() .inner_tcp() .expect("failed to find TCP state on known TCP flow"); @@ -1384,7 +1428,6 @@ impl Port { self.name_cstr.as_c_str(), tcp, dir, - pkt.len() as u64, ufid_in, ) { Ok(TcpState::Closed) => Some(Arc::clone(tcp_flow)), @@ -1442,7 +1485,7 @@ impl Port { let tx = entry.state().xforms.compiled.as_ref().cloned().unwrap(); let len = pkt.len(); - let meta = pkt.meta_mut(); + let meta = pkt.headers_mut(); let csum_dirty = tx.checksums_dirty(); let body_csum = @@ -1512,6 +1555,8 @@ impl Port { .as_mut() .expect("lock should be held on this codepath"); + pkt.meta_internal_mut().stats.reserve(16); + let res = self.process_in_miss( data, epoch, @@ -1530,6 +1575,8 @@ impl Port { .as_mut() .expect("lock should be held on this codepath"); + pkt.meta_internal_mut().stats.reserve(16); + let res = self.process_out_miss(data, epoch, &mut pkt, &mut ameta); @@ -1650,10 +1697,12 @@ impl Port { let mut data = self.data.write(); check_state!(data.state, [PortState::Ready, PortState::Running])?; - for layer in &mut data.layers { + let PortData { layers, flow_stats, .. } = &mut (*data); + + for layer in layers { if layer.name() == layer_name { self.epoch.fetch_add(1, SeqCst); - layer.set_rules(in_rules, out_rules); + layer.set_rules(in_rules, out_rules, flow_stats); return Ok(()); } } @@ -1671,10 +1720,12 @@ impl Port { let mut data = self.data.write(); check_state!(data.state, [PortState::Ready, PortState::Running])?; - for layer in &mut data.layers { + let PortData { layers, flow_stats, .. 
} = &mut (*data); + + for layer in layers { if layer.name() == layer_name { self.epoch.fetch_add(1, SeqCst); - layer.set_rules_soft(in_rules, out_rules); + layer.set_rules_soft(in_rules, out_rules, flow_stats); return Ok(()); } } @@ -1696,6 +1747,12 @@ impl Port { .get(flow) .map(|entry| entry.state().tcp_state()) } + + /// Provides read access to all port stats. + pub fn read_stats(&self, f: impl FnOnce(&StatTree) -> T) -> T { + let data = self.data.read(); + f(&data.flow_stats) + } } #[allow(dead_code)] @@ -1961,11 +2018,13 @@ impl Port { xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { + let mut ectx = + ExecCtx { user_ctx: &self.ectx, stats: &mut data.flow_stats }; + match dir { Direction::Out => { for layer in &mut data.layers { - let res = - layer.process(&self.ectx, dir, pkt, xforms, ameta); + let res = layer.process(&mut ectx, dir, pkt, xforms, ameta); match res { Ok(LayerResult::Allow) => (), @@ -1979,8 +2038,7 @@ impl Port { Direction::In => { for layer in data.layers.iter_mut().rev() { - let res = - layer.process(&self.ectx, dir, pkt, xforms, ameta); + let res = layer.process(&mut ectx, dir, pkt, xforms, ameta); match res { Ok(LayerResult::Allow) => (), @@ -2121,7 +2179,6 @@ impl Port { tcp_flows: &mut FlowTable, tcp: &impl TcpRef, dir: &TcpDirection, - pkt_len: u64, ) -> result::Result { // Create a new entry and find its current state. 
In // this case it should always be `SynSent`, unless we're @@ -2147,14 +2204,11 @@ impl Port { let (ufid_out, tfes) = match *dir { TcpDirection::In { ufid_in, ufid_out } => ( ufid_out, - TcpFlowEntryState::new_inbound( - *ufid_out, *ufid_in, tfs, pkt_len, - ), - ), - TcpDirection::Out { ufid_out } => ( - ufid_out, - TcpFlowEntryState::new_outbound(*ufid_out, tfs, pkt_len), + TcpFlowEntryState::new_inbound(*ufid_out, *ufid_in, tfs), ), + TcpDirection::Out { ufid_out } => { + (ufid_out, TcpFlowEntryState::new_outbound(*ufid_out, tfs)) + } }; match tcp_flows.add_and_return(*ufid_out, tfes) { Ok(entry) => Ok(TcpMaybeClosed::NewState(tcp_state, entry)), @@ -2195,7 +2249,6 @@ impl Port { data: &mut PortData, tcp: &impl TcpRef, dir: &TcpDirection, - pkt_len: u64, ) -> result::Result { let (ufid_out, ufid_in) = match *dir { TcpDirection::In { ufid_in, ufid_out } => (ufid_out, Some(ufid_in)), @@ -2214,7 +2267,6 @@ impl Port { self.name_cstr.as_c_str(), tcp, dir.dir(), - pkt_len, ufid_in, ); @@ -2264,35 +2316,29 @@ impl Port { data: &mut PortData, pmeta: &MblkPacketData, ufid_in: &InnerFlowId, - pkt_len: u64, ) -> result::Result { // All TCP flows are keyed with respect to the outbound Flow // ID, therefore we mirror the flow. This value must represent // the guest-side of the flow and thus come from the passed-in // packet metadata that represents the post-processed packet. - let ufid_out = InnerFlowId::from(pmeta).mirror(); + let ufid_out = InnerFlowId::from(&pmeta.headers).mirror(); // Unwrap: We know this is a TCP packet at this point. // // XXX This will be even more foolproof in the future when // we've implemented the notion of FlowSet and Packet is // generic on header group/flow type. 
- let tcp = pmeta.inner_tcp().unwrap(); + let tcp = pmeta.headers.inner_tcp().unwrap(); let dir = TcpDirection::In { ufid_in, ufid_out: &ufid_out }; - match self.update_tcp_entry(data, tcp, &dir, pkt_len) { + match self.update_tcp_entry(data, tcp, &dir) { // We need to create a new TCP entry here because we can't call // `process_in_miss` on the already-modified packet. Err( ProcessError::TcpFlow(TcpFlowStateError::NewFlow { .. }) | ProcessError::MissingFlow(_), - ) => self.create_new_tcp_entry( - &mut data.tcp_flows, - tcp, - &dir, - pkt_len, - ), + ) => self.create_new_tcp_entry(&mut data.tcp_flows, tcp, &dir), v => v, } } @@ -2308,38 +2354,58 @@ impl Port { use Direction::In; self.stats.vals.in_uft_miss.incr(1); + let pkt_len = pkt.len() as u64; + let mut xforms = Transforms::new(); let res = self.layers_process(data, In, pkt, &mut xforms, ameta); - match res { + + let (ipr, create_flow) = match res { Ok(LayerResult::Allow) => { // If there is no flow ID, then do not create a UFT // entry. 
- if *ufid_in == FLOW_ID_DEFAULT { - return Ok(InternalProcessResult::Modified); - } + (InternalProcessResult::Modified, *ufid_in != FLOW_ID_DEFAULT) } - Ok(LayerResult::Deny { name, reason }) => { - return Ok(InternalProcessResult::Drop { + Ok(LayerResult::Deny { name, reason }) => ( + InternalProcessResult::Drop { reason: DropReason::Layer { name, reason }, - }); - } + }, + false, + ), Ok(LayerResult::Hairpin(hppkt)) => { - return Ok(InternalProcessResult::Hairpin(hppkt)); + (InternalProcessResult::Hairpin(hppkt), false) } - Ok(LayerResult::HandlePkt) => { - return Ok(InternalProcessResult::from(self.net.handle_pkt( + Ok(LayerResult::HandlePkt) => ( + InternalProcessResult::from(self.net.handle_pkt( In, pkt, &data.uft_in, &data.uft_out, - )?)); + )?), + false, + ), + + Err(e) => { + _ = pkt.meta_internal_mut().stats.terminate( + StatAction::Error, + pkt_len, + In, + false, + ); + return Err(ProcessError::Layer(e)); } + }; - Err(e) => return Err(ProcessError::Layer(e)), - } + let Some(stat_parents) = pkt.meta_internal_mut().stats.terminate( + ipr.stat_action(), + pkt_len, + In, + create_flow, + ) else { + return Ok(ipr); + }; let mut flags = TransformFlags::empty(); if pkt.checksums_dirty() { @@ -2350,12 +2416,16 @@ impl Port { } let ufid_out = pkt.flow().mirror(); + let stat = + data.flow_stats.new_flow(ufid_in, &ufid_out, In, stat_parents); + stat.hit(pkt_len); let mut hte = UftEntry { pair: KMutex::new(Some(ufid_out)), xforms: xforms.compile(flags), epoch, l4_hash: ufid_in.crc32(), tcp_flow: None, + stat, }; // Keep around the comment on the `None` arm @@ -2384,13 +2454,8 @@ impl Port { // For inbound traffic the TCP flow table must be // checked _after_ processing take place. - if pkt.meta().is_inner_tcp() { - match self.process_in_tcp( - data, - pkt.meta(), - ufid_in, - pkt.len() as u64, - ) { + if pkt.meta_internal().headers.is_inner_tcp() { + match self.process_in_tcp(data, pkt.meta_internal(), ufid_in) { Ok(TcpMaybeClosed::Closed { .. 
}) => { Ok(InternalProcessResult::Modified) } @@ -2484,21 +2549,15 @@ impl Port { data: &mut PortData, ufid_out: &InnerFlowId, pmeta: &MblkPacketData, - pkt_len: u64, ) -> result::Result { - let tcp = pmeta.inner_tcp().unwrap(); + let tcp = pmeta.headers.inner_tcp().unwrap(); let dir = TcpDirection::Out { ufid_out }; - match self.update_tcp_entry(data, tcp, &dir, pkt_len) { + match self.update_tcp_entry(data, tcp, &dir) { Err( ProcessError::TcpFlow(TcpFlowStateError::NewFlow { .. }) | ProcessError::MissingFlow(_), - ) => self.create_new_tcp_entry( - &mut data.tcp_flows, - tcp, - &dir, - pkt_len, - ), + ) => self.create_new_tcp_entry(&mut data.tcp_flows, tcp, &dir), other => other, } } @@ -2513,16 +2572,17 @@ impl Port { use Direction::Out; self.stats.vals.out_uft_miss.incr(1); + let pkt_len = pkt.len() as u64; + let mut tcp_closed = false; // For outbound traffic the TCP flow table must be checked // _before_ processing take place. - let tcp_flow = if pkt.meta().is_inner_tcp() { + let tcp_flow = if pkt.meta_internal().headers.is_inner_tcp() { match self.process_out_tcp_new( data, pkt.flow(), - pkt.meta(), - pkt.len() as u64, + pkt.meta_internal(), ) { Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { tcp_closed = true; @@ -2581,46 +2641,83 @@ impl Port { flags |= TransformFlags::INTERNAL_DESTINATION; } + let (ipr, create_flow) = match res { + Ok(LayerResult::Allow) => { + // If there is no flow ID, then do not create a UFT + // entry. 
+ ( + InternalProcessResult::Modified, + flow_before != FLOW_ID_DEFAULT && !tcp_closed, + ) + } + + Ok(LayerResult::Deny { name, reason }) => ( + InternalProcessResult::Drop { + reason: DropReason::Layer { name, reason }, + }, + false, + ), + + Ok(LayerResult::Hairpin(hppkt)) => { + (InternalProcessResult::Hairpin(hppkt), false) + } + + Ok(LayerResult::HandlePkt) => ( + InternalProcessResult::from(self.net.handle_pkt( + Out, + pkt, + &data.uft_in, + &data.uft_out, + )?), + false, + ), + + Err(e) => { + _ = pkt.meta_internal_mut().stats.terminate( + StatAction::Error, + pkt_len, + Out, + false, + ); + return Err(ProcessError::Layer(e)); + } + }; + + let Some(stat_parents) = pkt.meta_internal_mut().stats.terminate( + ipr.stat_action(), + pkt_len, + Out, + create_flow, + ) else { + return Ok(ipr); + }; + + let ufid_out = pkt.flow().mirror(); + let stat = data.flow_stats.new_flow( + &flow_before, + &ufid_out, + Out, + stat_parents, + ); + stat.hit(pkt_len); + let hte = UftEntry { pair: KMutex::new(None), xforms: xforms.compile(flags), epoch, l4_hash: flow_before.crc32(), tcp_flow, + stat, }; - match res { - Ok(LayerResult::Allow) => { - // If there is no Flow ID, then there is no UFT entry. 
- if flow_before == FLOW_ID_DEFAULT || tcp_closed { - return Ok(InternalProcessResult::Modified); - } - match data.uft_out.add(flow_before, hte) { - Ok(_) => Ok(InternalProcessResult::Modified), - Err(OpteError::MaxCapacity(limit)) => { - Err(ProcessError::FlowTableFull { kind: "UFT", limit }) - } - Err(_) => unreachable!( - "Cannot return other errors from FlowTable::add" - ), - } + match data.uft_out.add(flow_before, hte) { + Ok(_) => Ok(InternalProcessResult::Modified), + Err(OpteError::MaxCapacity(limit)) => { + Err(ProcessError::FlowTableFull { kind: "UFT", limit }) } - - Ok(LayerResult::Hairpin(hppkt)) => { - Ok(InternalProcessResult::Hairpin(hppkt)) - } - - Ok(LayerResult::Deny { name, reason }) => { - Ok(InternalProcessResult::Drop { - reason: DropReason::Layer { name, reason }, - }) + Err(_) => { + unreachable!("Cannot return other errors from FlowTable::add") } - - Ok(LayerResult::HandlePkt) => Ok(InternalProcessResult::from( - self.net.handle_pkt(Out, pkt, &data.uft_in, &data.uft_out)?, - )), - - Err(e) => Err(ProcessError::Layer(e)), } } @@ -2860,10 +2957,6 @@ pub struct TcpFlowEntryStateInner { // the network, not after it's processed. 
inbound_ufid: Option, tcp_state: TcpFlowState, - segs_in: u64, - segs_out: u64, - bytes_in: u64, - bytes_out: u64, } pub struct TcpFlowEntryState { @@ -2875,17 +2968,12 @@ impl TcpFlowEntryState { outbound_ufid: InnerFlowId, inbound_ufid: InnerFlowId, tcp_state: TcpFlowState, - bytes_in: u64, ) -> Self { Self { inner: KMutex::new(TcpFlowEntryStateInner { outbound_ufid, inbound_ufid: Some(inbound_ufid), tcp_state, - segs_in: 1, - segs_out: 0, - bytes_in, - bytes_out: 0, }), } } @@ -2893,17 +2981,12 @@ impl TcpFlowEntryState { fn new_outbound( outbound_ufid: InnerFlowId, tcp_state: TcpFlowState, - bytes_out: u64, ) -> Self { Self { inner: KMutex::new(TcpFlowEntryStateInner { outbound_ufid, inbound_ufid: None, tcp_state, - segs_in: 0, - segs_out: 1, - bytes_in: 0, - bytes_out, }), } } @@ -2919,21 +3002,9 @@ impl TcpFlowEntryState { port_name: &CStr, tcp: &impl TcpRef, dir: Direction, - pkt_len: u64, ufid_in: Option<&InnerFlowId>, ) -> result::Result { let mut tfes = self.inner.lock(); - match dir { - Direction::In => { - tfes.segs_in += 1; - tfes.bytes_in += pkt_len; - } - Direction::Out => { - tfes.segs_out += 1; - tfes.bytes_out += pkt_len; - } - } - if let Some(ufid_in) = ufid_in { // We need to store the UFID of the inbound packet // before it was processed so that we can retire the @@ -2979,10 +3050,6 @@ impl Dump for TcpFlowEntryStateInner { hits, inbound_ufid: self.inbound_ufid, tcp_state: TcpFlowStateDump::from(self.tcp_state), - segs_in: self.segs_in, - segs_out: self.segs_out, - bytes_in: self.bytes_in, - bytes_out: self.bytes_out, } } } diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 551f2179..07d21621 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -21,7 +21,8 @@ use super::ip::v6::Ipv6Addr; use super::ip::v6::Ipv6Cidr; use super::ip::v6::Ipv6Ref; use super::ip::v6::v6_get_next_header; -use super::packet::MblkPacketData; +use super::packet::MblkFullParsed; +use 
super::packet::Packet; use super::port::meta::ActionMeta; use alloc::boxed::Box; use alloc::string::String; @@ -389,11 +390,13 @@ impl Display for Predicate { } impl Predicate { - pub(crate) fn is_match( + pub fn is_match( &self, - meta: &MblkPacketData, + pkt: &Packet, action_meta: &ActionMeta, ) -> bool { + let headers = pkt.headers(); + match self { Self::Meta(key, pred_val) => { if let Some(meta_val) = action_meta.get(key) { @@ -403,20 +406,20 @@ impl Predicate { return false; } - Self::Not(pred) => return !pred.is_match(meta, action_meta), + Self::Not(pred) => return !pred.is_match(pkt, action_meta), Self::Any(list) => { - return list.iter().any(|v| v.is_match(meta, action_meta)); + return list.iter().any(|v| v.is_match(pkt, action_meta)); } Self::All(list) => { - return list.iter().all(|v| v.is_match(meta, action_meta)); + return list.iter().all(|v| v.is_match(pkt, action_meta)); } Self::InnerEtherType(list) => { for m in list { if m.matches(EtherType::from( - meta.inner_ether().ethertype().0, + headers.inner_eth.ethertype().0, )) { return true; } @@ -425,7 +428,7 @@ impl Predicate { Self::InnerEtherDst(list) => { for m in list { - if m.matches(meta.inner_ether().destination()) { + if m.matches(headers.inner_eth.destination()) { return true; } } @@ -433,13 +436,13 @@ impl Predicate { Self::InnerEtherSrc(list) => { for m in list { - if m.matches(meta.inner_ether().source()) { + if m.matches(headers.inner_eth.source()) { return true; } } } - Self::InnerIpProto(list) => match meta.inner_l3() { + Self::InnerIpProto(list) => match &headers.inner_l3 { None => return false, Some(L3::Ipv4(ipv4)) => { @@ -467,7 +470,7 @@ impl Predicate { } }, - Self::InnerSrcIp4(list) => match meta.inner_ip4() { + Self::InnerSrcIp4(list) => match headers.inner_ip4() { Some(v4) => { let ip = v4.source(); for m in list { @@ -482,7 +485,7 @@ impl Predicate { _ => return false, }, - Self::InnerDstIp4(list) => match meta.inner_ip4() { + Self::InnerDstIp4(list) => match headers.inner_ip4() 
{ Some(v4) => { let ip = v4.destination(); for m in list { @@ -497,7 +500,7 @@ impl Predicate { _ => return false, }, - Self::InnerSrcIp6(list) => match meta.inner_ip6() { + Self::InnerSrcIp6(list) => match headers.inner_ip6() { Some(v6) => { let ip = v6.source(); for m in list { @@ -509,7 +512,7 @@ impl Predicate { _ => return false, }, - Self::InnerDstIp6(list) => match meta.inner_ip6() { + Self::InnerDstIp6(list) => match headers.inner_ip6() { Some(v6) => { let ip = v6.destination(); for m in list { @@ -522,7 +525,7 @@ impl Predicate { }, Self::InnerSrcPort(list) => { - match meta.inner_ulp().and_then(|v| v.src_port()) { + match headers.inner_ulp.as_ref().and_then(|v| v.src_port()) { // No ULP metadata or no source port (e.g. ICMPv6). None => return false, @@ -537,7 +540,7 @@ impl Predicate { } Self::InnerDstPort(list) => { - match meta.inner_ulp().and_then(|v| v.dst_port()) { + match headers.inner_ulp.as_ref().and_then(|v| v.dst_port()) { // No ULP metadata or no destination port (e.g. ICMPv6). 
None => return false, @@ -552,7 +555,7 @@ impl Predicate { } Self::IcmpMsgType(list) => { - let Some(icmp) = meta.inner_icmp() else { + let Some(icmp) = headers.inner_icmp() else { // This isn't an ICMPv4 packet at all return false; }; @@ -565,7 +568,7 @@ impl Predicate { } Self::IcmpMsgCode(list) => { - let Some(icmp) = meta.inner_icmp() else { + let Some(icmp) = headers.inner_icmp() else { // This isn't an ICMPv4 packet at all return false; }; @@ -578,7 +581,7 @@ impl Predicate { } Self::Icmpv6MsgType(list) => { - let Some(icmp6) = meta.inner_icmp6() else { + let Some(icmp6) = headers.inner_icmp6() else { // This isn't an ICMPv6 packet at all return false; }; @@ -591,7 +594,7 @@ impl Predicate { } Self::Icmpv6MsgCode(list) => { - let Some(icmp6) = meta.inner_icmp6() else { + let Some(icmp6) = headers.inner_icmp6() else { // This isn't an ICMPv6 packet at all return false; }; @@ -686,12 +689,12 @@ impl DataPredicate { // use `PacketMeta` to determine if there is a suitable payload to // be inspected. That is, if there is no metadata for a given // header, there is certainly no payload. 
- pub(crate) fn is_match(&self, meta: &MblkPacketData) -> bool { + pub(crate) fn is_match(&self, meta: &Packet) -> bool { match self { Self::Not(pred) => !pred.is_match(meta), Self::DhcpMsgType(mt) => { - let bytes = meta.body(); + let bytes = meta.body().unwrap_or_default(); let pkt = match DhcpPacket::new_checked(&bytes) { Ok(v) => v, @@ -716,17 +719,15 @@ impl DataPredicate { mt.is_match(&DhcpMessageType::from(dhcp.message_type)) } - Self::Dhcpv6MsgType(mt) => { - let body = meta.body(); - if body.is_empty() { + Self::Dhcpv6MsgType(mt) => match meta.body() { + Some(body) => mt.is_match(&body[0].into()), + None => { super::err!( "Failed to read DHCPv6 message type from packet" ); false - } else { - mt.is_match(&body[0].into()) } - } + }, } } } diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index c78b95bd..0a10eee1 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -28,11 +28,11 @@ use super::ip::v4::Ipv4Mut; use super::ip::v6::Ipv6Mut; use super::ip::v6::v6_set_next_header; use super::packet::BodyTransform; +use super::packet::FullParsed; use super::packet::InnerFlowId; use super::packet::MblkFullParsed; -use super::packet::MblkPacketData; +use super::packet::MblkPacketDataView; use super::packet::Packet; -use super::packet::PacketData; use super::packet::Pullup; use super::parse::ValidUlp; use super::port::meta::ActionMeta; @@ -68,6 +68,7 @@ use opte_api::Direction; use opte_api::RuleDump; use serde::Deserialize; use serde::Serialize; +use uuid::Uuid; use zerocopy::ByteSliceMut; /// A marker trait indicating a type is an entry acuired from a [`Resource`]. @@ -174,12 +175,12 @@ pub trait ActionDesc { /// Generate a body transformation. /// /// An action may optionally generate a [`BodyTransform`] in - /// order to act on the body of the packet. + /// order to act on the body of the packet. This function is called + /// *before* the generated [`HdrTransform`] is applied. 
fn gen_bt( &self, _dir: Direction, - _meta: &MblkPacketData, - _payload_seg: &[u8], + _meta: MblkPacketDataView, ) -> Result>, GenBtError> { Ok(None) } @@ -276,7 +277,7 @@ impl StaticAction for Identity { &self, _dir: Direction, _flow_id: &InnerFlowId, - _pkt_meta: &MblkPacketData, + _pkt_meta: MblkPacketDataView, _action_meta: &mut ActionMeta, ) -> GenHtResult { Ok(AllowOrDeny::Allow(HdrTransform::identity(&self.name))) @@ -601,7 +602,7 @@ impl HdrTransform { } /// Run this header transformation against the passed in - /// [`PacketData`], mutating it in place. + /// [`Packet`], mutating it in place. /// /// Returns whether the inner checksum needs recomputed. /// @@ -612,11 +613,13 @@ impl HdrTransform { /// [`HdrTransformError::MissingHeader`] is returned. pub fn run( &self, - meta: &mut PacketData, + pkt: &mut Packet>, ) -> Result where T::Chunk: ByteSliceMut, { + let meta = pkt.meta_internal_mut(); + self.outer_ether .act_on_option::>, _>( &mut meta.headers.outer_eth, @@ -705,7 +708,7 @@ pub trait StatefulAction: Display { fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet, + pkt: MblkPacketDataView, meta: &mut ActionMeta, ) -> GenDescResult; @@ -725,7 +728,7 @@ pub trait StaticAction: Display { &self, dir: Direction, flow_id: &InnerFlowId, - packet_meta: &MblkPacketData, + packet_meta: MblkPacketDataView, action_meta: &mut ActionMeta, ) -> GenHtResult; @@ -797,7 +800,9 @@ pub trait HairpinAction: Display { /// modifications made by previous layers up to this point. /// This also provides access to a reader over the packet body, /// positioned after the parsed metadata. - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult; + /// + /// [`Packet`]: super::packet::Packet + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult; /// Return the predicates implicit to this action. 
/// @@ -987,6 +992,7 @@ pub struct Rule { state: S, action: Action, priority: u16, + stat_id: Option, } impl PartialEq for Rule { @@ -1001,6 +1007,10 @@ impl Rule { pub fn action(&self) -> &Action { &self.action } + + pub fn stat_id(&self) -> Option<&Uuid> { + self.stat_id.as_ref() + } } impl Rule { @@ -1010,9 +1020,22 @@ impl Rule { /// any implicit predicates dictated by the action. Additional /// predicates may be added along with the action's implicit ones. pub fn new(priority: u16, action: Action) -> Self { + Rule::new_with_id(priority, action, None) + } + + pub fn new_with_id( + priority: u16, + action: Action, + stat_id: Option, + ) -> Self { let (hdr_preds, data_preds) = action.implicit_preds(); - Rule { state: Ready { hdr_preds, data_preds }, action, priority } + Rule { + state: Ready { hdr_preds, data_preds }, + action, + priority, + stat_id, + } } /// Create a new rule that matches anything. @@ -1023,7 +1046,15 @@ impl Rule { /// useful for making intentions clear that this rule is to match /// anything. pub fn match_any(priority: u16, action: Action) -> Rule { - Rule { state: Finalized { preds: None }, action, priority } + Rule::match_any_with_id(priority, action, None) + } + + pub fn match_any_with_id( + priority: u16, + action: Action, + stat_id: Option, + ) -> Rule { + Rule { state: Finalized { preds: None }, action, priority, stat_id } } /// Add a single [`Predicate`] to the end of the list. 
@@ -1069,6 +1100,7 @@ impl Rule { state: Finalized { preds }, priority: self.priority, action: self.action, + stat_id: self.stat_id, } } } @@ -1076,7 +1108,7 @@ impl Rule { impl Rule { pub fn is_match( &self, - meta: &MblkPacketData, + pkt: &Packet, action_meta: &ActionMeta, ) -> bool { #[cfg(debug_assertions)] @@ -1098,13 +1130,13 @@ impl Rule { Some(preds) => { for p in &preds.hdr_preds { - if !p.is_match(meta, action_meta) { + if !p.is_match(pkt, action_meta) { return false; } } for p in &preds.data_preds { - if !p.is_match(meta) { + if !p.is_match(pkt) { return false; } } @@ -1142,6 +1174,7 @@ fn rule_matching() { use crate::engine::GenericUlp; use crate::engine::ip::v4::Ipv4; use crate::engine::ip::v4::Ipv4Mut; + use crate::engine::packet::Packet; use crate::engine::predicate::Ipv4AddrMatch; use crate::engine::predicate::Predicate; use ingot::ethernet::Ethertype; @@ -1178,7 +1211,6 @@ fn rule_matching() { .unwrap() .to_full_meta(); pkt.compute_checksums(); - let meta = pkt.meta(); r1.add_predicate(Predicate::InnerSrcIp4(vec![Ipv4AddrMatch::Exact( src_ip, @@ -1186,14 +1218,14 @@ fn rule_matching() { let r1 = r1.finalize(); let ameta = ActionMeta::new(); - assert!(r1.is_match(meta, &ameta)); + assert!(r1.is_match(&pkt, &ameta)); let new_src_ip = "10.11.11.99".parse().unwrap(); - let meta = pkt.meta_mut(); + let meta = pkt.meta_internal_mut(); if let Some(L3::Ipv4(v4)) = &mut meta.headers.inner_l3 { v4.set_source(new_src_ip); } - assert!(!r1.is_match(meta, &ameta)); + assert!(!r1.is_match(&pkt, &ameta)); } diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index e8e4f121..84363fde 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -12,8 +12,7 @@ use super::headers::UlpGenericModify; use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; use super::packet::InnerFlowId; -use super::packet::MblkFullParsed; -use super::packet::Packet; +use super::packet::MblkPacketDataView; use 
super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; @@ -243,13 +242,14 @@ impl SNat { fn gen_icmp_desc( &self, nat: SNatAlloc, - pkt: &Packet, + meta: MblkPacketDataView, ) -> GenDescResult { - let meta = pkt.meta(); - let echo_ident = match T::MESSAGE_PROTOCOL { Protocol::ICMP => { - let icmp = meta.inner_icmp().ok_or(GenIcmpErr::MetaNotFound)?; + let icmp = meta + .headers + .inner_icmp() + .ok_or(GenIcmpErr::MetaNotFound)?; Ok(if icmp.ty() == IcmpV4Type::ECHO_REQUEST { icmp.echo_id() @@ -258,8 +258,10 @@ impl SNat { }) } Protocol::ICMPv6 => { - let icmp6 = - meta.inner_icmp6().ok_or(GenIcmpErr::MetaNotFound)?; + let icmp6 = meta + .headers + .inner_icmp6() + .ok_or(GenIcmpErr::MetaNotFound)?; Ok(if icmp6.ty() == IcmpV6Type::ECHO_REQUEST { icmp6.echo_id() @@ -306,7 +308,7 @@ where fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet, + pkt: MblkPacketDataView, _meta: &mut ActionMeta, ) -> GenDescResult { let proto = flow_id.protocol(); @@ -480,6 +482,7 @@ mod test { use ingot::types::HeaderLen; use crate::ddi::mblk::MsgBlk; + use crate::engine::Packet; use crate::engine::ether::Ethernet; use crate::engine::ether::EthernetRef; use crate::engine::ip::v4::Ipv4; @@ -559,8 +562,9 @@ mod test { // ================================================================ // Verify descriptor generation. 
// ================================================================ - let flow_out = InnerFlowId::from(pkt.meta()); - let desc = match snat.gen_desc(&flow_out, &pkt, &mut action_meta) { + let flow_out = InnerFlowId::from(pkt.headers()); + let desc = match snat.gen_desc(&flow_out, pkt.meta(), &mut action_meta) + { Ok(AllowOrDeny::Allow(desc)) => desc, _ => panic!("expected AllowOrDeny::Allow(desc) result"), }; @@ -570,26 +574,20 @@ mod test { // Verify outbound header transformation // ================================================================ let out_ht = desc.gen_ht(Direction::Out); - out_ht.run(pkt.meta_mut()).unwrap(); + out_ht.run(&mut pkt).unwrap(); - let pmo = pkt.meta(); - let ether_meta = pmo.inner_ether(); + let pmo = pkt.headers(); + let ether_meta = &pmo.inner_eth; assert_eq!(ether_meta.source(), priv_mac); assert_eq!(ether_meta.destination(), dest_mac); - let ip4_meta = match pmo.inner_ip4() { - Some(v) => v, - _ => panic!("expect Ipv4Meta"), - }; + let ip4_meta = pmo.inner_ip4().unwrap(); assert_eq!(ip4_meta.source(), pub_ip); assert_eq!(ip4_meta.destination(), outside_ip); assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmo.inner_tcp() { - Some(v) => v, - _ => panic!("expect TcpMeta"), - }; + let tcp_meta = pmo.inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), pub_port); assert_eq!(tcp_meta.destination(), outside_port); @@ -624,26 +622,20 @@ mod test { pkt.compute_checksums(); let in_ht = desc.gen_ht(Direction::In); - in_ht.run(pkt.meta_mut()).unwrap(); + in_ht.run(&mut pkt).unwrap(); - let pmi = pkt.meta(); - let ether_meta = pmi.inner_ether(); + let pmi = pkt.headers(); + let ether_meta = &pmi.inner_eth; assert_eq!(ether_meta.source(), dest_mac); assert_eq!(ether_meta.destination(), priv_mac); - let ip4_meta = match pmi.inner_ip4() { - Some(v) => v, - _ => panic!("expect Ipv4Meta"), - }; + let ip4_meta = pmi.inner_ip4().unwrap(); assert_eq!(ip4_meta.source(), outside_ip); assert_eq!(ip4_meta.destination(), priv_ip); 
assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmi.inner_tcp() { - Some(v) => v, - _ => panic!("expect TcpMeta"), - }; + let tcp_meta = pmi.inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), outside_port); assert_eq!(tcp_meta.destination(), priv_port); diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs new file mode 100644 index 00000000..b1f972f9 --- /dev/null +++ b/lib/opte/src/engine/stat.rs @@ -0,0 +1,1301 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2025 Oxide Computer Company + +//! Flow stat objects modified and tracked as rules and entries are used. + +use crate::api::InnerFlowId; +use crate::ddi::sync::KRwLock; +use crate::ddi::time::Moment; +use crate::engine::flow_table::Ttl; +use alloc::boxed::Box; +use alloc::collections::BTreeMap; +use alloc::collections::BTreeSet; +use alloc::collections::btree_map::Entry; +#[cfg(any(test, feature = "std"))] +use alloc::string::String; +use alloc::sync::Arc; +use alloc::sync::Weak; +use alloc::vec::Vec; +use core::sync::atomic::AtomicU64; +use core::sync::atomic::Ordering; +use opte_api::Direction; +use opte_api::FlowPair; +use opte_api::FlowStat as ApiFlowStat; +use opte_api::FullCounter as ApiFullCounter; +use opte_api::PacketCounter as ApiPktCounter; +use opte_api::TcpState; +use uuid::Uuid; + +// TODO READOUT OF STAT FROM GIVEN ROOT(S). + +/// Opaque identifier for tracking unique stat objects. +#[derive(Copy, Clone, Hash, PartialEq, PartialOrd, Eq, Ord, Debug)] +struct StatId(u64); + +impl StatId { + fn new(val: &mut u64) -> Self { + let out = *val; + *val += 1; + StatId(out) + } +} + +/// Reduced form of an action for stats tracking purposes. 
+#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Default)] +pub(crate) enum Action { + #[default] + Allow, + Deny, + Hairpin, + Error, +} + +/// Packet counters and additional information associated with an accepted +/// flow's 5-tuple. +pub(crate) struct FlowStat { + /// The direction of this flow half. + dir: Direction, + /// The other half of this flow. + partner: InnerFlowId, + /// `TableStat`s to whom we must return our own `stats`. + parents: Box<[StatParent]>, + /// The cached list of IDs of reachable `RootStat` entries. + bases: BTreeSet, + + /// Actual stats associated with this flow. + shared: Arc, + + /// When was this flow last updated? + last_hit: AtomicU64, +} + +impl FlowStat { + /// Record a packet matching this flow and direction. + pub fn hit(&self, pkt_size: u64) { + self.hit_at(pkt_size, Moment::now()); + } + + /// Record a packet matching this flow and direction, using + /// an existing timestamp. + pub fn hit_at(&self, pkt_size: u64, time: Moment) { + self.last_hit.store(time.raw(), Ordering::Relaxed); + self.shared.stats.hit(self.dir, pkt_size); + } +} + +/// Packet counters shared by both halves of a flow. Each 5-tuple references +/// this struct through a [`FlowStat`]. +struct SharedFlowStat { + /// Counters associated with this flow. + stats: PacketCounter, + + #[expect(unused)] + /// Estimated TCP state from monitoring a flow. + /// + /// XXX: TODO + tcp: Option, + + /// The direction this flow was opened on. + first_dir: Direction, +} + +impl From<&FlowStat> for ApiFlowStat { + fn from(value: &FlowStat) -> Self { + ApiFlowStat { + partner: value.partner, + dir: value.dir, + first_dir: value.shared.first_dir, + bases: value.bases.iter().copied().collect(), + stats: (&value.shared.stats).into(), + } + } +} + +/// Stat objects which can be a parent to a non-root node. 
+#[derive(Clone, Debug)] +pub(crate) enum StatParent { + Root(Arc), + Internal(Arc), +} + +impl From> for StatParent { + fn from(value: Arc) -> Self { + Self::Root(value) + } +} + +impl From> for StatParent { + fn from(value: Arc) -> Self { + Self::Internal(value) + } +} + +impl StatParent { + fn parents(&self) -> &[StatParent] { + match self { + Self::Root(_) => &[], + Self::Internal(i) => &i.parents, + } + } + + fn global_id(&self) -> StatId { + self.inner().stats.id() + } + + fn root_id(&self) -> Option<&Uuid> { + match self { + Self::Root(r) => Some(&r.id), + Self::Internal(_) => None, + } + } + + fn inner(&self) -> &TableStat { + match self { + Self::Root(r) => &r.body, + Self::Internal(i) => &i.body, + } + } + + /// Allow a packet (at a given timestamp), without recording packet size/counts. + /// + /// This should be used when a flow will track such local stats via a UFT + /// entry. + fn allow_at(&self, time: Moment) { + if let Self::Root(r) = self { + r.record_hit(time); + } + self.inner().allow(); + } + + /// Record an action for a packet (at a given time) which will ultimately + /// be dropped or hairpinned. E.g., when no UFT will be created for a packet. + fn act_at( + &self, + action: Action, + pkt_size: u64, + direction: Direction, + time: Moment, + ) { + if let Self::Root(r) = self { + r.record_hit(time); + } + self.inner().act(action, pkt_size, direction); + } + + /// Add a weak child reference to this stat object. + fn append_child(&self, child: impl Into) { + let mut p_children = self.inner().children.write(); + p_children.push(child.into()); + } +} + +/// Stat objects which can be a child to a non-leaf node. 
+#[derive(Clone, Debug)] +enum StatChild { + Internal(Weak), + Flow(Weak), +} + +impl From<&Arc> for StatChild { + fn from(value: &Arc) -> Self { + Self::Internal(Arc::downgrade(value)) + } +} + +impl From<&Arc> for StatChild { + fn from(value: &Arc) -> Self { + Self::Flow(Arc::downgrade(value)) + } +} + +impl StatChild { + /// Returns whether any strong references to this child node remain. + fn is_alive(&self) -> bool { + match self { + Self::Internal(i) => i.strong_count() != 0, + Self::Flow(f) => f.strong_count() != 0, + } + } + + fn upgrade(&self) -> Option { + match self { + Self::Internal(i) => i.upgrade().map(StrongStatChild::Internal), + Self::Flow(f) => f.upgrade().map(StrongStatChild::Flow), + } + } +} + +enum StrongStatChild { + Internal(Arc), + Flow(Arc), +} + +impl StrongStatChild { + fn global_id(&self) -> StatId { + match self { + Self::Internal(i) => i.body.stats.id(), + Self::Flow(f) => f.shared.stats.id, + } + } + + fn combine_api(&self, into: &mut ApiFullCounter) { + match self { + Self::Internal(i) => i.body.stats.combine_api(into), + Self::Flow(f) => f.shared.stats.combine_api(&mut into.packets), + } + } +} + +/// Long-lived counters associated with a rule or control-plane relevant +/// object. +#[derive(Debug)] +pub struct RootStat { + /// The control-plane ID associated with these counters. + pub id: Uuid, + /// When was a hit last recorded? + last_hit: AtomicU64, + body: TableStat, +} + +impl RootStat { + /// Update the `last_hit` time of this stat. + fn record_hit(&self, time: Moment) { + self.last_hit.store(time.raw(), Ordering::Relaxed); + } + + /// Retrieve hit/packet stats reported by this stat object and all of + /// its live children. 
+ fn combined_stats(&self) -> ApiFullCounter { + let mut visited = BTreeSet::new(); + + let mut scratch = ApiFullCounter::from(&self.body.stats); + let mut to_visit = { + let children = self.body.children.read(); + children.clone() + }; + + while let Some(node) = to_visit.pop() { + let Some(inode) = node.upgrade() else { continue }; + let id = inode.global_id(); + if !visited.insert(id) { + continue; + } + + inode.combine_api(&mut scratch); + + if let StrongStatChild::Internal(i) = inode { + let children = i.body.children.read(); + to_visit.extend_from_slice(&children); + } + } + + scratch + } +} + +/// Temporary counters associated with an LFT entry. +#[derive(Debug)] +pub(crate) struct InternalStat { + parents: Box<[StatParent]>, + body: TableStat, +} + +/// Shared components of non-flow stats. +struct TableStat { + /// A list of other stat-related objects who name this table + /// stat as one of its parents. + children: KRwLock>, + + /// The actual stats. + stats: FullCounter, +} + +impl core::fmt::Debug for TableStat { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("TableStat") + .field("children", &"") + .field("stats", &ApiFullCounter::from(&self.stats)) + .finish() + } +} + +impl TableStat { + /// Allow a packet, without recording packet size/counts. + /// + /// This should be used when a flow will track such local stats via a UFT + /// entry. + fn allow(&self) { + self.stats.allow.fetch_add(1, Ordering::Relaxed); + } + + /// Record an action for a packet which will ultimately + /// be dropped or hairpinned. E.g., when no UFT will be created for a packet. 
+ fn act(&self, action: Action, pkt_size: u64, direction: Direction) { + self.stats.packets.hit(direction, pkt_size); + match action { + Action::Allow => &self.stats.allow, + Action::Deny => &self.stats.deny, + Action::Hairpin => &self.stats.hairpin, + Action::Error => &self.stats.error, + } + .fetch_add(1, Ordering::Relaxed); + } +} + +/// Packet count/byte counters. +/// +/// Base component of any counter set in OPTE. +struct PacketCounter { + id: StatId, + created_at: Moment, + + pkts_in: AtomicU64, + bytes_in: AtomicU64, + pkts_out: AtomicU64, + bytes_out: AtomicU64, +} + +impl PacketCounter { + fn from_next_id(id: &mut u64) -> PacketCounter { + PacketCounter { + id: StatId::new(id), + created_at: Moment::now(), + + pkts_in: 0.into(), + bytes_in: 0.into(), + pkts_out: 0.into(), + bytes_out: 0.into(), + } + } + + #[inline] + fn hit(&self, direction: Direction, pkt_size: u64) { + let (pkts, bytes) = match direction { + Direction::In => (&self.pkts_in, &self.bytes_in), + Direction::Out => (&self.pkts_out, &self.bytes_out), + }; + pkts.fetch_add(1, Ordering::Relaxed); + bytes.fetch_add(pkt_size, Ordering::Relaxed); + } + + /// Increment the values of `into` using all matching counters in `self`. + fn combine(&self, into: &Self) { + into.pkts_in + .fetch_add(self.pkts_in.load(Ordering::Relaxed), Ordering::Relaxed); + into.bytes_in.fetch_add( + self.bytes_in.load(Ordering::Relaxed), + Ordering::Relaxed, + ); + into.pkts_out.fetch_add( + self.pkts_out.load(Ordering::Relaxed), + Ordering::Relaxed, + ); + into.bytes_out.fetch_add( + self.bytes_out.load(Ordering::Relaxed), + Ordering::Relaxed, + ); + } + + /// Increment the values of `into` using all matching counters in `self`. 
+ fn combine_api(&self, into: &mut ApiPktCounter) { + into.pkts_in += self.pkts_in.load(Ordering::Relaxed); + into.bytes_in += self.bytes_in.load(Ordering::Relaxed); + into.pkts_out += self.pkts_out.load(Ordering::Relaxed); + into.bytes_out += self.bytes_out.load(Ordering::Relaxed); + } +} + +impl From<&PacketCounter> for ApiPktCounter { + fn from(val: &PacketCounter) -> Self { + ApiPktCounter { + created_at: val.created_at.raw(), + pkts_in: val.pkts_in.load(Ordering::Relaxed), + bytes_in: val.bytes_in.load(Ordering::Relaxed), + pkts_out: val.pkts_out.load(Ordering::Relaxed), + bytes_out: val.bytes_out.load(Ordering::Relaxed), + } + } +} + +/// Counts of actions taken/packets encountered by a rule. +struct FullCounter { + allow: AtomicU64, + deny: AtomicU64, + hairpin: AtomicU64, + error: AtomicU64, + packets: PacketCounter, +} + +impl FullCounter { + fn from_next_id(id: &mut u64) -> FullCounter { + FullCounter { + allow: 0.into(), + deny: 0.into(), + hairpin: 0.into(), + error: 0.into(), + packets: PacketCounter::from_next_id(id), + } + } + + /// Increment the values of `into` using all matching counters in `self`. + fn combine(&self, into: &Self) { + self.packets.combine(&into.packets); + into.allow + .fetch_add(self.allow.load(Ordering::Relaxed), Ordering::Relaxed); + into.deny + .fetch_add(self.deny.load(Ordering::Relaxed), Ordering::Relaxed); + into.hairpin + .fetch_add(self.hairpin.load(Ordering::Relaxed), Ordering::Relaxed); + into.error + .fetch_add(self.error.load(Ordering::Relaxed), Ordering::Relaxed); + } + + /// Increment the values of `into` using all matching counters in `self`. 
+ fn combine_api(&self, into: &mut ApiFullCounter) { + self.packets.combine_api(&mut into.packets); + into.allow += self.allow.load(Ordering::Relaxed); + into.deny += self.deny.load(Ordering::Relaxed); + into.hairpin += self.hairpin.load(Ordering::Relaxed); + into.error += self.error.load(Ordering::Relaxed); + } + + #[inline] + fn id(&self) -> StatId { + self.packets.id + } +} + +impl From<&FullCounter> for ApiFullCounter { + fn from(val: &FullCounter) -> Self { + ApiFullCounter { + packets: (&val.packets).into(), + allow: val.allow.load(Ordering::Relaxed), + deny: val.deny.load(Ordering::Relaxed), + hairpin: val.hairpin.load(Ordering::Relaxed), + error: val.error.load(Ordering::Relaxed), + } + } +} + +impl From<&RootStat> for ApiFullCounter { + fn from(val: &RootStat) -> Self { + (&val.body.stats).into() + } +} + +impl From<&InternalStat> for ApiFullCounter { + fn from(val: &InternalStat) -> Self { + (&val.body.stats).into() + } +} + +/// Manager of all stat/counter objects within a port. +#[derive(Default)] +pub struct StatTree { + next_id: u64, + roots: BTreeMap>, + internal: Vec>, + flows: BTreeMap>, +} + +impl StatTree { + /// Gets or creates the root stat for a given UUID. + /// + /// Allocates a new UUID if none is provided. + pub fn new_root(&mut self, uuid: Option) -> Arc { + let uuid = uuid.unwrap_or_else(|| Uuid::from_u64_pair(0, self.next_id)); + let ids = &mut self.next_id; + + Arc::clone(self.roots.entry(uuid).or_insert_with(|| { + Arc::new(RootStat { + id: uuid, + last_hit: Moment::now().raw().into(), + body: TableStat { + children: KRwLock::new(vec![]), + stats: FullCounter::from_next_id(ids), + }, + }) + })) + } + + /// Creates a new internal node from a given set of parents. 
+ fn new_intermediate( + &mut self, + parents: Vec, + ) -> Arc { + let out = Arc::new(InternalStat { + parents: parents.into(), + body: TableStat { + children: KRwLock::new(vec![]), + stats: FullCounter::from_next_id(&mut self.next_id), + }, + }); + + for parent in &out.parents { + parent.append_child(&out); + } + + self.internal.push(Arc::clone(&out)); + + out + } + + /// Gets or creates the flow stat associated with a pair of 5-tuples. + pub(crate) fn new_flow( + &mut self, + flow_id: &InnerFlowId, + partner_flow: &InnerFlowId, + dir: Direction, + parents: Vec, + ) -> Arc { + if let Entry::Occupied(e) = self.flows.entry(*flow_id) { + // TODO: what to do with (maybe new) parents & bases?! + // I *think* these should win out, insert, and preserve + // the old stats. Need to think about it. + // + // I think what may be needed is a 'last synced' stat set for a + // flow, so that we can save out the delta from that if 'parents' + // changes. E.g.: + // EPOCH 0 -- flow has parents a, b', d + // -- flow exists for ~2min actively + // EPOCH 1 -- firewall rule change occurs + // -- flow *now* has parents a, c, d + // -- flow closes + // In the above example, b' and c should receive the packet + // byte/counts split at the epoch 0->1 transition. + return Arc::clone(e.get()); + } + + let parents = parents.into_boxed_slice(); + let bases = get_base_ids(&parents); + + let out = match self.flows.entry(*partner_flow) { + // Miss, but existing partner. + Entry::Occupied(partner) => Arc::new(FlowStat { + dir, + partner: *partner_flow, + parents, + bases, + shared: Arc::clone(&partner.get().shared), + last_hit: Moment::now().raw().into(), + }), + // Miss, no partner. 
+ Entry::Vacant(_) => { + Arc::new(FlowStat { + dir, + partner: *partner_flow, + parents, + bases, + shared: Arc::new(SharedFlowStat { + stats: PacketCounter::from_next_id(&mut self.next_id), + // TODO + tcp: None, + first_dir: dir, + }), + last_hit: Moment::now().raw().into(), + }) + } + }; + + for parent in &out.parents { + parent.append_child(&out); + } + + // We have proven a miss on flow_id already + let _ = self.flows.insert(*flow_id, Arc::clone(&out)); + out + } + + /// Remove all stat entries which have grown stale, folding packet/decision + /// counters into registered parents. + pub fn expire(&mut self, now: Moment) { + const EXPIRY_WINDOW: Ttl = Ttl::new_seconds(10); + // Root removal and re-entry? Don't want any gaps. + const ROOT_EXPIRY_WINDOW: Ttl = Ttl::new_seconds(100); + + #[derive(Default, Eq, PartialEq)] + enum Liveness { + #[default] + NotSeen, + SeenKeep, + Seen(InnerFlowId), + } + + #[derive(Default)] + struct JointLive { + lhs: Liveness, + rhs: Liveness, + } + + // + // Flows -- we need to account for shared component between arc'd halves + // of each, hence the liveness tracking. At a high level, we can expire + // a flow if one half exists (but is stale), or both halves exist and + // *both* are stale. 
+ // + let mut possibly_expired: BTreeMap = BTreeMap::new(); + for (k, v) in &self.flows { + let t_hit = + Moment::from_raw_nanos(v.last_hit.load(Ordering::Relaxed)); + let can_remove = EXPIRY_WINDOW.is_expired(t_hit, now) + && Arc::strong_count(v) == 1; + let base_id = v.shared.stats.id; + let el = possibly_expired.entry(base_id).or_default(); + match (v.dir, can_remove) { + (Direction::In, false) => { + el.lhs = Liveness::SeenKeep; + } + (Direction::Out, false) => { + el.rhs = Liveness::SeenKeep; + } + (Direction::In, true) => { + el.lhs = Liveness::Seen(*k); + } + (Direction::Out, true) => { + el.rhs = Liveness::Seen(*k); + } + } + } + + for v in possibly_expired.values() { + let cannot_remove = v.lhs == Liveness::SeenKeep + || v.rhs == Liveness::SeenKeep + || (v.lhs == Liveness::NotSeen && v.rhs == Liveness::NotSeen); + if cannot_remove { + continue; + } + + #[allow(clippy::mutable_key_type)] + let mut parents: BTreeSet = Default::default(); + let mut base_stats = None; + if let Liveness::Seen(id) = v.lhs + && let Some(flow) = self.flows.remove(&id) + { + let flow = Arc::into_inner(flow) + .expect("strong count 1 is enforced above"); + for p_id in flow.parents { + parents.insert(ById(p_id)); + } + base_stats = Some(flow.shared); + } + if let Liveness::Seen(id) = v.rhs + && let Some(flow) = self.flows.remove(&id) + { + let flow = Arc::into_inner(flow) + .expect("strong count 1 is enforced above"); + for p_id in flow.parents { + parents.insert(ById(p_id)); + } + base_stats = Some(flow.shared); + } + + // At long last, combine! + let base_stats = + base_stats.expect("should not have no parent here!!"); + for parent in parents { + base_stats.stats.combine(&parent.0.inner().stats.packets); + } + } + + // + // Internal/branch nodes. + // + self.internal.retain(|v| { + // Internal nodes do not have/use a last_hit time, as their + // lifetimes are tied exclusively to LFT entries (we do not + // re-query them, either). 
+ if Arc::strong_count(v) == 1 { + for p in &v.parents { + v.body.stats.combine(&p.inner().stats); + } + false + } else { + true + } + }); + + // + // Roots may need to be held onto for some time in case rules with the + // same ID come and go in adjacent control plane operations... + // + self.roots.retain(|_, v| { + let t_hit = + Moment::from_raw_nanos(v.last_hit.load(Ordering::Relaxed)); + Arc::strong_count(v) > 1 + || !ROOT_EXPIRY_WINDOW.is_expired(t_hit, now) + }); + + // + // Reap any child references. + // + self.internal.iter().for_each(|el| { + let mut children = el.body.children.write(); + children.retain(|c| c.is_alive()); + }); + self.roots.values().for_each(|el| { + let mut children = el.body.children.write(); + children.retain(|c| c.is_alive()); + }); + } + + /// Return the IDs of all present roots. + pub fn all_root_ids(&self) -> impl Iterator { + self.roots.keys().copied() + } + + /// Return a snapshot of collated stats for a given root. + /// + /// This will include the values of all downstream children, + /// but may be susceptible to partial reads between individual counters. + pub fn root_stat(&self, id: &Uuid) -> Option { + self.roots.get(id).map(|v| RootStat::combined_stats(v)) + } + + /// Return a snapshot of collated stats for all present roots. + /// + /// This will include the values of all downstream children, + /// but may be susceptible to partial reads between individual counters. + pub fn all_root_stats( + &self, + ) -> impl Iterator { + self.roots.iter().map(|(k, v)| (*k, v.combined_stats())) + } + + /// Return the IDs of all present flows. + pub fn all_flow_pairs( + &self, + ) -> impl Iterator> { + self.flows.iter().map(|(k, v)| match v.dir { + Direction::In => FlowPair { inbound: *k, outbound: v.partner }, + Direction::Out => FlowPair { outbound: *k, inbound: v.partner }, + }) + } + + /// Return a snapshot of stats for a given flow. 
+ pub fn flow_stat( + &self, + id: &InnerFlowId, + ) -> Option> { + self.flows.get(id).map(|v| ApiFlowStat::from(v.as_ref())) + } + + /// Return a snapshot of collated stats for all present flows. + pub fn all_flow_stats( + &self, + ) -> impl Iterator)> { + self.flows.iter().map(|(k, v)| (*k, ApiFlowStat::from(v.as_ref()))) + } + + #[cfg(any(test, feature = "std"))] + pub fn dump(&self) -> String { + let mut out = String::new(); + out.push_str("--Roots--\n"); + for (id, root) in &self.roots { + let d = ApiFullCounter::from(&root.body.stats); + out.push_str(&format!( + "\t{:?}/{id} -> {d:?}\n", + root.body.stats.id() + )); + } + out.push_str("----\n\n"); + out.push_str("--Ints--\n"); + for root in &self.internal { + let d = ApiFullCounter::from(&root.body.stats); + out.push_str(&format!("\t{:?} -> {d:?}\n", root.body.stats.id())); + let parents: Vec> = + root.parents.iter().map(|v| v.root_id().copied()).collect(); + out.push_str(&format!("\t\tparents {parents:?}\n\n")); + } + out.push_str("----\n\n"); + out.push_str("--Flows--\n"); + for (id, stat) in &self.flows { + // let d: ApiFlowStat = stat.as_ref().into(); + let d: ApiPktCounter = (&stat.as_ref().shared.stats).into(); + let parents: Vec<_> = + stat.parents.iter().map(|v| v.global_id()).collect(); + out.push_str(&format!("\t{id}/{} ->\n", stat.dir)); + out.push_str(&format!("\t\t{:?} {d:?}\n", stat.shared.stats.id)); + out.push_str(&format!("\t\tparents {parents:?}\n")); + out.push_str(&format!("\t\tbases {:?}\n\n", stat.bases)); + } + out.push_str("----\n"); + out + } +} + +/// Return the underlying stat IDs of decision-making rules which allowed a flow. 
+fn get_base_ids(parents: &[StatParent]) -> BTreeSet { + let mut out = BTreeSet::new(); + + let mut work_set = parents.to_vec(); + while let Some(el) = work_set.pop() { + work_set.extend_from_slice(el.parents()); + if let Some(id) = el.root_id() { + out.insert(*id); + } + } + + out +} + +/// Collects stats as a packet is processed, keeping track of the boundary +/// of the most recent layer. +pub(crate) struct FlowStatBuilder { + parents: Vec, + layer_end: usize, +} + +impl FlowStatBuilder { + pub(crate) fn new() -> Self { + Self { parents: Vec::with_capacity(0), layer_end: 0 } + } + + pub(crate) fn reserve(&mut self, capacity: usize) { + self.parents.reserve(capacity); + } + + /// Push a parent onto this flow. + pub(crate) fn push(&mut self, parent: StatParent) { + self.parents.push(parent); + } + + /// Mark all current parents as [`Action::Allow`]. + pub(crate) fn new_layer(&mut self) { + self.layer_end = self.parents.len(); + } + + /// Mark all current parents as [`Action::Allow`], moving them all into + /// a new [`InternalStat`]. + pub(crate) fn new_layer_lft( + &mut self, + tree: &mut StatTree, + ) -> Arc { + let out = tree.new_intermediate(self.parents.split_off(self.layer_end)); + self.parents.push(Arc::clone(&out).into()); + self.new_layer(); + out + } + + /// Return a list of stat parents if this packet is bound for flow creation. + pub(crate) fn terminate( + &mut self, + action: Action, + pkt_size: u64, + direction: Direction, + create_flow: bool, + ) -> Option> { + let now = Moment::now(); + match action { + Action::Allow if create_flow => { + self.parents.iter().for_each(|v| v.allow_at(now)); + // TODO: should *take*? 
+ Some(self.parents.clone()) + } + Action::Allow => { + self.parents + .iter() + .for_each(|v| v.act_at(action, pkt_size, direction, now)); + None + } + Action::Deny | Action::Hairpin | Action::Error => { + let (accepted, last_layer) = + self.parents.split_at(self.layer_end); + accepted.iter().for_each(|v| { + v.act_at(Action::Allow, pkt_size, direction, now) + }); + last_layer + .iter() + .for_each(|v| v.act_at(action, pkt_size, direction, now)); + + None + } + } + } +} + +impl Default for FlowStatBuilder { + fn default() -> Self { + Self::new() + } +} + +/// Utility newtype for tracking visited nodes. +struct ById(StatParent); + +impl PartialOrd for ById { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for ById { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.0.global_id().cmp(&other.0.global_id()) + } +} + +impl PartialEq for ById { + fn eq(&self, other: &Self) -> bool { + self.0.global_id() == other.0.global_id() + } +} + +impl Eq for ById {} + +#[cfg(test)] +mod tests { + use super::*; + use crate::api::AddrPair; + use core::time::Duration; + use ingot::ip::IpProtocol; + use opte_api::Ipv4Addr; + + const ROOT_0: Uuid = Uuid::from_u64_pair(1234, 0); + const ROOT_1: Uuid = Uuid::from_u64_pair(1234, 1); + const ROOT_2: Uuid = Uuid::from_u64_pair(1234, 2); + const ROOT_3: Uuid = Uuid::from_u64_pair(1234, 3); + + const FLOW_OUT: InnerFlowId = InnerFlowId { + proto: IpProtocol::UDP.0, + addrs: AddrPair::V4 { + src: Ipv4Addr::from_const([10, 0, 0, 1]), + dst: Ipv4Addr::from_const([1, 1, 1, 1]), + }, + proto_info: [12345, 53], + }; + + const FLOW_IN: InnerFlowId = InnerFlowId { + proto: IpProtocol::UDP.0, + addrs: AddrPair::V4 { + dst: Ipv4Addr::from_const([10, 0, 0, 1]), + src: Ipv4Addr::from_const([1, 1, 1, 1]), + }, + proto_info: [53, 12345], + }; + + const FLOW_OUT_2: InnerFlowId = InnerFlowId { + proto: IpProtocol::TCP.0, + addrs: AddrPair::V4 { + src: Ipv4Addr::from_const([10, 0, 0, 1]), + dst: 
Ipv4Addr::from_const([1, 1, 1, 1]), + }, + proto_info: [23456, 80], + }; + + const FLOW_IN_2: InnerFlowId = InnerFlowId { + proto: IpProtocol::TCP.0, + addrs: AddrPair::V4 { + dst: Ipv4Addr::from_const([10, 0, 0, 1]), + src: Ipv4Addr::from_const([1, 1, 1, 1]), + }, + proto_info: [80, 23456], + }; + + #[test] + fn flow_stat_deny() { + // Assert that all (non-terminal) layers are counted as an 'accept'. + // All stats in the last layer instead increment the terminal action. + let mut tree = StatTree::default(); + + let r0 = tree.new_root(Some(ROOT_0)); + let r1 = tree.new_root(Some(ROOT_1)); + let r2 = tree.new_root(Some(ROOT_2)); + let r3 = tree.new_root(Some(ROOT_3)); + + let i0 = tree.new_intermediate(vec![r0.into()]); + let i1 = tree.new_intermediate(vec![r2.into()]); + + let mut fb = FlowStatBuilder::new(); + fb.push(Arc::clone(&i0).into()); + fb.new_layer(); + fb.push(Arc::clone(&r1).into()); + fb.new_layer(); + fb.push(Arc::clone(&i1).into()); + fb.push(Arc::clone(&r3).into()); + + assert!( + fb.terminate(Action::Deny, 128, Direction::Out, false).is_none() + ); + let snap_i0: ApiFullCounter = i0.as_ref().into(); + assert_eq!(snap_i0.allow, 1); + assert_eq!(snap_i0.deny, 0); + assert_eq!(snap_i0.packets.pkts_out, 1); + assert_eq!(snap_i0.packets.bytes_out, 128); + + let snap_r1: ApiFullCounter = r1.as_ref().into(); + assert_eq!(snap_r1.allow, 1); + assert_eq!(snap_r1.deny, 0); + assert_eq!(snap_r1.packets.pkts_out, 1); + assert_eq!(snap_r1.packets.bytes_out, 128); + + let snap_i1: ApiFullCounter = i1.as_ref().into(); + assert_eq!(snap_i1.allow, 0); + assert_eq!(snap_i1.deny, 1); + assert_eq!(snap_i1.packets.pkts_out, 1); + assert_eq!(snap_i1.packets.bytes_out, 128); + + let snap_r3: ApiFullCounter = r3.as_ref().into(); + assert_eq!(snap_r3.allow, 0); + assert_eq!(snap_r3.deny, 1); + assert_eq!(snap_r3.packets.pkts_out, 1); + assert_eq!(snap_r3.packets.bytes_out, 128); + + // Does this work with only one layer? 
+ let mut fb = FlowStatBuilder::new(); + fb.push(Arc::clone(&i0).into()); + assert!( + fb.terminate(Action::Deny, 64, Direction::Out, false).is_none() + ); + + let snap_i0: ApiFullCounter = i0.as_ref().into(); + assert_eq!(snap_i0.allow, 1); + assert_eq!(snap_i0.deny, 1); + assert_eq!(snap_i0.packets.pkts_out, 2); + assert_eq!(snap_i0.packets.bytes_out, 192); + } + + #[test] + fn flow_lifecycle() { + let mut tree = StatTree::default(); + + let r0 = tree.new_root(Some(ROOT_0)); + let r1 = tree.new_root(Some(ROOT_1)); + let r2 = tree.new_root(Some(ROOT_2)); + let r3 = tree.new_root(Some(ROOT_3)); + + let i0 = tree.new_intermediate(vec![Arc::clone(&r0).into()]); + let i1 = tree.new_intermediate(vec![Arc::clone(&r1).into()]); + + let p_sz = 64; + let f_out = { + let mut fb = FlowStatBuilder::new(); + fb.push(Arc::clone(&i0).into()); + fb.push(Arc::clone(&r3).into()); + tree.new_flow( + &FLOW_OUT, + &FLOW_IN, + Direction::Out, + fb.terminate(Action::Allow, p_sz, Direction::Out, true) + .unwrap(), + ) + }; + f_out.hit(p_sz); + assert_eq!(f_out.bases, vec![r0.id, r3.id].into_iter().collect()); + + let f_in = { + let mut fb = FlowStatBuilder::new(); + fb.push(Arc::clone(&i0).into()); + fb.push(Arc::clone(&i1).into()); + fb.push(Arc::clone(&r2).into()); + tree.new_flow( + &FLOW_IN, + &FLOW_OUT, + Direction::In, + fb.terminate(Action::Allow, p_sz, Direction::In, true).unwrap(), + ) + }; + f_in.hit(p_sz); + assert_eq!(f_in.bases, vec![r0.id, r1.id, r2.id].into_iter().collect()); + + // These should refer to the same block of packet counters. + assert!(Arc::ptr_eq(&f_out.shared, &f_in.shared)); + + // Suppose some more packets come in 5 seconds later. + let t_0 = Moment::now() + Duration::from_secs(5); + f_in.hit_at(150, t_0); + f_in.hit_at(100, t_0); + f_in.hit_at(230, t_0); + + // The UFT has been cleared out -- eviction, protocol finish, etc. + drop(f_in); + drop(f_out); + + // Perform expiry. Suppose we're doing so just after that update, + // then nothing should change. 
+ let t_1 = t_0 + Duration::from_secs(1); + tree.expire(t_1); + assert!(tree.flows.contains_key(&FLOW_IN)); + assert!(tree.flows.contains_key(&FLOW_OUT)); + assert_eq!(tree.internal.len(), 2); + + // Both halves of a flow must be stale for expiry to proceed. + tree.expire(t_1 + Duration::from_secs(5)); + assert!(tree.flows.contains_key(&FLOW_IN)); + assert!(tree.flows.contains_key(&FLOW_OUT)); + assert_eq!(tree.internal.len(), 2); + + // Perform an expiry for real. Suppose that the LFT i1 has been removed + // from its layer table -- its stats will have been given to r1. + let t_2 = t_1 + Duration::from_secs(10); + drop(i1); + tree.expire(t_2); + assert!(!tree.flows.contains_key(&FLOW_IN)); + assert!(!tree.flows.contains_key(&FLOW_OUT)); + assert_eq!(tree.internal.len(), 1); + + let r0c = ApiFullCounter::from(r0.as_ref()); + assert_eq!(r0c.allow, 0); + assert_eq!(r0c.packets.pkts_in, 0); + assert_eq!(r0c.packets.pkts_out, 0); + assert_eq!(r0c.packets.bytes_in, 0); + assert_eq!(r0c.packets.bytes_out, 0); + + let i0c = ApiFullCounter::from(i0.as_ref()); + assert_eq!(i0c.allow, 2); + assert_eq!(i0c.packets.pkts_in, 4); + assert_eq!(i0c.packets.pkts_out, 1); + assert_eq!(i0c.packets.bytes_in, 544); + assert_eq!(i0c.packets.bytes_out, 64); + + for el in [ + ApiFullCounter::from(r1.as_ref()), + ApiFullCounter::from(r2.as_ref()), + ApiFullCounter::from(r3.as_ref()), + ] { + assert_eq!(el.allow, 1); + assert_eq!(el.packets.pkts_in, 4); + assert_eq!(el.packets.pkts_out, 1); + assert_eq!(el.packets.bytes_in, 544); + assert_eq!(el.packets.bytes_out, 64); + } + + // Now the LFT entry bound to r0 has gone away, and some other flows + // have written into the root stat. Expect that i0's stats have been + // folded into it. 
+ let t_3 = t_2 + Duration::from_secs(10); + drop(i0); + r0.body.act(Action::Allow, 1001, Direction::In); + r0.body.act(Action::Allow, 1002, Direction::Out); + r0.body.act(Action::Deny, 64, Direction::Out); + r0.body.act(Action::Deny, 129, Direction::In); + r0.body.act(Action::Hairpin, 32, Direction::Out); + tree.expire(t_3); + + let r0c = ApiFullCounter::from(r0.as_ref()); + assert_eq!(r0c.allow, 4); + assert_eq!(r0c.deny, 2); + assert_eq!(r0c.hairpin, 1); + assert_eq!(r0c.packets.pkts_in, 6); + assert_eq!(r0c.packets.pkts_out, 4); + assert_eq!(r0c.packets.bytes_in, 1674); + assert_eq!(r0c.packets.bytes_out, 1162); + + // Children should be empty on all roots. + for el in [r0, r1, r2, r3] { + let children = el.body.children.read(); + assert!(children.is_empty()); + } + } + + #[test] + fn root_counters() { + let mut tree = StatTree::default(); + + let r0 = tree.new_root(Some(ROOT_0)); + let r1 = tree.new_root(Some(ROOT_1)); + let r2 = tree.new_root(Some(ROOT_2)); + let r3 = tree.new_root(Some(ROOT_3)); + + let i0 = tree.new_intermediate(vec![Arc::clone(&r0).into()]); + let i1 = tree.new_intermediate(vec![Arc::clone(&r1).into()]); + + let f0_out = { + let mut fb = FlowStatBuilder::new(); + fb.push(Arc::clone(&i0).into()); + tree.new_flow( + &FLOW_OUT, + &FLOW_IN, + Direction::Out, + fb.terminate(Action::Allow, 72, Direction::Out, true).unwrap(), + ) + }; + f0_out.hit(72); + + let f0_in = { + let mut fb = FlowStatBuilder::new(); + fb.push(Arc::clone(&i0).into()); + fb.push(Arc::clone(&i1).into()); + fb.push(Arc::clone(&r2).into()); + tree.new_flow( + &FLOW_IN, + &FLOW_OUT, + Direction::In, + fb.terminate(Action::Allow, 72, Direction::In, true).unwrap(), + ) + }; + f0_in.hit(72); + + let f1_out = { + let mut fb = FlowStatBuilder::new(); + fb.push(Arc::clone(&i0).into()); + fb.push(Arc::clone(&r2).into()); + fb.push(Arc::clone(&r3).into()); + tree.new_flow( + &FLOW_OUT_2, + &FLOW_IN_2, + Direction::Out, + fb.terminate(Action::Allow, 72, Direction::Out, true).unwrap(), 
+ ) + }; + f1_out.hit(72); + + let t0 = Moment::now(); + let t1 = t0 + Duration::from_secs(7); + + f0_out.hit(72); + f0_out.hit(72); + f0_out.hit(1500); + f0_out.hit(1500); + f0_out.hit(1500); + + f0_in.hit(72); + f0_in.hit(60); + f0_in.hit(60); + f0_in.hit(60); + + f1_out.hit_at(60, t1); + f1_out.hit_at(60, t1); + f1_out.hit_at(60, t1); + + drop(i0); + drop(i1); + + // Verify that flow stats remain correct as flows/internal nodes + // are expired. + for i in 0..=15 { + let checkpoint = t1 + Duration::from_secs(i); + tree.expire(checkpoint); + + let r0_s = tree.root_stat(&ROOT_0).unwrap(); + assert_eq!(r0_s.allow, 3, "t={i}"); + assert_eq!(r0_s.packets.pkts_out, 10, "t={i}"); + assert_eq!(r0_s.packets.bytes_out, 4968, "t={i}"); + assert_eq!(r0_s.packets.pkts_in, 5, "t={i}"); + assert_eq!(r0_s.packets.bytes_in, 324, "t={i}"); + + let r1_s = tree.root_stat(&ROOT_1).unwrap(); + assert_eq!(r1_s.allow, 1, "t={i}"); + assert_eq!(r1_s.packets.pkts_out, 6, "t={i}"); + assert_eq!(r1_s.packets.bytes_out, 4716, "t={i}"); + assert_eq!(r1_s.packets.pkts_in, 5, "t={i}"); + assert_eq!(r1_s.packets.bytes_in, 324, "t={i}"); + + let r2_s = tree.root_stat(&ROOT_2).unwrap(); + assert_eq!(r2_s.allow, 2, "t={i}"); + assert_eq!(r2_s.packets.pkts_out, 10, "t={i}"); + assert_eq!(r2_s.packets.bytes_out, 4968, "t={i}"); + assert_eq!(r2_s.packets.pkts_in, 5, "t={i}"); + assert_eq!(r2_s.packets.bytes_in, 324, "t={i}"); + + let r3_s = tree.root_stat(&ROOT_3).unwrap(); + assert_eq!(r3_s.allow, 1, "t={i}"); + assert_eq!(r3_s.packets.pkts_out, 4, "t={i}"); + assert_eq!(r3_s.packets.bytes_out, 252, "t={i}"); + assert_eq!(r3_s.packets.pkts_in, 0, "t={i}"); + assert_eq!(r3_s.packets.bytes_in, 0, "t={i}"); + } + } +} diff --git a/lib/opte/src/lib.rs b/lib/opte/src/lib.rs index 6de57220..c50be535 100644 --- a/lib/opte/src/lib.rs +++ b/lib/opte/src/lib.rs @@ -28,10 +28,6 @@ extern crate cfg_if; // can use fully-qualified type paths. 
extern crate self as opte; -use alloc::boxed::Box; -use core::fmt; -use core::fmt::Display; - pub use ingot; #[cfg(any(feature = "api", test))] @@ -46,6 +42,8 @@ pub mod dynamic; pub mod engine; #[cfg(any(feature = "std", test))] pub mod print; +#[cfg(any(feature = "engine", test))] +pub mod provider; /// Return value with `bit` set. /// @@ -172,87 +170,3 @@ mod opte_provider { ) { } } - -// ================================================================ -// Providers -// -// Providers allow opte-core to work in different contexts (in theory) -// by allowing various implementations of core services to be plugged -// into the engine. For example, logging and stats can both be done as -// providers; providing implementations fit for in-kernel execution -// versus unit testing execution. Ideally we could get to a point -// where OPTE could also easily be stood up in userland (not that it -// is explicitly a goal, but only that the flexibility gives us better -// options for testing or unique production situations). However, this -// is the type of abstraction that can quickly grow out of control. If -// it doesn't serve an obvious purpose with at least two obvious -// implmentations, then it probably doesn't need to be a provider. -// -// XXX For now we stash providers here. This should probably move to -// dedicated module. -// ================================================================ - -/// A logging provider provides the means to log messages to some -/// destination based on the context in which OPTE is running. -/// -/// For example, in a unit test this could map to `println!`. In the -/// illumos kernel it would map to `cmn_err(9F)`. -/// -/// Logging levels are provided by [`LogLevel`]. These levels will map -/// to the underlying provider with varying degrees of success. -pub trait LogProvider { - /// Log a message at the specified level. 
- fn log(&self, level: LogLevel, msg: &str); -} - -#[derive(Clone, Copy, Debug)] -pub enum LogLevel { - Note, - Warn, - Error, -} - -impl Display for LogLevel { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let level_s = match self { - Self::Note => "[NOTE]", - Self::Warn => "[WARN]", - Self::Error => "[ERROR]", - }; - write!(f, "{level_s}") - } -} - -#[cfg(any(feature = "std", test))] -#[derive(Clone, Copy)] -pub struct PrintlnLog {} - -#[cfg(any(feature = "std", test))] -impl LogProvider for PrintlnLog { - fn log(&self, level: LogLevel, msg: &str) { - println!("{level} {msg}"); - } -} - -#[cfg(all(feature = "kernel", not(feature = "std"), not(test)))] -pub struct KernelLog {} - -#[cfg(all(feature = "kernel", not(feature = "std"), not(test)))] -impl LogProvider for KernelLog { - fn log(&self, level: LogLevel, msg: &str) { - use illumos_sys_hdrs as ddi; - - let cmn_level = match level { - LogLevel::Note => ddi::CE_NOTE, - LogLevel::Warn => ddi::CE_WARN, - LogLevel::Error => ddi::CE_WARN, - }; - - let msg_arg = alloc::ffi::CString::new(msg).unwrap(); - unsafe { ddi::cmn_err(cmn_level, msg_arg.as_ptr()) } - } -} - -pub struct ExecCtx { - pub log: Box, -} diff --git a/lib/opte/src/print.rs b/lib/opte/src/print.rs index bb909341..e28ac1e3 100644 --- a/lib/opte/src/print.rs +++ b/lib/opte/src/print.rs @@ -270,7 +270,7 @@ pub fn print_tcp_flows_into( ) -> std::io::Result<()> { let mut t = TabWriter::new(writer); - writeln!(t, "FLOW\tSTATE\tHITS\tSEGS IN\tSEGS OUT\tBYTES IN\tBYTES OUT")?; + writeln!(t, "FLOW\tSTATE\tHITS")?; for (flow_id, entry) in &flows.flows { print_tcp_flow(&mut t, flow_id, entry)?; } @@ -283,16 +283,7 @@ fn print_tcp_flow( id: &InnerFlowId, entry: &TcpFlowEntryDump, ) -> std::io::Result<()> { - writeln!( - t, - "{id}\t{}\t{}\t{}\t{}\t{}\t{}", - entry.tcp_state.tcp_state, - entry.hits, - entry.segs_in, - entry.segs_out, - entry.bytes_in, - entry.bytes_out, - ) + writeln!(t, "{id}\t{}\t{}", entry.tcp_state.tcp_state, entry.hits,) } /// 
Output a horizontal rule in bold to the given writer. diff --git a/lib/opte/src/provider.rs b/lib/opte/src/provider.rs new file mode 100644 index 00000000..e18ef0f2 --- /dev/null +++ b/lib/opte/src/provider.rs @@ -0,0 +1,87 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2025 Oxide Computer Company + +//! Providers allow opte-core to work in different contexts (in theory) +//! by allowing various implementations of core services to be plugged +//! into the engine. For example, logging and stats can both be done as +//! providers; providing implementations fit for in-kernel execution +//! versus unit testing execution. Ideally we could get to a point +//! where OPTE could also easily be stood up in userland (not that it +//! is explicitly a goal, but only that the flexibility gives us better +//! options for testing or unique production situations). However, this +//! is the type of abstraction that can quickly grow out of control. If +//! it doesn't serve an obvious purpose with at least two obvious +//! implementations, then it probably doesn't need to be a provider. + +use alloc::boxed::Box; +use core::fmt; +use core::fmt::Display; + +/// The set of all platform-specific providers required by a port. +pub struct Providers { + pub log: Box, +} + +/// A logging provider provides the means to log messages to some +/// destination based on the context in which OPTE is running. +/// +/// For example, in a unit test this could map to `println!`. In the +/// illumos kernel it would map to `cmn_err(9F)`. +/// +/// Logging levels are provided by [`LogLevel`]. These levels will map +/// to the underlying provider with varying degrees of success. +pub trait LogProvider: Send + Sync { + /// Log a message at the specified level. 
+ fn log(&self, level: LogLevel, msg: &str); +} + +#[derive(Clone, Copy, Debug)] +pub enum LogLevel { + Note, + Warn, + Error, +} + +impl Display for LogLevel { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let level_s = match self { + Self::Note => "[NOTE]", + Self::Warn => "[WARN]", + Self::Error => "[ERROR]", + }; + write!(f, "{level_s}") + } +} + +#[cfg(any(feature = "std", test))] +#[derive(Clone, Copy)] +pub struct PrintlnLog; + +#[cfg(any(feature = "std", test))] +impl LogProvider for PrintlnLog { + fn log(&self, level: LogLevel, msg: &str) { + println!("{level} {msg}"); + } +} + +#[cfg(all(feature = "kernel", not(feature = "std"), not(test)))] +pub struct KernelLog; + +#[cfg(all(feature = "kernel", not(feature = "std"), not(test)))] +impl LogProvider for KernelLog { + fn log(&self, level: LogLevel, msg: &str) { + use illumos_sys_hdrs as ddi; + + let cmn_level = match level { + LogLevel::Note => ddi::CE_NOTE, + LogLevel::Warn => ddi::CE_WARN, + LogLevel::Error => ddi::CE_WARN, + }; + + let msg_arg = alloc::ffi::CString::new(msg).unwrap(); + unsafe { ddi::cmn_err(cmn_level, msg_arg.as_ptr()) } + } +} diff --git a/lib/oxide-vpc/src/api.rs b/lib/oxide-vpc/src/api/mod.rs similarity index 98% rename from lib/oxide-vpc/src/api.rs rename to lib/oxide-vpc/src/api/mod.rs index b1e82e62..b866ffa1 100644 --- a/lib/oxide-vpc/src/api.rs +++ b/lib/oxide-vpc/src/api/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2024 Oxide Computer Company +// Copyright 2025 Oxide Computer Company use alloc::collections::BTreeMap; use alloc::collections::BTreeSet; @@ -20,6 +20,8 @@ use serde::Deserialize; use serde::Serialize; use uuid::Uuid; +pub mod stat; + /// This is the MAC address that OPTE uses to act as the virtual gateway. 
pub const GW_MAC_ADDR: MacAddr = MacAddr::from_const([0xA8, 0x40, 0x25, 0xFF, 0x77, 0x77]); @@ -366,7 +368,7 @@ impl From for GuestPhysAddr { /// abstraction, it's simply allowing one subnet to talk to another. /// There is no separate VPC router process, the real routing is done /// by the underlay. -#[derive(Clone, Debug, Copy, Deserialize, Serialize)] +#[derive(Clone, Debug, Copy, Deserialize, Serialize, Eq, PartialEq)] pub enum RouterTarget { Drop, InternetGateway(Option), @@ -428,7 +430,7 @@ impl Display for RouterTarget { } /// The class of router which a rule belongs to. -#[derive(Clone, Debug, Copy, Deserialize, Serialize)] +#[derive(Clone, Debug, Copy, Deserialize, Serialize, Eq, PartialEq)] pub enum RouterClass { /// The rule belongs to the shared VPC-wide router. System, @@ -579,14 +581,20 @@ pub struct ClearVirt2BoundaryReq { pub tep: Vec, } +#[derive(Copy, Clone, Debug, Deserialize, Serialize, Eq, PartialEq)] +pub struct Route { + pub dest: IpCidr, + pub target: RouterTarget, + pub class: RouterClass, + pub stat_id: Option, +} + /// Add an entry to the router. Addresses may be either IPv4 or IPv6, though the /// destination and target must match in protocol version. #[derive(Clone, Debug, Deserialize, Serialize)] pub struct AddRouterEntryReq { pub port_name: String, - pub dest: IpCidr, - pub target: RouterTarget, - pub class: RouterClass, + pub route: Route, } /// Remove an entry to the router. 
Addresses may be either IPv4 or IPv6, though the @@ -594,9 +602,7 @@ pub struct AddRouterEntryReq { #[derive(Clone, Debug, Deserialize, Serialize)] pub struct DelRouterEntryReq { pub port_name: String, - pub dest: IpCidr, - pub target: RouterTarget, - pub class: RouterClass, + pub route: Route, } #[derive(Clone, Debug, Deserialize, Serialize)] @@ -621,7 +627,7 @@ pub struct AddFwRuleReq { pub rule: FirewallRule, } -#[derive(Debug, Deserialize, Serialize)] +#[derive(Clone, Debug, Deserialize, Serialize)] pub struct SetFwRulesReq { pub port_name: String, pub rules: Vec, @@ -640,6 +646,7 @@ pub struct FirewallRule { pub filters: Filters, pub action: FirewallAction, pub priority: u16, + pub stat_id: Option, } impl FromStr for FirewallRule { @@ -714,10 +721,10 @@ impl FromStr for FirewallRule { Ok(FirewallRule { direction: direction.unwrap(), - // target.unwrap(), filters, action: action.unwrap(), priority: priority.unwrap(), + stat_id: None, }) } } diff --git a/lib/oxide-vpc/src/api/stat.rs b/lib/oxide-vpc/src/api/stat.rs new file mode 100644 index 00000000..86d85c25 --- /dev/null +++ b/lib/oxide-vpc/src/api/stat.rs @@ -0,0 +1,40 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2025 Oxide Computer Company + +//! Stat IDs for the Oxide VPC API. 
+ +use uuid::Uuid; + +pub static FW_DEFAULT_IN: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0000, &0u64.to_be_bytes()); +pub static FW_DEFAULT_OUT: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0000, &1u64.to_be_bytes()); + +pub static GATEWAY_NOSPOOF_IN: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0001, &0u64.to_be_bytes()); +pub static GATEWAY_NOSPOOF_OUT: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0001, &1u64.to_be_bytes()); + +pub static ROUTER_NOROUTE: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0002, &0u64.to_be_bytes()); + +pub static NAT_SNAT_V4: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &0u64.to_be_bytes()); +pub static NAT_SNAT_V6: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &1u64.to_be_bytes()); +pub static NAT_VALID_IGW_V4: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &2u64.to_be_bytes()); +pub static NAT_VALID_IGW_V6: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &3u64.to_be_bytes()); +pub static NAT_NONE: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &255u64.to_be_bytes()); + +pub static DESTINATION_INTERNET: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0004, &0u64.to_be_bytes()); +pub static DESTINATION_VPC_LOCAL: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0004, &1u64.to_be_bytes()); +pub static DESTINATION_VPC_PEER: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0004, &2u64.to_be_bytes()); diff --git a/lib/oxide-vpc/src/engine/firewall.rs b/lib/oxide-vpc/src/engine/firewall.rs index ad0bbb05..7a1222ba 100644 --- a/lib/oxide-vpc/src/engine/firewall.rs +++ b/lib/oxide-vpc/src/engine/firewall.rs @@ -18,6 +18,7 @@ use crate::api::Ports; pub use crate::api::ProtoFilter; use crate::api::RemFwRuleReq; use crate::api::SetFwRulesReq; +use crate::api::stat::*; use crate::engine::overlay::ACTION_META_VNI; use alloc::collections::BTreeSet; use alloc::string::ToString; @@ -50,48 +51,57 @@ pub fn setup( pb: &mut PortBuilder, ft_limit: NonZeroU32, ) -> Result<(), 
OpteError> { - let fw_layer = Firewall::create_layer(pb.name(), ft_limit); + // The inbound side of the firewall is a filtering layer, only + // traffic explicitly allowed should pass. By setting the + // default inbound action to deny we effectively implement the + // "implied deny inbound" rule as specified in RFD 63 + // §2.8.1. + // + // RFD 63 §2.8.1 also states that all outbound traffic should + // be allowed by default, aka the "implied allow outbound" + // rule. Therefore, we set the default outbound action to + // allow. + let actions = LayerActions { + default_in: DefaultAction::Deny, + default_in_stat_id: Some(FW_DEFAULT_IN), + default_out: DefaultAction::StatefulAllow, + default_out_stat_id: Some(FW_DEFAULT_OUT), + ..Default::default() + }; + + let fw_layer = Layer::new(FW_LAYER_NAME, pb, actions, ft_limit); pb.add_layer(fw_layer, Pos::First) } pub fn add_fw_rule( port: &Port, - req: &AddFwRuleReq, + req: AddFwRuleReq, ) -> Result<(), OpteError> { - let action = match req.rule.action { - FirewallAction::Allow => Action::StatefulAllow, - FirewallAction::Deny => Action::Deny, - }; - - let rule = from_fw_rule(req.rule.clone(), action); - port.add_rule(FW_LAYER_NAME, req.rule.direction, rule) + let dir = req.rule.direction; + let rule = from_fw_rule(req.rule); + port.add_rule(FW_LAYER_NAME, dir, rule) } pub fn rem_fw_rule( port: &Port, - req: &RemFwRuleReq, + req: RemFwRuleReq, ) -> Result<(), OpteError> { port.remove_rule(FW_LAYER_NAME, req.dir, req.id) } pub fn set_fw_rules( port: &Port, - req: &SetFwRulesReq, + req: SetFwRulesReq, ) -> Result<(), OpteError> { let mut in_rules = vec![]; let mut out_rules = vec![]; - for fwr in &req.rules { - let action = match fwr.action { - FirewallAction::Allow => Action::StatefulAllow, - FirewallAction::Deny => Action::Deny, - }; - - let rule = from_fw_rule(fwr.clone(), action); - if fwr.direction == Direction::In { - in_rules.push(rule); - } else { - out_rules.push(rule); + for fwr in req.rules { + let dir =
fwr.direction; + let rule = from_fw_rule(fwr); + match dir { + Direction::In => in_rules.push(rule), + Direction::Out => out_rules.push(rule), } } @@ -100,16 +110,23 @@ pub fn set_fw_rules( pub struct Firewall {} -pub fn from_fw_rule(fw_rule: FirewallRule, action: Action) -> Rule { - let addr_pred = fw_rule.filters.hosts().into_predicate(fw_rule.direction); - let proto_preds = fw_rule.filters.protocol().into_predicates(); - let port_pred = fw_rule.filters.ports().into_predicate(); +pub fn from_fw_rule(fw_rule: FirewallRule) -> Rule { + let FirewallRule { direction, filters, action, priority, stat_id } = + fw_rule; + + let action = match action { + FirewallAction::Allow => Action::StatefulAllow, + FirewallAction::Deny => Action::Deny, + }; + let addr_pred = filters.hosts().into_predicate(direction); + let proto_preds = filters.protocol().into_predicates(); + let port_pred = filters.ports().into_predicate(); if addr_pred.is_none() && proto_preds.is_empty() && port_pred.is_none() { - return Rule::match_any(fw_rule.priority, action); + return Rule::match_any_with_id(priority, action, stat_id); } - let mut rule = Rule::new(fw_rule.priority, action); + let mut rule = Rule::new_with_id(priority, action, stat_id); rule.add_predicates(proto_preds); @@ -124,28 +141,6 @@ pub fn from_fw_rule(fw_rule: FirewallRule, action: Action) -> Rule { rule.finalize() } -impl Firewall { - pub fn create_layer(port_name: &str, ft_limit: NonZeroU32) -> Layer { - // The inbound side of the firewall is a filtering layer, only - // traffic explicitly allowed should pass. By setting the - // default inbound action to deny we effectively implement the - // implied "implied deny inbound" rule as speficied in RFD 63 - // §2.8.1. - // - // RFD 63 §2.8.1 also states that all outbond traffic should - // be allowed by default, aka the "implied allow outbound" - // rule. Therefore, we set the default outbound action to - // allow. 
- let actions = LayerActions { - actions: vec![], - default_in: DefaultAction::Deny, - default_out: DefaultAction::StatefulAllow, - }; - - Layer::new(FW_LAYER_NAME, port_name, actions, ft_limit) - } -} - impl ProtoFilter { pub fn into_predicates(self) -> Vec { match self { diff --git a/lib/oxide-vpc/src/engine/gateway/arp.rs b/lib/oxide-vpc/src/engine/gateway/arp.rs index d530ce16..659bcb0e 100644 --- a/lib/oxide-vpc/src/engine/gateway/arp.rs +++ b/lib/oxide-vpc/src/engine/gateway/arp.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The ARP implementation of the Virtual Gateway. @@ -17,8 +17,13 @@ use opte::engine::predicate::EtherTypeMatch; use opte::engine::predicate::Predicate; use opte::engine::rule::Action; use opte::engine::rule::Rule; +use opte::engine::stat::StatTree; -pub fn setup(layer: &mut Layer, cfg: &VpcCfg) -> Result<(), OpteError> { +pub fn setup( + layer: &mut Layer, + cfg: &VpcCfg, + stats: &mut StatTree, +) -> Result<(), OpteError> { // ================================================================ // Outbound ARP Request for Gateway, from Guest // @@ -33,7 +38,7 @@ pub fn setup(layer: &mut Layer, cfg: &VpcCfg) -> Result<(), OpteError> { )]), Predicate::InnerEtherSrc(vec![EtherAddrMatch::Exact(cfg.guest_mac)]), ]); - layer.add_rule(Direction::Out, rule.finalize()); + layer.add_rule(Direction::Out, rule.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/dhcp.rs b/lib/oxide-vpc/src/engine/gateway/dhcp.rs index d10698e6..e008cbc0 100644 --- a/lib/oxide-vpc/src/engine/gateway/dhcp.rs +++ b/lib/oxide-vpc/src/engine/gateway/dhcp.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2024 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The DHCP implementation of the Virtual Gateway. @@ -21,12 +21,14 @@ use opte::engine::ip::v4::Ipv4Cidr; use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; +use opte::engine::stat::StatTree; pub fn setup( layer: &mut Layer, cfg: &VpcCfg, ip_cfg: &Ipv4Cfg, dhcp_cfg: DhcpCfg, + stats: &mut StatTree, ) -> Result<(), OpteError> { // All guest interfaces live on a `/32`-network in the Oxide VPC; // restricting the L2 domain to two nodes: the guest NIC and the @@ -91,9 +93,9 @@ pub fn setup( })); let discover_rule = Rule::new(1, offer); - layer.add_rule(Direction::Out, discover_rule.finalize()); + layer.add_rule(Direction::Out, discover_rule.finalize(), stats); let request_rule = Rule::new(1, ack); - layer.add_rule(Direction::Out, request_rule.finalize()); + layer.add_rule(Direction::Out, request_rule.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs b/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs index 00bbec2a..071cd3b3 100644 --- a/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs +++ b/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The DHCPv6 implementation of the Virtual Gateway. 
@@ -17,11 +17,13 @@ use opte::engine::dhcpv6::LeasedAddress; use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; +use opte::engine::stat::StatTree; pub fn setup( layer: &mut Layer, cfg: &VpcCfg, dhcp_cfg: DhcpCfg, + stats: &mut StatTree, ) -> Result<(), OpteError> { let ip_cfg = match cfg.ipv6_cfg() { None => return Ok(()), @@ -44,6 +46,6 @@ pub fn setup( let server = Action::Hairpin(Arc::new(action)); let rule = Rule::new(1, server); - layer.add_rule(Direction::Out, rule.finalize()); + layer.add_rule(Direction::Out, rule.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/icmp.rs b/lib/oxide-vpc/src/engine/gateway/icmp.rs index c4c48550..c08d4067 100644 --- a/lib/oxide-vpc/src/engine/gateway/icmp.rs +++ b/lib/oxide-vpc/src/engine/gateway/icmp.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The ICMP implementation of the Virtual Gateway. 
@@ -15,11 +15,13 @@ use opte::engine::icmp::v4::IcmpEchoReply; use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; +use opte::engine::stat::StatTree; pub fn setup( layer: &mut Layer, cfg: &VpcCfg, ip_cfg: &Ipv4Cfg, + stats: &mut StatTree, ) -> Result<(), OpteError> { // ================================================================ // ICMPv4 Echo Reply @@ -33,6 +35,6 @@ pub fn setup( echo_dst_ip: ip_cfg.gateway_ip, })); let rule = Rule::new(1, reply); - layer.add_rule(Direction::Out, rule.finalize()); + layer.add_rule(Direction::Out, rule.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/icmpv6.rs b/lib/oxide-vpc/src/engine/gateway/icmpv6.rs index 0009acb1..06993717 100644 --- a/lib/oxide-vpc/src/engine/gateway/icmpv6.rs +++ b/lib/oxide-vpc/src/engine/gateway/icmpv6.rs @@ -19,6 +19,7 @@ use opte::engine::layer::Layer; use opte::engine::predicate::Predicate; use opte::engine::rule::Action; use opte::engine::rule::Rule; +use opte::engine::stat::StatTree; use smoltcp::wire::Icmpv6Message; // Add support for ICMPv6: @@ -38,6 +39,7 @@ pub fn setup( layer: &mut Layer, cfg: &VpcCfg, ip_cfg: &Ipv6Cfg, + stats: &mut StatTree, ) -> Result<(), OpteError> { let dst_ip = Ipv6Addr::from_eui64(&cfg.gateway_mac); let hairpins = [ @@ -87,7 +89,7 @@ pub fn setup( hairpins.into_iter().enumerate().for_each(|(i, action)| { let priority = u16::try_from(i + 1).unwrap(); let rule = Rule::new(priority, action); - layer.add_rule(Direction::Out, rule.finalize()); + layer.add_rule(Direction::Out, rule.finalize(), stats); }); // Filter any uncaught in/out-bound NDP traffic. 
@@ -99,11 +101,11 @@ pub fn setup( let mut ndp_filter = Rule::new(next_out_prio, Action::Deny); ndp_filter.add_predicate(pred); - layer.add_rule(Direction::Out, ndp_filter.finalize()); + layer.add_rule(Direction::Out, ndp_filter.finalize(), stats); let mut ndp_filter = Rule::new(1, Action::Deny); ndp_filter.add_predicate(in_pred); - layer.add_rule(Direction::In, ndp_filter.finalize()); + layer.add_rule(Direction::In, ndp_filter.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index b8d6a580..946306cc 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2024 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The Oxide VPC Virtual Gateway. //! @@ -42,6 +42,7 @@ use crate::api::DhcpCfg; use crate::api::MacAddr; +use crate::api::stat::*; use crate::cfg::Ipv4Cfg; use crate::cfg::Ipv6Cfg; use crate::cfg::VpcCfg; @@ -60,7 +61,7 @@ use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::MblkPacketData; +use opte::engine::packet::MblkPacketDataView; use opte::engine::port::PortBuilder; use opte::engine::port::Pos; use opte::engine::port::meta::ActionMeta; @@ -89,7 +90,7 @@ pub use transit::*; pub const NAME: &str = "gateway"; pub fn setup( - pb: &PortBuilder, + pb: &mut PortBuilder, cfg: &VpcCfg, vpc_mappings: Arc, ft_limit: core::num::NonZeroU32, @@ -104,15 +105,18 @@ pub fn setup( // Since we are acting as a gateway we also rewrite the source MAC address // for inbound traffic to be that of the gateway. 
let actions = LayerActions { - actions: vec![], default_in: DefaultAction::Deny, + default_in_stat_id: Some(GATEWAY_NOSPOOF_IN), default_out: DefaultAction::Deny, + default_out_stat_id: Some(GATEWAY_NOSPOOF_OUT), + ..Default::default() }; - let mut layer = Layer::new(NAME, pb.name(), actions, ft_limit); + let mut layer = Layer::new(NAME, pb, actions, ft_limit); if let Some(ipv4_cfg) = cfg.ipv4_cfg() { setup_ipv4( + pb, &mut layer, cfg, ipv4_cfg, @@ -122,7 +126,14 @@ pub fn setup( } if let Some(ipv6_cfg) = cfg.ipv6_cfg() { - setup_ipv6(&mut layer, cfg, ipv6_cfg, vpc_mappings, dhcp_cfg.clone())?; + setup_ipv6( + pb, + &mut layer, + cfg, + ipv6_cfg, + vpc_mappings, + dhcp_cfg.clone(), + )?; } pb.add_layer(layer, Pos::Before("firewall")) @@ -143,7 +154,7 @@ impl StaticAction for RewriteSrcMac { &self, _dir: Direction, _flow_id: &InnerFlowId, - _packet_meta: &MblkPacketData, + _packet_meta: MblkPacketDataView, _action_meta: &mut ActionMeta, ) -> GenHtResult { Ok(AllowOrDeny::Allow(HdrTransform { @@ -161,15 +172,17 @@ impl StaticAction for RewriteSrcMac { } fn setup_ipv4( + pb: &mut PortBuilder, layer: &mut Layer, cfg: &VpcCfg, ip_cfg: &Ipv4Cfg, vpc_mappings: Arc, dhcp_cfg: DhcpCfg, ) -> Result<(), OpteError> { - arp::setup(layer, cfg)?; - dhcp::setup(layer, cfg, ip_cfg, dhcp_cfg)?; - icmp::setup(layer, cfg, ip_cfg)?; + let stats = pb.stats_mut(); + arp::setup(layer, cfg, stats)?; + dhcp::setup(layer, cfg, ip_cfg, dhcp_cfg, stats)?; + icmp::setup(layer, cfg, ip_cfg, stats)?; let vpc_meta = Arc::new(VpcMeta::new(vpc_mappings)); @@ -180,7 +193,7 @@ fn setup_ipv4( nospoof_out.add_predicate(Predicate::InnerEtherSrc(vec![ EtherAddrMatch::Exact(cfg.guest_mac), ])); - layer.add_rule(Direction::Out, nospoof_out.finalize()); + layer.add_rule(Direction::Out, nospoof_out.finalize(), stats); let mut unicast_in = Rule::new( 1000, @@ -194,20 +207,22 @@ fn setup_ipv4( unicast_in.add_predicate(Predicate::InnerEtherDst(vec![ EtherAddrMatch::Exact(cfg.guest_mac), ])); -
layer.add_rule(Direction::In, unicast_in.finalize()); + layer.add_rule(Direction::In, unicast_in.finalize(), stats); Ok(()) } fn setup_ipv6( + pb: &mut PortBuilder, layer: &mut Layer, cfg: &VpcCfg, ip_cfg: &Ipv6Cfg, vpc_mappings: Arc, dhcp_cfg: DhcpCfg, ) -> Result<(), OpteError> { - icmpv6::setup(layer, cfg, ip_cfg)?; - dhcpv6::setup(layer, cfg, dhcp_cfg)?; + let stats = pb.stats_mut(); + icmpv6::setup(layer, cfg, ip_cfg, stats)?; + dhcpv6::setup(layer, cfg, dhcp_cfg, stats)?; let vpc_meta = Arc::new(VpcMeta::new(vpc_mappings)); let mut nospoof_out = Rule::new(1000, Action::Meta(vpc_meta)); nospoof_out.add_predicate(Predicate::InnerSrcIp6(vec![ @@ -216,7 +231,7 @@ fn setup_ipv6( nospoof_out.add_predicate(Predicate::InnerEtherSrc(vec![ EtherAddrMatch::Exact(cfg.guest_mac), ])); - layer.add_rule(Direction::Out, nospoof_out.finalize()); + layer.add_rule(Direction::Out, nospoof_out.finalize(), stats); let mut unicast_in = Rule::new( 1000, @@ -230,7 +245,7 @@ fn setup_ipv6( unicast_in.add_predicate(Predicate::InnerEtherDst(vec![ EtherAddrMatch::Exact(cfg.guest_mac), ])); - layer.add_rule(Direction::In, unicast_in.finalize()); + layer.add_rule(Direction::In, unicast_in.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index c8f6fbf8..d41d1114 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -106,7 +106,7 @@ impl NetworkImpl for VpcNetwork { where T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { - match (dir, pkt.meta().inner_ether().ethertype()) { + match (dir, pkt.meta().headers.inner_eth.ethertype()) { (Direction::Out, Ethertype::ARP) => self.handle_arp_out(pkt), _ => Ok(HdlPktAction::Deny), diff --git a/lib/oxide-vpc/src/engine/nat.rs b/lib/oxide-vpc/src/engine/nat.rs index 2251a246..8fc14063 100644 --- a/lib/oxide-vpc/src/engine/nat.rs +++ b/lib/oxide-vpc/src/engine/nat.rs @@ -2,7 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2025 Oxide Computer Company use super::VpcNetwork; use super::router::ROUTER_LAYER_NAME; @@ -10,6 +10,7 @@ use super::router::RouterTargetClass; use super::router::RouterTargetInternal; use crate::api::ExternalIpCfg; use crate::api::SetExternalIpsReq; +use crate::api::stat::*; use crate::cfg::IpCfg; use crate::cfg::Ipv4Cfg; use crate::cfg::Ipv6Cfg; @@ -101,14 +102,16 @@ pub fn setup( // but no valid replacement source IP must be dropped, otherwise it will // be forwarded to boundary services. let actions = LayerActions { - actions: vec![], default_in: DefaultAction::Allow, + default_in_stat_id: Some(NAT_NONE), default_out: DefaultAction::Allow, + default_out_stat_id: Some(NAT_NONE), + ..Default::default() }; - let mut layer = Layer::new(NAT_LAYER_NAME, pb.name(), actions, ft_limit); + let mut layer = Layer::new(NAT_LAYER_NAME, pb, actions, ft_limit); let (in_rules, out_rules) = create_nat_rules(cfg, None)?; - layer.set_rules(in_rules, out_rules); + layer.set_rules(in_rules, out_rules, pb.stats_mut()); pb.add_layer(layer, Pos::After(ROUTER_LAYER_NAME)) } @@ -288,8 +291,11 @@ fn setup_ipv4_nat( let snat = Arc::new(snat); for igw_id in igw_matches { - let mut rule = - Rule::new(SNAT_PRIORITY, Action::Stateful(snat.clone())); + let mut rule = Rule::new_with_id( + SNAT_PRIORITY, + Action::Stateful(snat.clone()), + Some(NAT_SNAT_V4), + ); rule.add_predicate(Predicate::InnerEtherType(vec![ EtherTypeMatch::Exact(ETHER_TYPE_IPV4), @@ -437,8 +443,11 @@ fn setup_ipv6_nat( let snat = Arc::new(snat); for igw_id in igw_matches { - let mut rule = - Rule::new(SNAT_PRIORITY, Action::Stateful(snat.clone())); + let mut rule = Rule::new_with_id( + SNAT_PRIORITY, + Action::Stateful(snat.clone()), + Some(NAT_SNAT_V6), + ); rule.add_predicate(Predicate::InnerEtherType(vec![ EtherTypeMatch::Exact(ETHER_TYPE_IPV6), diff 
--git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index 76f3b902..18229b8a 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -15,6 +15,7 @@ use crate::api::PhysNet; use crate::api::TunnelEndpoint; use crate::api::V2bMapResp; use crate::api::VpcMapResp; +use crate::api::stat::*; use crate::cfg::VpcCfg; use alloc::collections::BTreeSet; use alloc::collections::btree_map::BTreeMap; @@ -48,7 +49,7 @@ use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::MblkPacketData; +use opte::engine::packet::MblkPacketDataView; use opte::engine::port::PortBuilder; use opte::engine::port::Pos; use opte::engine::port::meta::ActionMeta; @@ -65,24 +66,29 @@ use opte::engine::rule::Resource; use opte::engine::rule::ResourceEntry; use opte::engine::rule::Rule; use opte::engine::rule::StaticAction; +use opte::engine::stat::RootStat; use poptrie::Poptrie; pub const OVERLAY_LAYER_NAME: &str = "overlay"; pub fn setup( - pb: &PortBuilder, + pb: &mut PortBuilder, cfg: &VpcCfg, v2p: Arc, v2b: Arc, ft_limit: core::num::NonZeroU32, ) -> core::result::Result<(), OpteError> { // Action Index 0 - let encap = Action::Static(Arc::new(EncapAction::new( - cfg.phys_ip, - cfg.vni, + let internet_stat = pb.stats_mut().new_root(Some(DESTINATION_INTERNET)); + let vpc_local_stat = pb.stats_mut().new_root(Some(DESTINATION_VPC_LOCAL)); + let encap = Action::Static(Arc::new(EncapAction { + phys_ip_src: cfg.phys_ip, + vni: cfg.vni, v2p, v2b, - ))); + internet_stat, + vpc_local_stat, + })); // Action Index 1 let decap = Action::Static(Arc::new(DecapAction::new())); @@ -91,14 +97,14 @@ pub fn setup( actions: vec![encap, decap], default_in: DefaultAction::Deny, default_out: DefaultAction::Deny, + ..Default::default() }; - let mut layer = - Layer::new(OVERLAY_LAYER_NAME, pb.name(), actions, ft_limit); + let mut layer = 
Layer::new(OVERLAY_LAYER_NAME, pb, actions, ft_limit); let encap_rule = Rule::match_any(1, layer.action(0).unwrap()); - layer.add_rule(Direction::Out, encap_rule); + layer.add_rule(Direction::Out, encap_rule, pb.stats_mut()); let decap_rule = Rule::match_any(1, layer.action(1).unwrap()); - layer.add_rule(Direction::In, decap_rule); + layer.add_rule(Direction::In, decap_rule, pb.stats_mut()); // NOTE The First/Last positions cannot fail; perhaps I should // improve the API to avoid the unwrap(). pb.add_layer(layer, Pos::Last) @@ -176,17 +182,9 @@ pub struct EncapAction { vni: Vni, v2p: Arc, v2b: Arc, -} -impl EncapAction { - pub fn new( - phys_ip_src: Ipv6Addr, - vni: Vni, - v2p: Arc, - v2b: Arc, - ) -> Self { - Self { phys_ip_src, vni, v2p, v2b } - } + internet_stat: Arc, + vpc_local_stat: Arc, } impl fmt::Display for EncapAction { @@ -201,7 +199,7 @@ impl StaticAction for EncapAction { // The encap action is only used for outgoing. _dir: Direction, flow_id: &InnerFlowId, - _pkt_meta: &MblkPacketData, + mut pkt: MblkPacketDataView, action_meta: &mut ActionMeta, ) -> GenHtResult { let f_hash = flow_id.crc32(); @@ -235,6 +233,7 @@ impl StaticAction for EncapAction { let (is_internal, phys_target) = match target { RouterTargetInternal::InternetGateway(_) => { + pkt.push_stat(Arc::clone(&self.internet_stat)); match self.v2b.get(&flow_id.dst_ip()) { Some(phys) => { // Hash the packet onto a route target. This is a very @@ -258,30 +257,38 @@ impl StaticAction for EncapAction { } } - RouterTargetInternal::Ip(virt_ip) => match self.v2p.get(&virt_ip) { - Some(phys) => ( - true, - PhysNet { ether: phys.ether, ip: phys.ip, vni: self.vni }, - ), - - // The router target has specified a VPC IP we do not - // currently know about; this could be for two - // reasons: - // - // 1. No such IP currently exists in the guest's VPC. - // - // 2. The destination IP exists in the guest's VPC, - // but we do not yet have a mapping for it. 
- // - // We cannot differentiate these cases from the point - // of view of this code without more information from - // the control plane; rather we drop the packet. If we - // are dealing with scenario (2), the control plane - // should eventually provide us with a mapping. - None => return Ok(AllowOrDeny::Deny), - }, + RouterTargetInternal::Ip(virt_ip) => { + pkt.push_stat(Arc::clone(&self.vpc_local_stat)); + match self.v2p.get(&virt_ip) { + Some(phys) => ( + true, + PhysNet { + ether: phys.ether, + ip: phys.ip, + vni: self.vni, + }, + ), + + // The router target has specified a VPC IP we do not + // currently know about; this could be for two + // reasons: + // + // 1. No such IP currently exists in the guest's VPC. + // + // 2. The destination IP exists in the guest's VPC, + // but we do not yet have a mapping for it. + // + // We cannot differentiate these cases from the point + // of view of this code without more information from + // the control plane; rather we drop the packet. If we + // are dealing with scenario (2), the control plane + // should eventually provide us with a mapping. + None => return Ok(AllowOrDeny::Deny), + } + } RouterTargetInternal::VpcSubnet(_) => { + pkt.push_stat(Arc::clone(&self.vpc_local_stat)); match self.v2p.get(&flow_id.dst_ip()) { Some(phys) => ( true, @@ -387,10 +394,10 @@ impl StaticAction for DecapAction { // The decap action is only used for inbound. 
_dir: Direction, _flow_id: &InnerFlowId, - pkt_meta: &MblkPacketData, + pkt_meta: MblkPacketDataView, action_meta: &mut ActionMeta, ) -> GenHtResult { - match pkt_meta.outer_encap_geneve_vni_and_origin() { + match pkt_meta.headers.outer_encap_geneve_vni_and_origin() { Some((vni, oxide_external_pkt)) => { // We only conditionally add this metadata because the // `Address::VNI` filter uses it to select VPC-originated diff --git a/lib/oxide-vpc/src/engine/router.rs b/lib/oxide-vpc/src/engine/router.rs index 32d574d1..c9e8ed0c 100644 --- a/lib/oxide-vpc/src/engine/router.rs +++ b/lib/oxide-vpc/src/engine/router.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2024 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The Oxide Network VPC Router. //! @@ -11,8 +11,10 @@ use super::VpcNetwork; use super::firewall as fw; use crate::api::DelRouterEntryResp; +use crate::api::Route; use crate::api::RouterClass; use crate::api::RouterTarget; +use crate::api::stat::*; use crate::cfg::VpcCfg; use alloc::string::String; use alloc::string::ToString; @@ -247,7 +249,7 @@ fn compute_rule_priority(cidr: &IpCidr, class: RouterClass) -> u16 { } pub fn setup( - pb: &PortBuilder, + pb: &mut PortBuilder, _cfg: &VpcCfg, ft_limit: core::num::NonZeroU32, ) -> Result<(), OpteError> { @@ -257,12 +259,13 @@ pub fn setup( // Outbound: If there is no matching route, then the packet should // make it no further. 
let actions = LayerActions { - actions: vec![], default_in: DefaultAction::Allow, default_out: DefaultAction::Deny, + default_out_stat_id: Some(ROUTER_NOROUTE), + ..Default::default() }; - let layer = Layer::new(ROUTER_LAYER_NAME, pb.name(), actions, ft_limit); + let layer = Layer::new(ROUTER_LAYER_NAME, pb, actions, ft_limit); pb.add_layer(layer, Pos::After(fw::FW_LAYER_NAME)) } @@ -284,11 +287,8 @@ fn valid_router_dest_target_pair(dest: &IpCidr, target: &RouterTarget) -> bool { ) } -fn make_rule( - dest: IpCidr, - target: RouterTarget, - class: RouterClass, -) -> Result, OpteError> { +fn make_rule(route: Route) -> Result, OpteError> { + let Route { dest, target, class, stat_id } = route; if !valid_router_dest_target_pair(&dest, &target) { return Err(OpteError::InvalidRouterEntry { dest, @@ -360,7 +360,7 @@ fn make_rule( }; let priority = compute_rule_priority(&dest, class); - let mut rule = Rule::new(priority, action); + let mut rule = Rule::new_with_id(priority, action, stat_id); rule.add_predicate(predicate); Ok(rule.finalize()) @@ -372,11 +372,9 @@ fn make_rule( /// destination [`IpCidr`] as well as its paired [`RouterTarget`]. pub fn del_entry( port: &Port, - dest: IpCidr, - target: RouterTarget, - class: RouterClass, + route: Route, ) -> Result { - let rule = make_rule(dest, target, class)?; + let rule = make_rule(route)?; let maybe_id = port.find_rule(ROUTER_LAYER_NAME, Direction::Out, &rule)?; match maybe_id { Some(id) => { @@ -393,11 +391,9 @@ pub fn del_entry( /// Route the [`IpCidr`] to the specified [`RouterTarget`]. pub fn add_entry( port: &Port, - dest: IpCidr, - target: RouterTarget, - class: RouterClass, + route: Route, ) -> Result { - let rule = make_rule(dest, target, class)?; + let rule = make_rule(route)?; port.add_rule(ROUTER_LAYER_NAME, Direction::Out, rule)?; Ok(NoResp::default()) } @@ -405,14 +401,12 @@ pub fn add_entry( /// Replace the current set of router entries with the set passed in. 
pub fn replace( port: &Port, - entries: Vec<(IpCidr, RouterTarget, RouterClass)>, + entries: &[Route], ) -> Result { - let mut out_rules = Vec::with_capacity(entries.len()); - for (cidr, target, class) in entries { - out_rules.push(make_rule(cidr, target, class)?); - } + let out_rules: Result, _> = + entries.iter().copied().map(make_rule).collect(); - port.set_rules(ROUTER_LAYER_NAME, vec![], out_rules)?; + port.set_rules(ROUTER_LAYER_NAME, vec![], out_rules?)?; Ok(NoResp::default()) } diff --git a/lib/oxide-vpc/tests/firewall_tests.rs b/lib/oxide-vpc/tests/firewall_tests.rs index 0be752fe..c4918a3a 100644 --- a/lib/oxide-vpc/tests/firewall_tests.rs +++ b/lib/oxide-vpc/tests/firewall_tests.rs @@ -27,7 +27,7 @@ fn firewall_replace_rules() { let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g2.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g2.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -64,7 +64,7 @@ fn firewall_replace_rules() { let tcp_out = "dir=out action=allow priority=1000 protocol=TCP"; firewall::set_fw_rules( &g1.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g1.port.name().to_string(), rules: vec![any_out.parse().unwrap(), tcp_out.parse().unwrap()], }, @@ -124,7 +124,7 @@ fn firewall_replace_rules() { let new_rule = "dir=in action=deny priority=1000 protocol=TCP"; firewall::set_fw_rules( &g2.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g2.port.name().to_string(), rules: vec![new_rule.parse().unwrap()], }, @@ -282,7 +282,7 @@ fn firewall_vni_outbound() { format!("dir=out action=allow priority=1000 hosts=vni={}", g1_cfg.vni); firewall::set_fw_rules( &g1.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g1.port.name().to_string(), rules: vec![any_out.parse().unwrap(), vni_out.parse().unwrap()], }, diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 757b31bf..752280ee 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ 
b/lib/oxide-vpc/tests/integration_tests.rs @@ -57,6 +57,7 @@ use opte_test_utils as common; use oxide_vpc::api::BOUNDARY_SERVICES_VNI; use oxide_vpc::api::ExternalIpCfg; use oxide_vpc::api::FirewallRule; +use oxide_vpc::api::Route; use oxide_vpc::api::RouterClass; use oxide_vpc::api::VpcCfg; use pcap::*; @@ -213,7 +214,7 @@ fn port_transition_pause() { "action=allow priority=10 dir=in protocol=tcp port=80".parse().unwrap(); firewall::add_fw_rule( &g1.port, - &AddFwRuleReq { port_name: g1.port.name().to_string(), rule: fw_rule }, + AddFwRuleReq { port_name: g1.port.name().to_string(), rule: fw_rule }, ) .unwrap(); incr!(g1, ["epoch", "firewall.rules.in"]); @@ -270,11 +271,14 @@ fn port_transition_pause() { assert!(matches!( router::del_entry( &g2.port, - IpCidr::Ip4(g2_cfg.ipv4_cfg().unwrap().vpc_subnet), - RouterTarget::VpcSubnet(IpCidr::Ip4( - g2_cfg.ipv4_cfg().unwrap().vpc_subnet - )), - RouterClass::System, + Route { + dest: IpCidr::Ip4(g2_cfg.ipv4_cfg().unwrap().vpc_subnet), + target: RouterTarget::VpcSubnet(IpCidr::Ip4( + g2_cfg.ipv4_cfg().unwrap().vpc_subnet + )), + class: RouterClass::System, + stat_id: None, + } ), Err(OpteError::BadState(_)) )); @@ -287,7 +291,7 @@ fn port_transition_pause() { // This exercises Port::add_rule(). let res = firewall::add_fw_rule( &g2.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g2.port.name().to_string(), rule: fw_rule.clone(), }, @@ -295,7 +299,7 @@ fn port_transition_pause() { assert!(matches!(res, Err(OpteError::BadState(_)))); let res = firewall::set_fw_rules( &g2.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g2.port.name().to_string(), rules: vec![fw_rule], }, @@ -332,7 +336,7 @@ fn add_remove_fw_rule() { let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g1.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g1.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -343,7 +347,7 @@ fn add_remove_fw_rule() { // Remove the rule just added, by ID. 
firewall::rem_fw_rule( &g1.port, - &oxide_vpc::api::RemFwRuleReq { + oxide_vpc::api::RemFwRuleReq { port_name: g1.port.name().to_string(), dir: In, id: 0, @@ -396,18 +400,18 @@ fn gateway_icmp4_ping() { // the VpcParser since it would expect any inbound packet to be // encapsulated. pcap.add_pkt(&hp); - // let reply = hp.parse(In, GenericUlp {}).unwrap(); - let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap().to_full_meta(); - let meta = reply.meta(); - assert!(meta.outer_ether().is_none()); - assert!(meta.outer_ip().is_none()); + let mut reply = + parse_inbound(&mut hp, GenericUlp {}).unwrap().to_full_meta(); + let meta = reply.headers(); + assert!(meta.outer_eth.is_none()); + assert!(meta.outer_l3.is_none()); assert!(meta.outer_encap_geneve_vni_and_origin().is_none()); - let eth = meta.inner_ether(); + let eth = &meta.inner_eth; assert_eq!(eth.source(), g1_cfg.gateway_mac); assert_eq!(eth.destination(), g1_cfg.guest_mac); - match meta.inner_l3().as_ref().unwrap() { + match meta.inner_l3.as_ref().unwrap() { L3::Ipv4(ip4) => { assert_eq!(ip4.source(), g1_cfg.ipv4_cfg().unwrap().gateway_ip); assert_eq!( @@ -420,7 +424,8 @@ fn gateway_icmp4_ping() { L3::Ipv6(_) => panic!("expected inner IPv4 metadata, got IPv6"), } - let mut reply_body = meta.inner_ulp().expect("ICMPv4 is a ULP").emit_vec(); + let mut reply_body = + meta.inner_ulp.as_ref().expect("ICMPv4 is a ULP").emit_vec(); reply.meta().append_remaining(&mut reply_body); let reply_pkt = Icmpv4Packet::new_checked(&reply_body).unwrap(); let mut csum = CsumCapab::ignored(); @@ -496,9 +501,14 @@ fn guest_to_guest_no_route() { // Make sure the router is configured to drop all packets. 
router::del_entry( &g1.port, - IpCidr::Ip4(g1_cfg.ipv4().vpc_subnet), - RouterTarget::VpcSubnet(IpCidr::Ip4(g1_cfg.ipv4().vpc_subnet)), - RouterClass::System, + Route { + dest: IpCidr::Ip4(g1_cfg.ipv4().vpc_subnet), + target: RouterTarget::VpcSubnet(IpCidr::Ip4( + g1_cfg.ipv4().vpc_subnet, + )), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); update!(g1, ["incr:epoch", "set:router.rules.out=0"]); @@ -547,7 +557,7 @@ fn guest_to_guest() { let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g2.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g2.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -566,8 +576,8 @@ fn guest_to_guest() { let mut pkt1_m = http_syn(&g1_cfg, &g2_cfg); pcap_guest1.add_pkt(&pkt1_m); let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); - let ulp_csum_b4 = pkt1.meta().inner_ulp.as_ref().unwrap().csum(); - let ip_csum_b4 = pkt1.meta().inner_l3.as_ref().unwrap().csum(); + let ulp_csum_b4 = pkt1.headers().inner_ulp.as_ref().unwrap().csum(); + let ip_csum_b4 = pkt1.headers().inner_l3.as_ref().unwrap().csum(); // ================================================================ // Run the packet through g1's port in the outbound direction and @@ -590,12 +600,12 @@ fn guest_to_guest() { assert_eq!(nodes.count(), 2); let pkt2 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let ulp_csum_after = pkt2.meta().inner_ulp.csum(); - let ip_csum_after = pkt2.meta().inner_l3.csum(); + let ulp_csum_after = pkt2.headers().inner_ulp.csum(); + let ip_csum_after = pkt2.headers().inner_l3.csum(); assert_eq!(ulp_csum_after, ulp_csum_b4); assert_eq!(ip_csum_after, ip_csum_b4); - let meta = pkt2.meta(); + let meta = pkt2.headers(); assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); @@ -659,7 +669,7 @@ fn guest_to_guest() { // assert_eq!(pkt2.body_seg(), 0); let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); - let g2_meta = 
pkt2.meta(); + let g2_meta = pkt2.headers(); // TODO: can we have a convenience method that verifies that the // emitspec was a rewind/drop from the head of the pkt? @@ -716,7 +726,7 @@ fn guest_to_guest_diff_vpc_no_peer() { let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g2.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g2.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -757,9 +767,12 @@ fn guest_to_internet_ipv4() { // Add router entry that allows g1 to route to internet. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -801,7 +814,7 @@ fn guest_to_internet_ipv4() { // - Geneve // - (Inner ULP headers) let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let meta = pkt1.meta(); + let meta = pkt1.headers(); assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); @@ -869,9 +882,12 @@ fn guest_to_internet_ipv6() { // Add router entry that allows g1 to route to internet. 
router::add_entry( &g1.port, - IpCidr::Ip6("::/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip6("::/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -906,7 +922,7 @@ fn guest_to_internet_ipv6() { ); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let meta = pkt1.meta(); + let meta = pkt1.headers(); assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); @@ -1049,17 +1065,23 @@ fn multi_external_ip_setup( // Add router entry that allows g1 to route to internet. router::add_entry( &g1.port, - IpCidr::Ip6("::/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip6("::/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -1068,7 +1090,7 @@ fn multi_external_ip_setup( let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g1.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g1.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -1163,7 +1185,7 @@ fn check_external_ip_inbound_behaviour( .unwrap() .to_full_meta(); assert_eq!( - pkt1.meta().inner_ip4().unwrap().destination(), + pkt1.headers().inner_ip4().unwrap().destination(), private_ip ); } @@ -1176,7 +1198,7 @@ fn check_external_ip_inbound_behaviour( .unwrap() .to_full_meta(); assert_eq!( - 
pkt1.meta().inner_ip6().unwrap().destination(), + pkt1.headers().inner_ip6().unwrap().destination(), private_ip ); } @@ -1225,12 +1247,14 @@ fn check_external_ip_inbound_behaviour( match ext_ip { IpAddr::Ip4(ip) => { - let chosen_ip = pkt2.meta().inner_ip4().unwrap().source(); + let chosen_ip = + pkt2.headers().inner_ip4().unwrap().source(); assert_ne!(chosen_ip, ip); assert_ne!(IpAddr::from(chosen_ip), private_ip); } IpAddr::Ip6(ip) => { - let chosen_ip = pkt2.meta().inner_ip6().unwrap().source(); + let chosen_ip = + pkt2.headers().inner_ip6().unwrap().source(); assert_ne!(chosen_ip, ip); assert_ne!(IpAddr::from(chosen_ip), private_ip); } @@ -1245,10 +1269,16 @@ fn check_external_ip_inbound_behaviour( ); match ext_ip { IpAddr::Ip4(ip) => { - assert_eq!(pkt2.meta().inner_ip4().unwrap().source(), ip); + assert_eq!( + pkt2.headers().inner_ip4().unwrap().source(), + ip + ); } IpAddr::Ip6(ip) => { - assert_eq!(pkt2.meta().inner_ip6().unwrap().source(), ip); + assert_eq!( + pkt2.headers().inner_ip6().unwrap().source(), + ip + ); } }; } @@ -1313,10 +1343,10 @@ fn external_ip_balanced_over_floating_ips() { match partner_ip { IpAddr::Ip4(_) => { - seen_v4s.push(pkt.meta().inner_ip4().unwrap().source()); + seen_v4s.push(pkt.headers().inner_ip4().unwrap().source()); } IpAddr::Ip6(_) => { - seen_v6s.push(pkt.meta().inner_ip6().unwrap().source()); + seen_v6s.push(pkt.headers().inner_ip6().unwrap().source()); } } } @@ -1448,10 +1478,10 @@ fn external_ip_epoch_affinity_preserved() { parse_inbound(&mut pkt2_m, VpcParser {}).unwrap().to_full_meta(); match ext_ip { IpAddr::Ip4(ip) => { - assert_eq!(pkt2.meta().inner_ip4().unwrap().source(), ip); + assert_eq!(pkt2.headers().inner_ip4().unwrap().source(), ip); } IpAddr::Ip6(ip) => { - assert_eq!(pkt2.meta().inner_ip6().unwrap().source(), ip); + assert_eq!(pkt2.headers().inner_ip6().unwrap().source(), ip); } }; } @@ -1546,7 +1576,7 @@ fn unpack_and_verify_icmp( In => parse_outbound(pkt, VpcParser {}).unwrap().to_full_meta(), Out => 
parse_inbound(pkt, VpcParser {}).unwrap().to_full_meta(), }; - let meta = parsed.meta(); + let meta = parsed.headers(); let (src_eth, dst_eth, src_ip, dst_ip, ident) = match dir { Direction::Out => ( @@ -1565,11 +1595,11 @@ fn unpack_and_verify_icmp( ), }; - let eth = meta.inner_ether(); + let eth = &meta.inner_eth; assert_eq!(eth.source(), src_eth); assert_eq!(eth.destination(), dst_eth); - match (dst_ip, meta.inner_l3().as_ref().unwrap()) { + match (dst_ip, meta.inner_l3.as_ref().unwrap()) { (IpAddr::Ip4(_), L3::Ipv4(meta)) => { assert_eq!(eth.ethertype(), Ethertype::IPV4); assert_eq!(IpAddr::from(meta.source()), src_ip); @@ -1608,8 +1638,8 @@ fn unpack_and_verify_icmp4( ) { // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. - let mut icmp = pkt.meta().inner_ulp().unwrap().emit_vec(); - pkt.meta().append_remaining(&mut icmp); + let mut icmp = pkt.headers().inner_ulp.as_ref().unwrap().emit_vec(); + pkt.append_remaining(&mut icmp); let icmp = Icmpv4Packet::new_checked(&icmp[..]).unwrap(); @@ -1630,8 +1660,8 @@ fn unpack_and_verify_icmp6( // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. - let mut icmp = pkt.meta().inner_ulp().unwrap().emit_vec(); - pkt.meta().append_remaining(&mut icmp); + let mut icmp = pkt.headers().inner_ulp.as_ref().unwrap().emit_vec(); + pkt.append_remaining(&mut icmp); let icmp = Icmpv6Packet::new_checked(&icmp[..]).unwrap(); assert!(icmp.verify_checksum(&src_ip, &dst_ip)); @@ -1671,17 +1701,23 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { // Add router entries that allow g1 to route to internet. 
router::add_entry( &g1.port, - IpCidr::Ip6("::/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip6("::/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -1949,7 +1985,7 @@ fn arp_gateway() { // can't use the VpcParser since it would expect any // inbound packet to be encapsulated. let hppkt = parse_inbound(&mut hppkt, GenericUlp {}).unwrap(); - let meta = hppkt.meta(); + let meta = hppkt.headers(); let ethm = &meta.inner_eth; assert_eq!(ethm.destination(), cfg.guest_mac); assert_eq!(ethm.source(), cfg.gateway_mac); @@ -2080,7 +2116,7 @@ fn test_guest_to_gateway_icmpv6_ping( pcap.add_pkt(&hp); let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); - let meta = reply.meta(); + let meta = reply.headers(); let eth = &meta.inner_eth; assert_eq!(eth.source(), g1_cfg.gateway_mac); @@ -2172,7 +2208,7 @@ fn gateway_router_advert_reply() { pcap.add_pkt(&hp); let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); - let meta = reply.meta(); + let meta = reply.headers(); let eth = &meta.inner_eth; assert_eq!( @@ -2429,7 +2465,7 @@ fn validate_hairpin_advert( pcap.add_pkt(&hp); let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); - let meta = reply.meta(); + let meta = reply.headers(); // Check that the inner MACs are what we expect. 
let eth = &meta.inner_eth; @@ -2561,9 +2597,12 @@ fn outbound_ndp_dropped() { router::add_entry( &g1.port, - IpCidr::Ip6(ipv6.vpc_subnet), - RouterTarget::VpcSubnet(IpCidr::Ip6(ipv6.vpc_subnet)), - RouterClass::System, + Route { + dest: IpCidr::Ip6(ipv6.vpc_subnet), + target: RouterTarget::VpcSubnet(IpCidr::Ip6(ipv6.vpc_subnet)), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["router.rules.out", "epoch"]); @@ -2571,9 +2610,12 @@ fn outbound_ndp_dropped() { // Add router entry that allows g1 to route to internet. router::add_entry( &g1.port, - IpCidr::Ip6("::/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip6("::/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["router.rules.out", "epoch"]); @@ -2744,10 +2786,10 @@ fn verify_dhcpv6_essentials<'a>( parse_outbound(request_pkt, GenericUlp {}).unwrap().to_full_meta(); let reply_pkt = parse_inbound(reply_pkt, GenericUlp {}).unwrap().to_full_meta(); - let request_meta = request_pkt.meta(); - let reply_meta = reply_pkt.meta(); - let request_ether = request_meta.inner_ether(); - let reply_ether = reply_meta.inner_ether(); + let request_meta = request_pkt.headers(); + let reply_meta = reply_pkt.headers(); + let request_ether = &request_meta.inner_eth; + let reply_ether = &reply_meta.inner_eth; assert_eq!( request_ether.destination(), dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap() @@ -2873,7 +2915,7 @@ fn test_reply_to_dhcpv6_solicit_or_request() { // inbound packet to be encapsulated. 
pcap.add_pkt(&hp); - let reply_pkt = + let mut reply_pkt = parse_inbound(&mut hp, GenericUlp {}).unwrap().to_full_meta(); let out_body = reply_pkt.meta().copy_remaining(); drop(reply_pkt); @@ -2997,8 +3039,14 @@ fn establish_http_conn( ] ); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let snat_port = - pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); + let snat_port = pkt1 + .to_full_meta() + .headers() + .inner_ulp + .as_ref() + .unwrap() + .src_port() + .unwrap(); // ================================================================ // Step 2 @@ -3074,9 +3122,12 @@ fn uft_lft_invalidation_out() { // Add default route. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3093,7 +3144,7 @@ fn uft_lft_invalidation_out() { let any_out = "dir=out action=deny priority=65535 protocol=any"; firewall::set_fw_rules( &g1.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g1.port.name().to_string(), rules: vec![any_out.parse().unwrap()], }, @@ -3132,6 +3183,8 @@ fn uft_lft_invalidation_out() { "incr:stats.port.out_uft_miss", ] ); + + _ = print_port(&g1.port, &g1.vpc_map); } // Verify that changing rules causes invalidation of UFT and LFT @@ -3162,9 +3215,12 @@ fn uft_lft_invalidation_in() { // Add default route. 
router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3215,7 +3271,7 @@ fn uft_lft_invalidation_in() { let any_out = "dir=out action=deny priority=65535 protocol=any"; firewall::set_fw_rules( &g1.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g1.port.name().to_string(), rules: vec![any_out.parse().unwrap()], }, @@ -3299,8 +3355,14 @@ fn test_outbound_http(g1_cfg: &VpcCfg, g1: &mut PortAndVps) -> InnerFlowId { ] ); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let snat_port = - pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); + let snat_port = pkt1 + .to_full_meta() + .headers() + .inner_ulp + .as_ref() + .unwrap() + .src_port() + .unwrap(); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); // ================================================================ @@ -3480,9 +3542,12 @@ fn tcp_outbound() { // Add default route. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3531,7 +3596,7 @@ fn early_tcp_invalidation() { let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g1.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g1.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -3542,9 +3607,12 @@ fn early_tcp_invalidation() { // Add default route. 
router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3581,8 +3649,14 @@ fn early_tcp_invalidation() { ); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let snat_port = - pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); + let snat_port = pkt1 + .to_full_meta() + .headers() + .inner_ulp + .as_ref() + .unwrap() + .src_port() + .unwrap(); // ================================================================ // Drive to established, then validate the same applies to inbound @@ -3731,9 +3805,12 @@ fn ephemeral_ip_preferred_over_snat_outbound() { // Add default route. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3771,7 +3848,7 @@ fn ephemeral_ip_preferred_over_snat_outbound() { let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap().to_full_meta(); assert_eq!( - pkt1.meta().inner_ip4().unwrap().source(), + pkt1.headers().inner_ip4().unwrap().source(), "10.60.1.20".parse().unwrap(), "did not choose assigned ephemeral IP" ); @@ -3823,9 +3900,12 @@ fn tcp_inbound() { // Add default route. 
router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3863,8 +3943,14 @@ fn tcp_inbound() { ); let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); let flow = pkt1.flow().mirror(); - let sport = - pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); + let sport = pkt1 + .to_full_meta() + .headers() + .inner_ulp + .as_ref() + .unwrap() + .src_port() + .unwrap(); assert_eq!(TcpState::Listen, g1.port.tcp_state(&flow).unwrap()); // ================================================================ @@ -4106,9 +4192,12 @@ fn no_panic_on_flow_table_full() { // Add router entry that allows g1 to route to internet. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4158,9 +4247,12 @@ fn intra_subnet_routes_with_custom() { let cidr = IpCidr::Ip4("172.30.4.0/22".parse().unwrap()); router::add_entry( &g1.port, - cidr, - RouterTarget::VpcSubnet(cidr), - RouterClass::System, + Route { + dest: cidr, + target: RouterTarget::VpcSubnet(cidr), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4209,8 +4301,16 @@ fn intra_subnet_routes_with_custom() { // Suppose the user now installs a 'custom' route in the first subnet to // drop traffic towards the second subnet. This rule must take priority. 
- router::add_entry(&g1.port, cidr, RouterTarget::Drop, RouterClass::Custom) - .unwrap(); + router::add_entry( + &g1.port, + Route { + dest: cidr, + target: RouterTarget::Drop, + class: RouterClass::Custom, + stat_id: None, + }, + ) + .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); let mut pkt2_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, @@ -4240,8 +4340,16 @@ fn intra_subnet_routes_with_custom() { ); // When the user removes this rule, traffic may flow again to subnet 2. - router::del_entry(&g1.port, cidr, RouterTarget::Drop, RouterClass::Custom) - .unwrap(); + router::del_entry( + &g1.port, + Route { + dest: cidr, + target: RouterTarget::Drop, + class: RouterClass::Custom, + stat_id: None, + }, + ) + .unwrap(); update!(g1, ["incr:epoch", "decr:router.rules.out"]); let mut pkt3_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, @@ -4282,9 +4390,12 @@ fn port_as_router_target() { let dst_ip: Ipv4Addr = "192.168.0.1".parse().unwrap(); router::add_entry( &g1.port, - cidr, - RouterTarget::Ip(g2_cfg.ipv4().private_ip.into()), - RouterClass::Custom, + Route { + dest: cidr, + target: RouterTarget::Ip(g2_cfg.ipv4().private_ip.into()), + class: RouterClass::Custom, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4327,12 +4438,12 @@ fn port_as_router_target() { // Encap routes between sleds correctly, inner IPs are not modified, // and L2 dst matches the guest's NIC. 
- let v6_encap_meta = &pkt1.meta().outer_v6; + let v6_encap_meta = &pkt1.headers().outer_v6; assert_eq!(v6_encap_meta.source(), g1_cfg.phys_ip); assert_eq!(v6_encap_meta.destination(), g2_cfg.phys_ip); - assert_eq!(pkt1.meta().inner_eth.destination(), g2_cfg.guest_mac); - assert_eq!(pkt1.meta().inner_eth.source(), g1_cfg.guest_mac); - let ValidL3::Ipv4(inner_ip4) = &pkt1.meta().inner_l3 else { + assert_eq!(pkt1.headers().inner_eth.destination(), g2_cfg.guest_mac); + assert_eq!(pkt1.headers().inner_eth.source(), g1_cfg.guest_mac); + let ValidL3::Ipv4(inner_ip4) = &pkt1.headers().inner_l3 else { panic!("encapped v4 packet did not parse back as v4"); }; assert_eq!(inner_ip4.source(), g1_cfg.ipv4().private_ip); @@ -4445,9 +4556,12 @@ fn select_eip_conditioned_on_igw() { // Add default route. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(Some(default_igw)), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(Some(default_igw)), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4455,33 +4569,45 @@ fn select_eip_conditioned_on_igw() { // Add custom inetgw routes. 
router::add_entry( &g1.port, - IpCidr::Ip4("1.1.1.0/24".parse().unwrap()), - RouterTarget::InternetGateway(Some(custom_igw0)), - RouterClass::Custom, + Route { + dest: IpCidr::Ip4("1.1.1.0/24".parse().unwrap()), + target: RouterTarget::InternetGateway(Some(custom_igw0)), + class: RouterClass::Custom, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip4("2.2.2.0/24".parse().unwrap()), - RouterTarget::InternetGateway(Some(custom_igw1)), - RouterClass::Custom, + Route { + dest: IpCidr::Ip4("2.2.2.0/24".parse().unwrap()), + target: RouterTarget::InternetGateway(Some(custom_igw1)), + class: RouterClass::Custom, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip4("3.3.3.0/24".parse().unwrap()), - RouterTarget::InternetGateway(Some(ipless_igw)), - RouterClass::Custom, + Route { + dest: IpCidr::Ip4("3.3.3.0/24".parse().unwrap()), + target: RouterTarget::InternetGateway(Some(ipless_igw)), + class: RouterClass::Custom, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip4("4.4.4.0/24".parse().unwrap()), - RouterTarget::InternetGateway(Some(all_ips_igw)), - RouterClass::Custom, + Route { + dest: IpCidr::Ip4("4.4.4.0/24".parse().unwrap()), + target: RouterTarget::InternetGateway(Some(all_ips_igw)), + class: RouterClass::Custom, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4543,7 +4669,7 @@ fn select_eip_conditioned_on_igw() { expect_modified!(res, pkt1_m); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap().to_full_meta(); assert_eq!( - pkt1.meta().inner_ip4().unwrap().source(), + pkt1.headers().inner_ip4().unwrap().source(), g1_cfg.ipv4().external_ips.ephemeral_ip.unwrap() ); incr!( @@ -4573,7 +4699,7 @@ fn select_eip_conditioned_on_igw() { let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap().to_full_meta(); assert!( 
&g1_cfg.ipv4().external_ips.floating_ips[..2] - .contains(&pkt2.meta().inner_ip4().unwrap().source()) + .contains(&pkt2.headers().inner_ip4().unwrap().source()) ); incr!( g1, @@ -4601,7 +4727,7 @@ fn select_eip_conditioned_on_igw() { expect_modified!(res, pkt3_m); let pkt3 = parse_inbound(&mut pkt3_m, VpcParser {}).unwrap().to_full_meta(); assert_eq!( - pkt3.meta().inner_ip4().unwrap().source(), + pkt3.headers().inner_ip4().unwrap().source(), g1_cfg.ipv4().external_ips.floating_ips[2] ); incr!( @@ -4654,7 +4780,7 @@ fn select_eip_conditioned_on_igw() { let pkt5 = parse_inbound(&mut pkt5_m, VpcParser {}).unwrap().to_full_meta(); assert!( &g1_cfg.ipv4().external_ips.floating_ips[..] - .contains(&pkt5.meta().inner_ip4().unwrap().source()) + .contains(&pkt5.headers().inner_ip4().unwrap().source()) ); incr!( g1, @@ -4733,7 +4859,7 @@ fn icmpv6_inner_has_nat_applied() { let rule = "dir=in action=allow priority=9 protocol=ICMP6"; firewall::add_fw_rule( &g1.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g1.port.name().to_string(), rule: rule.parse().unwrap(), }, diff --git a/xde-tests/src/lib.rs b/xde-tests/src/lib.rs index 2fd8a634..e1ad188e 100644 --- a/xde-tests/src/lib.rs +++ b/xde-tests/src/lib.rs @@ -24,6 +24,7 @@ use oxide_vpc::api::Ipv6Addr; use oxide_vpc::api::MacAddr; use oxide_vpc::api::PhysNet; use oxide_vpc::api::Ports; +use oxide_vpc::api::Route; use oxide_vpc::api::RouterClass; use oxide_vpc::api::RouterTarget; use oxide_vpc::api::SNat4Cfg; @@ -119,9 +120,12 @@ impl OptePort { let adm = OpteHdl::open()?; adm.add_router_entry(&AddRouterEntryReq { port_name: self.name.clone(), - dest: IpCidr::Ip4(format!("{}/32", dest).parse().unwrap()), - target: RouterTarget::Ip(dest.parse().unwrap()), - class: RouterClass::System, + route: Route { + dest: IpCidr::Ip4(format!("{}/32", dest).parse().unwrap()), + target: RouterTarget::Ip(dest.parse().unwrap()), + class: RouterClass::System, + stat_id: None, + }, })?; Ok(()) } @@ -139,6 +143,7 @@ impl OptePort { action: 
FirewallAction::Allow, priority: 0, filters, + stat_id: None, }, })?; diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 6d572383..d68353c3 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -176,7 +176,6 @@ use ingot::geneve::GeneveRef; use ingot::ip::IpProtocol; use ingot::types::HeaderLen; use ingot::udp::Udp; -use opte::ExecCtx; use opte::api::ClearLftReq; use opte::api::ClearUftReq; use opte::api::CmdOk; @@ -223,6 +222,7 @@ use opte::engine::parse::ValidUlp; use opte::engine::port::Port; use opte::engine::port::PortBuilder; use opte::engine::port::ProcessResult; +use opte::provider::Providers; use oxide_vpc::api::AddFwRuleReq; use oxide_vpc::api::AddRouterEntryReq; use oxide_vpc::api::ClearVirt2BoundaryReq; @@ -361,7 +361,7 @@ pub struct XdeUnderlayPort { struct XdeState { management_lock: TokenLock, - ectx: Arc, + ectx: Arc, vpc_map: Arc, v2b: Arc, devs: ReadOnlyDevMap, @@ -402,7 +402,8 @@ fn get_xde_state() -> &'static XdeState { impl XdeState { fn new() -> Self { - let ectx = Arc::new(ExecCtx { log: Box::new(opte::KernelLog {}) }); + let ectx = + Arc::new(Providers { log: Box::new(opte::provider::KernelLog) }); let dev_map = Arc::new(KRwLock::new(DevMap::default())); let devs = ReadOnlyDevMap::new(dev_map.clone()); @@ -870,6 +871,26 @@ unsafe extern "C" fn xde_ioc_opte_cmd(karg: *mut c_void, mode: c_int) -> c_int { let resp = remove_cidr_hdlr(&mut env); hdlr_resp(&mut env, resp) } + + OpteCmd::ListRootStat => { + let resp = list_root_stats_hdlr(&mut env); + hdlr_resp(&mut env, resp) + } + + OpteCmd::ListFlowStat => { + let resp = list_flow_stats_hdlr(&mut env); + hdlr_resp(&mut env, resp) + } + + OpteCmd::DumpRootStat => { + let resp = dump_root_stats_hdlr(&mut env); + hdlr_resp(&mut env, resp) + } + + OpteCmd::DumpFlowStat => { + let resp = dump_flow_stats_hdlr(&mut env); + hdlr_resp(&mut env, resp) + } } } @@ -1799,7 +1820,7 @@ fn guest_loopback( } }; - let meta = parsed_pkt.meta(); + let meta = parsed_pkt.headers(); let old_len = parsed_pkt.len(); let 
ulp_meoi = match meta.ulp_meoi(old_len) { @@ -1812,7 +1833,7 @@ fn guest_loopback( let flow = parsed_pkt.flow(); - let ether_dst = parsed_pkt.meta().inner_eth.destination(); + let ether_dst = parsed_pkt.headers().inner_eth.destination(); let port_key = VniMac::new(vni, ether_dst); let maybe_dest_dev = entry_state.get_by_key(port_key); @@ -2000,7 +2021,7 @@ fn xde_mc_tx_one<'a>( }; let old_len = parsed_pkt.len(); - let meta = parsed_pkt.meta(); + let meta = parsed_pkt.headers(); let Ok(non_eth_payl_bytes) = u32::try_from((&meta.inner_l3, &meta.inner_ulp).packet_length()) else { @@ -2319,7 +2340,7 @@ fn new_port( vpc_map: Arc, v2p: Arc, v2b: Arc, - ectx: Arc, + ectx: Arc, dhcp_cfg: &DhcpCfg, ) -> Result>, OpteError> { let cfg = cfg.clone(); @@ -2337,10 +2358,10 @@ fn new_port( // XXX some layers have no need for LFT, perhaps have two types // of Layer: one with, one without? - gateway::setup(&pb, &cfg, vpc_map, FT_LIMIT_ONE, dhcp_cfg)?; - router::setup(&pb, &cfg, FT_LIMIT_ONE)?; + gateway::setup(&mut pb, &cfg, vpc_map, FT_LIMIT_ONE, dhcp_cfg)?; + router::setup(&mut pb, &cfg, FT_LIMIT_ONE)?; nat::setup(&mut pb, &cfg, nat_ft_limit)?; - overlay::setup(&pb, &cfg, v2p, v2b, FT_LIMIT_ONE)?; + overlay::setup(&mut pb, &cfg, v2p, v2b, FT_LIMIT_ONE)?; // Set the overall unified flow and TCP flow table limits based on the total // configuration above, by taking the maximum of size of the individual @@ -2471,7 +2492,7 @@ fn xde_rx_one( } }; - let meta = parsed_pkt.meta(); + let meta = parsed_pkt.headers(); let old_len = parsed_pkt.len(); let ulp_meoi = match meta.ulp_meoi(old_len) { @@ -2562,7 +2583,7 @@ fn add_router_entry_hdlr(env: &mut IoctlEnvelope) -> Result { .get_by_name(&req.port_name) .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; - router::add_entry(&dev.port, req.dest, req.target, req.class) + router::add_entry(&dev.port, req.route) } #[unsafe(no_mangle)] @@ -2576,7 +2597,7 @@ fn del_router_entry_hdlr( .get_by_name(&req.port_name) .ok_or_else(|| 
OpteError::PortNotFound(req.port_name.clone()))?; - router::del_entry(&dev.port, req.dest, req.target, req.class) + router::del_entry(&dev.port, req.route) } #[unsafe(no_mangle)] @@ -2588,7 +2609,7 @@ fn add_fw_rule_hdlr(env: &mut IoctlEnvelope) -> Result { .get_by_name(&req.port_name) .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; - firewall::add_fw_rule(&dev.port, &req)?; + firewall::add_fw_rule(&dev.port, req)?; Ok(NoResp::default()) } @@ -2601,7 +2622,7 @@ fn rem_fw_rule_hdlr(env: &mut IoctlEnvelope) -> Result { .get_by_name(&req.port_name) .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; - firewall::rem_fw_rule(&dev.port, &req)?; + firewall::rem_fw_rule(&dev.port, req)?; Ok(NoResp::default()) } @@ -2614,7 +2635,7 @@ fn set_fw_rules_hdlr(env: &mut IoctlEnvelope) -> Result { .get_by_name(&req.port_name) .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; - firewall::set_fw_rules(&dev.port, &req)?; + firewall::set_fw_rules(&dev.port, req)?; Ok(NoResp::default()) } @@ -2782,6 +2803,88 @@ fn remove_cidr_hdlr( gateway::remove_cidr(&dev.port, req.cidr, req.dir, state.vpc_map.clone()) } +#[unsafe(no_mangle)] +fn list_root_stats_hdlr( + env: &mut IoctlEnvelope, +) -> Result { + let req: opte::api::ListRootStatReq = env.copy_in_req()?; + let state = get_xde_state(); + let devs = state.devs.read(); + let dev = devs + .get_by_name(&req.port_name) + .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; + + Ok(opte::api::ListRootStatResp { + root_ids: dev.port.read_stats(|stats| stats.all_root_ids().collect()), + }) +} + +#[unsafe(no_mangle)] +fn list_flow_stats_hdlr( + env: &mut IoctlEnvelope, +) -> Result, OpteError> { + let req: opte::api::ListFlowStatReq = env.copy_in_req()?; + let state = get_xde_state(); + let devs = state.devs.read(); + let dev = devs + .get_by_name(&req.port_name) + .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; + + Ok(opte::api::ListFlowStatResp { + flow_ids: 
dev.port.read_stats(|stats| stats.all_flow_pairs().collect()), + }) +} + +#[unsafe(no_mangle)] +fn dump_root_stats_hdlr( + env: &mut IoctlEnvelope, +) -> Result { + let req: opte::api::DumpRootStatReq = env.copy_in_req()?; + let state = get_xde_state(); + let devs = state.devs.read(); + let dev = devs + .get_by_name(&req.port_name) + .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; + + let root_stats = dev.port.read_stats(|stats| { + if req.root_ids.is_empty() { + stats.all_root_stats().collect() + } else { + req.root_ids + .iter() + .filter_map(|k| stats.root_stat(k).map(|v| (*k, v))) + .collect() + } + }); + + Ok(opte::api::DumpRootStatResp { root_stats }) +} + +#[unsafe(no_mangle)] +fn dump_flow_stats_hdlr( + env: &mut IoctlEnvelope, +) -> Result, OpteError> { + let req: opte::api::DumpFlowStatReq = env.copy_in_req()?; + let state = get_xde_state(); + let devs = state.devs.read(); + let dev = devs + .get_by_name(&req.port_name) + .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; + + let flow_stats = dev.port.read_stats(|stats| { + if req.flow_ids.is_empty() { + stats.all_flow_stats().collect() + } else { + req.flow_ids + .iter() + .filter_map(|k| stats.flow_stat(k).map(|v| (*k, v))) + .collect() + } + }); + + Ok(opte::api::DumpFlowStatResp { flow_stats }) +} + #[unsafe(no_mangle)] fn list_ports_hdlr() -> Result { let mut resp = ListPortsResp { ports: vec![] };