src/backends/baremetal.rs
audience: ai
The bare-metal backend. Operator supplies one or more machines to which the bridge has SSH root. Unlike the cloud backends, nothing is provisioned externally — the hosts are already running. “Provision” here means:
- Pick an idle machine satisfying the grant’s shape (CPU, RAM, TDX requirement, region label).
- Open a `russh` root session to it.
- Drop the requester’s image onto disk (fetched by the control session from a mosaik content store and verified against the grant’s post-hash).
- Start a systemd-managed workload unit — or, on a bare-TDX host, launch a nested TDX guest via `virsh` + `qemu-tdx`.
- Inject the per-grant SSH public key into the workload’s `authorized_keys`.
- Open an inbound port on the host’s firewall for the workload.
- Return the host’s public name + workload port + per-grant key as the uniform `ProvisionedInstance`.
The control-plane SSH connection is held open by the backend for the duration of the grant; on termination or deadline, the backend tears the workload down and closes the port.
Bare VM vs bare-TDX host
Each `BareMetalMachine` entry declares `tdx_capable: bool`:
- `tdx_capable = false` (bare VM). A regular Linux host. Workloads run as systemd container units under `podman`. The operator is trusted not to snoop on the workload; the requester treats the VM as a semi-trusted host.
- `tdx_capable = true` (bare-TDX host). A TDX-capable physical (or virtualised) host. Workloads run as nested TDX guests; the nested guest’s MR_TD flows back into the SSH receipt as evidence. The requester verifies that MR_TD against their image’s declared post-hash before trusting the workload’s outputs — closing the confidentiality gap bare VMs leave open.
Both shapes share the Backend trait so the fleet
router does not distinguish. Grants that require TDX
are routed only to tdx_capable machines at
admission time.
//! Bare-metal backend.
//!
//! Inputs: one or more machines to which the operator
//! has granted the bridge SSH root. Machines are either
//! **bare VMs** (a regular Linux host; workloads run in
//! systemd-managed containers) or **bare-TDX hosts** (a
//! TDX-capable physical or virtualised host; workloads
//! run as nested TDX guests whose MR_TD can flow back to
//! the requester as part of the receipt).
//!
//! Unlike the cloud backends, nothing is provisioned
//! externally: the hosts are already running. "Provision"
//! here means:
//!
//! 1. Pick an idle host satisfying the grant's shape.
//! 2. Open an SSH root session to it.
//! 3. Drop the requester's image onto disk (fetched by
//! the control session from a mosaik content store
//! and verified against the grant's post-hash).
//! 4. Start a systemd-managed workload unit (or launch a
//! nested TDX guest for bare-TDX hosts).
//! 5. Inject the per-grant SSH public key into the
//! workload's `authorized_keys`.
//! 6. Open an inbound port on the host's firewall so the
//! requester can SSH into the workload directly.
//! 7. Return the host's public name + port + per-grant
//! key as the `ProvisionedInstance`.
//!
//! The control-plane SSH connection is held open by the
//! backend for the duration of the grant; on termination
//! or deadline, the backend tears the workload down and
//! closes the port.
use std::collections::HashMap;
use std::sync::Arc;
use anyhow::{anyhow, Context};
use async_trait::async_trait;
use coalition_compute::{AlmanacTick, ComputeGrant, UsageMetrics};
use tokio::sync::Mutex;
use crate::backends::{Backend, Capabilities, ProvisionedInstance, RegionLabel};
use crate::config::{BareMetalBootConfig, BareMetalMachine};
use crate::zipnet_io::Envelope;
/// Backend that schedules grant workloads onto a fixed pool of
/// operator-supplied machines reachable over SSH (see module docs).
pub struct BareMetalBackend {
    /// Boot-time configuration, including the machine inventory
    /// (`cfg.machines`); cloned from the operator's config at startup.
    cfg: BareMetalBootConfig,
    /// Runtime scheduling state: which machines are busy
    /// with which grants, keyed by machine host name. Simplest
    /// possible scheduler; real impl would track CPU/RAM residuals.
    busy: Arc<Mutex<HashMap<String, Vec<BusyGrant>>>>,
}
/// One grant currently occupying capacity on a machine; an entry in
/// the `busy` map of [`BareMetalBackend`].
struct BusyGrant {
    // Identifies the grant so it can be released at teardown.
    request_id: coalition_compute::RequestId,
    // CPU reserved for this grant, in millicores.
    // NOTE(review): currently always written as 0 by `provision`
    // (residual tracking is a TODO).
    cpu_millicores_reserved: u32,
    // RAM reserved for this grant, in MiB. Same caveat: always 0 today.
    ram_mib_reserved: u32,
}
impl BareMetalBackend {
    /// Build the backend from the operator's boot config.
    ///
    /// Does not yet contact any machine; reachability checks are a TODO.
    pub async fn new(cfg: &BareMetalBootConfig) -> anyhow::Result<Self> {
        // TODO: open SSH control sessions to each machine
        // at boot, verify reachability, refuse to start if
        // any required host is unreachable (unless the
        // operator set `lenient_boot = true`).
        Ok(Self {
            cfg: cfg.clone(),
            busy: Arc::new(Mutex::new(HashMap::new())),
        })
    }

    /// Find an idle machine that fits the grant's shape.
    ///
    /// "Idle" is taken literally for now: any machine with an active
    /// grant is skipped, since CPU/RAM residual tracking is not yet
    /// implemented. The envelope's requested region, when present, is
    /// a hard filter. Errors when no machine qualifies.
    async fn pick_machine(
        &self,
        grant: &ComputeGrant<'_>,
        envelope: &Envelope,
    ) -> anyhow::Result<&BareMetalMachine> {
        let preferred_region = envelope.requested_region();
        let busy = self.busy.lock().await;
        for m in &self.cfg.machines {
            if let Some(want) = preferred_region {
                if m.region != want { continue; }
            }
            // TODO: check grant's manifest.tdx_required
            // vs m.tdx_capable
            // TODO: check grant's CPU/RAM vs residuals
            let used = busy.get(&m.host).map(|v| v.len()).unwrap_or(0);
            if used > 0 {
                // Already hosts at least one grant: the simple
                // scheduler treats the machine as fully occupied,
                // so `provision`'s reservation actually prevents
                // over-commit.
                continue;
            }
            let _ = grant;
            return Ok(m);
        }
        anyhow::bail!(
            "baremetal: no machine available in region {:?}",
            preferred_region,
        )
    }

    /// Install the grant's image on machine `m` and start the
    /// workload, returning the connection material the requester
    /// needs to SSH in.
    ///
    /// Currently a prototype stub that always errors; the intended
    /// SSH choreography is sketched in the comments below.
    async fn install_and_start(
        &self,
        m: &BareMetalMachine,
        grant: &ComputeGrant<'_>,
        envelope: &Envelope,
    ) -> anyhow::Result<InstalledWorkload> {
        // Open a single russh session to the host as root.
        // let ssh = russh::client::connect(...)
        // ssh.authenticate_publickey(m.user, load_key(&m.ssh_key_path)?)
        // ssh.exec("mkdir -p /var/lib/compute-bridge/{grant_id}")
        // ssh.scp("/var/lib/compute-bridge/{grant_id}/image.oci", image_bytes)
        // ssh.exec("sha256sum image.oci && verify against grant.image_hash")
        // ssh.exec("systemd-run --unit=grant-{id} --property=... podman run -d ...")
        // ssh.exec("firewall-cmd --add-port={workload_port}/tcp --zone=public")
        // ssh.exec("ssh-inject-authorized-key grant-{id} {per_grant_pubkey}")
        // Return host_port (public SSH into the workload).
        //
        // For a bare-TDX host, substitute `systemd-run … podman run`
        // with launching a nested TDX guest via qemu + libvirt; the
        // guest's MR_TD is read out of its attestation report and
        // attached to the receipt as `ssh_host_key` evidence.
        let _ = (m, grant, envelope);
        Err(anyhow!(
            "BareMetalBackend::install_and_start is a prototype stub; \
             implement via russh + systemd-run (bare VM) or \
             russh + virsh + qemu-tdx (bare-TDX host)"
        ))
    }
}
/// Result of `install_and_start`: everything needed to hand the
/// requester SSH access to the freshly started workload.
struct InstalledWorkload {
    // Host port opened in the firewall that reaches the workload's SSH.
    workload_port: u16,
    // SSH host key presented by the workload. NOTE(review): for
    // bare-TDX hosts the install comments say MR_TD evidence is
    // attached as `ssh_host_key` — confirm which this carries.
    host_key: Vec<u8>,
    // Per-grant SSH public key injected into the workload's
    // authorized_keys.
    key_public: Vec<u8>,
    // Matching private key, returned to the requester in the
    // `ProvisionedInstance`.
    key_private: Vec<u8>,
}
#[async_trait]
impl Backend for BareMetalBackend {
    fn name(&self) -> &'static str { "baremetal" }

    /// Aggregate capabilities across the whole machine pool: the
    /// deduplicated set of regions, whether any host is TDX-capable,
    /// and the summed CPU/RAM (an upper bound — a single grant still
    /// lands on one machine).
    async fn capabilities(&self) -> anyhow::Result<Capabilities> {
        // BTreeSet both dedups and orders the region labels.
        let regions: Vec<RegionLabel> = self.cfg.machines.iter()
            .map(|m| m.region.clone())
            .collect::<std::collections::BTreeSet<_>>()
            .into_iter().collect();
        let tdx_capable = self.cfg.machines.iter().any(|m| m.tdx_capable);
        let sum_cpu: u32 = self.cfg.machines.iter().map(|m| m.cpu_millicores).sum();
        let sum_ram: u32 = self.cfg.machines.iter().map(|m| m.ram_mib).sum();
        Ok(Capabilities {
            regions,
            tdx_capable,
            max_cpu_millicores: sum_cpu,
            max_ram_mib: sum_ram,
        })
    }

    /// Cheap admission check: can this backend conceivably serve the
    /// grant? Today: yes, as long as any machine is configured.
    fn can_satisfy(&self, grant: &ComputeGrant<'_>) -> bool {
        // TODO: inspect the grant's manifest for TDX
        // requirement; if required and no tdx_capable
        // host exists in an acceptable region, return
        // false.
        let _ = grant;
        !self.cfg.machines.is_empty()
    }

    /// Pick a machine, install and start the workload, record the
    /// reservation, and return the uniform `ProvisionedInstance`.
    async fn provision(
        &self,
        grant: &ComputeGrant<'_>,
        envelope: &Envelope,
    ) -> anyhow::Result<ProvisionedInstance> {
        let m = self.pick_machine(grant, envelope).await?;
        let workload = self.install_and_start(m, grant, envelope).await?;
        // Mark the machine busy so the scheduler won't
        // over-commit it. Scoped block so the lock is dropped
        // before building the return value.
        {
            let mut busy = self.busy.lock().await;
            busy.entry(m.host.clone()).or_default().push(BusyGrant {
                request_id: grant.request_id,
                cpu_millicores_reserved: 0, // TODO: from manifest
                ram_mib_reserved: 0,
            });
        }
        Ok(ProvisionedInstance {
            backend: self.name(),
            instance_id: format!("{}:{}", m.host, workload.workload_port),
            region: m.region.clone(),
            public_host: m.host.clone(),
            ssh_port: workload.workload_port,
            user: "compute".into(),
            key_private: workload.key_private,
            key_public: workload.key_public,
            host_key: workload.host_key,
            started_at: AlmanacTick::default(),
            provider_id: coalition_compute::ProviderId::default(),
        })
    }

    /// Block until the workload exits or `valid_to` passes, then
    /// report usage. Currently a prototype stub that always errors.
    async fn watch_until_exit(
        &self,
        instance: &ProvisionedInstance,
        valid_to: AlmanacTick,
    ) -> anyhow::Result<UsageMetrics> {
        // Poll the workload unit's `systemctl is-active`
        // over the control-plane SSH session; or wait
        // until `valid_to`. Compute usage metrics from
        // cgroup counters collected at teardown.
        let _ = (instance, valid_to);
        Err(anyhow!(
            "BareMetalBackend::watch_until_exit is a prototype stub"
        ))
    }

    /// Tear the workload down and release the machine's reservation.
    async fn terminate(&self, instance: &ProvisionedInstance) -> anyhow::Result<()> {
        // TODO over SSH: systemctl stop grant-{id};
        // firewall-cmd --remove-port=...;
        // rm -rf /var/lib/compute-bridge/{grant_id}
        //
        // Release the reservation. `BusyGrant` does not record which
        // instance it belongs to, so this grant cannot be removed
        // selectively yet; clear the host's whole reservation list.
        // (The previous code's retain predicate compared a string to
        // itself — always false — which also dropped every entry, but
        // disguised as a filter.) TODO: store the instance id / port
        // in `BusyGrant` so only this grant is released.
        let mut busy = self.busy.lock().await;
        busy.remove(&instance.public_host);
        Ok(())
    }
}
Up: compute-bridge → backends.