Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

src/backends/aws.rs

audience: ai

AWS EC2 backend. Per-region Ec2Client cache; per-grant key-pair and security group; cloud-init that fetches the requester’s image by post-hash and authorises the per-grant SSH key.

Not tdx_capable: AWS has no consumer TDX offering as of this prototype’s writing. Operators needing TDX route grants through GCP (Confidential VMs), Azure (Confidential Computing v3 SKUs), or bare-metal hosts.

//! AWS EC2 backend.
//!
//! All control-plane calls happen inside the TDX guest.
//! The operator's AWS credentials are sealed into the
//! TDX measurement (see config.rs) so a curious host
//! cannot exfiltrate them.

use std::collections::HashMap;

use anyhow::{anyhow, Context};
use async_trait::async_trait;
use aws_sdk_ec2::Client as Ec2Client;
use coalition_compute::{AlmanacTick, ComputeGrant, UsageMetrics};

use crate::backends::{Backend, Capabilities, ProvisionedInstance, RegionLabel};
use crate::config::AwsBootConfig;
use crate::zipnet_io::Envelope;

/// AWS EC2 implementation of the `Backend` trait.
///
/// Holds the operator's boot configuration together with a cache of EC2
/// clients keyed by region name.
pub struct AwsBackend {
    /// Operator-supplied boot configuration. The methods on this type read
    /// `regions` (used as the region allowlist) and `instance_families`
    /// (used to pick an instance type) from it.
    cfg: AwsBootConfig,
    /// Per-region EC2 client cache, keyed by region name and guarded by an
    /// async read/write lock. `client_for` consults it under a read lock.
    // NOTE(review): nothing in the code shown here inserts into this map yet;
    // client construction is still a TODO in `client_for`.
    clients: tokio::sync::RwLock<HashMap<String, Ec2Client>>,
}

impl AwsBackend {
    /// Builds a backend from the operator's boot configuration.
    ///
    /// The configuration is cloned into the new value and the per-region
    /// client cache starts out empty.
    ///
    /// # Errors
    ///
    /// The current body never fails; the `Result` return type is part of
    /// the existing signature.
    pub async fn new(cfg: &AwsBootConfig) -> anyhow::Result<Self> {
        let cfg = cfg.clone();
        let clients = tokio::sync::RwLock::new(HashMap::new());
        Ok(Self { cfg, clients })
    }

    /// Returns the EC2 client for `region`.
    ///
    /// # Errors
    ///
    /// - `region` is not listed in `cfg.regions` (the operator allowlist).
    /// - No client is cached for `region`: building a fresh client is not
    ///   implemented in this prototype, so a cache miss is always an error.
    async fn client_for(&self, region: &str) -> anyhow::Result<Ec2Client> {
        // Guard: only regions the operator declared may be used.
        let allowed = self.cfg.regions.iter().any(|candidate| candidate == region);
        if !allowed {
            return Err(anyhow!("aws: region {region} not in operator allowlist"));
        }

        // Cache lookup. The read guard is a temporary of this statement, so
        // the lock is released before anything else runs.
        let cached = self.clients.read().await.get(region).cloned();
        if let Some(client) = cached {
            return Ok(client);
        }

        // TODO: build the client for real through aws_config::defaults(...)
        //       using the operator's static credentials.
        Err(anyhow!(
            "AwsBackend::client_for is a prototype stub; wire up \
             aws_config::defaults(...) with the operator's sealed \
             credentials and the declared region"
        ))
    }

    /// Chooses the EC2 instance type used for `grant`.
    ///
    /// Currently this is always the `large` size of the first configured
    /// instance family; `grant` is not consulted yet.
    ///
    /// # Errors
    ///
    /// Fails when `cfg.instance_families` is empty.
    fn pick_instance_type(&self, grant: &ComputeGrant<'_>) -> anyhow::Result<String> {
        // TODO: derive the size from the grant's manifest instead.
        let _ = grant;
        match self.cfg.instance_families.first() {
            Some(family) => Ok(format!("{family}.large")),
            None => Err(anyhow!("aws: no instance families configured")),
        }
    }
}

#[async_trait]
impl Backend for AwsBackend {
    /// Short identifier of this backend.
    fn name(&self) -> &'static str {
        "aws"
    }

    /// Reports what this backend can offer.
    ///
    /// The regions are a copy of the operator's configured regions. The
    /// CPU and RAM ceilings are reported as `u32::MAX`, and TDX is reported
    /// as unavailable.
    async fn capabilities(&self) -> anyhow::Result<Capabilities> {
        let regions: Vec<RegionLabel> = self.cfg.regions.iter().cloned().collect();
        Ok(Capabilities {
            regions,
            // No consumer TDX offering exists on AWS as of this prototype's
            // writing. TDX-required workloads are expected to go through
            // the bare-metal backend or a GCP Confidential VM configured
            // for TDX.
            tdx_capable: false,
            max_cpu_millicores: u32::MAX,
            max_ram_mib: u32::MAX,
        })
    }

    /// Says whether this backend is willing to take `grant`.
    ///
    /// The prototype accepts every grant without inspecting it.
    fn can_satisfy(&self, grant: &ComputeGrant<'_>) -> bool {
        // TODO: decline early when the grant requires TDX.
        let _ = grant;
        true
    }

    /// Provisions an instance for `grant` (prototype stub).
    ///
    /// Resolves the target region (the envelope's requested region, or
    /// `us-east-1` when none is given), obtains the region's client and
    /// picks an instance type, then reports that provisioning itself is not
    /// implemented.
    ///
    /// # Errors
    ///
    /// Always returns an error: either one propagated from `client_for` or
    /// `pick_instance_type`, or the final "prototype stub" error.
    async fn provision(
        &self,
        grant: &ComputeGrant<'_>,
        envelope: &Envelope,
    ) -> anyhow::Result<ProvisionedInstance> {
        let region = match envelope.requested_region() {
            Some(requested) => requested,
            None => "us-east-1",
        };
        // Order matters for which error surfaces first: client, then type.
        let client = self.client_for(region).await?;
        let instance_type = self.pick_instance_type(grant)?;

        // Intended flow once implemented:
        //   1. create_key_pair        - per-grant ed25519 SSH key
        //   2. create_security_group  - inbound TCP/22 from anywhere
        //   3. run_instances          - cloud-init fetches the requester's
        //                               image by post-hash from a mosaik
        //                               content store, verifies it, starts
        //                               it, and authorises the per-grant key
        //   4. wait for InstanceState::running
        //   5. describe_instances     - public DNS + host key
        //   6. return the ProvisionedInstance
        drop((client, instance_type));
        Err(anyhow!(
            "AwsBackend::provision is a prototype stub; implement \
             create_key_pair + create_security_group + \
             run_instances + wait_for_running"
        ))
    }

    /// Watches `instance` until it exits or `valid_to` passes (prototype
    /// stub).
    ///
    /// # Errors
    ///
    /// Always returns the "prototype stub" error; neither argument is used.
    async fn watch_until_exit(
        &self,
        instance: &ProvisionedInstance,
        valid_to: AlmanacTick,
    ) -> anyhow::Result<UsageMetrics> {
        // TODO: poll DescribeInstances, terminate at the deadline, then
        // compute and return cpu-seconds / ram-mib-seconds / network bytes /
        // storage bytes.
        let _ = (instance, valid_to);
        Err(anyhow!(
            "AwsBackend::watch_until_exit is a prototype stub"
        ))
    }

    /// Tears down `instance` (prototype stub).
    ///
    /// Performs no work and reports success unconditionally.
    async fn terminate(&self, instance: &ProvisionedInstance) -> anyhow::Result<()> {
        // TODO: TerminateInstances, DeleteKeyPair and DeleteSecurityGroup.
        let _ = instance;
        Ok(())
    }
}

Up: compute-bridge backends.