diff --git a/firecracker/__init__.py b/firecracker/__init__.py index e213855fe..e24f31fd8 100644 --- a/firecracker/__init__.py +++ b/firecracker/__init__.py @@ -1 +1,2 @@ -from firecracker.microvm import MicroVM +from .microvm import MicroVM +from .config import FirecrackerConfig diff --git a/firecracker/config.py b/firecracker/config.py new file mode 100644 index 000000000..ad9bd97de --- /dev/null +++ b/firecracker/config.py @@ -0,0 +1,57 @@ +from typing import List, Optional + +from pydantic import BaseModel, PositiveInt +from vm_supervisor.models import FilePath + +VSOCK_PATH = "/tmp/v.sock" + + +class BootSource(BaseModel): + kernel_image_path: FilePath = "vmlinux.bin" + boot_args: str = "console=ttyS0 reboot=k panic=1 pci=off " \ + "ro noapic nomodules random.trust_cpu=on" + + @staticmethod + def args(enable_console: bool = True): + default = "reboot=k panic=1 pci=off ro noapic nomodules random.trust_cpu=on" + if enable_console: + return "console=ttyS0 " + default + else: + return default + + +class Drive(BaseModel): + drive_id: str = "rootfs" + path_on_host: FilePath = "./runtimes/aleph-alpine-3.13-python/rootfs.ext4" + is_root_device: bool = True + is_read_only: bool = True + + +class MachineConfig(BaseModel): + vcpu_count: PositiveInt = 1 + mem_size_mib: PositiveInt = 128 + ht_enabled: bool = False + + +class Vsock(BaseModel): + vsock_id: str = "1" + guest_cid: PositiveInt = 3 + uds_path: str = VSOCK_PATH + + +class NetworkInterface(BaseModel): + iface_id: str = "eth0" + guest_mac: str = "AA:FC:00:00:00:01" + host_dev_name: str + + +class FirecrackerConfig(BaseModel): + boot_source: BootSource + drives: List[Drive] + machine_config: MachineConfig + vsock: Optional[Vsock] + network_interfaces: Optional[List[NetworkInterface]] + + class Config: + allow_population_by_field_name = True + alias_generator = lambda x: x.replace('_', '-') diff --git a/firecracker/microvm.py b/firecracker/microvm.py index 187ddea8d..edcb27c53 100644 --- a/firecracker/microvm.py +++ b/firecracker/microvm.py @@ -8,12 +8,15 @@ from os import getuid from pathlib import Path from pwd import getpwnam -from typing import Optional, Tuple, Dict +from tempfile import NamedTemporaryFile +from typing import Optional, Tuple, Dict, List import aiohttp from aiohttp import ClientResponse +from firecracker.config import FirecrackerConfig from vm_supervisor.models import FilePath +from .config import Drive logger = logging.getLogger(__name__) @@ -70,6 +73,8 @@ class MicroVM: network_interface: Optional[str] = None stdout_task: Optional[Task] = None stderr_task: Optional[Task] = None + config_file = None + drives: List[Drive] = None @property def jailer_path(self): @@ -109,10 +114,7 @@ def __init__( self.use_jailer = use_jailer self.firecracker_bin_path = firecracker_bin_path self.jailer_bin_path = jailer_bin_path - - def get_session(self) -> aiohttp.ClientSession: - conn = aiohttp.UnixConnector(path=self.socket_path) - return aiohttp.ClientSession(connector=conn) + self.drives = [] def prepare_jailer(self): system(f"rm -fr {self.jailer_path}") @@ -132,35 +134,54 @@ def prepare_jailer(self): # system(f"cp disks/rootfs.ext4 {self.jailer_path}/opt") # system(f"cp hello-vmlinux.bin {self.jailer_path}/opt") - async def start(self) -> asyncio.subprocess.Process: + async def start(self, config: FirecrackerConfig) -> asyncio.subprocess.Process: if self.use_jailer: - return await self.start_jailed_firecracker() + return await self.start_jailed_firecracker(config) else: - return await self.start_firecracker() + return await self.start_firecracker(config) + + async def start_firecracker(self, config: FirecrackerConfig) -> asyncio.subprocess.Process: - async def start_firecracker(self) -> asyncio.subprocess.Process: - logger.debug( - " ".join((self.firecracker_bin_path, "--api-sock", self.socket_path)) - ) if os.path.exists(VSOCK_PATH): os.remove(VSOCK_PATH) if os.path.exists(self.socket_path): os.remove(self.socket_path) + + config_file = NamedTemporaryFile() + config_file.write(config.json(by_alias=True, exclude_none=True, indent=4).encode()) + config_file.flush() + self.config_file = config_file + print(self.config_file) + + logger.debug( + " ".join((self.firecracker_bin_path, "--api-sock", self.socket_path, + "--config-file", config_file.name)) + ) + self.proc = await asyncio.create_subprocess_exec( self.firecracker_bin_path, "--api-sock", self.socket_path, + "--config-file", + config_file.name, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) return self.proc - async def start_jailed_firecracker(self) -> asyncio.subprocess.Process: + async def start_jailed_firecracker(self, config: FirecrackerConfig) -> asyncio.subprocess.Process: if not self.jailer_bin_path: raise ValueError("Jailer binary path is missing") uid = str(getpwnam("jailman").pw_uid) gid = str(getpwnam("jailman").pw_gid) + + config_file = NamedTemporaryFile(dir=f"{self.jailer_path}/tmp/", suffix='.json') + config_file.write(config.json(by_alias=True, exclude_none=True, indent=4).encode()) + config_file.flush() + os.chmod(config_file.name, 0o644) + self.config_file = config_file + logger.debug( " ".join( ( @@ -173,9 +194,13 @@ async def start_jailed_firecracker(self) -> asyncio.subprocess.Process: uid, "--gid", gid, + "--", + "--config-file", + "/tmp/" + os.path.basename(config_file.name), ) ) ) + self.proc = await asyncio.create_subprocess_exec( self.jailer_bin_path, "--id", @@ -186,138 +211,90 @@ async def start_jailed_firecracker(self) -> asyncio.subprocess.Process: uid, "--gid", gid, + "--", + "--config-file", + "/tmp/" + os.path.basename(config_file.name), stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) return self.proc - async def socket_is_ready(self, delay=0.01): - while not os.path.exists(self.socket_path): - await asyncio.sleep(delay) + def enable_kernel(self, kernel_image_path: str) -> str: + """Make a kernel available to the VM. - async def set_boot_source( - self, kernel_image_path: str, enable_console: bool = False - ): + Creates a symlink to the kernel file if jailer is in use. + """ if self.use_jailer: kernel_filename = Path(kernel_image_path).name jailer_kernel_image_path = f"/opt/{kernel_filename}" os.link(kernel_image_path, f"{self.jailer_path}{jailer_kernel_image_path}") kernel_image_path = jailer_kernel_image_path + return kernel_image_path - console = "console=ttyS0" if enable_console else "" - data = { - "kernel_image_path": kernel_image_path, - # Add console=ttyS0 for debugging, but it makes the boot twice slower - "boot_args": f"{console} reboot=k panic=1 pci=off ro noapic nomodules random.trust_cpu=on", - } - async with self.get_session() as session: - response: ClientResponse = await session.put( - "http://localhost/boot-source", json=data - ) - response.raise_for_status() + def enable_rootfs(self, path_on_host: str) -> str: + """Make a rootfs available to the VM. - async def set_rootfs(self, path_on_host: str): + Creates a symlink to the rootfs file if jailer is in use. + """ if self.use_jailer: rootfs_filename = Path(path_on_host).name jailer_path_on_host = f"/opt/{rootfs_filename}" os.link(path_on_host, f"{self.jailer_path}/{jailer_path_on_host}") - path_on_host = jailer_path_on_host - - data = { - "drive_id": "rootfs", - "path_on_host": path_on_host, - "is_root_device": True, - "is_read_only": True, - } - async with self.get_session() as session: - response = await session.put("http://localhost/drives/rootfs", json=data) - response.raise_for_status() - - async def mount(self, volume_paths: Dict[str, FilePath]): - for index, (path, partition_path) in enumerate(volume_paths.items()): - device_name = f"vd{string.ascii_lowercase[index + 1]}" - if self.use_jailer: - partition_filename = Path(partition_path).name - jailer_path_on_host = f"/opt/{partition_filename}" - os.link(partition_path, f"{self.jailer_path}/{jailer_path_on_host}") - partition_path = jailer_path_on_host - - data = { - "drive_id": device_name, - "path_on_host": partition_path, - "is_root_device": False, - "is_read_only": True, - } - async with self.get_session() as session: - response = await session.put(f"http://localhost/drives/{device_name}", json=data) - response.raise_for_status() - - - async def set_vsock(self): - data = { - "vsock_id": "1", - "guest_cid": 3, - "uds_path": VSOCK_PATH, - } - async with self.get_session() as session: - response = await session.put("http://localhost/vsock", json=data) - response.raise_for_status() - - async def set_network(self, interface: str = "eth0"): - """Configure the host network with a tap interface to the VM.""" - logger.debug("Network setup") + return jailer_path_on_host + else: + return path_on_host + + def compute_device_name(self, index: int) -> str: + return f"vd{string.ascii_lowercase[index + 1]}" + + def enable_drive(self, drive_path: str) -> Drive: + """Make a volume available to the VM. + + Creates a symlink to the volume file if jailer is in use. + """ + index = len(self.drives) + device_name = self.compute_device_name(index) + if self.use_jailer: + drive_filename = Path(drive_path).name + jailer_path_on_host = f"/opt/{drive_filename}" + os.link(drive_path, f"{self.jailer_path}/{jailer_path_on_host}") + drive_path = jailer_path_on_host + + drive = Drive( + drive_id=device_name, + path_on_host=FilePath(drive_path), + is_root_device=False, + is_read_only=True, + ) + self.drives.append(drive) + return drive + + async def create_network_interface(self, interface: str = "eth0") -> str: + logger.debug("Create network interface") + + assert self.network_interface is None # Only one is supported at the moment + assert self.network_tap is None self.network_interface = interface - name = f"vmtap{self.vm_id}" - self.network_tap = name + host_dev_name = f"vmtap{self.vm_id}" + self.network_tap = host_dev_name - system(f"ip tuntap add {name} mode tap") + system(f"ip tuntap add {host_dev_name} mode tap") system( - f"ip addr add {self.host_ip}/24 dev {name}" + f"ip addr add {self.host_ip}/24 dev {host_dev_name}" ) - system(f"ip link set {name} up") + system(f"ip link set {host_dev_name} up") system('sh -c "echo 1 > /proc/sys/net/ipv4/ip_forward"') # TODO: Don't fill iptables with duplicate rules; purge rules on delete system(f"iptables -t nat -A POSTROUTING -o {interface} -j MASQUERADE") system( "iptables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT" ) - system(f"iptables -A FORWARD -i {name} -o {interface} -j ACCEPT") - - data = { - "iface_id": "eth0", - "guest_mac": f"AA:FC:00:00:00:01", - "host_dev_name": name, - } - async with self.get_session() as session: - response = await session.put( - "http://localhost/network-interfaces/eth0", json=data - ) - logger.debug(response) - logger.debug(await response.text()) - response.raise_for_status() - - async def set_resources(self, vcpus: int = 1, memory: int = 128, - ht_enabled: bool = False): - """Set machine resources (number of CPU cores, memory)""" - data = { - "vcpu_count": vcpus, - "mem_size_mib": memory, - "ht_enabled": ht_enabled, - } - async with self.get_session() as session: - response = await session.put("http://localhost/machine-config", json=data) - response.raise_for_status() - - async def start_instance(self): - data = { - "action_type": "InstanceStart", - } - async with self.get_session() as session: - response = await session.put("http://localhost/actions", json=data) - response.raise_for_status() + system(f"iptables -A FORWARD -i {host_dev_name} -o {interface} -j ACCEPT") + + return host_dev_name async def print_logs(self): while not self.proc: diff --git a/vm_supervisor/vm/firecracker_microvm.py b/vm_supervisor/vm/firecracker_microvm.py index 8961fb9af..3ce29941a 100644 --- a/vm_supervisor/vm/firecracker_microvm.py +++ b/vm_supervisor/vm/firecracker_microvm.py @@ -1,7 +1,6 @@ import asyncio import dataclasses import logging -import string from dataclasses import dataclass from enum import Enum from multiprocessing import Process, set_start_method @@ -14,6 +13,8 @@ from aleph_message.models import ProgramContent from aleph_message.models.program import MachineResources, MachineVolume +from firecracker.config import BootSource, Drive, MachineConfig, FirecrackerConfig, Vsock, \ + NetworkInterface from firecracker.microvm import MicroVM, setfacl, Encoding from guest_api.__main__ import run_guest_api from ..conf import settings @@ -198,21 +199,40 @@ async def setup(self): jailer_bin_path=settings.JAILER_PATH, ) fvm.prepare_jailer() - await fvm.start() + + config = FirecrackerConfig( + boot_source=BootSource( + kernel_image_path=FilePath(fvm.enable_kernel(self.resources.kernel_image_path)), + boot_args=BootSource.args(enable_console=self.enable_console), + ), + drives=[ + Drive( + drive_id="rootfs", + path_on_host=FilePath(fvm.enable_rootfs(self.resources.rootfs_path)), + is_root_device=True, + is_read_only=True, + ), + ] + [ + fvm.enable_drive(volume) + for volume in self.resources.volume_paths.values() + ], + machine_config=MachineConfig( + vcpu_count=self.hardware_resources.vcpus, + mem_size_mib=self.hardware_resources.memory, + ), + vsock=Vsock(), + network_interfaces = [ + NetworkInterface( + iface_id="eth0", + host_dev_name=await fvm.create_network_interface(interface="eth0"), + ) + ] if self.enable_networking else [], + ) + + logger.debug(config.json(by_alias=True, exclude_none=True, indent=4)) + try: - await fvm.socket_is_ready() - await fvm.set_boot_source( - self.resources.kernel_image_path, - enable_console=self.enable_console, - ) - await fvm.set_rootfs(self.resources.rootfs_path) - await fvm.mount(self.resources.volume_paths) - - await fvm.set_vsock() - await fvm.set_resources(vcpus=self.hardware_resources.vcpus, - memory=self.hardware_resources.memory) - if self.enable_networking: - await fvm.set_network(interface=settings.NETWORK_INTERFACE) + await fvm.start(config) logger.debug("setup done") self.fvm = fvm except Exception: @@ -229,10 +249,7 @@ async def start(self): if self.enable_console: fvm.start_printing_logs() - await asyncio.gather( - fvm.start_instance(), - fvm.wait_for_init(), - ) + await fvm.wait_for_init() logger.debug(f"started fvm {self.vm_id}") async def configure(self): @@ -246,7 +263,7 @@ async def configure(self): # Start at vdb since vda is already used by the root filesystem volumes: List[Volume] = [ - Volume(mount=volume.mount, device=f"vd{string.ascii_lowercase[index+1]}") + Volume(mount=volume.mount, device=self.fvm.drives[index].drive_id) for index, volume in enumerate(self.resources.volumes) ]