diff options
author | zer0def | 2020-11-03 07:45:18 +0100 |
---|---|---|
committer | zer0def | 2020-11-03 13:43:37 +0100 |
commit | 161ad664a34245d74f4f676cd0ce93c129f670c2 (patch) | |
tree | 318e021601c1f7fa157ea8e51ca96b6fa1aea1b4 /image_builder.sh | |
parent | 6d069ee0a97e157f399852496aa2ee864fec7d8a (diff) | |
download | aur-161ad664a34245d74f4f676cd0ce93c129f670c2.tar.gz |
Added initrd and image builds
Diffstat (limited to 'image_builder.sh')
-rwxr-xr-x | image_builder.sh | 521 |
1 files changed, 521 insertions, 0 deletions
#!/usr/bin/env bash
#
# Copyright (c) 2017-2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# image_builder.sh (added as a new file, mode 100755, blob 51d5dc420b8d).
#
# Builds a raw Kata Containers guest image from a rootfs directory. The image
# holds a single MBR partition with the rootfs, prefixed by a 2MB header that
# carries a second MBR plus the NVDIMM/DAX metadata (see usage() for a
# diagram). Loop devices and mounts are managed through udisksctl.
#
# The helpers die, error, info and OK are not defined here; they are expected
# to come from ../scripts/lib.sh (NOTE(review): they are presumed to write to
# stderr, since several functions below return values on stdout -- confirm).

set -e

if [ -n "${DEBUG}" ]; then
  set -x
fi

DOCKER_RUNTIME=${DOCKER_RUNTIME:-runc}

readonly script_name="${0##*/}"
# Declaration and assignment are split so a readlink/dirname failure is not
# masked by the exit status of 'readonly'.
script_dir=$(dirname "$(readlink -f "$0")")
readonly script_dir
readonly lib_file="${script_dir}/../scripts/lib.sh"

readonly ext4_format="ext4"
readonly xfs_format="xfs"

# ext4: percentage of the filesystem which may only be allocated by privileged processes.
readonly reserved_blocks_percentage=3

# Where the rootfs starts in MB
readonly rootfs_start=1

# Where the rootfs ends in MB
readonly rootfs_end=-1

# DAX header size
# * NVDIMM driver reads the device namespace information from nvdimm namespace (4K offset).
#   The MBR #1 + DAX metadata are saved in the first 2MB of the image.
readonly dax_header_sz=2

# DAX alignment
# * DAX huge pages [2]: 2MB alignment
# [2] - https://nvdimm.wiki.kernel.org/2mib_fs_dax
readonly dax_alignment=2

# Mounting a freshly created partition can fail until the device node shows
# up; retry this many times, sleeping this many seconds between attempts.
readonly mount_max_tries=20
readonly mount_retry_delay=3

# The list of systemd units and files that are not needed in Kata Containers
readonly -a systemd_units=(
  "systemd-coredump@"
  "systemd-journald"
  "systemd-journald-dev-log"
  "systemd-journal-flush"
  "systemd-random-seed"
  "systemd-timesyncd"
  "systemd-tmpfiles-setup"
  "systemd-udevd"
  "systemd-udevd-control"
  "systemd-udevd-kernel"
  "systemd-udev-trigger"
  "systemd-update-utmp"
)

readonly -a systemd_files=(
  "systemd-bless-boot-generator"
  "systemd-fstab-generator"
  "systemd-getty-generator"
  "systemd-gpt-auto-generator"
  "systemd-tmpfiles-cleanup.timer"
)

# Set a default value
AGENT_INIT=${AGENT_INIT:-no}

# Align image to (size in MB) according to different architecture.
case "$(uname -m)" in
  aarch64) readonly mem_boundary_mb=16 ;;
  *) readonly mem_boundary_mb=128 ;;
esac

# shellcheck source=../scripts/lib.sh
source "${lib_file}"

# Print the help text to stdout.
usage() {
  cat <<EOT
Usage: ${script_name} [options] <rootfs-dir>
    This script will create a Kata Containers image file of
    an adequate size based on the <rootfs-dir> directory.

Options:
    -h Show this help
    -o path to generate image file ENV: IMAGE
    -r Free space of the root partition in MB ENV: ROOT_FREE_SPACE
    -f Filesystem type to use ENV: FS_TYPE

Extra environment variables:
    AGENT_BIN:  Use it to change the expected agent binary name
    AGENT_INIT: Use kata agent as init process
    NSDAX_BIN:  Use to specify path to pre-compiled 'nsdax' tool.
    FS_TYPE:    Filesystem type to use. Only xfs and ext4 are supported.
    USE_DOCKER: If set will build image in a Docker Container (requries docker)
                DEFAULT: not set
    USE_PODMAN: If set and USE_DOCKER not set, will build image in a Podman Container (requries podman)
                DEFAULT: not set


Following diagram shows how the resulting image will look like

    .-----------.----------.---------------.-----------.
    | 0 - 512 B | 4 - 8 Kb |  2M - 2M+512B |    3M     |
    |-----------+----------+---------------+-----------+
    |   MBR #1  |   DAX    |    MBR #2     |  Rootfs   |
    '-----------'----------'---------------'-----------+
          |          |      ^      |        ^
          |          '-data-'      '--------'
          |                                 |
          '--------rootfs-partition---------'


MBR: Master boot record.
DAX: Metadata required by the NVDIMM driver to enable DAX in the guest [1][2] (struct nd_pfn_sb).
Rootfs: partition that contains the root filesystem (/usr, /bin, ect).

Kernels and hypervisors that support DAX/NVDIMM read the MBR #2, otherwise MBR #1 is read.

[1] - https://github.com/kata-containers/osbuilder/blob/master/image-builder/nsdax.gpl.c
[2] - https://github.com/torvalds/linux/blob/master/drivers/nvdimm/pfn.h

EOT
}

# Build the image inside a container by re-running this script there.
# Arguments:
#   $1 rootfs dir         $2 output image path   $3 filesystem type
#   $4 block size         $5 root free space MB  $6 agent binary name
#   $7 agent-as-init flag $8 container engine    $9 path to nsdax binary
# The container image is built from ${script_dir}; the rootfs, the image
# directory, ${script_dir} and ../scripts are bind-mounted into it.
build_with_container() {
  local rootfs="$1"
  local image="$2"
  local fs_type="$3"
  local block_size="$4"
  local root_free_space="$5"
  local agent_bin="$6"
  local agent_init="$7"
  local container_engine="$8"
  local nsdax_bin="$9"
  local container_image_name="image-builder-osbuilder"
  # 'local -r' rather than 'readonly': the latter would create a global
  # read-only variable and make a second call to this function fail.
  local -r mke2fs_conf="/etc/mke2fs.conf"
  # Extra engine arguments are kept in an array so each one stays a single
  # word even if a path contains whitespace.
  local -a shared_files=()
  local image_dir
  local image_name

  image_dir=$(readlink -f "$(dirname "${image}")")
  image_name=$(basename "${image}")

  "${container_engine}" build \
    --build-arg http_proxy="${http_proxy}" \
    --build-arg https_proxy="${https_proxy}" \
    -t "${container_image_name}" "${script_dir}"

  # Share the host mke2fs configuration (read-only) when there is one.
  if [ -f "${mke2fs_conf}" ]; then
    shared_files+=(-v "${mke2fs_conf}:${mke2fs_conf}:ro")
  fi

  # Make sure we use a compatible runtime to build rootfs
  # In case Clear Containers Runtime is installed we dont want to hit issue:
  # https://github.com/clearcontainers/runtime/issues/828
  "${container_engine}" run \
    --rm \
    --runtime "${DOCKER_RUNTIME}" \
    --privileged \
    --env AGENT_BIN="${agent_bin}" \
    --env AGENT_INIT="${agent_init}" \
    --env FS_TYPE="${fs_type}" \
    --env BLOCK_SIZE="${block_size}" \
    --env ROOT_FREE_SPACE="${root_free_space}" \
    --env NSDAX_BIN="${nsdax_bin}" \
    --env DEBUG="${DEBUG}" \
    -v /dev:/dev \
    -v "${script_dir}":"/osbuilder" \
    -v "${script_dir}/../scripts":"/scripts" \
    -v "${rootfs}":"/rootfs" \
    -v "${image_dir}":"/image" \
    "${shared_files[@]}" \
    "${container_image_name}" \
    bash "/osbuilder/${script_name}" -o "/image/${image_name}" /rootfs
}

# Validate that the rootfs directory looks bootable.
# Arguments: $1 rootfs dir
# Globals:   AGENT_INIT (read)
# Returns:   0 when the rootfs passes the checks, 1 otherwise. Dies when $1
#            is not a directory.
check_rootfs() {
  local rootfs="${1}"
  local init_path="/sbin/init"
  local init="${rootfs}${init_path}"
  local -a candidate_systemd_paths=(
    "/usr/lib/systemd/systemd"
    "/lib/systemd/systemd"
  )
  local systemd_path
  local systemd
  local agent_path
  local agent
  local found=""

  [ -d "${rootfs}" ] || die "${rootfs} is not a directory"

  # The kata rootfs image expect init and kata-agent to be installed
  if [ ! -x "${init}" ] && [ ! -L "${init}" ]; then
    error "${init_path} is not installed in ${rootfs}"
    return 1
  fi
  OK "init is installed"

  # check agent or systemd
  case "${AGENT_INIT}" in
    "no")
      for systemd_path in "${candidate_systemd_paths[@]}"; do
        systemd="${rootfs}${systemd_path}"
        if [ -x "${systemd}" ] || [ -L "${systemd}" ]; then
          found="yes"
          break
        fi
      done
      if [ -z "${found}" ]; then
        error "None of ${candidate_systemd_paths[*]} is installed in ${rootfs}"
        return 1
      fi
      OK "init is systemd"
      ;;

    "yes")
      agent_path="/sbin/init"
      agent="${rootfs}${agent_path}"
      if [ ! -x "${agent}" ]; then
        error "${agent_path} is not installed in ${rootfs}. Use AGENT_BIN env variable to change the expected agent binary name"
        return 1
      fi
      # checksum must be different to system
      for systemd_path in "${candidate_systemd_paths[@]}"; do
        systemd="${rootfs}${systemd_path}"
        if [ -f "${systemd}" ] && cmp -s "${systemd}" "${agent}"; then
          error "The agent is not the init process. ${agent_path} is systemd"
          return 1
        fi
      done

      OK "Agent installed"
      ;;

    *)
      error "Invalid value for AGENT_INIT: '${AGENT_INIT}'. Use to 'yes' or 'no'"
      return 1
      ;;
  esac

  return 0
}

# Mount a partition through udisksctl, retrying while the mount fails.
# Arguments: $1 partition device (e.g. <loop device>p1)
# Outputs:   the last word of udisksctl's message (the mount point) on stdout
# Returns:   0 on success, 1 once mount_max_tries attempts have failed
mount_partition() {
  local partition="$1"
  local output=""
  local attempt

  for ((attempt = 1; attempt <= mount_max_tries; attempt++)); do
    if output="$(udisksctl mount -b "${partition}")" && [ -n "${output}" ]; then
      echo "${output##* }"
      return 0
    fi
    sleep "${mount_retry_delay}"
  done

  error "Could not mount ${partition} after ${mount_max_tries} attempts" >&2
  return 1
}

# Find the smallest disk size (in MB) whose formatted partition has more
# available space than the rootfs needs, by creating, formatting and mounting
# scratch images of increasing size.
# Arguments: $1 rootfs dir  $2 filesystem type  $3 block size
# Outputs:   the disk size in MB on stdout
# Returns:   0 on success, 1 if no size within the attempt budget fits
calculate_required_disk_size() {
  local rootfs="$1"
  local fs_type="$2"
  local block_size="$3"

  local -r max_tries=20
  local -r increment=10
  local rootfs_size_mb
  local image
  local img_size
  local device
  local mount_dir
  local avail
  local i

  rootfs_size_mb=$(du -B 1MB -s "${rootfs}" | awk '{print $1}')
  image="$(mktemp)"

  for ((i = 1; i <= max_tries; i++)); do
    img_size="$((rootfs_size_mb + (i * increment)))"
    create_disk "${image}" "${img_size}" "${fs_type}" "${rootfs_start}" > /dev/null 2>&1
    if ! device="$(setup_loop_device "${image}")"; then
      continue
    fi

    if ! format_loop "${device}" "${block_size}" "${fs_type}" > /dev/null 2>&1; then
      # Do not leave the loop device and the scratch image behind.
      udisksctl loop-delete -b "${device}" &> /dev/null || true
      rm -f "${image}"
      die "Could not format loop device: ${device}"
    fi

    if ! mount_dir="$(mount_partition "${device}p1")"; then
      udisksctl loop-delete -b "${device}" &> /dev/null || true
      rm -f "${image}"
      die "Could not mount partition: ${device}p1"
    fi

    # Available space in MiB, with the unit suffix and padding stripped.
    avail="$(df -BM --output=avail "${mount_dir}" | tail -n1 | sed 's/[M ]//g')"
    udisksctl unmount -b "${device}p1" &> /dev/null
    udisksctl loop-delete -b "${device}" &> /dev/null

    if [ "${avail}" -gt "${rootfs_size_mb}" ]; then
      rm -f "${image}"
      echo "${img_size}"
      return 0
    fi
  done

  rm -f "${image}"
  error "Could not calculate the required disk size"
  # Signal the failure: an empty result must not be used as a size.
  return 1
}

# Calculate image size based on the rootfs and free space
# Arguments: $1 rootfs dir  $2 extra free space in MB (may be empty)
#            $3 filesystem type  $4 block size
# Outputs:   the image size in MB, rounded up to a multiple of
#            mem_boundary_mb, on stdout
# Returns:   0 on success, 1 if the disk size could not be determined
calculate_img_size() {
  local rootfs="$1"
  local root_free_space_mb="$2"
  local fs_type="$3"
  local block_size="$4"
  local disk_size
  local img_size
  local remaining

  # rootfs start + DAX header size + rootfs end
  local reserved_size_mb=$((rootfs_start + dax_header_sz + rootfs_end))

  # errexit is not active inside command substitutions, so the status is
  # checked explicitly.
  disk_size="$(calculate_required_disk_size "${rootfs}" "${fs_type}" "${block_size}")" || return 1
  [ -n "${disk_size}" ] || return 1

  img_size="$((disk_size + reserved_size_mb))"
  if [ -n "${root_free_space_mb}" ]; then
    img_size="$((img_size + root_free_space_mb))"
  fi

  remaining="$((img_size % mem_boundary_mb))"
  if [ "${remaining}" != "0" ]; then
    img_size=$((img_size + mem_boundary_mb - remaining))
  fi

  echo "${img_size}"
}

# Attach an image file to a loop device through udisksctl.
# Arguments: $1 image file
# Outputs:   the loop device path on stdout
# Returns:   0 on success, 1 if udisksctl fails or reports no device
setup_loop_device() {
  local image="$1"
  local device

  # Get the loop device bound to the image file (requires /dev mounted in the
  # image build system and root privileges)
  device="$(udisksctl loop-setup -f "${image}")" || return 1
  # Keep the last word of the message and drop a trailing period, if any.
  device="${device##* }"
  device="${device%.*}"
  [ -n "${device}" ] || return 1

  echo "${device}"
  return 0
}

# Create a filesystem on the first partition of a loop device.
# Arguments: $1 loop device  $2 block size  $3 filesystem type
# Returns:   0 on success, non-zero if the filesystem type is unsupported or
#            a formatting tool fails
format_loop() {
  local device="$1"
  local block_size="$2"
  local fs_type="$3"
  local output

  case "${fs_type}" in
    "${ext4_format}")
      mkfs.ext4 -q -F -E root_owner -b "${block_size}" "${device}p1" || return 1
      info "Set filesystem reserved blocks percentage to ${reserved_blocks_percentage}%"
      tune2fs -m "${reserved_blocks_percentage}" "${device}p1" || return 1
      ;;

    "${xfs_format}")
      # DAX and reflink cannot be used together!
      # Explicitly disable reflink, if it fails then reflink
      # is not supported and '-m reflink=0' is not needed.
      if ! output="$(mkfs.xfs -m reflink=0 -q -f -b size="${block_size}" "${device}p1" 2>&1)"; then
        if grep -q "unknown option" <<< "${output}"; then
          mkfs.xfs -q -f -b size="${block_size}" "${device}p1" || return 1
        else
          # Any other failure is a real error and must not be swallowed.
          error "${output}"
          return 1
        fi
      fi
      ;;

    *)
      error "Unsupported fs type: ${fs_type}"
      return 1
      ;;
  esac
}

# Create a raw image file with an msdos label and one primary partition.
# Arguments: $1 image file  $2 image size in MB  $3 filesystem type
#            $4 partition start in MB
# Returns:   0 on success, 1 if qemu-img or parted fails
create_disk() {
  local image="$1"
  local img_size="$2"
  local fs_type="$3"
  local part_start="$4"

  info "Creating raw disk with size ${img_size}M"
  qemu-img create -q -f raw "${image}" "${img_size}M" || return 1
  OK "Image file created"

  # Kata runtime expect an image with just one partition
  # The partition is the rootfs content
  info "Creating partitions"
  parted -s -a optimal "${image}" -- \
    mklabel msdos \
    mkpart primary "${fs_type}" "${part_start}M" "${rootfs_end}M" || return 1

  OK "Partitions created"
}

# Create the partitioned image, format it and populate it with the rootfs.
# Arguments: $1 rootfs dir  $2 image file  $3 image size in MB
#            $4 filesystem type  $5 block size
create_rootfs_image() {
  local rootfs="$1"
  local image="$2"
  local img_size="$3"
  local fs_type="$4"
  local block_size="$5"
  local device
  local mount_dir
  local u

  create_disk "${image}" "${img_size}" "${fs_type}" "${rootfs_start}"

  if ! device="$(setup_loop_device "${image}")"; then
    die "Could not setup loop device"
  fi

  if ! format_loop "${device}" "${block_size}" "${fs_type}"; then
    die "Could not format loop device: ${device}"
  fi

  info "Mounting root partition"
  if ! mount_dir="$(mount_partition "${device}p1")"; then
    die "Could not mount partition: ${device}p1"
  fi
  OK "root partition mounted"

  info "Copying content from rootfs to root partition"
  cp -a "${rootfs}"/* "${mount_dir}"
  sync
  OK "rootfs copied"

  info "Removing unneeded systemd services and sockets"
  for u in "${systemd_units[@]}"; do
    find "${mount_dir}" \
      -path "${mount_dir}/lost+found" -prune -o -type f \( \
      -name "${u}.service" -o \
      -name "${u}.socket" \) \
      -exec rm -f {} \;
  done

  info "Removing unneeded systemd files"
  for u in "${systemd_files[@]}"; do
    find "${mount_dir}" \
      -path "${mount_dir}/lost+found" -prune -o \
      -type f -name "${u}" -exec rm -f {} \;
  done

  info "Creating empty machine-id to allow systemd to bind-mount it"
  touch "${mount_dir}/etc/machine-id"

  info "Unmounting root partition"
  udisksctl unmount -b "${device}p1"
  OK "Root partition unmounted"

  if [ "${fs_type}" = "${ext4_format}" ]; then
    fsck.ext4 -D -y "${device}p1"
  fi

  udisksctl loop-delete -b "${device}"
}

# Prepend the MBR #1 + DAX metadata header to an existing rootfs image.
# Arguments: $1 image file (replaced in place)  $2 final image size in MB
#            $3 filesystem type
#            $4 path to the nsdax tool; when empty it is compiled from
#               ${script_dir}/nsdax.gpl.c and removed again on exit
set_dax_header() {
  local image="$1"
  local img_size="$2"
  local fs_type="$3"
  local nsdax_bin="$4"

  # rootfs start + DAX header size
  local rootfs_offset=$((rootfs_start + dax_header_sz))
  local header_image="${image}.header"
  local dax_image="${image}.dax"
  local dax_header_bytes=$((dax_header_sz * 1024 * 1024))
  local dax_alignment_bytes=$((dax_alignment * 1024 * 1024))

  rm -f "${dax_image}" "${header_image}"

  create_disk "${header_image}" "${img_size}" "${fs_type}" "${rootfs_offset}"

  info "Set DAX metadata"
  # Set metadata header
  # Issue: https://github.com/kata-containers/osbuilder/issues/240
  if [ -z "${nsdax_bin}" ]; then
    nsdax_bin="${script_dir}/nsdax"
    gcc -O2 "${script_dir}/nsdax.gpl.c" -o "${nsdax_bin}"
    # nsdax_bin is local and gone by the time the trap runs, so its value is
    # embedded now, shell-quoted to survive whitespace in the path.
    # shellcheck disable=SC2064
    trap "rm -f -- $(printf '%q' "${nsdax_bin}")" EXIT
  fi
  "${nsdax_bin}" "${header_image}" "${dax_header_bytes}" "${dax_alignment_bytes}"
  sync

  touch "${dax_image}"
  # Copy MBR #1 + DAX metadata
  dd if="${header_image}" of="${dax_image}" bs="${dax_header_sz}M" count=1
  # Copy MBR #2 + Rootfs
  dd if="${image}" of="${dax_image}" oflag=append conv=notrunc
  # final image
  mv "${dax_image}" "${image}"
  sync

  rm -f "${dax_image}" "${header_image}"
}

# Entry point: parse options, then build the image either inside a container
# (USE_DOCKER / USE_PODMAN) or directly on the host.
main() {
  # variables that can be overwritten by environment variables
  local agent_bin="${AGENT_BIN:-kata-agent}"
  local agent_init="${AGENT_INIT:-no}"
  local fs_type="${FS_TYPE:-${ext4_format}}"
  local image="${IMAGE:-kata-containers.img}"
  local block_size="${BLOCK_SIZE:-4096}"
  local root_free_space="${ROOT_FREE_SPACE:-}"
  local nsdax_bin="${NSDAX_BIN:-}"
  local opt
  local rootfs
  local container_engine
  local img_size
  local rootfs_img_size

  while getopts "ho:r:f:" opt; do
    case "$opt" in
      h)
        usage
        return 0
        ;;
      o) image="${OPTARG}" ;;
      r) root_free_space="${OPTARG}" ;;
      f) fs_type="${OPTARG}" ;;
      *)
        # Unknown option or missing argument: getopts already reported it.
        usage >&2
        exit 1
        ;;
    esac
  done

  shift $((OPTIND - 1))
  # Check for the argument before resolving it: 'readlink -f ""' fails and
  # would abort the script under 'set -e' without showing the help text.
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    usage >&2
    exit 1
  fi
  rootfs="$(readlink -f "$1")" || die "Could not resolve rootfs path: $1"

  if [ -n "${USE_DOCKER}" ]; then
    container_engine="docker"
  elif [ -n "${USE_PODMAN}" ]; then
    container_engine="podman"
  fi

  if [ -n "$container_engine" ]; then
    build_with_container "${rootfs}" \
      "${image}" "${fs_type}" "${block_size}" \
      "${root_free_space}" "${agent_bin}" \
      "${agent_init}" "${container_engine}" \
      "${nsdax_bin}"
    exit $?
  fi

  if ! check_rootfs "${rootfs}"; then
    die "Invalid rootfs"
  fi

  if ! img_size=$(calculate_img_size "${rootfs}" "${root_free_space}" "${fs_type}" "${block_size}"); then
    die "Could not calculate the image size"
  fi

  # the first 2M are for the first MBR + NVDIMM metadata and were already
  # consider in calculate_img_size
  rootfs_img_size=$((img_size - dax_header_sz))
  create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \
    "${fs_type}" "${block_size}"

  # insert at the beginning of the image the MBR + DAX header
  set_dax_header "${image}" "${img_size}" "${fs_type}" "${nsdax_bin}"
}

main "$@"