From 415d6cfd039814d9f2dd37ad8b56c5e03226479f Mon Sep 17 00:00:00 2001
From: Kristian Klausen <kristian@klausen.dk>
Date: Fri, 22 Jul 2022 19:28:25 +0200
Subject: [PATCH] gitlab_runner: Refactor libvirt-executor

Changes:
- Switch to arch-boxes' base image
- Verify the base image's signature
- Use the new "latest" symlink, instead of parsing the HTML for finding
  the latest arch-boxes image[1]
- Create the base image by using arch-chroot and friends, instead of
  creating a full-blown VM
- Create the VMs from domain XML template instead of virt-clone
- Switch mirror to geo.mirror.pkgbuild.com
- Try to follow "filesystem hierarchy" standards for where to place
  configuration (id_rsa) and "vendor data" (arch-boxes.asc and
  domain_template.xml)
- Misc fixes and cleanups

[1] https://gitlab.archlinux.org/archlinux/infrastructure/-/merge_requests/552
---
 roles/gitlab_runner/files/arch-boxes.asc      |  16 ++
 roles/gitlab_runner/files/domain_template.xml |  41 +++++
 roles/gitlab_runner/files/libvirt-executor    | 153 +++++------------
 .../files/libvirt-executor-update-base-image  |  58 +++++++
 ...libvirt-executor-update-base-image.service |   6 +
 ... libvirt-executor-update-base-image.timer} |   0
 .../libvirt-executor-vm-template.service      |   6 -
 roles/gitlab_runner/files/user-data           |   8 -
 roles/gitlab_runner/handlers/main.yml         |   3 -
 roles/gitlab_runner/tasks/main.yml            |  37 +++--
 roles/libvirt/tasks/main.yml                  |   2 +-
 11 files changed, 187 insertions(+), 143 deletions(-)
 create mode 100644 roles/gitlab_runner/files/arch-boxes.asc
 create mode 100644 roles/gitlab_runner/files/domain_template.xml
 create mode 100755 roles/gitlab_runner/files/libvirt-executor-update-base-image
 create mode 100644 roles/gitlab_runner/files/libvirt-executor-update-base-image.service
 rename roles/gitlab_runner/files/{libvirt-executor-vm-template.timer => libvirt-executor-update-base-image.timer} (100%)
 delete mode 100644 roles/gitlab_runner/files/libvirt-executor-vm-template.service
 delete mode 100644 roles/gitlab_runner/files/user-data

diff --git a/roles/gitlab_runner/files/arch-boxes.asc b/roles/gitlab_runner/files/arch-boxes.asc
new file mode 100644
index 000000000..8093217cb
--- /dev/null
+++ b/roles/gitlab_runner/files/arch-boxes.asc
@@ -0,0 +1,16 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mDMEYpOJrBYJKwYBBAHaRw8BAQdAcSZilBvR58s6aD2qgsDE7WpvHQR2R5exQhNQ
+yuILsTq0JWFyY2gtYm94ZXMgPGFyY2gtYm94ZXNAYXJjaGxpbnV4Lm9yZz6IkAQT
+FggAOBYhBBuaFphKToy0SHEtKuC3i/QybG+PBQJik4msAhsBBQsJCAcCBhUKCQgL
+AgQWAgMBAh4BAheAAAoJEOC3i/QybG+P81YA/A7HUftMGpzlJrPYBFPqW0nFIh7m
+sIZ5yXxh7cTgqtJ7AQDFKSrulrsDa6hsqmEC11PWhv1VN6i9wfRvb1FwQPF6D7gz
+BGKTiecWCSsGAQQB2kcPAQEHQBzLxT2+CwumKUtfi9UEXMMx/oGgpjsgp2ehYPBM
+N8ejiPUEGBYIACYWIQQbmhaYSk6MtEhxLSrgt4v0MmxvjwUCYpOJ5wIbAgUJCWYB
+gACBCRDgt4v0Mmxvj3YgBBkWCAAdFiEEZW5MWsHMO4blOdl+NDY1poWakXQFAmKT
+iecACgkQNDY1poWakXTwaQEAwymt4PgXltHUH8GVUB6Xu7Gb5o6LwV9fNQJc1CMl
+7CABAJw0We0w1q78cJ8uWiomE1MHdRxsuqbuqtsCn2Dn6/0Cj+4A/Apcqm7uzFam
+pA5u9yvz1VJBWZY1PRBICBFSkuRtacUCAQC7YNurPPoWDyjiJPrf0Vzaz8UtKp0q
+BSF/a3EoocLnCA==
+=APeC
+-----END PGP PUBLIC KEY BLOCK-----
diff --git a/roles/gitlab_runner/files/domain_template.xml b/roles/gitlab_runner/files/domain_template.xml
new file mode 100644
index 000000000..c383fef64
--- /dev/null
+++ b/roles/gitlab_runner/files/domain_template.xml
@@ -0,0 +1,41 @@
+<domain type='kvm'>
+  <name>$vm_name</name>
+  <memory unit='MiB'>1024</memory>
+  <vcpu>4</vcpu>
+  <os>
+    <type arch='x86_64' machine='q35'>hvm</type>
+  </os>
+  <features>
+    <acpi/>
+    <apic/>
+  </features>
+  <cpu mode='host-passthrough'/>
+  <!-- https://github.com/virt-manager/virt-manager/blob/7ae10b5566ac4d8c7afd94499a9733ed42cf3d07/virtinst/domain/clock.py#L49-L59 -->
+  <clock offset='utc'>
+    <timer name='rtc' tickpolicy='catchup'/>
+    <timer name='pit' tickpolicy='delay'/>
+    <timer name='hpet' present='no'/>
+  </clock>
+  <devices>
+    <disk type='file' device='disk'>
+      <driver name='qemu' type='qcow2'/>
+      <source file='/var/lib/libvirt/images/$vm_name.qcow2'/>
+      <target dev='sdb' bus='scsi'/>
+    </disk>
+    <controller type='pci' model='pcie-root'/>
+    <controller type='scsi' model='virtio-scsi'/>
+    <controller type='usb' model='none'/>
+    <interface type='network'>
+      <source network='default'/>
+      <model type='virtio'/>
+    </interface>
+    <rng model='virtio'>
+      <backend model='random'>/dev/urandom</backend>
+    </rng>
+    <video>
+      <model type='virtio'/>
+    </video>
+    <graphics type='vnc'/>
+  </devices>
+</domain>
+
diff --git a/roles/gitlab_runner/files/libvirt-executor b/roles/gitlab_runner/files/libvirt-executor
index d30f37a8a..b32518fd1 100755
--- a/roles/gitlab_runner/files/libvirt-executor
+++ b/roles/gitlab_runner/files/libvirt-executor
@@ -1,148 +1,85 @@
 #!/usr/bin/env bash
 set -o nounset -o errexit -o pipefail
-readonly MIRROR="https://mirror.pkgbuild.com"
-readonly LIBVIRT_DEFAULT_POOL_PATH="/var/lib/libvirt/images"
-readonly STATE_DIR="/usr/local/lib/libvirt-executor"
+readonly libvirt_default_pool_path="/var/lib/libvirt/images"
 
 ssh() {
-    command ssh -i "${STATE_DIR}/id_rsa" -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=off "root@${vm_ip}" "${@}"
+    command ssh \
+        -i "/etc/libvirt-executor/id_rsa" \
+        -F /dev/null \
+        -o ServerAliveCountMax=2 \
+        -o ServerAliveInterval=15 \
+        -o UserKnownHostsFile=/dev/null \
+        -o StrictHostKeyChecking=off \
+        "root@${1}" "${@:2}"
 }
 
-get_vm_ip() {
-    if [[ -z "${vm_ip-}" ]]; then
-        vm_ip="$(virsh -q domifaddr "${1}" | awk -F'[ /]+' '{print $5}')"
-        [[ -n "${vm_ip}" ]] || return 1
-    fi
-}
-
-get_vm_name() {
-    printf 'libvirt_executor_runner_%s_project-%s_concurrent_%s\n' "${CUSTOM_ENV_CI_RUNNER_SHORT_TOKEN}" "${CUSTOM_ENV_CI_PROJECT_ID}" "${CUSTOM_ENV_CI_CONCURRENT_PROJECT_ID}"
+vm_name() {
+    printf 'runner-%s-project-%d-pipeline-%d-job-%d\n' "${CUSTOM_ENV_CI_RUNNER_SHORT_TOKEN}" "${CUSTOM_ENV_CI_PROJECT_ID}" "${CUSTOM_ENV_CI_PIPELINE_IID}" "${CUSTOM_ENV_CI_JOB_ID}"
 }
 
-clone_vm() {
-    for _ in {1..10}; do
-        # --reflink sadly doesn't work with non-raw formats:
-        # https://bugzilla.redhat.com/show_bug.cgi?id=1324006
-        if virt-clone -o "${1}" -n "${2}" --auto-clone; then
-            return 0
-        fi
-        sleep 1
-    done
+vm_ip() {
+    local ip
+    ip="$(virsh -q domifaddr "${1}" | awk -F'[ /]+' '{print $5}')"
+    if [[ -n ${ip} ]]; then
+        echo "${ip}"
+        return 0
+    fi
     return 1
 }
 
 wait_for_ssh() {
-    for _ in {1..90}; do
-        if ! get_vm_ip "${1}"; then
-            echo "Waiting for network"
+    for _ in {1..60}; do
+        if ! ip="$(vm_ip "${1}")"; then
+            echo "Waiting for network" >&2 # stderr: run() captures stdout as the VM IP
             sleep 1
             continue
         fi
-        if ! ssh true; then
-            echo "Waiting for SSH to be ready"
+        if ! ssh "${ip}" true; then
+            echo "Waiting for SSH to be ready" >&2 # stderr: run() captures stdout as the VM IP
             sleep 1
             continue
         fi
+        printf "%s" "${ip}"
         return 0
     done
-    echo 'Waited 90 seconds for VM to start, exiting...'
+    echo 'Waited 60 seconds for VM to start, exiting...' >&2
     exit "${SYSTEM_FAILURE_EXIT_CODE:-1}"
 }
 
-wait_for_vm_shutdown() {
-    for _ in {1..10}; do
-        if LC_ALL=C virsh domstate "${1}" | grep -F "shut off"; then
-            return 0
-        fi
-        sleep 1
-    done
-    return 1
-}
-
-# Create a updated VM image with the required tools
-create_vm_template() {
-    local vm_name
-    printf -v vm_name 'libvirt_executor_vm_template_%(%s)T_tmp'
-
-    local latest_image="$(curl -fs "${MIRROR}/images/latest/" | grep -Eo 'Arch-Linux-x86_64-cloudimg-[0-9]{8}\.[0-9]+\.qcow2'| head -n 1)"
-    if [ -z "${latest_image}" ]; then
-        echo "Error: Couldn't find latest cloud image"
-        exit 1
-    fi
-    local image_path="${LIBVIRT_DEFAULT_POOL_PATH}/${vm_name}.qcow2"
-    trap 'rm -f -- "${image_path}"' EXIT
-    curl -sSf "${MIRROR}/images/latest/${latest_image}" --output "${image_path}"
-    qemu-img resize "${image_path}" 10G
-    local tmp_user_data
-    tmp_user_data="$(mktemp -u)"
-    trap 'rm -f -- "$tmp_user_data"; virsh destroy "${vm_name}"; virsh undefine "${vm_name}" --remove-all-storage; exit 1' EXIT
-    sed "s:PUBLIC_SSH_KEY:$(<"${STATE_DIR}/id_rsa.pub"):" "${STATE_DIR}/user-data" > "${tmp_user_data}"
-    virt-install --name "${vm_name}" \
-        --cloud-init "user-data=${tmp_user_data}" \
-        --disk path="${image_path}",device=disk \
-        --memory 1024 \
-        --vcpus 4 \
-        --os-type Linux \
-        --os-variant archlinux \
-        --network network=default,filterref.filter=clean-traffic \
-        --noautoconsole
-    rm -- "${tmp_user_data}"
-    wait_for_ssh "${vm_name}"
-
-    ssh "cat > /etc/pacman.d/mirrorlist" <<< "Server = ${MIRROR}/\$repo/os/\$arch"
-    ssh "cat > /etc/systemd/network/20-wired.network" <<< $'[Match]\nName=eth0\n[Network]\nDHCP=yes'
-    ssh pacman -Sy --noconfirm --needed archlinux-keyring
-    ssh pacman -Syu --noconfirm git git-lfs gitlab-runner
-    ssh "sed -E 's/^#(IgnorePkg *=)/\1 linux/' -i /etc/pacman.conf"
-
-    # Reboot to be sure the network is working
-    virsh shutdown "${vm_name}"
-    wait_for_vm_shutdown "${vm_name}"
-    virsh start "${vm_name}"
-    vm_ip=""
-    wait_for_ssh "${vm_name}"
-    ssh rm /etc/machine-id /var/lib/dbus/machine-id
-
-    virsh shutdown "${vm_name}"
-    wait_for_vm_shutdown "${vm_name}"
-    virsh domrename "${vm_name}" "${vm_name%%_tmp}"
-    trap - EXIT
-
-    # Keep the 3 most recent VM templates
-    virsh list --state-shutoff --name | grep "^libvirt_executor_vm_template_[0-9]*$" | sort -r | tail -n +4 | xargs -n 1 --no-run-if-empty virsh undefine --remove-all-storage
-}
-
 # https://docs.gitlab.com/runner/executors/custom.html#prepare
 prepare() {
-    vm_template="$(virsh list --state-shutoff --name | grep "^libvirt_executor_vm_template_[0-9]*$" | sort -r | head -n 1)"
-    if [[ -z "${vm_template}" ]]; then
-        echo "Error no VM template found"
-        exit 1
+    # shellcheck disable=SC2064
+    trap "exit ${SYSTEM_FAILURE_EXIT_CODE:-1}" ERR
+    local base_image
+    base_image="$(compgen -G "${libvirt_default_pool_path}/runner-base-*.qcow2" | sort -n -t - -k3,3 | tail -n 1 || true)" # || true: a no-match compgen must reach the check below, not trip ERR via pipefail
+
+    if [[ -z ${base_image} ]]; then
+        echo 'Base image not found...'
+        exit "${SYSTEM_FAILURE_EXIT_CODE:-1}"
     fi
-    vm_name="$(get_vm_name)"
-    clone_vm "${vm_template}" "${vm_name}"
-    virsh start "${vm_name}"
-    wait_for_ssh "${vm_name}"
+
+    qemu-img create -f qcow2 -b "${base_image}" -F qcow2 "${libvirt_default_pool_path}/$(vm_name).qcow2"
+    virsh define <(sed "s/\$vm_name/$(vm_name)/" /usr/local/lib/libvirt-executor/domain_template.xml)
+    virsh start "$(vm_name)"
+
+    wait_for_ssh "$(vm_name)"
 }
 
 # https://docs.gitlab.com/runner/executors/custom.html#run
 run() {
-    vm_name="$(get_vm_name)"
-    wait_for_ssh "${vm_name}"
-    ssh bash < "${1}" || exit "${BUILD_FAILURE_EXIT_CODE:-1}"
+    local ip
+    ip="$(wait_for_ssh "$(vm_name)")"
+    ssh "${ip}" bash < "${1}" || exit "${BUILD_FAILURE_EXIT_CODE:-1}"
 }
 
 # https://docs.gitlab.com/runner/executors/custom.html#cleanup
 cleanup() {
-    vm_name="$(get_vm_name)"
-    virsh destroy "${vm_name}" || true
-    virsh undefine "${vm_name}" --remove-all-storage
+    virsh destroy "$(vm_name)" || true
+    rm -f "${libvirt_default_pool_path}/$(vm_name).qcow2" # -f: the disk may not exist if prepare failed early
+    virsh undefine "$(vm_name)"
 }
 
 case "${1:-}" in
-    create-vm-template)
-        create_vm_template
-        ;;
     prepare)
         prepare
         ;;
diff --git a/roles/gitlab_runner/files/libvirt-executor-update-base-image b/roles/gitlab_runner/files/libvirt-executor-update-base-image
new file mode 100755
index 000000000..1af3f0f7c
--- /dev/null
+++ b/roles/gitlab_runner/files/libvirt-executor-update-base-image
@@ -0,0 +1,58 @@
+#!/bin/bash
+set -o nounset -o errexit
+readonly libvirt_default_pool_path="/var/lib/libvirt/images"
+
+cleanup() {
+    set +o errexit
+
+    if mountpoint -q mnt; then
+        umount -R mnt
+    fi
+    if [[ -n ${loopdev:-} ]]; then # :- because nounset is on and loopdev is unset if we fail before losetup
+        losetup -d "${loopdev}"
+    fi
+    rm -r "${tmpdir}"
+}
+
+tmpdir="$(mktemp --directory --tmpdir="/var/tmp")"
+trap cleanup EXIT
+
+cd "${tmpdir}"
+curl -sSf --remote-name-all https://geo.mirror.pkgbuild.com/images/latest/Arch-Linux-x86_64-basic.qcow2{,.sig}
+sq verify --signer-cert /usr/local/lib/libvirt-executor/arch-boxes.asc --detached Arch-Linux-x86_64-basic.qcow2.sig Arch-Linux-x86_64-basic.qcow2
+
+image=Arch-Linux-x86_64-basic.img
+qemu-img convert -f qcow2 -O raw Arch-Linux-x86_64-basic.qcow2 Arch-Linux-x86_64-basic.img
+
+loopdev="$(losetup --find --partscan --show "${image}")"
+mount --mkdir "${loopdev}p2" mnt
+mount --mkdir --bind -o ro /etc/pacman.d/gnupg mnt/etc/pacman.d/gnupg
+
+# shellcheck disable=SC2016
+printf 'Server = https://geo.mirror.pkgbuild.com/$repo/os/$arch' > mnt/etc/pacman.d/mirrorlist
+arch-chroot mnt systemctl disable reflector-init
+arch-chroot mnt pacman -Sy --noconfirm --needed archlinux-keyring
+arch-chroot mnt pacman -Syu --noconfirm --needed git git-lfs gitlab-runner
+sed -E 's/^#(IgnorePkg *=)/\1 linux/' -i mnt/etc/pacman.conf
+arch-chroot mnt userdel -r arch
+mkdir mnt/root/.ssh
+cp /etc/libvirt-executor/id_rsa.pub mnt/root/.ssh/authorized_keys
+chmod 600 mnt/root/.ssh/authorized_keys
+rm -f mnt/etc/machine-id
+
+cp -a mnt/boot/{initramfs-linux-fallback.img,initramfs-linux.img}
+
+umount mnt/etc/pacman.d/gnupg
+rmdir mnt/etc/pacman.d/gnupg
+umount mnt
+losetup -d "${loopdev}"
+loopdev=""
+
+qemu-img convert -f raw -O qcow2 Arch-Linux-x86_64-basic.img Arch-Linux-x86_64-basic.qcow2
+image_path="$(printf '%s/runner-base-%(%s)T.qcow2' "${libvirt_default_pool_path}")"
+cp Arch-Linux-x86_64-basic.qcow2 "${image_path}.tmp"
+mv "${image_path}"{.tmp,}
+
+cd "${libvirt_default_pool_path}"
+# Keep one week of base images
+compgen -G "${libvirt_default_pool_path}/runner-base-*.qcow2" | sort -n -t - -k3,3 | head -n -7 | xargs --no-run-if-empty rm -vf
diff --git a/roles/gitlab_runner/files/libvirt-executor-update-base-image.service b/roles/gitlab_runner/files/libvirt-executor-update-base-image.service
new file mode 100644
index 000000000..2bcb4000d
--- /dev/null
+++ b/roles/gitlab_runner/files/libvirt-executor-update-base-image.service
@@ -0,0 +1,6 @@
+[Unit]
+Description=Update libvirt-executor base image
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/sbin/libvirt-executor-update-base-image
diff --git a/roles/gitlab_runner/files/libvirt-executor-vm-template.timer b/roles/gitlab_runner/files/libvirt-executor-update-base-image.timer
similarity index 100%
rename from roles/gitlab_runner/files/libvirt-executor-vm-template.timer
rename to roles/gitlab_runner/files/libvirt-executor-update-base-image.timer
diff --git a/roles/gitlab_runner/files/libvirt-executor-vm-template.service b/roles/gitlab_runner/files/libvirt-executor-vm-template.service
deleted file mode 100644
index c465e64fd..000000000
--- a/roles/gitlab_runner/files/libvirt-executor-vm-template.service
+++ /dev/null
@@ -1,6 +0,0 @@
-[Unit]
-Description=Create updated VM image with the required tools
-
-[Service]
-Type=oneshot
-ExecStart=/usr/local/sbin/libvirt-executor create-vm-template
diff --git a/roles/gitlab_runner/files/user-data b/roles/gitlab_runner/files/user-data
deleted file mode 100644
index 59b4eb7e0..000000000
--- a/roles/gitlab_runner/files/user-data
+++ /dev/null
@@ -1,8 +0,0 @@
-#cloud-config
-disable_root: false
-users:
-  - name: root
-    ssh_authorized_keys:
-      - PUBLIC_SSH_KEY
-runcmd:
-- [ sudo, touch, /etc/cloud/cloud-init.disabled ]
diff --git a/roles/gitlab_runner/handlers/main.yml b/roles/gitlab_runner/handlers/main.yml
index c18aea5d0..40375983e 100644
--- a/roles/gitlab_runner/handlers/main.yml
+++ b/roles/gitlab_runner/handlers/main.yml
@@ -7,8 +7,5 @@
 - name: restart gitlab-runner-docker-cleanup.timer
   service: name=gitlab-runner-docker-cleanup.timer state=restarted daemon_reload=yes
 
-- name: restart libvirt-executor-vm-template.timer
-  service: name=libvirt-executor-vm-template.timer state=restarted daemon_reload=yes
-
 - name: restart docker
   service: name=docker state=restarted
diff --git a/roles/gitlab_runner/tasks/main.yml b/roles/gitlab_runner/tasks/main.yml
index ead08b4d4..c3fda82cf 100644
--- a/roles/gitlab_runner/tasks/main.yml
+++ b/roles/gitlab_runner/tasks/main.yml
@@ -1,5 +1,5 @@
 - name: install dependencies
-  pacman: name=docker,python-docker,python-gitlab,gitlab-runner state=latest update_cache=yes
+  pacman: name=docker,python-docker,python-gitlab,gitlab-runner,arch-install-scripts,sequoia-sq state=latest update_cache=yes
   notify: restart gitlab-runner
 
 - name: install docker.slice
@@ -60,27 +60,30 @@
 - name: enable and start gitlab runner service
   systemd: name=gitlab-runner state=started enabled=yes daemon_reload=yes
 
-- name: install libvirt-executor script
-  copy: src=libvirt-executor dest=/usr/local/sbin/ owner=root group=root mode=0755
+- name: create libvirt-executor configuration and data directories
+  file: path={{ item }} state=directory owner=root group=root mode=0755
+  loop:
+    - /etc/libvirt-executor
+    - /usr/local/lib/libvirt-executor
 
-- name: create libvirt-executor state directory
-  file: path=/usr/local/lib/libvirt-executor state=directory owner=root group=root mode=0700
+- name: install libvirt-executor
+  copy: src={{ item.src }} dest={{ item.dest }} owner=root group=root mode={{ item.mode }}
+  loop:
+    - {src: arch-boxes.asc, dest: /usr/local/lib/libvirt-executor/, mode: 644}
+    - {src: domain_template.xml, dest: /usr/local/lib/libvirt-executor/, mode: 644}
+    - {src: libvirt-executor, dest: /usr/local/sbin/, mode: 755}
+    - {src: libvirt-executor-update-base-image, dest: /usr/local/sbin/, mode: 755}
 
 - name: create SSH keys for libvirt-executor
-  command: ssh-keygen -N "" -f /usr/local/lib/libvirt-executor/id_rsa
+  command: ssh-keygen -N "" -f /etc/libvirt-executor/id_rsa
   args:
-    creates: /usr/local/lib/libvirt-executor/id_rsa
+    creates: /etc/libvirt-executor/id_rsa
 
-- name: install user-data for libvirt-executor
-  copy: src=user-data dest=/usr/local/lib/libvirt-executor/ owner=root group=root mode=0755
-
-- name: install libvirt-executor-vm-template.{service,timer}
+- name: install libvirt-executor-update-base-image.{service,timer}
   copy: src={{ item }} dest=/etc/systemd/system/{{ item }} owner=root group=root mode=0644
   loop:
-    - libvirt-executor-vm-template.service
-    - libvirt-executor-vm-template.timer
-  notify:
-    - restart libvirt-executor-vm-template.timer
+    - libvirt-executor-update-base-image.service
+    - libvirt-executor-update-base-image.timer
 
-- name: enable and start libvirt-executor-vm-template.timer
-  systemd: name=libvirt-executor-vm-template.timer state=started enabled=yes daemon_reload=yes
+- name: enable and start libvirt-executor-update-base-image.timer
+  systemd: name=libvirt-executor-update-base-image.timer state=started enabled=yes daemon_reload=yes
diff --git a/roles/libvirt/tasks/main.yml b/roles/libvirt/tasks/main.yml
index b02da4fab..96d9e185c 100644
--- a/roles/libvirt/tasks/main.yml
+++ b/roles/libvirt/tasks/main.yml
@@ -3,7 +3,7 @@
   pacman: name=iptables force=yes state=absent
 
 - name: install libvirt and needed optional dependencies
-  pacman: name=libvirt,virt-install,cdrtools,qemu-headless,dnsmasq,iptables-nft state=present
+  pacman: name=libvirt,qemu-headless,dnsmasq,iptables-nft state=present
   register: result
 
 - name: reload firewalld
-- 
GitLab