From 06e0252720ac07d621882ac779e15243257c0c6a Mon Sep 17 00:00:00 2001
From: Kristian Klausen <kristian@klausen.dk>
Date: Sun, 5 Sep 2021 00:27:34 +0200
Subject: [PATCH] gitlab_runner: Add VM based executor (libvirt-executor)

For some workloads, running in a container is too restrictive, e.g.
arch-boxes (loop device, filesystem mount, pacstrap) and archiso
(pacstrap). Currently they both run a TCG accelerated QEMU VM, which is
very slow and painful to work with. We should provide a better option to
our users!

This adds a hardware accelerated VM for these kinds of workloads, which
is way faster and you can do whatever you like (mostly)!

Fix #283
---
 .../vault_gitlab_runner.yml                   |  16 +-
 .../vault_gitlab_runner.yml                   |  16 +-
 hosts                                         |   4 +
 playbooks/gitlab-runners.yml                  |   2 +-
 roles/gitlab_runner/files/libvirt-executor    | 163 ++++++++++++++++++
 .../libvirt-executor-vm-template.service      |   6 +
 .../files/libvirt-executor-vm-template.timer  |  10 ++
 roles/gitlab_runner/files/user-data           |   8 +
 roles/gitlab_runner/handlers/main.yml         |   3 +
 roles/gitlab_runner/tasks/main.yml            |  30 +++-
 roles/gitlab_runner/templates/config.toml.j2  |  20 +++
 11 files changed, 262 insertions(+), 16 deletions(-)
 create mode 100755 roles/gitlab_runner/files/libvirt-executor
 create mode 100644 roles/gitlab_runner/files/libvirt-executor-vm-template.service
 create mode 100644 roles/gitlab_runner/files/libvirt-executor-vm-template.timer
 create mode 100644 roles/gitlab_runner/files/user-data

diff --git a/host_vars/runner1.archlinux.org/vault_gitlab_runner.yml b/host_vars/runner1.archlinux.org/vault_gitlab_runner.yml
index d5f202422..aa83ddaf0 100644
--- a/host_vars/runner1.archlinux.org/vault_gitlab_runner.yml
+++ b/host_vars/runner1.archlinux.org/vault_gitlab_runner.yml
@@ -1,8 +1,10 @@
 $ANSIBLE_VAULT;1.1;AES256
-65336561343638323331326436643038656633323235323439373730396330366362643537313038
-6566346231333965616165643735346633306632393031300a396138633230363964386533646431
-35626132383035643431323839323830306435616463613934373435313565353263393735636662
-3739326165373931650a386331313133656566363232343635636632363761383366363233356266
-66613865396239626533303134643265366633323431393236643763316362323966313466306564
-32623031396365396363666162613664316539636639356333333463653432613536666539323638
-373866396166306432393165353530623534
+38643735613065653235613332353832396134653465343264393537376164626630346433323534
+3838626539393162613666633865316233323936356531360a323939353032356335663439303834
+32376137643533386137353632396330386663323136346663373532623966613632623339373564
+6531343062386166310a313939343435333264303165336536663862386366666332663431376433
+33366564363432653039313935356232396339386665353430393234666639663339393863326332
+38383664643433663137623632313566353965303434323935326339613330373436646166306235
+33396637313264356232633863616632343333303732383965646336346331613430336561396139
+35626662343731336136636366386662393831373836363765633765336634656530326564333634
+35323431313031613130343536626563343163663661313434623736653861663732
diff --git a/host_vars/secure-runner1.archlinux.org/vault_gitlab_runner.yml b/host_vars/secure-runner1.archlinux.org/vault_gitlab_runner.yml
index c7d9dd957..c7856bf72 100644
--- a/host_vars/secure-runner1.archlinux.org/vault_gitlab_runner.yml
+++ b/host_vars/secure-runner1.archlinux.org/vault_gitlab_runner.yml
@@ -1,8 +1,10 @@
 $ANSIBLE_VAULT;1.1;AES256
-31356534353263303630336136323233343664643962613339303933616134393461636364663633
-3032373939333130633632323035386132366261346332320a346462336333386265303262636331
-61396135363430393937316661613130616338643462323361386331323264343037633765646231
-3262323033663962320a623835383532353333626333656335356533353265663036366132393665
-66613335376333633038373633306239646130383830613139653130613265613135343764383137
-65626161333761343938663262636336616634623731653265393732363233383761653333326636
-613139393130636634343461333965656334
+61656464356262393461303061653330656164613364303364633434393566353732333665343565
+3332666566303439303934663664343032316430656164360a613533616465666465653334613237
+35343536646232333030623736303466396438353537313534613837383336623434656138396634
+3135383232333232640a613765663863356232373363333235393263386438643338653838343936
+63663636383239333437653239636465313861653532636363363038303936363632323237666262
+63386630623165626462356232393438313739356465363038626431623666366431326264383037
+30353130633836336135613239343234396338613732306263353333386632353334356331643630
+64396131383730343366643132353363356637353832643230343739303933386232363737653162
+34666363356265303162656632356361363034303931363362346463323662346636
diff --git a/hosts b/hosts
index b5b87b65d..d3c2dc339 100644
--- a/hosts
+++ b/hosts
@@ -90,6 +90,10 @@ runner1.archlinux.org
 runner2.archlinux.org
 secure-runner1.archlinux.org
 
+[gitlab_vm_runners]
+runner1.archlinux.org
+secure-runner1.archlinux.org
+
 [reproduciblebuilds]
 repro1.pkgbuild.com
 
diff --git a/playbooks/gitlab-runners.yml b/playbooks/gitlab-runners.yml
index 072ccaf71..b943d70c9 100644
--- a/playbooks/gitlab-runners.yml
+++ b/playbooks/gitlab-runners.yml
@@ -11,5 +11,5 @@
     - { role: fail2ban }
     - { role: prometheus_exporters }
     - { role: promtail }
-    - { role: libvirt }
+    - { role: libvirt, when: "'gitlab_vm_runners' in group_names" }
     - { role: gitlab_runner }
diff --git a/roles/gitlab_runner/files/libvirt-executor b/roles/gitlab_runner/files/libvirt-executor
new file mode 100755
index 000000000..f7fbec967
--- /dev/null
+++ b/roles/gitlab_runner/files/libvirt-executor
@@ -0,0 +1,163 @@
+#!/usr/bin/env bash
+set -o nounset -o errexit -o pipefail  # abort on unset variables, failing commands and failing pipeline stages
+readonly MIRROR="https://mirror.pkgbuild.com"  # base URL for the cloud image download and the VM's pacman mirrorlist
+readonly LIBVIRT_DEFAULT_POOL_PATH="/var/lib/libvirt/images"  # directory the template disk image is downloaded into
+readonly STATE_DIR="/usr/local/lib/libvirt-executor"  # holds the SSH key pair (id_rsa, id_rsa.pub) and the user-data template
+
+ssh() {  # Run "${@}" as root on ${vm_ip} with the key in STATE_DIR; host keys are neither checked nor stored ("command" bypasses this wrapper)
+  command ssh -i "${STATE_DIR}/id_rsa" -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=off "root@${vm_ip}" "${@}"
+}
+
+get_vm_ip() {
+  # Look up the address of VM ${1} once and cache it in the global vm_ip; fails while no address is known
+  [[ -n "${vm_ip-}" ]] && return 0
+  vm_ip="$(virsh -q domifaddr "${1}" | awk -F'[ /]+' '{print $5}')"
+  [[ -n "${vm_ip}" ]] || return 1
+}
+
+get_vm_name() {  # Print the job's VM name, built from the runner short token, the project ID and the concurrent project ID
+  printf 'libvirt_executor_runner_%s_project-%s_concurrent_%s\n' "${CUSTOM_ENV_CI_RUNNER_SHORT_TOKEN}" "${CUSTOM_ENV_CI_PROJECT_ID}" "${CUSTOM_ENV_CI_CONCURRENT_PROJECT_ID}"
+}
+
+clone_vm() {
+  # Clone VM ${1} as ${2}; virt-clone is tried up to 10 times, one second
+  # apart, and the function fails if every attempt fails.
+  # --reflink sadly doesn't work with non-raw formats:
+  # https://bugzilla.redhat.com/show_bug.cgi?id=1324006
+  for _ in {1..10}; do
+    virt-clone -o "${1}" -n "${2}" --auto-clone && return 0
+    sleep 1
+  done
+  return 1
+}
+
+wait_for_ssh() {
+  # Try up to 90 times, sleeping one second between tries, until VM ${1} has an IP
+  # and accepts SSH; otherwise exit the script with SYSTEM_FAILURE_EXIT_CODE (1 when unset).
+  for _ in {1..90}; do
+    if get_vm_ip "${1}"; then
+      if ssh true; then
+        return 0
+      fi
+      echo "Waiting for SSH to be ready"
+    else
+      echo "Waiting for network"
+    fi
+    sleep 1
+  done
+  echo 'Waited 90 seconds for VM to start, exiting...'
+  exit "${SYSTEM_FAILURE_EXIT_CODE:-1}"
+}
+
+wait_for_vm_shutdown() {
+  # Query the state of VM ${1} up to 10 times, one second apart; succeed as soon
+  # as virsh (run in the C locale) reports "shut off", fail if it never does.
+  for _ in {1..10}; do
+    LC_ALL=C virsh domstate "${1}" | grep -F "shut off" && return 0
+    sleep 1
+  done
+  return 1
+}
+
+# Create an updated VM image with the required tools; a failed run removes its temporary VM, disk image and user-data file
+create_vm_template() {
+  local vm_name tmp_user_data
+  printf -v vm_name 'libvirt_executor_vm_template_%(%s)T_tmp'
+
+  local latest_image="$(curl -fs "${MIRROR}/images/latest/" | grep -Eo 'Arch-Linux-x86_64-cloudimg-[0-9]{8}\.[0-9]+\.qcow2'| head -n 1)"
+  if [ -z "${latest_image}" ]; then
+    echo "Error: Couldn't find latest cloud image"
+    exit 1
+  fi
+  local image_path="${LIBVIRT_DEFAULT_POOL_PATH}/${vm_name}.qcow2"
+  trap 'rm -f -- "${image_path}"' EXIT
+  curl -sSf "${MIRROR}/images/latest/${latest_image}" --output "${image_path}"
+  qemu-img resize "${image_path}" 10G
+  tmp_user_data="$(mktemp)"  # let mktemp create the file itself instead of only printing an unused, guessable name (-u)
+  # "|| true" keeps the cleanup going when a virsh step fails (VM already shut off, domain never defined); rm covers what virsh left behind
+  trap 'virsh destroy "${vm_name}" || true; virsh undefine "${vm_name}" --remove-all-storage || true; rm -f -- "${tmp_user_data}" "${image_path}"; exit 1' EXIT
+  sed "s:PUBLIC_SSH_KEY:$(<"${STATE_DIR}/id_rsa.pub"):" "${STATE_DIR}/user-data" > "${tmp_user_data}"
+  virt-install --name "${vm_name}" \
+               --cloud-init "user-data=${tmp_user_data}" \
+               --disk path="${image_path}",device=disk \
+               --memory 1024 \
+               --vcpus 4 \
+               --os-type Linux \
+               --os-variant archlinux \
+               --network network=default,filterref.filter=clean-traffic \
+               --noautoconsole
+  rm -- "${tmp_user_data}"
+  wait_for_ssh "${vm_name}"
+
+  ssh "cat > /etc/pacman.d/mirrorlist" <<< "Server = ${MIRROR}/\$repo/os/\$arch"
+  ssh "cat > /etc/systemd/network/20-wired.network" <<< $'[Match]\nName=eth0\n[Network]\nDHCP=yes'
+  ssh pacman -Sy --noconfirm --needed archlinux-keyring
+  ssh pacman -Syu --noconfirm git git-lfs gitlab-runner
+  ssh "sed -E 's/^#(IgnorePkg *=)/\1 linux/' -i /etc/pacman.conf"
+
+  # Reboot to be sure the network is working
+  virsh shutdown "${vm_name}"
+  wait_for_vm_shutdown "${vm_name}"
+  virsh start "${vm_name}"
+  vm_ip=""
+  wait_for_ssh "${vm_name}"
+  ssh rm /etc/machine-id /var/lib/dbus/machine-id
+
+  virsh shutdown "${vm_name}"
+  wait_for_vm_shutdown "${vm_name}"
+  virsh domrename "${vm_name}" "${vm_name%%_tmp}"
+  trap - EXIT
+
+  # Keep the 3 most recent VM templates
+  virsh list --state-shutoff --name | grep "^libvirt_executor_vm_template_[0-9]*$" | sort -r | tail -n +4 | xargs -n 1 --no-run-if-empty virsh undefine --remove-all-storage
+}
+
+# https://docs.gitlab.com/runner/executors/custom.html#prepare
+prepare() {  # Clone the newest VM template under the job's VM name, boot the clone and wait until SSH works
+  vm_template="$(virsh list --state-shutoff --name | grep "^libvirt_executor_vm_template_[0-9]*$" | sort -r | head -n 1 || true)"
+  if [[ -z "${vm_template}" ]]; then  # reachable only thanks to "|| true": errexit + pipefail would otherwise abort on a non-matching grep
+    echo "Error no VM template found"
+    exit "${SYSTEM_FAILURE_EXIT_CODE:-1}"
+  fi
+  vm_name="$(get_vm_name)"
+  clone_vm "${vm_template}" "${vm_name}"
+  virsh start "${vm_name}"
+  wait_for_ssh "${vm_name}"
+}
+
+# https://docs.gitlab.com/runner/executors/custom.html#run
+run() {  # Feed the script file ${1} to bash inside the job's VM over SSH; a failure exits with BUILD_FAILURE_EXIT_CODE (1 when unset)
+  vm_name="$(get_vm_name)"
+  wait_for_ssh "${vm_name}"
+  # Upstream issue: https://gitlab.com/gitlab-org/gitlab-runner/-/issues/28189
+  if [[ "${2}" == *'_artifacts'* ]]; then  # when the second argument contains "_artifacts", run with TMPDIR=/var/tmp
+    ssh 'TMPDIR=/var/tmp bash' < "${1}" || exit "${BUILD_FAILURE_EXIT_CODE:-1}"
+  else
+    ssh bash < "${1}" || exit "${BUILD_FAILURE_EXIT_CODE:-1}"
+  fi
+}
+
+# https://docs.gitlab.com/runner/executors/custom.html#cleanup
+cleanup() {  # Destroy the job's VM (errors ignored) and undefine it together with all of its storage
+  vm_name="$(get_vm_name)"
+  virsh destroy "${vm_name}" || true
+  virsh undefine "${vm_name}" --remove-all-storage
+}
+
+case "${1:-}" in  # dispatch on the sub-command given as the first argument; anything else is an error
+  create-vm-template)
+    create_vm_template
+    ;;
+  prepare)
+    prepare
+    ;;
+  run)
+    run "${2}" "${3}"
+    ;;
+  cleanup)
+    cleanup
+    ;;
+  *)
+    echo "Error invalid command: ${1:-}"
+    exit 1;
+esac
diff --git a/roles/gitlab_runner/files/libvirt-executor-vm-template.service b/roles/gitlab_runner/files/libvirt-executor-vm-template.service
new file mode 100644
index 000000000..abc341f3a
--- /dev/null
+++ b/roles/gitlab_runner/files/libvirt-executor-vm-template.service
@@ -0,0 +1,6 @@
+[Unit]
+Description=Create updated VM image with the required tools
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/libvirt-executor create-vm-template
diff --git a/roles/gitlab_runner/files/libvirt-executor-vm-template.timer b/roles/gitlab_runner/files/libvirt-executor-vm-template.timer
new file mode 100644
index 000000000..4c7435bdb
--- /dev/null
+++ b/roles/gitlab_runner/files/libvirt-executor-vm-template.timer
@@ -0,0 +1,10 @@
+[Unit]
+Description=Run libvirt-executor-vm-template.service daily
+
+[Timer]
+OnCalendar=daily
+Persistent=true
+RandomizedDelaySec=1d
+
+[Install]
+WantedBy=timers.target
diff --git a/roles/gitlab_runner/files/user-data b/roles/gitlab_runner/files/user-data
new file mode 100644
index 000000000..59b4eb7e0
--- /dev/null
+++ b/roles/gitlab_runner/files/user-data
@@ -0,0 +1,8 @@
+#cloud-config
+disable_root: false
+users:
+  - name: root
+    ssh_authorized_keys:
+      - PUBLIC_SSH_KEY
+runcmd:
+- [ sudo, touch, /etc/cloud/cloud-init.disabled ]
diff --git a/roles/gitlab_runner/handlers/main.yml b/roles/gitlab_runner/handlers/main.yml
index 40375983e..c18aea5d0 100644
--- a/roles/gitlab_runner/handlers/main.yml
+++ b/roles/gitlab_runner/handlers/main.yml
@@ -7,5 +7,8 @@
 - name: restart gitlab-runner-docker-cleanup.timer
   service: name=gitlab-runner-docker-cleanup.timer state=restarted daemon_reload=yes
 
+- name: restart libvirt-executor-vm-template.timer
+  service: name=libvirt-executor-vm-template.timer state=restarted daemon_reload=yes
+
 - name: restart docker
   service: name=docker state=restarted
diff --git a/roles/gitlab_runner/tasks/main.yml b/roles/gitlab_runner/tasks/main.yml
index dbd871061..b8cefef33 100644
--- a/roles/gitlab_runner/tasks/main.yml
+++ b/roles/gitlab_runner/tasks/main.yml
@@ -34,7 +34,7 @@
 #   --non-interactive \
 #   --url=https://gitlab.archlinux.org/ \
 #   --docker-image=archlinux:latest \
-#   --tag-list=docker \ # Use docker,secure for secure runners
+#   --tag-list=docker \ # Use docker,secure for secure runners and docker,secure-vm for secure VM runners
 #   --registration-token="{{ vault_gitlab_runner_registration_token }}" \
 #   --executor=docker \
 #   --description="{{ inventory_hostname }}" \
@@ -59,3 +59,31 @@
 
 - name: enable and start gitlab runner service
   systemd: name=gitlab-runner state=started enabled=yes daemon_reload=yes
+
+- name: setup libvirt-executor
+  block:
+    - name: install libvirt-executor script
+      copy: src=libvirt-executor dest=/usr/local/bin/ owner=root group=root mode=0755
+
+    - name: create libvirt-executor state directory
+      file: path=/usr/local/lib/libvirt-executor state=directory owner=root group=root mode=0700
+
+    - name: create SSH keys for libvirt-executor
+      command: ssh-keygen -N "" -f /usr/local/lib/libvirt-executor/id_rsa
+      args:
+        creates: /usr/local/lib/libvirt-executor/id_rsa
+
+    - name: install user-data for libvirt-executor  # cloud-init data file, so no executable bit
+      copy: src=user-data dest=/usr/local/lib/libvirt-executor/ owner=root group=root mode=0644
+
+    - name: install libvirt-executor-vm-template.{service,timer}
+      copy: src={{ item }} dest=/etc/systemd/system/{{ item }} owner=root group=root mode=0644
+      loop:
+        - libvirt-executor-vm-template.service
+        - libvirt-executor-vm-template.timer
+      notify:
+        - restart libvirt-executor-vm-template.timer
+
+    - name: enable and start libvirt-executor-vm-template.timer
+      systemd: name=libvirt-executor-vm-template.timer state=started enabled=yes daemon_reload=yes
+  when: "'gitlab_vm_runners' in group_names"
diff --git a/roles/gitlab_runner/templates/config.toml.j2 b/roles/gitlab_runner/templates/config.toml.j2
index 4752005a3..abe606213 100644
--- a/roles/gitlab_runner/templates/config.toml.j2
+++ b/roles/gitlab_runner/templates/config.toml.j2
@@ -23,3 +23,23 @@ listen_address = ":9252"
     disable_cache = false
     volumes = ["/cache"]
     shm_size = 0
+{% if 'gitlab_vm_runners' in group_names %}
+
+[[runners]]
+  name = "{{ inventory_hostname }}"
+  url = "https://gitlab.archlinux.org"
+  token = "{{ vault_gitlab_vm_runner_token }}"
+  executor = "custom"
+  builds_dir = "/builds"
+  cache_dir = "/cache"
+  limit = {{ (ansible_memtotal_mb * 0.9 / 1024) | round | int }}
+  [runners.custom]
+    prepare_exec = "/usr/local/bin/libvirt-executor"
+    prepare_args = [ "prepare" ]
+
+    run_exec = "/usr/local/bin/libvirt-executor"
+    run_args = [ "run" ]
+
+    cleanup_exec = "/usr/local/bin/libvirt-executor"
+    cleanup_args = [ "cleanup" ]
+{% endif %}
-- 
GitLab