Skip to content
Snippets Groups Projects
Commit d8e18216 authored by Kristian Klausen :tada:
Browse files

Parallelize downloading

parent 71020600
No related branches found
No related tags found
No related merge requests found
......@@ -10,7 +10,7 @@ snapshot:
variables:
GIT_STRATEGY: clone
before_script:
- pacman -Syu --needed --noconfirm git jq wget libxslt
- pacman -Syu --needed --noconfirm git jq wget rsync libxslt
- git remote set-url origin "https://${GITLAB_PROJECT_USER}:${GITLAB_PROJECT_TOKEN}@${CI_SERVER_HOST}/${CI_PROJECT_PATH}.git"
- 'user_json="$(curl --silent --show-error --fail --header "PRIVATE-TOKEN: ${GITLAB_PROJECT_TOKEN}" "${CI_API_V4_URL}/user")"'
- git config --global user.email "$(jq -r .email <<< "${user_json}")"
......
......@@ -6,7 +6,7 @@ This is a take on it! :)
## Usage
First install `wget`, `libxslt` and `prettier`, then run:
First install `wget`, `rsync`, `libxslt` and `prettier`, then run:
```sh
$ ./snapshotter.sh [maximum number of tasks to download] [download attachment: true (default) or false] [prettify the HTML files: true (default) or false] [download dir, default: snapshots/2021-04-01T22:52+02:00]
......
......@@ -9,7 +9,13 @@ function generate_urls() {
eval "echo https://bugs.archlinux.org/task/{1..$1} | tr ' ' '\n'"
}
function download() {
function _download() {
set -o nounset -o errexit -o pipefail
local dir
dir="$(grep --only-matching "[0-9]*$" <<< "${2}")"
mkdir "${dir}"
cd "${dir}"
local include_directories="user,themes,ajax,javascript"
if [ "${1}" = "true" ]; then
include_directories+=",task"
......@@ -27,7 +33,22 @@ function download() {
--max-redirect 0 \
--user-agent="${user_agent}" \
--no-verbose \
--input-file=<(echo "${2}") || [[ ${?} = 8 ]] # 8 Server issued an error response.
--input-file=<(printf "%s\n" "${@:2}") || [[ ${?} = 8 ]] # 8 Server issued an error response.
}
# Download all given URLs in parallel batches, then merge the per-batch
# download directories into the current working directory.
#
# Arguments:
#   $1 - forwarded unchanged to _download as its first argument
#        ("true" makes _download add "task" to its include directories)
#   $2 - newline-separated list of URLs to download
# Returns:
#   non-zero if the staging directory cannot be created or a merge step fails
function download() {
    local d

    export -f _download
    # xargs appends each batch of URLs after the fixed arguments below.
    # The "_" placeholder occupies $0 of the child shell; without it the first
    # URL of every batch would be assigned to $0, be missing from "$@", and
    # never get downloaded. Passing ${1} as a real argument (instead of pasting
    # it into the command string) also keeps it from being re-parsed as code.
    # --no-run-if-empty avoids calling _download without any URL.
    time printf '%s\n' "${2}" \
        | xargs --no-run-if-empty --max-procs=10 --max-args=50 \
            bash -c '_download "$@"' _ "${1}"

    # Every batch downloaded into its own directory; fold them all into one
    # staging directory, keeping the first copy of any file seen twice.
    mkdir final || return
    for d in ./*/; do
        # Unmatched glob stays literal when there are no directories.
        [[ -d ${d} ]] || continue
        if [[ ${d} = ./final/ ]]; then
            continue
        fi
        # Only delete the batch directory once its content is safely merged.
        rsync --recursive --ignore-existing "${d}" final/ || return
        rm -rf -- "${d:?}"
    done

    # Move the merged tree back; find copes with an empty staging directory
    # and with dot-entries, both of which a plain "final/*" glob would miss.
    find final -mindepth 1 -maxdepth 1 -exec mv -t . -- {} + || return
    rmdir final
}
function cleanup_html() {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment