diff --git a/roles/aurweb/files/robots.txt b/roles/aurweb/files/robots.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d87a40c8cbfb6f9c6fef057aeac116c6608d7fbb
--- /dev/null
+++ b/roles/aurweb/files/robots.txt
@@ -0,0 +1,12 @@
+User-agent: *
+# keep search results out of the index: every search is /packages plus URL parameters
+Disallow: /packages?*
+# block everything under /pkgbase/ from being indexed; this covers interactive actions such as posting requests
+Disallow: /pkgbase/*
+# block all account pages from being indexed, as they require login anyway
+Disallow: /account/*
+# block the cgit interface, except for the tree and log views allowed below
+Disallow: /cgit/aur.git/*
+Allow: /cgit/aur.git/tree
+Allow: /cgit/aur.git/log
+Crawl-delay: 2
diff --git a/roles/aurweb/tasks/main.yml b/roles/aurweb/tasks/main.yml
index e53c9e90d3b9e22333a8370a099527a710e17605..2799ba975e462b3c37c7ed97eb2dc606c3778478 100644
--- a/roles/aurweb/tasks/main.yml
+++ b/roles/aurweb/tasks/main.yml
@@ -109,6 +109,9 @@
 - name: Copy aurweb configuration file
   copy: src={{ aurweb_dir }}/conf/config.defaults dest={{ aurweb_conf_dir }}/config.defaults remote_src=yes owner=root group=root mode=0644
 
+- name: Configure robots.txt
+  copy: src=robots.txt dest="{{ aurweb_dir }}/robots.txt" owner=root group=root mode=0644
+
 - name: Install goaurrpc configuration
   template: src=goaurrpc.conf.j2 dest=/etc/goaurrpc.conf owner=root group=root mode=0644
 
diff --git a/roles/aurweb/templates/nginx.d.conf.j2 b/roles/aurweb/templates/nginx.d.conf.j2
index 547fe49a3f448a09eb2229a59c83142acee3bd33..1373b66231a9c2857fa23e84fdc0f978f8195f57 100644
--- a/roles/aurweb/templates/nginx.d.conf.j2
+++ b/roles/aurweb/templates/nginx.d.conf.j2
@@ -51,6 +51,10 @@ server {
     root {{ aurweb_dir }}/static;
     index index.php;
 
+    location = /robots.txt {
+        alias {{ aurweb_dir }}/robots.txt;
+    }
+
     # redirect /tu to /package-maintainer for external links
     location ~ ^/tu($|/.*) {
         return 301 https://aur.archlinux.org/package-maintainer$1;