/etc/nginx/sites-available/default
# DokuWiki virtual host: serves the wiki from /var/www/public_html over
# plain HTTP (port 80, IPv4 and IPv6) and hands *.php requests to PHP-FPM.
server {
    listen 80 default_server;
    listen [::]:80 default_server;

    root /var/www/public_html;

    # Use doku.php as the directory index
    index doku.php;

    # Catch-all server name
    server_name _;

    # Maximum POST size and request-body buffer
    client_max_body_size 20M;
    client_body_buffer_size 128K;

    # Block Bytespider / ByteDance crawlers.
    # "~*" matches case-insensitively, so variants such as "ByteDance"
    # or "bytespider" are rejected as well.
    if ($http_user_agent ~* (Bytespider|bytedance)) {
        return 403;
    }

    # Serve existing files/directories directly; everything else goes
    # through the DokuWiki rewrite rules below.
    location / {
        try_files $uri $uri/ @dokuwiki;
    }

    # Do not serve DokuWiki internals (data, config, scripts, bundled
    # libraries) or the installer.
    # Comment this location out during installation, because it also
    # blocks install.php.
    location ~ /(data/|conf/|bin/|inc/|vendor/|install\.php) {
        return 403;
    }

    # Do not serve .ht* files (.htaccess, .htpasswd, ...)
    location ~ /\.ht {
        deny all;
    }

    # Rewrite rules for DokuWiki "nice URLs"
    location @dokuwiki {
        rewrite ^/_media/(.*)          /lib/exe/fetch.php?media=$1   last;
        rewrite ^/_detail/(.*)         /lib/exe/detail.php?media=$1  last;
        rewrite ^/_export/([^/]+)/(.*) /doku.php?do=export_$1&id=$2  last;
        rewrite ^/(.*)                 /doku.php?id=$1               last;
    }

    # Pass PHP scripts to the FastCGI server (PHP-FPM over a Unix socket)
    location ~ \.php$ {
        # Answer 404 for scripts that do not exist on disk instead of
        # forwarding arbitrary paths to PHP-FPM
        try_files $uri =404;

        fastcgi_pass unix:/var/run/php/php7.4-fpm.sock;
        fastcgi_index index.php;
        include fastcgi_params;
        fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
    }
}
/usr/local/bin/cleanup_dokuwiki_cache.sh
#!/bin/bash

# Remove stale lock files and expired cache files from a DokuWiki data
# directory.
#
# Arguments:
#   $1 - full path to the data directory of the wiki
#   $2 - number of days after which cached files are removed
#        (non-negative integer)
# Returns:
#   1 if an argument is invalid; otherwise the exit status of the final
#   find invocation.
cleanup() {
    local data_path="$1"
    local retention_days="$2"

    # Refuse to run without a real data directory: with an empty path the
    # find commands below would operate on /locks/ and /cache/ at the
    # filesystem root and delete files there.
    if [[ -z "${data_path}" || ! -d "${data_path}" ]]; then
        echo "cleanup: data directory '${data_path}' does not exist" >&2
        return 1
    fi

    # The retention period is interpolated into a find expression, so
    # make sure it is a plain number.
    if ! [[ "${retention_days}" =~ ^[0-9]+$ ]]; then
        echo "cleanup: retention days '${retention_days}' is not a non-negative integer" >&2
        return 1
    fi

    # remove stale lock files (-mtime +1: last modified at least two
    # full days ago)
    find "${data_path}/locks/" -name '*.lock' -type f -mtime +1 -delete

    # remove files older than ${retention_days} days from the cache
    find "${data_path}/cache/" -type f -mtime "+${retention_days}" -delete
}

# cleanup DokuWiki installations (path to datadir, number of days)
cleanup /var/www/data 180
Quelle: https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/
/var/www/public_html/robots.txt
# Deny the entire site to every crawler named in this group.
# All listed user agents share the single Disallow rule at the end.
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: GPTBot
User-agent: Google-Extended
User-agent: anthropic-ai
User-agent: Omgilibot
User-agent: Omgili
User-agent: FacebookBot
User-agent: Bytespider
User-agent: ImagesiftBot
Disallow: /
/etc/borgmatic/config.yaml
...
location:
    # List of source directories to backup (required). Globs and
    # tildes are expanded.
    source_directories:
        - /etc
        - /home
        - /root
        # contains /usr/local/bin/cleanup_dokuwiki_cache.sh
        - /usr/local
        - /var/log
        # contains the web root (/var/www/public_html) and the wiki
        # data directory (/var/www/data)
        - /var/www
...