From 2fb0e7c473113360afa9f9e70cc5596162a25497 Mon Sep 17 00:00:00 2001 From: Cyril Chaboisseau Date: Sat, 1 May 2021 15:08:52 +0200 Subject: [PATCH 1/3] deduplicate list of user-agents --- scripts/user-agents.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/user-agents.sh b/scripts/user-agents.sh index 9207053..3837392 100755 --- a/scripts/user-agents.sh +++ b/scripts/user-agents.sh @@ -15,8 +15,8 @@ elif [ -S /tmp/autoconf.sock ] ; then fi # generate new conf -BLACKLIST="$(curl -s https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list) -$(curl -s https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Crawlers.txt)" +BLACKLIST="$(curl -s https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list; +curl -s https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Crawlers.txt | sort -u)" if [ "$?" 
-ne 0 ] ; then job_log "[BLACKLIST] can't update user-agent list" fi From 02ae3b6bd321e8bbbef47d5a62bfc393a5335298 Mon Sep 17 00:00:00 2001 From: Cyril Chaboisseau Date: Sat, 1 May 2021 15:48:33 +0200 Subject: [PATCH 2/3] change IFS before subshell IFS must be set to empty before the two curl commands run so that their combined output keeps its line-by-line formatting. --- scripts/user-agents.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/user-agents.sh b/scripts/user-agents.sh index 3837392..8d091ed 100755 --- a/scripts/user-agents.sh +++ b/scripts/user-agents.sh @@ -15,7 +15,7 @@ elif [ -S /tmp/autoconf.sock ] ; then fi # generate new conf -BLACKLIST="$(curl -s https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list; +IFS= BLACKLIST="$(curl -s https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list; curl -s https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Crawlers.txt | sort -u)" if [ "$?" -ne 0 ] ; then job_log "[BLACKLIST] can't update user-agent list" From f8d71e067e868ad4dc084b92dfbabd05e48cbc9a Mon Sep 17 00:00:00 2001 From: Cyril Chaboisseau Date: Sat, 1 May 2021 19:04:18 +0200 Subject: [PATCH 3/3] improved way to generate user-agent file --- scripts/user-agents.sh | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/scripts/user-agents.sh b/scripts/user-agents.sh index 8d091ed..fb704cf 100755 --- a/scripts/user-agents.sh +++ b/scripts/user-agents.sh @@ -20,13 +20,7 @@ curl -s https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Cr if [ "$?" 
-ne 0 ] ; then job_log "[BLACKLIST] can't update user-agent list" fi -DATA="" -IFS=$'\n' -for ua in $BLACKLIST ; do - DATA="${DATA}~*${ua} yes;\n" -done -DATA_ESCAPED=$(echo "$DATA" | sed 's: :\\\\ :g' | sed 's:\\\\ yes;: yes;:g' | sed 's:\\\\\\ :\\\\ :g') -echo -e "map \$http_user_agent \$bad_user_agent { default no; $DATA_ESCAPED }" > /tmp/map-user-agent.conf +echo -e "map \$http_user_agent \$bad_user_agent { default no; $(echo $BLACKLIST | sed 's:\([^\\]\) :\1\\\\ :;s:^:~*:;s:$: yes;:') }" > /tmp/map-user-agent.conf # check number of lines lines="$(wc -l /tmp/map-user-agent.conf | cut -d ' ' -f 1)"