diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml
index 70b2847f..a87eea00 100644
--- a/.github/workflows/update.yml
+++ b/.github/workflows/update.yml
@@ -13,7 +13,7 @@ jobs:
           fetch-depth: 0
 
       - name: Install WHOIS client
-        run: sudo apt install -y whois
+        run: sudo apt install -y whois parallel gawk
 
       - name: Set up Python
         uses: actions/setup-python@v5
@@ -35,72 +35,42 @@ jobs:
 
       - name: Download IPs
         run: |
+          set -euo pipefail
           set -x
-          bash google/downloader.sh
-          bash googlebot/downloader.sh
-          bash amazon/downloader.sh
-          bash microsoft/downloader.sh
-          bash oracle/downloader.sh
-          bash digitalocean/downloader.sh
-          bash bing/downloader.sh
-          bash github/downloader.sh
-          bash facebook/downloader.sh
-          bash twitter/downloader.sh
-          bash linode/downloader.sh
-          bash telegram/downloader.sh
-          bash openai/downloader.sh || echo 'GPTBot download failed' # TODO: fixme
-          bash cloudflare/downloader.sh
-          bash vultr/downloader.sh
-          bash apple-proxy/downloader.sh
-          bash protonvpn/downloader.sh
+          # Run every downloader in a stable order. A plain shell loop (unlike
+          # awk's system()) lets `set -e` stop the step when a downloader fails.
+          find . -name downloader.sh | sort -h | while IFS= read -r script; do
+            echo "Executing ${script}..."
+            bash "${script}" < /dev/null
+          done
 
       - name: Create All-In-One ranges
         run: |
-          cat google/ipv4.txt googlebot/ipv4.txt amazon/ipv4.txt microsoft/ipv4.txt oracle/ipv4.txt digitalocean/ipv4.txt bing/ipv4.txt github/ipv4.txt facebook/ipv4.txt twitter/ipv4.txt linode/ipv4.txt telegram/ipv4.txt openai/ipv4.txt cloudflare/ipv4.txt vultr/ipv4.txt apple-proxy/ipv4.txt protonvpn/ipv4.txt | sort -V | uniq > all/ipv4.txt
-          cat google/ipv6.txt googlebot/ipv6.txt amazon/ipv6.txt microsoft/ipv6.txt digitalocean/ipv6.txt github/ipv6.txt facebook/ipv6.txt twitter/ipv6.txt linode/ipv6.txt telegram/ipv6.txt cloudflare/ipv6.txt vultr/ipv6.txt apple-proxy/ipv6.txt protonvpn/ipv6.txt | sort -V | uniq > all/ipv6.txt
+          set -euo pipefail
+          # Skip ./all/ so the aggregate never reads the file it is rewriting.
+          cat $(find . -name ipv4.txt -not -path './all/*' | sort -h) | sort -V | uniq > all/ipv4.txt
+          cat $(find . -name ipv6.txt -not -path './all/*' | sort -h) | sort -V | uniq > all/ipv6.txt
+
+      - name: Merge ipv4 Ranges
+        run: |
+          set -euo pipefail
+          set -x
+          # Looping in this shell keeps `pipefail` active, so a merge.py failure
+          # fails the step rather than being hidden behind sort's exit status.
+          find . -name ipv4.txt | sort -h | while IFS= read -r src; do
+            echo "Merging '${src}'"
+            python utils/merge.py --source="${src}" < /dev/null | sort -V > "${src%.txt}_merged.txt"
+          done
 
-      - name: Merge Ranges
+      - name: Merge ipv6 Ranges
         run: |
           set -euo pipefail
           set -x
-          # ipv4
-          python utils/merge.py --source=google/ipv4.txt | sort -V > google/ipv4_merged.txt
-          python utils/merge.py --source=googlebot/ipv4.txt | sort -V > googlebot/ipv4_merged.txt
-          python utils/merge.py --source=amazon/ipv4.txt | sort -V > amazon/ipv4_merged.txt
-          python utils/merge.py --source=microsoft/ipv4.txt | sort -V > microsoft/ipv4_merged.txt
-          python utils/merge.py --source=oracle/ipv4.txt | sort -V > oracle/ipv4_merged.txt
-          python utils/merge.py --source=digitalocean/ipv4.txt | sort -V > digitalocean/ipv4_merged.txt
-          python utils/merge.py --source=bing/ipv4.txt | sort -V > bing/ipv4_merged.txt
-          python utils/merge.py --source=github/ipv4.txt | sort -V > github/ipv4_merged.txt
-          python utils/merge.py --source=facebook/ipv4.txt | sort -V > facebook/ipv4_merged.txt
-          python utils/merge.py --source=twitter/ipv4.txt | sort -V > twitter/ipv4_merged.txt
-          python utils/merge.py --source=linode/ipv4.txt | sort -V > linode/ipv4_merged.txt
-          python utils/merge.py --source=telegram/ipv4.txt | sort -V > telegram/ipv4_merged.txt
-          python utils/merge.py --source=openai/ipv4.txt | sort -V > openai/ipv4_merged.txt
-          python utils/merge.py --source=cloudflare/ipv4.txt | sort -V > cloudflare/ipv4_merged.txt
-          python utils/merge.py --source=vultr/ipv4.txt | sort -V > vultr/ipv4_merged.txt
-          python utils/merge.py --source=apple-proxy/ipv4.txt | sort -V > apple-proxy/ipv4_merged.txt
-          python utils/merge.py --source=protonvpn/ipv4.txt | sort -V > protonvpn/ipv4_merged.txt
-          python utils/merge.py --source=all/ipv4.txt | sort -V > all/ipv4_merged.txt
-          # ipv6
-          python utils/merge.py --source=google/ipv6.txt | sort -V > google/ipv6_merged.txt
-          python utils/merge.py --source=googlebot/ipv6.txt | sort -V > googlebot/ipv6_merged.txt
-          python utils/merge.py --source=amazon/ipv6.txt | sort -V > amazon/ipv6_merged.txt
-          python utils/merge.py --source=microsoft/ipv6.txt | sort -V > microsoft/ipv6_merged.txt
-          # oracle not provide ipv6
-          python utils/merge.py --source=digitalocean/ipv6.txt | sort -V > digitalocean/ipv6_merged.txt
-          # bing not provide ipv6
-          python utils/merge.py --source=github/ipv6.txt | sort -V > github/ipv6_merged.txt
-          python utils/merge.py --source=facebook/ipv6.txt | sort -V > facebook/ipv6_merged.txt
-          python utils/merge.py --source=twitter/ipv6.txt | sort -V > twitter/ipv6_merged.txt
-          python utils/merge.py --source=linode/ipv6.txt | sort -V > linode/ipv6_merged.txt
-          python utils/merge.py --source=telegram/ipv6.txt | sort -V > telegram/ipv6_merged.txt
-          # openai not provide ipv6
-          python utils/merge.py --source=cloudflare/ipv6.txt | sort -V > cloudflare/ipv6_merged.txt
-          python utils/merge.py --source=vultr/ipv6.txt | sort -V > vultr/ipv6_merged.txt
-          python utils/merge.py --source=apple-proxy/ipv6.txt | sort -V > apple-proxy/ipv6_merged.txt
-          python utils/merge.py --source=protonvpn/ipv6.txt | sort -V > protonvpn/ipv6_merged.txt
-          python utils/merge.py --source=all/ipv6.txt | sort -V > all/ipv6_merged.txt
+          # Same loop as the ipv4 merge step, applied to the ipv6 lists.
+          find . -name ipv6.txt | sort -h | while IFS= read -r src; do
+            echo "Merging '${src}'"
+            python utils/merge.py --source="${src}" < /dev/null | sort -V > "${src%.txt}_merged.txt"
+          done
 
       - name: Commit files
         env:
diff --git a/openai/downloader.sh b/openai/downloader.sh
index 62b21f12..b0be6e56 100644
--- a/openai/downloader.sh
+++ b/openai/downloader.sh
@@ -8,12 +8,45 @@ set -x
 
 
 # get from public ranges
-curl -s https://openai.com/gptbot-ranges.txt > /tmp/openai.txt
-
-# save ipv4
-grep -v ':' /tmp/openai.txt > /tmp/openai-ipv4.txt
-
-# ipv6 not provided
+# Fetch one JSON range feed and append its IPv4 prefixes to /tmp/openai-ipv4.txt.
+#   $1 - URL of the JSON feed
+download_and_parse_json() {
+    # NOTE(review): browser-like headers, presumably so the site serves the file to curl - confirm
+    curl --connect-timeout 60 --retry 3 --retry-delay 15 -s "${1}" \
+        -H 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7' \
+        -H 'accept-language: en' \
+        -H 'cache-control: no-cache' \
+        -H 'pragma: no-cache' \
+        -H 'priority: u=0, i' \
+        -H 'sec-ch-ua: "Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"' \
+        -H 'sec-ch-ua-mobile: ?0' \
+        -H 'sec-ch-ua-platform: "macOS"' \
+        -H 'sec-fetch-dest: document' \
+        -H 'sec-fetch-mode: navigate' \
+        -H 'sec-fetch-site: none' \
+        -H 'sec-fetch-user: ?1' \
+        -H 'upgrade-insecure-requests: 1' \
+        -H 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36' \
+        > /tmp/openai.json
+
+    # print each non-null ipv4Prefix, one per line
+    jq '.prefixes[] | [.ipv4Prefix][] | select(. != null)' -r /tmp/openai.json > /tmp/openai.txt
+
+    # save ipv4
+    grep -v ':' /tmp/openai.txt >> /tmp/openai-ipv4.txt
+
+    # ipv6 not provided
+
+    # pause before the next feed is requested
+    sleep 10
+}
+
+# start empty: download_and_parse_json appends, so leftovers would leak in
+: > /tmp/openai-ipv4.txt
+
+download_and_parse_json "https://openai.com/chatgpt-user.json"
+download_and_parse_json "https://openai.com/searchbot.json"
+download_and_parse_json "https://openai.com/gptbot.json"
 
 
 # sort & uniq
diff --git a/openai/ipv4.txt b/openai/ipv4.txt
index 4a46e069..2e9dbda6 100644
--- a/openai/ipv4.txt
+++ b/openai/ipv4.txt
@@ -1,2 +1,35 @@
+4.196.118.112/28
+4.197.22.112/28
+4.227.36.0/25
+13.65.240.240/28
+20.42.10.176/28
+20.61.70.160/28
+20.97.189.96/28
+20.125.66.80/28
+20.161.75.208/28
+20.171.206.0/24
+20.171.207.0/24
+20.215.188.192/28
+23.98.142.176/28
+23.98.179.16/28
+40.84.180.64/28
+40.84.180.224/28
+40.84.221.208/28
+40.84.221.224/28
+51.8.102.0/24
+51.8.155.48/28
+51.8.155.64/28
+51.8.155.112/28
+52.156.77.144/28
+52.159.249.96/28
+52.225.75.208/28
 52.230.152.0/24
 52.233.106.0/24
+52.236.94.144/28
+135.234.64.0/24
+135.237.131.208/28
+172.178.140.144/28
+172.178.141.128/28
+172.182.193.160/28
+172.183.222.128/28
+172.203.190.128/28