Browse Source

Fix download OpenAI & Improve workflow

pull/22/head
lord-alfred 3 months ago
parent
commit
e803209f37
  1. 72
      .github/workflows/update.yml
  2. 37
      openai/downloader.sh
  3. 33
      openai/ipv4.txt

72
.github/workflows/update.yml

@ -13,7 +13,7 @@ jobs:
fetch-depth: 0
- name: Install WHOIS client
run: sudo apt install -y whois
run: sudo apt install -y whois parallel gawk
- name: Set up Python
uses: actions/setup-python@v5
@ -35,72 +35,26 @@ jobs:
- name: Download IPs
run: |
set -euo pipefail
set -x
bash google/downloader.sh
bash googlebot/downloader.sh
bash amazon/downloader.sh
bash microsoft/downloader.sh
bash oracle/downloader.sh
bash digitalocean/downloader.sh
bash bing/downloader.sh
bash github/downloader.sh
bash facebook/downloader.sh
bash twitter/downloader.sh
bash linode/downloader.sh
bash telegram/downloader.sh
bash openai/downloader.sh || echo 'GPTBot download failed' # TODO: fixme
bash cloudflare/downloader.sh
bash vultr/downloader.sh
bash apple-proxy/downloader.sh
bash protonvpn/downloader.sh
find . -name downloader.sh | sort -h | awk '{print "Executing "$1"...";system("bash "$1)}'
- name: Create All-In-One ranges
run: |
cat google/ipv4.txt googlebot/ipv4.txt amazon/ipv4.txt microsoft/ipv4.txt oracle/ipv4.txt digitalocean/ipv4.txt bing/ipv4.txt github/ipv4.txt facebook/ipv4.txt twitter/ipv4.txt linode/ipv4.txt telegram/ipv4.txt openai/ipv4.txt cloudflare/ipv4.txt vultr/ipv4.txt apple-proxy/ipv4.txt protonvpn/ipv4.txt | sort -V | uniq > all/ipv4.txt
cat google/ipv6.txt googlebot/ipv6.txt amazon/ipv6.txt microsoft/ipv6.txt digitalocean/ipv6.txt github/ipv6.txt facebook/ipv6.txt twitter/ipv6.txt linode/ipv6.txt telegram/ipv6.txt cloudflare/ipv6.txt vultr/ipv6.txt apple-proxy/ipv6.txt protonvpn/ipv6.txt | sort -V | uniq > all/ipv6.txt
cat $(find . -name ipv4.txt | sort -h) | sort -V | uniq > all/ipv4.txt
cat $(find . -name ipv6.txt | sort -h) | sort -V | uniq > all/ipv6.txt
- name: Merge ipv4 Ranges
run: |
set -euo pipefail
set -x
find . -name ipv4.txt | sort -h | parallel --will-cite -j 1 echo "Merging '{}'"';'python utils/merge.py --source={} '|' sort -V '>' {.}_merged.txt
- name: Merge Ranges
- name: Merge ipv6 Ranges
run: |
set -euo pipefail
set -x
# ipv4
python utils/merge.py --source=google/ipv4.txt | sort -V > google/ipv4_merged.txt
python utils/merge.py --source=googlebot/ipv4.txt | sort -V > googlebot/ipv4_merged.txt
python utils/merge.py --source=amazon/ipv4.txt | sort -V > amazon/ipv4_merged.txt
python utils/merge.py --source=microsoft/ipv4.txt | sort -V > microsoft/ipv4_merged.txt
python utils/merge.py --source=oracle/ipv4.txt | sort -V > oracle/ipv4_merged.txt
python utils/merge.py --source=digitalocean/ipv4.txt | sort -V > digitalocean/ipv4_merged.txt
python utils/merge.py --source=bing/ipv4.txt | sort -V > bing/ipv4_merged.txt
python utils/merge.py --source=github/ipv4.txt | sort -V > github/ipv4_merged.txt
python utils/merge.py --source=facebook/ipv4.txt | sort -V > facebook/ipv4_merged.txt
python utils/merge.py --source=twitter/ipv4.txt | sort -V > twitter/ipv4_merged.txt
python utils/merge.py --source=linode/ipv4.txt | sort -V > linode/ipv4_merged.txt
python utils/merge.py --source=telegram/ipv4.txt | sort -V > telegram/ipv4_merged.txt
python utils/merge.py --source=openai/ipv4.txt | sort -V > openai/ipv4_merged.txt
python utils/merge.py --source=cloudflare/ipv4.txt | sort -V > cloudflare/ipv4_merged.txt
python utils/merge.py --source=vultr/ipv4.txt | sort -V > vultr/ipv4_merged.txt
python utils/merge.py --source=apple-proxy/ipv4.txt | sort -V > apple-proxy/ipv4_merged.txt
python utils/merge.py --source=protonvpn/ipv4.txt | sort -V > protonvpn/ipv4_merged.txt
python utils/merge.py --source=all/ipv4.txt | sort -V > all/ipv4_merged.txt
# ipv6
python utils/merge.py --source=google/ipv6.txt | sort -V > google/ipv6_merged.txt
python utils/merge.py --source=googlebot/ipv6.txt | sort -V > googlebot/ipv6_merged.txt
python utils/merge.py --source=amazon/ipv6.txt | sort -V > amazon/ipv6_merged.txt
python utils/merge.py --source=microsoft/ipv6.txt | sort -V > microsoft/ipv6_merged.txt
# oracle does not provide ipv6
python utils/merge.py --source=digitalocean/ipv6.txt | sort -V > digitalocean/ipv6_merged.txt
# bing does not provide ipv6
python utils/merge.py --source=github/ipv6.txt | sort -V > github/ipv6_merged.txt
python utils/merge.py --source=facebook/ipv6.txt | sort -V > facebook/ipv6_merged.txt
python utils/merge.py --source=twitter/ipv6.txt | sort -V > twitter/ipv6_merged.txt
python utils/merge.py --source=linode/ipv6.txt | sort -V > linode/ipv6_merged.txt
python utils/merge.py --source=telegram/ipv6.txt | sort -V > telegram/ipv6_merged.txt
# openai does not provide ipv6
python utils/merge.py --source=cloudflare/ipv6.txt | sort -V > cloudflare/ipv6_merged.txt
python utils/merge.py --source=vultr/ipv6.txt | sort -V > vultr/ipv6_merged.txt
python utils/merge.py --source=apple-proxy/ipv6.txt | sort -V > apple-proxy/ipv6_merged.txt
python utils/merge.py --source=protonvpn/ipv6.txt | sort -V > protonvpn/ipv6_merged.txt
python utils/merge.py --source=all/ipv6.txt | sort -V > all/ipv6_merged.txt
find . -name ipv6.txt | sort -h | parallel --will-cite -j 1 echo "Merging '{}'"';'python utils/merge.py --source={} '|' sort -V '>' {.}_merged.txt
- name: Commit files
env:

37
openai/downloader.sh

@ -8,12 +8,37 @@ set -x
# get from public ranges
curl -s https://openai.com/gptbot-ranges.txt > /tmp/openai.txt
# save ipv4
grep -v ':' /tmp/openai.txt > /tmp/openai-ipv4.txt
# ipv6 not provided
# Fetch one OpenAI range document and append its IPv4 prefixes to the shared list.
#
# Arguments:
#   $1 - URL of a JSON document with a top-level "prefixes" array; each entry
#        may carry an "ipv4Prefix" field.
# Files:
#   /tmp/openai.json     - overwritten with the raw response body
#   /tmp/openai.txt      - overwritten with the prefixes extracted by jq
#   /tmp/openai-ipv4.txt - extracted IPv4 prefixes are appended
# Returns:
#   1 when the download fails, including when the server answers with an
#   HTTP error status.
download_and_parse_json() {
    # The request carries Chrome-style headers (presumably so the site does not
    # reject a bare curl client - TODO confirm). --fail turns an HTTP error
    # status into a non-zero exit instead of saving the error page as if it
    # were JSON; --show-error keeps the reason visible despite -s.
    curl --fail --show-error --connect-timeout 60 --retry 3 --retry-delay 15 -s "${1}" \
        -H 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7' \
        -H 'accept-language: en' \
        -H 'cache-control: no-cache' \
        -H 'pragma: no-cache' \
        -H 'priority: u=0, i' \
        -H 'sec-ch-ua: "Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"' \
        -H 'sec-ch-ua-mobile: ?0' \
        -H 'sec-ch-ua-platform: "macOS"' \
        -H 'sec-fetch-dest: document' \
        -H 'sec-fetch-mode: navigate' \
        -H 'sec-fetch-site: none' \
        -H 'sec-fetch-user: ?1' \
        -H 'upgrade-insecure-requests: 1' \
        -H 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36' \
        > /tmp/openai.json || return 1

    # Emit every non-null ipv4Prefix, one per line, as raw (unquoted) text.
    jq -r '.prefixes[].ipv4Prefix | select(. != null)' /tmp/openai.json > /tmp/openai.txt

    # save ipv4: drop any line containing ':' and append the rest
    grep -v ':' /tmp/openai.txt >> /tmp/openai-ipv4.txt
    # ipv6 not provided

    # Pause before the caller issues the next request.
    sleep 10
}
download_and_parse_json "https://openai.com/chatgpt-user.json"
download_and_parse_json "https://openai.com/searchbot.json"
download_and_parse_json "https://openai.com/gptbot.json"
# sort & uniq

33
openai/ipv4.txt

@ -1,2 +1,35 @@
4.196.118.112/28
4.197.22.112/28
4.227.36.0/25
13.65.240.240/28
20.42.10.176/28
20.61.70.160/28
20.97.189.96/28
20.125.66.80/28
20.161.75.208/28
20.171.206.0/24
20.171.207.0/24
20.215.188.192/28
23.98.142.176/28
23.98.179.16/28
40.84.180.64/28
40.84.180.224/28
40.84.221.208/28
40.84.221.224/28
51.8.102.0/24
51.8.155.48/28
51.8.155.64/28
51.8.155.112/28
52.156.77.144/28
52.159.249.96/28
52.225.75.208/28
52.230.152.0/24
52.233.106.0/24
52.236.94.144/28
135.234.64.0/24
135.237.131.208/28
172.178.140.144/28
172.178.141.128/28
172.182.193.160/28
172.183.222.128/28
172.203.190.128/28

Loading…
Cancel
Save