mirror of
https://github.com/Toperlock/sing-box-geosite.git
synced 2025-06-08 09:42:14 +08:00
Update main.py
This commit is contained in:
parent
f9da98f4f7
commit
e914c92c61
54
main.py
54
main.py
@ -2,14 +2,62 @@ import pandas as pd
|
|||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
|
import requests
|
||||||
|
import yaml
|
||||||
|
import ipaddress
|
||||||
|
|
||||||
def read_csv_and_append(link):
    """Read the comma-separated rule list at *link* into a DataFrame.

    The data is read without a header row; the columns are named
    ``pattern``, ``address`` and ``other``. Malformed lines are reported
    by pandas as warnings instead of raising.
    """
    return pd.read_csv(link, header=None, names=['pattern', 'address', 'other'], on_bad_lines='warn')


def read_yaml_from_url(url, timeout=30):
    """Download *url* and return its body parsed as YAML.

    Args:
        url: Address of the YAML document to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block forever on a stalled
            connection.

    Returns:
        Whatever ``yaml.safe_load`` produces for the response text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an HTTPError for bad responses
    return yaml.safe_load(response.text)
|
||||||
|
|
||||||
|
def read_list_from_url(url):
    """Load the comma-separated rule list at *url* as a DataFrame.

    The source is read without a header row and its columns are labelled
    ``pattern``, ``address`` and ``other``. Malformed lines make pandas
    emit a warning rather than raise.
    """
    column_names = ['pattern', 'address', 'other']
    return pd.read_csv(
        url,
        header=None,
        names=column_names,
        on_bad_lines='warn',
    )
|
||||||
|
|
||||||
|
def is_ipv4_or_ipv6(address):
    """Classify *address* as an IPv4 or IPv6 address/network.

    Args:
        address: Candidate value, e.g. ``'10.0.0.0/8'`` or ``'::1'``.

    Returns:
        ``'ipv4'`` or ``'ipv6'`` when *address* parses as a network of
        that family, otherwise ``None``.
    """
    families = (
        ('ipv4', ipaddress.IPv4Network),
        ('ipv6', ipaddress.IPv6Network),
    )
    for family, network_cls in families:
        try:
            # strict=False: a CIDR with host bits set (192.168.1.1/24) is
            # still an IP rule, not a domain name.
            network_cls(address, strict=False)
        except ValueError:
            continue
        return family
    return None
|
||||||
|
|
||||||
|
def parse_and_convert_to_dataframe(link):
    """Load the rule list at *link* into a pattern/address/other DataFrame.

    Links ending in ``.yaml`` or ``.txt`` are first tried as a YAML
    document with a ``payload`` list. Each payload entry becomes one row:

    * ``TYPE,value[,options]`` entries are split on commas; anything after
      the value goes to the ``other`` column, matching the three-column
      layout produced by ``read_list_from_url``.
    * A bare IP address or CIDR becomes an ``IP-CIDR`` row.
    * A bare entry starting with ``+`` becomes ``DOMAIN-SUFFIX`` with the
      ``+`` removed; any other bare entry becomes ``DOMAIN``.

    If the YAML route fails for any reason, or the link has another
    extension, the link is read as a comma-separated list instead.

    Args:
        link: URL of the rule list.

    Returns:
        A DataFrame with the columns ``pattern``, ``address`` and ``other``.
    """
    # Choose the parser from the link's extension.
    if not link.endswith(('.yaml', '.txt')):
        return read_list_from_url(link)

    try:
        yaml_data = read_yaml_from_url(link)
        rows = []
        for item in yaml_data.get('payload', []):
            # Strip stray quotes once so every branch sees the same text.
            entry = item.strip("'")
            other = None
            if ',' in entry:
                pattern, _, remainder = entry.partition(',')
                # Keep trailing options (e.g. "no-resolve") out of the address.
                address, separator, options = remainder.partition(',')
                if separator:
                    other = options.strip()
            elif is_ipv4_or_ipv6(entry):
                pattern, address = 'IP-CIDR', entry
            elif entry.startswith('+'):
                pattern, address = 'DOMAIN-SUFFIX', entry[1:]
            else:
                pattern, address = 'DOMAIN', entry
            rows.append({'pattern': pattern.strip(), 'address': address.strip(), 'other': other})
        return pd.DataFrame(rows, columns=['pattern', 'address', 'other'])
    except Exception:
        # Best-effort fallback: a .txt link is often a plain list, not YAML.
        # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
        # propagate.
        return read_list_from_url(link)
|
||||||
|
|
||||||
def parse_list_file(link, output_directory):
|
def parse_list_file(link, output_directory):
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
# 使用executor.map并行处理链接
|
# 使用executor.map并行处理链接
|
||||||
results = list(executor.map(read_csv_and_append, [link]))
|
results = list(executor.map(parse_and_convert_to_dataframe, [link]))
|
||||||
# 拼接为一个DataFrame
|
# 拼接为一个DataFrame
|
||||||
df = pd.concat(results, ignore_index=True)
|
df = pd.concat(results, ignore_index=True)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user