Mirror of https://github.com/Toperlock/sing-box-geosite.git (synced 2025-06-08 09:42:14 +08:00)
Fix network errors
This commit is contained in:
parent: 52e9a14b2c
commit: 1e05c0bd1d
Changed file: main.py (20 lines changed)
@ -4,6 +4,7 @@ import concurrent.futures
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
|
import urllib
|
||||||
import yaml
|
import yaml
|
||||||
import ipaddress
|
import ipaddress
|
||||||
|
|
||||||
@ -63,6 +64,7 @@ def is_ipv4_or_ipv6(address):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def parse_and_convert_to_dataframe(link):
|
def parse_and_convert_to_dataframe(link):
|
||||||
|
rules = []
|
||||||
# 根据链接扩展名分情况处理
|
# 根据链接扩展名分情况处理
|
||||||
if link.endswith('.yaml') or link.endswith('.txt'):
|
if link.endswith('.yaml') or link.endswith('.txt'):
|
||||||
try:
|
try:
|
||||||
@ -92,10 +94,10 @@ def parse_and_convert_to_dataframe(link):
|
|||||||
rows.append({'pattern': pattern.strip(), 'address': address.strip(), 'other': None})
|
rows.append({'pattern': pattern.strip(), 'address': address.strip(), 'other': None})
|
||||||
df = pd.DataFrame(rows, columns=['pattern', 'address', 'other'])
|
df = pd.DataFrame(rows, columns=['pattern', 'address', 'other'])
|
||||||
except:
|
except:
|
||||||
df, rules_from_url = read_list_from_url(link)
|
df, rules = read_list_from_url(link)
|
||||||
else:
|
else:
|
||||||
df, rules_from_url = read_list_from_url(link)
|
df, rules = read_list_from_url(link)
|
||||||
return df
|
return df, rules
|
||||||
|
|
||||||
# 对字典进行排序,含list of dict
|
# 对字典进行排序,含list of dict
|
||||||
def sort_dict(obj):
|
def sort_dict(obj):
|
||||||
@ -110,8 +112,10 @@ def sort_dict(obj):
|
|||||||
|
|
||||||
def parse_list_file(link, output_directory):
|
def parse_list_file(link, output_directory):
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
results= list(executor.map(parse_and_convert_to_dataframe, [link])) # 使用executor.map并行处理链接
|
results= list(executor.map(parse_and_convert_to_dataframe, [link])) # 使用executor.map并行处理链接, 得到(df, rules)元组的列表
|
||||||
df = pd.concat(results, ignore_index=True) # 拼接为一个DataFrame
|
dfs = [df for df, rules in results] # 提取df的内容
|
||||||
|
rules_list = [rules for df, rules in results] # 提取逻辑规则rules的内容
|
||||||
|
df = pd.concat(dfs, ignore_index=True) # 拼接为一个DataFrame
|
||||||
df = df[~df['pattern'].str.contains('#')].reset_index(drop=True) # 删除pattern中包含#号的行
|
df = df[~df['pattern'].str.contains('#')].reset_index(drop=True) # 删除pattern中包含#号的行
|
||||||
df = df[df['pattern'].isin(MAP_DICT.keys())].reset_index(drop=True) # 删除不在字典中的pattern
|
df = df[df['pattern'].isin(MAP_DICT.keys())].reset_index(drop=True) # 删除不在字典中的pattern
|
||||||
df = df.drop_duplicates().reset_index(drop=True) # 删除重复行
|
df = df.drop_duplicates().reset_index(drop=True) # 删除重复行
|
||||||
@ -136,9 +140,8 @@ def parse_list_file(link, output_directory):
|
|||||||
result_rules["rules"].insert(0, {'domain': domain_entries})
|
result_rules["rules"].insert(0, {'domain': domain_entries})
|
||||||
|
|
||||||
# 处理逻辑规则
|
# 处理逻辑规则
|
||||||
_, rules_from_url = read_list_from_url(link)
|
if rules_list[0] != "[]":
|
||||||
if rules_from_url:
|
result_rules["rules"].extend(rules_list[0])
|
||||||
result_rules["rules"].extend(rules_from_url)
|
|
||||||
|
|
||||||
# 使用 output_directory 拼接完整路径
|
# 使用 output_directory 拼接完整路径
|
||||||
file_name = os.path.join(output_directory, f"{os.path.basename(link).split('.')[0]}.json")
|
file_name = os.path.join(output_directory, f"{os.path.basename(link).split('.')[0]}.json")
|
||||||
@ -161,6 +164,7 @@ result_file_names = []
|
|||||||
for link in links:
|
for link in links:
|
||||||
result_file_name = parse_list_file(link, output_directory=output_dir)
|
result_file_name = parse_list_file(link, output_directory=output_dir)
|
||||||
result_file_names.append(result_file_name)
|
result_file_names.append(result_file_name)
|
||||||
|
print(result_file_names)
|
||||||
|
|
||||||
# 打印生成的文件名
|
# 打印生成的文件名
|
||||||
# for file_name in result_file_names:
|
# for file_name in result_file_names:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user