Fix network errors

Dorae 2024-05-27 14:57:57 +08:00 committed by GitHub
parent 52e9a14b2c
commit 1e05c0bd1d

main.py (20 changed lines)

@@ -4,6 +4,7 @@ import concurrent.futures
 import os
 import json
 import requests
+import urllib
 import yaml
 import ipaddress
@@ -63,6 +64,7 @@ def is_ipv4_or_ipv6(address):
        return None

def parse_and_convert_to_dataframe(link):
+    rules = []
     # Handle the link according to its file extension
     if link.endswith('.yaml') or link.endswith('.txt'):
         try:
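Note: parse_and_convert_to_dataframe now ends with "return df, rules", so on the .yaml/.txt success path, where read_list_from_url is never reached, the name rules must already be bound or the return would raise NameError; the "rules = []" default guarantees every path returns the same tuple shape. A minimal sketch of the pattern (parse_stub is a hypothetical stand-in, not code from this repo):

import pandas as pd

def parse_stub(link):
    rules = []  # default: no logical rules, so 'rules' is bound on every path
    if link.endswith('.txt'):
        df = pd.DataFrame(columns=['pattern', 'address', 'other'])
    else:
        # stand-in for the read_list_from_url fallback, which returns (df, rules)
        df, rules = pd.DataFrame(), [{'type': 'logical'}]
    return df, rules  # always a (DataFrame, list) pair

print(parse_stub('demo.txt'))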
@@ -92,10 +94,10 @@ def parse_and_convert_to_dataframe(link):
                 rows.append({'pattern': pattern.strip(), 'address': address.strip(), 'other': None})
             df = pd.DataFrame(rows, columns=['pattern', 'address', 'other'])
         except:
-            df, rules_from_url = read_list_from_url(link)
+            df, rules = read_list_from_url(link)
     else:
-        df, rules_from_url = read_list_from_url(link)
+        df, rules = read_list_from_url(link)
-    return df
+    return df, rules

# Sort a dict (including lists of dicts)
def sort_dict(obj):
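Note: the try/except above falls back to read_list_from_url for anything that fails to parse locally; the commit keeps the bare "except:", which also swallows KeyboardInterrupt and SystemExit. A sketch of the same fallback with a narrower handler (fetch_remote is a hypothetical stand-in for read_list_from_url):

import pandas as pd

def fetch_remote(link):
    # stand-in for read_list_from_url: returns (df, rules)
    return pd.DataFrame(columns=['pattern', 'address', 'other']), []

def parse(link, payload):
    rules = []
    try:
        rows = [dict(zip(['pattern', 'address', 'other'], line.split(',') + [None]))
                for line in payload]
        df = pd.DataFrame(rows, columns=['pattern', 'address', 'other'])
    except Exception:  # narrower than a bare except:
        df, rules = fetch_remote(link)
    return df, rules

print(parse('demo.list', ['DOMAIN-SUFFIX,example.com']))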
@@ -110,8 +112,10 @@ def sort_dict(obj):
def parse_list_file(link, output_directory):
    with concurrent.futures.ThreadPoolExecutor() as executor:
-        results = list(executor.map(parse_and_convert_to_dataframe, [link]))  # process the links in parallel with executor.map
-        df = pd.concat(results, ignore_index=True)  # concatenate into a single DataFrame
+        results = list(executor.map(parse_and_convert_to_dataframe, [link]))  # process the links in parallel with executor.map, yielding a list of (df, rules) tuples
+        dfs = [df for df, rules in results]  # extract the DataFrames
+        rules_list = [rules for df, rules in results]  # extract the logical rules
+        df = pd.concat(dfs, ignore_index=True)  # concatenate into a single DataFrame
         df = df[~df['pattern'].str.contains('#')].reset_index(drop=True)  # drop rows whose pattern contains '#'
         df = df[df['pattern'].isin(MAP_DICT.keys())].reset_index(drop=True)  # drop patterns not present in MAP_DICT
         df = df.drop_duplicates().reset_index(drop=True)  # drop duplicate rows
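Note: the two list comprehensions iterate results twice; an equivalent single-pass idiom is zip(*results). A minimal sketch with dummy data (not the committed code):

import pandas as pd

results = [(pd.DataFrame({'pattern': ['DOMAIN'], 'address': ['example.com'], 'other': [None]}),
            [{'type': 'logical'}])]
dfs, rules_list = (list(part) for part in zip(*results))  # same effect as the two comprehensions
df = pd.concat(dfs, ignore_index=True)
print(df)
print(rules_list)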
@@ -136,9 +140,8 @@ def parse_list_file(link, output_directory):
         result_rules["rules"].insert(0, {'domain': domain_entries})
     # Handle the logical rules
-    _, rules_from_url = read_list_from_url(link)
-    if rules_from_url:
-        result_rules["rules"].extend(rules_from_url)
+    if rules_list[0] != "[]":
+        result_rules["rules"].extend(rules_list[0])

     # Use output_directory to build the full path
     file_name = os.path.join(output_directory, f"{os.path.basename(link).split('.')[0]}.json")
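Note: the new guard compares rules_list[0] against the literal string "[]", which suggests read_list_from_url can hand back its rules serialized as a JSON string when the list is empty. A sketch of a check that treats both an empty list and that sentinel string as "no rules" (assumed behavior, not confirmed by this commit):

result_rules = {'version': 1, 'rules': []}  # dummy value for illustration
rules_list = ['[]']                         # dummy value for illustration
rules = rules_list[0]
if rules and rules != '[]':                 # skips both [] and the '[]' sentinel
    result_rules['rules'].extend(rules)
print(result_rules)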
@@ -161,6 +164,7 @@ result_file_names = []
for link in links:
     result_file_name = parse_list_file(link, output_directory=output_dir)
     result_file_names.append(result_file_name)
+    print(result_file_names)

# Print the generated file names
# for file_name in result_file_names: