diff --git a/main.py b/main.py
index 8d6bd64..e53f264 100644
--- a/main.py
+++ b/main.py
@@ -110,50 +110,54 @@ def sort_dict(obj):
     return obj

 def parse_list_file(link, output_directory):
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        results = list(executor.map(parse_and_convert_to_dataframe, [link]))  # Process the link in parallel via executor.map, yielding a list of (df, rules) tuples
-        dfs = [df for df, rules in results]  # Extract the DataFrames
-        rules_list = [rules for df, rules in results]  # Extract the logical rules
-        df = pd.concat(dfs, ignore_index=True)  # Concatenate into a single DataFrame
-        df = df[~df['pattern'].str.contains('#')].reset_index(drop=True)  # Drop rows whose pattern contains '#'
-        df = df[df['pattern'].isin(MAP_DICT.keys())].reset_index(drop=True)  # Drop patterns missing from the mapping dict
-        df = df.drop_duplicates().reset_index(drop=True)  # Drop duplicate rows
-        df['pattern'] = df['pattern'].replace(MAP_DICT)  # Map each pattern to its dictionary value
-        os.makedirs(output_directory, exist_ok=True)  # Create the output directory
+    try:
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            results = list(executor.map(parse_and_convert_to_dataframe, [link]))  # Process the link in parallel via executor.map, yielding a list of (df, rules) tuples
+            dfs = [df for df, rules in results]  # Extract the DataFrames
+            rules_list = [rules for df, rules in results]  # Extract the logical rules
+            df = pd.concat(dfs, ignore_index=True)  # Concatenate into a single DataFrame
+            df = df[~df['pattern'].str.contains('#')].reset_index(drop=True)  # Drop rows whose pattern contains '#'
+            df = df[df['pattern'].isin(MAP_DICT.keys())].reset_index(drop=True)  # Drop patterns missing from the mapping dict
+            df = df.drop_duplicates().reset_index(drop=True)  # Drop duplicate rows
+            df['pattern'] = df['pattern'].replace(MAP_DICT)  # Map each pattern to its dictionary value
+            os.makedirs(output_directory, exist_ok=True)  # Create the output directory

-        result_rules = {"version": 1, "rules": []}
-        domain_entries = []
-        for pattern, addresses in df.groupby('pattern')['address'].apply(list).to_dict().items():
-            if pattern == 'domain_suffix':
-                rule_entry = {pattern: [address.strip() for address in addresses]}
-                result_rules["rules"].append(rule_entry)
-                # domain_entries.extend([address.strip() for address in addresses])  # versions below 1.9 need extra handling for domain_suffix
-            elif pattern == 'domain':
-                domain_entries.extend([address.strip() for address in addresses])
-            else:
-                rule_entry = {pattern: [address.strip() for address in addresses]}
-                result_rules["rules"].append(rule_entry)
-        # Remove duplicate values from 'domain_entries'
-        domain_entries = list(set(domain_entries))
-        if domain_entries:
-            result_rules["rules"].insert(0, {'domain': domain_entries})
+            result_rules = {"version": 1, "rules": []}
+            domain_entries = []
+            for pattern, addresses in df.groupby('pattern')['address'].apply(list).to_dict().items():
+                if pattern == 'domain_suffix':
+                    rule_entry = {pattern: [address.strip() for address in addresses]}
+                    result_rules["rules"].append(rule_entry)
+                    # domain_entries.extend([address.strip() for address in addresses])  # versions below 1.9 need extra handling for domain_suffix
+                elif pattern == 'domain':
+                    domain_entries.extend([address.strip() for address in addresses])
+                else:
+                    rule_entry = {pattern: [address.strip() for address in addresses]}
+                    result_rules["rules"].append(rule_entry)
+            # Remove duplicate values from 'domain_entries'
+            domain_entries = list(set(domain_entries))
+            if domain_entries:
+                result_rules["rules"].insert(0, {'domain': domain_entries})

-        # Handle the logical rules
-        """
-        if rules_list[0] != "[]":
-            result_rules["rules"].extend(rules_list[0])
-        """
+            # Handle the logical rules
+            """
+            if rules_list[0] != "[]":
+                result_rules["rules"].extend(rules_list[0])
+            """

-        # Build the full output path under output_directory
-        file_name = os.path.join(output_directory, f"{os.path.basename(link).split('.')[0]}.json")
-        with open(file_name, 'w', encoding='utf-8') as output_file:
-            result_rules_str = json.dumps(sort_dict(result_rules), ensure_ascii=False, indent=2)
-            result_rules_str = result_rules_str.replace('\\\\', '\\')
-            output_file.write(result_rules_str)
+            # Build the full output path under output_directory
+            file_name = os.path.join(output_directory, f"{os.path.basename(link).split('.')[0]}.json")
+            with open(file_name, 'w', encoding='utf-8') as output_file:
+                result_rules_str = json.dumps(sort_dict(result_rules), ensure_ascii=False, indent=2)
+                result_rules_str = result_rules_str.replace('\\\\', '\\')
+                output_file.write(result_rules_str)

-        srs_path = file_name.replace(".json", ".srs")
-        os.system(f"sing-box rule-set compile --output {srs_path} {file_name}")
-        return file_name
+            srs_path = file_name.replace(".json", ".srs")
+            os.system(f"sing-box rule-set compile --output {srs_path} {file_name}")
+            return file_name
+    except Exception:
+        print(f'Error fetching link, skipping: {link}')
+        pass

 # Read each link from links.txt and generate the corresponding JSON file
 with open("../links.txt", 'r') as links_file:
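For context on the grouping step: df.groupby('pattern')['address'].apply(list).to_dict() collapses the DataFrame into one address list per rule type, which the loop then reshapes into sing-box's headless rule-set layout ({"version": 1, "rules": [...]}), with deduplicated plain domain entries inserted first. A self-contained sketch with made-up addresses, standing in for real parse_and_convert_to_dataframe output:

import json
import pandas as pd

# Hypothetical parsed rows; real data comes from the fetched rule list.
df = pd.DataFrame({
    "pattern": ["domain", "domain_suffix", "domain", "domain"],
    "address": ["example.com", ".example.net", "example.org", "example.com"],
})

grouped = df.groupby("pattern")["address"].apply(list).to_dict()
# {'domain': ['example.com', 'example.org', 'example.com'], 'domain_suffix': ['.example.net']}

result_rules = {"version": 1, "rules": [{"domain_suffix": grouped["domain_suffix"]}]}
domain_entries = list(set(grouped["domain"]))  # dedupe plain domains, as in the diff
result_rules["rules"].insert(0, {"domain": domain_entries})
print(json.dumps(result_rules, indent=2))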
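One note on the compile step: os.system interpolates both paths into a shell string, so a path containing spaces or shell metacharacters would break the command, and a failed compile goes unnoticed. A minimal alternative sketch using subprocess.run with an argument list, assuming sing-box is on PATH (the helper name is hypothetical, not part of the patch):

import subprocess

def compile_rule_set(file_name: str) -> str:
    """Compile a headless rule-set JSON into the binary .srs format."""
    srs_path = file_name.replace(".json", ".srs")
    subprocess.run(
        ["sing-box", "rule-set", "compile", "--output", srs_path, file_name],
        check=True,  # raise CalledProcessError on a non-zero exit code
    )
    return srs_path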