mirror of
https://github.com/zhinianboke/xianyu-auto-reply.git
synced 2025-08-02 04:27:36 +08:00
373 lines
14 KiB
Python
373 lines
14 KiB
Python
"""
|
||
闲鱼订单详情获取工具
|
||
基于Playwright实现订单详情页面访问和数据提取
|
||
"""
|
||
|
||
import asyncio
|
||
import time
|
||
import sys
|
||
import os
|
||
from typing import Optional, Dict, Any
|
||
from playwright.async_api import async_playwright, Browser, BrowserContext, Page
|
||
from loguru import logger
|
||
|
||
# 修复Docker环境中的asyncio事件循环策略问题
|
||
if sys.platform.startswith('linux') or os.getenv('DOCKER_ENV'):
|
||
try:
|
||
# 在Linux/Docker环境中设置事件循环策略
|
||
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
|
||
except Exception as e:
|
||
logger.warning(f"设置事件循环策略失败: {e}")
|
||
|
||
# 确保在Docker环境中使用正确的事件循环
|
||
if os.getenv('DOCKER_ENV'):
|
||
try:
|
||
# 强制使用SelectorEventLoop(在Docker中更稳定)
|
||
if hasattr(asyncio, 'SelectorEventLoop'):
|
||
loop = asyncio.SelectorEventLoop()
|
||
asyncio.set_event_loop(loop)
|
||
except Exception as e:
|
||
logger.warning(f"设置SelectorEventLoop失败: {e}")
|
||
|
||
|
||
class OrderDetailFetcher:
|
||
"""闲鱼订单详情获取器"""
|
||
|
||
def __init__(self, cookie_string: str = None):
|
||
self.browser: Optional[Browser] = None
|
||
self.context: Optional[BrowserContext] = None
|
||
self.page: Optional[Page] = None
|
||
|
||
# 请求头配置
|
||
self.headers = {
|
||
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||
"accept-language": "en,zh-CN;q=0.9,zh;q=0.8,ru;q=0.7",
|
||
"cache-control": "no-cache",
|
||
"pragma": "no-cache",
|
||
"priority": "u=0, i",
|
||
"sec-ch-ua": "\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"138\", \"Google Chrome\";v=\"138\"",
|
||
"sec-ch-ua-mobile": "?0",
|
||
"sec-ch-ua-platform": "\"Windows\"",
|
||
"sec-fetch-dest": "document",
|
||
"sec-fetch-mode": "navigate",
|
||
"sec-fetch-site": "same-origin",
|
||
"sec-fetch-user": "?1",
|
||
"upgrade-insecure-requests": "1"
|
||
}
|
||
|
||
# Cookie配置 - 支持动态传入
|
||
self.cookie = cookie_string
|
||
|
||
async def init_browser(self, headless: bool = True):
|
||
"""初始化浏览器"""
|
||
try:
|
||
playwright = await async_playwright().start()
|
||
|
||
# 启动浏览器(Docker环境优化)
|
||
browser_args = [
|
||
'--no-sandbox',
|
||
'--disable-setuid-sandbox',
|
||
'--disable-dev-shm-usage',
|
||
'--disable-accelerated-2d-canvas',
|
||
'--no-first-run',
|
||
'--no-zygote',
|
||
'--disable-gpu',
|
||
'--disable-background-timer-throttling',
|
||
'--disable-backgrounding-occluded-windows',
|
||
'--disable-renderer-backgrounding',
|
||
'--disable-features=TranslateUI',
|
||
'--disable-ipc-flooding-protection',
|
||
'--disable-extensions',
|
||
'--disable-default-apps',
|
||
'--disable-sync',
|
||
'--disable-translate',
|
||
'--hide-scrollbars',
|
||
'--mute-audio',
|
||
'--no-default-browser-check',
|
||
'--no-pings',
|
||
'--single-process' # 在Docker中使用单进程模式
|
||
]
|
||
|
||
# 在Docker环境中添加额外参数
|
||
if os.getenv('DOCKER_ENV'):
|
||
browser_args.extend([
|
||
'--disable-background-networking',
|
||
'--disable-background-timer-throttling',
|
||
'--disable-client-side-phishing-detection',
|
||
'--disable-default-apps',
|
||
'--disable-hang-monitor',
|
||
'--disable-popup-blocking',
|
||
'--disable-prompt-on-repost',
|
||
'--disable-sync',
|
||
'--disable-web-resources',
|
||
'--metrics-recording-only',
|
||
'--no-first-run',
|
||
'--safebrowsing-disable-auto-update',
|
||
'--enable-automation',
|
||
'--password-store=basic',
|
||
'--use-mock-keychain'
|
||
])
|
||
|
||
self.browser = await playwright.chromium.launch(
|
||
headless=headless,
|
||
args=browser_args
|
||
)
|
||
|
||
# 创建浏览器上下文
|
||
self.context = await self.browser.new_context(
|
||
viewport={'width': 1920, 'height': 1080},
|
||
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'
|
||
)
|
||
|
||
# 设置额外的HTTP头
|
||
await self.context.set_extra_http_headers(self.headers)
|
||
|
||
# 创建页面
|
||
self.page = await self.context.new_page()
|
||
|
||
# 设置Cookie
|
||
await self._set_cookies()
|
||
|
||
logger.info("浏览器初始化成功")
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.error(f"浏览器初始化失败: {e}")
|
||
return False
|
||
|
||
async def _set_cookies(self):
|
||
"""设置Cookie"""
|
||
try:
|
||
# 解析Cookie字符串
|
||
cookies = []
|
||
for cookie_pair in self.cookie.split('; '):
|
||
if '=' in cookie_pair:
|
||
name, value = cookie_pair.split('=', 1)
|
||
cookies.append({
|
||
'name': name.strip(),
|
||
'value': value.strip(),
|
||
'domain': '.goofish.com',
|
||
'path': '/'
|
||
})
|
||
|
||
# 添加Cookie到上下文
|
||
await self.context.add_cookies(cookies)
|
||
logger.info(f"已设置 {len(cookies)} 个Cookie")
|
||
|
||
except Exception as e:
|
||
logger.error(f"设置Cookie失败: {e}")
|
||
|
||
async def fetch_order_detail(self, order_id: str, timeout: int = 30) -> Optional[Dict[str, Any]]:
|
||
"""
|
||
获取订单详情
|
||
|
||
Args:
|
||
order_id: 订单ID
|
||
timeout: 超时时间(秒)
|
||
|
||
Returns:
|
||
包含订单详情的字典,失败时返回None
|
||
"""
|
||
try:
|
||
if not self.page:
|
||
logger.error("浏览器未初始化")
|
||
return None
|
||
|
||
# 构建订单详情URL
|
||
url = f"https://www.goofish.com/order-detail?orderId={order_id}&role=seller"
|
||
logger.info(f"开始访问订单详情页面: {url}")
|
||
|
||
# 访问页面
|
||
response = await self.page.goto(url, wait_until='networkidle', timeout=timeout * 1000)
|
||
|
||
if not response or response.status != 200:
|
||
logger.error(f"页面访问失败,状态码: {response.status if response else 'None'}")
|
||
return None
|
||
|
||
logger.info("页面加载成功,等待内容渲染...")
|
||
|
||
# 等待页面完全加载
|
||
await self.page.wait_for_load_state('networkidle')
|
||
|
||
# 额外等待确保动态内容加载完成
|
||
await asyncio.sleep(3)
|
||
|
||
# 获取并解析SKU信息
|
||
sku_info = await self._get_sku_content()
|
||
|
||
# 获取页面标题
|
||
title = await self.page.title()
|
||
|
||
result = {
|
||
'order_id': order_id,
|
||
'url': url,
|
||
'title': title,
|
||
'sku_info': sku_info, # 包含解析后的规格信息
|
||
'spec_name': sku_info.get('spec_name', '') if sku_info else '',
|
||
'spec_value': sku_info.get('spec_value', '') if sku_info else '',
|
||
'timestamp': time.time()
|
||
}
|
||
|
||
logger.info(f"订单详情获取成功: {order_id}")
|
||
if sku_info:
|
||
logger.info(f"规格信息 - 名称: {result['spec_name']}, 值: {result['spec_value']}")
|
||
return result
|
||
|
||
except Exception as e:
|
||
logger.error(f"获取订单详情失败: {e}")
|
||
return None
|
||
|
||
def _parse_sku_content(self, sku_content: str) -> Dict[str, str]:
|
||
"""
|
||
解析SKU内容,根据冒号分割规格名称和规格值
|
||
|
||
Args:
|
||
sku_content: 原始SKU内容字符串
|
||
|
||
Returns:
|
||
包含规格名称和规格值的字典,如果解析失败则返回空字典
|
||
"""
|
||
try:
|
||
if not sku_content or ':' not in sku_content:
|
||
logger.warning(f"SKU内容格式无效或不包含冒号: {sku_content}")
|
||
return {}
|
||
|
||
# 根据冒号分割
|
||
parts = sku_content.split(':', 1) # 只分割第一个冒号
|
||
|
||
if len(parts) == 2:
|
||
spec_name = parts[0].strip()
|
||
spec_value = parts[1].strip()
|
||
|
||
if spec_name and spec_value:
|
||
result = {
|
||
'spec_name': spec_name,
|
||
'spec_value': spec_value
|
||
}
|
||
logger.info(f"SKU解析成功 - 规格名称: {spec_name}, 规格值: {spec_value}")
|
||
return result
|
||
else:
|
||
logger.warning(f"SKU解析失败,规格名称或值为空: 名称='{spec_name}', 值='{spec_value}'")
|
||
return {}
|
||
else:
|
||
logger.warning(f"SKU内容分割失败: {sku_content}")
|
||
return {}
|
||
|
||
except Exception as e:
|
||
logger.error(f"解析SKU内容异常: {e}")
|
||
return {}
|
||
|
||
async def _get_sku_content(self) -> Optional[Dict[str, str]]:
|
||
"""获取并解析SKU内容"""
|
||
try:
|
||
# 等待SKU元素出现
|
||
sku_selector = '.sku--u_ddZval'
|
||
|
||
# 检查元素是否存在
|
||
sku_element = await self.page.query_selector(sku_selector)
|
||
|
||
if sku_element:
|
||
# 获取元素文本内容
|
||
sku_content = await sku_element.text_content()
|
||
if sku_content:
|
||
sku_content = sku_content.strip()
|
||
logger.info(f"找到SKU原始内容: {sku_content}")
|
||
print(f"🛍️ SKU原始内容: {sku_content}")
|
||
|
||
# 解析SKU内容
|
||
parsed_sku = self._parse_sku_content(sku_content)
|
||
if parsed_sku:
|
||
print(f"📋 规格名称: {parsed_sku['spec_name']}")
|
||
print(f"📝 规格值: {parsed_sku['spec_value']}")
|
||
return parsed_sku
|
||
else:
|
||
logger.warning("SKU内容解析失败")
|
||
return {}
|
||
else:
|
||
logger.warning("SKU元素内容为空")
|
||
return {}
|
||
else:
|
||
logger.warning("未找到SKU元素")
|
||
|
||
# 尝试获取页面的所有class包含sku的元素
|
||
all_sku_elements = await self.page.query_selector_all('[class*="sku"]')
|
||
if all_sku_elements:
|
||
logger.info(f"找到 {len(all_sku_elements)} 个包含'sku'的元素")
|
||
for i, element in enumerate(all_sku_elements):
|
||
class_name = await element.get_attribute('class')
|
||
text_content = await element.text_content()
|
||
logger.info(f"SKU元素 {i+1}: class='{class_name}', text='{text_content}'")
|
||
|
||
return {}
|
||
|
||
except Exception as e:
|
||
logger.error(f"获取SKU内容失败: {e}")
|
||
return {}
|
||
|
||
async def close(self):
|
||
"""关闭浏览器"""
|
||
try:
|
||
if self.page:
|
||
await self.page.close()
|
||
if self.context:
|
||
await self.context.close()
|
||
if self.browser:
|
||
await self.browser.close()
|
||
logger.info("浏览器已关闭")
|
||
except Exception as e:
|
||
logger.error(f"关闭浏览器失败: {e}")
|
||
|
||
async def __aenter__(self):
|
||
"""异步上下文管理器入口"""
|
||
await self.init_browser()
|
||
return self
|
||
|
||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||
"""异步上下文管理器出口"""
|
||
await self.close()
|
||
|
||
|
||
# 便捷函数
|
||
async def fetch_order_detail_simple(order_id: str, cookie_string: str = None, headless: bool = True) -> Optional[Dict[str, Any]]:
|
||
"""
|
||
简单的订单详情获取函数
|
||
|
||
Args:
|
||
order_id: 订单ID
|
||
cookie_string: Cookie字符串,如果不提供则使用默认值
|
||
headless: 是否无头模式
|
||
|
||
Returns:
|
||
订单详情字典或None
|
||
"""
|
||
fetcher = OrderDetailFetcher(cookie_string)
|
||
try:
|
||
if await fetcher.init_browser(headless=headless):
|
||
return await fetcher.fetch_order_detail(order_id)
|
||
finally:
|
||
await fetcher.close()
|
||
return None
|
||
|
||
|
||
# 测试代码
|
||
if __name__ == "__main__":
|
||
async def test():
|
||
# 测试订单ID
|
||
test_order_id = "2856024697612814489"
|
||
|
||
print(f"🔍 开始获取订单详情: {test_order_id}")
|
||
|
||
result = await fetch_order_detail_simple(test_order_id, headless=False)
|
||
|
||
if result:
|
||
print("✅ 订单详情获取成功:")
|
||
print(f"📋 订单ID: {result['order_id']}")
|
||
print(f"🌐 URL: {result['url']}")
|
||
print(f"📄 页面标题: {result['title']}")
|
||
print(f"🛍️ SKU内容: {result['sku_content']}")
|
||
else:
|
||
print("❌ 订单详情获取失败")
|
||
|
||
# 运行测试
|
||
asyncio.run(test())
|