#!/usr/bin/env python3
"""
Rank every fingerprint field found in a chatgpt.com-*_analysis.json file.

Scoring rules (additive):
  +10  bot / automation-detection field (webdriver, callPhantom, ...);
       matched by exact name against BOT_SIGNALS
  + 5  appears in a core detection loop (tH=154 or tH=155)
  + 2  for every additional distinct tH the field appears in
  + 3  known high-risk API (Crypto, RTCPeerConnection, OfflineAudioContext, ...)
  + 1  navigator / screen / canvas family
"""

import glob
import json
import os
import sys
from collections import defaultdict

# ── Configuration ────────────────────────────────────────────
ANALYSIS_GLOB = "/home/carry/myprj/hcaptcha/asset/chatgpt.com-*_analysis.json"


def _latest_analysis_json(pattern=ANALYSIS_GLOB):
    """Return the lexicographically last path matching *pattern*, or None."""
    matches = sorted(glob.glob(pattern))
    return matches[-1] if matches else None


# Default input file. None when nothing matches, so that importing this
# module (or running it with an explicit path) does not crash.
ANALYSIS_JSON = _latest_analysis_json()

# Fields that only exist to detect automation (their presence is a giveaway).
BOT_SIGNALS = {
    "webdriver", "callPhantom", "callSelenium", "_selenium", "__phantomas",
    "domAutomationController", "awesomium", "$wdc_", "domAutomation",
    "_WEBDRIVER_ELEM_CACHE", "spawn", "__nightmare",
    "__webdriver_script_fn", "__webdriver_script_func",
    "__driver_evaluate", "__webdriver_evaluate", "__selenium_evaluate",
    "__fxdriver_evaluate", "__driver_unwrapped", "__webdriver_unwrapped",
    "__selenium_unwrapped", "__fxdriver_unwrapped",
    "hcaptchaCallbackZenno", "_Selenium_IDE_Recorder",
    "cdc_adoQpoasnfa76pfcZLmcfl_Array", "cdc_adoQpoasnfa76pfcZLmcfl_Promise",
    "cdc_adoQpoasnfa76pfcZLmcfl_Symbol", "CDCJStestRunStatus",
    "$cdc_asdjflasutopfhvcZLmcfl_", "$chrome_asyncScriptInfo",
}

# High-risk APIs (strong fingerprinting surface).
HIGH_RISK_APIS = {
    "Crypto", "RTCPeerConnection", "OfflineAudioContext",
    "CanvasRenderingContext2D", "HTMLCanvasElement",
    "WebGL2RenderingContext", "WebGLRenderingContext",
    "IDBFactory", "PluginArray", "NavigatorUAData",
    "PerformanceNavigationTiming", "PerformanceResourceTiming",
}

# navigator / screen / canvas family.
MEDIUM_APIS = {
    "Navigator", "Screen", "Storage", "Performance", "HTMLDocument",
    "ScreenOrientation", "NetworkInformation",
    "languages", "maxTouchPoints", "webdriver", "platform", "userAgent",
}

# tH values of the core detection loops.
CORE_TH = {154, 155}

# Strings starting with one of these prefixes are treated as captured
# *values*, not API names, and are skipped while scoring.
VALUE_PREFIXES = ("0,1,2", "1:", "#", "return ", "https://")

# Strings longer than this are likewise treated as values.
MAX_API_NAME_LEN = 80


# ── Loading ──────────────────────────────────────────────────
def load(path):
    """Read the UTF-8 JSON file at *path* and return the parsed object."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


# ── Scoring ──────────────────────────────────────────────────
def score(data):
    """Score every API name found in *data*.

    Args:
        data: mapping of tH (a string convertible with ``int``) to an entry
            dict; the strings in ``entry["ig_values"]`` are the candidates.

    Returns:
        A defaultdict mapping API name to
        ``{"tH_set": set[int], "score": int, "reasons": list[str]}``.

    Raises:
        ValueError: if a key of *data* is not an integer string.
    """
    api_info = defaultdict(lambda: {"tH_set": set(), "score": 0, "reasons": []})

    # Pass 1: collect the set of tH values each candidate name appears in.
    for tH_str, entry in data.items():
        tH = int(tH_str)
        for ig in entry.get("ig_values", []):
            if not isinstance(ig, str):
                continue
            # Skip strings that are obviously values rather than API names.
            if ig.startswith(VALUE_PREFIXES) or len(ig) > MAX_API_NAME_LEN:
                continue
            api_info[ig]["tH_set"].add(tH)

    # Pass 2: apply the additive scoring rules.
    for api, info in api_info.items():
        s = 0
        reasons = []

        # Bot signal.
        if api in BOT_SIGNALS:
            s += 10
            reasons.append("🚨 bot检测字段 +10")

        # Core detection loop.
        core_hit = info["tH_set"] & CORE_TH
        if core_hit:
            s += 5
            reasons.append(f"🎯 核心循环 tH={sorted(core_hit)} +5")

        # High-risk API.
        if api in HIGH_RISK_APIS:
            s += 3
            reasons.append("⚡ 高风险API +3")

        # Medium API.
        if api in MEDIUM_APIS:
            s += 1
            reasons.append("📡 navigator/screen类 +1")

        # Cross-tH frequency: +2 for each tH beyond the first.
        freq = len(info["tH_set"])
        if freq > 1:
            bonus = (freq - 1) * 2
            s += bonus
            reasons.append(f"🔁 跨{freq}个tH +{bonus}")

        info["score"] = s
        info["reasons"] = reasons

    return api_info


# ── Output ───────────────────────────────────────────────────
def priority_path(analysis_path):
    """Return the export path that belongs to *analysis_path*.

    ``x_analysis.json`` becomes ``x_priority.json``. For any other name the
    extension is replaced by ``_priority.json``, so the result never equals
    the input and the analysis file cannot be overwritten.
    """
    suffix = "_analysis.json"
    if analysis_path.endswith(suffix):
        return analysis_path[: -len(suffix)] + "_priority.json"
    root, _ext = os.path.splitext(analysis_path)
    return root + "_priority.json"


def report(api_info, source_path=None):
    """Print the tiered ranking and export it as JSON.

    Args:
        api_info: mapping as returned by ``score``.
        source_path: analysis file the scores came from; the export is
            written next to it. Defaults to ``ANALYSIS_JSON``. When neither
            is available the export is skipped.
    """
    # Highest score first; sorted() is stable, so ties keep insertion order.
    ranked = sorted(api_info.items(), key=lambda x: -x[1]["score"])

    print("=" * 70)
    print(" HSW 指纹字段 优先级排名")
    print("=" * 70)

    # Tiers, from most to least important.
    tiers = [
        ("🔴 P0 必须正确(≥10分)", lambda s: s >= 10),
        ("🟠 P1 高优先级(5~9分)", lambda s: 5 <= s < 10),
        ("🟡 P2 中优先级(3~4分)", lambda s: 3 <= s < 5),
        ("🟢 P3 低优先级(1~2分)", lambda s: 1 <= s < 3),
        ("⚪ P4 可忽略(0分)", lambda s: s == 0),
    ]

    for tier_label, condition in tiers:
        tier_items = [(api, info) for api, info in ranked
                      if condition(info["score"])]
        if not tier_items:
            continue
        print(f"\n{tier_label} [{len(tier_items)} 个]")
        print(f" {'分数':<5} {'字段名':<45} 出现tH")
        print(f" {'─'*5} {'─'*45} {'─'*20}")
        for api, info in tier_items:
            tH_list = ",".join(str(t) for t in sorted(info["tH_set"]))
            print(f" {info['score']:<5} {api:<45} tH={tH_list}")
            for r in info["reasons"]:
                print(f" {r}")

    # JSON export, in ranking order.
    out = {
        api: {
            "score": info["score"],
            "tH_list": sorted(info["tH_set"]),
            "reasons": info["reasons"],
        }
        for api, info in ranked
    }
    if source_path is None:
        source_path = ANALYSIS_JSON
    if source_path is None:
        print("\n⚠️ 未指定分析文件路径,跳过 JSON 导出")
        return
    out_path = priority_path(source_path)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(out, f, ensure_ascii=False, indent=2)
    print(f"\n📄 优先级结果已写入: {out_path}")


# ── Entry point ──────────────────────────────────────────────
def main(argv=None):
    """Run the ranking for ``argv[0]`` or, if absent, ``ANALYSIS_JSON``.

    Returns:
        Process exit code: 0 on success, 1 when no input file is available.
    """
    args = sys.argv[1:] if argv is None else argv
    path = args[0] if args else ANALYSIS_JSON
    if path is None:
        print(f"❌ 未找到分析文件: {ANALYSIS_GLOB}", file=sys.stderr)
        return 1
    print(f"📂 读取: {path}\n")
    data = load(path)
    api_info = score(data)
    report(api_info, path)
    return 0


if __name__ == "__main__":
    sys.exit(main())