This commit is contained in:
dela
2026-02-21 18:27:49 +08:00
parent 0ac4b23f07
commit 5dc86ccfbf
270 changed files with 49508 additions and 4636 deletions

175
analyze_priority.py Normal file
View File

@@ -0,0 +1,175 @@
#!/usr/bin/env python3
"""
从 chatgpt.com-*_analysis.json 中,按优先级对每个指纹字段评分排序。
评分规则:
+10 bot 自动化检测专属字段(webdriver, $cdc_*, callPhantom 等)
+ 5 出现在核心检测循环 tH=154 或 tH=155
+ 2 每额外出现在一个不同 tH(跨 tH 频次)
+ 3 属于已知高风险 API(Crypto, RTCPeerConnection, OfflineAudioContext 等)
+ 1 属于 navigator / screen / canvas 系列
"""
import json
import sys
import glob
from collections import defaultdict
# ── 配置 ─────────────────────────────────────────────────────
# Glob pattern of the analysis dumps this script reads by default.
_ANALYSIS_PATTERN = "/home/carry/myprj/hcaptcha/asset/chatgpt.com-*_analysis.json"

# Default input file: the lexicographically last match of the pattern.
# It is None when nothing matches, so that merely importing the module (or
# running it with an explicit path on the command line) does not die with an
# IndexError on machines where the hard-coded directory does not exist.
_analysis_matches = sorted(glob.glob(_ANALYSIS_PATTERN))
ANALYSIS_JSON = _analysis_matches[-1] if _analysis_matches else None

# Fields that exist only for bot / automation detection (being seen with one
# of these set is enough to be exposed).  Matched by exact name.
BOT_SIGNALS = {
    "webdriver", "callPhantom", "callSelenium", "_selenium", "__phantomas",
    "domAutomationController", "awesomium", "$wdc_", "domAutomation",
    "_WEBDRIVER_ELEM_CACHE", "spawn", "__nightmare", "__webdriver_script_fn",
    "__webdriver_script_func", "__driver_evaluate", "__webdriver_evaluate",
    "__selenium_evaluate", "__fxdriver_evaluate", "__driver_unwrapped",
    "__webdriver_unwrapped", "__selenium_unwrapped", "__fxdriver_unwrapped",
    "hcaptchaCallbackZenno", "_Selenium_IDE_Recorder",
    "cdc_adoQpoasnfa76pfcZLmcfl_Array",
    "cdc_adoQpoasnfa76pfcZLmcfl_Promise",
    "cdc_adoQpoasnfa76pfcZLmcfl_Symbol",
    "CDCJStestRunStatus",
    "$cdc_asdjflasutopfhvcZLmcfl_",
    "$chrome_asyncScriptInfo",
}

# High-risk APIs (strong fingerprinting signal).
HIGH_RISK_APIS = {
    "Crypto", "RTCPeerConnection", "OfflineAudioContext",
    "CanvasRenderingContext2D", "HTMLCanvasElement", "WebGL2RenderingContext",
    "WebGLRenderingContext", "IDBFactory", "PluginArray", "NavigatorUAData",
    "PerformanceNavigationTiming", "PerformanceResourceTiming",
}

# navigator / screen / canvas family.  Note that "webdriver" is also listed
# in BOT_SIGNALS, so it earns both bonuses.
MEDIUM_APIS = {
    "Navigator", "Screen", "Storage", "Performance", "HTMLDocument",
    "ScreenOrientation", "NetworkInformation", "languages", "maxTouchPoints",
    "webdriver", "platform", "userAgent",
}

# tH values that make up the core detection loop.
CORE_TH = {154, 155}
# ── 加载 ────────────────────────────────────────────────────
def load(path):
    """Read the UTF-8 encoded JSON file at *path* and return the parsed value."""
    with open(path, encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
# ── 评分 ────────────────────────────────────────────────────
def score(data):
    """Collect every candidate field name in *data* and assign it a score.

    Args:
        data: Mapping whose keys are tH numbers written as strings and whose
            values are dicts that may carry an "ig_values" list.

    Returns:
        A defaultdict mapping field name -> {"tH_set", "score", "reasons"},
        where "tH_set" is the set of tH numbers the name was seen under,
        "score" the summed bonus and "reasons" the matching explanations in
        the order the bonuses were applied.

    Raises:
        ValueError: if a key of *data* cannot be converted with int().
    """
    # field name -> {tH set, score, reasons}
    api_info = defaultdict(lambda: {"tH_set": set(), "score": 0, "reasons": []})

    # Strings that start like this are obviously values, not API names.
    value_prefixes = ("0,1,2", "1:", "#", "return ", "https://")

    # Pass 1: remember under which tH every surviving name shows up.
    for th_key, entry in data.items():
        th_number = int(th_key)
        for candidate in entry.get("ig_values", []):
            if not isinstance(candidate, str):
                continue
            if candidate.startswith(value_prefixes) or len(candidate) > 80:
                continue
            api_info[candidate]["tH_set"].add(th_number)

    # Pass 2: turn the sightings into (points, reason) awards.
    for name, record in api_info.items():
        seen_in = record["tH_set"]
        awards = []

        # Bot signal.
        if name in BOT_SIGNALS:
            awards.append((10, "🚨 bot检测字段 +10"))

        # Core detection loop.
        core_hit = seen_in & CORE_TH
        if core_hit:
            awards.append((5, f"🎯 核心循环 tH={sorted(core_hit)} +5"))

        # High-risk API.
        if name in HIGH_RISK_APIS:
            awards.append((3, "⚡ 高风险API +3"))

        # Medium API.
        if name in MEDIUM_APIS:
            awards.append((1, "📡 navigator/screen类 +1"))

        # Cross-tH frequency: +2 for every tH beyond the first.
        freq = len(seen_in)
        if freq > 1:
            bonus = (freq - 1) * 2
            awards.append((bonus, f"🔁 跨{freq}个tH +{bonus}"))

        record["score"] = sum(points for points, _ in awards)
        record["reasons"] = [reason for _, reason in awards]

    return api_info
# ── 输出 ─────────────────────────────────────────────────────
def _priority_path(source_path):
    """Return the ``*_priority.json`` path that belongs to *source_path*.

    A trailing ``_analysis.json`` is swapped for ``_priority.json``.  Any other
    file name gets ``_priority.json`` appended to its extension-less stem, so
    the result can never be the input file itself.
    """
    import os  # local import: the module header does not import os

    suffix = "_analysis.json"
    if source_path.endswith(suffix):
        return source_path[: -len(suffix)] + "_priority.json"
    stem, _ext = os.path.splitext(source_path)
    return stem + "_priority.json"


def report(api_info, source_path=None):
    """Print the tiered ranking and export it as JSON next to the input file.

    Args:
        api_info: Mapping of field name -> {"tH_set", "score", "reasons"},
            as produced by score().
        source_path: Path of the analysis file the scores were computed from.
            The export is written beside it with the ``_analysis.json`` suffix
            replaced by ``_priority.json``.  Defaults to the module-level
            ANALYSIS_JSON, which keeps ``report(api_info)`` working.

    Raises:
        ValueError: if no source path is given and no default is available.
    """
    # Resolve the output location first so a missing path fails before any
    # output is produced.
    if source_path is None:
        source_path = ANALYSIS_JSON
    if not source_path:
        raise ValueError("report() needs source_path: no default analysis file is available")
    out_path = _priority_path(source_path)

    # Highest score first; sorted() is stable, so ties keep insertion order.
    ranked = sorted(api_info.items(), key=lambda x: -x[1]["score"])

    print("=" * 70)
    print(" HSW 指纹字段 优先级排名")
    print("=" * 70)

    # Score bands, from most to least important.
    tiers = [
        ("🔴 P0 必须正确≥10分", lambda s: s >= 10),
        ("🟠 P1 高优先级5~9分", lambda s: 5 <= s < 10),
        ("🟡 P2 中优先级3~4分", lambda s: 3 <= s < 5),
        ("🟢 P3 低优先级1~2分", lambda s: 1 <= s < 3),
        ("⚪ P4 可忽略0分", lambda s: s == 0),
    ]
    for tier_label, condition in tiers:
        tier_items = [(api, info) for api, info in ranked if condition(info["score"])]
        if not tier_items:
            continue
        print(f"\n{tier_label} [{len(tier_items)} 个]")
        print(f" {'分数':<5} {'字段名':<45} 出现tH")
        # Underline for the three columns.  The original multiplied an empty
        # string here, which always printed nothing.
        print(f" {'-' * 5} {'-' * 45} {'-' * 20}")
        for api, info in tier_items:
            tH_list = ",".join(str(t) for t in sorted(info["tH_set"]))
            print(f" {info['score']:<5} {api:<45} tH={tH_list}")
            for r in info["reasons"]:
                print(f" {r}")

    # Export as JSON; sets are not serialisable, so tH_set becomes a sorted list.
    out = {
        api: {
            "score": info["score"],
            "tH_list": sorted(info["tH_set"]),
            "reasons": info["reasons"],
        }
        for api, info in ranked
    }
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(out, f, ensure_ascii=False, indent=2)
    print(f"\n📄 优先级结果已写入: {out_path}")


# ── Entry point ──────────────────────────────────────────────
if __name__ == "__main__":
    # An explicit path on the command line wins over the default file.
    path = sys.argv[1] if len(sys.argv) > 1 else ANALYSIS_JSON
    if not path:
        sys.exit(f"usage: {sys.argv[0]} <..._analysis.json>")
    print(f"📂 读取: {path}\n")
    data = load(path)
    api_info = score(data)
    # Pass the real input path so the export lands next to the file analysed.
    report(api_info, path)