176 lines
6.4 KiB
Python
176 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
从 chatgpt.com-*_analysis.json 中,按优先级对每个指纹字段评分排序。
|
||
|
||
评分规则:
|
||
+10 bot 自动化检测专属字段(webdriver, $cdc_*, callPhantom 等)
|
||
+ 5 出现在核心检测循环 tH=154 或 tH=155
|
||
+ 2 每额外出现在一个不同 tH(跨 tH 频次)
|
||
+ 3 属于已知高风险 API(Crypto, RTCPeerConnection, OfflineAudioContext 等)
|
||
+ 1 属于 navigator / screen / canvas 系列
|
||
"""
|
||
|
||
import json
|
||
import sys
|
||
import glob
|
||
from collections import defaultdict
|
||
|
||
# ── 配置 ─────────────────────────────────────────────────────
|
||
# Default input file: the lexicographically last analysis dump that matches the
# pattern below, or None when nothing matches.  Using ``max(..., default=None)``
# instead of ``sorted(...)[-1]`` keeps the module importable (and usable with an
# explicit command-line path) on machines where no dump exists yet.
ANALYSIS_JSON = max(
    glob.glob("/home/carry/myprj/hcaptcha/asset/chatgpt.com-*_analysis.json"),
    default=None,
)
|
||
|
||
# Field names treated as automation/bot-detection markers.  score() awards +10
# to a field whose name is exactly one of these strings (plain set membership,
# so prefix-looking entries such as "$wdc_" only match that exact string).
BOT_SIGNALS = {
    "webdriver",
    "callPhantom",
    "callSelenium",
    "_selenium",
    "__phantomas",
    "domAutomationController",
    "awesomium",
    "$wdc_",
    "domAutomation",
    "_WEBDRIVER_ELEM_CACHE",
    "spawn",
    "__nightmare",
    "__webdriver_script_fn",
    "__webdriver_script_func",
    "__driver_evaluate",
    "__webdriver_evaluate",
    "__selenium_evaluate",
    "__fxdriver_evaluate",
    "__driver_unwrapped",
    "__webdriver_unwrapped",
    "__selenium_unwrapped",
    "__fxdriver_unwrapped",
    "hcaptchaCallbackZenno",
    "_Selenium_IDE_Recorder",
    "cdc_adoQpoasnfa76pfcZLmcfl_Array",
    "cdc_adoQpoasnfa76pfcZLmcfl_Promise",
    "cdc_adoQpoasnfa76pfcZLmcfl_Symbol",
    "CDCJStestRunStatus",
    "$cdc_asdjflasutopfhvcZLmcfl_",
    "$chrome_asyncScriptInfo",
}
|
||
|
||
# API names regarded as strong fingerprinting surfaces; score() awards +3 to a
# field whose name is exactly one of these.
HIGH_RISK_APIS = {
    "Crypto",
    "RTCPeerConnection",
    "OfflineAudioContext",
    "CanvasRenderingContext2D",
    "HTMLCanvasElement",
    "WebGL2RenderingContext",
    "WebGLRenderingContext",
    "IDBFactory",
    "PluginArray",
    "NavigatorUAData",
    "PerformanceNavigationTiming",
    "PerformanceResourceTiming",
}
|
||
|
||
# Navigator/screen-style names; score() awards +1 to a field whose name is
# exactly one of these.  Note that "webdriver" is also listed in BOT_SIGNALS,
# so that field collects both bonuses.
MEDIUM_APIS = {
    "Navigator",
    "Screen",
    "Storage",
    "Performance",
    "HTMLDocument",
    "ScreenOrientation",
    "NetworkInformation",
    "languages",
    "maxTouchPoints",
    "webdriver",
    "platform",
    "userAgent",
}

# tH identifiers of the core detection loop; a field seen in either of them
# earns the +5 core-loop bonus in score().
CORE_TH = {154, 155}
|
||
|
||
|
||
# ── 加载 ────────────────────────────────────────────────────
|
||
def load(path):
    """Parse the UTF-8 encoded JSON file at *path* and return the result."""
    with open(path, encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed
|
||
|
||
|
||
# ── 评分 ────────────────────────────────────────────────────
|
||
def score(data):
    """Score every candidate fingerprint field found in *data*.

    *data* is iterated with ``.items()``: each key must be convertible with
    ``int`` (the tH identifier) and each value must offer ``.get``; its
    optional ``"ig_values"`` list supplies the candidate strings.  Non-string
    items, strings longer than 80 characters and strings starting with one of
    the value-like prefixes below are ignored.

    Points per field:
      * +10 if the name is in ``BOT_SIGNALS``
      * +5  if it was seen in any tH listed in ``CORE_TH`` (awarded once)
      * +3  if the name is in ``HIGH_RISK_APIS``
      * +1  if the name is in ``MEDIUM_APIS``
      * +2  for every distinct tH beyond the first in which it was seen

    Returns:
        A ``defaultdict`` mapping field name to a dict with the keys
        ``"tH_set"`` (set of int), ``"score"`` (int) and ``"reasons"``
        (list of human-readable strings explaining the score).
    """
    # Strings beginning with any of these look like captured values rather
    # than API names.
    value_prefixes = ("0,1,2", "1:", "#", "return ", "https://")

    api_info = defaultdict(lambda: {"tH_set": set(), "score": 0, "reasons": []})

    # Pass 1: record in which tH entries each candidate field shows up.
    for th_key, entry in data.items():
        th_id = int(th_key)
        for candidate in entry.get("ig_values", []):
            if not isinstance(candidate, str):
                continue
            if candidate.startswith(value_prefixes) or len(candidate) > 80:
                continue
            api_info[candidate]["tH_set"].add(th_id)

    # Pass 2: turn the collected occurrences into a score plus explanations.
    for name, record in api_info.items():
        seen_in = record["tH_set"]
        total = 0
        notes = []

        if name in BOT_SIGNALS:
            total += 10
            notes.append("🚨 bot检测字段 +10")

        core_hit = seen_in & CORE_TH
        if core_hit:
            total += 5
            notes.append(f"🎯 核心循环 tH={sorted(core_hit)} +5")

        if name in HIGH_RISK_APIS:
            total += 3
            notes.append("⚡ 高风险API +3")

        if name in MEDIUM_APIS:
            total += 1
            notes.append("📡 navigator/screen类 +1")

        # Every tH after the first one is worth two extra points.
        th_count = len(seen_in)
        if th_count > 1:
            bonus = (th_count - 1) * 2
            total += bonus
            notes.append(f"🔁 跨{th_count}个tH +{bonus}")

        record["score"] = total
        record["reasons"] = notes

    return api_info
|
||
|
||
|
||
# ── 输出 ─────────────────────────────────────────────────────
|
||
def _priority_path(source_path):
    """Return the ``*_priority.json`` path that sits next to *source_path*.

    Only a trailing ``_analysis.json`` (or, failing that, a trailing ``.json``)
    is stripped, so the result can never be equal to *source_path* itself.
    """
    analysis_suffix = "_analysis.json"
    if source_path.endswith(analysis_suffix):
        stem = source_path[: -len(analysis_suffix)]
    elif source_path.endswith(".json"):
        stem = source_path[: -len(".json")]
    else:
        stem = source_path
    return stem + "_priority.json"


def report(api_info, source_path=None):
    """Print the ranked fields grouped by priority tier and export them as JSON.

    Args:
        api_info: mapping of field name to a dict holding ``"score"``,
            ``"tH_set"`` and ``"reasons"`` (the structure returned by
            ``score``).
        source_path: path of the analysis file the scores were computed from.
            The JSON export is written next to it with ``_analysis.json``
            replaced by ``_priority.json``.  When omitted, the module-level
            ``ANALYSIS_JSON`` is used, which matches the previous behaviour.

    If neither *source_path* nor ``ANALYSIS_JSON`` provides a path, the report
    is still printed and only the JSON export is skipped.
    """
    # Highest score first; ties keep their original (insertion) order.
    ranked = sorted(api_info.items(), key=lambda x: -x[1]["score"])

    print("=" * 70)
    print("  HSW 指纹字段 优先级排名")
    print("=" * 70)

    # (label, predicate on the score) for each priority tier.
    tiers = [
        ("🔴 P0  必须正确(≥10分)", lambda s: s >= 10),
        ("🟠 P1  高优先级(5~9分)", lambda s: 5 <= s < 10),
        ("🟡 P2  中优先级(3~4分)", lambda s: 3 <= s < 5),
        ("🟢 P3  低优先级(1~2分)", lambda s: 1 <= s < 3),
        ("⚪ P4  可忽略(0分)", lambda s: s == 0),
    ]

    for tier_label, condition in tiers:
        tier_items = [(api, info) for api, info in ranked if condition(info["score"])]
        if not tier_items:
            continue
        print(f"\n{tier_label}  [{len(tier_items)} 个]")
        print(f"  {'分数':<5} {'字段名':<45} 出现tH")
        print(f"  {'─'*5} {'─'*45} {'─'*20}")
        for api, info in tier_items:
            tH_list = ",".join(str(t) for t in sorted(info["tH_set"]))
            print(f"  {info['score']:<5} {api:<45} tH={tH_list}")
            for r in info["reasons"]:
                print(f"         {r}")

    # Resolve where the export goes: prefer the file that was actually read,
    # fall back to the module default for callers using the old signature.
    if source_path is None:
        source_path = ANALYSIS_JSON
    if source_path is None:
        print("\n⚠️ 未提供分析文件路径,跳过 JSON 导出")
        return

    # Sets are not JSON-serialisable, so export the tH set as a sorted list.
    out = {
        api: {
            "score": info["score"],
            "tH_list": sorted(info["tH_set"]),
            "reasons": info["reasons"],
        }
        for api, info in ranked
    }
    out_path = _priority_path(source_path)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(out, f, ensure_ascii=False, indent=2)
    print(f"\n📄 优先级结果已写入: {out_path}")


# ── Entry point ──────────────────────────────────────────────
if __name__ == "__main__":
    path = sys.argv[1] if len(sys.argv) > 1 else ANALYSIS_JSON
    if path is None:
        # No command-line argument and no default dump was found.
        sys.exit("❌ 未找到默认分析文件,请在命令行参数中指定路径")
    print(f"📂 读取: {path}\n")
    data = load(path)
    api_info = score(data)
    # Pass the path that was read so the export lands next to it.
    report(api_info, path)
|