Files
ProxyPool/internal/importer/importer.go
2026-01-31 22:53:12 +08:00

293 lines
5.7 KiB
Go

package importer
import (
"context"
"encoding/csv"
"errors"
"io"
"strconv"
"strings"
"github.com/google/uuid"
"proxyrotator/internal/model"
)
// Importer parses proxy lists from plain text (ParseText) and CSV (ParseCSV).
// It has no fields, so it carries no state between calls.
type Importer struct{}
// NewImporter returns a pointer to a newly allocated, zero-valued Importer.
func NewImporter() *Importer {
	return new(Importer)
}
// ParseText parses a newline-separated proxy list.
//
// Each line is trimmed; blank lines and lines starting with "#" are skipped.
// Every remaining line is passed to ParseProxyLine together with
// in.ProtocolHint. Lines that fail to parse are returned in the second result
// with the parser's error text as the reason.
//
// Each parsed proxy receives a fresh UUID, a group (the proxy's own group,
// then in.Group, then "default"), its tags merged with in.Tags, and
// StatusUnknown. Proxies sharing a dedupKey are kept only once; the first
// occurrence wins.
//
// ctx is accepted but not used by this method.
func (im *Importer) ParseText(ctx context.Context, in model.ImportInput, text string) ([]model.Proxy, []model.InvalidLine) {
	var (
		accepted []model.Proxy
		rejected []model.InvalidLine
	)
	known := map[string]struct{}{}

	for _, entry := range strings.Split(text, "\n") {
		entry = strings.TrimSpace(entry)
		// Nothing to import from empty lines or "#" comments.
		if len(entry) == 0 || entry[0] == '#' {
			continue
		}

		proxy, err := ParseProxyLine(entry, in.ProtocolHint)
		if err != nil {
			rejected = append(rejected, model.InvalidLine{Raw: entry, Reason: err.Error()})
			continue
		}

		// Fill in the fields the line itself does not determine.
		proxy.ID = uuid.New().String()
		proxy.Group = coalesce(proxy.Group, in.Group, "default")
		proxy.Tags = mergeTags(proxy.Tags, in.Tags)
		proxy.Status = model.StatusUnknown

		// In-memory dedup: only the first proxy for a given key is kept.
		fingerprint := dedupKey(proxy)
		if _, dup := known[fingerprint]; dup {
			continue
		}
		known[fingerprint] = struct{}{}
		accepted = append(accepted, *proxy)
	}

	return accepted, rejected
}
// ParseCSV parses a proxy list in CSV form.
//
// Expected columns: protocol,host,port,username,password,group,tags. Rows may
// have a varying number of fields, and leading whitespace in a field is
// ignored.
//
// The first row is matched against the column names known to parseHeader. If
// none of them is recognised the input is treated as header-less: the first
// row is parsed as data as well, so its proxy is not silently dropped, and
// parseCSVRecord falls back to positional parsing.
//
// Rows that cannot be read or parsed are returned in the second result with
// the error text as the reason; they do not abort the import. Proxies sharing
// a dedupKey are kept only once, first occurrence wins. If the first row
// cannot be read at all (including empty input), no proxies are returned and a
// single InvalidLine with an empty Raw is reported.
//
// ctx is accepted but not used by this method.
func (im *Importer) ParseCSV(ctx context.Context, in model.ImportInput, r io.Reader) ([]model.Proxy, []model.InvalidLine) {
	reader := csv.NewReader(r)
	reader.FieldsPerRecord = -1 // allow a variable number of fields per row
	reader.TrimLeadingSpace = true

	var proxies []model.Proxy
	var invalid []model.InvalidLine
	seen := make(map[string]bool)

	// Read the header row.
	header, err := reader.Read()
	if err != nil {
		return nil, []model.InvalidLine{{Raw: "", Reason: "failed to read CSV header"}}
	}
	colIdx := parseHeader(header)

	// consume parses one row and files it under proxies or invalid.
	consume := func(record []string) {
		p, err := parseCSVRecord(record, colIdx, in)
		if err != nil {
			invalid = append(invalid, model.InvalidLine{
				Raw:    strings.Join(record, ","),
				Reason: err.Error(),
			})
			return
		}
		// In-memory dedup: only the first proxy for a given key is kept.
		key := dedupKey(p)
		if seen[key] {
			return
		}
		seen[key] = true
		proxies = append(proxies, *p)
	}

	// parseHeader(nil) is the "nothing recognised" index (every column -1).
	// Matching it means the first row is not a header but a data row.
	if colIdx == parseHeader(nil) {
		consume(header)
	}

	for {
		record, err := reader.Read()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			invalid = append(invalid, model.InvalidLine{
				Raw:    strings.Join(record, ","),
				Reason: err.Error(),
			})
			// A *csv.ParseError is confined to one record, so reading can
			// continue with the next one. Anything else comes from the
			// underlying reader; retrying would typically fail the same way
			// on every call and loop forever, so stop here.
			var parseErr *csv.ParseError
			if !errors.As(err, &parseErr) {
				break
			}
			continue
		}
		consume(record)
	}

	return proxies, invalid
}
// columnIndex holds, for each supported CSV column, the position of that
// column within a row. parseHeader uses -1 for a column that is not present.
type columnIndex struct {
	protocol, host, port int
	username, password   int
	group, tags          int
}
// parseHeader maps the names in a CSV header row to field positions.
//
// Names are matched case-insensitively after trimming surrounding whitespace.
// Recognised names are protocol, host, port, username (or user), password (or
// pass), group and tags; anything else is ignored. Columns that do not appear
// keep the index -1. If a name appears more than once, the last occurrence
// wins.
func parseHeader(header []string) columnIndex {
	idx := columnIndex{
		protocol: -1,
		host:     -1,
		port:     -1,
		username: -1,
		password: -1,
		group:    -1,
		tags:     -1,
	}
	for i, col := range header {
		if i == 0 {
			// A UTF-8 byte order mark at the start of the file stays attached
			// to the first field and would stop its name from matching.
			col = strings.TrimPrefix(col, "\ufeff")
		}
		switch strings.ToLower(strings.TrimSpace(col)) {
		case "protocol":
			idx.protocol = i
		case "host":
			idx.host = i
		case "port":
			idx.port = i
		case "username", "user":
			idx.username = i
		case "password", "pass":
			idx.password = i
		case "group":
			idx.group = i
		case "tags":
			idx.tags = i
		}
	}
	return idx
}
// parseCSVRecord converts one CSV row into a proxy.
//
// Two layouts are supported:
//
//   - Positional: when the header had no host column (idx.host == -1) and the
//     row has at least two fields, the row is read as host,port or
//     host,port,username,password. It is turned into a single line and passed
//     to ParseProxyLine together with in.ProtocolHint. The group is in.Group,
//     falling back to "default".
//   - By header: otherwise fields are looked up through idx. A missing host or
//     an invalid port is an error. An empty port defaults to 80, and an empty
//     or unrecognised protocol defaults to HTTP. The group is the row's group,
//     then in.Group, then "default". The tags column is split on ";".
//
// In both layouts fields are trimmed, in.Tags are merged into the proxy's
// tags, and the proxy gets a fresh UUID and StatusUnknown. On error the
// returned proxy is nil.
func parseCSVRecord(record []string, idx columnIndex, in model.ImportInput) (*model.Proxy, error) {
	// get returns the trimmed field at position i, or "" when the column is
	// absent (-1) or the row is too short.
	get := func(i int) string {
		if i >= 0 && i < len(record) {
			return strings.TrimSpace(record[i])
		}
		return ""
	}

	// No host column in the header: parse by position.
	if idx.host == -1 && len(record) >= 2 {
		line := get(0) + ":" + get(1)
		if len(record) >= 4 {
			// Only prepend credentials that are actually there, so rows with
			// empty username/password columns do not become ":@host:port".
			if user, pass := get(2), get(3); user != "" || pass != "" {
				line = user + ":" + pass + "@" + line
			}
		}
		p, err := ParseProxyLine(line, in.ProtocolHint)
		if err != nil {
			return nil, err
		}
		p.ID = uuid.New().String()
		p.Group = coalesce(in.Group, "default")
		// mergeTags yields a cleaned copy, so proxies do not share the
		// caller's in.Tags backing array.
		p.Tags = mergeTags(nil, in.Tags)
		p.Status = model.StatusUnknown
		return p, nil
	}

	// Parse according to the header.
	host := get(idx.host)
	if host == "" {
		return nil, errors.New("missing host")
	}

	// An empty port means "use the default"; anything else must be a valid
	// TCP port. Rejecting bad values lets the caller report the row instead of
	// importing it under a port the row never specified.
	port := 80
	if portStr := get(idx.port); portStr != "" {
		n, err := strconv.Atoi(portStr)
		if err != nil || n < 1 || n > 65535 {
			return nil, errors.New("invalid port: " + strconv.Quote(portStr))
		}
		port = n
	}

	var p model.Proxy
	p.ID = uuid.New().String()
	p.Host = host
	p.Port = port
	p.Status = model.StatusUnknown

	// NOTE(review): unlike the positional path, in.ProtocolHint is not
	// consulted here; an empty or unrecognised protocol always becomes HTTP.
	// Confirm this is intended.
	switch strings.ToLower(get(idx.protocol)) {
	case "http":
		p.Protocol = model.ProtoHTTP
	case "https":
		p.Protocol = model.ProtoHTTPS
	case "socks5":
		p.Protocol = model.ProtoSOCKS5
	default:
		p.Protocol = model.ProtoHTTP
	}

	p.Username = get(idx.username)
	p.Password = get(idx.password)
	p.Group = coalesce(get(idx.group), in.Group, "default")

	// Tags within one CSV field are separated by ";".
	if tagsStr := get(idx.tags); tagsStr != "" {
		p.Tags = strings.Split(tagsStr, ";")
	}
	p.Tags = mergeTags(p.Tags, in.Tags)

	return &p, nil
}
// dedupKey builds the key used for in-memory deduplication: protocol, host,
// port and username joined with ":". The password is not part of the key.
func dedupKey(p *model.Proxy) string {
	parts := []string{
		string(p.Protocol),
		p.Host,
		strconv.Itoa(p.Port),
		p.Username,
	}
	return strings.Join(parts, ":")
}
// coalesce returns the first argument that is not the empty string, or "" when
// every argument is empty or none are given. Whitespace-only strings count as
// non-empty.
func coalesce(values ...string) string {
	for i := range values {
		if candidate := values[i]; len(candidate) > 0 {
			return candidate
		}
	}
	return ""
}
// mergeTags concatenates the tags of a and b, in that order, into one list.
// Each tag is trimmed of surrounding whitespace; empty tags and tags already
// present are dropped, so the first occurrence decides a tag's position. The
// result is nil when no tag survives.
func mergeTags(a, b []string) []string {
	var merged []string
	known := map[string]struct{}{}

	for _, source := range [2][]string{a, b} {
		for _, tag := range source {
			tag = strings.TrimSpace(tag)
			if tag == "" {
				continue
			}
			if _, dup := known[tag]; dup {
				continue
			}
			known[tag] = struct{}{}
			merged = append(merged, tag)
		}
	}

	return merged
}