This commit is contained in:
dela
2026-01-31 22:53:12 +08:00
commit bc639cf460
30 changed files with 6836 additions and 0 deletions

View File

@@ -0,0 +1,292 @@
package importer
import (
"context"
"encoding/csv"
"errors"
"io"
"strconv"
"strings"
"github.com/google/uuid"
"proxyrotator/internal/model"
)
// Importer parses proxy lists, supplied either as plain text or as CSV, into
// model.Proxy values. The type has no fields and therefore carries no state.
type Importer struct{}
// NewImporter returns a new, ready-to-use Importer.
func NewImporter() *Importer {
	return new(Importer)
}
// ParseText parses a newline-separated proxy list.
//
// Every line is trimmed; blank lines and lines beginning with "#" are skipped.
// A line rejected by ParseProxyLine is reported in the second return value
// together with the parser's error text. Each accepted proxy receives a fresh
// UUID, StatusUnknown, a group (its own, else in.Group, else "default") and the
// merged set of its own tags and in.Tags. A proxy whose dedupKey was already
// seen during this call is dropped without being reported.
//
// ctx is accepted for API symmetry but is not consulted.
func (im *Importer) ParseText(ctx context.Context, in model.ImportInput, text string) ([]model.Proxy, []model.InvalidLine) {
	var (
		accepted []model.Proxy
		rejected []model.InvalidLine
	)
	known := map[string]bool{}

	for _, line := range strings.Split(text, "\n") {
		line = strings.TrimSpace(line)
		if len(line) == 0 || line[0] == '#' {
			continue
		}

		proxy, err := ParseProxyLine(line, in.ProtocolHint)
		if err != nil {
			rejected = append(rejected, model.InvalidLine{Raw: line, Reason: err.Error()})
			continue
		}

		// Apply the import-level defaults.
		proxy.ID = uuid.New().String()
		proxy.Status = model.StatusUnknown
		proxy.Group = coalesce(proxy.Group, in.Group, "default")
		proxy.Tags = mergeTags(proxy.Tags, in.Tags)

		// In-memory de-duplication: only the first proxy per key is kept.
		if key := dedupKey(proxy); !known[key] {
			known[key] = true
			accepted = append(accepted, *proxy)
		}
	}
	return accepted, rejected
}
// ParseCSV parses a CSV proxy list read from r.
//
// The first record is inspected as a header and mapped to columns by
// parseHeader; the recognized layout is
// protocol,host,port,username,password,group,tags. Records may contain any
// number of fields. If the first record names no host column, the input is
// treated as headerless: that record is tried as data and kept when it parses,
// and silently skipped otherwise (it is then assumed to be an unrecognized
// header).
//
// It returns the accepted proxies, de-duplicated by dedupKey within this call,
// and one InvalidLine per record that could not be read or converted. When the
// header itself cannot be read, a single InvalidLine is returned and no
// proxies.
//
// ctx is accepted for API symmetry but is not consulted.
func (im *Importer) ParseCSV(ctx context.Context, in model.ImportInput, r io.Reader) ([]model.Proxy, []model.InvalidLine) {
	reader := csv.NewReader(r)
	reader.FieldsPerRecord = -1 // allow a variable number of fields per record
	reader.TrimLeadingSpace = true

	header, err := reader.Read()
	if err != nil {
		return nil, []model.InvalidLine{{Raw: "", Reason: "failed to read CSV header"}}
	}
	colIdx := parseHeader(header)

	var proxies []model.Proxy
	var invalid []model.InvalidLine
	seen := make(map[string]bool)

	// accept stores p unless an equivalent proxy was already stored.
	accept := func(p *model.Proxy) {
		key := dedupKey(p)
		if seen[key] {
			return
		}
		seen[key] = true
		proxies = append(proxies, *p)
	}

	// Without a host column parseCSVRecord reads records positionally, which
	// means the file may have no header at all. In that case the first record
	// is data and must not be thrown away.
	if colIdx.host == -1 {
		if p, err := parseCSVRecord(header, colIdx, in); err == nil {
			accept(p)
		}
	}

	for {
		record, err := reader.Read()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			invalid = append(invalid, model.InvalidLine{
				Raw:    strings.Join(record, ","),
				Reason: err.Error(),
			})
			// A *csv.ParseError concerns a single malformed record, so reading
			// can resume with the next one. Any other error originates from the
			// underlying reader and may be returned on every subsequent call;
			// stop here rather than loop forever.
			var parseErr *csv.ParseError
			if !errors.As(err, &parseErr) {
				break
			}
			continue
		}

		p, err := parseCSVRecord(record, colIdx, in)
		if err != nil {
			invalid = append(invalid, model.InvalidLine{
				Raw:    strings.Join(record, ","),
				Reason: err.Error(),
			})
			continue
		}
		accept(p)
	}
	return proxies, invalid
}
// columnIndex records, for every known CSV column, its zero-based position in
// the header row. A value of -1 means the column is not present.
type columnIndex struct {
	protocol int
	host     int
	port     int
	username int
	password int
	group    int
	tags     int
}

// parseHeader maps the cells of a CSV header row to a columnIndex.
//
// Cell names are compared case-insensitively after trimming surrounding white
// space. "user" is accepted as an alias for "username" and "pass" for
// "password". Unknown cells are ignored; when a name occurs more than once the
// last occurrence wins. Columns that are not found keep the index -1.
func parseHeader(header []string) columnIndex {
	idx := columnIndex{
		protocol: -1,
		host:     -1,
		port:     -1,
		username: -1,
		password: -1,
		group:    -1,
		tags:     -1,
	}
	for i, col := range header {
		if i == 0 {
			// Files exported by spreadsheet tools often start with a UTF-8
			// byte-order mark. It ends up glued to the first cell and is not
			// white space, so it has to be removed explicitly.
			col = strings.TrimPrefix(col, "\ufeff")
		}
		switch strings.ToLower(strings.TrimSpace(col)) {
		case "protocol":
			idx.protocol = i
		case "host":
			idx.host = i
		case "port":
			idx.port = i
		case "username", "user":
			idx.username = i
		case "password", "pass":
			idx.password = i
		case "group":
			idx.group = i
		case "tags":
			idx.tags = i
		}
	}
	return idx
}
// parseCSVRecord converts one CSV record into a proxy.
//
// Positional mode: when the header named no host column (idx.host == -1) and
// the record has at least two fields, the record is read as host,port or
// host,port,username,password. Host and port are validated by ParseProxyLine,
// which also chooses the protocol from in.ProtocolHint and the port.
//
// Header mode: fields are looked up through idx. The host is mandatory. An
// empty port defaults to 80; a non-empty port must be an integer in 1..65535.
// The protocol column accepts http, https and socks5 (case-insensitive); when
// it is empty or holds anything else, the protocol is derived from
// in.ProtocolHint and the port via inferProtocol. Tags in the tags column are
// separated by ";".
//
// In both modes the proxy gets a fresh UUID, StatusUnknown, a group falling
// back to in.Group and then "default", and in.Tags merged into its tags.
//
// An error is returned for a missing host, an invalid port, or a host/port
// pair that ParseProxyLine rejects.
func parseCSVRecord(record []string, idx columnIndex, in model.ImportInput) (*model.Proxy, error) {
	// get returns the trimmed field at position i, or "" when there is none.
	get := func(i int) string {
		if i >= 0 && i < len(record) {
			return strings.TrimSpace(record[i])
		}
		return ""
	}

	// No host column in the header: fall back to positional parsing.
	if idx.host == -1 && len(record) >= 2 {
		p, err := ParseProxyLine(get(0)+":"+get(1), in.ProtocolHint)
		if err != nil {
			return nil, err
		}
		// Credentials are assigned directly instead of being spliced into a
		// "user:pass@host:port" string, so empty values and values containing
		// ':' or '@' do not break the line parser.
		if len(record) >= 4 {
			p.Username = get(2)
			p.Password = get(3)
		}
		p.ID = uuid.New().String()
		p.Group = coalesce(in.Group, "default")
		p.Tags = mergeTags(nil, in.Tags) // normalized copy, not an alias of in.Tags
		p.Status = model.StatusUnknown
		return p, nil
	}

	// Header mode.
	host := get(idx.host)
	if host == "" {
		return nil, errors.New("missing host")
	}

	port := 80
	if portStr := get(idx.port); portStr != "" {
		n, err := strconv.Atoi(portStr)
		if err != nil || n <= 0 || n >= 65536 {
			return nil, errors.New("invalid port: " + portStr)
		}
		port = n
	}

	var p model.Proxy
	p.ID = uuid.New().String()
	p.Host = host
	p.Port = port
	p.Status = model.StatusUnknown

	switch strings.ToLower(get(idx.protocol)) {
	case "http":
		p.Protocol = model.ProtoHTTP
	case "https":
		p.Protocol = model.ProtoHTTPS
	case "socks5":
		p.Protocol = model.ProtoSOCKS5
	default:
		// Same fallback the text importer uses: the caller's hint first, then
		// a guess from the port, then HTTP.
		p.Protocol = inferProtocol(in.ProtocolHint, port)
	}

	p.Username = get(idx.username)
	p.Password = get(idx.password)
	p.Group = coalesce(get(idx.group), in.Group, "default")

	var own []string
	if tagsStr := get(idx.tags); tagsStr != "" {
		own = strings.Split(tagsStr, ";")
	}
	p.Tags = mergeTags(own, in.Tags)

	return &p, nil
}
// dedupKey builds the identity string used for in-memory de-duplication, in
// the form "protocol:host:port:username". The password is not part of the key.
func dedupKey(p *model.Proxy) string {
	key := string(p.Protocol)
	key += ":" + p.Host
	key += ":" + strconv.Itoa(p.Port)
	key += ":" + p.Username
	return key
}
// coalesce returns the first argument that is not the empty string, or "" when
// every argument is empty or none is given.
func coalesce(values ...string) string {
	for i := range values {
		if values[i] == "" {
			continue
		}
		return values[i]
	}
	return ""
}
// mergeTags returns the tags of a followed by the tags of b, with every tag
// trimmed of surrounding white space, empty tags removed, and only the first
// occurrence of each tag kept. The result is nil when no tag remains.
func mergeTags(a, b []string) []string {
	var merged []string
	known := make(map[string]bool)
	for _, source := range [][]string{a, b} {
		for _, tag := range source {
			tag = strings.TrimSpace(tag)
			if tag == "" || known[tag] {
				continue
			}
			known[tag] = true
			merged = append(merged, tag)
		}
	}
	return merged
}

171
internal/importer/parser.go Normal file
View File

@@ -0,0 +1,171 @@
package importer
import (
"fmt"
"net/url"
"regexp"
"strconv"
"strings"
"proxyrotator/internal/model"
)
var (
// hostPortRegex matches "host:port". The host may contain ASCII letters,
// digits, dots and hyphens; the port is a run of digits.
hostPortRegex = regexp.MustCompile(`^([a-zA-Z0-9.-]+):(\d+)$`)
// userPassHostPortRegex matches "user:pass@host:port". The user may not
// contain ':' or '@', and the password may not contain '@'.
userPassHostPortRegex = regexp.MustCompile(`^([^:@]+):([^@]+)@([a-zA-Z0-9.-]+):(\d+)$`)
// hostPortUserPassRegex matches "host:port:user:pass". The user may not
// contain ':'; the password is everything after the third colon.
hostPortUserPassRegex = regexp.MustCompile(`^([a-zA-Z0-9.-]+):(\d+):([^:]+):(.+)$`)
)
// ParseProxyLine parses a single proxy written in one of these forms:
//   - host:port
//   - user:pass@host:port
//   - host:port:user:pass
//   - http://host:port
//   - http://user:pass@host:port
//   - socks5://host:port
//   - socks5://user:pass@host:port
//
// Input containing "://" is handed to parseAsURL. For the plain forms the
// protocol is chosen by inferProtocol from protocolHint and the port, and the
// port must lie in 1..65535. An error is returned for an empty line, an
// invalid port, or input that matches none of the forms.
func ParseProxyLine(raw string, protocolHint string) (*model.Proxy, error) {
	line := strings.TrimSpace(raw)
	if len(line) == 0 {
		return nil, fmt.Errorf("empty line")
	}

	// Anything carrying a scheme separator goes through the URL parser.
	if strings.Contains(line, "://") {
		return parseAsURL(line)
	}

	// Plain forms, tried in this order. The first pattern that matches decides
	// the outcome, even when its port then turns out to be invalid. The numbers
	// are capture-group indexes; 0 means the form has no such field.
	layouts := []struct {
		re                     *regexp.Regexp
		host, port, user, pass int
	}{
		{hostPortUserPassRegex, 1, 2, 3, 4}, // host:port:user:pass
		{userPassHostPortRegex, 3, 4, 1, 2}, // user:pass@host:port
		{hostPortRegex, 1, 2, 0, 0},         // host:port
	}
	for _, layout := range layouts {
		groups := layout.re.FindStringSubmatch(line)
		if groups == nil {
			continue
		}

		portText := groups[layout.port]
		port, err := strconv.Atoi(portText)
		if err != nil || port < 1 || port > 65535 {
			return nil, fmt.Errorf("invalid port: %s", portText)
		}

		proxy := &model.Proxy{
			Protocol: inferProtocol(protocolHint, port),
			Host:     groups[layout.host],
			Port:     port,
		}
		if layout.user > 0 {
			proxy.Username = groups[layout.user]
			proxy.Password = groups[layout.pass]
		}
		return proxy, nil
	}

	return nil, fmt.Errorf("unrecognized format")
}
// parseAsURL parses a proxy given as a URL such as "socks5://user:pass@host:1080".
//
// The scheme must be http, https or socks5 (case-insensitive). When the URL
// has no port, the scheme's default is used: 80, 443 or 1080 respectively.
// Credentials are taken from the URL's user-info part when present. An error
// is returned when the URL cannot be parsed, the scheme is unsupported, the
// host is missing, or the port is outside 1..65535.
func parseAsURL(raw string) (*model.Proxy, error) {
	parsed, err := url.Parse(raw)
	if err != nil {
		return nil, fmt.Errorf("invalid URL: %w", err)
	}

	// Resolve the scheme to a protocol and that protocol's default port.
	var (
		proto       model.ProxyProtocol
		defaultPort string
	)
	switch strings.ToLower(parsed.Scheme) {
	case "http":
		proto, defaultPort = model.ProtoHTTP, "80"
	case "https":
		proto, defaultPort = model.ProtoHTTPS, "443"
	case "socks5":
		proto, defaultPort = model.ProtoSOCKS5, "1080"
	default:
		return nil, fmt.Errorf("unsupported protocol: %s", parsed.Scheme)
	}

	hostname := parsed.Hostname()
	if hostname == "" {
		return nil, fmt.Errorf("missing host")
	}

	portText := parsed.Port()
	if portText == "" {
		portText = defaultPort
	}
	portNum, err := strconv.Atoi(portText)
	if err != nil || portNum < 1 || portNum > 65535 {
		return nil, fmt.Errorf("invalid port: %s", portText)
	}

	proxy := &model.Proxy{
		Protocol: proto,
		Host:     hostname,
		Port:     portNum,
	}
	if info := parsed.User; info != nil {
		proxy.Username = info.Username()
		// The "is set" flag is irrelevant here: an unset password is stored
		// as the empty string.
		proxy.Password, _ = info.Password()
	}
	return proxy, nil
}
// inferProtocol picks a protocol for a proxy that does not state one.
//
// A hint of "http", "https" or "socks5" (case-insensitive) is used as is. Any
// other hint is ignored and the protocol is guessed from the port: 443 means
// HTTPS, 1080 means SOCKS5, and everything else means HTTP.
func inferProtocol(hint string, port int) model.ProxyProtocol {
	// A recognized hint always wins over the port.
	lowered := strings.ToLower(hint)
	if lowered == "http" {
		return model.ProtoHTTP
	}
	if lowered == "https" {
		return model.ProtoHTTPS
	}
	if lowered == "socks5" {
		return model.ProtoSOCKS5
	}

	// No usable hint: guess from the port.
	if port == 443 {
		return model.ProtoHTTPS
	}
	if port == 1080 {
		return model.ProtoSOCKS5
	}
	return model.ProtoHTTP
}