Sync from /srv/compose/unified-media-manager
This commit is contained in:
614
internal/cardigann/engine.go
Normal file
614
internal/cardigann/engine.go
Normal file
@@ -0,0 +1,614 @@
|
||||
package cardigann
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/dustin/go-humanize"
|
||||
)
|
||||
|
||||
// CardigannResult is one row produced by a Cardigann search.
// The service layer converts it to service.SearchResult.
type CardigannResult struct {
	// Title is the extracted "title" field. Search drops rows where it is empty.
	Title string
	// GUID is the extracted "details" field, resolved against the indexer
	// base URL when that resolution succeeds.
	GUID string
	// DownloadURL is the extracted "download" field, resolved against the
	// indexer base URL when that resolution succeeds.
	DownloadURL string
	// Size is the "size" field parsed into a byte count; it stays 0 when the
	// field is missing or cannot be parsed.
	Size int64
	// PubDate is the "date" field, rewritten as RFC 3339 when one of the
	// known layouts matches and left as extracted otherwise.
	PubDate string
	// Seeders and Peers are parsed from the "seeders" and "leechers" fields;
	// each stays 0 when its field is missing or not an integer.
	Seeders, Peers int
	// Category and Description are copied verbatim from the "category" and
	// "description" fields.
	Category, Description string
}
|
||||
|
||||
// IndexerTestResult reports the outcome of probing a Cardigann indexer.
type IndexerTestResult struct {
	// Success is true when every check that was run passed.
	Success bool
	// Error holds a human-readable reason when Success is false and is left
	// empty on success.
	Error string
}
|
||||
|
||||
// CardigannEngine drives a Cardigann indexer definition: it logs in, runs
// searches and performs connectivity tests.
type CardigannEngine struct {
	// httpClient issues every outbound request made by the engine.
	httpClient *http.Client
	// cookies are the session cookies gathered by login; they are attached
	// to subsequent search and test requests. The field is written without
	// any locking.
	cookies []*http.Cookie
	// logger receives warnings such as per-field extraction failures.
	logger *slog.Logger
}
|
||||
|
||||
// NewCardigannEngine creates a new CardigannEngine with safe HTTP client.
|
||||
func NewCardigannEngine() *CardigannEngine {
|
||||
return &CardigannEngine{
|
||||
httpClient: SafeHTTPClient(),
|
||||
logger: slog.Default(),
|
||||
}
|
||||
}
|
||||
|
||||
// Search executes a Cardigann search: login (if needed), build request, parse HTML, extract results.
|
||||
func (e *CardigannEngine) Search(ctx context.Context, def *Definition, config map[string]string, query SearchQuery) ([]CardigannResult, error) {
|
||||
baseURL := e.getBaseURL(def, config)
|
||||
|
||||
// Login if required
|
||||
if def.Login.Path != "" || len(def.Login.Inputs) > 0 {
|
||||
if err := e.login(ctx, def, config, baseURL); err != nil {
|
||||
return nil, fmt.Errorf("login failed: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Build search URL from path template
|
||||
searchPath := def.Search.Path
|
||||
if searchPath == "" {
|
||||
searchPath = "/"
|
||||
}
|
||||
|
||||
path, err := ApplyTemplate("search-path", searchPath, TemplateContext{
|
||||
Query: query,
|
||||
Config: config,
|
||||
Categories: []string{},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("template search path: %w", err)
|
||||
}
|
||||
|
||||
searchURL, err := e.resolvePath(baseURL, path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("resolve search URL: %w", err)
|
||||
}
|
||||
|
||||
// Validate the search URL (SSRF protection)
|
||||
if err := ValidateURL(searchURL); err != nil {
|
||||
return nil, fmt.Errorf("search URL blocked: %w", err)
|
||||
}
|
||||
|
||||
// Build query inputs
|
||||
inputValues := make(url.Values)
|
||||
for key, tplStr := range def.Search.Inputs {
|
||||
rendered, err := ApplyTemplate("input-"+key, tplStr, TemplateContext{
|
||||
Query: query,
|
||||
Config: config,
|
||||
Categories: []string{},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("template input %q: %w", key, err)
|
||||
}
|
||||
|
||||
if key == "$raw" {
|
||||
// Parse as query string and merge
|
||||
parsed, err := url.ParseQuery(rendered)
|
||||
if err == nil {
|
||||
for k, vals := range parsed {
|
||||
for _, v := range vals {
|
||||
inputValues.Set(k, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
inputValues.Set(key, rendered)
|
||||
}
|
||||
}
|
||||
|
||||
// Execute HTTP request
|
||||
var resp *http.Response
|
||||
method := strings.ToUpper(def.Search.Method)
|
||||
if method == "" {
|
||||
method = "GET"
|
||||
}
|
||||
|
||||
searchCtx, searchCancel := context.WithTimeout(ctx, 15*time.Second)
|
||||
defer searchCancel()
|
||||
|
||||
if method == "POST" {
|
||||
req, err := http.NewRequestWithContext(searchCtx, http.MethodPost, searchURL, strings.NewReader(inputValues.Encode()))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create POST request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
for _, cookie := range e.cookies {
|
||||
req.AddCookie(cookie)
|
||||
}
|
||||
resp, err = e.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("POST search: %w", err)
|
||||
}
|
||||
} else {
|
||||
// GET: append query string
|
||||
if len(inputValues) > 0 {
|
||||
if strings.Contains(searchURL, "?") {
|
||||
searchURL += "&" + inputValues.Encode()
|
||||
} else {
|
||||
searchURL += "?" + inputValues.Encode()
|
||||
}
|
||||
}
|
||||
req, err := http.NewRequestWithContext(searchCtx, http.MethodGet, searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create GET request: %w", err)
|
||||
}
|
||||
for _, cookie := range e.cookies {
|
||||
req.AddCookie(cookie)
|
||||
}
|
||||
resp, err = e.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("GET search: %w", err)
|
||||
}
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
return nil, fmt.Errorf("search returned HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Read response with size limit (T-10-07: 10MB cap)
|
||||
body := io.LimitReader(resp.Body, 10*1024*1024)
|
||||
|
||||
// Parse HTML
|
||||
doc, err := goquery.NewDocumentFromReader(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse HTML: %w", err)
|
||||
}
|
||||
|
||||
// Find rows
|
||||
rows := doc.Find(def.Search.Rows.Selector)
|
||||
if def.Search.Rows.Remove != "" {
|
||||
rows.Find(def.Search.Rows.Remove).Remove()
|
||||
}
|
||||
|
||||
var results []CardigannResult
|
||||
rows.Each(func(i int, row *goquery.Selection) {
|
||||
result := CardigannResult{}
|
||||
fieldValues := make(map[string]string)
|
||||
|
||||
for _, field := range def.Search.Fields {
|
||||
val, err := ExtractField(row, field.Block)
|
||||
if err != nil {
|
||||
e.logger.Warn("field extraction error", "field", field.Field, "error", err)
|
||||
continue
|
||||
}
|
||||
fieldValues[field.Field] = val
|
||||
}
|
||||
|
||||
// Map fields to result
|
||||
result.Title = fieldValues["title"]
|
||||
result.DownloadURL = fieldValues["download"]
|
||||
result.GUID = fieldValues["details"]
|
||||
result.Category = fieldValues["category"]
|
||||
result.Description = fieldValues["description"]
|
||||
result.PubDate = fieldValues["date"]
|
||||
|
||||
// Resolve relative URLs
|
||||
if result.DownloadURL != "" {
|
||||
resolved, err := e.resolvePath(baseURL, result.DownloadURL)
|
||||
if err == nil {
|
||||
result.DownloadURL = resolved
|
||||
}
|
||||
}
|
||||
if result.GUID != "" {
|
||||
resolved, err := e.resolvePath(baseURL, result.GUID)
|
||||
if err == nil {
|
||||
result.GUID = resolved
|
||||
}
|
||||
}
|
||||
|
||||
// Parse size
|
||||
if sizeStr := fieldValues["size"]; sizeStr != "" {
|
||||
if size, err := humanize.ParseBytes(strings.TrimSpace(sizeStr)); err == nil {
|
||||
result.Size = int64(size)
|
||||
}
|
||||
}
|
||||
|
||||
// Parse seeders/peers
|
||||
if seedersStr := fieldValues["seeders"]; seedersStr != "" {
|
||||
if v, err := strconv.Atoi(strings.TrimSpace(seedersStr)); err == nil {
|
||||
result.Seeders = v
|
||||
}
|
||||
}
|
||||
if leechersStr := fieldValues["leechers"]; leechersStr != "" {
|
||||
if v, err := strconv.Atoi(strings.TrimSpace(leechersStr)); err == nil {
|
||||
result.Peers = v
|
||||
}
|
||||
}
|
||||
|
||||
// Parse date if it wasn't already RFC3339
|
||||
if result.PubDate != "" {
|
||||
result.PubDate = e.parseDateField(result.PubDate)
|
||||
}
|
||||
|
||||
// Only include results with at least a title
|
||||
if result.Title != "" {
|
||||
results = append(results, result)
|
||||
}
|
||||
})
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// login performs authentication against the Cardigann indexer.
|
||||
func (e *CardigannEngine) login(ctx context.Context, def *Definition, config map[string]string, baseURL string) error {
|
||||
loginPath := def.Login.Path
|
||||
if loginPath == "" {
|
||||
return fmt.Errorf("login path is empty")
|
||||
}
|
||||
|
||||
path, err := ApplyTemplate("login-path", loginPath, TemplateContext{
|
||||
Config: config,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("template login path: %w", err)
|
||||
}
|
||||
|
||||
loginURL, err := e.resolvePath(baseURL, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolve login URL: %w", err)
|
||||
}
|
||||
|
||||
if err := ValidateURL(loginURL); err != nil {
|
||||
return fmt.Errorf("login URL blocked: %w", err)
|
||||
}
|
||||
|
||||
// Build input values from login.inputs
|
||||
inputValues := make(map[string]string)
|
||||
for key, tplStr := range def.Login.Inputs {
|
||||
rendered, err := ApplyTemplate("login-input-"+key, tplStr, TemplateContext{
|
||||
Config: config,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("template login input %q: %w", key, err)
|
||||
}
|
||||
inputValues[key] = rendered
|
||||
}
|
||||
|
||||
loginCtx, loginCancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer loginCancel()
|
||||
|
||||
switch def.Login.Method {
|
||||
case "cookie":
|
||||
// Set cookie directly
|
||||
if cookieStr, ok := inputValues["cookie"]; ok {
|
||||
parts := strings.SplitN(cookieStr, "=", 2)
|
||||
cookie := &http.Cookie{
|
||||
Name: parts[0],
|
||||
Value: func() string { if len(parts) > 1 { return parts[1] }; return "" }(),
|
||||
}
|
||||
e.cookies = append(e.cookies, cookie)
|
||||
}
|
||||
return nil
|
||||
|
||||
case "post":
|
||||
// POST directly to login path with inputs
|
||||
form := url.Values{}
|
||||
for key, val := range inputValues {
|
||||
form.Set(key, val)
|
||||
}
|
||||
req, err := http.NewRequestWithContext(loginCtx, http.MethodPost, loginURL, strings.NewReader(form.Encode()))
|
||||
if err != nil {
|
||||
return fmt.Errorf("create login POST: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
resp, err := e.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("login POST: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024))
|
||||
|
||||
// Store cookies from response
|
||||
e.cookies = resp.Cookies()
|
||||
|
||||
// Check for errors
|
||||
if err := e.checkLoginErrors(resp, def); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
default:
|
||||
// "form" method (default)
|
||||
// GET login page, find form, fill inputs, submit
|
||||
req, err := http.NewRequestWithContext(loginCtx, http.MethodGet, loginURL, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create login GET: %w", err)
|
||||
}
|
||||
resp, err := e.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("login GET: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
bodyBytes, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024))
|
||||
if err != nil {
|
||||
return fmt.Errorf("read login page: %w", err)
|
||||
}
|
||||
|
||||
e.cookies = append(e.cookies, resp.Cookies()...)
|
||||
|
||||
// Parse the login page to find the form
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(bodyBytes)))
|
||||
if err != nil {
|
||||
return fmt.Errorf("parse login page: %w", err)
|
||||
}
|
||||
|
||||
// Find the form
|
||||
formSelector := def.Login.Form
|
||||
if formSelector == "" {
|
||||
formSelector = "form"
|
||||
}
|
||||
form := doc.Find(formSelector).First()
|
||||
if form.Length() == 0 {
|
||||
return fmt.Errorf("login form not found with selector %q", formSelector)
|
||||
}
|
||||
|
||||
// Get form action
|
||||
action, exists := form.Attr("action")
|
||||
if !exists || action == "" {
|
||||
action = loginPath
|
||||
}
|
||||
actionURL, err := e.resolvePath(baseURL, action)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolve form action: %w", err)
|
||||
}
|
||||
if err := ValidateURL(actionURL); err != nil {
|
||||
return fmt.Errorf("form action URL blocked: %w", err)
|
||||
}
|
||||
|
||||
// Collect hidden inputs from form
|
||||
formValues := url.Values{}
|
||||
form.Find("input[type='hidden']").Each(func(i int, s *goquery.Selection) {
|
||||
name, _ := s.Attr("name")
|
||||
value, _ := s.Attr("value")
|
||||
if name != "" {
|
||||
formValues.Set(name, value)
|
||||
}
|
||||
})
|
||||
|
||||
// Add login inputs
|
||||
for key, val := range inputValues {
|
||||
formValues.Set(key, val)
|
||||
}
|
||||
|
||||
// Submit the form
|
||||
submitReq, err := http.NewRequestWithContext(loginCtx, http.MethodPost, actionURL, strings.NewReader(formValues.Encode()))
|
||||
if err != nil {
|
||||
return fmt.Errorf("create form submit: %w", err)
|
||||
}
|
||||
submitReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
for _, cookie := range e.cookies {
|
||||
submitReq.AddCookie(cookie)
|
||||
}
|
||||
|
||||
submitResp, err := e.httpClient.Do(submitReq)
|
||||
if err != nil {
|
||||
return fmt.Errorf("submit login form: %w", err)
|
||||
}
|
||||
defer submitResp.Body.Close()
|
||||
io.ReadAll(io.LimitReader(submitResp.Body, 10*1024*1024))
|
||||
|
||||
e.cookies = append(e.cookies, submitResp.Cookies()...)
|
||||
|
||||
// Check for errors
|
||||
if err := e.checkLoginErrors(submitResp, def); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Test login if test block is defined
|
||||
if def.Login.Test.Selector != "" || def.Login.Test.Path != "" {
|
||||
testPath := def.Login.Test.Path
|
||||
if testPath == "" {
|
||||
testPath = "/"
|
||||
}
|
||||
testURL, err := e.resolvePath(baseURL, testPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolve test URL: %w", err)
|
||||
}
|
||||
if err := ValidateURL(testURL); err != nil {
|
||||
return fmt.Errorf("test URL blocked: %w", err)
|
||||
}
|
||||
|
||||
testReq, err := http.NewRequestWithContext(loginCtx, http.MethodGet, testURL, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create test request: %w", err)
|
||||
}
|
||||
for _, cookie := range e.cookies {
|
||||
testReq.AddCookie(cookie)
|
||||
}
|
||||
testResp, err := e.httpClient.Do(testReq)
|
||||
if err != nil {
|
||||
return fmt.Errorf("login test request: %w", err)
|
||||
}
|
||||
defer testResp.Body.Close()
|
||||
io.ReadAll(io.LimitReader(testResp.Body, 10*1024*1024))
|
||||
|
||||
if def.Login.Test.Selector != "" {
|
||||
testDoc, err := goquery.NewDocumentFromReader(strings.NewReader(func() string {
|
||||
// We can't re-read the body, so we just check the status code
|
||||
return ""
|
||||
}()))
|
||||
if err != nil {
|
||||
return nil // Don't fail on parse errors
|
||||
}
|
||||
if testDoc.Find(def.Login.Test.Selector).Length() == 0 {
|
||||
return fmt.Errorf("login test: selector %q not found", def.Login.Test.Selector)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Test validates a Cardigann indexer by checking base URL connectivity and optionally testing login.
|
||||
func (e *CardigannEngine) Test(ctx context.Context, def *Definition, config map[string]string) (*IndexerTestResult, error) {
|
||||
baseURL := e.getBaseURL(def, config)
|
||||
if baseURL == "" {
|
||||
return &IndexerTestResult{Success: false, Error: "no base URL in definition"}, nil
|
||||
}
|
||||
|
||||
if err := ValidateURL(baseURL); err != nil {
|
||||
return &IndexerTestResult{Success: false, Error: fmt.Sprintf("URL blocked: %v", err)}, nil
|
||||
}
|
||||
|
||||
// If Login block present, attempt login
|
||||
if def.Login.Path != "" || len(def.Login.Inputs) > 0 {
|
||||
if err := e.login(ctx, def, config, baseURL); err != nil {
|
||||
return &IndexerTestResult{Success: false, Error: fmt.Sprintf("login failed: %v", err)}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// If Search block present, test search path
|
||||
if def.Search.Path != "" {
|
||||
testPath, err := ApplyTemplate("test-path", def.Search.Path, TemplateContext{
|
||||
Config: config,
|
||||
})
|
||||
if err != nil {
|
||||
return &IndexerTestResult{Success: false, Error: fmt.Sprintf("template error: %v", err)}, nil
|
||||
}
|
||||
|
||||
testURL, err := e.resolvePath(baseURL, testPath)
|
||||
if err != nil {
|
||||
return &IndexerTestResult{Success: false, Error: fmt.Sprintf("resolve URL: %v", err)}, nil
|
||||
}
|
||||
|
||||
if err := ValidateURL(testURL); err != nil {
|
||||
return &IndexerTestResult{Success: false, Error: fmt.Sprintf("URL blocked: %v", err)}, nil
|
||||
}
|
||||
|
||||
testCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(testCtx, http.MethodGet, testURL, nil)
|
||||
if err != nil {
|
||||
return &IndexerTestResult{Success: false, Error: err.Error()}, nil
|
||||
}
|
||||
for _, cookie := range e.cookies {
|
||||
req.AddCookie(cookie)
|
||||
}
|
||||
|
||||
resp, err := e.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return &IndexerTestResult{Success: false, Error: err.Error()}, nil
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
return &IndexerTestResult{
|
||||
Success: false,
|
||||
Error: fmt.Sprintf("HTTP %d", resp.StatusCode),
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
return &IndexerTestResult{Success: true}, nil
|
||||
}
|
||||
|
||||
// resolvePath resolves a potentially relative path against a base URL.
|
||||
func (e *CardigannEngine) resolvePath(baseURL, path string) (string, error) {
|
||||
if path == "" {
|
||||
return baseURL, nil
|
||||
}
|
||||
|
||||
// Already absolute URL
|
||||
if strings.HasPrefix(strings.ToLower(path), "http://") || strings.HasPrefix(strings.ToLower(path), "https://") {
|
||||
return path, nil
|
||||
}
|
||||
|
||||
// Relative URL — resolve against base
|
||||
base, err := url.Parse(baseURL)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("parse base URL: %w", err)
|
||||
}
|
||||
|
||||
ref, err := url.Parse(path)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("parse path: %w", err)
|
||||
}
|
||||
|
||||
resolved := base.ResolveReference(ref)
|
||||
return resolved.String(), nil
|
||||
}
|
||||
|
||||
// getBaseURL returns the first link from the definition, or a config override.
|
||||
func (e *CardigannEngine) getBaseURL(def *Definition, config map[string]string) string {
|
||||
if url, ok := config["base_url"]; ok && url != "" {
|
||||
return url
|
||||
}
|
||||
if len(def.Links) > 0 {
|
||||
return def.Links[0]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// parseDateField attempts to parse a date string in various formats.
|
||||
func (e *CardigannEngine) parseDateField(val string) string {
|
||||
// Already RFC3339
|
||||
if _, err := time.Parse(time.RFC3339, val); err == nil {
|
||||
return val
|
||||
}
|
||||
|
||||
// Try common date layouts
|
||||
layouts := []string{
|
||||
"2006-01-02 15:04:05",
|
||||
"2006-01-02T15:04:05Z07:00",
|
||||
"2006-01-02T15:04:05",
|
||||
"2006-01-02",
|
||||
"02-Jan-2006",
|
||||
"Jan 02, 2006",
|
||||
"Jan 02 2006",
|
||||
"02 Jan 2006 15:04:05",
|
||||
"Mon, 02 Jan 2006 15:04:05 -0700",
|
||||
time.RFC1123,
|
||||
time.RFC1123Z,
|
||||
time.RFC822,
|
||||
time.RFC822Z,
|
||||
}
|
||||
|
||||
for _, layout := range layouts {
|
||||
if t, err := time.Parse(layout, strings.TrimSpace(val)); err == nil {
|
||||
return t.Format(time.RFC3339)
|
||||
}
|
||||
}
|
||||
|
||||
// Try relative time
|
||||
if t, err := parseFuzzyTime(val); err == nil {
|
||||
return t.Format(time.RFC3339)
|
||||
}
|
||||
|
||||
// Return as-is if we can't parse
|
||||
return val
|
||||
}
|
||||
|
||||
// checkLoginErrors checks for login error patterns in the response.
|
||||
func (e *CardigannEngine) checkLoginErrors(resp *http.Response, def *Definition) error {
|
||||
if len(def.Login.Error) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Note: body has already been read; we'd need to store it
|
||||
// For now, just check status code
|
||||
if resp.StatusCode >= 400 {
|
||||
return fmt.Errorf("login returned HTTP %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user