Sync from /srv/compose/unified-media-manager

This commit is contained in:
Christopher Mayor
2026-04-24 10:45:19 -07:00
commit 7dbd00e537
132 changed files with 25394 additions and 0 deletions

View File

@@ -0,0 +1,614 @@
package cardigann
import (
"context"
"fmt"
"io"
"log/slog"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/dustin/go-humanize"
)
// CardigannResult is one row extracted from an indexer's search response.
// The service layer converts it to service.SearchResult.
type CardigannResult struct {
	// Title is the extracted "title" field. Search drops rows where it is empty.
	Title string
	// GUID is the extracted "details" field, resolved against the base URL.
	GUID string
	// DownloadURL is the extracted "download" field, resolved against the base URL.
	DownloadURL string
	// Size is the "size" field parsed as a byte count; it stays 0 when the
	// field is missing or cannot be parsed.
	Size int64
	// PubDate is the "date" field, rewritten as RFC3339 when it could be
	// parsed and left verbatim otherwise.
	PubDate string
	// Seeders is the "seeders" field; 0 when missing or not an integer.
	Seeders int
	// Peers is filled from the "leechers" field; 0 when missing or not an integer.
	Peers int
	// Category is the extracted "category" field, unmodified.
	Category string
	// Description is the extracted "description" field, unmodified.
	Description string
}
// IndexerTestResult reports the outcome of probing a Cardigann indexer.
type IndexerTestResult struct {
	// Success is true when every probe step completed without a failure.
	Success bool
	// Error holds a human-readable reason when Success is false; it is
	// empty on success.
	Error string
}
// CardigannEngine drives a Cardigann indexer: it logs in, runs searches and
// performs connectivity tests.
type CardigannEngine struct {
	// httpClient performs every outbound request.
	httpClient *http.Client
	// cookies are collected during login and attached to later requests.
	// NOTE(review): they are mutated without locking; confirm an engine is
	// never shared between goroutines.
	cookies []*http.Cookie
	// logger receives warnings about rows or fields that could not be extracted.
	logger *slog.Logger
}
// NewCardigannEngine returns an engine that sends requests through the client
// produced by SafeHTTPClient and logs to the default slog logger. The engine
// starts with no cookies.
func NewCardigannEngine() *CardigannEngine {
	engine := new(CardigannEngine)
	engine.httpClient = SafeHTTPClient()
	engine.logger = slog.Default()
	return engine
}
// Search runs one query against the indexer described by def and returns the
// rows it could extract from the HTML response.
//
// The steps are: log in when the definition has a login block, render the
// search path and inputs from their templates, validate the resulting URL
// (SSRF protection), send the request (POST with a form body when the
// definition's method is "post", GET with a query string otherwise), parse at
// most 10MB of the response as HTML, and map every row matched by the rows
// selector to a CardigannResult. Rows without a title are dropped.
//
// The special input key "$raw" is rendered and parsed as a query string; all
// values of each key it contains are kept, so repeated parameters such as
// "cat[]=1&cat[]=2" survive. A "$raw" value that cannot be parsed is logged
// and skipped.
//
// An error is returned when login fails, a template cannot be rendered, the
// URL is rejected, the request fails, the status is >= 400, or the body is
// not parseable HTML. Per-field extraction errors are only logged.
func (e *CardigannEngine) Search(ctx context.Context, def *Definition, config map[string]string, query SearchQuery) ([]CardigannResult, error) {
	baseURL := e.getBaseURL(def, config)

	// Login if required.
	if def.Login.Path != "" || len(def.Login.Inputs) > 0 {
		if err := e.login(ctx, def, config, baseURL); err != nil {
			return nil, fmt.Errorf("login failed: %w", err)
		}
	}

	// The same context renders the path and every input.
	tplCtx := TemplateContext{
		Query:      query,
		Config:     config,
		Categories: []string{},
	}

	// Build the search URL from the path template.
	searchPath := def.Search.Path
	if searchPath == "" {
		searchPath = "/"
	}
	path, err := ApplyTemplate("search-path", searchPath, tplCtx)
	if err != nil {
		return nil, fmt.Errorf("template search path: %w", err)
	}
	searchURL, err := e.resolvePath(baseURL, path)
	if err != nil {
		return nil, fmt.Errorf("resolve search URL: %w", err)
	}

	// Validate the search URL (SSRF protection).
	if err := ValidateURL(searchURL); err != nil {
		return nil, fmt.Errorf("search URL blocked: %w", err)
	}

	// Build the query inputs.
	inputValues := make(url.Values)
	for key, tplStr := range def.Search.Inputs {
		rendered, err := ApplyTemplate("input-"+key, tplStr, tplCtx)
		if err != nil {
			return nil, fmt.Errorf("template input %q: %w", key, err)
		}
		if key != "$raw" {
			inputValues.Set(key, rendered)
			continue
		}
		// "$raw" is a whole query string to merge in.
		parsed, err := url.ParseQuery(rendered)
		if err != nil {
			e.logger.Warn("ignoring malformed $raw search input", "error", err)
			continue
		}
		for k, vals := range parsed {
			// Replace any earlier value for k but keep every value that
			// $raw supplies; calling Set per value would keep only the last.
			inputValues[k] = vals
		}
	}

	// Execute the HTTP request under its own deadline.
	searchCtx, searchCancel := context.WithTimeout(ctx, 15*time.Second)
	defer searchCancel()

	var req *http.Request
	if strings.ToUpper(def.Search.Method) == "POST" {
		req, err = http.NewRequestWithContext(searchCtx, http.MethodPost, searchURL, strings.NewReader(inputValues.Encode()))
		if err != nil {
			return nil, fmt.Errorf("create POST request: %w", err)
		}
		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	} else {
		// Anything other than POST is sent as GET with the inputs appended
		// to the query string.
		if len(inputValues) > 0 {
			sep := "?"
			if strings.Contains(searchURL, "?") {
				sep = "&"
			}
			searchURL += sep + inputValues.Encode()
		}
		req, err = http.NewRequestWithContext(searchCtx, http.MethodGet, searchURL, nil)
		if err != nil {
			return nil, fmt.Errorf("create GET request: %w", err)
		}
	}
	for _, cookie := range e.cookies {
		req.AddCookie(cookie)
	}

	resp, err := e.httpClient.Do(req)
	if err != nil {
		// req.Method is "POST" or "GET", matching the branch taken above.
		return nil, fmt.Errorf("%s search: %w", req.Method, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 400 {
		return nil, fmt.Errorf("search returned HTTP %d", resp.StatusCode)
	}

	// Read the response with a size limit (T-10-07: 10MB cap).
	body := io.LimitReader(resp.Body, 10*1024*1024)

	// Parse HTML.
	doc, err := goquery.NewDocumentFromReader(body)
	if err != nil {
		return nil, fmt.Errorf("parse HTML: %w", err)
	}

	// Find the rows and strip unwanted descendants before extraction.
	rows := doc.Find(def.Search.Rows.Selector)
	if def.Search.Rows.Remove != "" {
		rows.Find(def.Search.Rows.Remove).Remove()
	}

	var results []CardigannResult
	rows.Each(func(i int, row *goquery.Selection) {
		fieldValues := make(map[string]string)
		for _, field := range def.Search.Fields {
			val, err := ExtractField(row, field.Block)
			if err != nil {
				e.logger.Warn("field extraction error", "field", field.Field, "error", err)
				continue
			}
			fieldValues[field.Field] = val
		}

		// Only include results with at least a title.
		if fieldValues["title"] == "" {
			return
		}

		result := CardigannResult{
			Title:       fieldValues["title"],
			DownloadURL: fieldValues["download"],
			GUID:        fieldValues["details"],
			Category:    fieldValues["category"],
			Description: fieldValues["description"],
			PubDate:     fieldValues["date"],
		}

		// Resolve relative URLs; keep the raw value when resolution fails.
		if result.DownloadURL != "" {
			if resolved, err := e.resolvePath(baseURL, result.DownloadURL); err == nil {
				result.DownloadURL = resolved
			}
		}
		if result.GUID != "" {
			if resolved, err := e.resolvePath(baseURL, result.GUID); err == nil {
				result.GUID = resolved
			}
		}

		// Numeric fields are best-effort: an unparsable value leaves zero.
		if sizeStr := fieldValues["size"]; sizeStr != "" {
			if size, err := humanize.ParseBytes(strings.TrimSpace(sizeStr)); err == nil {
				result.Size = int64(size)
			}
		}
		if seedersStr := fieldValues["seeders"]; seedersStr != "" {
			if v, err := strconv.Atoi(strings.TrimSpace(seedersStr)); err == nil {
				result.Seeders = v
			}
		}
		if leechersStr := fieldValues["leechers"]; leechersStr != "" {
			if v, err := strconv.Atoi(strings.TrimSpace(leechersStr)); err == nil {
				result.Peers = v
			}
		}

		// Normalize the date unless it is already RFC3339.
		if result.PubDate != "" {
			result.PubDate = e.parseDateField(result.PubDate)
		}

		results = append(results, result)
	})

	return results, nil
}
// login authenticates against the indexer and stores the resulting cookies
// on the engine so later requests can send them.
//
// Three methods are supported, selected by def.Login.Method:
//
//   - "cookie": the rendered "cookie" input is split on ';' into name=value
//     pairs and stored directly. No request is made and no login path is
//     required.
//   - "post": the rendered inputs are POSTed to the login path and the
//     response cookies replace the stored ones.
//   - anything else ("form"): the login page is fetched, the form matched by
//     def.Login.Form (default "form") is located, its hidden inputs are
//     merged with the rendered inputs, and the result is POSTed to the form's
//     action. The action is resolved relative to the login page URL; a form
//     without an action posts back to that URL.
//
// Stored cookies are keyed by name: a cookie received later replaces an
// earlier one with the same name instead of being sent twice.
//
// When def.Login.Test is set, the test path (default "/") is fetched
// afterwards and, if a selector is given, the response must contain an
// element matching it. A response that cannot be parsed as HTML is not
// treated as a failure.
//
// All requests share one 10 second deadline derived from ctx, and every URL
// is checked with ValidateURL before use.
func (e *CardigannEngine) login(ctx context.Context, def *Definition, config map[string]string, baseURL string) error {
	// maxBody caps how much of any login-related response is consumed.
	const maxBody = 10 * 1024 * 1024

	// upsertCookie stores c, replacing an existing cookie of the same name
	// so repeated logins or a rotated session ID do not produce duplicates.
	upsertCookie := func(c *http.Cookie) {
		for i, existing := range e.cookies {
			if existing != nil && existing.Name == c.Name {
				e.cookies[i] = c
				return
			}
		}
		e.cookies = append(e.cookies, c)
	}

	// Render the inputs first: every method needs them, and the "cookie"
	// method needs nothing else.
	inputValues := make(map[string]string, len(def.Login.Inputs))
	for key, tplStr := range def.Login.Inputs {
		rendered, err := ApplyTemplate("login-input-"+key, tplStr, TemplateContext{
			Config: config,
		})
		if err != nil {
			return fmt.Errorf("template login input %q: %w", key, err)
		}
		inputValues[key] = rendered
	}

	if def.Login.Method == "cookie" {
		// The value may hold several cookies ("a=1; b=2"), as copied from
		// a browser's Cookie header.
		for _, pair := range strings.Split(inputValues["cookie"], ";") {
			name, value, _ := strings.Cut(strings.TrimSpace(pair), "=")
			name = strings.TrimSpace(name)
			if name == "" {
				continue
			}
			upsertCookie(&http.Cookie{Name: name, Value: value})
		}
		return nil
	}

	loginPath := def.Login.Path
	if loginPath == "" {
		return fmt.Errorf("login path is empty")
	}
	path, err := ApplyTemplate("login-path", loginPath, TemplateContext{
		Config: config,
	})
	if err != nil {
		return fmt.Errorf("template login path: %w", err)
	}
	loginURL, err := e.resolvePath(baseURL, path)
	if err != nil {
		return fmt.Errorf("resolve login URL: %w", err)
	}
	if err := ValidateURL(loginURL); err != nil {
		return fmt.Errorf("login URL blocked: %w", err)
	}

	loginCtx, loginCancel := context.WithTimeout(ctx, 10*time.Second)
	defer loginCancel()

	switch def.Login.Method {
	case "post":
		// POST the inputs directly to the login path.
		form := url.Values{}
		for key, val := range inputValues {
			form.Set(key, val)
		}
		req, err := http.NewRequestWithContext(loginCtx, http.MethodPost, loginURL, strings.NewReader(form.Encode()))
		if err != nil {
			return fmt.Errorf("create login POST: %w", err)
		}
		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
		resp, err := e.httpClient.Do(req)
		if err != nil {
			return fmt.Errorf("login POST: %w", err)
		}
		defer resp.Body.Close()
		// Drain the body so the connection can be reused; its content is
		// not inspected.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxBody))

		// The response cookies become the session.
		e.cookies = resp.Cookies()

		if err := e.checkLoginErrors(resp, def); err != nil {
			return err
		}

	default:
		// "form" method: GET the login page, find the form, fill it, submit.
		req, err := http.NewRequestWithContext(loginCtx, http.MethodGet, loginURL, nil)
		if err != nil {
			return fmt.Errorf("create login GET: %w", err)
		}
		resp, err := e.httpClient.Do(req)
		if err != nil {
			return fmt.Errorf("login GET: %w", err)
		}
		defer resp.Body.Close()
		bodyBytes, err := io.ReadAll(io.LimitReader(resp.Body, maxBody))
		if err != nil {
			return fmt.Errorf("read login page: %w", err)
		}
		for _, c := range resp.Cookies() {
			upsertCookie(c)
		}

		doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(bodyBytes)))
		if err != nil {
			return fmt.Errorf("parse login page: %w", err)
		}

		formSelector := def.Login.Form
		if formSelector == "" {
			formSelector = "form"
		}
		form := doc.Find(formSelector).First()
		if form.Length() == 0 {
			return fmt.Errorf("login form not found with selector %q", formSelector)
		}

		// Resolve the action against the page that served the form. A
		// missing or empty action resolves to the login URL itself.
		action, _ := form.Attr("action")
		actionURL, err := e.resolvePath(loginURL, action)
		if err != nil {
			return fmt.Errorf("resolve form action: %w", err)
		}
		if err := ValidateURL(actionURL); err != nil {
			return fmt.Errorf("form action URL blocked: %w", err)
		}

		// Hidden inputs (CSRF tokens and the like) go first so that the
		// configured inputs win on a name clash.
		formValues := url.Values{}
		form.Find("input[type='hidden']").Each(func(i int, s *goquery.Selection) {
			name, _ := s.Attr("name")
			value, _ := s.Attr("value")
			if name != "" {
				formValues.Set(name, value)
			}
		})
		for key, val := range inputValues {
			formValues.Set(key, val)
		}

		submitReq, err := http.NewRequestWithContext(loginCtx, http.MethodPost, actionURL, strings.NewReader(formValues.Encode()))
		if err != nil {
			return fmt.Errorf("create form submit: %w", err)
		}
		submitReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
		for _, cookie := range e.cookies {
			submitReq.AddCookie(cookie)
		}
		submitResp, err := e.httpClient.Do(submitReq)
		if err != nil {
			return fmt.Errorf("submit login form: %w", err)
		}
		defer submitResp.Body.Close()
		_, _ = io.Copy(io.Discard, io.LimitReader(submitResp.Body, maxBody))
		for _, c := range submitResp.Cookies() {
			upsertCookie(c)
		}

		if err := e.checkLoginErrors(submitResp, def); err != nil {
			return err
		}
	}

	// Optional verification that the session is actually logged in.
	if def.Login.Test.Selector == "" && def.Login.Test.Path == "" {
		return nil
	}
	testPath := def.Login.Test.Path
	if testPath == "" {
		testPath = "/"
	}
	testURL, err := e.resolvePath(baseURL, testPath)
	if err != nil {
		return fmt.Errorf("resolve test URL: %w", err)
	}
	if err := ValidateURL(testURL); err != nil {
		return fmt.Errorf("test URL blocked: %w", err)
	}
	testReq, err := http.NewRequestWithContext(loginCtx, http.MethodGet, testURL, nil)
	if err != nil {
		return fmt.Errorf("create test request: %w", err)
	}
	for _, cookie := range e.cookies {
		testReq.AddCookie(cookie)
	}
	testResp, err := e.httpClient.Do(testReq)
	if err != nil {
		return fmt.Errorf("login test request: %w", err)
	}
	defer testResp.Body.Close()

	if def.Login.Test.Selector == "" {
		// Nothing to inspect; reaching the page is all that is checked.
		_, _ = io.Copy(io.Discard, io.LimitReader(testResp.Body, maxBody))
		return nil
	}

	// The selector must be evaluated against the page that was fetched.
	testBody, err := io.ReadAll(io.LimitReader(testResp.Body, maxBody))
	if err != nil {
		return fmt.Errorf("read login test page: %w", err)
	}
	testDoc, err := goquery.NewDocumentFromReader(strings.NewReader(string(testBody)))
	if err != nil {
		return nil // Don't fail on parse errors
	}
	if testDoc.Find(def.Login.Test.Selector).Length() == 0 {
		return fmt.Errorf("login test: selector %q not found", def.Login.Test.Selector)
	}
	return nil
}
// Test probes an indexer definition and reports whether it looks usable.
//
// It requires a base URL that passes ValidateURL, performs the login when the
// definition has a login block, and, when a search path is defined, issues a
// GET to that path (10 second timeout, stored cookies attached) and treats
// any status >= 400 as a failure. Every failure is reported through the
// returned IndexerTestResult; the error return value is always nil.
func (e *CardigannEngine) Test(ctx context.Context, def *Definition, config map[string]string) (*IndexerTestResult, error) {
	// failed wraps a reason in an unsuccessful result.
	failed := func(reason string) (*IndexerTestResult, error) {
		return &IndexerTestResult{Success: false, Error: reason}, nil
	}

	baseURL := e.getBaseURL(def, config)
	if baseURL == "" {
		return failed("no base URL in definition")
	}
	if err := ValidateURL(baseURL); err != nil {
		return failed(fmt.Sprintf("URL blocked: %v", err))
	}

	// Attempt the login when the definition declares one.
	needsLogin := def.Login.Path != "" || len(def.Login.Inputs) > 0
	if needsLogin {
		if err := e.login(ctx, def, config, baseURL); err != nil {
			return failed(fmt.Sprintf("login failed: %v", err))
		}
	}

	// Without a search path there is nothing further to probe.
	if def.Search.Path == "" {
		return &IndexerTestResult{Success: true}, nil
	}

	probePath, err := ApplyTemplate("test-path", def.Search.Path, TemplateContext{
		Config: config,
	})
	if err != nil {
		return failed(fmt.Sprintf("template error: %v", err))
	}
	probeURL, err := e.resolvePath(baseURL, probePath)
	if err != nil {
		return failed(fmt.Sprintf("resolve URL: %v", err))
	}
	if err := ValidateURL(probeURL); err != nil {
		return failed(fmt.Sprintf("URL blocked: %v", err))
	}

	probeCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(probeCtx, http.MethodGet, probeURL, nil)
	if err != nil {
		return failed(err.Error())
	}
	for _, c := range e.cookies {
		req.AddCookie(c)
	}

	resp, err := e.httpClient.Do(req)
	if err != nil {
		return failed(err.Error())
	}
	// Only the status matters, so the body is closed unread.
	resp.Body.Close()

	if resp.StatusCode >= 400 {
		return failed(fmt.Sprintf("HTTP %d", resp.StatusCode))
	}
	return &IndexerTestResult{Success: true}, nil
}
// resolvePath turns path into an absolute URL string.
//
// An empty path yields baseURL unchanged. A path that already starts with
// "http://" or "https://" (case-insensitive) is returned verbatim. Anything
// else is parsed as a URL reference and resolved against baseURL. An error
// is returned when either baseURL or path cannot be parsed.
func (e *CardigannEngine) resolvePath(baseURL, path string) (string, error) {
	if path == "" {
		return baseURL, nil
	}

	// Absolute http(s) URLs pass through untouched.
	lowered := strings.ToLower(path)
	for _, scheme := range []string{"http://", "https://"} {
		if strings.HasPrefix(lowered, scheme) {
			return path, nil
		}
	}

	// Everything else is a reference relative to the base.
	base, err := url.Parse(baseURL)
	if err != nil {
		return "", fmt.Errorf("parse base URL: %w", err)
	}
	ref, err := url.Parse(path)
	if err != nil {
		return "", fmt.Errorf("parse path: %w", err)
	}
	return base.ResolveReference(ref).String(), nil
}
// getBaseURL picks the indexer's base URL: a non-empty config["base_url"]
// takes precedence, otherwise the first entry of def.Links is used. It
// returns "" when neither is available.
func (e *CardigannEngine) getBaseURL(def *Definition, config map[string]string) string {
	// A missing key reads as "", which is treated the same as an empty override.
	override := config["base_url"]
	switch {
	case override != "":
		return override
	case len(def.Links) > 0:
		return def.Links[0]
	default:
		return ""
	}
}
// parseDateField normalizes a scraped date string to RFC3339.
//
// A value that already parses as RFC3339 is returned untouched. Otherwise
// the whitespace-trimmed value is tried against a fixed list of layouts, and
// finally the untrimmed value is handed to parseFuzzyTime. If nothing
// matches, the input is returned unchanged.
func (e *CardigannEngine) parseDateField(val string) string {
	// Already RFC3339: keep the caller's exact text.
	if _, err := time.Parse(time.RFC3339, val); err == nil {
		return val
	}

	// Layouts are tried in order; the first that parses wins.
	knownLayouts := []string{
		"2006-01-02 15:04:05",
		"2006-01-02T15:04:05Z07:00",
		"2006-01-02T15:04:05",
		"2006-01-02",
		"02-Jan-2006",
		"Jan 02, 2006",
		"Jan 02 2006",
		"02 Jan 2006 15:04:05",
		"Mon, 02 Jan 2006 15:04:05 -0700",
		time.RFC1123,
		time.RFC1123Z,
		time.RFC822,
		time.RFC822Z,
	}
	trimmed := strings.TrimSpace(val)
	for _, layout := range knownLayouts {
		parsed, err := time.Parse(layout, trimmed)
		if err != nil {
			continue
		}
		return parsed.Format(time.RFC3339)
	}

	// Last resort: relative expressions handled by parseFuzzyTime.
	if parsed, err := parseFuzzyTime(val); err == nil {
		return parsed.Format(time.RFC3339)
	}

	// Unparseable: hand the original text back.
	return val
}
// checkLoginErrors reports a failed login based on the response status.
//
// It only acts when the definition declares login error patterns, and even
// then it does not evaluate those patterns: the response body has already
// been consumed by the caller, so the check is limited to rejecting HTTP
// status codes >= 400. It returns nil in every other case.
func (e *CardigannEngine) checkLoginErrors(resp *http.Response, def *Definition) error {
	hasPatterns := len(def.Login.Error) > 0
	if hasPatterns && resp.StatusCode >= 400 {
		return fmt.Errorf("login returned HTTP %d", resp.StatusCode)
	}
	return nil
}