package cardigann import ( "context" "fmt" "io" "log/slog" "net/http" "net/url" "strconv" "strings" "time" "github.com/PuerkitoBio/goquery" "github.com/dustin/go-humanize" ) // CardigannResult is the output of a Cardigann search operation. // It is converted to service.SearchResult by the service layer. type CardigannResult struct { Title string GUID string DownloadURL string Size int64 PubDate string Seeders int Peers int Category string Description string } // IndexerTestResult is the result of testing a Cardigann indexer connection. type IndexerTestResult struct { Success bool Error string } // CardigannEngine handles Cardigann indexer operations: search, login, test. type CardigannEngine struct { httpClient *http.Client cookies []*http.Cookie logger *slog.Logger } // NewCardigannEngine creates a new CardigannEngine with safe HTTP client. func NewCardigannEngine() *CardigannEngine { return &CardigannEngine{ httpClient: SafeHTTPClient(), logger: slog.Default(), } } // Search executes a Cardigann search: login (if needed), build request, parse HTML, extract results. func (e *CardigannEngine) Search(ctx context.Context, def *Definition, config map[string]string, query SearchQuery) ([]CardigannResult, error) { baseURL := e.getBaseURL(def, config) // Login if required if def.Login.Path != "" || len(def.Login.Inputs) > 0 { if err := e.login(ctx, def, config, baseURL); err != nil { return nil, fmt.Errorf("login failed: %w", err) } } // Build search URL from path template searchPath := def.Search.Path if searchPath == "" { searchPath = "/" } path, err := ApplyTemplate("search-path", searchPath, TemplateContext{ Query: query, Config: config, Categories: []string{}, }) if err != nil { return nil, fmt.Errorf("template search path: %w", err) } searchURL, err := e.resolvePath(baseURL, path) if err != nil { return nil, fmt.Errorf("resolve search URL: %w", err) } // Validate the search URL (SSRF protection) if err := ValidateURL(searchURL); err != nil { return nil, fmt.Errorf("search URL blocked: %w", err) } // Build query inputs inputValues := make(url.Values) for key, tplStr := range def.Search.Inputs { rendered, err := ApplyTemplate("input-"+key, tplStr, TemplateContext{ Query: query, Config: config, Categories: []string{}, }) if err != nil { return nil, fmt.Errorf("template input %q: %w", key, err) } if key == "$raw" { // Parse as query string and merge parsed, err := url.ParseQuery(rendered) if err == nil { for k, vals := range parsed { for _, v := range vals { inputValues.Set(k, v) } } } } else { inputValues.Set(key, rendered) } } // Execute HTTP request var resp *http.Response method := strings.ToUpper(def.Search.Method) if method == "" { method = "GET" } searchCtx, searchCancel := context.WithTimeout(ctx, 15*time.Second) defer searchCancel() if method == "POST" { req, err := http.NewRequestWithContext(searchCtx, http.MethodPost, searchURL, strings.NewReader(inputValues.Encode())) if err != nil { return nil, fmt.Errorf("create POST request: %w", err) } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") for _, cookie := range e.cookies { req.AddCookie(cookie) } resp, err = e.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("POST search: %w", err) } } else { // GET: append query string if len(inputValues) > 0 { if strings.Contains(searchURL, "?") { searchURL += "&" + inputValues.Encode() } else { searchURL += "?" + inputValues.Encode() } } req, err := http.NewRequestWithContext(searchCtx, http.MethodGet, searchURL, nil) if err != nil { return nil, fmt.Errorf("create GET request: %w", err) } for _, cookie := range e.cookies { req.AddCookie(cookie) } resp, err = e.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("GET search: %w", err) } } defer resp.Body.Close() if resp.StatusCode >= 400 { return nil, fmt.Errorf("search returned HTTP %d", resp.StatusCode) } // Read response with size limit (T-10-07: 10MB cap) body := io.LimitReader(resp.Body, 10*1024*1024) // Parse HTML doc, err := goquery.NewDocumentFromReader(body) if err != nil { return nil, fmt.Errorf("parse HTML: %w", err) } // Find rows rows := doc.Find(def.Search.Rows.Selector) if def.Search.Rows.Remove != "" { rows.Find(def.Search.Rows.Remove).Remove() } var results []CardigannResult rows.Each(func(i int, row *goquery.Selection) { result := CardigannResult{} fieldValues := make(map[string]string) for _, field := range def.Search.Fields { val, err := ExtractField(row, field.Block) if err != nil { e.logger.Warn("field extraction error", "field", field.Field, "error", err) continue } fieldValues[field.Field] = val } // Map fields to result result.Title = fieldValues["title"] result.DownloadURL = fieldValues["download"] result.GUID = fieldValues["details"] result.Category = fieldValues["category"] result.Description = fieldValues["description"] result.PubDate = fieldValues["date"] // Resolve relative URLs if result.DownloadURL != "" { resolved, err := e.resolvePath(baseURL, result.DownloadURL) if err == nil { result.DownloadURL = resolved } } if result.GUID != "" { resolved, err := e.resolvePath(baseURL, result.GUID) if err == nil { result.GUID = resolved } } // Parse size if sizeStr := fieldValues["size"]; sizeStr != "" { if size, err := humanize.ParseBytes(strings.TrimSpace(sizeStr)); err == nil { result.Size = int64(size) } } // Parse seeders/peers if seedersStr := fieldValues["seeders"]; seedersStr != "" { if v, err := strconv.Atoi(strings.TrimSpace(seedersStr)); err == nil { result.Seeders = v } } if leechersStr := fieldValues["leechers"]; leechersStr != "" { if v, err := strconv.Atoi(strings.TrimSpace(leechersStr)); err == nil { result.Peers = v } } // Parse date if it wasn't already RFC3339 if result.PubDate != "" { result.PubDate = e.parseDateField(result.PubDate) } // Only include results with at least a title if result.Title != "" { results = append(results, result) } }) return results, nil } // login performs authentication against the Cardigann indexer. func (e *CardigannEngine) login(ctx context.Context, def *Definition, config map[string]string, baseURL string) error { loginPath := def.Login.Path if loginPath == "" { return fmt.Errorf("login path is empty") } path, err := ApplyTemplate("login-path", loginPath, TemplateContext{ Config: config, }) if err != nil { return fmt.Errorf("template login path: %w", err) } loginURL, err := e.resolvePath(baseURL, path) if err != nil { return fmt.Errorf("resolve login URL: %w", err) } if err := ValidateURL(loginURL); err != nil { return fmt.Errorf("login URL blocked: %w", err) } // Build input values from login.inputs inputValues := make(map[string]string) for key, tplStr := range def.Login.Inputs { rendered, err := ApplyTemplate("login-input-"+key, tplStr, TemplateContext{ Config: config, }) if err != nil { return fmt.Errorf("template login input %q: %w", key, err) } inputValues[key] = rendered } loginCtx, loginCancel := context.WithTimeout(ctx, 10*time.Second) defer loginCancel() switch def.Login.Method { case "cookie": // Set cookie directly if cookieStr, ok := inputValues["cookie"]; ok { parts := strings.SplitN(cookieStr, "=", 2) cookie := &http.Cookie{ Name: parts[0], Value: func() string { if len(parts) > 1 { return parts[1] }; return "" }(), } e.cookies = append(e.cookies, cookie) } return nil case "post": // POST directly to login path with inputs form := url.Values{} for key, val := range inputValues { form.Set(key, val) } req, err := http.NewRequestWithContext(loginCtx, http.MethodPost, loginURL, strings.NewReader(form.Encode())) if err != nil { return fmt.Errorf("create login POST: %w", err) } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") resp, err := e.httpClient.Do(req) if err != nil { return fmt.Errorf("login POST: %w", err) } defer resp.Body.Close() io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) // Store cookies from response e.cookies = resp.Cookies() // Check for errors if err := e.checkLoginErrors(resp, def); err != nil { return err } default: // "form" method (default) // GET login page, find form, fill inputs, submit req, err := http.NewRequestWithContext(loginCtx, http.MethodGet, loginURL, nil) if err != nil { return fmt.Errorf("create login GET: %w", err) } resp, err := e.httpClient.Do(req) if err != nil { return fmt.Errorf("login GET: %w", err) } defer resp.Body.Close() bodyBytes, err := io.ReadAll(io.LimitReader(resp.Body, 10*1024*1024)) if err != nil { return fmt.Errorf("read login page: %w", err) } e.cookies = append(e.cookies, resp.Cookies()...) // Parse the login page to find the form doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(bodyBytes))) if err != nil { return fmt.Errorf("parse login page: %w", err) } // Find the form formSelector := def.Login.Form if formSelector == "" { formSelector = "form" } form := doc.Find(formSelector).First() if form.Length() == 0 { return fmt.Errorf("login form not found with selector %q", formSelector) } // Get form action action, exists := form.Attr("action") if !exists || action == "" { action = loginPath } actionURL, err := e.resolvePath(baseURL, action) if err != nil { return fmt.Errorf("resolve form action: %w", err) } if err := ValidateURL(actionURL); err != nil { return fmt.Errorf("form action URL blocked: %w", err) } // Collect hidden inputs from form formValues := url.Values{} form.Find("input[type='hidden']").Each(func(i int, s *goquery.Selection) { name, _ := s.Attr("name") value, _ := s.Attr("value") if name != "" { formValues.Set(name, value) } }) // Add login inputs for key, val := range inputValues { formValues.Set(key, val) } // Submit the form submitReq, err := http.NewRequestWithContext(loginCtx, http.MethodPost, actionURL, strings.NewReader(formValues.Encode())) if err != nil { return fmt.Errorf("create form submit: %w", err) } submitReq.Header.Set("Content-Type", "application/x-www-form-urlencoded") for _, cookie := range e.cookies { submitReq.AddCookie(cookie) } submitResp, err := e.httpClient.Do(submitReq) if err != nil { return fmt.Errorf("submit login form: %w", err) } defer submitResp.Body.Close() io.ReadAll(io.LimitReader(submitResp.Body, 10*1024*1024)) e.cookies = append(e.cookies, submitResp.Cookies()...) // Check for errors if err := e.checkLoginErrors(submitResp, def); err != nil { return err } } // Test login if test block is defined if def.Login.Test.Selector != "" || def.Login.Test.Path != "" { testPath := def.Login.Test.Path if testPath == "" { testPath = "/" } testURL, err := e.resolvePath(baseURL, testPath) if err != nil { return fmt.Errorf("resolve test URL: %w", err) } if err := ValidateURL(testURL); err != nil { return fmt.Errorf("test URL blocked: %w", err) } testReq, err := http.NewRequestWithContext(loginCtx, http.MethodGet, testURL, nil) if err != nil { return fmt.Errorf("create test request: %w", err) } for _, cookie := range e.cookies { testReq.AddCookie(cookie) } testResp, err := e.httpClient.Do(testReq) if err != nil { return fmt.Errorf("login test request: %w", err) } defer testResp.Body.Close() io.ReadAll(io.LimitReader(testResp.Body, 10*1024*1024)) if def.Login.Test.Selector != "" { testDoc, err := goquery.NewDocumentFromReader(strings.NewReader(func() string { // We can't re-read the body, so we just check the status code return "" }())) if err != nil { return nil // Don't fail on parse errors } if testDoc.Find(def.Login.Test.Selector).Length() == 0 { return fmt.Errorf("login test: selector %q not found", def.Login.Test.Selector) } } } return nil } // Test validates a Cardigann indexer by checking base URL connectivity and optionally testing login. func (e *CardigannEngine) Test(ctx context.Context, def *Definition, config map[string]string) (*IndexerTestResult, error) { baseURL := e.getBaseURL(def, config) if baseURL == "" { return &IndexerTestResult{Success: false, Error: "no base URL in definition"}, nil } if err := ValidateURL(baseURL); err != nil { return &IndexerTestResult{Success: false, Error: fmt.Sprintf("URL blocked: %v", err)}, nil } // If Login block present, attempt login if def.Login.Path != "" || len(def.Login.Inputs) > 0 { if err := e.login(ctx, def, config, baseURL); err != nil { return &IndexerTestResult{Success: false, Error: fmt.Sprintf("login failed: %v", err)}, nil } } // If Search block present, test search path if def.Search.Path != "" { testPath, err := ApplyTemplate("test-path", def.Search.Path, TemplateContext{ Config: config, }) if err != nil { return &IndexerTestResult{Success: false, Error: fmt.Sprintf("template error: %v", err)}, nil } testURL, err := e.resolvePath(baseURL, testPath) if err != nil { return &IndexerTestResult{Success: false, Error: fmt.Sprintf("resolve URL: %v", err)}, nil } if err := ValidateURL(testURL); err != nil { return &IndexerTestResult{Success: false, Error: fmt.Sprintf("URL blocked: %v", err)}, nil } testCtx, cancel := context.WithTimeout(ctx, 10*time.Second) defer cancel() req, err := http.NewRequestWithContext(testCtx, http.MethodGet, testURL, nil) if err != nil { return &IndexerTestResult{Success: false, Error: err.Error()}, nil } for _, cookie := range e.cookies { req.AddCookie(cookie) } resp, err := e.httpClient.Do(req) if err != nil { return &IndexerTestResult{Success: false, Error: err.Error()}, nil } resp.Body.Close() if resp.StatusCode >= 400 { return &IndexerTestResult{ Success: false, Error: fmt.Sprintf("HTTP %d", resp.StatusCode), }, nil } } return &IndexerTestResult{Success: true}, nil } // resolvePath resolves a potentially relative path against a base URL. func (e *CardigannEngine) resolvePath(baseURL, path string) (string, error) { if path == "" { return baseURL, nil } // Already absolute URL if strings.HasPrefix(strings.ToLower(path), "http://") || strings.HasPrefix(strings.ToLower(path), "https://") { return path, nil } // Relative URL — resolve against base base, err := url.Parse(baseURL) if err != nil { return "", fmt.Errorf("parse base URL: %w", err) } ref, err := url.Parse(path) if err != nil { return "", fmt.Errorf("parse path: %w", err) } resolved := base.ResolveReference(ref) return resolved.String(), nil } // getBaseURL returns the first link from the definition, or a config override. func (e *CardigannEngine) getBaseURL(def *Definition, config map[string]string) string { if url, ok := config["base_url"]; ok && url != "" { return url } if len(def.Links) > 0 { return def.Links[0] } return "" } // parseDateField attempts to parse a date string in various formats. func (e *CardigannEngine) parseDateField(val string) string { // Already RFC3339 if _, err := time.Parse(time.RFC3339, val); err == nil { return val } // Try common date layouts layouts := []string{ "2006-01-02 15:04:05", "2006-01-02T15:04:05Z07:00", "2006-01-02T15:04:05", "2006-01-02", "02-Jan-2006", "Jan 02, 2006", "Jan 02 2006", "02 Jan 2006 15:04:05", "Mon, 02 Jan 2006 15:04:05 -0700", time.RFC1123, time.RFC1123Z, time.RFC822, time.RFC822Z, } for _, layout := range layouts { if t, err := time.Parse(layout, strings.TrimSpace(val)); err == nil { return t.Format(time.RFC3339) } } // Try relative time if t, err := parseFuzzyTime(val); err == nil { return t.Format(time.RFC3339) } // Return as-is if we can't parse return val } // checkLoginErrors checks for login error patterns in the response. func (e *CardigannEngine) checkLoginErrors(resp *http.Response, def *Definition) error { if len(def.Login.Error) == 0 { return nil } // Note: body has already been read; we'd need to store it // For now, just check status code if resp.StatusCode >= 400 { return fmt.Errorf("login returned HTTP %d", resp.StatusCode) } return nil }