package cardigann

import (
	"fmt"
	"sort"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// ExtractField evaluates a selector block against a goquery selection and
// returns the extracted string after running it through block.Filters.
//
// Resolution order:
//  1. If block.Text is non-empty it is used as a static value.
//  2. If block.Selector is empty, or matches nothing, "" is returned.
//  3. If block.Remove is set, descendants matching it are stripped from a
//     private copy of the matched nodes.
//  4. If block.Case has entries, the value of the first matching pattern is
//     used (see caseValueFor); "" is returned when no pattern matches.
//  5. Otherwise the value is block.Attribute read from the first matched
//     element ("" if the attribute is absent) or, when no attribute is
//     requested, the trimmed text of the first matched element.
//
// A non-nil error is only produced by the filter chain.
func ExtractField(selection *goquery.Selection, block SelectorBlock) (string, error) {
	// A static text value short-circuits any DOM lookup.
	if block.Text != "" {
		return applyFiltersToValue(block.Text, block)
	}
	if block.Selector == "" {
		return "", nil
	}

	sub := selection.Find(block.Selector)
	if sub.Length() == 0 {
		return "", nil
	}

	if block.Remove != "" {
		// Work on a deep copy so that stripping nodes does not mutate the
		// caller's document and affect later extractions on the same
		// selection.
		sub = sub.Clone()
		sub.Find(block.Remove).Remove()
	}

	if len(block.Case) > 0 {
		val, ok := caseValueFor(sub, block.Case)
		if !ok {
			return "", nil
		}
		return applyFiltersToValue(val, block)
	}

	if block.Attribute != "" {
		// Attr reads from the first element of the selection.
		val, exists := sub.Attr(block.Attribute)
		if !exists {
			return "", nil
		}
		return applyFiltersToValue(val, block)
	}

	return applyFiltersToValue(strings.TrimSpace(sub.First().Text()), block)
}

// caseValueFor returns the value of the first pattern in cases that occurs as
// a substring of the trimmed text of any element in sub, and whether any
// pattern matched.
//
// Go randomizes map iteration order, so patterns are tried in a fixed order
// to keep the result deterministic: longest first, ties broken
// lexicographically. Longest-first means an exact match always wins over a
// shorter substring match, and an empty pattern (which matches any text) only
// acts as a fallback.
func caseValueFor(sub *goquery.Selection, cases map[string]string) (string, bool) {
	// Collect the element texts once instead of once per pattern.
	texts := make([]string, 0, sub.Length())
	sub.Each(func(_ int, s *goquery.Selection) {
		texts = append(texts, strings.TrimSpace(s.Text()))
	})

	patterns := make([]string, 0, len(cases))
	for p := range cases {
		patterns = append(patterns, p)
	}
	sort.Slice(patterns, func(i, j int) bool {
		if len(patterns[i]) != len(patterns[j]) {
			return len(patterns[i]) > len(patterns[j])
		}
		return patterns[i] < patterns[j]
	})

	for _, p := range patterns {
		for _, text := range texts {
			if strings.Contains(text, p) {
				return cases[p], true
			}
		}
	}
	return "", false
}

// applyFiltersToValue runs val through block.Filters via ApplyFilters.
//
// With no filters configured val is returned unchanged. If the filter chain
// fails, the original unfiltered val is returned together with the wrapped
// error.
func applyFiltersToValue(val string, block SelectorBlock) (string, error) {
	if len(block.Filters) == 0 {
		return val, nil
	}

	result, err := ApplyFilters(val, block.Filters)
	if err != nil {
		return val, fmt.Errorf("filter chain error: %w", err)
	}
	return result, nil
}