Files
unified-media-manager/internal/cardigann/selector.go
2026-04-24 10:45:19 -07:00

85 lines
1.9 KiB
Go

package cardigann
import (
	"fmt"
	"sort"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
// ExtractField evaluates a selector block against a goquery selection and
// returns the extracted string after passing it through the block's filters.
//
// Resolution order:
//  1. A non-empty block.Text is treated as a static value and returned
//     (filtered) without consulting the selection.
//  2. An empty block.Selector, or a selector that matches nothing, yields "".
//  3. If block.Remove is set, descendants matching it are stripped from a
//     private copy of the matched nodes before anything is read.
//  4. If block.Case is non-empty, each pattern is tested as a substring of the
//     trimmed text of every matched element; the value mapped to the first
//     matching pattern is returned (filtered), or "" if none match. Patterns
//     are tried longest first (ties broken lexicographically) so the outcome
//     is deterministic and an empty pattern only acts as a fallback.
//  5. Otherwise the value is block.Attribute read via sub.Attr (returning ""
//     when the attribute is absent), or the trimmed text of the first matched
//     element when no attribute is requested.
//
// A non-nil error is only produced by the filter chain.
func ExtractField(selection *goquery.Selection, block SelectorBlock) (string, error) {
	// Static text short-circuits all DOM work.
	if block.Text != "" {
		return applyFiltersToValue(block.Text, block)
	}
	if block.Selector == "" {
		return "", nil
	}

	sub := selection.Find(block.Selector)
	if sub.Length() == 0 {
		return "", nil
	}

	if block.Remove != "" {
		// Remove() detaches nodes from their parents. Operate on a deep copy
		// so that extracting this field does not alter the caller's document
		// for subsequent extractions.
		sub = sub.Clone()
		sub.Find(block.Remove).Remove()
	}

	if len(block.Case) > 0 {
		// Collect the trimmed text of every matched element once; it does not
		// depend on the pattern being tested.
		texts := make([]string, 0, sub.Length())
		sub.Each(func(_ int, s *goquery.Selection) {
			texts = append(texts, strings.TrimSpace(s.Text()))
		})

		// Map iteration order is randomized in Go, so ranging over block.Case
		// directly would make the result unpredictable whenever more than one
		// pattern matches. Impose a fixed order instead.
		patterns := make([]string, 0, len(block.Case))
		for pattern := range block.Case {
			patterns = append(patterns, pattern)
		}
		sort.Slice(patterns, func(i, j int) bool {
			if len(patterns[i]) != len(patterns[j]) {
				return len(patterns[i]) > len(patterns[j])
			}
			return patterns[i] < patterns[j]
		})

		for _, pattern := range patterns {
			for _, text := range texts {
				// Contains also covers the exact-equality case.
				if strings.Contains(text, pattern) {
					return applyFiltersToValue(block.Case[pattern], block)
				}
			}
		}
		return "", nil
	}

	var val string
	if block.Attribute != "" {
		attrVal, exists := sub.Attr(block.Attribute)
		if !exists {
			return "", nil
		}
		val = attrVal
	} else {
		val = strings.TrimSpace(sub.First().Text())
	}
	return applyFiltersToValue(val, block)
}
// applyFiltersToValue passes val through block.Filters using ApplyFilters.
// When the block has no filters, val is returned untouched. If the chain
// fails, the unfiltered val is returned together with a wrapped error.
func applyFiltersToValue(val string, block SelectorBlock) (string, error) {
	if len(block.Filters) > 0 {
		filtered, err := ApplyFilters(val, block.Filters)
		if err == nil {
			return filtered, nil
		}
		return val, fmt.Errorf("filter chain error: %w", err)
	}
	return val, nil
}