85 lines
1.9 KiB
Go
85 lines
1.9 KiB
Go
package cardigann
|
|
|
|
import (
	"fmt"
	"sort"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
|
|
|
|
// ExtractField evaluates a CSS selector block against a goquery selection
|
|
// and returns the extracted (and filtered) string value.
|
|
func ExtractField(selection *goquery.Selection, block SelectorBlock) (string, error) {
|
|
var val string
|
|
|
|
// If Text is set, it's a static text value
|
|
if block.Text != "" {
|
|
val = block.Text
|
|
return applyFiltersToValue(val, block)
|
|
}
|
|
|
|
// If no selector, return empty
|
|
if block.Selector == "" {
|
|
return "", nil
|
|
}
|
|
|
|
// Find matching elements
|
|
sub := selection.Find(block.Selector)
|
|
if sub.Length() == 0 {
|
|
return "", nil
|
|
}
|
|
|
|
// Remove child elements matching Remove selector
|
|
if block.Remove != "" {
|
|
sub.Find(block.Remove).Remove()
|
|
}
|
|
|
|
// If Case patterns defined, iterate and return matching value
|
|
if len(block.Case) > 0 {
|
|
for pattern, result := range block.Case {
|
|
// Check if any matched element matches the pattern
|
|
found := false
|
|
sub.EachWithBreak(func(i int, s *goquery.Selection) bool {
|
|
text := strings.TrimSpace(s.Text())
|
|
if text == pattern || strings.Contains(text, pattern) {
|
|
found = true
|
|
val = result
|
|
return false
|
|
}
|
|
return true
|
|
})
|
|
if found {
|
|
return applyFiltersToValue(val, block)
|
|
}
|
|
}
|
|
return "", nil
|
|
}
|
|
|
|
// If Attribute specified, get attribute from first element
|
|
if block.Attribute != "" {
|
|
attrVal, exists := sub.Attr(block.Attribute)
|
|
if !exists {
|
|
return "", nil
|
|
}
|
|
val = attrVal
|
|
} else {
|
|
// Get trimmed text content
|
|
val = strings.TrimSpace(sub.First().Text())
|
|
}
|
|
|
|
return applyFiltersToValue(val, block)
|
|
}
|
|
|
|
// applyFiltersToValue applies the filter chain to a value.
|
|
func applyFiltersToValue(val string, block SelectorBlock) (string, error) {
|
|
if len(block.Filters) == 0 {
|
|
return val, nil
|
|
}
|
|
|
|
result, err := ApplyFilters(val, block.Filters)
|
|
if err != nil {
|
|
return val, fmt.Errorf("filter chain error: %w", err)
|
|
}
|
|
return result, nil
|
|
}
|