Sync from /srv/compose/unified-media-manager
This commit is contained in:
287
internal/cardigann/definition.go
Normal file
287
internal/cardigann/definition.go
Normal file
@@ -0,0 +1,287 @@
|
||||
package cardigann
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
yaml "gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Definition represents a parsed Cardigann YAML indexer definition.
|
||||
// It matches the upstream Cardigann schema for site definitions.
|
||||
type Definition struct {
|
||||
Site string `yaml:"site"`
|
||||
Name string `yaml:"name"`
|
||||
Description string `yaml:"description"`
|
||||
Language string `yaml:"language"`
|
||||
Encoding string `yaml:"encoding"`
|
||||
Links StringOrSlice `yaml:"links"`
|
||||
Settings []SettingsField `yaml:"settings"`
|
||||
Caps CapabilitiesBlock `yaml:"caps"`
|
||||
Login LoginBlock `yaml:"login"`
|
||||
Ratio RatioBlock `yaml:"ratio"`
|
||||
Search SearchBlock `yaml:"search"`
|
||||
}
|
||||
|
||||
// SettingsField describes a user-configurable field in the definition.
// Each entry of the top-level `settings` list decodes into one SettingsField.
type SettingsField struct {
	Name  string `yaml:"name"`
	Type  string `yaml:"type"`
	Label string `yaml:"label"`
}
|
||||
|
||||
// CapabilitiesBlock maps categories and search modes.
// Categories is a string-to-string mapping and Modes maps each mode name to
// a list of strings, as decoded from the `caps` section.
type CapabilitiesBlock struct {
	Categories map[string]string   `yaml:"categories"`
	Modes      map[string][]string `yaml:"modes"`
}
|
||||
|
||||
// LoginBlock describes authentication configuration.
|
||||
type LoginBlock struct {
|
||||
Path string `yaml:"path"`
|
||||
Method string `yaml:"method"`
|
||||
Form string `yaml:"form"`
|
||||
Inputs map[string]string `yaml:"inputs"`
|
||||
Error []ErrorBlock `yaml:"error"`
|
||||
Test PageTestBlock `yaml:"test"`
|
||||
}
|
||||
|
||||
// ErrorBlock describes an error detection pattern.
|
||||
type ErrorBlock struct {
|
||||
Path string `yaml:"path"`
|
||||
Selector string `yaml:"selector"`
|
||||
Message SelectorBlock `yaml:"message"`
|
||||
}
|
||||
|
||||
// PageTestBlock describes a page test for verifying login.
// It is decoded from the `test` key of the login section.
type PageTestBlock struct {
	Path     string `yaml:"path"`
	Selector string `yaml:"selector"`
}
|
||||
|
||||
// SearchBlock describes search configuration.
|
||||
type SearchBlock struct {
|
||||
Path string `yaml:"path"`
|
||||
Method string `yaml:"method"`
|
||||
Inputs map[string]string `yaml:"inputs"`
|
||||
Rows RowsBlock `yaml:"rows"`
|
||||
Fields FieldsListBlock `yaml:"fields"`
|
||||
}
|
||||
|
||||
// RowsBlock describes how to find result rows in HTML.
|
||||
type RowsBlock struct {
|
||||
Selector string `yaml:"selector"`
|
||||
Remove string `yaml:"remove"`
|
||||
After int `yaml:"after"`
|
||||
DateHeaders SelectorBlock `yaml:"dateheaders"`
|
||||
}
|
||||
|
||||
// FieldBlock represents a single field extraction definition.
|
||||
type FieldBlock struct {
|
||||
Field string `yaml:"field"`
|
||||
Block SelectorBlock `yaml:"-"`
|
||||
}
|
||||
|
||||
// SelectorBlock describes CSS selector extraction with optional filters.
|
||||
type SelectorBlock struct {
|
||||
Selector string `yaml:"selector"`
|
||||
Text string `yaml:"text"`
|
||||
Attribute string `yaml:"attribute"`
|
||||
Remove string `yaml:"remove"`
|
||||
Filters []FilterBlock `yaml:"filters"`
|
||||
Case map[string]string `yaml:"case"`
|
||||
}
|
||||
|
||||
// FilterBlock represents a filter transformation.
// Args is deliberately untyped: it holds whatever value shape the YAML
// supplies under `args`, so consumers must type-switch on it.
type FilterBlock struct {
	Name string      `yaml:"name"`
	Args interface{} `yaml:"args"`
}
|
||||
|
||||
// RatioBlock describes ratio display configuration.
// It is decoded from the top-level `ratio` section.
type RatioBlock struct {
	Selector string `yaml:"selector"`
	Path     string `yaml:"path"`
}
|
||||
|
||||
// StringOrSlice is a custom type that accepts either a string or a slice of strings in YAML.
|
||||
type StringOrSlice []string
|
||||
|
||||
func (s *StringOrSlice) UnmarshalYAML(value *yaml.Node) error {
|
||||
var single string
|
||||
if err := value.Decode(&single); err == nil {
|
||||
*s = []string{single}
|
||||
return nil
|
||||
}
|
||||
var slice []string
|
||||
if err := value.Decode(&slice); err != nil {
|
||||
return fmt.Errorf("expected string or list of strings: %w", err)
|
||||
}
|
||||
*s = slice
|
||||
return nil
|
||||
}
|
||||
|
||||
// FieldsListBlock preserves the field ordering from YAML map keys.
|
||||
type FieldsListBlock []FieldBlock
|
||||
|
||||
func (f *FieldsListBlock) UnmarshalYAML(value *yaml.Node) error {
|
||||
// Cardigann fields are a YAML map where key is field name and value is selector block.
|
||||
// We use the yaml.Node directly to preserve key ordering.
|
||||
if value.Kind != yaml.MappingNode {
|
||||
return fmt.Errorf("fields must be a mapping")
|
||||
}
|
||||
|
||||
result := make([]FieldBlock, 0, len(value.Content)/2)
|
||||
for i := 0; i < len(value.Content); i += 2 {
|
||||
keyNode := value.Content[i]
|
||||
valNode := value.Content[i+1]
|
||||
|
||||
fieldName := keyNode.Value
|
||||
|
||||
// Marshal the value node back to YAML, then unmarshal into SelectorBlock
|
||||
valueBytes, err := yaml.Marshal(valNode)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal field %q: %w", fieldName, err)
|
||||
}
|
||||
|
||||
var block SelectorBlock
|
||||
if err := yaml.Unmarshal(valueBytes, &block); err != nil {
|
||||
return fmt.Errorf("failed to unmarshal field %q block: %w", fieldName, err)
|
||||
}
|
||||
|
||||
result = append(result, FieldBlock{
|
||||
Field: fieldName,
|
||||
Block: block,
|
||||
})
|
||||
}
|
||||
|
||||
*f = result
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalYAML sets default values for RowsBlock.
|
||||
func (r *RowsBlock) UnmarshalYAML(value *yaml.Node) error {
|
||||
// Use a raw type to avoid infinite recursion
|
||||
type rawRows struct {
|
||||
Selector string `yaml:"selector"`
|
||||
Remove string `yaml:"remove"`
|
||||
After int `yaml:"after"`
|
||||
DateHeaders SelectorBlock `yaml:"dateheaders"`
|
||||
}
|
||||
var raw rawRows
|
||||
if err := value.Decode(&raw); err != nil {
|
||||
return err
|
||||
}
|
||||
r.Selector = raw.Selector
|
||||
r.Remove = raw.Remove
|
||||
r.After = raw.After
|
||||
r.DateHeaders = raw.DateHeaders
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalYAML sets default values for LoginBlock.
|
||||
func (l *LoginBlock) UnmarshalYAML(value *yaml.Node) error {
|
||||
type rawLogin struct {
|
||||
Path string `yaml:"path"`
|
||||
Method string `yaml:"method"`
|
||||
Form string `yaml:"form"`
|
||||
Inputs map[string]string `yaml:"inputs"`
|
||||
Error []ErrorBlock `yaml:"error"`
|
||||
Test PageTestBlock `yaml:"test"`
|
||||
}
|
||||
var raw rawLogin
|
||||
if err := value.Decode(&raw); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
l.Path = raw.Path
|
||||
l.Method = raw.Method
|
||||
l.Form = raw.Form
|
||||
l.Inputs = raw.Inputs
|
||||
l.Error = raw.Error
|
||||
l.Test = raw.Test
|
||||
|
||||
// Apply defaults
|
||||
if l.Method == "" {
|
||||
l.Method = "form"
|
||||
}
|
||||
if l.Form == "" {
|
||||
l.Form = "form"
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ParseDefinition parses raw YAML bytes into a Definition struct.
|
||||
// It applies defaults and validates required fields.
|
||||
func ParseDefinition(data []byte) (*Definition, error) {
|
||||
var def Definition
|
||||
if err := yaml.Unmarshal(data, &def); err != nil {
|
||||
return nil, fmt.Errorf("parse YAML: %w", err)
|
||||
}
|
||||
|
||||
// Apply defaults
|
||||
if def.Language == "" {
|
||||
def.Language = "en-us"
|
||||
}
|
||||
if def.Encoding == "" {
|
||||
def.Encoding = "UTF-8"
|
||||
}
|
||||
|
||||
// Validate required fields
|
||||
if def.Site == "" {
|
||||
return nil, fmt.Errorf("definition missing required field: site")
|
||||
}
|
||||
if def.Name == "" {
|
||||
return nil, fmt.Errorf("definition missing required field: name")
|
||||
}
|
||||
if len(def.Links) == 0 {
|
||||
return nil, fmt.Errorf("definition missing required field: links")
|
||||
}
|
||||
|
||||
// Threat model T-10-04: Reject oversized definitions
|
||||
if len(def.Search.Fields) > 100 {
|
||||
return nil, fmt.Errorf("definition has too many search fields (%d > 100)", len(def.Search.Fields))
|
||||
}
|
||||
if len(def.Caps.Categories) > 1000 {
|
||||
return nil, fmt.Errorf("definition has too many category mappings (%d > 1000)", len(def.Caps.Categories))
|
||||
}
|
||||
|
||||
return &def, nil
|
||||
}
|
||||
|
||||
// ValidateDefinition returns a list of validation warnings for a parsed definition.
|
||||
// These are not errors — the definition may still be usable — but indicate potential issues.
|
||||
func ValidateDefinition(def *Definition) []string {
|
||||
var warnings []string
|
||||
|
||||
if def.Search.Rows.Selector == "" {
|
||||
warnings = append(warnings, "search.rows.selector is empty — search will not find results")
|
||||
}
|
||||
|
||||
hasTitle := false
|
||||
hasDownload := false
|
||||
for _, field := range def.Search.Fields {
|
||||
switch field.Field {
|
||||
case "title":
|
||||
hasTitle = true
|
||||
case "download":
|
||||
hasDownload = true
|
||||
}
|
||||
}
|
||||
|
||||
if !hasTitle {
|
||||
warnings = append(warnings, "search.fields missing \"title\" field — results will have no title")
|
||||
}
|
||||
if !hasDownload {
|
||||
warnings = append(warnings, "search.fields missing \"download\" field — results will have no download URL")
|
||||
}
|
||||
|
||||
// Check that login inputs reference config settings
|
||||
if len(def.Login.Inputs) > 0 && len(def.Settings) > 0 {
|
||||
settingNames := make(map[string]bool, len(def.Settings))
|
||||
for _, s := range def.Settings {
|
||||
settingNames[s.Name] = true
|
||||
}
|
||||
}
|
||||
|
||||
return warnings
|
||||
}
|
||||
Reference in New Issue
Block a user