222 lines
5.0 KiB
Go
222 lines
5.0 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/TopherMayor/unified-media-manager/internal/db"
|
|
)
|
|
|
|
// MatchResult is the outcome of matching a release name against the media
// table. When nothing matches, Match returns a MatchResult whose Confidence is
// "none" and whose other fields are left at their zero values.
type MatchResult struct {
	// MediaID is the ID of the matched media entry.
	MediaID int64 `json:"media_id"`

	// MediaType is the media type of the matched entry.
	MediaType string `json:"media_type"`

	// Title is the matched entry's title as stored, not its normalized form.
	Title string `json:"title"`

	// Year is copied from the matched entry; it is omitted from JSON when nil.
	Year *int `json:"year,omitempty"`

	// Season is set only when the release name carried a season/episode marker.
	Season *int `json:"season,omitempty"`

	// Episode is set only when the release name carried a season/episode marker.
	Episode *int `json:"episode,omitempty"`

	// RootFolder is the path of the matched entry's root folder. It stays empty
	// when the entry has no root folder ID or the path lookup fails.
	RootFolder string `json:"root_folder"`

	// Confidence is "exact", "fuzzy" or "none".
	Confidence string `json:"confidence"`
}
|
|
|
|
type MatcherService struct {
|
|
db *db.DB
|
|
}
|
|
|
|
func NewMatcherService(database *db.DB) *MatcherService {
|
|
return &MatcherService{db: database}
|
|
}
|
|
|
|
// Patterns used to pull season/episode numbers and a clean title out of a
// release name.
var (
	// seasonEpisodeRe matches "S01E02"-style markers in either case. Group 1 is
	// the season, group 2 the episode.
	seasonEpisodeRe = regexp.MustCompile(`(?i)s(\d{1,2})e(\d{1,2})`)

	// altSeasonEpsRe matches "1x02"-style markers. Group 1 is the season,
	// group 2 the episode. The marker must not touch another letter or digit on
	// either side, so resolutions ("1920x1080") and codec tags ("5.1x264") are
	// not mistaken for episode numbers. The overall match may include the one
	// delimiter character on each side; the capture groups never do.
	altSeasonEpsRe = regexp.MustCompile(`(?:^|[^0-9A-Za-z])(\d{1,2})[xX](\d{1,2})(?:[^0-9A-Za-z]|$)`)

	// bracketRe2 matches a "[...]" group (non-greedy).
	bracketRe2 = regexp.MustCompile(`\[.*?\]`)

	// qualityTrailRe matches from the first episode/quality/source/codec token
	// to the end of the string. A token only counts when it starts the string
	// or follows a non-alphanumeric character; without that, letters inside
	// ordinary words ("dd" in "Wedding") would truncate the title. The match
	// may begin with that delimiter character.
	qualityTrailRe = regexp.MustCompile(`(?i)(?:^|[^0-9A-Za-z])(?:s\d{1,2}e\d{1,2}|\d{3,4}[pi]|720|1080|2160|HDTV|WEB|BluRay|BRRip|BDRip|DVDRip|REMUX|x264|x265|HEVC|AAC|DTS|AC3|DD|FLAC).*$`)

	// sepRe matches runs of the separators used in release names.
	sepRe = regexp.MustCompile(`[._-]+`)

	// punctRe matches every character that is not a letter, mark, digit,
	// underscore or whitespace. Unicode classes are used because \w is
	// ASCII-only in Go's regexp syntax and would erase accented and non-Latin
	// letters from titles.
	punctRe = regexp.MustCompile(`[^\p{L}\p{M}\p{N}_\s]`)

	// multiSpaceRe matches runs of whitespace.
	multiSpaceRe = regexp.MustCompile(`\s+`)
)
|
|
|
|
func normalizeTitle(s string) string {
|
|
s = strings.ToLower(s)
|
|
s = punctRe.ReplaceAllString(s, " ")
|
|
s = multiSpaceRe.ReplaceAllString(s, " ")
|
|
return strings.TrimSpace(s)
|
|
}
|
|
|
|
func parseSeasonEpisode(s string) (season, episode int, found bool) {
|
|
if m := seasonEpisodeRe.FindStringSubmatch(s); m != nil {
|
|
season = atoi(m[1])
|
|
episode = atoi(m[2])
|
|
return season, episode, true
|
|
}
|
|
if m := altSeasonEpsRe.FindStringSubmatch(s); m != nil {
|
|
season = atoi(m[1])
|
|
episode = atoi(m[2])
|
|
return season, episode, true
|
|
}
|
|
return 0, 0, false
|
|
}
|
|
|
|
// atoi converts the ASCII digits in s to an int, reading left to right.
// Unlike strconv.Atoi it never fails: any character that is not '0'-'9' is
// skipped, and a string without digits yields 0.
func atoi(s string) int {
	total := 0
	for _, r := range s {
		if r < '0' || r > '9' {
			continue
		}
		total = total*10 + int(r-'0')
	}
	return total
}
|
|
|
|
func extractCleanTitle(releaseName string) string {
|
|
cleaned := bracketRe2.ReplaceAllString(releaseName, " ")
|
|
if m := seasonEpisodeRe.FindStringIndex(cleaned); m != nil {
|
|
cleaned = cleaned[:m[0]]
|
|
} else if m := qualityTrailRe.FindStringIndex(cleaned); m != nil {
|
|
cleaned = cleaned[:m[0]]
|
|
}
|
|
cleaned = sepRe.ReplaceAllString(cleaned, " ")
|
|
return normalizeTitle(cleaned)
|
|
}
|
|
|
|
// levenshteinDistance returns the number of single-character insertions,
// deletions and substitutions needed to turn a into b.
//
// Both strings are compared rune by rune, so a multi-byte character such as
// "é" counts as one edit rather than one per byte. Only two rows of the
// dynamic-programming table are kept.
func levenshteinDistance(a, b string) int {
	ra, rb := []rune(a), []rune(b)
	la, lb := len(ra), len(rb)
	if la == 0 {
		return lb
	}
	if lb == 0 {
		return la
	}

	// prev is the row for the first i-1 runes of a, curr the row being filled.
	prev := make([]int, lb+1)
	curr := make([]int, lb+1)
	for j := range prev {
		prev[j] = j
	}

	for i := 1; i <= la; i++ {
		curr[0] = i
		for j := 1; j <= lb; j++ {
			// Substitution (free when the runes already agree).
			best := prev[j-1]
			if ra[i-1] != rb[j-1] {
				best++
			}
			// Deletion from a.
			if del := prev[j] + 1; del < best {
				best = del
			}
			// Insertion into a.
			if ins := curr[j-1] + 1; ins < best {
				best = ins
			}
			curr[j] = best
		}
		prev, curr = curr, prev
	}
	// After the final swap the finished row is in prev.
	return prev[lb]
}
|
|
|
|
// minOf3 returns the smallest of a, b and c. It is a thin wrapper around the
// built-in min (Go 1.21+).
func minOf3(a, b, c int) int {
	return min(a, b, c)
}
|
|
|
|
func (s *MatcherService) Match(ctx context.Context, releaseName string, mediaType string) (*MatchResult, error) {
|
|
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
|
defer cancel()
|
|
|
|
season, episode, hasSE := parseSeasonEpisode(releaseName)
|
|
cleanTitle := extractCleanTitle(releaseName)
|
|
|
|
if cleanTitle == "" {
|
|
return &MatchResult{Confidence: "none"}, nil
|
|
}
|
|
|
|
qb := NewQueryBuilder(1)
|
|
qb.AddLiteral("deleted_at IS NULL")
|
|
qb.AddLiteral("monitored = true")
|
|
|
|
if mediaType == "series" || hasSE {
|
|
qb.AddLiteral("media_type IN ('series', 'episode')")
|
|
} else if mediaType != "" {
|
|
qb.AddLiteral("media_type NOT IN ('series', 'episode')")
|
|
qb.Add("media_type = $%d", mediaType)
|
|
}
|
|
|
|
query := fmt.Sprintf("SELECT %s FROM media%s", mediaColumns, qb.Where())
|
|
rows, err := s.db.Pool.Query(ctx, query, qb.Args()...)
|
|
if err != nil {
|
|
slog.Error("failed to query media for matching", "error", err)
|
|
return nil, fmt.Errorf("query media candidates: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
candidates, err := scanMediaRows(rows)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("scan media candidates: %w", err)
|
|
}
|
|
|
|
var exactMatch *Media
|
|
var fuzzyMatch *Media
|
|
var fuzzyDist int
|
|
|
|
for i := range candidates {
|
|
c := &candidates[i]
|
|
norm := normalizeTitle(c.Title)
|
|
|
|
if norm == cleanTitle {
|
|
exactMatch = c
|
|
break
|
|
}
|
|
|
|
dist := levenshteinDistance(cleanTitle, norm)
|
|
if dist <= 2 {
|
|
if fuzzyMatch == nil || dist < fuzzyDist {
|
|
fuzzyMatch = c
|
|
fuzzyDist = dist
|
|
}
|
|
}
|
|
}
|
|
|
|
matched := exactMatch
|
|
confidence := "exact"
|
|
if matched == nil && fuzzyMatch != nil {
|
|
matched = fuzzyMatch
|
|
confidence = "fuzzy"
|
|
}
|
|
|
|
if matched == nil {
|
|
return &MatchResult{Confidence: "none"}, nil
|
|
}
|
|
|
|
result := &MatchResult{
|
|
MediaID: matched.ID,
|
|
MediaType: matched.MediaType,
|
|
Title: matched.Title,
|
|
Year: matched.Year,
|
|
Confidence: confidence,
|
|
}
|
|
|
|
if hasSE {
|
|
result.Season = &season
|
|
result.Episode = &episode
|
|
}
|
|
|
|
if matched.RootFolderID != nil {
|
|
var path string
|
|
if err := s.db.Pool.QueryRow(ctx,
|
|
"SELECT path FROM root_folders WHERE id = $1", *matched.RootFolderID).Scan(&path); err != nil {
|
|
slog.Error("failed to query root folder", "error", err, "root_folder_id", *matched.RootFolderID)
|
|
} else {
|
|
result.RootFolder = path
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|