From 468519fde1cef86cda7bbdbb2f0a1f6c4ceae93d Mon Sep 17 00:00:00 2001 From: Christopher Mayor Date: Fri, 24 Apr 2026 11:13:50 -0700 Subject: [PATCH] feat: semantic search with Qdrant + Ollama embeddings - Add SemanticSearchService with embed() + searchQdrant() methods - Add GET /api/search/semantic endpoint (?q=query&k=5) - Wire SemanticSearchService into the router via Services.SemanticSearch (the route is registered only when that field is non-nil; this patch does not touch cmd/server/main.go, so the service must still be constructed there) - Add SemanticSearch React page with results + similarity scores - Add 'Semantic Search' nav link in App.tsx - Add unit tests with mocked Ollama + Qdrant HTTP servers (4 tests, all passing) - Add GitHub issue templates (bug report, feature request) - Add pull request template --- .github/ISSUE_TEMPLATE/bug_report.yml | 57 +++++++ .github/ISSUE_TEMPLATE/feature_request.yml | 29 ++++ .github/PULL_REQUEST_TEMPLATE.md | 28 +++ frontend/src/App.tsx | 3 + frontend/src/pages/SemanticSearch.tsx | 163 ++++++++++++++++++ internal/api/router.go | 6 + internal/api/search.go | 29 ++++ internal/service/semantic_search.go | 188 +++++++++++++++++++++ internal/service/semantic_search_test.go | 143 ++++++++++++++++ 9 files changed, 646 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 frontend/src/pages/SemanticSearch.tsx create mode 100644 internal/service/semantic_search.go create mode 100644 internal/service/semantic_search_test.go diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..2ad054d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,57 @@ +name: Bug Report +description: Report something that is broken or not working as expected +labels: ["bug"] +title: "[Bug] " +body: + +- type: markdown + attributes: + value: | + ## Bug Description + Describe what broke and what you expected to happen. 
+ +- type: textarea + id: repro + attributes: + label: Steps to Reproduce + placeholder: | + 1. + 2. + 3. + validations: + required: true + +- type: textarea + id: expected + attributes: + label: Expected Behavior + placeholder: What should happen instead. + +- type: textarea + id: actual + attributes: + label: Actual Behavior + placeholder: What actually happens. + +- type: input + id: version + attributes: + label: UMM Version / Commit + placeholder: e.g., v2.0 or git commit hash + +- type: textarea + id: logs + attributes: + label: Relevant Logs + description: Paste any error messages or relevant log lines + +- type: textarea + id: env + attributes: + label: Environment + description: OS, Docker version, database version, etc. + placeholder: | + - OS: + - Docker: + - PostgreSQL: + - Go (if running locally): diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..52f09c8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,29 @@ +name: Feature Request +description: Suggest a new feature or improvement +labels: ["enhancement"] +title: "[Feature] " +body: + +- type: markdown + attributes: + value: | + ## Feature Description + Describe the feature you'd like and why it would be useful. + +- type: textarea + id: usecase + attributes: + label: Use Case + placeholder: Who would use this and what problem does it solve? + +- type: textarea + id: alternatives + attributes: + label: Alternatives Considered + placeholder: What other approaches did you consider? + +- type: textarea + id: context + attributes: + label: Additional Context + placeholder: Screenshots, mockups, or any other relevant information. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..0806120 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,28 @@ +## Description + +Brief summary of what this PR does. 
+ +## Type of Change + +- [ ] Bug fix +- [ ] New feature +- [ ] Breaking change +- [ ] Documentation update + +## Testing + +Describe what testing you performed. + +- [ ] Tested locally (go test ./...) +- [ ] Tested in Docker Compose dev environment +- [ ] Verified frontend builds (npm run build) + +## Checklist + +- [ ] My code follows the project conventions (see AGENTS.md) +- [ ] I have self-reviewed my own code +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] I have made corresponding changes to the documentation +- [ ] My changes generate no new compiler or lint warnings +- [ ] I have added tests that prove my fix is effective or my feature works +- [ ] New and existing unit tests pass locally with my changes diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 61362eb..37d04b8 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -15,6 +15,7 @@ const Activity = lazy(() => import('./pages/Activity')) const Blocklist = lazy(() => import('./pages/Blocklist')) const Settings = lazy(() => import('./pages/Settings')) const Search = lazy(() => import('./pages/Search')) +const SemanticSearch = lazy(() => import('./pages/SemanticSearch')) const navItems = [ { to: '/', label: 'Dashboard' }, @@ -23,6 +24,7 @@ const navItems = [ { to: '/calendar', label: 'Calendar' }, { to: '/queue', label: 'Queue' }, { to: '/search', label: 'Search' }, + { to: '/semantic-search', label: 'Semantic Search' }, { to: '/activity', label: 'Activity' }, { to: '/requests', label: 'Requests' }, { to: '/blocklist', label: 'Blocklist' }, @@ -62,6 +64,7 @@ export default function App() { } /> } /> } /> + } /> } /> } /> } /> diff --git a/frontend/src/pages/SemanticSearch.tsx b/frontend/src/pages/SemanticSearch.tsx new file mode 100644 index 0000000..d1615a9 --- /dev/null +++ b/frontend/src/pages/SemanticSearch.tsx @@ -0,0 +1,163 @@ +import { useState } from 'react' +import { fetchAPI, postAPI } from '../api/client' +import { useToast } from 
'../components/Toast' +import ErrorBanner from '../components/ErrorBanner' + +interface SemanticResult { + id: number + title: string + media_type: string + year: number | null + score: number + overview: string +} + +function scorePercent(score: number): string { + return `${Math.round(score * 100)}%` +} + +function mediaTypeBadge(type: string): string { + const colors: Record = { + movie: 'bg-blue-600', + series: 'bg-green-600', + music: 'bg-purple-600', + book: 'bg-orange-600', + audiobook: 'bg-yellow-600', + podcast: 'bg-pink-600', + } + return colors[type] ?? 'bg-gray-600' +} + +export default function SemanticSearch() { + const [query, setQuery] = useState('') + const [results, setResults] = useState([]) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + const [hasSearched, setHasSearched] = useState(false) + const [addingToQueue, setAddingToQueue] = useState>(new Set()) + const { showToast } = useToast() + + async function doSearch() { + const trimmed = query.trim() + if (!trimmed) return + setLoading(true) + setHasSearched(true) + setError(null) + try { + const res = await fetchAPI<{ results: SemanticResult[] }>( + '/api/search/semantic?q=' + encodeURIComponent(trimmed) + '&k=5' + ) + setResults(res.results ?? []) + } catch (err) { + const message = err instanceof Error ? err.message : 'Semantic search failed' + setError(message) + } finally { + setLoading(false) + } + } + + function handleSearchSubmit(e: React.FormEvent) { + e.preventDefault() + doSearch() + } + + async function handleAddToQueue(item: SemanticResult) { + setAddingToQueue(prev => new Set(prev).add(item.id)) + try { + await postAPI<{ id: number }>('/api/queue', { media_id: item.id }) + showToast(`Added "${item.title}" to queue`) + } catch (err) { + const message = err instanceof Error ? 
err.message : 'Failed to add to queue' + showToast(`Error: ${message}`) + } finally { + setAddingToQueue(prev => { + const next = new Set(prev) + next.delete(item.id) + return next + }) + } + } + + return ( +
+

Semantic Search

+ +
+
+ setQuery(e.target.value)} + placeholder="Describe what you're looking for..." + className="bg-gray-800 border border-gray-700 focus:border-indigo-500 text-white rounded-lg px-4 py-3 w-full outline-none transition-colors" + /> + +
+

AI-powered search using natural language descriptions

+
+ + {!hasSearched && ( +
+

Enter a natural language query to find media

+
+ )} + + {hasSearched && loading && ( +
+ {[1, 2, 3].map(i => ( +
+ ))} +
+ )} + + {hasSearched && error && !loading && } + + {hasSearched && !loading && !error && results.length === 0 && ( +
No results found
+ )} + + {hasSearched && !loading && !error && results.length > 0 && ( +
+

{results.length} result{results.length !== 1 ? 's' : ''}

+ {results.map(item => { + const isAdding = addingToQueue.has(item.id) + return ( +
+
+
+
+ + {item.media_type} + +

{item.title}

+ {item.year && {item.year}} +
+
+ {scorePercent(item.score)} match +
+ {item.overview && ( +

{item.overview}

+ )} +
+ +
+
+ ) + })} +
+ )} +
+ ) +} diff --git a/internal/api/router.go b/internal/api/router.go index 577997b..8361a24 100644 --- a/internal/api/router.go +++ b/internal/api/router.go @@ -35,6 +35,7 @@ type Services struct { Discover *service.DiscoverService MediaDetail *service.MediaDetailService Calendar *service.CalendarService + SemanticSearch *service.SemanticSearchService } func NewRouter(cfg *config.Config, svc *Services) *echo.Echo { @@ -148,6 +149,11 @@ func NewRouter(cfg *config.Config, svc *Services) *echo.Echo { // Calendar route g.GET("/calendar", listCalendarEvents(svc.Calendar)) + // Semantic search route + if svc.SemanticSearch != nil { + g.GET("/search/semantic", semanticSearch(svc.SemanticSearch)) + } + // Request routes — protected by API key auth apiKeyAuth := newAPIKeyAuth(svc.User) g.GET("/requests", listRequests(svc.Request, svc.User), apiKeyAuth) diff --git a/internal/api/search.go b/internal/api/search.go index 40d6bf4..6adb0e2 100644 --- a/internal/api/search.go +++ b/internal/api/search.go @@ -54,6 +54,35 @@ func searchReleases(svc *service.SearchService) echo.HandlerFunc { } } +func semanticSearch(svc *service.SemanticSearchService) echo.HandlerFunc { + return func(c echo.Context) error { + ctx, cancel := context.WithTimeout(c.Request().Context(), 30*time.Second) + defer cancel() + + query := c.QueryParam("q") + if query == "" { + return c.JSON(http.StatusBadRequest, map[string]string{"error": "q parameter is required"}) + } + + k := 5 + if kStr := c.QueryParam("k"); kStr != "" { + if kVal, err := strconv.Atoi(kStr); err == nil && kVal > 0 && kVal <= 50 { + k = kVal + } + } + + results, err := svc.Search(ctx, query, k) + if err != nil { + slog.Error("semantic search failed", "error", err) + return c.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()}) + } + + return c.JSON(http.StatusOK, map[string]interface{}{ + "results": results, + }) + } +} + func grabRelease(svc *service.SearchService, dcSvc *service.DownloadClientService, queueSvc 
*service.QueueService, safetySvc *service.SafetyService, activitySvc *service.ActivityService) echo.HandlerFunc { return func(c echo.Context) error { ctx, cancel := context.WithTimeout(c.Request().Context(), 30*time.Second) diff --git a/internal/service/semantic_search.go b/internal/service/semantic_search.go new file mode 100644 index 0000000..24ad1e9 --- /dev/null +++ b/internal/service/semantic_search.go @@ -0,0 +1,188 @@ +package service + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "time" +) + +type SemanticSearchResult struct { + ID int64 `json:"id"` + Title string `json:"title"` + MediaType string `json:"media_type"` + Year *int `json:"year,omitempty"` + Score float64 `json:"score"` + Overview string `json:"overview,omitempty"` +} + +type ollamaEmbedRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` +} + +type ollamaEmbedResponse struct { + Embedding []float64 `json:"embedding"` +} + +type qdrantSearchRequest struct { + Vector []float64 `json:"vector"` + Limit int `json:"limit"` + WithPayload bool `json:"with_payload"` +} + +type qdrantSearchResponse struct { + Result []qdrantHit `json:"result"` +} + +type qdrantHit struct { + ID interface{} `json:"id"` // Qdrant point IDs are unsigned integers or UUID strings; a string-typed field fails to decode integer IDs. The value is not read (results use payload.id). + Score float64 `json:"score"` + Payload json.RawMessage `json:"payload"` +} + +type qdrantPayload struct { + ID int64 `json:"id"` + Title string `json:"title"` + MediaType string `json:"media_type"` + Year *int `json:"year,omitempty"` + Overview string `json:"overview,omitempty"` +} + +type SemanticSearchService struct { + ollamaURL string + qdrantURL string + httpClient *http.Client +} + +func NewSemanticSearchService(ollamaURL, qdrantURL string) *SemanticSearchService { + return &SemanticSearchService{ + ollamaURL: ollamaURL, + qdrantURL: qdrantURL, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + } +} + +func (s *SemanticSearchService) Search(ctx context.Context, query string, k int) ([]SemanticSearchResult, error) { + 
embedding, err := s.embed(ctx, query) + if err != nil { + return nil, fmt.Errorf("generate embedding: %w", err) + } + + results, err := s.searchQdrant(ctx, embedding, k) + if err != nil { + return nil, fmt.Errorf("search qdrant: %w", err) + } + + return results, nil +} + +func (s *SemanticSearchService) embed(ctx context.Context, text string) ([]float64, error) { + reqBody := ollamaEmbedRequest{ + Model: "nomic-embed-text", + Prompt: text, + } + + bodyBytes, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("marshal embed request: %w", err) + } + + reqCtx, cancel := context.WithTimeout(ctx, 15*time.Second) + defer cancel() + + req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, s.ollamaURL+"/api/embeddings", bytes.NewReader(bodyBytes)) + if err != nil { + return nil, fmt.Errorf("create embed request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := s.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("ollama request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) + slog.Error("ollama embed returned non-200", "status", resp.StatusCode, "body", string(respBody)) + return nil, fmt.Errorf("ollama returned status %d", resp.StatusCode) + } + + var embedResp ollamaEmbedResponse + if err := json.NewDecoder(resp.Body).Decode(&embedResp); err != nil { + return nil, fmt.Errorf("decode ollama response: %w", err) + } + + if len(embedResp.Embedding) == 0 { + return nil, fmt.Errorf("ollama returned empty embedding") + } + + return embedResp.Embedding, nil +} + +func (s *SemanticSearchService) searchQdrant(ctx context.Context, embedding []float64, k int) ([]SemanticSearchResult, error) { + reqBody := qdrantSearchRequest{ + Vector: embedding, + Limit: k, + WithPayload: true, + } + + bodyBytes, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("marshal qdrant request: %w", err) + } 
+ + reqCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + url := fmt.Sprintf("%s/collections/media/points/search", s.qdrantURL) + req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, url, bytes.NewReader(bodyBytes)) + if err != nil { + return nil, fmt.Errorf("create qdrant request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := s.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("qdrant request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) + slog.Error("qdrant search returned non-200", "status", resp.StatusCode, "body", string(respBody)) + return nil, fmt.Errorf("qdrant returned status %d", resp.StatusCode) + } + + var searchResp qdrantSearchResponse + if err := json.NewDecoder(resp.Body).Decode(&searchResp); err != nil { + return nil, fmt.Errorf("decode qdrant response: %w", err) + } + + results := make([]SemanticSearchResult, 0, len(searchResp.Result)) + for _, hit := range searchResp.Result { + var payload qdrantPayload + if err := json.Unmarshal(hit.Payload, &payload); err != nil { + slog.Error("failed to unmarshal qdrant payload", "error", err) + continue + } + + results = append(results, SemanticSearchResult{ + ID: payload.ID, + Title: payload.Title, + MediaType: payload.MediaType, + Year: payload.Year, + Score: hit.Score, + Overview: payload.Overview, + }) + } + + return results, nil +} diff --git a/internal/service/semantic_search_test.go b/internal/service/semantic_search_test.go new file mode 100644 index 0000000..49a7763 --- /dev/null +++ b/internal/service/semantic_search_test.go @@ -0,0 +1,143 @@ +package service + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" +) + +func TestSemanticSearchService_Search(t *testing.T) { + // Mock Ollama server + ollamaServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { + if r.URL.Path != "/api/embeddings" { + t.Errorf("expected /api/embeddings, got %s", r.URL.Path) + } + var req ollamaEmbedRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Errorf("failed to decode request: %v", err) // Errorf, not Fatalf: this handler runs on the server's goroutine, and FailNow must only be called from the test goroutine + } + if req.Model != "nomic-embed-text" { + t.Errorf("expected model nomic-embed-text, got %s", req.Model) + } + if req.Prompt == "" { + t.Error("expected non-empty prompt") + } + // Return a full 768-element embedding; only the first two components are non-zero + embedding := make([]float64, 768) + embedding[0] = 0.1 + embedding[1] = -0.2 + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(ollamaEmbedResponse{Embedding: embedding}) + })) + defer ollamaServer.Close() + + // Mock Qdrant server + qdrantServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/collections/media/points/search" { + t.Errorf("expected /collections/media/points/search, got %s", r.URL.Path) + } + var req qdrantSearchRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Errorf("failed to decode request: %v", err) // Errorf, not Fatalf: this handler runs on the server's goroutine, and FailNow must only be called from the test goroutine + } + if req.Limit != 5 { + t.Errorf("expected limit 5, got %d", req.Limit) + } + if !req.WithPayload { + t.Error("expected WithPayload=true") + } + payload, _ := json.Marshal(qdrantPayload{ + ID: 42, + Title: "Blade Runner 2049", + MediaType: "movie", + Year: intPtr(2017), + Overview: "A young blade runner's discovery of a long-buried secret leads him to track down former blade runner Rick Deckard.", + }) + resp := qdrantSearchResponse{ + Result: []qdrantHit{ + {ID: "42", Score: 0.8712, Payload: payload}, + }, + } + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) + })) + defer qdrantServer.Close() + + svc := NewSemanticSearchService(ollamaServer.URL, qdrantServer.URL) + results, err := svc.Search(context.Background(), "sci-fi noir film", 5) + if err != nil { + t.Fatalf("Search failed: %v", err) + } + + if 
len(results) != 1 { + t.Fatalf("expected 1 result, got %d", len(results)) + } + if results[0].ID != 42 { + t.Errorf("expected ID 42, got %d", results[0].ID) + } + if results[0].Title != "Blade Runner 2049" { + t.Errorf("expected title 'Blade Runner 2049', got %s", results[0].Title) + } + if results[0].MediaType != "movie" { + t.Errorf("expected media_type 'movie', got %s", results[0].MediaType) + } + if results[0].Score != 0.8712 { + t.Errorf("expected score 0.8712, got %f", results[0].Score) + } + if results[0].Year == nil || *results[0].Year != 2017 { + t.Error("expected year 2017") + } +} + +// Note: empty query validation happens at the API handler level (search.go), +// not in the service. The service layer trusts its callers. +func TestSemanticSearchService_embed_emptyQuery(t *testing.T) { + ollamaServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Empty string is passed through to Ollama — Ollama may return an error + w.WriteHeader(http.StatusBadRequest) + })) + defer ollamaServer.Close() + + svc := NewSemanticSearchService(ollamaServer.URL, "http://localhost:6333") + _, err := svc.embed(context.Background(), "") + if err == nil { + t.Error("expected error for empty query passed to Ollama") + } +} + +func TestSemanticSearchService_qdrantError(t *testing.T) { + ollamaServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + embedding := make([]float64, 768) + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(ollamaEmbedResponse{Embedding: embedding}) + })) + defer ollamaServer.Close() + + qdrantServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + })) + defer qdrantServer.Close() + + svc := NewSemanticSearchService(ollamaServer.URL, qdrantServer.URL) + _, err := svc.Search(context.Background(), "test query", 5) + if err == nil { + t.Fatal("expected error when 
Qdrant is unavailable") + } +} + +func TestSemanticSearchService_ollamaError(t *testing.T) { + ollamaServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + })) + defer ollamaServer.Close() + + svc := NewSemanticSearchService(ollamaServer.URL, "http://localhost:6333") + _, err := svc.Search(context.Background(), "test query", 5) + if err == nil { + t.Fatal("expected error when Ollama is unavailable") + } +} + +func intPtr(i int) *int { return &i }