diff --git a/app/actions/feeds.ts b/app/actions/feeds.ts index 49357c2..4c56c03 100644 --- a/app/actions/feeds.ts +++ b/app/actions/feeds.ts @@ -272,6 +272,12 @@ export async function validateFeedAction(input: ValidateFeedInput) { try { const feedInfo = await parseFeedUrl(normalizedUrl); + if (!feedInfo) { + return { + valid: false, + error: 'Unable to fetch feed information', + }; + } return { valid: true, feedInfo: { diff --git a/app/admin/dashboard/components/tabs/LLMConfigTab.tsx b/app/admin/dashboard/components/tabs/LLMConfigTab.tsx index 5a6b863..981f6c9 100644 --- a/app/admin/dashboard/components/tabs/LLMConfigTab.tsx +++ b/app/admin/dashboard/components/tabs/LLMConfigTab.tsx @@ -10,6 +10,7 @@ import { useTestLLMConfig, useDeleteAllEmbeddings, useUpdateSummarizationConfig, + useUpdateEmbeddingProvider, } from "@/hooks/queries/use-admin"; // Loading Spinner Component @@ -70,6 +71,7 @@ export function LLMConfigTab() { const testConfig = useTestLLMConfig(); const deleteEmbeddings = useDeleteAllEmbeddings(); const updateSummarizationConfig = useUpdateSummarizationConfig(); + const updateEmbeddingProvider = useUpdateEmbeddingProvider(); // Summarization toggle state const [isSummarizationToggling, setIsSummarizationToggling] = useState(false); @@ -135,6 +137,7 @@ export function LLMConfigTab() { setSaveMessage(null); try { + // Save LLM config await updateConfig.mutateAsync({ provider, apiKey: apiKey || undefined, @@ -144,9 +147,14 @@ export function LLMConfigTab() { digestModel: digestModel || undefined, }); + // Save embedding provider if it changed + if (embeddingConfig && embeddingProvider !== embeddingConfig.provider) { + await updateEmbeddingProvider.mutateAsync(embeddingProvider); + } + setSaveMessage({ type: "success", text: "Configuration saved successfully" }); toast.success("LLM configuration saved"); - + // Clear API key input setApiKey(""); @@ -515,10 +523,10 @@ export function LLMConfigTab() {
diff --git a/app/api/admin/embeddings/config/route.ts b/app/api/admin/embeddings/config/route.ts index 0325d06..2cf8a20 100644 --- a/app/api/admin/embeddings/config/route.ts +++ b/app/api/admin/embeddings/config/route.ts @@ -74,6 +74,7 @@ export const GET = createHandler( config: { provider: embeddingConfig.provider, model: embeddingConfig.model, + modelSource: embeddingConfig.modelSource, dimensions, batchSize: embeddingConfig.batchSize, apiKey: envConfig.apiKey, @@ -92,6 +93,7 @@ export const GET = createHandler( }, autoGenerate: embeddingConfig.autoGenerate, autoGenerateSource: embeddingConfig.autoGenerateSource, + providerSource: embeddingConfig.providerSource, envDefault: env.EMBEDDING_AUTO_GENERATE, usingUserConfig: userId ? openaiTest.success : false, message: "OpenAI always available - admin controls enable/disable, users provide credentials", diff --git a/app/api/admin/embeddings/provider/route.ts b/app/api/admin/embeddings/provider/route.ts index 071360e..d8db660 100644 --- a/app/api/admin/embeddings/provider/route.ts +++ b/app/api/admin/embeddings/provider/route.ts @@ -99,12 +99,14 @@ export const GET = createHandler( }, config: { model: config.model, + modelSource: config.modelSource, batchSize: config.batchSize, autoGenerate: config.autoGenerate, + autoGenerateSource: config.autoGenerateSource, }, usingUserConfig: userId ? openaiTest.success : false, - message: openaiTest.success - ? "OpenAI working with provided credentials" + message: openaiTest.success + ? 
"OpenAI working with provided credentials" : "OpenAI available - users can provide API keys in preferences", }; }, diff --git a/app/api/admin/memory/route.ts b/app/api/admin/memory/route.ts index ff0c7b0..05155d7 100644 --- a/app/api/admin/memory/route.ts +++ b/app/api/admin/memory/route.ts @@ -10,6 +10,8 @@ import { memoryMonitor, formatBytes, getMemoryUsagePercent } from "@/lib/memory- import { prisma } from "@/lib/db"; import { z } from "zod"; +export const dynamic = "force-dynamic"; + /** * GET /api/admin/memory * Returns current memory statistics and history diff --git a/app/api/articles/[id]/related/route.ts b/app/api/articles/[id]/related/route.ts index 9ac1454..1647ff9 100644 --- a/app/api/articles/[id]/related/route.ts +++ b/app/api/articles/[id]/related/route.ts @@ -28,36 +28,16 @@ export const GET = createHandler(async ({ params, request }) => { logger.info("Finding related articles", { articleId: id, limit, minScore }); - try { - const results = await findRelatedArticles(id, { - limit, - minScore, - excludeSameFeed, - }); - - return { - articleId: id, - results, - count: results.length, - }; - } catch (error) { - // Handle "no embedding" error gracefully - const errorMessage = error instanceof Error ? error.message : String(error); - logger.error("Failed to find related articles", { - error: error instanceof Error ? { message: error.message, stack: error.stack } : error, - articleId: id - }); - - if (errorMessage.includes("no embedding")) { - return apiResponse({ - articleId: id, - results: [], - count: 0, - message: "Article has no embedding. 
Generate embeddings to enable related articles.", - }); - } - - throw error; - } + const results = await findRelatedArticles(id, { + limit, + minScore, + excludeSameFeed, + }); + + return { + articleId: id, + results, + count: results.length, + }; }); diff --git a/app/api/feeds/route.ts b/app/api/feeds/route.ts index 7d544de..443e0dd 100644 --- a/app/api/feeds/route.ts +++ b/app/api/feeds/route.ts @@ -72,14 +72,14 @@ export const GET = createHandler( */ export const POST = createHandler( async ({ body, session }) => { - const { url, name, categoryIds } = body; + const { url, name, categoryIds, settings } = body; let feed; let isNewFeed = false; try { // Try to create the feed - feed = await validateAndCreateFeed(url, name, categoryIds); + feed = await validateAndCreateFeed(url, name, categoryIds, settings); isNewFeed = true; } catch (error) { // If feed already exists, get it instead diff --git a/app/api/feeds/validate/route.ts b/app/api/feeds/validate/route.ts index 825b0e2..096de35 100644 --- a/app/api/feeds/validate/route.ts +++ b/app/api/feeds/validate/route.ts @@ -35,6 +35,12 @@ export const POST = createHandler( // Get feed info try { const feedInfo = await parseFeedUrl(normalizedUrl); + if (!feedInfo) { + return apiResponse({ + valid: false, + error: "Unable to fetch feed information", + }); + } return apiResponse({ valid: true, feedInfo: { diff --git a/app/api/proxy/route.ts b/app/api/proxy/route.ts index c6d4527..c9c878e 100644 --- a/app/api/proxy/route.ts +++ b/app/api/proxy/route.ts @@ -4,6 +4,8 @@ import { rewriteUrls, extractBaseUrl } from "@/lib/url-rewriter"; import { NextResponse } from "next/server"; import { z } from "zod"; +export const dynamic = "force-dynamic"; + // Simple in-memory cache const cache = new Map(); const CACHE_TTL = 5 * 60 * 1000; // 5 minutes diff --git a/app/api/saved-searches/[id]/articles/route.ts b/app/api/saved-searches/[id]/articles/route.ts index 0122e5a..81fa880 100644 --- a/app/api/saved-searches/[id]/articles/route.ts 
+++ b/app/api/saved-searches/[id]/articles/route.ts @@ -8,6 +8,8 @@ import { createHandler } from "@/lib/api-handler"; import { z } from "zod"; import * as savedSearchService from "@/lib/services/saved-search-service"; +export const dynamic = "force-dynamic"; + // Query schema for filtering and pagination const articlesQuerySchema = z.object({ limit: z.coerce.number().int().min(1).max(100).optional().default(50).catch(50), diff --git a/app/api/saved-searches/[id]/rematch/route.ts b/app/api/saved-searches/[id]/rematch/route.ts index 99aee89..6444507 100644 --- a/app/api/saved-searches/[id]/rematch/route.ts +++ b/app/api/saved-searches/[id]/rematch/route.ts @@ -8,6 +8,8 @@ import { createHandler } from "@/lib/api-handler"; import { rematchSavedSearch } from "@/lib/services/saved-search-matcher"; import { getSavedSearchById } from "@/lib/services/saved-search-service"; +export const dynamic = "force-dynamic"; + /** * POST /api/saved-searches/[id]/rematch * Trigger rematch for a saved search diff --git a/app/api/saved-searches/[id]/route.ts b/app/api/saved-searches/[id]/route.ts index 4943652..89737d8 100644 --- a/app/api/saved-searches/[id]/route.ts +++ b/app/api/saved-searches/[id]/route.ts @@ -12,6 +12,8 @@ import * as savedSearchService from "@/lib/services/saved-search-service"; import { rematchSavedSearch } from "@/lib/services/saved-search-matcher"; import { logger } from "@/lib/logger"; +export const dynamic = "force-dynamic"; + // Validation schema for updating a saved search const updateSavedSearchSchema = z.object({ name: z.string().min(1).max(100).optional(), diff --git a/app/api/saved-searches/insights/route.ts b/app/api/saved-searches/insights/route.ts index e4fac86..a12caf8 100644 --- a/app/api/saved-searches/insights/route.ts +++ b/app/api/saved-searches/insights/route.ts @@ -7,6 +7,8 @@ import { createHandler } from '@/lib/api-handler'; import { prisma } from '@/lib/db'; import { logger } from '@/lib/logger'; +export const dynamic = "force-dynamic"; + 
interface SavedSearchInsight { id: string; name: string; diff --git a/app/api/saved-searches/preview/route.ts b/app/api/saved-searches/preview/route.ts index 23a7cd5..a07e192 100644 --- a/app/api/saved-searches/preview/route.ts +++ b/app/api/saved-searches/preview/route.ts @@ -8,6 +8,8 @@ import { createHandler } from "@/lib/api-handler"; import { z } from "zod"; import * as savedSearchService from "@/lib/services/saved-search-service"; +export const dynamic = "force-dynamic"; + // Validation schema for preview request const previewSearchSchema = z.object({ query: z.string().min(1), diff --git a/app/api/saved-searches/route.ts b/app/api/saved-searches/route.ts index 14e935b..bccac2c 100644 --- a/app/api/saved-searches/route.ts +++ b/app/api/saved-searches/route.ts @@ -11,6 +11,8 @@ import * as savedSearchService from "@/lib/services/saved-search-service"; import { matchNewArticles } from "@/lib/services/saved-search-matcher"; import { logger } from "@/lib/logger"; +export const dynamic = "force-dynamic"; + // Validation schema for creating a saved search const createSavedSearchSchema = z.object({ name: z.string().min(1).max(100), diff --git a/app/api/saved-searches/templates/route.ts b/app/api/saved-searches/templates/route.ts index c6c3d9b..ff0cf47 100644 --- a/app/api/saved-searches/templates/route.ts +++ b/app/api/saved-searches/templates/route.ts @@ -16,6 +16,8 @@ import { getTemplateById, } from '@/lib/services/search-templates-service'; +export const dynamic = "force-dynamic"; + const querySchema = z.object({ category: z.enum(['technology', 'news', 'research', 'jobs', 'custom']).optional(), keyword: z.string().optional(), diff --git a/app/api/user/notifications/[id]/route.ts b/app/api/user/notifications/[id]/route.ts index b12acc3..3b19f16 100644 --- a/app/api/user/notifications/[id]/route.ts +++ b/app/api/user/notifications/[id]/route.ts @@ -4,6 +4,8 @@ import { deleteNotification, } from "@/lib/services/notification-service"; +export const dynamic = 
"force-dynamic"; + /** * PATCH /api/user/notifications/[id] * Mark a notification as read diff --git a/app/components/articles/ArticleViewTracker.tsx b/app/components/articles/ArticleViewTracker.tsx index 7357cb0..3a8cd95 100644 --- a/app/components/articles/ArticleViewTracker.tsx +++ b/app/components/articles/ArticleViewTracker.tsx @@ -24,8 +24,15 @@ export function ArticleViewTracker({ articleId, estimatedTime, onReadStatusChang const viewStartTime = useRef(null); const hasTrackedView = useRef(false); + // Use ref to always have latest estimatedTime in cleanup function + const estimatedTimeRef = useRef(estimatedTime); - const autoMarkAsRead = preferences?.autoMarkAsRead ?? false; + const autoMarkAsRead = preferences?.autoMarkAsRead ?? true; + + // Keep ref in sync with prop + useEffect(() => { + estimatedTimeRef.current = estimatedTime; + }, [estimatedTime]); // Track view on mount useEffect(() => { @@ -59,13 +66,14 @@ export function ArticleViewTracker({ articleId, estimatedTime, onReadStatusChang if (timeSpent < 0) return; // Don't track if estimatedTime is not yet calculated (0 or negative) - if (estimatedTime <= 0) return; + const currentEstimatedTime = estimatedTimeRef.current; + if (currentEstimatedTime <= 0) return; trackExit.mutate({ articleId, data: { timeSpent, - estimatedTime, + estimatedTime: currentEstimatedTime, }, }); }; diff --git a/app/components/layout/ReadingPanelLayout.tsx b/app/components/layout/ReadingPanelLayout.tsx index 71dc51c..a43ff3a 100644 --- a/app/components/layout/ReadingPanelLayout.tsx +++ b/app/components/layout/ReadingPanelLayout.tsx @@ -1,6 +1,6 @@ "use client"; -import { ReactNode, useState, useEffect, useCallback, useRef } from "react"; +import { ReactNode, useState, useEffect, useCallback, useRef, useMemo } from "react"; import { useSession } from "next-auth/react"; import { useSearchParams, useRouter } from "next/navigation"; import { ResizableSplitPane } from "./ResizableSplitPane"; @@ -181,47 +181,46 @@ export function 
ReadingPanelLayout({ children, onArticleReadStatusChange }: Read preferences && readingMode === "standalone"; - // Render children with callback support - const renderChildren = () => { + // Compute children content once to avoid multiple render function calls + const childrenContent = useMemo(() => { if (typeof children === "function") { - // Pass callbacks for side_panel and inline modes - const shouldPassCallbacks = isPanelActive || isInlineMode; + // eslint-disable-next-line react-hooks/refs -- Safe: only passing props, not accessing refs return children({ - onArticleSelect: shouldPassCallbacks ? handleArticleSelect : undefined, - selectedArticleId: shouldPassCallbacks ? selectedArticleId : null + onArticleSelect: (isPanelActive || isInlineMode) ? handleArticleSelect : undefined, + selectedArticleId: (isPanelActive || isInlineMode) ? selectedArticleId : null }); } return children; - }; + }, [children, isPanelActive, isInlineMode, handleArticleSelect, selectedArticleId]); // If loading preferences, show loading state if (isLoadingPreferences) { - return <>{renderChildren()}; + return <>{childrenContent}; } // If not logged in or no preferences, show normal layout if (!session?.user || !preferences) { - return <>{renderChildren()}; + return <>{childrenContent}; } // For inline mode, render without split pane (ArticleList will handle inline expansion) if (isInlineMode) { - return
{renderChildren()}
; + return
{childrenContent}
; } // For standalone mode, render without callbacks (forces full-page navigation) if (isStandaloneMode) { - return <>{renderChildren()}; + return <>{childrenContent}; } // For side_panel mode: if panel disabled or mobile, show normal layout if (!isPanelActive) { - return <>{renderChildren()}; + return <>{childrenContent}; } // If panel enabled but no article selected, show normal layout if (!selectedArticleId) { - return
{renderChildren()}
; + return
{childrenContent}
; } // Safely cast preferences to required types since we verified they exist in isPanelActive @@ -236,14 +235,14 @@ export function ReadingPanelLayout({ children, onArticleReadStatusChange }: Read size={panelSize} onResize={handleResize} panel={ - } > - {renderChildren()} + {childrenContent} ); diff --git a/app/components/preferences/views/ReadingView.tsx b/app/components/preferences/views/ReadingView.tsx index 7c6cdcb..4ee2021 100644 --- a/app/components/preferences/views/ReadingView.tsx +++ b/app/components/preferences/views/ReadingView.tsx @@ -145,7 +145,7 @@ export function ReadingView({ preferences, updatePreference }: ReadingViewProps) updatePreference("autoMarkAsRead", checked)} /> diff --git a/app/components/ui/Tabs/Tabs.stories.tsx b/app/components/ui/Tabs/Tabs.stories.tsx index c19ba3a..5f4e379 100644 --- a/app/components/ui/Tabs/Tabs.stories.tsx +++ b/app/components/ui/Tabs/Tabs.stories.tsx @@ -1,4 +1,5 @@ import type { Meta, StoryObj } from '@storybook/nextjs-vite'; +import React from 'react'; import { Tabs, TabList, Tab, TabPanels, TabPanel } from './index'; const meta: Meta = { @@ -223,6 +224,3 @@ export const ControlledTabs: Story = { ); }, }; - -// @ts-ignore - React is used in the story -import React from 'react'; diff --git a/app/docs/layout.tsx b/app/docs/layout.tsx index 5a80a72..8228cca 100644 --- a/app/docs/layout.tsx +++ b/app/docs/layout.tsx @@ -4,6 +4,10 @@ import { RootProvider } from 'fumadocs-ui/provider/next'; import { source } from '@/lib/source'; import 'fumadocs-ui/style.css'; +export const dynamic = "force-dynamic"; +export const runtime = "nodejs"; +export const revalidate = 0; + export default function RootDocsLayout({ children }: { children: ReactNode }) { return ( diff --git a/app/feeds-management/components/FeedManagementShell.tsx b/app/feeds-management/components/FeedManagementShell.tsx index b753d77..9fe0537 100644 --- a/app/feeds-management/components/FeedManagementShell.tsx +++ 
b/app/feeds-management/components/FeedManagementShell.tsx @@ -1,6 +1,7 @@ "use client"; import { ReactNode } from "react"; +import Link from "next/link"; import { useFeedNavigation } from "@/hooks/use-feed-navigation"; import { UserMenu } from "@/app/components/auth/UserMenu"; import { NotificationBell } from "@/app/components/notifications/NotificationBell"; @@ -43,9 +44,9 @@ export function FeedManagementShell({ {/* Top Navigation Bar */}
- + NeuReed - + | Feed Management
diff --git a/app/feeds-management/components/modals/AddFeedModal.tsx b/app/feeds-management/components/modals/AddFeedModal.tsx new file mode 100644 index 0000000..72c420c --- /dev/null +++ b/app/feeds-management/components/modals/AddFeedModal.tsx @@ -0,0 +1,263 @@ +"use client"; + +import { useState } from "react"; +import { Modal, ModalHeader, ModalBody, ModalFooter, Button } from "@/app/components/ui"; +import { useValidateFeed, useAddFeed } from "@/hooks/queries/use-feeds"; +import { useCategories } from "@/hooks/queries/use-categories"; + +interface AddFeedModalProps { + onClose: () => void; +} + +type ExtractionMethod = "rss" | "readability" | "playwright" | "custom"; + +/** + * Add Feed Modal + * + * Modal for adding a new RSS feed. + * Features: + * - URL input with validation + * - Auto-detection of feed title + * - Category selection + * - Extraction method selection + */ +export function AddFeedModal({ onClose }: AddFeedModalProps) { + const { data: categories = [] } = useCategories(); + const validateMutation = useValidateFeed(); + const addFeedMutation = useAddFeed(); + + const [url, setUrl] = useState(""); + const [name, setName] = useState(""); + const [selectedCategoryIds, setSelectedCategoryIds] = useState([]); + const [extractionMethod, setExtractionMethod] = useState("readability"); + const [error, setError] = useState(""); + const [feedInfo, setFeedInfo] = useState<{ + title: string; + description?: string; + itemCount: number; + } | null>(null); + + const handleValidate = async () => { + if (!url) return; + + setError(""); + setFeedInfo(null); + + try { + const result = await validateMutation.mutateAsync(url); + + if (result.valid && result.feedInfo) { + setFeedInfo({ + title: result.feedInfo.title || "", + description: result.feedInfo.description, + itemCount: result.feedInfo.itemCount || 0, + }); + // Auto-fill name if empty + if (!name && result.feedInfo.title) { + setName(result.feedInfo.title); + } + } else { + setError(result.error || 
"Invalid feed URL"); + } + } catch (err) { + setError(err instanceof Error ? err.message : "Failed to validate feed"); + } + }; + + const handleAdd = async () => { + if (!url.trim()) { + setError("Feed URL is required"); + return; + } + + try { + await addFeedMutation.mutateAsync({ + url: url.trim(), + name: name.trim() || undefined, + categoryIds: selectedCategoryIds.length > 0 ? selectedCategoryIds : undefined, + settings: { method: extractionMethod }, + }); + onClose(); + } catch (err) { + setError(err instanceof Error ? err.message : "Failed to add feed"); + console.error("Add feed error:", err); + } + }; + + const handleCategoryToggle = (categoryId: string) => { + setSelectedCategoryIds((prev) => + prev.includes(categoryId) + ? prev.filter((id) => id !== categoryId) + : [...prev, categoryId] + ); + }; + + return ( + + + +
+ {/* Error Message */} + {error && ( +
+
{error}
+
+ )} + + {/* URL Input */} +
+ +
+ { + setUrl(e.target.value); + setError(""); + setFeedInfo(null); + }} + placeholder="https://example.com/feed.xml" + className="flex-1 px-3 py-2 border border-border rounded focus:outline-none focus:ring-2 focus:ring-primary" + autoFocus + /> + +
+

+ Enter the RSS or Atom feed URL +

+
+ + {/* Validation Success */} + {feedInfo && ( +
+
+ + + +
+
+ Valid feed found! +
+
+ {feedInfo.title} + {feedInfo.description && ` - ${feedInfo.description}`} +
+
+ {feedInfo.itemCount} items available +
+
+
+
+ )} + + {/* Feed Name */} +
+ + setName(e.target.value)} + placeholder="Auto-detected from feed" + className="w-full px-3 py-2 border border-border rounded focus:outline-none focus:ring-2 focus:ring-primary" + /> +

+ Leave empty to use the feed's title +

+
+ + {/* Categories */} + {categories.length > 0 && ( +
+ +
+ {categories.map((category) => ( + + ))} +
+

+ Click to select categories for this feed +

+
+ )} + + {/* Extraction Method */} +
+ + +

+ Choose how to extract article content. Readability works for most sites. +

+
+
+
+ + + + +
+ ); +} diff --git a/app/feeds-management/components/modals/ModalManager.tsx b/app/feeds-management/components/modals/ModalManager.tsx index 7307d2f..bb4d8bc 100644 --- a/app/feeds-management/components/modals/ModalManager.tsx +++ b/app/feeds-management/components/modals/ModalManager.tsx @@ -6,6 +6,7 @@ import { OpmlImportModal } from "./OpmlImportModal"; import { OpmlExportModal } from "./OpmlExportModal"; import { BulkEditModal } from "./BulkEditModal"; import { CreateCategoryModal } from "./CreateCategoryModal"; +import { AddFeedModal } from "./AddFeedModal"; /** * Modal Manager @@ -25,6 +26,7 @@ export function ModalManager() { {modal === "opml-export" && } {modal === "bulk-edit" && } {modal === "create-category" && } + {modal === "add-feed" && } ); } diff --git a/app/feeds-management/components/views/CategoryView.tsx b/app/feeds-management/components/views/CategoryView.tsx index 6a73e61..23f86ce 100644 --- a/app/feeds-management/components/views/CategoryView.tsx +++ b/app/feeds-management/components/views/CategoryView.tsx @@ -325,7 +325,7 @@ export function CategoryView({ categoryId }: CategoryViewProps) {

Danger Zone

- Deleting this category will move all feeds to "Uncategorized". This action cannot be undone. + Deleting this category will move all feeds to “Uncategorized”. This action cannot be undone.

diff --git a/app/feeds-management/components/views/OverviewView.tsx b/app/feeds-management/components/views/OverviewView.tsx index bfb1606..fb7cb29 100644 --- a/app/feeds-management/components/views/OverviewView.tsx +++ b/app/feeds-management/components/views/OverviewView.tsx @@ -58,6 +58,12 @@ export function OverviewView() {

Feed Management Overview

+
) : feeds.length === 0 ? ( -
- No feeds yet. Add your first feed to get started. +
+
+ No feeds yet. Add your first feed to get started. +
+
) : (
diff --git a/app/layout.tsx b/app/layout.tsx index c0835c0..e06d348 100644 --- a/app/layout.tsx +++ b/app/layout.tsx @@ -7,6 +7,10 @@ import { QueryProvider } from "./components/providers/QueryProvider"; // Force dynamic rendering for all routes (authentication, database queries, etc.) export const dynamic = "force-dynamic"; +// Force Node.js runtime (not Edge) to ensure full compatibility +export const runtime = "nodejs"; +// Disable static optimization and revalidation +export const revalidate = 0; const geistSans = Geist({ variable: "--font-geist-sans", diff --git a/app/lib/preferences/preference-defaults.ts b/app/lib/preferences/preference-defaults.ts index 74f6d6c..c2a65fa 100644 --- a/app/lib/preferences/preference-defaults.ts +++ b/app/lib/preferences/preference-defaults.ts @@ -15,7 +15,7 @@ export function getDefaultPreferences(): UserPreferences { articlesPerPage: 20, defaultView: "expanded", showReadArticles: true, - autoMarkAsRead: false, + autoMarkAsRead: true, showRelatedExcerpts: false, bounceThreshold: 0.25, searchRecencyWeight: 0.3, diff --git a/docs/LLM_JSON_PARSING_IMPROVEMENTS.md b/docs/LLM_JSON_PARSING_IMPROVEMENTS.md new file mode 100644 index 0000000..dee1d28 --- /dev/null +++ b/docs/LLM_JSON_PARSING_IMPROVEMENTS.md @@ -0,0 +1,229 @@ +# LLM JSON Parsing Improvements + +## Problem + +The application was experiencing JSON parsing errors when processing LLM responses: +- Error: "Unexpected end of JSON input" +- Empty raw responses logged +- LLMs often return JSON wrapped in markdown code blocks or with introductory text +- No detailed logging of the actual LLM response when parsing failed + +## Solution + +Created a robust JSON extraction and parsing utility that: +1. **Extracts JSON** from various response formats +2. **Logs detailed error information** when parsing fails +3. **Handles common LLM response patterns** automatically + +## Changes Made + +### 1. 
New Utility Module: `json-parser.ts` + +Location: [src/lib/llm/json-parser.ts](../src/lib/llm/json-parser.ts) + +**Key Functions:** + +#### `extractJSON(rawResponse: string): string` +Cleans and extracts JSON from LLM responses that may contain: +- Markdown code blocks: `` ```json\n{...}\n``` `` +- Markdown code blocks without language tag: `` ```\n{...}\n``` `` +- Introductory text: `"Here's the result: {...}"` +- Trailing text: `"{...} I hope this helps!"` + +**Examples:** + +```typescript +// Input: "Here's the JSON:\n```json\n{\"key\": \"value\"}\n```" +// Output: "{\"key\": \"value\"}" + +// Input: "Sure! {\"summary\": \"text\"} Let me know if you need anything else." +// Output: "{\"summary\": \"text\"}" +``` + +#### `parseJSONFromLLM(rawResponse: string, context?: { model?: string; operation?: string }): any` +Parses JSON from LLM response with automatic extraction and detailed error logging. + +**Error Logging includes:** +- Parse error message +- First 1000 characters of raw response +- First 1000 characters of extracted JSON +- Response length +- Model name +- Operation name + +#### `parseJSONFromLLMSafe<T>(rawResponse: string, fallback: T, context?: { ... }): T` +Safe parsing with fallback value if parsing fails. + +### 2. Updated LLM Providers + +**Files Modified:** +- [src/lib/llm/openai-provider.ts](../src/lib/llm/openai-provider.ts) +- [src/lib/llm/ollama-provider.ts](../src/lib/llm/ollama-provider.ts) +- [src/lib/services/summarization-service.ts](../src/lib/services/summarization-service.ts) + +**Changes:** +- All `JSON.parse()` calls replaced with `parseJSONFromLLM()` +- Improved error logging (changed from `logger.error` to `logger.warn` for fallback cases) +- Added context information (model name, operation) to all parse calls + +### 3. Enhanced Error Logging + +**Before:** +```typescript +logger.error("Failed to parse LLM JSON response, using fallback", { + parseError: "Unexpected end of JSON input", + fullRawResponse: "", // Often empty!
+ responseLength: 0, + model: "gpt-5-nano" +}); +``` + +**After:** +```typescript +logger.error("Failed to parse LLM JSON response", { + parseError: "Unexpected token", + rawResponse: "Here's the result: ```json\n{\"sum...", // First 1000 chars + extractedJSON: "{\"sum...", // First 1000 chars of extracted JSON + responseLength: 2543, + extractedLength: 234, + model: "gpt-4o-mini", + operation: "summarizeArticle" +}); +``` + +## Usage Examples + +### In LLM Providers + +```typescript +import { parseJSONFromLLM } from "./json-parser"; + +// In summarizeArticle method +try { + const parsed = parseJSONFromLLM(response.content, { + model: this.model, + operation: "summarizeArticle", + }); + return { + summary: parsed.summary || "", + keyPoints: Array.isArray(parsed.keyPoints) ? parsed.keyPoints : [], + topics: Array.isArray(parsed.topics) ? parsed.topics : [], + sentiment: ["positive", "neutral", "negative"].includes(parsed.sentiment) + ? parsed.sentiment + : "neutral", + }; +} catch (parseError) { + // Detailed error already logged by parseJSONFromLLM + logger.warn("Using fallback for article summary due to parse error", { + model: this.model, + }); + + return { + summary: response.content.substring(0, 500), + keyPoints: [], + topics: [], + sentiment: "neutral", + }; +} +``` + +## Supported Response Formats + +The parser now handles all these formats automatically: + +1. **Plain JSON** + ``` + {"key": "value"} + ``` + +2. **Markdown with json tag** + ``` + ```json + {"key": "value"} + ``` + ``` + +3. **Markdown without language tag** + ``` + ``` + {"key": "value"} + ``` + ``` + +4. **With introductory text** + ``` + Sure! Here's the JSON response: + {"key": "value"} + ``` + +5. **With trailing text** + ``` + {"key": "value"} + + I hope this helps! + ``` + +6. 
**Complex multi-line with explanation** + ``` + Here's your article summary: + + ```json + { + "summary": "This is a summary", + "keyPoints": ["point 1", "point 2"], + "topics": ["topic1", "topic2"], + "sentiment": "positive" + } + ``` + + Let me know if you need any clarifications! + ``` + +## Benefits + +1. **Fewer Parse Errors**: Automatically extracts JSON from various formats +2. **Better Debugging**: Detailed error logs show actual LLM responses +3. **Graceful Fallbacks**: Maintains functionality even with malformed responses +4. **Consistent Handling**: Same parsing logic across all LLM providers +5. **Type Safety**: TypeScript support with generic fallback values + +## Testing + +A comprehensive test suite is available at: +[src/lib/llm/__tests__/json-parser.test.ts](../src/lib/llm/__tests__/json-parser.test.ts) + +Tests cover: +- Plain JSON extraction +- Markdown code block extraction (with and without language tags) +- Removal of introductory/trailing text +- JSON arrays +- Complex nested structures +- Error handling and fallbacks + +## Migration Notes + +All existing code using `JSON.parse()` for LLM responses should be updated to use `parseJSONFromLLM()`: + +**Before:** +```typescript +const parsed = JSON.parse(response.content); +``` + +**After:** +```typescript +const parsed = parseJSONFromLLM(response.content, { + model: this.model, + operation: "operationName" +}); +``` + +## Monitoring + +After deployment, monitor logs for: +- Reduced frequency of JSON parse errors +- More informative error messages with actual LLM response content +- Successful extraction from markdown-wrapped responses + +Look for log messages: +- `"Failed to parse LLM JSON response"` (error level) - parsing failed even after extraction +- `"Using fallback for ..."` (warn level) - fallback value used due to parse failure diff --git a/docs/configuration/configuration-reference.md b/docs/configuration/configuration-reference.md index 91f8e58..d7320ba 100644 --- 
a/docs/configuration/configuration-reference.md +++ b/docs/configuration/configuration-reference.md @@ -146,7 +146,7 @@ All user preferences are stored in the `UserPreferences` database model and can | `articlesPerPage` | Integer | `20` | 5-100 | Number of articles per page | | `infiniteScrollMode` | String | `"both"` | `"auto"`, `"button"`, `"both"` | How to load more articles | | `showReadArticles` | Boolean | `true` | `true`, `false` | Display already-read articles | -| `autoMarkAsRead` | Boolean | `false` | `true`, `false` | Automatically mark articles as read when opened | +| `autoMarkAsRead` | Boolean | `true` | `true`, `false` | Automatically mark articles as read when opened | | `showRelatedExcerpts` | Boolean | `false` | `true`, `false` | Show excerpts in related articles section | **Description:** diff --git a/eslint.config.mjs b/eslint.config.mjs index c78bfc9..25d25b3 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -16,6 +16,10 @@ const eslintConfig = defineConfig([ "out/**", "build/**", "next-env.d.ts", + // Generated and third-party files: + "storybook-static/**", + ".source/**", + "node_modules/**", ]), { rules: { diff --git a/prisma/migrations/20251201183639_set_auto_mark_as_read_default_true/migration.sql b/prisma/migrations/20251201183639_set_auto_mark_as_read_default_true/migration.sql new file mode 100644 index 0000000..1b90885 --- /dev/null +++ b/prisma/migrations/20251201183639_set_auto_mark_as_read_default_true/migration.sql @@ -0,0 +1,6 @@ +-- AlterTable: Change default value of autoMarkAsRead from false to true +ALTER TABLE "user_preferences" ALTER COLUMN "autoMarkAsRead" SET DEFAULT true; + +-- Update existing records: Set autoMarkAsRead to true for users who have it as false +-- This ensures all users have the correct default value +UPDATE "user_preferences" SET "autoMarkAsRead" = true WHERE "autoMarkAsRead" = false; diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 87cc21b..4e1e4d0 100644 --- a/prisma/schema.prisma 
+++ b/prisma/schema.prisma @@ -1,7 +1,7 @@ generator client { provider = "prisma-client" output = "../src/generated/prisma" - previewFeatures = ["postgresqlExtensions", "driverAdapters"] + previewFeatures = ["driverAdapters", "postgresqlExtensions"] } datasource db { @@ -66,10 +66,10 @@ model articles { title String content String url String @unique - publishedAt DateTime? @db.Timestamptz + publishedAt DateTime? @db.Timestamptz(6) embedding Unsupported("vector")? - createdAt DateTime @default(now()) @db.Timestamptz - updatedAt DateTime @updatedAt @db.Timestamptz + createdAt DateTime @default(now()) @db.Timestamptz(6) + updatedAt DateTime @updatedAt @db.Timestamptz(6) author String? contentHash String? excerpt String? @@ -104,14 +104,14 @@ model CronJobRun { id String @id @default(cuid()) jobName String status CronJobStatus - triggeredBy CronJobTrigger startedAt DateTime @default(now()) completedAt DateTime? durationMs Int? stats Json? - metadata Json? errorMessage String? - logs Json? // Array of log entries with timestamp, level, message + logs Json? + triggeredBy CronJobTrigger + metadata Json? createdAt DateTime @default(now()) updatedAt DateTime @updatedAt @@ -123,8 +123,8 @@ model CronJobRun { model JobLock { jobName String @id lockedAt DateTime - lockedBy String // Process ID - expiresAt DateTime // Auto-expire after timeout + lockedBy String + expiresAt DateTime @@index([expiresAt]) @@map("job_locks") @@ -141,33 +141,32 @@ model feed_categories { } model feeds { - id String @id - name String - url String @unique - lastFetched DateTime? - settings Json? - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt - description String? - errorCount Int @default(0) - etag String? - fetchInterval Int @default(60) - imageUrl String? - lastError String? - lastModified String? - siteUrl String? - // Health tracking fields - healthStatus String @default("healthy") - consecutiveFailures Int @default(0) - lastSuccessfulFetch DateTime? 
- autoDisableThreshold Int @default(10) - notifyOnError Boolean @default(false) - httpStatus Int? - redirectUrl String? - articles articles[] - feed_categories feed_categories[] - user_feeds user_feeds[] - feed_error_log feed_error_log[] + id String @id + name String + url String @unique + lastFetched DateTime? + settings Json? + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + description String? + errorCount Int @default(0) + etag String? + fetchInterval Int @default(60) + imageUrl String? + lastError String? + lastModified String? + siteUrl String? + healthStatus String? @default("healthy") + consecutiveFailures Int? @default(0) + lastSuccessfulFetch DateTime? @db.Timestamp(6) + autoDisableThreshold Int? @default(10) + notifyOnError Boolean? @default(false) + httpStatus Int? + redirectUrl String? + articles articles[] + feed_categories feed_categories[] + feed_error_log feed_error_log[] + user_feeds user_feeds[] @@index([errorCount]) @@index([lastFetched]) @@ -178,13 +177,13 @@ model feeds { model feed_error_log { id String @id feedId String - timestamp DateTime @default(now()) + timestamp DateTime @default(now()) @db.Timestamp(6) errorType String errorMessage String httpStatus Int? details Json? - resolved Boolean @default(false) - feed feeds @relation(fields: [feedId], references: [id], onDelete: Cascade) + resolved Boolean? @default(false) + feed feeds @relation(fields: [feedId], references: [id], onDelete: Cascade, onUpdate: NoAction) @@index([feedId]) @@index([timestamp(sort: Desc)]) @@ -226,24 +225,22 @@ model user_categories { createdAt DateTime @default(now()) updatedAt DateTime @updatedAt icon String? @default("๐Ÿ“") - // Hierarchical and display fields parentId String? color String? 
- collapsed Boolean @default(false) - sortOrder String @default("manual") - includeInSearch Boolean @default(true) - isDefault Boolean @default(false) - isReadOnly Boolean @default(false) - users User @relation(fields: [userId], references: [id], onDelete: Cascade) - parent user_categories? @relation("CategoryHierarchy", fields: [parentId], references: [id], onDelete: SetNull) + collapsed Boolean? @default(false) + sortOrder String? @default("manual") + includeInSearch Boolean? @default(true) + isDefault Boolean? @default(false) + isReadOnly Boolean? @default(false) + saved_searches saved_searches[] + parent user_categories? @relation("CategoryHierarchy", fields: [parentId], references: [id], onUpdate: NoAction) children user_categories[] @relation("CategoryHierarchy") + users User @relation(fields: [userId], references: [id], onDelete: Cascade) user_feed_categories user_feed_categories[] - saved_searches saved_searches[] @@unique([userId, name]) @@index([userId]) @@index([userId, order]) - @@index([userId, isDefault]) } model user_feed_categories { @@ -267,7 +264,7 @@ model user_feeds { settings Json? 
subscribedAt DateTime @default(now()) updatedAt DateTime @updatedAt - tags String[] // User-defined tags + tags String[] user_feed_categories user_feed_categories[] feeds feeds @relation(fields: [feedId], references: [id], onDelete: Cascade) users User @relation(fields: [userId], references: [id], onDelete: Cascade) @@ -299,14 +296,10 @@ model user_preferences { userId String @unique theme String @default("system") fontSize String @default("medium") - sidebarFontSize String @default("smaller") - cardFontSize String @default("same") - modalFontSize String @default("same") - uiFontSize String @default("same") articlesPerPage Int @default(20) defaultView String @default("expanded") showReadArticles Boolean @default(true) - autoMarkAsRead Boolean @default(false) + autoMarkAsRead Boolean @default(true) createdAt DateTime @default(now()) updatedAt DateTime @updatedAt showRelatedExcerpts Boolean @default(false) @@ -320,7 +313,6 @@ model user_preferences { readingPanelSize Int @default(50) categoryStates Json? sidebarCollapsed Boolean @default(false) - sidebarWidth Int @default(20) // Percentage width (10-40) readingFontFamily String @default("Georgia") readingFontSize Int @default(18) readingLineHeight Float @default(1.7) @@ -344,24 +336,29 @@ model user_preferences { showArticleAuthor Boolean @default(true) showArticleFeedInfo Boolean @default(true) showArticleDate Boolean @default(true) - articleCardSectionOrder Json? @default("[\"feedInfo\",\"title\",\"excerpt\",\"actions\"]") + articleCardSectionOrder Json? 
@default("[\"feedInfo\", \"title\", \"excerpt\", \"actions\"]") articleCardDensity String @default("normal") articleCardBorderWidth String @default("normal") articleCardBorderRadius String @default("normal") articleCardBorderContrast String @default("medium") articleCardSpacing String @default("normal") - readingMode String @default("side_panel") + sidebarWidth Int @default(20) + cardFontSize String @default("same") + modalFontSize String @default("same") + sidebarFontSize String @default("smaller") + uiFontSize String @default("same") inlineAutoScroll Boolean @default(true) + readingMode String @default("side_panel") users User @relation(fields: [userId], references: [id], onDelete: Cascade) } model user_notifications { id String @id userId String - type String // 'feed_refresh', 'info', 'warning', 'error' + type String title String message String - metadata Json? // Additional data like stats + metadata Json? read Boolean @default(false) createdAt DateTime @default(now()) updatedAt DateTime @updatedAt @@ -372,26 +369,26 @@ model user_notifications { } model saved_searches { - id String @id @default(cuid()) + id String @id @default(cuid()) userId String name String - query String @db.Text - icon String? @default("๐Ÿ”") - threshold Float @default(0.6) + query String + icon String? @default("๐Ÿ”") + threshold Float @default(0.6) categoryId String? - notifyOnMatch Boolean @default(false) - notifyThreshold Float @default(0.85) - dailyDigest Boolean @default(false) - recencyBias Float @default(0.0) + notifyOnMatch Boolean @default(false) + notifyThreshold Float @default(0.85) + dailyDigest Boolean @default(false) + recencyBias Float @default(0.0) prioritySources Json? - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt lastMatchedAt DateTime? 
- totalMatches Int @default(0) - archived Boolean @default(false) - user User @relation(fields: [userId], references: [id], onDelete: Cascade) - category user_categories? @relation(fields: [categoryId], references: [id], onDelete: SetNull) + totalMatches Int @default(0) + archived Boolean @default(false) matches saved_search_matches[] + category user_categories? @relation(fields: [categoryId], references: [id]) + user User @relation(fields: [userId], references: [id], onDelete: Cascade) @@index([userId]) @@index([userId, archived]) @@ -403,11 +400,11 @@ model saved_search_matches { articleId String relevanceScore Float matchedTerms Json - matchReason String? @db.Text + matchReason String? createdAt DateTime @default(now()) notified Boolean @default(false) - savedSearch saved_searches @relation(fields: [savedSearchId], references: [id], onDelete: Cascade) article articles @relation(fields: [articleId], references: [id], onDelete: Cascade) + savedSearch saved_searches @relation(fields: [savedSearchId], references: [id], onDelete: Cascade) @@unique([savedSearchId, articleId]) @@index([savedSearchId, relevanceScore]) @@ -421,19 +418,19 @@ model User { email String @unique emailVerified DateTime? image String? - role UserRole @default(USER) createdAt DateTime @default(now()) updatedAt DateTime @updatedAt + role UserRole @default(USER) accounts Account[] article_feedback article_feedback[] read_articles read_articles[] + saved_searches saved_searches[] sessions Session[] user_categories user_categories[] user_feeds user_feeds[] + user_notifications user_notifications[] user_patterns user_patterns[] user_preferences user_preferences? 
- user_notifications user_notifications[] - saved_searches saved_searches[] @@map("users") } diff --git a/prisma/seed.ts b/prisma/seed.ts index 74e67fd..201aba7 100644 --- a/prisma/seed.ts +++ b/prisma/seed.ts @@ -473,7 +473,7 @@ async function main() { articlesPerPage: 20, defaultView: "expanded", showReadArticles: true, - autoMarkAsRead: false, + autoMarkAsRead: true, showRelatedExcerpts: false, readingMode: "side_panel", inlineAutoScroll: true, diff --git a/scripts/verify-json-parser.js b/scripts/verify-json-parser.js new file mode 100644 index 0000000..b3d57a2 --- /dev/null +++ b/scripts/verify-json-parser.js @@ -0,0 +1,108 @@ +/** + * Simple verification script for JSON parser + * Run with: node scripts/verify-json-parser.js + */ + +// Mock extractJSON function +function extractJSON(rawResponse) { + let cleaned = rawResponse.trim(); + + // Try to extract JSON from markdown code blocks + const codeBlockMatch = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/); + if (codeBlockMatch?.[1]) { + cleaned = codeBlockMatch[1].trim(); + } + + // Remove any leading text before the first { or [ + const jsonStartMatch = cleaned.match(/^[^{[]*([{[][\s\S]*)/); + if (jsonStartMatch?.[1]) { + cleaned = jsonStartMatch[1]; + } + + // Remove any trailing text after the last } or ] + const jsonEndMatch = cleaned.match(/([\s\S]*[}\]])[^}\]]*$/); + if (jsonEndMatch?.[1]) { + cleaned = jsonEndMatch[1]; + } + + return cleaned.trim(); +} + +// Test cases +const tests = [ + { + name: "Plain JSON", + input: '{"key": "value"}', + expected: '{"key": "value"}' + }, + { + name: "JSON in markdown with json tag", + input: 'Here is the result:\n```json\n{"key": "value"}\n```', + expected: '{"key": "value"}' + }, + { + name: "JSON in markdown without tag", + input: '```\n{"key": "value"}\n```', + expected: '{"key": "value"}' + }, + { + name: "JSON with intro text", + input: 'Sure! 
Here you go: {"key": "value"}', + expected: '{"key": "value"}' + }, + { + name: "JSON with trailing text", + input: '{"key": "value"} I hope this helps!', + expected: '{"key": "value"}' + }, + { + name: "Complex JSON with markdown", + input: 'Here\'s the summary:\n```json\n{\n "summary": "Test",\n "keyPoints": ["a", "b"],\n "topics": ["topic1"]\n}\n```\nLet me know if you need anything else!', + expected: '{\n "summary": "Test",\n "keyPoints": ["a", "b"],\n "topics": ["topic1"]\n}' + }, + { + name: "Empty response", + input: '', + expected: '' + } +]; + +console.log('๐Ÿงช Testing JSON Parser\n'); + +let passed = 0; +let failed = 0; + +tests.forEach((test, index) => { + try { + const result = extractJSON(test.input); + + if (result === test.expected) { + console.log(`โœ… Test ${index + 1}: ${test.name}`); + passed++; + } else { + console.log(`โŒ Test ${index + 1}: ${test.name}`); + console.log(` Expected: ${test.expected}`); + console.log(` Got: ${result}`); + failed++; + } + + // Try to parse the extracted JSON (if not empty) + if (result && result.trim()) { + JSON.parse(result); + console.log(` โœ“ Valid JSON`); + } + } catch (error) { + console.log(`โŒ Test ${index + 1}: ${test.name} - ${error.message}`); + failed++; + } +}); + +console.log(`\n๐Ÿ“Š Results: ${passed} passed, ${failed} failed out of ${tests.length} tests`); + +if (failed === 0) { + console.log('\nโœจ All tests passed!'); + process.exit(0); +} else { + console.log('\nโš ๏ธ Some tests failed'); + process.exit(1); +} diff --git a/src/hooks/queries/use-feeds.ts b/src/hooks/queries/use-feeds.ts index 55e122f..9e5df0b 100644 --- a/src/hooks/queries/use-feeds.ts +++ b/src/hooks/queries/use-feeds.ts @@ -93,13 +93,22 @@ async function fetchUserFeeds(includeAll = false): Promise ({ + return (response.subscriptions || []).map((sub: any) => { + // Extract category from user_feed_categories (take first category if multiple) + const categoryRelation = sub.user_feed_categories?.[0]?.user_categories; + const 
category = categoryRelation ? { + id: categoryRelation.id, + name: categoryRelation.name, + color: categoryRelation.color, + } : null; + + return { ...sub.feeds, // Override name with custom name if it exists name: sub.customName || sub.feeds.name, // Add subscription specific fields subscribedAt: sub.createdAt, - category: sub.category, + category, // Compute isActive from healthStatus isActive: sub.feeds.healthStatus !== "disabled", // Preserve feed settings (extraction, etc.) and add user subscription settings @@ -120,7 +129,8 @@ async function fetchUserFeeds(includeAll = false): Promise { /** @@ -75,14 +75,19 @@ export function useViewNavigation( const { modalName, defaultView, isOpen, onNavigate, onClose } = options; const [currentView, setCurrentView] = useState(defaultView); + const wasOpenRef = useRef(isOpen); /** * Reset view to defaultView when modal opens (to handle initialView changes) + * Only resets when transitioning from closed to open */ useEffect(() => { - if (isOpen) { - setCurrentView(defaultView); + // Only reset if we just opened (transition from false to true) + if (isOpen && !wasOpenRef.current) { + // eslint-disable-next-line -- Safe: only sets state once on modal open transition, conditional prevents cascading + setCurrentView(prev => prev !== defaultView ? 
defaultView : prev); } + wasOpenRef.current = isOpen; }, [isOpen, defaultView]); /** diff --git a/src/lib/auth.ts b/src/lib/auth.ts index 4a24f19..9d57d55 100644 --- a/src/lib/auth.ts +++ b/src/lib/auth.ts @@ -144,7 +144,7 @@ export const authConfig = { articlesPerPage: 20, defaultView: "expanded", showReadArticles: true, - autoMarkAsRead: false, + autoMarkAsRead: true, showRelatedExcerpts: false, bounceThreshold: 0.25, showLowRelevanceArticles: true, diff --git a/src/lib/content-processor.ts b/src/lib/content-processor.ts index 566d92d..9d09228 100644 --- a/src/lib/content-processor.ts +++ b/src/lib/content-processor.ts @@ -2,6 +2,8 @@ * Content processing utilities for articles */ +import { removePlaceholderImagesFromHtml } from "./image-utils"; + /** * Process article content for display * - Sanitize HTML @@ -31,6 +33,9 @@ export function processArticleContent( // Add target="_blank" to all links processed = addTargetBlankToLinks(processed); + // Remove placeholder images from content + processed = removePlaceholderImagesFromHtml(processed); + // Process images (lazy loading, etc.) 
processed = processImages(processed); diff --git a/src/lib/docs-source.ts b/src/lib/docs-source.ts index 2eecf56..8826cc7 100644 --- a/src/lib/docs-source.ts +++ b/src/lib/docs-source.ts @@ -1,4 +1,4 @@ -import { docs } from '../../.source/server'; +import { docs } from 'fumadocs-mdx:collections/server'; // Export docs directly export const source = docs; diff --git a/src/lib/extractors/playwright-extractor.ts b/src/lib/extractors/playwright-extractor.ts index d4bb7e6..fd60ee0 100644 --- a/src/lib/extractors/playwright-extractor.ts +++ b/src/lib/extractors/playwright-extractor.ts @@ -5,6 +5,7 @@ import { logger } from "@/lib/logger"; import { Readability } from "@mozilla/readability"; import { JSDOM } from "jsdom"; import { sanitizeHtml } from "@/lib/feed-parser"; +import { getFirstValidImage } from "@/lib/image-utils"; /** * Playwright-based content extractor for JavaScript-rendered content @@ -300,14 +301,18 @@ export class PlaywrightExtractor extends BaseExtractor { /** * Extract featured image from document + * Filters out placeholder images and prioritizes real images */ private extractImage(document: Document, baseUrl: string): string | undefined { - // Try Open Graph image + // Collect all candidate images + const candidates: (string | undefined)[] = []; + + // Try Open Graph image (highest priority) const ogImage = document.querySelector('meta[property="og:image"]'); if (ogImage) { const imageUrl = ogImage.getAttribute("content"); if (imageUrl) { - return this.resolveUrl(imageUrl, baseUrl); + candidates.push(this.resolveUrl(imageUrl, baseUrl)); } } @@ -316,20 +321,39 @@ export class PlaywrightExtractor extends BaseExtractor { if (twitterImage) { const imageUrl = twitterImage.getAttribute("content"); if (imageUrl) { - return this.resolveUrl(imageUrl, baseUrl); + candidates.push(this.resolveUrl(imageUrl, baseUrl)); } } - // Try first article image - const articleImage = document.querySelector("article img"); - if (articleImage) { - const imageUrl = 
articleImage.getAttribute("src"); - if (imageUrl) { - return this.resolveUrl(imageUrl, baseUrl); + // Try article images (Playwright renders JS, so lazy-loaded images should be loaded) + const articleImages = document.querySelectorAll("article img"); + for (const img of articleImages) { + // Check data-src first (in case JS hasn't fully swapped) + const dataSrc = img.getAttribute("data-src"); + if (dataSrc) { + candidates.push(this.resolveUrl(dataSrc, baseUrl)); + } + const src = img.getAttribute("src"); + if (src) { + candidates.push(this.resolveUrl(src, baseUrl)); + } + } + + // Try images in main content areas + const mainImages = document.querySelectorAll("main img, .content img, .article-body img"); + for (const img of mainImages) { + const dataSrc = img.getAttribute("data-src"); + if (dataSrc) { + candidates.push(this.resolveUrl(dataSrc, baseUrl)); + } + const src = img.getAttribute("src"); + if (src) { + candidates.push(this.resolveUrl(src, baseUrl)); } } - return undefined; + // Return the first non-placeholder image + return getFirstValidImage(candidates); } } diff --git a/src/lib/extractors/readability-extractor.ts b/src/lib/extractors/readability-extractor.ts index ee4fa3c..7481a69 100644 --- a/src/lib/extractors/readability-extractor.ts +++ b/src/lib/extractors/readability-extractor.ts @@ -4,6 +4,7 @@ import { BaseExtractor } from "./base-extractor"; import type { ExtractorConfig, ExtractedContent } from "./types"; import { sanitizeHtml } from "@/lib/feed-parser"; import { logger } from "@/lib/logger"; +import { filterPlaceholderImage, getFirstValidImage } from "@/lib/image-utils"; /** * Readability-based content extractor @@ -209,14 +210,18 @@ export class ReadabilityExtractor extends BaseExtractor { /** * Extract featured image from document + * Filters out placeholder images and prioritizes real images */ private extractImage(document: Document, baseUrl: string): string | undefined { - // Try Open Graph image + // Collect all candidate images + const 
candidates: (string | undefined)[] = []; + + // Try Open Graph image (highest priority) const ogImage = document.querySelector('meta[property="og:image"]'); if (ogImage) { const imageUrl = ogImage.getAttribute("content"); if (imageUrl) { - return this.resolveUrl(imageUrl, baseUrl); + candidates.push(this.resolveUrl(imageUrl, baseUrl)); } } @@ -225,29 +230,39 @@ export class ReadabilityExtractor extends BaseExtractor { if (twitterImage) { const imageUrl = twitterImage.getAttribute("content"); if (imageUrl) { - return this.resolveUrl(imageUrl, baseUrl); + candidates.push(this.resolveUrl(imageUrl, baseUrl)); } } - // Try first article image - const articleImage = document.querySelector("article img"); - if (articleImage) { - const imageUrl = articleImage.getAttribute("src"); - if (imageUrl) { - return this.resolveUrl(imageUrl, baseUrl); + // Try first article image (check both src and data-src for lazy-loaded images) + const articleImages = document.querySelectorAll("article img"); + for (const img of articleImages) { + // Prefer data-src for lazy-loaded images + const dataSrc = img.getAttribute("data-src"); + if (dataSrc) { + candidates.push(this.resolveUrl(dataSrc, baseUrl)); + } + const src = img.getAttribute("src"); + if (src) { + candidates.push(this.resolveUrl(src, baseUrl)); } } - // Try first image in content - const firstImage = document.querySelector("img"); - if (firstImage) { - const imageUrl = firstImage.getAttribute("src"); - if (imageUrl) { - return this.resolveUrl(imageUrl, baseUrl); + // Try images in main content areas + const mainImages = document.querySelectorAll("main img, .content img, .article-body img"); + for (const img of mainImages) { + const dataSrc = img.getAttribute("data-src"); + if (dataSrc) { + candidates.push(this.resolveUrl(dataSrc, baseUrl)); + } + const src = img.getAttribute("src"); + if (src) { + candidates.push(this.resolveUrl(src, baseUrl)); } } - return undefined; + // Return the first non-placeholder image + return 
getFirstValidImage(candidates); } } diff --git a/src/lib/feed-parser.ts b/src/lib/feed-parser.ts index d74d180..d8cba96 100644 --- a/src/lib/feed-parser.ts +++ b/src/lib/feed-parser.ts @@ -2,6 +2,7 @@ import { parseFeed as parseRawFeed } from "@rowanmanning/feed-parser"; import { createHash } from "crypto"; import { decode as decodeHtmlEntities } from "he"; import * as iconv from "iconv-lite"; +import { filterPlaceholderImage, extractFirstValidImageFromHtml } from "./image-utils"; /** * Type definitions for @rowanmanning/feed-parser @@ -10,10 +11,15 @@ interface RawFeed { title?: string; description?: string; url?: string; + language?: string; image?: { url?: string; title?: string; }; + categories?: Array<{ + term?: string; + label?: string; + }>; items: RawFeedItem[]; } @@ -30,51 +36,174 @@ interface RawFeedItem { email?: string; url?: string; }>; + categories?: Array<{ + term?: string; + label?: string; + }>; media?: Array<{ url?: string; + image?: string; type?: string; + mimeType?: string; title?: string; + length?: number; }>; + // Image from media:thumbnail, itunes:image, or media:content + image?: { + url?: string; + title?: string; + }; +} + +/** + * Fetch options for conditional requests (ETag/Last-Modified caching) + */ +export interface FetchOptions { + etag?: string; + lastModified?: string; +} + +/** + * Fetch result with caching headers + */ +export interface FetchResult { + content: string; + etag?: string; + lastModified?: string; + notModified: boolean; } +/** + * Feed parser timeout configuration + */ +const FETCH_TIMEOUT = 30000; // 30 seconds +const MAX_RETRIES = 2; +const RETRY_DELAY = 1000; // 1 second + /** * Fetch and decode feed with proper encoding handling - * Supports both RSS and Atom feeds + * Supports both RSS and Atom feeds, conditional requests, timeouts, and retries */ -async function fetchFeedWithEncoding(url: string): Promise { - const response = await fetch(url, { - headers: { - "User-Agent": "NeuReed/1.0 (RSS/Atom Reader)", - 
Accept: "application/rss+xml, application/atom+xml, application/xml, text/xml", - }, - }); +async function fetchFeedWithEncoding( + url: string, + options?: FetchOptions +): Promise { + let lastError: Error | null = null; + + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT); + + const headers: Record = { + "User-Agent": "NeuReed/1.0 (RSS/Atom Reader; +https://github.com/neureed)", + Accept: "application/rss+xml, application/atom+xml, application/xml, text/xml, */*;q=0.1", + }; + + // Add conditional request headers for bandwidth optimization + if (options?.etag) { + headers["If-None-Match"] = options.etag; + } + if (options?.lastModified) { + headers["If-Modified-Since"] = options.lastModified; + } + + const response = await fetch(url, { + headers, + signal: controller.signal, + }); + + clearTimeout(timeoutId); + + // Handle 304 Not Modified + if (response.status === 304) { + return { + content: "", + etag: response.headers.get("etag") || options?.etag, + lastModified: response.headers.get("last-modified") || options?.lastModified, + notModified: true, + }; + } - if (!response.ok) { - throw new Error(`HTTP ${response.status}: ${response.statusText}`); + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const buffer = await response.arrayBuffer(); + const uint8Array = new Uint8Array(buffer); + + // Try to detect encoding from XML declaration or Content-Type header + const contentType = response.headers.get("content-type") || ""; + let encoding = 'utf-8'; + + // Check Content-Type header first + const charsetMatch = contentType.match(/charset=([^;\s]+)/i); + if (charsetMatch && charsetMatch[1]) { + encoding = charsetMatch[1].toLowerCase().replace(/["']/g, ''); + } else { + // Fall back to XML declaration + const firstBytes = uint8Array.slice(0, 200); + const asciiText = new 
TextDecoder('ascii').decode(firstBytes); + const encodingMatch = asciiText.match(/encoding=["']([^"']+)["']/i); + if (encodingMatch && encodingMatch[1]) { + encoding = encodingMatch[1].toLowerCase(); + } + } + + // Normalize encoding names and decode + const content = decodeWithEncoding(uint8Array, encoding); + + return { + content, + etag: response.headers.get("etag") || undefined, + lastModified: response.headers.get("last-modified") || undefined, + notModified: false, + }; + } catch (error) { + lastError = error instanceof Error ? error : new Error(String(error)); + + // Don't retry on abort (timeout) or client errors + if (lastError.name === 'AbortError') { + throw new Error(`Feed fetch timeout after ${FETCH_TIMEOUT}ms`); + } + + // Retry on network errors + if (attempt < MAX_RETRIES) { + await new Promise(resolve => setTimeout(resolve, RETRY_DELAY * (attempt + 1))); + continue; + } + } } + + throw lastError || new Error('Failed to fetch feed'); +} - const buffer = await response.arrayBuffer(); - const uint8Array = new Uint8Array(buffer); +/** + * Decode buffer with specified encoding + */ +function decodeWithEncoding(uint8Array: Uint8Array, encoding: string): string { + // Normalize encoding names + const normalizedEncoding = encoding.toLowerCase().replace(/-/g, ''); - // Try to detect encoding from XML declaration - const firstBytes = uint8Array.slice(0, 200); - const asciiText = new TextDecoder('ascii').decode(firstBytes); - const encodingMatch = asciiText.match(/encoding=["']([^"']+)["']/i); + // Map common encoding aliases + const encodingMap: Record = { + 'latin1': 'iso-8859-1', + 'iso88591': 'iso-8859-1', + 'windows1252': 'windows-1252', + 'cp1252': 'windows-1252', + 'iso885915': 'iso-8859-15', + 'utf8': 'utf-8', + }; - let encoding = 'utf-8'; - if (encodingMatch && encodingMatch[1]) { - encoding = encodingMatch[1].toLowerCase(); - } + const targetEncoding = encodingMap[normalizedEncoding] || encoding; - // Convert to UTF-8 if needed - if (encoding === 
'iso-8859-1' || encoding === 'latin1') { - return iconv.decode(Buffer.from(uint8Array), 'iso-8859-1'); - } else if (encoding === 'windows-1252') { - return iconv.decode(Buffer.from(uint8Array), 'windows-1252'); - } else { - // Assume UTF-8 - return new TextDecoder('utf-8').decode(uint8Array); + // Use iconv-lite for non-UTF-8 encodings + if (targetEncoding !== 'utf-8' && iconv.encodingExists(targetEncoding)) { + return iconv.decode(Buffer.from(uint8Array), targetEncoding); } + + // Default to UTF-8 + return new TextDecoder('utf-8').decode(uint8Array); } /** @@ -84,8 +213,13 @@ export interface ParsedFeed { title: string; description?: string; link?: string; + language?: string; imageUrl?: string; + categories?: string[]; items: ParsedArticle[]; + // Caching headers for conditional requests + etag?: string; + lastModified?: string; } /** @@ -100,24 +234,30 @@ export interface ParsedArticle { author?: string; publishedAt?: Date; imageUrl?: string; + categories?: string[]; } -/** - * Feed parser timeout configuration - */ -const FETCH_TIMEOUT = 30000; // 30 seconds - /** * Parse an RSS 2.0 or Atom 1.0 feed from a URL * @param url - The feed URL to parse - * @returns Parsed feed data with articles + * @param options - Optional fetch options for conditional requests + * @returns Parsed feed data with articles, or null if not modified (304) * @throws Error if feed cannot be parsed or fetched */ -export async function parseFeedUrl(url: string): Promise { +export async function parseFeedUrl( + url: string, + options?: FetchOptions +): Promise { try { - // Fetch with proper encoding handling - const xmlContent = await fetchFeedWithEncoding(url); - const feed = parseRawFeed(xmlContent) as RawFeed; + // Fetch with proper encoding handling, timeout, and retries + const result = await fetchFeedWithEncoding(url, options); + + // Return null for 304 Not Modified responses + if (result.notModified) { + return null; + } + + const feed = parseRawFeed(result.content) as RawFeed; // 
Extract and ensure imageUrl is a string let imageUrl = extractFeedImage(feed); @@ -126,13 +266,22 @@ export async function parseFeedUrl(url: string): Promise { if (Array.isArray(imageUrl)) { imageUrl = imageUrl[0]; } + + // Extract categories + const categories = feed.categories + ?.map(cat => cat.label || cat.term) + .filter((c): c is string => !!c); return { title: feed.title || "Untitled Feed", description: feed.description || undefined, link: feed.url || undefined, + language: feed.language || undefined, imageUrl: imageUrl, + categories: categories?.length ? categories : undefined, items: feed.items.map((item) => parseArticle(item)), + etag: result.etag, + lastModified: result.lastModified, }; } catch (error) { if (error instanceof Error) { @@ -160,14 +309,30 @@ export async function validateFeedUrl(url: string): Promise { } // Try to parse the feed with encoding handling - const xmlContent = await fetchFeedWithEncoding(url); - const result = parseRawFeed(xmlContent) as RawFeed; + const result = await fetchFeedWithEncoding(url); + if (result.notModified || !result.content) { + return false; + } + parseRawFeed(result.content) as RawFeed; return true; } catch (error) { return false; } } +/** + * Check if a URL looks like an image based on extension + */ +function looksLikeImageUrl(url: string): boolean { + try { + const pathname = new URL(url).pathname.toLowerCase(); + const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.bmp', '.ico']; + return imageExtensions.some(ext => pathname.endsWith(ext)); + } catch { + return false; + } +} + /** * Parse a single feed item into an article * Handles both RSS and Atom item formats @@ -203,6 +368,11 @@ function parseArticle(item: RawFeedItem): ParsedArticle { // Extract author (handle both RSS and Atom formats) const author = extractAuthor(item); + + // Extract categories/tags + const categories = item.categories + ?.map(cat => cat.label || cat.term) + .filter((c): c is string => !!c); // Decode HTML 
entities from all text fields const decodedTitle = item.title ? decodeHtmlEntities(item.title) : "Untitled"; @@ -223,6 +393,7 @@ function parseArticle(item: RawFeedItem): ParsedArticle { // publishedAt is already validated above and guaranteed to be a valid Date publishedAt, imageUrl, + categories: categories?.length ? categories : undefined, }; } @@ -253,7 +424,52 @@ function extractAuthor(item: RawFeedItem): string | undefined { function extractContent(item: RawFeedItem): string { // @rowanmanning/feed-parser provides content and description // Prefer content (which includes content:encoded from RSS) over description - return item.content || item.description || ""; + const rawContent = item.content || item.description || ""; + + // Format plain text CDATA content with paragraph breaks + return formatPlainTextContent(rawContent); +} + +/** + * Break long CDATA/plain text into paragraphs by adding HTML breaks after sentence boundaries. + * Only applies if the text has no existing line breaks or HTML tags (typical of CDATA blocks). + * Converts sentence breaks to

for proper HTML display. + */ +function formatPlainTextContent(text: string): string { + // Skip if already has line breaks or HTML tags + if (text.includes('\n') || text.includes('\r') || /<[a-z][\s\S]*>/i.test(text)) { + return text; + } + + // Common abbreviations that shouldn't trigger a line break + const abbreviations = [ + 'Mr', 'Mrs', 'Ms', 'Dr', 'Prof', 'Sr', 'Jr', 'Rev', 'Gen', 'Col', 'Lt', 'Sgt', + 'St', 'Ave', 'Blvd', 'Rd', 'Inc', 'Corp', 'Ltd', 'Co', 'vs', 'etc', 'al', + 'Jan', 'Feb', 'Mar', 'Apr', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', + 'Fig', 'No', 'Vol', 'pp', 'ed', 'trans', 'approx', 'est', 'min', 'max' + ]; + + // Build regex pattern: sentence-ending punctuation followed by space and capital letter + // BUT NOT after single capital letter (initials like "J. K.") or known abbreviations + return text.replace(/([.!?])\s+([A-Z])/g, (match, punct, nextChar, offset) => { + // Get the word before the punctuation + const textBefore = text.substring(0, offset as number); + const wordBeforeMatch = textBefore.match(/(\S+)$/); + const wordBefore = wordBeforeMatch?.[1] ?? ''; + + // Don't break after single capital letter (initials like "J." or "A.") + if (/^[A-Z]$/.test(wordBefore)) { + return match; + } + + // Don't break after common abbreviations + if (abbreviations.some(abbr => wordBefore.toLowerCase() === abbr.toLowerCase())) { + return match; + } + + // This looks like a real sentence boundary + return `${punct}

${nextChar}`; + }); } /** @@ -262,7 +478,8 @@ function extractContent(item: RawFeedItem): string { function extractExcerpt(item: RawFeedItem, content: string): string | undefined { // If description is different from content, use it as excerpt if (item.description && item.description !== content) { - return item.description.substring(0, 500); + const processed = formatPlainTextContent(item.description); + return processed.substring(0, 500); } // Otherwise, generate excerpt from content @@ -277,11 +494,12 @@ function extractExcerpt(item: RawFeedItem, content: string): string | undefined /** * Extract image URL from feed metadata * Supports both RSS and Atom formats + * Filters out placeholder images */ function extractFeedImage(feed: RawFeed): string | undefined { // @rowanmanning/feed-parser provides image as an object with url if (feed.image?.url) { - return feed.image.url; + return filterPlaceholderImage(feed.image.url); } return undefined; @@ -289,36 +507,68 @@ function extractFeedImage(feed: RawFeed): string | undefined { /** * Extract image URL from article + * Filters out placeholder images and prioritizes real images */ function extractArticleImage(item: RawFeedItem, content: string): string | undefined { - // Check media array (includes enclosures and media:content) + // Collect all candidate images + const candidates: (string | undefined)[] = []; + + // 1. Check item.image first (includes media:thumbnail, itunes:image, media:content images) + // This is the primary source for article header images + if (item.image?.url) { + candidates.push(item.image.url); + } + + // 2. 
Check media array for image types (enclosures and media:content) if (item.media && item.media.length > 0) { - // Find first image media item + // First, check for media items with explicit image property (thumbnails) for (const media of item.media) { - if (media.url && media.type?.startsWith("image/")) { - return media.url; + if (media.image) { + candidates.push(media.image); } } - // If no explicit image type, use first media with URL + // Then, find image media items by type or mimeType + for (const media of item.media) { + const isImage = media.type === "image" || + media.mimeType?.startsWith("image/") || + media.type?.startsWith("image"); + if (media.url && isImage) { + candidates.push(media.url); + } + } + // Only use untyped media if URL looks like an image const firstMedia = item.media[0]; - if (firstMedia?.url) { - return firstMedia.url; + if (firstMedia?.url && looksLikeImageUrl(firstMedia.url)) { + candidates.push(firstMedia.url); } } - // Extract from content as fallback - return extractImageFromContent(content) || undefined; + // 3. Extract from content as fallback - use the new function that filters placeholders + const contentImage = extractFirstValidImageFromHtml(content); + if (contentImage) { + candidates.push(contentImage); + } + + // Return the first non-placeholder image + for (const candidate of candidates) { + const filtered = filterPlaceholderImage(candidate); + if (filtered) { + return filtered; + } + } + + return undefined; } /** * Extract first image URL from HTML content * @param html - HTML content to search * @returns First image URL found, or null + * @deprecated Use extractFirstValidImageFromHtml from image-utils.ts instead */ export function extractImageFromContent(html: string): string | null { - const imgRegex = /]+src=["']([^"']+)["']/i; - const match = html.match(imgRegex); - return (match && match[1]) ? 
match[1] : null; + // Use the new function that filters out placeholder images + return extractFirstValidImageFromHtml(html); } /** diff --git a/src/lib/image-utils.ts b/src/lib/image-utils.ts new file mode 100644 index 0000000..8a7236a --- /dev/null +++ b/src/lib/image-utils.ts @@ -0,0 +1,256 @@ +/** + * Image utility functions for detecting and filtering placeholder images + */ + +/** + * Known placeholder image URL patterns + * These patterns match common placeholder/lazy-load images from various sources + */ +const PLACEHOLDER_URL_PATTERNS: RegExp[] = [ + // BBC placeholders + /grey-placeholder\.png$/i, + /placeholder\.png$/i, + /placeholder\.jpg$/i, + /placeholder\.gif$/i, + /placeholder\.svg$/i, + /placeholder\.webp$/i, + + // Common placeholder naming patterns + /\/placeholder[_-]?/i, + /\/lazy[_-]?load/i, + /\/loading[_-]?/i, + /\/blank\.(png|jpg|gif|svg|webp)$/i, + /\/empty\.(png|jpg|gif|svg|webp)$/i, + /\/spacer\.(png|jpg|gif|svg|webp)$/i, + /\/pixel\.(png|jpg|gif|svg|webp)$/i, + /\/1x1\.(png|jpg|gif|svg|webp)$/i, + /\/transparent\.(png|gif|svg|webp)$/i, + + // Common CDN placeholder patterns + /via\.placeholder\.com/i, + /placehold\.it/i, + /placeholdit\.imgix\.net/i, + /dummyimage\.com/i, + /placeholder\.pics/i, + /fakeimg\.pl/i, + /lorempixel\.com/i, + /placekitten\.com/i, + /placeimg\.com/i, + + // Data URIs that are likely placeholders (very small base64 images) + /^data:image\/[^;]+;base64,.{0,200}$/i, +]; + +/** + * Known placeholder image hostnames/domains + */ +const PLACEHOLDER_DOMAINS: string[] = [ + 'via.placeholder.com', + 'placehold.it', + 'placeholdit.imgix.net', + 'dummyimage.com', + 'placeholder.pics', + 'fakeimg.pl', + 'lorempixel.com', + 'placekitten.com', + 'placeimg.com', +]; + +/** + * Keywords in URL paths that indicate placeholder images + */ +const PLACEHOLDER_PATH_KEYWORDS: string[] = [ + 'placeholder', + 'grey-placeholder', + 'gray-placeholder', + 'lazy-load', + 'lazyload', + 'loading', + 'blank', + 'empty', + 'spacer', + 
'pixel', + '1x1', + 'transparent', + 'default-image', + 'no-image', + 'noimage', + 'missing', +]; + +/** + * Check if an image URL is likely a placeholder image + * + * @param url - The image URL to check + * @returns true if the URL appears to be a placeholder image + */ +export function isPlaceholderImage(url: string | null | undefined): boolean { + if (!url) return true; // Treat null/undefined as placeholder + + const trimmedUrl = url.trim(); + if (!trimmedUrl) return true; + + // Check against URL patterns + for (const pattern of PLACEHOLDER_URL_PATTERNS) { + if (pattern.test(trimmedUrl)) { + return true; + } + } + + // Parse URL for more detailed checks + try { + // Handle data URIs separately + if (trimmedUrl.startsWith('data:')) { + // Very small base64 images are likely placeholders + // A typical 1x1 pixel PNG is about 68 characters in base64 + // Allow up to ~500 chars which would be a very small image + return trimmedUrl.length < 500; + } + + const urlObj = new URL(trimmedUrl); + + // Check hostname against known placeholder domains + const hostname = urlObj.hostname.toLowerCase(); + if (PLACEHOLDER_DOMAINS.some(domain => hostname.includes(domain))) { + return true; + } + + // Check path for placeholder keywords + const pathname = urlObj.pathname.toLowerCase(); + for (const keyword of PLACEHOLDER_PATH_KEYWORDS) { + if (pathname.includes(keyword)) { + return true; + } + } + + // Check for very small dimension indicators in URL + // e.g., /1x1.png, /1/1.png, ?w=1&h=1 + if (/[\/\?&]1x1[\/\.\?&]|[\/\?&]w=1[&$]|[\/\?&]h=1[&$]/i.test(trimmedUrl)) { + return true; + } + + } catch { + // If URL parsing fails, check the raw string + const lowerUrl = trimmedUrl.toLowerCase(); + for (const keyword of PLACEHOLDER_PATH_KEYWORDS) { + if (lowerUrl.includes(keyword)) { + return true; + } + } + } + + return false; +} + +/** + * Filter out placeholder images from a URL, returning undefined if it's a placeholder + * + * @param url - The image URL to check + * @returns The 
URL if it's not a placeholder, undefined otherwise + */ +export function filterPlaceholderImage(url: string | null | undefined): string | undefined { + if (isPlaceholderImage(url)) { + return undefined; + } + return url || undefined; +} + +/** + * Get the first non-placeholder image from a list of URLs + * + * @param urls - Array of image URLs to check + * @returns The first non-placeholder URL, or undefined if all are placeholders + */ +export function getFirstValidImage(urls: (string | null | undefined)[]): string | undefined { + for (const url of urls) { + if (!isPlaceholderImage(url)) { + return url || undefined; + } + } + return undefined; +} + +/** + * Remove placeholder images from HTML content + * This removes img tags that have placeholder src attributes + * + * @param html - HTML content to process + * @returns HTML with placeholder images removed + */ +export function removePlaceholderImagesFromHtml(html: string): string { + if (!html) return html; + + // Match img tags and check their src + return html.replace( + /]*?)>/gi, + (match, attributes) => { + // Extract src attribute + const srcMatch = attributes.match(/src\s*=\s*["']([^"']+)["']/i); + if (srcMatch && srcMatch[1]) { + const src = srcMatch[1]; + if (isPlaceholderImage(src)) { + // Remove the entire img tag if it's a placeholder + return ''; + } + } + + // Also check data-src for lazy-loaded images that might be placeholders + const dataSrcMatch = attributes.match(/data-src\s*=\s*["']([^"']+)["']/i); + if (dataSrcMatch && dataSrcMatch[1]) { + // If data-src exists and is valid, replace src with data-src + const dataSrc = dataSrcMatch[1]; + if (!isPlaceholderImage(dataSrc)) { + // Replace placeholder src with data-src + const newAttributes = attributes + .replace(/src\s*=\s*["'][^"']+["']/i, `src="${dataSrc}"`) + .replace(/data-src\s*=\s*["'][^"']+["']/i, ''); + return ``; + } + } + + return match; + } + ); +} + +/** + * Extract the first valid (non-placeholder) image from HTML content + * + * 
@param html - HTML content to search + * @returns The first valid image URL, or null if none found + */ +export function extractFirstValidImageFromHtml(html: string): string | null { + if (!html) return null; + + // Match all img tags + const imgRegex = /]*?)>/gi; + let match; + + while ((match = imgRegex.exec(html)) !== null) { + const attributes = match[1]; + if (!attributes) continue; + + // Try src first + const srcMatch = attributes.match(/src\s*=\s*["']([^"']+)["']/i); + if (srcMatch && srcMatch[1] && !isPlaceholderImage(srcMatch[1])) { + return srcMatch[1]; + } + + // Try data-src for lazy-loaded images + const dataSrcMatch = attributes.match(/data-src\s*=\s*["']([^"']+)["']/i); + if (dataSrcMatch && dataSrcMatch[1] && !isPlaceholderImage(dataSrcMatch[1])) { + return dataSrcMatch[1]; + } + + // Try srcset (get the first URL) + const srcsetMatch = attributes.match(/srcset\s*=\s*["']([^"']+)["']/i); + if (srcsetMatch && srcsetMatch[1]) { + const firstSrcset = srcsetMatch[1].split(',')[0]?.trim().split(/\s+/)[0]; + if (firstSrcset && !isPlaceholderImage(firstSrcset)) { + return firstSrcset; + } + } + } + + return null; +} diff --git a/src/lib/llm/__tests__/json-parser.test.ts b/src/lib/llm/__tests__/json-parser.test.ts new file mode 100644 index 0000000..84357ce --- /dev/null +++ b/src/lib/llm/__tests__/json-parser.test.ts @@ -0,0 +1,119 @@ +/** + * Tests for JSON Parser Utilities + */ + +import { extractJSON, parseJSONFromLLM, parseJSONFromLLMSafe } from "../json-parser"; + +describe("extractJSON", () => { + it("should extract plain JSON", () => { + const input = '{"key": "value"}'; + const result = extractJSON(input); + expect(result).toBe('{"key": "value"}'); + }); + + it("should extract JSON from markdown code blocks with json tag", () => { + const input = "Here's the JSON:\n```json\n{\"key\": \"value\"}\n```"; + const result = extractJSON(input); + expect(result).toBe('{"key": "value"}'); + }); + + it("should extract JSON from markdown code blocks without 
json tag", () => { + const input = "```\n{\"key\": \"value\"}\n```"; + const result = extractJSON(input); + expect(result).toBe('{"key": "value"}'); + }); + + it("should remove introductory text before JSON", () => { + const input = "Sure! Here's the result: {\"key\": \"value\"}"; + const result = extractJSON(input); + expect(result).toBe('{"key": "value"}'); + }); + + it("should remove trailing text after JSON", () => { + const input = '{\"key\": \"value\"} I hope this helps!'; + const result = extractJSON(input); + expect(result).toBe('{"key": "value"}'); + }); + + it("should handle JSON arrays", () => { + const input = '["item1", "item2", "item3"]'; + const result = extractJSON(input); + expect(result).toBe('["item1", "item2", "item3"]'); + }); + + it("should extract JSON array from markdown", () => { + const input = "```json\n[\"item1\", \"item2\"]\n```"; + const result = extractJSON(input); + expect(result).toBe('["item1", "item2"]'); + }); + + it("should handle complex nested JSON", () => { + const input = ` +Here's your response: +\`\`\`json +{ + "summary": "This is a summary", + "keyPoints": ["point 1", "point 2"], + "topics": ["topic1", "topic2"], + "sentiment": "positive" +} +\`\`\` +I hope this is helpful! 
+ `.trim(); + const result = extractJSON(input); + const parsed = JSON.parse(result); + expect(parsed.summary).toBe("This is a summary"); + expect(parsed.keyPoints).toHaveLength(2); + expect(parsed.topics).toHaveLength(2); + }); +}); + +describe("parseJSONFromLLM", () => { + it("should parse valid JSON", () => { + const input = '{"key": "value"}'; + const result = parseJSONFromLLM(input); + expect(result).toEqual({ key: "value" }); + }); + + it("should parse JSON from markdown", () => { + const input = "```json\n{\"key\": \"value\"}\n```"; + const result = parseJSONFromLLM(input); + expect(result).toEqual({ key: "value" }); + }); + + it("should throw error for invalid JSON", () => { + const input = "This is not JSON"; + expect(() => parseJSONFromLLM(input)).toThrow(); + }); + + it("should parse JSON with context for logging", () => { + const input = '{"key": "value"}'; + const result = parseJSONFromLLM(input, { + model: "gpt-4", + operation: "test", + }); + expect(result).toEqual({ key: "value" }); + }); +}); + +describe("parseJSONFromLLMSafe", () => { + it("should parse valid JSON", () => { + const input = '{"key": "value"}'; + const result = parseJSONFromLLMSafe(input, { fallback: true }); + expect(result).toEqual({ key: "value" }); + }); + + it("should return fallback for invalid JSON", () => { + const input = "This is not JSON"; + const fallback = { fallback: true, empty: [] }; + const result = parseJSONFromLLMSafe(input, fallback); + expect(result).toEqual(fallback); + }); + + it("should return fallback for empty response", () => { + const input = ""; + const fallback = { summary: "", keyPoints: [], topics: [] }; + const result = parseJSONFromLLMSafe(input, fallback); + expect(result).toEqual(fallback); + }); +}); diff --git a/src/lib/llm/json-parser.ts b/src/lib/llm/json-parser.ts new file mode 100644 index 0000000..755e49c --- /dev/null +++ b/src/lib/llm/json-parser.ts @@ -0,0 +1,117 @@ +/** + * JSON Parser Utilities for LLM Responses + * Handles extraction 
of JSON from responses that may be wrapped in markdown or have introductory text + */ + +import { logger } from "../logger"; + +/** + * Extract JSON from LLM response that may be wrapped in markdown code blocks or have introductory text + * + * Handles cases like: + * - "Here's the result: ```json\n{...}\n```" + * - "```\n{...}\n```" + * - "Sure! ```json {... } ```" + * - "{...}" (plain JSON) + */ +export function extractJSON(rawResponse: string): string { + // Trim whitespace + let cleaned = rawResponse.trim(); + + // Try to extract JSON from markdown code blocks + // Match ```json or ``` followed by JSON content + const codeBlockMatch = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/); + if (codeBlockMatch?.[1]) { + cleaned = codeBlockMatch[1].trim(); + } + + // Remove any leading text before the first { or [ + const jsonStartMatch = cleaned.match(/^[^{[]*([{[][\s\S]*)/); + if (jsonStartMatch?.[1]) { + cleaned = jsonStartMatch[1]; + } + + // Remove any trailing text after the last } or ] + const jsonEndMatch = cleaned.match(/([\s\S]*[}\]])[^}\]]*$/); + if (jsonEndMatch?.[1]) { + cleaned = jsonEndMatch[1]; + } + + return cleaned.trim(); +} + +/** + * Parse JSON from LLM response with automatic extraction and detailed error logging + * + * @param rawResponse - Raw LLM response that may contain JSON + * @param context - Context information for error logging (e.g., model name, operation) + * @returns Parsed JSON object + * @throws Error if JSON cannot be parsed even after extraction + */ +export function parseJSONFromLLM( + rawResponse: string, + context?: { model?: string; operation?: string } +): unknown { + // Log what we're trying to parse + logger.debug("Attempting to parse LLM response", { + responseLength: rawResponse.length, + responsePreview: rawResponse.substring(0, 200), + isEmpty: rawResponse.trim().length === 0, + model: context?.model, + operation: context?.operation, + }); + + const extracted = extractJSON(rawResponse); + + logger.debug("Extracted 
JSON from response", { + extractedLength: extracted.length, + extractedPreview: extracted.substring(0, 200), + wasModified: extracted !== rawResponse, + model: context?.model, + operation: context?.operation, + }); + + try { + return JSON.parse(extracted); + } catch (error) { + // Log detailed error information + logger.error("Failed to parse LLM JSON response", { + parseError: error instanceof Error ? error.message : String(error), + rawResponse: rawResponse.substring(0, 1000), // First 1000 chars to avoid huge logs + extractedJSON: extracted.substring(0, 1000), + responseLength: rawResponse.length, + extractedLength: extracted.length, + isEmpty: rawResponse.trim().length === 0, + model: context?.model, + operation: context?.operation, + }); + + throw new Error( + `Failed to parse JSON from LLM response: ${error instanceof Error ? error.message : String(error)}` + ); + } +} + +/** + * Safely parse JSON from LLM response with fallback value + * + * @param rawResponse - Raw LLM response + * @param fallback - Fallback value if parsing fails + * @param context - Context information for error logging + * @returns Parsed JSON or fallback value + */ +export function parseJSONFromLLMSafe( + rawResponse: string, + fallback: T, + context?: { model?: string; operation?: string } +): T { + try { + return parseJSONFromLLM(rawResponse, context) as T; + } catch (error) { + logger.warn("Using fallback value due to JSON parse error", { + context, + fallbackUsed: true, + }); + return fallback; + } +} diff --git a/src/lib/llm/ollama-provider.ts b/src/lib/llm/ollama-provider.ts index b1e79e8..4eddc68 100644 --- a/src/lib/llm/ollama-provider.ts +++ b/src/lib/llm/ollama-provider.ts @@ -5,6 +5,7 @@ import { env } from "@/env"; import { logger } from "../logger"; +import { parseJSONFromLLM } from "./json-parser"; import type { LLMProviderInterface, LLMCompletionRequest, @@ -103,21 +104,24 @@ Respond in JSON format with keys: summary, keyPoints (array), topics (array), se // Try to parse 
JSON response try { - const parsed = JSON.parse(response.content); + const parsed = parseJSONFromLLM(response.content, { + model: this.model, + operation: "summarizeArticle", + }) as { summary?: string; keyPoints?: string[]; topics?: string[]; sentiment?: string }; return { summary: parsed.summary || "", keyPoints: Array.isArray(parsed.keyPoints) ? parsed.keyPoints : [], topics: Array.isArray(parsed.topics) ? parsed.topics : [], sentiment: ["positive", "neutral", "negative"].includes( - parsed.sentiment + parsed.sentiment ?? "" ) - ? parsed.sentiment + ? (parsed.sentiment as "positive" | "neutral" | "negative") : "neutral", }; } catch (parseError) { // Fallback: extract from text response - logger.warn("Failed to parse LLM JSON response, using fallback", { - parseError, + logger.warn("Using fallback for article summary due to parse error", { + model: this.model, }); return { @@ -155,12 +159,18 @@ Respond with a JSON array of strings.`; }); try { - const parsed = JSON.parse(response.content); + const parsed = parseJSONFromLLM(response.content, { + model: this.model, + operation: "extractKeyPoints", + }) as string[] | unknown; if (Array.isArray(parsed)) { return parsed.slice(0, count); } } catch (parseError) { // Fallback: split by newlines and filter + logger.warn("Using fallback for key points extraction", { + model: this.model, + }); const lines = response.content .split("\n") .map((line) => line.trim()) @@ -200,7 +210,10 @@ Respond with a JSON array of lowercase strings.`; }); try { - const parsed = JSON.parse(response.content); + const parsed = parseJSONFromLLM(response.content, { + model: this.model, + operation: "detectTopics", + }) as string[] | unknown; if (Array.isArray(parsed)) { return parsed .map((topic) => String(topic).toLowerCase().trim()) @@ -209,6 +222,9 @@ Respond with a JSON array of lowercase strings.`; } } catch (parseError) { // Fallback: extract comma-separated values + logger.warn("Using fallback for topic detection", { + model: this.model, + 
}); const topics = response.content .split(/[,\n]/) .map((topic) => topic.toLowerCase().trim()) diff --git a/src/lib/llm/openai-provider.ts b/src/lib/llm/openai-provider.ts index 648d62b..9aba1d9 100644 --- a/src/lib/llm/openai-provider.ts +++ b/src/lib/llm/openai-provider.ts @@ -5,6 +5,7 @@ import { env } from "@/env"; import { logger } from "../logger"; +import { parseJSONFromLLM } from "./json-parser"; import type { LLMProviderInterface, LLMCompletionRequest, @@ -85,8 +86,24 @@ export class OpenAILLMProvider implements LLMProviderInterface { } const data = await response.json(); + + // Log the full API response for debugging + logger.debug("Raw LLM API response", { + model: data.model, + choicesCount: data.choices?.length, + hasUsage: !!data.usage, + fullResponse: JSON.stringify(data).substring(0, 2000), // First 2000 chars + }); + const choice = data.choices[0]; + // Log the message content specifically + logger.debug("LLM message content", { + contentLength: choice.message?.content?.length ?? 0, + content: choice.message?.content?.substring(0, 500) ?? "(empty)", // First 500 chars + hasContent: !!choice.message?.content, + }); + logger.debug("LLM response received", { model: data.model, promptTokens: data.usage.prompt_tokens, @@ -144,23 +161,23 @@ You MUST respond ONLY with valid JSON in this exact format: // Try to parse JSON response try { - const parsed = JSON.parse(response.content); + const parsed = parseJSONFromLLM(response.content, { + model: this.model, + operation: "summarizeArticle", + }) as { summary?: string; keyPoints?: string[]; topics?: string[]; sentiment?: string }; return { summary: parsed.summary || "", keyPoints: Array.isArray(parsed.keyPoints) ? parsed.keyPoints : [], topics: Array.isArray(parsed.topics) ? parsed.topics : [], sentiment: ["positive", "neutral", "negative"].includes( - parsed.sentiment + parsed.sentiment ?? "" ) - ? parsed.sentiment + ? 
(parsed.sentiment as "positive" | "neutral" | "negative") : "neutral", }; } catch (parseError) { // Fallback: extract from text response - logger.error("Failed to parse LLM JSON response, using fallback", { - parseError: parseError instanceof Error ? parseError.message : String(parseError), - fullRawResponse: response.content, - responseLength: response.content.length, + logger.warn("Using fallback for article summary due to parse error", { model: this.model, }); @@ -199,12 +216,18 @@ You MUST respond ONLY with a valid JSON array of strings like: ["point 1", "poin }); try { - const parsed = JSON.parse(response.content); + const parsed = parseJSONFromLLM(response.content, { + model: this.model, + operation: "extractKeyPoints", + }) as string[] | unknown; if (Array.isArray(parsed)) { return parsed.slice(0, count); } } catch (parseError) { // Fallback: split by newlines and filter + logger.warn("Using fallback for key points extraction", { + model: this.model, + }); const lines = response.content .split("\n") .map((line) => line.trim()) @@ -244,7 +267,10 @@ You MUST respond ONLY with a valid JSON array of lowercase strings like: ["topic }); try { - const parsed = JSON.parse(response.content); + const parsed = parseJSONFromLLM(response.content, { + model: this.model, + operation: "detectTopics", + }) as string[] | unknown; if (Array.isArray(parsed)) { return parsed .map((topic) => String(topic).toLowerCase().trim()) @@ -253,6 +279,9 @@ You MUST respond ONLY with a valid JSON array of lowercase strings like: ["topic } } catch (parseError) { // Fallback: extract comma-separated values + logger.warn("Using fallback for topic detection", { + model: this.model, + }); const topics = response.content .split(/[,\n]/) .map((topic) => topic.toLowerCase().trim()) diff --git a/src/lib/services/__tests__/saved-search-execution.test.ts b/src/lib/services/__tests__/saved-search-execution.test.ts index 10c7fed..d476843 100644 --- 
a/src/lib/services/__tests__/saved-search-execution.test.ts +++ b/src/lib/services/__tests__/saved-search-execution.test.ts @@ -4,7 +4,7 @@ * Tests the search execution and scoring algorithms */ -import { describe, it, expect, beforeAll, afterAll, beforeEach } from '@jest/globals'; +// Vitest globals are enabled in vitest.config.ts import { executeSearch, matchArticle } from '../saved-search-execution'; import { prisma } from '@/lib/db'; import { nanoid } from 'nanoid'; diff --git a/src/lib/services/__tests__/saved-search-integration.test.ts b/src/lib/services/__tests__/saved-search-integration.test.ts index 7e89211..e5f3359 100644 --- a/src/lib/services/__tests__/saved-search-integration.test.ts +++ b/src/lib/services/__tests__/saved-search-integration.test.ts @@ -4,7 +4,7 @@ * Tests the complete flow from creation to matching to notifications */ -import { describe, it, expect, beforeAll, afterAll } from '@jest/globals'; +// Vitest globals are enabled in vitest.config.ts import { prisma } from '@/lib/db'; import { nanoid } from 'nanoid'; import { createSavedSearch } from '../saved-search-service'; diff --git a/src/lib/services/__tests__/saved-search-matcher.test.ts b/src/lib/services/__tests__/saved-search-matcher.test.ts index 245a377..283ba21 100644 --- a/src/lib/services/__tests__/saved-search-matcher.test.ts +++ b/src/lib/services/__tests__/saved-search-matcher.test.ts @@ -4,7 +4,7 @@ * Tests the automatic matching of articles to saved searches */ -import { describe, it, expect, beforeAll, afterAll } from '@jest/globals'; +// Vitest globals are enabled in vitest.config.ts import { matchNewArticles, rematchSavedSearch } from '../saved-search-matcher'; import { createSavedSearch } from '../saved-search-service'; import { prisma } from '@/lib/db'; diff --git a/src/lib/services/__tests__/saved-search-service.test.ts b/src/lib/services/__tests__/saved-search-service.test.ts index c0e04ea..838a9d6 100644 --- a/src/lib/services/__tests__/saved-search-service.test.ts 
+++ b/src/lib/services/__tests__/saved-search-service.test.ts @@ -4,7 +4,7 @@ * Tests CRUD operations and business logic for saved searches */ -import { describe, it, expect, beforeAll, afterAll, beforeEach } from '@jest/globals'; +// Vitest globals are enabled in vitest.config.ts import { createSavedSearch, updateSavedSearch, diff --git a/src/lib/services/admin-settings-service.ts b/src/lib/services/admin-settings-service.ts index 475f485..3290021 100644 --- a/src/lib/services/admin-settings-service.ts +++ b/src/lib/services/admin-settings-service.ts @@ -131,15 +131,20 @@ export async function getEmbeddingConfiguration(): Promise<{ provider: string; providerSource: "database" | "environment"; model: string; + modelSource: "database" | "environment"; batchSize: number; }> { - const autoGenerateSetting = await prisma.admin_settings.findUnique({ - where: { key: "embedding_auto_generate" }, - }); - - const providerSetting = await prisma.admin_settings.findUnique({ - where: { key: "embedding_provider" }, - }); + const [autoGenerateSetting, providerSetting, modelSetting] = await Promise.all([ + prisma.admin_settings.findUnique({ + where: { key: "embedding_auto_generate" }, + }), + prisma.admin_settings.findUnique({ + where: { key: "embedding_provider" }, + }), + prisma.admin_settings.findUnique({ + where: { key: "system_llm_embedding_model" }, + }), + ]); const autoGenerate = autoGenerateSetting ? (autoGenerateSetting.value as boolean) @@ -151,12 +156,18 @@ export async function getEmbeddingConfiguration(): Promise<{ : env.EMBEDDING_PROVIDER; const providerSource = providerSetting ? "database" : "environment"; + const model = modelSetting + ? (modelSetting.value as string) + : env.EMBEDDING_MODEL; + const modelSource = modelSetting ? 
"database" : "environment"; + return { autoGenerate, autoGenerateSource, provider, providerSource, - model: env.EMBEDDING_MODEL, + model, + modelSource, batchSize: env.EMBEDDING_BATCH_SIZE, }; } diff --git a/src/lib/services/feed-health-service.ts b/src/lib/services/feed-health-service.ts index a440dea..bb016fe 100644 --- a/src/lib/services/feed-health-service.ts +++ b/src/lib/services/feed-health-service.ts @@ -50,7 +50,7 @@ export async function getFeedHealth(feedId: string): Promise ({ feedId: feed.id, healthStatus: feed.healthStatus as "healthy" | "warning" | "error" | "disabled", - consecutiveFailures: feed.consecutiveFailures, + consecutiveFailures: feed.consecutiveFailures ?? 0, lastSuccessfulFetch: feed.lastSuccessfulFetch, lastError: feed.feed_error_log[0]?.errorMessage || null, httpStatus: feed.httpStatus, @@ -123,8 +123,8 @@ export async function recordFeedFailure( if (!feed) return; - const newFailureCount = feed.consecutiveFailures + 1; - const shouldDisable = newFailureCount >= feed.autoDisableThreshold; + const newFailureCount = (feed.consecutiveFailures ?? 0) + 1; + const shouldDisable = newFailureCount >= (feed.autoDisableThreshold ?? 10); // Update feed health status await prisma.feeds.update({ @@ -227,7 +227,7 @@ export async function getUnhealthyFeeds( return feeds.map((feed) => ({ feedId: feed.id, healthStatus: feed.healthStatus as "healthy" | "warning" | "error" | "disabled", - consecutiveFailures: feed.consecutiveFailures, + consecutiveFailures: feed.consecutiveFailures ?? 
0, lastSuccessfulFetch: feed.lastSuccessfulFetch, lastError: feed.feed_error_log[0]?.errorMessage || null, httpStatus: feed.httpStatus, diff --git a/src/lib/services/feed-refresh-service.ts b/src/lib/services/feed-refresh-service.ts index 493931f..0a3b45a 100644 --- a/src/lib/services/feed-refresh-service.ts +++ b/src/lib/services/feed-refresh-service.ts @@ -72,7 +72,20 @@ export async function refreshFeed( let extractionUsed = false; // Parse feed (always try RSS first) + // TODO: Pass etag/lastModified from feed record for conditional requests const parsedFeed = await parseFeedUrl(feed.url); + + // Handle 304 Not Modified - feed hasn't changed + if (!parsedFeed) { + logger.info(`[FeedRefresh] Feed ${feedId} returned 304 Not Modified, skipping`); + return { + feedId, + success: true, + newArticles: 0, + updatedArticles: 0, + duration: Date.now() - startTime, + }; + } // If feed has extraction settings and method is not RSS, try content extraction if (settings && settings.method !== "rss") { diff --git a/src/lib/services/feed-service.ts b/src/lib/services/feed-service.ts index 49ec87c..54771f9 100644 --- a/src/lib/services/feed-service.ts +++ b/src/lib/services/feed-service.ts @@ -13,6 +13,7 @@ export interface CreateFeedInput { imageUrl?: string; categoryIds?: string[]; fetchInterval?: number; + extractionMethod?: "rss" | "readability" | "playwright" | "custom"; } export interface UpdateFeedInput { @@ -54,7 +55,8 @@ export async function createFeed(data: CreateFeedInput): Promise { throw new Error("Feed already exists"); } - // Create feed with default extraction method set to "readability" + // Create feed with extraction method (default to "readability") + const extractionMethod = data.extractionMethod || "readability"; const feed = await prisma.feeds.create({ data: { id: `feed_${Date.now()}_${Math.random().toString(36).substring(7)}`, @@ -66,7 +68,7 @@ export async function createFeed(data: CreateFeedInput): Promise { fetchInterval: data.fetchInterval || 60, 
settings: { extraction: { - method: "readability", + method: extractionMethod, }, }, // updatedAt is auto-managed by Prisma via @updatedAt directive @@ -97,7 +99,8 @@ export async function createFeed(data: CreateFeedInput): Promise { export async function validateAndCreateFeed( url: string, name?: string, - categoryIds?: string[] + categoryIds?: string[], + settings?: { method?: "rss" | "readability" | "playwright" | "custom" } ): Promise { // Normalize URL const normalizedUrl = normalizeFeedUrl(url); @@ -115,6 +118,11 @@ export async function validateAndCreateFeed( // Parse feed to get metadata const parsedFeed = await parseFeedUrl(normalizedUrl); + + // Handle case where feed returns null (304 Not Modified - shouldn't happen for new feeds) + if (!parsedFeed) { + throw new Error("Unable to parse feed content"); + } // Ensure imageUrl is a string (handle array case) let imageUrl = parsedFeed.imageUrl; @@ -130,6 +138,7 @@ export async function validateAndCreateFeed( siteUrl: parsedFeed.link, imageUrl: imageUrl, categoryIds, + extractionMethod: settings?.method, }); } diff --git a/src/lib/services/semantic-search-service.ts b/src/lib/services/semantic-search-service.ts index 2238ec9..b75ab03 100644 --- a/src/lib/services/semantic-search-service.ts +++ b/src/lib/services/semantic-search-service.ts @@ -230,7 +230,8 @@ export async function findRelatedArticles( throw new Error("Article not found"); } if (!article.hasEmbedding) { - throw new Error("Article has no embedding"); + logger.info("Article has no embedding, returning empty related articles", { articleId }); + return []; } // Now fetch the actual embedding using the article's embedding directly in the query diff --git a/src/lib/services/summarization-service.ts b/src/lib/services/summarization-service.ts index eb9f181..b3d2e69 100644 --- a/src/lib/services/summarization-service.ts +++ b/src/lib/services/summarization-service.ts @@ -8,6 +8,7 @@ import { env } from "@/env"; import { logger } from "../logger"; 
import { OpenAILLMProvider } from "../llm/openai-provider"; import { OllamaLLMProvider } from "../llm/ollama-provider"; +import { parseJSONFromLLM } from "../llm/json-parser"; import { cacheGetOrSet } from "../cache/cache-service"; import { CacheKeys, CacheTTL } from "../cache/cache-keys"; import { @@ -511,7 +512,10 @@ Respond in JSON format with keys: summary, keyPoints (array), topics (array), se // Parse the response let summary: ArticleSummary; try { - const parsed = JSON.parse(response.content); + const parsed = parseJSONFromLLM(response.content, { + model: response.model, + operation: "summarizeArticleWithTracking", + }) as { summary?: string; keyPoints?: string[]; topics?: string[]; sentiment?: string }; summary = { summary: parsed.summary || "", keyPoints: @@ -522,13 +526,13 @@ Respond in JSON format with keys: summary, keyPoints (array), topics (array), se options?.includeTopics && Array.isArray(parsed.topics) ? parsed.topics : [], - sentiment: ["positive", "neutral", "negative"].includes(parsed.sentiment) - ? parsed.sentiment + sentiment: ["positive", "neutral", "negative"].includes(parsed.sentiment ?? "") + ? 
(parsed.sentiment as "positive" | "neutral" | "negative") : "neutral", }; } catch (parseError) { - logger.warn("Failed to parse LLM JSON response, using fallback", { - parseError, + logger.warn("Using fallback for article summary with tracking", { + model: response.model, }); summary = { summary: response.content.substring(0, 500), diff --git a/src/lib/services/user-feed-service.ts b/src/lib/services/user-feed-service.ts index b6ef108..a2849c9 100644 --- a/src/lib/services/user-feed-service.ts +++ b/src/lib/services/user-feed-service.ts @@ -9,7 +9,14 @@ import { assignFeedToCategory } from "./user-category-service"; export async function getUserFeeds(userId: string): Promise { return await prisma.user_feeds.findMany({ where: { userId }, - include: { feeds: true }, + include: { + feeds: true, + user_feed_categories: { + include: { + user_categories: true, + }, + }, + }, orderBy: { subscribedAt: "desc" }, }); } diff --git a/src/lib/services/user-preferences-service.ts b/src/lib/services/user-preferences-service.ts index 9143502..bd37e32 100644 --- a/src/lib/services/user-preferences-service.ts +++ b/src/lib/services/user-preferences-service.ts @@ -188,7 +188,7 @@ export async function getDefaultPreferences(): Promise< articlesPerPage: 20, defaultView: "expanded", showReadArticles: true, - autoMarkAsRead: false, + autoMarkAsRead: true, showRelatedExcerpts: false, bounceThreshold: 0.25, showLowRelevanceArticles: true, diff --git a/src/types/fumadocs.d.ts b/src/types/fumadocs.d.ts new file mode 100644 index 0000000..e4fe97f --- /dev/null +++ b/src/types/fumadocs.d.ts @@ -0,0 +1,12 @@ +/** + * Type declarations for fumadocs-mdx generated modules + * These modules are generated at build time in the .source directory + */ + +declare module '../../.source/server' { + export const docs: any; +} + +declare module 'fumadocs-mdx:collections/server' { + export const docs: any; +} diff --git a/tests/components/PreferencesModal.snapshot.test.tsx 
b/tests/components/PreferencesModal.snapshot.test.tsx index 8d700bc..253b1f6 100644 --- a/tests/components/PreferencesModal.snapshot.test.tsx +++ b/tests/components/PreferencesModal.snapshot.test.tsx @@ -19,7 +19,7 @@ vi.mock('@/hooks/queries/use-user-preferences', () => ({ articlesPerPage: 20, defaultView: 'expanded', showReadArticles: true, - autoMarkAsRead: false, + autoMarkAsRead: true, }, isLoading: false, error: null, diff --git a/tsconfig.json b/tsconfig.json index 609d23e..cd860c5 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -38,5 +38,5 @@ "**/*.mts", "tests/**/*.ts" ], - "exclude": ["node_modules", "scripts/tests", "**/__tests__/**", "**/*.test.ts", "**/*.test.tsx"] + "exclude": ["node_modules", "scripts/tests", "**/__tests__/**", "**/*.test.ts", "**/*.test.tsx", ".source"] }