diff --git a/app/api/admin/embeddings/config/route.ts b/app/api/admin/embeddings/config/route.ts
index 0325d06..2cf8a20 100644
--- a/app/api/admin/embeddings/config/route.ts
+++ b/app/api/admin/embeddings/config/route.ts
@@ -74,6 +74,7 @@ export const GET = createHandler(
config: {
provider: embeddingConfig.provider,
model: embeddingConfig.model,
+ modelSource: embeddingConfig.modelSource,
dimensions,
batchSize: embeddingConfig.batchSize,
apiKey: envConfig.apiKey,
@@ -92,6 +93,7 @@ export const GET = createHandler(
},
autoGenerate: embeddingConfig.autoGenerate,
autoGenerateSource: embeddingConfig.autoGenerateSource,
+ providerSource: embeddingConfig.providerSource,
envDefault: env.EMBEDDING_AUTO_GENERATE,
usingUserConfig: userId ? openaiTest.success : false,
message: "OpenAI always available - admin controls enable/disable, users provide credentials",
diff --git a/app/api/admin/embeddings/provider/route.ts b/app/api/admin/embeddings/provider/route.ts
index 071360e..d8db660 100644
--- a/app/api/admin/embeddings/provider/route.ts
+++ b/app/api/admin/embeddings/provider/route.ts
@@ -99,12 +99,14 @@ export const GET = createHandler(
},
config: {
model: config.model,
+ modelSource: config.modelSource,
batchSize: config.batchSize,
autoGenerate: config.autoGenerate,
+ autoGenerateSource: config.autoGenerateSource,
},
usingUserConfig: userId ? openaiTest.success : false,
- message: openaiTest.success
- ? "OpenAI working with provided credentials"
+ message: openaiTest.success
+ ? "OpenAI working with provided credentials"
: "OpenAI available - users can provide API keys in preferences",
};
},
diff --git a/app/api/admin/memory/route.ts b/app/api/admin/memory/route.ts
index ff0c7b0..05155d7 100644
--- a/app/api/admin/memory/route.ts
+++ b/app/api/admin/memory/route.ts
@@ -10,6 +10,8 @@ import { memoryMonitor, formatBytes, getMemoryUsagePercent } from "@/lib/memory-
import { prisma } from "@/lib/db";
import { z } from "zod";
+export const dynamic = "force-dynamic";
+
/**
* GET /api/admin/memory
* Returns current memory statistics and history
diff --git a/app/api/articles/[id]/related/route.ts b/app/api/articles/[id]/related/route.ts
index 9ac1454..1647ff9 100644
--- a/app/api/articles/[id]/related/route.ts
+++ b/app/api/articles/[id]/related/route.ts
@@ -28,36 +28,16 @@ export const GET = createHandler(async ({ params, request }) => {
logger.info("Finding related articles", { articleId: id, limit, minScore });
- try {
- const results = await findRelatedArticles(id, {
- limit,
- minScore,
- excludeSameFeed,
- });
-
- return {
- articleId: id,
- results,
- count: results.length,
- };
- } catch (error) {
- // Handle "no embedding" error gracefully
- const errorMessage = error instanceof Error ? error.message : String(error);
- logger.error("Failed to find related articles", {
- error: error instanceof Error ? { message: error.message, stack: error.stack } : error,
- articleId: id
- });
-
- if (errorMessage.includes("no embedding")) {
- return apiResponse({
- articleId: id,
- results: [],
- count: 0,
- message: "Article has no embedding. Generate embeddings to enable related articles.",
- });
- }
-
- throw error;
- }
+ const results = await findRelatedArticles(id, {
+ limit,
+ minScore,
+ excludeSameFeed,
+ });
+
+ return {
+ articleId: id,
+ results,
+ count: results.length,
+ };
});
diff --git a/app/api/feeds/route.ts b/app/api/feeds/route.ts
index 7d544de..443e0dd 100644
--- a/app/api/feeds/route.ts
+++ b/app/api/feeds/route.ts
@@ -72,14 +72,14 @@ export const GET = createHandler(
*/
export const POST = createHandler(
async ({ body, session }) => {
- const { url, name, categoryIds } = body;
+ const { url, name, categoryIds, settings } = body;
let feed;
let isNewFeed = false;
try {
// Try to create the feed
- feed = await validateAndCreateFeed(url, name, categoryIds);
+ feed = await validateAndCreateFeed(url, name, categoryIds, settings);
isNewFeed = true;
} catch (error) {
// If feed already exists, get it instead
diff --git a/app/api/feeds/validate/route.ts b/app/api/feeds/validate/route.ts
index 825b0e2..096de35 100644
--- a/app/api/feeds/validate/route.ts
+++ b/app/api/feeds/validate/route.ts
@@ -35,6 +35,12 @@ export const POST = createHandler(
// Get feed info
try {
const feedInfo = await parseFeedUrl(normalizedUrl);
+ if (!feedInfo) {
+ return apiResponse({
+ valid: false,
+ error: "Unable to fetch feed information",
+ });
+ }
return apiResponse({
valid: true,
feedInfo: {
diff --git a/app/api/proxy/route.ts b/app/api/proxy/route.ts
index c6d4527..c9c878e 100644
--- a/app/api/proxy/route.ts
+++ b/app/api/proxy/route.ts
@@ -4,6 +4,8 @@ import { rewriteUrls, extractBaseUrl } from "@/lib/url-rewriter";
import { NextResponse } from "next/server";
import { z } from "zod";
+export const dynamic = "force-dynamic";
+
// Simple in-memory cache
const cache = new Map();
const CACHE_TTL = 5 * 60 * 1000; // 5 minutes
diff --git a/app/api/saved-searches/[id]/articles/route.ts b/app/api/saved-searches/[id]/articles/route.ts
index 0122e5a..81fa880 100644
--- a/app/api/saved-searches/[id]/articles/route.ts
+++ b/app/api/saved-searches/[id]/articles/route.ts
@@ -8,6 +8,8 @@ import { createHandler } from "@/lib/api-handler";
import { z } from "zod";
import * as savedSearchService from "@/lib/services/saved-search-service";
+export const dynamic = "force-dynamic";
+
// Query schema for filtering and pagination
const articlesQuerySchema = z.object({
limit: z.coerce.number().int().min(1).max(100).optional().default(50).catch(50),
diff --git a/app/api/saved-searches/[id]/rematch/route.ts b/app/api/saved-searches/[id]/rematch/route.ts
index 99aee89..6444507 100644
--- a/app/api/saved-searches/[id]/rematch/route.ts
+++ b/app/api/saved-searches/[id]/rematch/route.ts
@@ -8,6 +8,8 @@ import { createHandler } from "@/lib/api-handler";
import { rematchSavedSearch } from "@/lib/services/saved-search-matcher";
import { getSavedSearchById } from "@/lib/services/saved-search-service";
+export const dynamic = "force-dynamic";
+
/**
* POST /api/saved-searches/[id]/rematch
* Trigger rematch for a saved search
diff --git a/app/api/saved-searches/[id]/route.ts b/app/api/saved-searches/[id]/route.ts
index 4943652..89737d8 100644
--- a/app/api/saved-searches/[id]/route.ts
+++ b/app/api/saved-searches/[id]/route.ts
@@ -12,6 +12,8 @@ import * as savedSearchService from "@/lib/services/saved-search-service";
import { rematchSavedSearch } from "@/lib/services/saved-search-matcher";
import { logger } from "@/lib/logger";
+export const dynamic = "force-dynamic";
+
// Validation schema for updating a saved search
const updateSavedSearchSchema = z.object({
name: z.string().min(1).max(100).optional(),
diff --git a/app/api/saved-searches/insights/route.ts b/app/api/saved-searches/insights/route.ts
index e4fac86..a12caf8 100644
--- a/app/api/saved-searches/insights/route.ts
+++ b/app/api/saved-searches/insights/route.ts
@@ -7,6 +7,8 @@ import { createHandler } from '@/lib/api-handler';
import { prisma } from '@/lib/db';
import { logger } from '@/lib/logger';
+export const dynamic = "force-dynamic";
+
interface SavedSearchInsight {
id: string;
name: string;
diff --git a/app/api/saved-searches/preview/route.ts b/app/api/saved-searches/preview/route.ts
index 23a7cd5..a07e192 100644
--- a/app/api/saved-searches/preview/route.ts
+++ b/app/api/saved-searches/preview/route.ts
@@ -8,6 +8,8 @@ import { createHandler } from "@/lib/api-handler";
import { z } from "zod";
import * as savedSearchService from "@/lib/services/saved-search-service";
+export const dynamic = "force-dynamic";
+
// Validation schema for preview request
const previewSearchSchema = z.object({
query: z.string().min(1),
diff --git a/app/api/saved-searches/route.ts b/app/api/saved-searches/route.ts
index 14e935b..bccac2c 100644
--- a/app/api/saved-searches/route.ts
+++ b/app/api/saved-searches/route.ts
@@ -11,6 +11,8 @@ import * as savedSearchService from "@/lib/services/saved-search-service";
import { matchNewArticles } from "@/lib/services/saved-search-matcher";
import { logger } from "@/lib/logger";
+export const dynamic = "force-dynamic";
+
// Validation schema for creating a saved search
const createSavedSearchSchema = z.object({
name: z.string().min(1).max(100),
diff --git a/app/api/saved-searches/templates/route.ts b/app/api/saved-searches/templates/route.ts
index c6c3d9b..ff0cf47 100644
--- a/app/api/saved-searches/templates/route.ts
+++ b/app/api/saved-searches/templates/route.ts
@@ -16,6 +16,8 @@ import {
getTemplateById,
} from '@/lib/services/search-templates-service';
+export const dynamic = "force-dynamic";
+
const querySchema = z.object({
category: z.enum(['technology', 'news', 'research', 'jobs', 'custom']).optional(),
keyword: z.string().optional(),
diff --git a/app/api/user/notifications/[id]/route.ts b/app/api/user/notifications/[id]/route.ts
index b12acc3..3b19f16 100644
--- a/app/api/user/notifications/[id]/route.ts
+++ b/app/api/user/notifications/[id]/route.ts
@@ -4,6 +4,8 @@ import {
deleteNotification,
} from "@/lib/services/notification-service";
+export const dynamic = "force-dynamic";
+
/**
* PATCH /api/user/notifications/[id]
* Mark a notification as read
diff --git a/app/components/articles/ArticleViewTracker.tsx b/app/components/articles/ArticleViewTracker.tsx
index 7357cb0..3a8cd95 100644
--- a/app/components/articles/ArticleViewTracker.tsx
+++ b/app/components/articles/ArticleViewTracker.tsx
@@ -24,8 +24,15 @@ export function ArticleViewTracker({ articleId, estimatedTime, onReadStatusChang
const viewStartTime = useRef(null);
const hasTrackedView = useRef(false);
+ // Use ref to always have latest estimatedTime in cleanup function
+ const estimatedTimeRef = useRef(estimatedTime);
- const autoMarkAsRead = preferences?.autoMarkAsRead ?? false;
+ const autoMarkAsRead = preferences?.autoMarkAsRead ?? true;
+
+ // Keep ref in sync with prop
+ useEffect(() => {
+ estimatedTimeRef.current = estimatedTime;
+ }, [estimatedTime]);
// Track view on mount
useEffect(() => {
@@ -59,13 +66,14 @@ export function ArticleViewTracker({ articleId, estimatedTime, onReadStatusChang
if (timeSpent < 0) return;
// Don't track if estimatedTime is not yet calculated (0 or negative)
- if (estimatedTime <= 0) return;
+ const currentEstimatedTime = estimatedTimeRef.current;
+ if (currentEstimatedTime <= 0) return;
trackExit.mutate({
articleId,
data: {
timeSpent,
- estimatedTime,
+ estimatedTime: currentEstimatedTime,
},
});
};
diff --git a/app/components/layout/ReadingPanelLayout.tsx b/app/components/layout/ReadingPanelLayout.tsx
index 71dc51c..a43ff3a 100644
--- a/app/components/layout/ReadingPanelLayout.tsx
+++ b/app/components/layout/ReadingPanelLayout.tsx
@@ -1,6 +1,6 @@
"use client";
-import { ReactNode, useState, useEffect, useCallback, useRef } from "react";
+import { ReactNode, useState, useEffect, useCallback, useRef, useMemo } from "react";
import { useSession } from "next-auth/react";
import { useSearchParams, useRouter } from "next/navigation";
import { ResizableSplitPane } from "./ResizableSplitPane";
@@ -181,47 +181,46 @@ export function ReadingPanelLayout({ children, onArticleReadStatusChange }: Read
preferences &&
readingMode === "standalone";
- // Render children with callback support
- const renderChildren = () => {
+ // Compute children content once to avoid multiple render function calls
+ const childrenContent = useMemo(() => {
if (typeof children === "function") {
- // Pass callbacks for side_panel and inline modes
- const shouldPassCallbacks = isPanelActive || isInlineMode;
+ // eslint-disable-next-line react-hooks/refs -- Safe: only passing props, not accessing refs
return children({
- onArticleSelect: shouldPassCallbacks ? handleArticleSelect : undefined,
- selectedArticleId: shouldPassCallbacks ? selectedArticleId : null
+ onArticleSelect: (isPanelActive || isInlineMode) ? handleArticleSelect : undefined,
+ selectedArticleId: (isPanelActive || isInlineMode) ? selectedArticleId : null
});
}
return children;
- };
+ }, [children, isPanelActive, isInlineMode, handleArticleSelect, selectedArticleId]);
// If loading preferences, show loading state
if (isLoadingPreferences) {
- return <>{renderChildren()}</>;
+ return <>{childrenContent}</>;
}
// If not logged in or no preferences, show normal layout
if (!session?.user || !preferences) {
- return <>{renderChildren()}</>;
+ return <>{childrenContent}</>;
}
// For inline mode, render without split pane (ArticleList will handle inline expansion)
if (isInlineMode) {
- return
{renderChildren()}
;
+ return
{childrenContent}
;
}
// For standalone mode, render without callbacks (forces full-page navigation)
if (isStandaloneMode) {
- return <>{renderChildren()}</>;
+ return <>{childrenContent}</>;
}
// For side_panel mode: if panel disabled or mobile, show normal layout
if (!isPanelActive) {
- return <>{renderChildren()}</>;
+ return <>{childrenContent}</>;
}
// If panel enabled but no article selected, show normal layout
if (!selectedArticleId) {
- return
{renderChildren()}
;
+ return
{childrenContent}
;
}
// Safely cast preferences to required types since we verified they exist in isPanelActive
@@ -236,14 +235,14 @@ export function ReadingPanelLayout({ children, onArticleReadStatusChange }: Read
size={panelSize}
onResize={handleResize}
panel={
-
}
>
- {renderChildren()}
+ {childrenContent}
);
diff --git a/app/components/preferences/views/ReadingView.tsx b/app/components/preferences/views/ReadingView.tsx
index 7c6cdcb..4ee2021 100644
--- a/app/components/preferences/views/ReadingView.tsx
+++ b/app/components/preferences/views/ReadingView.tsx
@@ -145,7 +145,7 @@ export function ReadingView({ preferences, updatePreference }: ReadingViewProps)
updatePreference("autoMarkAsRead", checked)}
/>
diff --git a/app/components/ui/Tabs/Tabs.stories.tsx b/app/components/ui/Tabs/Tabs.stories.tsx
index c19ba3a..5f4e379 100644
--- a/app/components/ui/Tabs/Tabs.stories.tsx
+++ b/app/components/ui/Tabs/Tabs.stories.tsx
@@ -1,4 +1,5 @@
import type { Meta, StoryObj } from '@storybook/nextjs-vite';
+import React from 'react';
import { Tabs, TabList, Tab, TabPanels, TabPanel } from './index';
const meta: Meta = {
@@ -223,6 +224,3 @@ export const ControlledTabs: Story = {
);
},
};
-
-// @ts-ignore - React is used in the story
-import React from 'react';
diff --git a/app/docs/layout.tsx b/app/docs/layout.tsx
index 5a80a72..8228cca 100644
--- a/app/docs/layout.tsx
+++ b/app/docs/layout.tsx
@@ -4,6 +4,10 @@ import { RootProvider } from 'fumadocs-ui/provider/next';
import { source } from '@/lib/source';
import 'fumadocs-ui/style.css';
+export const dynamic = "force-dynamic";
+export const runtime = "nodejs";
+export const revalidate = 0;
+
export default function RootDocsLayout({ children }: { children: ReactNode }) {
return (
diff --git a/app/feeds-management/components/FeedManagementShell.tsx b/app/feeds-management/components/FeedManagementShell.tsx
index b753d77..9fe0537 100644
--- a/app/feeds-management/components/FeedManagementShell.tsx
+++ b/app/feeds-management/components/FeedManagementShell.tsx
@@ -1,6 +1,7 @@
"use client";
import { ReactNode } from "react";
+import Link from "next/link";
import { useFeedNavigation } from "@/hooks/use-feed-navigation";
import { UserMenu } from "@/app/components/auth/UserMenu";
import { NotificationBell } from "@/app/components/notifications/NotificationBell";
@@ -43,9 +44,9 @@ export function FeedManagementShell({
{/* Top Navigation Bar */}
- Deleting this category will move all feeds to "Uncategorized". This action cannot be undone.
+ Deleting this category will move all feeds to “Uncategorized”. This action cannot be undone.
diff --git a/app/feeds-management/components/views/OverviewView.tsx b/app/feeds-management/components/views/OverviewView.tsx
index bfb1606..fb7cb29 100644
--- a/app/feeds-management/components/views/OverviewView.tsx
+++ b/app/feeds-management/components/views/OverviewView.tsx
@@ -58,6 +58,12 @@ export function OverviewView() {
Feed Management Overview
+
) : feeds.length === 0 ? (
-
- No feeds yet. Add your first feed to get started.
+
+
+ No feeds yet. Add your first feed to get started.
+
+
) : (
diff --git a/app/layout.tsx b/app/layout.tsx
index c0835c0..e06d348 100644
--- a/app/layout.tsx
+++ b/app/layout.tsx
@@ -7,6 +7,10 @@ import { QueryProvider } from "./components/providers/QueryProvider";
// Force dynamic rendering for all routes (authentication, database queries, etc.)
export const dynamic = "force-dynamic";
+// Force Node.js runtime (not Edge) to ensure full compatibility
+export const runtime = "nodejs";
+// Disable static optimization and revalidation
+export const revalidate = 0;
const geistSans = Geist({
variable: "--font-geist-sans",
diff --git a/app/lib/preferences/preference-defaults.ts b/app/lib/preferences/preference-defaults.ts
index 74f6d6c..c2a65fa 100644
--- a/app/lib/preferences/preference-defaults.ts
+++ b/app/lib/preferences/preference-defaults.ts
@@ -15,7 +15,7 @@ export function getDefaultPreferences(): UserPreferences {
articlesPerPage: 20,
defaultView: "expanded",
showReadArticles: true,
- autoMarkAsRead: false,
+ autoMarkAsRead: true,
showRelatedExcerpts: false,
bounceThreshold: 0.25,
searchRecencyWeight: 0.3,
diff --git a/docs/LLM_JSON_PARSING_IMPROVEMENTS.md b/docs/LLM_JSON_PARSING_IMPROVEMENTS.md
new file mode 100644
index 0000000..dee1d28
--- /dev/null
+++ b/docs/LLM_JSON_PARSING_IMPROVEMENTS.md
@@ -0,0 +1,229 @@
+# LLM JSON Parsing Improvements
+
+## Problem
+
+The application was experiencing JSON parsing errors when processing LLM responses:
+- Error: "Unexpected end of JSON input"
+- Empty raw responses logged
+- LLMs often return JSON wrapped in markdown code blocks or with introductory text
+- No detailed logging of the actual LLM response when parsing failed
+
+## Solution
+
+Created a robust JSON extraction and parsing utility that:
+1. **Extracts JSON** from various response formats
+2. **Logs detailed error information** when parsing fails
+3. **Handles common LLM response patterns** automatically
+
+## Changes Made
+
+### 1. New Utility Module: `json-parser.ts`
+
+Location: [src/lib/llm/json-parser.ts](../src/lib/llm/json-parser.ts)
+
+**Key Functions:**
+
+#### `extractJSON(rawResponse: string): string`
+Cleans and extracts JSON from LLM responses that may contain:
+- Markdown code blocks: `` ```json\n{...}\n``` ``
+- Markdown code blocks without language tag: `` ```\n{...}\n``` ``
+- Introductory text: `"Here's the result: {...}"`
+- Trailing text: `"{...} I hope this helps!"`
+
+**Examples:**
+
+```typescript
+// Input: "Here's the JSON:\n```json\n{\"key\": \"value\"}\n```"
+// Output: "{\"key\": \"value\"}"
+
+// Input: "Sure! {\"summary\": \"text\"} Let me know if you need anything else."
+// Output: "{\"summary\": \"text\"}"
+```
+
+#### `parseJSONFromLLM(rawResponse: string, context?: { model?: string; operation?: string }): any`
+Parses JSON from LLM response with automatic extraction and detailed error logging.
+
+**Error Logging includes:**
+- Parse error message
+- First 1000 characters of raw response
+- First 1000 characters of extracted JSON
+- Response length
+- Model name
+- Operation name
+
+#### `parseJSONFromLLMSafe(rawResponse: string, fallback: T, context?: { ... }): T`
+Safe parsing with fallback value if parsing fails.
+
+### 2. Updated LLM Providers
+
+**Files Modified:**
+- [src/lib/llm/openai-provider.ts](../src/lib/llm/openai-provider.ts)
+- [src/lib/llm/ollama-provider.ts](../src/lib/llm/ollama-provider.ts)
+- [src/lib/services/summarization-service.ts](../src/lib/services/summarization-service.ts)
+
+**Changes:**
+- All `JSON.parse()` calls replaced with `parseJSONFromLLM()`
+- Improved error logging (changed from `logger.error` to `logger.warn` for fallback cases)
+- Added context information (model name, operation) to all parse calls
+
+### 3. Enhanced Error Logging
+
+**Before:**
+```typescript
+logger.error("Failed to parse LLM JSON response, using fallback", {
+ parseError: "Unexpected end of JSON input",
+ fullRawResponse: "", // Often empty!
+ responseLength: 0,
+ model: "gpt-5-nano"
+});
+```
+
+**After:**
+```typescript
+logger.error("Failed to parse LLM JSON response", {
+ parseError: "Unexpected token",
+ rawResponse: "Here's the result: ```json\n{\"sum...", // First 1000 chars
+ extractedJSON: "{\"sum...", // First 1000 chars of extracted JSON
+ responseLength: 2543,
+ extractedLength: 234,
+ model: "gpt-4o-mini",
+ operation: "summarizeArticle"
+});
+```
+
+## Usage Examples
+
+### In LLM Providers
+
+```typescript
+import { parseJSONFromLLM } from "./json-parser";
+
+// In summarizeArticle method
+try {
+ const parsed = parseJSONFromLLM(response.content, {
+ model: this.model,
+ operation: "summarizeArticle",
+ });
+ return {
+ summary: parsed.summary || "",
+ keyPoints: Array.isArray(parsed.keyPoints) ? parsed.keyPoints : [],
+ topics: Array.isArray(parsed.topics) ? parsed.topics : [],
+ sentiment: ["positive", "neutral", "negative"].includes(parsed.sentiment)
+ ? parsed.sentiment
+ : "neutral",
+ };
+} catch (parseError) {
+ // Detailed error already logged by parseJSONFromLLM
+ logger.warn("Using fallback for article summary due to parse error", {
+ model: this.model,
+ });
+
+ return {
+ summary: response.content.substring(0, 500),
+ keyPoints: [],
+ topics: [],
+ sentiment: "neutral",
+ };
+}
+```
+
+## Supported Response Formats
+
+The parser now handles all these formats automatically:
+
+1. **Plain JSON**
+ ```
+ {"key": "value"}
+ ```
+
+2. **Markdown with json tag**
+ ```
+ ```json
+ {"key": "value"}
+ ```
+ ```
+
+3. **Markdown without language tag**
+ ```
+ ```
+ {"key": "value"}
+ ```
+ ```
+
+4. **With introductory text**
+ ```
+ Sure! Here's the JSON response:
+ {"key": "value"}
+ ```
+
+5. **With trailing text**
+ ```
+ {"key": "value"}
+
+ I hope this helps!
+ ```
+
+6. **Complex multi-line with explanation**
+ ```
+ Here's your article summary:
+
+ ```json
+ {
+ "summary": "This is a summary",
+ "keyPoints": ["point 1", "point 2"],
+ "topics": ["topic1", "topic2"],
+ "sentiment": "positive"
+ }
+ ```
+
+ Let me know if you need any clarifications!
+ ```
+
+## Benefits
+
+1. **Fewer Parse Errors**: Automatically extracts JSON from various formats
+2. **Better Debugging**: Detailed error logs show actual LLM responses
+3. **Graceful Fallbacks**: Maintains functionality even with malformed responses
+4. **Consistent Handling**: Same parsing logic across all LLM providers
+5. **Type Safety**: TypeScript support with generic fallback values
+
+## Testing
+
+A comprehensive test suite is available at:
+[src/lib/llm/__tests__/json-parser.test.ts](../src/lib/llm/__tests__/json-parser.test.ts)
+
+Tests cover:
+- Plain JSON extraction
+- Markdown code block extraction (with and without language tags)
+- Removal of introductory/trailing text
+- JSON arrays
+- Complex nested structures
+- Error handling and fallbacks
+
+## Migration Notes
+
+All existing code using `JSON.parse()` for LLM responses should be updated to use `parseJSONFromLLM()`:
+
+**Before:**
+```typescript
+const parsed = JSON.parse(response.content);
+```
+
+**After:**
+```typescript
+const parsed = parseJSONFromLLM(response.content, {
+ model: this.model,
+ operation: "operationName"
+});
+```
+
+## Monitoring
+
+After deployment, monitor logs for:
+- Reduced frequency of JSON parse errors
+- More informative error messages with actual LLM response content
+- Successful extraction from markdown-wrapped responses
+
+Look for log messages:
+- `"Failed to parse LLM JSON response"` (error level) - parsing failed even after extraction
+- `"Using fallback for ..."` (warn level) - fallback value used due to parse failure
diff --git a/docs/configuration/configuration-reference.md b/docs/configuration/configuration-reference.md
index 91f8e58..d7320ba 100644
--- a/docs/configuration/configuration-reference.md
+++ b/docs/configuration/configuration-reference.md
@@ -146,7 +146,7 @@ All user preferences are stored in the `UserPreferences` database model and can
| `articlesPerPage` | Integer | `20` | 5-100 | Number of articles per page |
| `infiniteScrollMode` | String | `"both"` | `"auto"`, `"button"`, `"both"` | How to load more articles |
| `showReadArticles` | Boolean | `true` | `true`, `false` | Display already-read articles |
-| `autoMarkAsRead` | Boolean | `false` | `true`, `false` | Automatically mark articles as read when opened |
+| `autoMarkAsRead` | Boolean | `true` | `true`, `false` | Automatically mark articles as read when opened |
| `showRelatedExcerpts` | Boolean | `false` | `true`, `false` | Show excerpts in related articles section |
**Description:**
diff --git a/eslint.config.mjs b/eslint.config.mjs
index c78bfc9..25d25b3 100644
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -16,6 +16,10 @@ const eslintConfig = defineConfig([
"out/**",
"build/**",
"next-env.d.ts",
+ // Generated and third-party files:
+ "storybook-static/**",
+ ".source/**",
+ "node_modules/**",
]),
{
rules: {
diff --git a/prisma/migrations/20251201183639_set_auto_mark_as_read_default_true/migration.sql b/prisma/migrations/20251201183639_set_auto_mark_as_read_default_true/migration.sql
new file mode 100644
index 0000000..1b90885
--- /dev/null
+++ b/prisma/migrations/20251201183639_set_auto_mark_as_read_default_true/migration.sql
@@ -0,0 +1,6 @@
+-- AlterTable: Change default value of autoMarkAsRead from false to true
+ALTER TABLE "user_preferences" ALTER COLUMN "autoMarkAsRead" SET DEFAULT true;
+
+-- Update existing records: Set autoMarkAsRead to true for users who have it as false
+-- This ensures all users have the correct default value
+UPDATE "user_preferences" SET "autoMarkAsRead" = true WHERE "autoMarkAsRead" = false;
diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index 87cc21b..4e1e4d0 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -1,7 +1,7 @@
generator client {
provider = "prisma-client"
output = "../src/generated/prisma"
- previewFeatures = ["postgresqlExtensions", "driverAdapters"]
+ previewFeatures = ["driverAdapters", "postgresqlExtensions"]
}
datasource db {
@@ -66,10 +66,10 @@ model articles {
title String
content String
url String @unique
- publishedAt DateTime? @db.Timestamptz
+ publishedAt DateTime? @db.Timestamptz(6)
embedding Unsupported("vector")?
- createdAt DateTime @default(now()) @db.Timestamptz
- updatedAt DateTime @updatedAt @db.Timestamptz
+ createdAt DateTime @default(now()) @db.Timestamptz(6)
+ updatedAt DateTime @updatedAt @db.Timestamptz(6)
author String?
contentHash String?
excerpt String?
@@ -104,14 +104,14 @@ model CronJobRun {
id String @id @default(cuid())
jobName String
status CronJobStatus
- triggeredBy CronJobTrigger
startedAt DateTime @default(now())
completedAt DateTime?
durationMs Int?
stats Json?
- metadata Json?
errorMessage String?
- logs Json? // Array of log entries with timestamp, level, message
+ logs Json?
+ triggeredBy CronJobTrigger
+ metadata Json?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@ -123,8 +123,8 @@ model CronJobRun {
model JobLock {
jobName String @id
lockedAt DateTime
- lockedBy String // Process ID
- expiresAt DateTime // Auto-expire after timeout
+ lockedBy String
+ expiresAt DateTime
@@index([expiresAt])
@@map("job_locks")
@@ -141,33 +141,32 @@ model feed_categories {
}
model feeds {
- id String @id
- name String
- url String @unique
- lastFetched DateTime?
- settings Json?
- createdAt DateTime @default(now())
- updatedAt DateTime @updatedAt
- description String?
- errorCount Int @default(0)
- etag String?
- fetchInterval Int @default(60)
- imageUrl String?
- lastError String?
- lastModified String?
- siteUrl String?
- // Health tracking fields
- healthStatus String @default("healthy")
- consecutiveFailures Int @default(0)
- lastSuccessfulFetch DateTime?
- autoDisableThreshold Int @default(10)
- notifyOnError Boolean @default(false)
- httpStatus Int?
- redirectUrl String?
- articles articles[]
- feed_categories feed_categories[]
- user_feeds user_feeds[]
- feed_error_log feed_error_log[]
+ id String @id
+ name String
+ url String @unique
+ lastFetched DateTime?
+ settings Json?
+ createdAt DateTime @default(now())
+ updatedAt DateTime @updatedAt
+ description String?
+ errorCount Int @default(0)
+ etag String?
+ fetchInterval Int @default(60)
+ imageUrl String?
+ lastError String?
+ lastModified String?
+ siteUrl String?
+ healthStatus String? @default("healthy")
+ consecutiveFailures Int? @default(0)
+ lastSuccessfulFetch DateTime? @db.Timestamp(6)
+ autoDisableThreshold Int? @default(10)
+ notifyOnError Boolean? @default(false)
+ httpStatus Int?
+ redirectUrl String?
+ articles articles[]
+ feed_categories feed_categories[]
+ feed_error_log feed_error_log[]
+ user_feeds user_feeds[]
@@index([errorCount])
@@index([lastFetched])
@@ -178,13 +177,13 @@ model feeds {
model feed_error_log {
id String @id
feedId String
- timestamp DateTime @default(now())
+ timestamp DateTime @default(now()) @db.Timestamp(6)
errorType String
errorMessage String
httpStatus Int?
details Json?
- resolved Boolean @default(false)
- feed feeds @relation(fields: [feedId], references: [id], onDelete: Cascade)
+ resolved Boolean? @default(false)
+ feed feeds @relation(fields: [feedId], references: [id], onDelete: Cascade, onUpdate: NoAction)
@@index([feedId])
@@index([timestamp(sort: Desc)])
@@ -226,24 +225,22 @@ model user_categories {
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
icon String? @default("๐")
- // Hierarchical and display fields
parentId String?
color String?
- collapsed Boolean @default(false)
- sortOrder String @default("manual")
- includeInSearch Boolean @default(true)
- isDefault Boolean @default(false)
- isReadOnly Boolean @default(false)
- users User @relation(fields: [userId], references: [id], onDelete: Cascade)
- parent user_categories? @relation("CategoryHierarchy", fields: [parentId], references: [id], onDelete: SetNull)
+ collapsed Boolean? @default(false)
+ sortOrder String? @default("manual")
+ includeInSearch Boolean? @default(true)
+ isDefault Boolean? @default(false)
+ isReadOnly Boolean? @default(false)
+ saved_searches saved_searches[]
+ parent user_categories? @relation("CategoryHierarchy", fields: [parentId], references: [id], onUpdate: NoAction)
children user_categories[] @relation("CategoryHierarchy")
+ users User @relation(fields: [userId], references: [id], onDelete: Cascade)
user_feed_categories user_feed_categories[]
- saved_searches saved_searches[]
@@unique([userId, name])
@@index([userId])
@@index([userId, order])
- @@index([userId, isDefault])
}
model user_feed_categories {
@@ -267,7 +264,7 @@ model user_feeds {
settings Json?
subscribedAt DateTime @default(now())
updatedAt DateTime @updatedAt
- tags String[] // User-defined tags
+ tags String[]
user_feed_categories user_feed_categories[]
feeds feeds @relation(fields: [feedId], references: [id], onDelete: Cascade)
users User @relation(fields: [userId], references: [id], onDelete: Cascade)
@@ -299,14 +296,10 @@ model user_preferences {
userId String @unique
theme String @default("system")
fontSize String @default("medium")
- sidebarFontSize String @default("smaller")
- cardFontSize String @default("same")
- modalFontSize String @default("same")
- uiFontSize String @default("same")
articlesPerPage Int @default(20)
defaultView String @default("expanded")
showReadArticles Boolean @default(true)
- autoMarkAsRead Boolean @default(false)
+ autoMarkAsRead Boolean @default(true)
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
showRelatedExcerpts Boolean @default(false)
@@ -320,7 +313,6 @@ model user_preferences {
readingPanelSize Int @default(50)
categoryStates Json?
sidebarCollapsed Boolean @default(false)
- sidebarWidth Int @default(20) // Percentage width (10-40)
readingFontFamily String @default("Georgia")
readingFontSize Int @default(18)
readingLineHeight Float @default(1.7)
@@ -344,24 +336,29 @@ model user_preferences {
showArticleAuthor Boolean @default(true)
showArticleFeedInfo Boolean @default(true)
showArticleDate Boolean @default(true)
- articleCardSectionOrder Json? @default("[\"feedInfo\",\"title\",\"excerpt\",\"actions\"]")
+ articleCardSectionOrder Json? @default("[\"feedInfo\", \"title\", \"excerpt\", \"actions\"]")
articleCardDensity String @default("normal")
articleCardBorderWidth String @default("normal")
articleCardBorderRadius String @default("normal")
articleCardBorderContrast String @default("medium")
articleCardSpacing String @default("normal")
- readingMode String @default("side_panel")
+ sidebarWidth Int @default(20)
+ cardFontSize String @default("same")
+ modalFontSize String @default("same")
+ sidebarFontSize String @default("smaller")
+ uiFontSize String @default("same")
inlineAutoScroll Boolean @default(true)
+ readingMode String @default("side_panel")
users User @relation(fields: [userId], references: [id], onDelete: Cascade)
}
model user_notifications {
id String @id
userId String
- type String // 'feed_refresh', 'info', 'warning', 'error'
+ type String
title String
message String
- metadata Json? // Additional data like stats
+ metadata Json?
read Boolean @default(false)
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@ -372,26 +369,26 @@ model user_notifications {
}
model saved_searches {
- id String @id @default(cuid())
+ id String @id @default(cuid())
userId String
name String
- query String @db.Text
- icon String? @default("🔍")
- threshold Float @default(0.6)
+ query String
+ icon String? @default("🔍")
+ threshold Float @default(0.6)
categoryId String?
- notifyOnMatch Boolean @default(false)
- notifyThreshold Float @default(0.85)
- dailyDigest Boolean @default(false)
- recencyBias Float @default(0.0)
+ notifyOnMatch Boolean @default(false)
+ notifyThreshold Float @default(0.85)
+ dailyDigest Boolean @default(false)
+ recencyBias Float @default(0.0)
prioritySources Json?
- createdAt DateTime @default(now())
- updatedAt DateTime @updatedAt
+ createdAt DateTime @default(now())
+ updatedAt DateTime @updatedAt
lastMatchedAt DateTime?
- totalMatches Int @default(0)
- archived Boolean @default(false)
- user User @relation(fields: [userId], references: [id], onDelete: Cascade)
- category user_categories? @relation(fields: [categoryId], references: [id], onDelete: SetNull)
+ totalMatches Int @default(0)
+ archived Boolean @default(false)
matches saved_search_matches[]
+ category user_categories? @relation(fields: [categoryId], references: [id])
+ user User @relation(fields: [userId], references: [id], onDelete: Cascade)
@@index([userId])
@@index([userId, archived])
@@ -403,11 +400,11 @@ model saved_search_matches {
articleId String
relevanceScore Float
matchedTerms Json
- matchReason String? @db.Text
+ matchReason String?
createdAt DateTime @default(now())
notified Boolean @default(false)
- savedSearch saved_searches @relation(fields: [savedSearchId], references: [id], onDelete: Cascade)
article articles @relation(fields: [articleId], references: [id], onDelete: Cascade)
+ savedSearch saved_searches @relation(fields: [savedSearchId], references: [id], onDelete: Cascade)
@@unique([savedSearchId, articleId])
@@index([savedSearchId, relevanceScore])
@@ -421,19 +418,19 @@ model User {
email String @unique
emailVerified DateTime?
image String?
- role UserRole @default(USER)
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
+ role UserRole @default(USER)
accounts Account[]
article_feedback article_feedback[]
read_articles read_articles[]
+ saved_searches saved_searches[]
sessions Session[]
user_categories user_categories[]
user_feeds user_feeds[]
+ user_notifications user_notifications[]
user_patterns user_patterns[]
user_preferences user_preferences?
- user_notifications user_notifications[]
- saved_searches saved_searches[]
@@map("users")
}
diff --git a/prisma/seed.ts b/prisma/seed.ts
index 74e67fd..201aba7 100644
--- a/prisma/seed.ts
+++ b/prisma/seed.ts
@@ -473,7 +473,7 @@ async function main() {
articlesPerPage: 20,
defaultView: "expanded",
showReadArticles: true,
- autoMarkAsRead: false,
+ autoMarkAsRead: true,
showRelatedExcerpts: false,
readingMode: "side_panel",
inlineAutoScroll: true,
diff --git a/scripts/verify-json-parser.js b/scripts/verify-json-parser.js
new file mode 100644
index 0000000..b3d57a2
--- /dev/null
+++ b/scripts/verify-json-parser.js
@@ -0,0 +1,108 @@
+/**
+ * Simple verification script for JSON parser
+ * Run with: node scripts/verify-json-parser.js
+ */
+
+// Mock extractJSON function
+function extractJSON(rawResponse) {
+ let cleaned = rawResponse.trim();
+
+ // Try to extract JSON from markdown code blocks
+ const codeBlockMatch = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
+ if (codeBlockMatch?.[1]) {
+ cleaned = codeBlockMatch[1].trim();
+ }
+
+ // Remove any leading text before the first { or [
+ const jsonStartMatch = cleaned.match(/^[^{[]*([{[][\s\S]*)/);
+ if (jsonStartMatch?.[1]) {
+ cleaned = jsonStartMatch[1];
+ }
+
+ // Remove any trailing text after the last } or ]
+ const jsonEndMatch = cleaned.match(/([\s\S]*[}\]])[^}\]]*$/);
+ if (jsonEndMatch?.[1]) {
+ cleaned = jsonEndMatch[1];
+ }
+
+ return cleaned.trim();
+}
+
+// Test cases
+const tests = [
+ {
+ name: "Plain JSON",
+ input: '{"key": "value"}',
+ expected: '{"key": "value"}'
+ },
+ {
+ name: "JSON in markdown with json tag",
+ input: 'Here is the result:\n```json\n{"key": "value"}\n```',
+ expected: '{"key": "value"}'
+ },
+ {
+ name: "JSON in markdown without tag",
+ input: '```\n{"key": "value"}\n```',
+ expected: '{"key": "value"}'
+ },
+ {
+ name: "JSON with intro text",
+ input: 'Sure! Here you go: {"key": "value"}',
+ expected: '{"key": "value"}'
+ },
+ {
+ name: "JSON with trailing text",
+ input: '{"key": "value"} I hope this helps!',
+ expected: '{"key": "value"}'
+ },
+ {
+ name: "Complex JSON with markdown",
+ input: 'Here\'s the summary:\n```json\n{\n "summary": "Test",\n "keyPoints": ["a", "b"],\n "topics": ["topic1"]\n}\n```\nLet me know if you need anything else!',
+ expected: '{\n "summary": "Test",\n "keyPoints": ["a", "b"],\n "topics": ["topic1"]\n}'
+ },
+ {
+ name: "Empty response",
+ input: '',
+ expected: ''
+ }
+];
+
+console.log('🧪 Testing JSON Parser\n');
+
+let passed = 0;
+let failed = 0;
+
+tests.forEach((test, index) => {
+ try {
+ const result = extractJSON(test.input);
+
+ if (result === test.expected) {
+ console.log(`✅ Test ${index + 1}: ${test.name}`);
+ passed++;
+ } else {
+ console.log(`❌ Test ${index + 1}: ${test.name}`);
+ console.log(` Expected: ${test.expected}`);
+ console.log(` Got: ${result}`);
+ failed++;
+ }
+
+ // Try to parse the extracted JSON (if not empty)
+ if (result && result.trim()) {
+ JSON.parse(result);
+ console.log(` ✓ Valid JSON`);
+ }
+ } catch (error) {
+ console.log(`❌ Test ${index + 1}: ${test.name} - ${error.message}`);
+ failed++;
+ }
+});
+
+console.log(`\n📊 Results: ${passed} passed, ${failed} failed out of ${tests.length} tests`);
+
+if (failed === 0) {
+ console.log('\n✨ All tests passed!');
+ process.exit(0);
+} else {
+ console.log('\n⚠️ Some tests failed');
+ process.exit(1);
+}
diff --git a/src/hooks/queries/use-feeds.ts b/src/hooks/queries/use-feeds.ts
index 55e122f..9e5df0b 100644
--- a/src/hooks/queries/use-feeds.ts
+++ b/src/hooks/queries/use-feeds.ts
@@ -93,13 +93,22 @@ async function fetchUserFeeds(includeAll = false): Promise ({
+ return (response.subscriptions || []).map((sub: any) => {
+ // Extract category from user_feed_categories (take first category if multiple)
+ const categoryRelation = sub.user_feed_categories?.[0]?.user_categories;
+ const category = categoryRelation ? {
+ id: categoryRelation.id,
+ name: categoryRelation.name,
+ color: categoryRelation.color,
+ } : null;
+
+ return {
...sub.feeds,
// Override name with custom name if it exists
name: sub.customName || sub.feeds.name,
// Add subscription specific fields
subscribedAt: sub.createdAt,
- category: sub.category,
+ category,
// Compute isActive from healthStatus
isActive: sub.feeds.healthStatus !== "disabled",
// Preserve feed settings (extraction, etc.) and add user subscription settings
@@ -120,7 +129,8 @@ async function fetchUserFeeds(includeAll = false): Promise {
/**
@@ -75,14 +75,19 @@ export function useViewNavigation(
const { modalName, defaultView, isOpen, onNavigate, onClose } = options;
const [currentView, setCurrentView] = useState(defaultView);
+ const wasOpenRef = useRef(isOpen);
/**
* Reset view to defaultView when modal opens (to handle initialView changes)
+ * Only resets when transitioning from closed to open
*/
useEffect(() => {
- if (isOpen) {
- setCurrentView(defaultView);
+ // Only reset if we just opened (transition from false to true)
+ if (isOpen && !wasOpenRef.current) {
+ // eslint-disable-next-line -- Safe: only sets state once on modal open transition, conditional prevents cascading
+ setCurrentView(prev => prev !== defaultView ? defaultView : prev);
}
+ wasOpenRef.current = isOpen;
}, [isOpen, defaultView]);
/**
diff --git a/src/lib/auth.ts b/src/lib/auth.ts
index 4a24f19..9d57d55 100644
--- a/src/lib/auth.ts
+++ b/src/lib/auth.ts
@@ -144,7 +144,7 @@ export const authConfig = {
articlesPerPage: 20,
defaultView: "expanded",
showReadArticles: true,
- autoMarkAsRead: false,
+ autoMarkAsRead: true,
showRelatedExcerpts: false,
bounceThreshold: 0.25,
showLowRelevanceArticles: true,
diff --git a/src/lib/content-processor.ts b/src/lib/content-processor.ts
index 566d92d..9d09228 100644
--- a/src/lib/content-processor.ts
+++ b/src/lib/content-processor.ts
@@ -2,6 +2,8 @@
* Content processing utilities for articles
*/
+import { removePlaceholderImagesFromHtml } from "./image-utils";
+
/**
* Process article content for display
* - Sanitize HTML
@@ -31,6 +33,9 @@ export function processArticleContent(
// Add target="_blank" to all links
processed = addTargetBlankToLinks(processed);
+ // Remove placeholder images from content
+ processed = removePlaceholderImagesFromHtml(processed);
+
// Process images (lazy loading, etc.)
processed = processImages(processed);
diff --git a/src/lib/docs-source.ts b/src/lib/docs-source.ts
index 2eecf56..8826cc7 100644
--- a/src/lib/docs-source.ts
+++ b/src/lib/docs-source.ts
@@ -1,4 +1,4 @@
-import { docs } from '../../.source/server';
+import { docs } from 'fumadocs-mdx:collections/server';
// Export docs directly
export const source = docs;
diff --git a/src/lib/extractors/playwright-extractor.ts b/src/lib/extractors/playwright-extractor.ts
index d4bb7e6..fd60ee0 100644
--- a/src/lib/extractors/playwright-extractor.ts
+++ b/src/lib/extractors/playwright-extractor.ts
@@ -5,6 +5,7 @@ import { logger } from "@/lib/logger";
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import { sanitizeHtml } from "@/lib/feed-parser";
+import { getFirstValidImage } from "@/lib/image-utils";
/**
* Playwright-based content extractor for JavaScript-rendered content
@@ -300,14 +301,18 @@ export class PlaywrightExtractor extends BaseExtractor {
/**
* Extract featured image from document
+ * Filters out placeholder images and prioritizes real images
*/
private extractImage(document: Document, baseUrl: string): string | undefined {
- // Try Open Graph image
+ // Collect all candidate images
+ const candidates: (string | undefined)[] = [];
+
+ // Try Open Graph image (highest priority)
const ogImage = document.querySelector('meta[property="og:image"]');
if (ogImage) {
const imageUrl = ogImage.getAttribute("content");
if (imageUrl) {
- return this.resolveUrl(imageUrl, baseUrl);
+ candidates.push(this.resolveUrl(imageUrl, baseUrl));
}
}
@@ -316,20 +321,39 @@ export class PlaywrightExtractor extends BaseExtractor {
if (twitterImage) {
const imageUrl = twitterImage.getAttribute("content");
if (imageUrl) {
- return this.resolveUrl(imageUrl, baseUrl);
+ candidates.push(this.resolveUrl(imageUrl, baseUrl));
}
}
- // Try first article image
- const articleImage = document.querySelector("article img");
- if (articleImage) {
- const imageUrl = articleImage.getAttribute("src");
- if (imageUrl) {
- return this.resolveUrl(imageUrl, baseUrl);
+ // Try article images (Playwright renders JS, so lazy-loaded images should be loaded)
+ const articleImages = document.querySelectorAll("article img");
+ for (const img of articleImages) {
+ // Check data-src first (in case JS hasn't fully swapped)
+ const dataSrc = img.getAttribute("data-src");
+ if (dataSrc) {
+ candidates.push(this.resolveUrl(dataSrc, baseUrl));
+ }
+ const src = img.getAttribute("src");
+ if (src) {
+ candidates.push(this.resolveUrl(src, baseUrl));
+ }
+ }
+
+ // Try images in main content areas
+ const mainImages = document.querySelectorAll("main img, .content img, .article-body img");
+ for (const img of mainImages) {
+ const dataSrc = img.getAttribute("data-src");
+ if (dataSrc) {
+ candidates.push(this.resolveUrl(dataSrc, baseUrl));
+ }
+ const src = img.getAttribute("src");
+ if (src) {
+ candidates.push(this.resolveUrl(src, baseUrl));
}
}
- return undefined;
+ // Return the first non-placeholder image
+ return getFirstValidImage(candidates);
}
}
diff --git a/src/lib/extractors/readability-extractor.ts b/src/lib/extractors/readability-extractor.ts
index ee4fa3c..7481a69 100644
--- a/src/lib/extractors/readability-extractor.ts
+++ b/src/lib/extractors/readability-extractor.ts
@@ -4,6 +4,7 @@ import { BaseExtractor } from "./base-extractor";
import type { ExtractorConfig, ExtractedContent } from "./types";
import { sanitizeHtml } from "@/lib/feed-parser";
import { logger } from "@/lib/logger";
+import { filterPlaceholderImage, getFirstValidImage } from "@/lib/image-utils";
/**
* Readability-based content extractor
@@ -209,14 +210,18 @@ export class ReadabilityExtractor extends BaseExtractor {
/**
* Extract featured image from document
+ * Filters out placeholder images and prioritizes real images
*/
private extractImage(document: Document, baseUrl: string): string | undefined {
- // Try Open Graph image
+ // Collect all candidate images
+ const candidates: (string | undefined)[] = [];
+
+ // Try Open Graph image (highest priority)
const ogImage = document.querySelector('meta[property="og:image"]');
if (ogImage) {
const imageUrl = ogImage.getAttribute("content");
if (imageUrl) {
- return this.resolveUrl(imageUrl, baseUrl);
+ candidates.push(this.resolveUrl(imageUrl, baseUrl));
}
}
@@ -225,29 +230,39 @@ export class ReadabilityExtractor extends BaseExtractor {
if (twitterImage) {
const imageUrl = twitterImage.getAttribute("content");
if (imageUrl) {
- return this.resolveUrl(imageUrl, baseUrl);
+ candidates.push(this.resolveUrl(imageUrl, baseUrl));
}
}
- // Try first article image
- const articleImage = document.querySelector("article img");
- if (articleImage) {
- const imageUrl = articleImage.getAttribute("src");
- if (imageUrl) {
- return this.resolveUrl(imageUrl, baseUrl);
+ // Try first article image (check both src and data-src for lazy-loaded images)
+ const articleImages = document.querySelectorAll("article img");
+ for (const img of articleImages) {
+ // Prefer data-src for lazy-loaded images
+ const dataSrc = img.getAttribute("data-src");
+ if (dataSrc) {
+ candidates.push(this.resolveUrl(dataSrc, baseUrl));
+ }
+ const src = img.getAttribute("src");
+ if (src) {
+ candidates.push(this.resolveUrl(src, baseUrl));
}
}
- // Try first image in content
- const firstImage = document.querySelector("img");
- if (firstImage) {
- const imageUrl = firstImage.getAttribute("src");
- if (imageUrl) {
- return this.resolveUrl(imageUrl, baseUrl);
+ // Try images in main content areas
+ const mainImages = document.querySelectorAll("main img, .content img, .article-body img");
+ for (const img of mainImages) {
+ const dataSrc = img.getAttribute("data-src");
+ if (dataSrc) {
+ candidates.push(this.resolveUrl(dataSrc, baseUrl));
+ }
+ const src = img.getAttribute("src");
+ if (src) {
+ candidates.push(this.resolveUrl(src, baseUrl));
}
}
- return undefined;
+ // Return the first non-placeholder image
+ return getFirstValidImage(candidates);
}
}
diff --git a/src/lib/feed-parser.ts b/src/lib/feed-parser.ts
index d74d180..d8cba96 100644
--- a/src/lib/feed-parser.ts
+++ b/src/lib/feed-parser.ts
@@ -2,6 +2,7 @@ import { parseFeed as parseRawFeed } from "@rowanmanning/feed-parser";
import { createHash } from "crypto";
import { decode as decodeHtmlEntities } from "he";
import * as iconv from "iconv-lite";
+import { filterPlaceholderImage, extractFirstValidImageFromHtml } from "./image-utils";
/**
* Type definitions for @rowanmanning/feed-parser
@@ -10,10 +11,15 @@ interface RawFeed {
title?: string;
description?: string;
url?: string;
+ language?: string;
image?: {
url?: string;
title?: string;
};
+ categories?: Array<{
+ term?: string;
+ label?: string;
+ }>;
items: RawFeedItem[];
}
@@ -30,51 +36,174 @@ interface RawFeedItem {
email?: string;
url?: string;
}>;
+ categories?: Array<{
+ term?: string;
+ label?: string;
+ }>;
media?: Array<{
url?: string;
+ image?: string;
type?: string;
+ mimeType?: string;
title?: string;
+ length?: number;
}>;
+ // Image from media:thumbnail, itunes:image, or media:content
+ image?: {
+ url?: string;
+ title?: string;
+ };
+}
+
+/**
+ * Fetch options for conditional requests (ETag/Last-Modified caching)
+ */
+export interface FetchOptions {
+ etag?: string;
+ lastModified?: string;
+}
+
+/**
+ * Fetch result with caching headers
+ */
+export interface FetchResult {
+ content: string;
+ etag?: string;
+ lastModified?: string;
+ notModified: boolean;
}
+/**
+ * Feed parser timeout configuration
+ */
+const FETCH_TIMEOUT = 30000; // 30 seconds
+const MAX_RETRIES = 2;
+const RETRY_DELAY = 1000; // 1 second
+
/**
* Fetch and decode feed with proper encoding handling
- * Supports both RSS and Atom feeds
+ * Supports both RSS and Atom feeds, conditional requests, timeouts, and retries
*/
-async function fetchFeedWithEncoding(url: string): Promise<string> {
- const response = await fetch(url, {
- headers: {
- "User-Agent": "NeuReed/1.0 (RSS/Atom Reader)",
- Accept: "application/rss+xml, application/atom+xml, application/xml, text/xml",
- },
- });
+async function fetchFeedWithEncoding(
+ url: string,
+ options?: FetchOptions
+): Promise<FetchResult> {
+ let lastError: Error | null = null;
+
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+ try {
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT);
+
+ const headers: Record<string, string> = {
+ "User-Agent": "NeuReed/1.0 (RSS/Atom Reader; +https://github.com/neureed)",
+ Accept: "application/rss+xml, application/atom+xml, application/xml, text/xml, */*;q=0.1",
+ };
+
+ // Add conditional request headers for bandwidth optimization
+ if (options?.etag) {
+ headers["If-None-Match"] = options.etag;
+ }
+ if (options?.lastModified) {
+ headers["If-Modified-Since"] = options.lastModified;
+ }
+
+ const response = await fetch(url, {
+ headers,
+ signal: controller.signal,
+ });
+
+ clearTimeout(timeoutId);
+
+ // Handle 304 Not Modified
+ if (response.status === 304) {
+ return {
+ content: "",
+ etag: response.headers.get("etag") || options?.etag,
+ lastModified: response.headers.get("last-modified") || options?.lastModified,
+ notModified: true,
+ };
+ }
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+ if (!response.ok) {
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+ }
+
+ const buffer = await response.arrayBuffer();
+ const uint8Array = new Uint8Array(buffer);
+
+ // Try to detect encoding from XML declaration or Content-Type header
+ const contentType = response.headers.get("content-type") || "";
+ let encoding = 'utf-8';
+
+ // Check Content-Type header first
+ const charsetMatch = contentType.match(/charset=([^;\s]+)/i);
+ if (charsetMatch && charsetMatch[1]) {
+ encoding = charsetMatch[1].toLowerCase().replace(/["']/g, '');
+ } else {
+ // Fall back to XML declaration
+ const firstBytes = uint8Array.slice(0, 200);
+ const asciiText = new TextDecoder('ascii').decode(firstBytes);
+ const encodingMatch = asciiText.match(/encoding=["']([^"']+)["']/i);
+ if (encodingMatch && encodingMatch[1]) {
+ encoding = encodingMatch[1].toLowerCase();
+ }
+ }
+
+ // Normalize encoding names and decode
+ const content = decodeWithEncoding(uint8Array, encoding);
+
+ return {
+ content,
+ etag: response.headers.get("etag") || undefined,
+ lastModified: response.headers.get("last-modified") || undefined,
+ notModified: false,
+ };
+ } catch (error) {
+ lastError = error instanceof Error ? error : new Error(String(error));
+
+ // Don't retry on abort (timeout) or client errors
+ if (lastError.name === 'AbortError') {
+ throw new Error(`Feed fetch timeout after ${FETCH_TIMEOUT}ms`);
+ }
+
+ // Retry on network errors
+ if (attempt < MAX_RETRIES) {
+ await new Promise(resolve => setTimeout(resolve, RETRY_DELAY * (attempt + 1)));
+ continue;
+ }
+ }
}
+
+ throw lastError || new Error('Failed to fetch feed');
+}
- const buffer = await response.arrayBuffer();
- const uint8Array = new Uint8Array(buffer);
+/**
+ * Decode buffer with specified encoding
+ */
+function decodeWithEncoding(uint8Array: Uint8Array, encoding: string): string {
+ // Normalize encoding names
+ const normalizedEncoding = encoding.toLowerCase().replace(/-/g, '');
- // Try to detect encoding from XML declaration
- const firstBytes = uint8Array.slice(0, 200);
- const asciiText = new TextDecoder('ascii').decode(firstBytes);
- const encodingMatch = asciiText.match(/encoding=["']([^"']+)["']/i);
+ // Map common encoding aliases
+ const encodingMap: Record<string, string> = {
+ 'latin1': 'iso-8859-1',
+ 'iso88591': 'iso-8859-1',
+ 'windows1252': 'windows-1252',
+ 'cp1252': 'windows-1252',
+ 'iso885915': 'iso-8859-15',
+ 'utf8': 'utf-8',
+ };
- let encoding = 'utf-8';
- if (encodingMatch && encodingMatch[1]) {
- encoding = encodingMatch[1].toLowerCase();
- }
+ const targetEncoding = encodingMap[normalizedEncoding] || encoding;
- // Convert to UTF-8 if needed
- if (encoding === 'iso-8859-1' || encoding === 'latin1') {
- return iconv.decode(Buffer.from(uint8Array), 'iso-8859-1');
- } else if (encoding === 'windows-1252') {
- return iconv.decode(Buffer.from(uint8Array), 'windows-1252');
- } else {
- // Assume UTF-8
- return new TextDecoder('utf-8').decode(uint8Array);
+ // Use iconv-lite for non-UTF-8 encodings
+ if (targetEncoding !== 'utf-8' && iconv.encodingExists(targetEncoding)) {
+ return iconv.decode(Buffer.from(uint8Array), targetEncoding);
}
+
+ // Default to UTF-8
+ return new TextDecoder('utf-8').decode(uint8Array);
}
/**
@@ -84,8 +213,13 @@ export interface ParsedFeed {
title: string;
description?: string;
link?: string;
+ language?: string;
imageUrl?: string;
+ categories?: string[];
items: ParsedArticle[];
+ // Caching headers for conditional requests
+ etag?: string;
+ lastModified?: string;
}
/**
@@ -100,24 +234,30 @@ export interface ParsedArticle {
author?: string;
publishedAt?: Date;
imageUrl?: string;
+ categories?: string[];
}
-/**
- * Feed parser timeout configuration
- */
-const FETCH_TIMEOUT = 30000; // 30 seconds
-
/**
* Parse an RSS 2.0 or Atom 1.0 feed from a URL
* @param url - The feed URL to parse
- * @returns Parsed feed data with articles
+ * @param options - Optional fetch options for conditional requests
+ * @returns Parsed feed data with articles, or null if not modified (304)
* @throws Error if feed cannot be parsed or fetched
*/
-export async function parseFeedUrl(url: string): Promise<ParsedFeed> {
+export async function parseFeedUrl(
+ url: string,
+ options?: FetchOptions
+): Promise<ParsedFeed | null> {
try {
- // Fetch with proper encoding handling
- const xmlContent = await fetchFeedWithEncoding(url);
- const feed = parseRawFeed(xmlContent) as RawFeed;
+ // Fetch with proper encoding handling, timeout, and retries
+ const result = await fetchFeedWithEncoding(url, options);
+
+ // Return null for 304 Not Modified responses
+ if (result.notModified) {
+ return null;
+ }
+
+ const feed = parseRawFeed(result.content) as RawFeed;
// Extract and ensure imageUrl is a string
let imageUrl = extractFeedImage(feed);
@@ -126,13 +266,22 @@ export async function parseFeedUrl(url: string): Promise {
if (Array.isArray(imageUrl)) {
imageUrl = imageUrl[0];
}
+
+ // Extract categories
+ const categories = feed.categories
+ ?.map(cat => cat.label || cat.term)
+ .filter((c): c is string => !!c);
return {
title: feed.title || "Untitled Feed",
description: feed.description || undefined,
link: feed.url || undefined,
+ language: feed.language || undefined,
imageUrl: imageUrl,
+ categories: categories?.length ? categories : undefined,
items: feed.items.map((item) => parseArticle(item)),
+ etag: result.etag,
+ lastModified: result.lastModified,
};
} catch (error) {
if (error instanceof Error) {
@@ -160,14 +309,30 @@ export async function validateFeedUrl(url: string): Promise<boolean> {
}
// Try to parse the feed with encoding handling
- const xmlContent = await fetchFeedWithEncoding(url);
- const result = parseRawFeed(xmlContent) as RawFeed;
+ const result = await fetchFeedWithEncoding(url);
+ if (result.notModified || !result.content) {
+ return false;
+ }
+ parseRawFeed(result.content) as RawFeed;
return true;
} catch (error) {
return false;
}
}
+/**
+ * Check if a URL looks like an image based on extension
+ */
+function looksLikeImageUrl(url: string): boolean {
+ try {
+ const pathname = new URL(url).pathname.toLowerCase();
+ const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.bmp', '.ico'];
+ return imageExtensions.some(ext => pathname.endsWith(ext));
+ } catch {
+ return false;
+ }
+}
+
/**
* Parse a single feed item into an article
* Handles both RSS and Atom item formats
@@ -203,6 +368,11 @@ function parseArticle(item: RawFeedItem): ParsedArticle {
// Extract author (handle both RSS and Atom formats)
const author = extractAuthor(item);
+
+ // Extract categories/tags
+ const categories = item.categories
+ ?.map(cat => cat.label || cat.term)
+ .filter((c): c is string => !!c);
// Decode HTML entities from all text fields
const decodedTitle = item.title ? decodeHtmlEntities(item.title) : "Untitled";
@@ -223,6 +393,7 @@ function parseArticle(item: RawFeedItem): ParsedArticle {
// publishedAt is already validated above and guaranteed to be a valid Date
publishedAt,
imageUrl,
+ categories: categories?.length ? categories : undefined,
};
}
@@ -253,7 +424,52 @@ function extractAuthor(item: RawFeedItem): string | undefined {
function extractContent(item: RawFeedItem): string {
// @rowanmanning/feed-parser provides content and description
// Prefer content (which includes content:encoded from RSS) over description
- return item.content || item.description || "";
+ const rawContent = item.content || item.description || "";
+
+ // Format plain text CDATA content with paragraph breaks
+ return formatPlainTextContent(rawContent);
+}
+
+/**
+ * Break long CDATA/plain text into paragraphs by adding HTML breaks after sentence boundaries.
+ * Only applies if the text has no existing line breaks or HTML tags (typical of CDATA blocks).
+ * Converts sentence breaks to <br><br> for proper HTML display.
+ */
+function formatPlainTextContent(text: string): string {
+ // Skip if already has line breaks or HTML tags
+ if (text.includes('\n') || text.includes('\r') || /<[a-z][\s\S]*>/i.test(text)) {
+ return text;
+ }
+
+ // Common abbreviations that shouldn't trigger a line break
+ const abbreviations = [
+ 'Mr', 'Mrs', 'Ms', 'Dr', 'Prof', 'Sr', 'Jr', 'Rev', 'Gen', 'Col', 'Lt', 'Sgt',
+ 'St', 'Ave', 'Blvd', 'Rd', 'Inc', 'Corp', 'Ltd', 'Co', 'vs', 'etc', 'al',
+ 'Jan', 'Feb', 'Mar', 'Apr', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
+ 'Fig', 'No', 'Vol', 'pp', 'ed', 'trans', 'approx', 'est', 'min', 'max'
+ ];
+
+ // Build regex pattern: sentence-ending punctuation followed by space and capital letter
+ // BUT NOT after single capital letter (initials like "J. K.") or known abbreviations
+ return text.replace(/([.!?])\s+([A-Z])/g, (match, punct, nextChar, offset) => {
+ // Get the word before the punctuation
+ const textBefore = text.substring(0, offset as number);
+ const wordBeforeMatch = textBefore.match(/(\S+)$/);
+ const wordBefore = wordBeforeMatch?.[1] ?? '';
+
+ // Don't break after single capital letter (initials like "J." or "A.")
+ if (/^[A-Z]$/.test(wordBefore)) {
+ return match;
+ }
+
+ // Don't break after common abbreviations
+ if (abbreviations.some(abbr => wordBefore.toLowerCase() === abbr.toLowerCase())) {
+ return match;
+ }
+
+ // This looks like a real sentence boundary
+ return `${punct}<br><br>${nextChar}`;
+ });
}
/**
@@ -262,7 +478,8 @@ function extractContent(item: RawFeedItem): string {
function extractExcerpt(item: RawFeedItem, content: string): string | undefined {
// If description is different from content, use it as excerpt
if (item.description && item.description !== content) {
- return item.description.substring(0, 500);
+ const processed = formatPlainTextContent(item.description);
+ return processed.substring(0, 500);
}
// Otherwise, generate excerpt from content
@@ -277,11 +494,12 @@ function extractExcerpt(item: RawFeedItem, content: string): string | undefined
/**
* Extract image URL from feed metadata
* Supports both RSS and Atom formats
+ * Filters out placeholder images
*/
function extractFeedImage(feed: RawFeed): string | undefined {
// @rowanmanning/feed-parser provides image as an object with url
if (feed.image?.url) {
- return feed.image.url;
+ return filterPlaceholderImage(feed.image.url);
}
return undefined;
@@ -289,36 +507,68 @@ function extractFeedImage(feed: RawFeed): string | undefined {
/**
* Extract image URL from article
+ * Filters out placeholder images and prioritizes real images
*/
function extractArticleImage(item: RawFeedItem, content: string): string | undefined {
- // Check media array (includes enclosures and media:content)
+ // Collect all candidate images
+ const candidates: (string | undefined)[] = [];
+
+ // 1. Check item.image first (includes media:thumbnail, itunes:image, media:content images)
+ // This is the primary source for article header images
+ if (item.image?.url) {
+ candidates.push(item.image.url);
+ }
+
+ // 2. Check media array for image types (enclosures and media:content)
if (item.media && item.media.length > 0) {
- // Find first image media item
+ // First, check for media items with explicit image property (thumbnails)
for (const media of item.media) {
- if (media.url && media.type?.startsWith("image/")) {
- return media.url;
+ if (media.image) {
+ candidates.push(media.image);
}
}
- // If no explicit image type, use first media with URL
+ // Then, find image media items by type or mimeType
+ for (const media of item.media) {
+ const isImage = media.type === "image" ||
+ media.mimeType?.startsWith("image/") ||
+ media.type?.startsWith("image");
+ if (media.url && isImage) {
+ candidates.push(media.url);
+ }
+ }
+ // Only use untyped media if URL looks like an image
const firstMedia = item.media[0];
- if (firstMedia?.url) {
- return firstMedia.url;
+ if (firstMedia?.url && looksLikeImageUrl(firstMedia.url)) {
+ candidates.push(firstMedia.url);
}
}
- // Extract from content as fallback
- return extractImageFromContent(content) || undefined;
+ // 3. Extract from content as fallback - use the new function that filters placeholders
+ const contentImage = extractFirstValidImageFromHtml(content);
+ if (contentImage) {
+ candidates.push(contentImage);
+ }
+
+ // Return the first non-placeholder image
+ for (const candidate of candidates) {
+ const filtered = filterPlaceholderImage(candidate);
+ if (filtered) {
+ return filtered;
+ }
+ }
+
+ return undefined;
}
/**
* Extract first image URL from HTML content
* @param html - HTML content to search
* @returns First image URL found, or null
+ * @deprecated Use extractFirstValidImageFromHtml from image-utils.ts instead
*/
export function extractImageFromContent(html: string): string | null {
- const imgRegex = /<img[^>]+src=["']([^"']+)["']/i;
- const match = html.match(imgRegex);
- return (match && match[1]) ? match[1] : null;
+ // Use the new function that filters out placeholder images
+ return extractFirstValidImageFromHtml(html);
}
/**
diff --git a/src/lib/image-utils.ts b/src/lib/image-utils.ts
new file mode 100644
index 0000000..8a7236a
--- /dev/null
+++ b/src/lib/image-utils.ts
@@ -0,0 +1,256 @@
+/**
+ * Image utility functions for detecting and filtering placeholder images
+ */
+
+/**
+ * Known placeholder image URL patterns
+ * These patterns match common placeholder/lazy-load images from various sources
+ */
+const PLACEHOLDER_URL_PATTERNS: RegExp[] = [
+ // BBC placeholders
+ /grey-placeholder\.png$/i,
+ /placeholder\.png$/i,
+ /placeholder\.jpg$/i,
+ /placeholder\.gif$/i,
+ /placeholder\.svg$/i,
+ /placeholder\.webp$/i,
+
+ // Common placeholder naming patterns
+ /\/placeholder[_-]?/i,
+ /\/lazy[_-]?load/i,
+ /\/loading[_-]?/i,
+ /\/blank\.(png|jpg|gif|svg|webp)$/i,
+ /\/empty\.(png|jpg|gif|svg|webp)$/i,
+ /\/spacer\.(png|jpg|gif|svg|webp)$/i,
+ /\/pixel\.(png|jpg|gif|svg|webp)$/i,
+ /\/1x1\.(png|jpg|gif|svg|webp)$/i,
+ /\/transparent\.(png|gif|svg|webp)$/i,
+
+ // Common CDN placeholder patterns
+ /via\.placeholder\.com/i,
+ /placehold\.it/i,
+ /placeholdit\.imgix\.net/i,
+ /dummyimage\.com/i,
+ /placeholder\.pics/i,
+ /fakeimg\.pl/i,
+ /lorempixel\.com/i,
+ /placekitten\.com/i,
+ /placeimg\.com/i,
+
+ // Data URIs that are likely placeholders (very small base64 images)
+ /^data:image\/[^;]+;base64,.{0,200}$/i,
+];
+
+/**
+ * Known placeholder image hostnames/domains
+ */
+const PLACEHOLDER_DOMAINS: string[] = [
+ 'via.placeholder.com',
+ 'placehold.it',
+ 'placeholdit.imgix.net',
+ 'dummyimage.com',
+ 'placeholder.pics',
+ 'fakeimg.pl',
+ 'lorempixel.com',
+ 'placekitten.com',
+ 'placeimg.com',
+];
+
+/**
+ * Keywords in URL paths that indicate placeholder images
+ */
+const PLACEHOLDER_PATH_KEYWORDS: string[] = [
+ 'placeholder',
+ 'grey-placeholder',
+ 'gray-placeholder',
+ 'lazy-load',
+ 'lazyload',
+ 'loading',
+ 'blank',
+ 'empty',
+ 'spacer',
+ 'pixel',
+ '1x1',
+ 'transparent',
+ 'default-image',
+ 'no-image',
+ 'noimage',
+ 'missing',
+];
+
+/**
+ * Check if an image URL is likely a placeholder image
+ *
+ * @param url - The image URL to check
+ * @returns true if the URL appears to be a placeholder image
+ */
+export function isPlaceholderImage(url: string | null | undefined): boolean {
+ if (!url) return true; // Treat null/undefined as placeholder
+
+ const trimmedUrl = url.trim();
+ if (!trimmedUrl) return true;
+
+ // Check against URL patterns
+ for (const pattern of PLACEHOLDER_URL_PATTERNS) {
+ if (pattern.test(trimmedUrl)) {
+ return true;
+ }
+ }
+
+ // Parse URL for more detailed checks
+ try {
+ // Handle data URIs separately
+ if (trimmedUrl.startsWith('data:')) {
+ // Very small base64 images are likely placeholders
+ // A typical 1x1 pixel PNG is about 68 characters in base64
+ // Allow up to ~500 chars which would be a very small image
+ return trimmedUrl.length < 500;
+ }
+
+ const urlObj = new URL(trimmedUrl);
+
+ // Check hostname against known placeholder domains
+ const hostname = urlObj.hostname.toLowerCase();
+ if (PLACEHOLDER_DOMAINS.some(domain => hostname.includes(domain))) {
+ return true;
+ }
+
+ // Check path for placeholder keywords
+ const pathname = urlObj.pathname.toLowerCase();
+ for (const keyword of PLACEHOLDER_PATH_KEYWORDS) {
+ if (pathname.includes(keyword)) {
+ return true;
+ }
+ }
+
+ // Check for very small dimension indicators in URL
+ // e.g., /1x1.png, /1/1.png, ?w=1&h=1
+ if (/[\/\?&]1x1[\/\.\?&]|[\/\?&]w=1[&$]|[\/\?&]h=1[&$]/i.test(trimmedUrl)) {
+ return true;
+ }
+
+ } catch {
+ // If URL parsing fails, check the raw string
+ const lowerUrl = trimmedUrl.toLowerCase();
+ for (const keyword of PLACEHOLDER_PATH_KEYWORDS) {
+ if (lowerUrl.includes(keyword)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/**
+ * Filter out placeholder images from a URL, returning undefined if it's a placeholder
+ *
+ * @param url - The image URL to check
+ * @returns The URL if it's not a placeholder, undefined otherwise
+ */
+export function filterPlaceholderImage(url: string | null | undefined): string | undefined {
+ if (isPlaceholderImage(url)) {
+ return undefined;
+ }
+ return url || undefined;
+}
+
+/**
+ * Get the first non-placeholder image from a list of URLs
+ *
+ * @param urls - Array of image URLs to check
+ * @returns The first non-placeholder URL, or undefined if all are placeholders
+ */
+export function getFirstValidImage(urls: (string | null | undefined)[]): string | undefined {
+ for (const url of urls) {
+ if (!isPlaceholderImage(url)) {
+ return url || undefined;
+ }
+ }
+ return undefined;
+}
+
+/**
+ * Remove placeholder images from HTML content
+ * This removes img tags that have placeholder src attributes
+ *
+ * @param html - HTML content to process
+ * @returns HTML with placeholder images removed
+ */
+export function removePlaceholderImagesFromHtml(html: string): string {
+ if (!html) return html;
+
+ // Match img tags and check their src
+ return html.replace(
 /<img([^>]*?)>/gi,
+ (match, attributes) => {
+ // Extract src attribute
+ const srcMatch = attributes.match(/src\s*=\s*["']([^"']+)["']/i);
+ if (srcMatch && srcMatch[1]) {
+ const src = srcMatch[1];
+ if (isPlaceholderImage(src)) {
+ // Remove the entire img tag if it's a placeholder
+ return '';
+ }
+ }
+
+ // Also check data-src for lazy-loaded images that might be placeholders
+ const dataSrcMatch = attributes.match(/data-src\s*=\s*["']([^"']+)["']/i);
+ if (dataSrcMatch && dataSrcMatch[1]) {
+ // If data-src exists and is valid, replace src with data-src
+ const dataSrc = dataSrcMatch[1];
+ if (!isPlaceholderImage(dataSrc)) {
+ // Replace placeholder src with data-src
+ const newAttributes = attributes
+ .replace(/src\s*=\s*["'][^"']+["']/i, `src="${dataSrc}"`)
+ .replace(/data-src\s*=\s*["'][^"']+["']/i, '');
+ return `<img${newAttributes}>`;
+ }
+ }
+
+ return match;
+ }
+ );
+}
+
+/**
+ * Extract the first valid (non-placeholder) image from HTML content
+ *
+ * @param html - HTML content to search
+ * @returns The first valid image URL, or null if none found
+ */
+export function extractFirstValidImageFromHtml(html: string): string | null {
+ if (!html) return null;
+
+ // Match all img tags
+ const imgRegex = /<img([^>]*?)>/gi;
+ let match;
+
+ while ((match = imgRegex.exec(html)) !== null) {
+ const attributes = match[1];
+ if (!attributes) continue;
+
+ // Try src first
+ const srcMatch = attributes.match(/src\s*=\s*["']([^"']+)["']/i);
+ if (srcMatch && srcMatch[1] && !isPlaceholderImage(srcMatch[1])) {
+ return srcMatch[1];
+ }
+
+ // Try data-src for lazy-loaded images
+ const dataSrcMatch = attributes.match(/data-src\s*=\s*["']([^"']+)["']/i);
+ if (dataSrcMatch && dataSrcMatch[1] && !isPlaceholderImage(dataSrcMatch[1])) {
+ return dataSrcMatch[1];
+ }
+
+ // Try srcset (get the first URL)
+ const srcsetMatch = attributes.match(/srcset\s*=\s*["']([^"']+)["']/i);
+ if (srcsetMatch && srcsetMatch[1]) {
+ const firstSrcset = srcsetMatch[1].split(',')[0]?.trim().split(/\s+/)[0];
+ if (firstSrcset && !isPlaceholderImage(firstSrcset)) {
+ return firstSrcset;
+ }
+ }
+ }
+
+ return null;
+}
diff --git a/src/lib/llm/__tests__/json-parser.test.ts b/src/lib/llm/__tests__/json-parser.test.ts
new file mode 100644
index 0000000..84357ce
--- /dev/null
+++ b/src/lib/llm/__tests__/json-parser.test.ts
@@ -0,0 +1,119 @@
+/**
+ * Tests for JSON Parser Utilities
+ */
+
+import { extractJSON, parseJSONFromLLM, parseJSONFromLLMSafe } from "../json-parser";
+
+describe("extractJSON", () => {
+ it("should extract plain JSON", () => {
+ const input = '{"key": "value"}';
+ const result = extractJSON(input);
+ expect(result).toBe('{"key": "value"}');
+ });
+
+ it("should extract JSON from markdown code blocks with json tag", () => {
+ const input = "Here's the JSON:\n```json\n{\"key\": \"value\"}\n```";
+ const result = extractJSON(input);
+ expect(result).toBe('{"key": "value"}');
+ });
+
+ it("should extract JSON from markdown code blocks without json tag", () => {
+ const input = "```\n{\"key\": \"value\"}\n```";
+ const result = extractJSON(input);
+ expect(result).toBe('{"key": "value"}');
+ });
+
+ it("should remove introductory text before JSON", () => {
+ const input = "Sure! Here's the result: {\"key\": \"value\"}";
+ const result = extractJSON(input);
+ expect(result).toBe('{"key": "value"}');
+ });
+
+ it("should remove trailing text after JSON", () => {
+ const input = '{\"key\": \"value\"} I hope this helps!';
+ const result = extractJSON(input);
+ expect(result).toBe('{"key": "value"}');
+ });
+
+ it("should handle JSON arrays", () => {
+ const input = '["item1", "item2", "item3"]';
+ const result = extractJSON(input);
+ expect(result).toBe('["item1", "item2", "item3"]');
+ });
+
+ it("should extract JSON array from markdown", () => {
+ const input = "```json\n[\"item1\", \"item2\"]\n```";
+ const result = extractJSON(input);
+ expect(result).toBe('["item1", "item2"]');
+ });
+
+ it("should handle complex nested JSON", () => {
+ const input = `
+Here's your response:
+\`\`\`json
+{
+ "summary": "This is a summary",
+ "keyPoints": ["point 1", "point 2"],
+ "topics": ["topic1", "topic2"],
+ "sentiment": "positive"
+}
+\`\`\`
+I hope this is helpful!
+ `.trim();
+ const result = extractJSON(input);
+ const parsed = JSON.parse(result);
+ expect(parsed.summary).toBe("This is a summary");
+ expect(parsed.keyPoints).toHaveLength(2);
+ expect(parsed.topics).toHaveLength(2);
+ });
+});
+
+describe("parseJSONFromLLM", () => {
+ it("should parse valid JSON", () => {
+ const input = '{"key": "value"}';
+ const result = parseJSONFromLLM(input);
+ expect(result).toEqual({ key: "value" });
+ });
+
+ it("should parse JSON from markdown", () => {
+ const input = "```json\n{\"key\": \"value\"}\n```";
+ const result = parseJSONFromLLM(input);
+ expect(result).toEqual({ key: "value" });
+ });
+
+ it("should throw error for invalid JSON", () => {
+ const input = "This is not JSON";
+ expect(() => parseJSONFromLLM(input)).toThrow();
+ });
+
+ it("should parse JSON with context for logging", () => {
+ const input = '{"key": "value"}';
+ const result = parseJSONFromLLM(input, {
+ model: "gpt-4",
+ operation: "test",
+ });
+ expect(result).toEqual({ key: "value" });
+ });
+});
+
+describe("parseJSONFromLLMSafe", () => {
+ it("should parse valid JSON", () => {
+ const input = '{"key": "value"}';
+ const result = parseJSONFromLLMSafe(input, { fallback: true });
+ expect(result).toEqual({ key: "value" });
+ });
+
+ it("should return fallback for invalid JSON", () => {
+ const input = "This is not JSON";
+ const fallback = { fallback: true, empty: [] };
+ const result = parseJSONFromLLMSafe(input, fallback);
+ expect(result).toEqual(fallback);
+ });
+
+ it("should return fallback for empty response", () => {
+ const input = "";
+ const fallback = { summary: "", keyPoints: [], topics: [] };
+ const result = parseJSONFromLLMSafe(input, fallback);
+ expect(result).toEqual(fallback);
+ });
+});
diff --git a/src/lib/llm/json-parser.ts b/src/lib/llm/json-parser.ts
new file mode 100644
index 0000000..755e49c
--- /dev/null
+++ b/src/lib/llm/json-parser.ts
@@ -0,0 +1,117 @@
+/**
+ * JSON Parser Utilities for LLM Responses
+ * Handles extraction of JSON from responses that may be wrapped in markdown or have introductory text
+ */
+
+import { logger } from "../logger";
+
+/**
+ * Extract JSON from LLM response that may be wrapped in markdown code blocks or have introductory text
+ *
+ * Handles cases like:
+ * - "Here's the result: ```json\n{...}\n```"
+ * - "```\n{...}\n```"
+ * - "Sure! ```json {... } ```"
+ * - "{...}" (plain JSON)
+ */
+export function extractJSON(rawResponse: string): string {
+ // Trim whitespace
+ let cleaned = rawResponse.trim();
+
+ // Try to extract JSON from markdown code blocks
+ // Match ```json or ``` followed by JSON content
+ const codeBlockMatch = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
+ if (codeBlockMatch?.[1]) {
+ cleaned = codeBlockMatch[1].trim();
+ }
+
+ // Remove any leading text before the first { or [
+ const jsonStartMatch = cleaned.match(/^[^{[]*([{[][\s\S]*)/);
+ if (jsonStartMatch?.[1]) {
+ cleaned = jsonStartMatch[1];
+ }
+
+ // Remove any trailing text after the last } or ]
+ const jsonEndMatch = cleaned.match(/([\s\S]*[}\]])[^}\]]*$/);
+ if (jsonEndMatch?.[1]) {
+ cleaned = jsonEndMatch[1];
+ }
+
+ return cleaned.trim();
+}
+
+/**
+ * Parse JSON from LLM response with automatic extraction and detailed error logging
+ *
+ * @param rawResponse - Raw LLM response that may contain JSON
+ * @param context - Context information for error logging (e.g., model name, operation)
+ * @returns Parsed JSON object
+ * @throws Error if JSON cannot be parsed even after extraction
+ */
+export function parseJSONFromLLM(
+ rawResponse: string,
+ context?: { model?: string; operation?: string }
+): unknown {
+ // Log what we're trying to parse
+ logger.debug("Attempting to parse LLM response", {
+ responseLength: rawResponse.length,
+ responsePreview: rawResponse.substring(0, 200),
+ isEmpty: rawResponse.trim().length === 0,
+ model: context?.model,
+ operation: context?.operation,
+ });
+
+ const extracted = extractJSON(rawResponse);
+
+ logger.debug("Extracted JSON from response", {
+ extractedLength: extracted.length,
+ extractedPreview: extracted.substring(0, 200),
+ wasModified: extracted !== rawResponse,
+ model: context?.model,
+ operation: context?.operation,
+ });
+
+ try {
+ return JSON.parse(extracted);
+ } catch (error) {
+ // Log detailed error information
+ logger.error("Failed to parse LLM JSON response", {
+ parseError: error instanceof Error ? error.message : String(error),
+ rawResponse: rawResponse.substring(0, 1000), // First 1000 chars to avoid huge logs
+ extractedJSON: extracted.substring(0, 1000),
+ responseLength: rawResponse.length,
+ extractedLength: extracted.length,
+ isEmpty: rawResponse.trim().length === 0,
+ model: context?.model,
+ operation: context?.operation,
+ });
+
+ throw new Error(
+ `Failed to parse JSON from LLM response: ${error instanceof Error ? error.message : String(error)}`
+ );
+ }
+}
+
+/**
+ * Safely parse JSON from LLM response with fallback value
+ *
+ * @param rawResponse - Raw LLM response
+ * @param fallback - Fallback value if parsing fails
+ * @param context - Context information for error logging
+ * @returns Parsed JSON or fallback value
+ */
+export function parseJSONFromLLMSafe<T>(
+ rawResponse: string,
+ fallback: T,
+ context?: { model?: string; operation?: string }
+): T {
+ try {
+ return parseJSONFromLLM(rawResponse, context) as T;
+ } catch (error) {
+ logger.warn("Using fallback value due to JSON parse error", {
+ context,
+ fallbackUsed: true,
+ });
+ return fallback;
+ }
+}
diff --git a/src/lib/llm/ollama-provider.ts b/src/lib/llm/ollama-provider.ts
index b1e79e8..4eddc68 100644
--- a/src/lib/llm/ollama-provider.ts
+++ b/src/lib/llm/ollama-provider.ts
@@ -5,6 +5,7 @@
import { env } from "@/env";
import { logger } from "../logger";
+import { parseJSONFromLLM } from "./json-parser";
import type {
LLMProviderInterface,
LLMCompletionRequest,
@@ -103,21 +104,24 @@ Respond in JSON format with keys: summary, keyPoints (array), topics (array), se
// Try to parse JSON response
try {
- const parsed = JSON.parse(response.content);
+ const parsed = parseJSONFromLLM(response.content, {
+ model: this.model,
+ operation: "summarizeArticle",
+ }) as { summary?: string; keyPoints?: string[]; topics?: string[]; sentiment?: string };
return {
summary: parsed.summary || "",
keyPoints: Array.isArray(parsed.keyPoints) ? parsed.keyPoints : [],
topics: Array.isArray(parsed.topics) ? parsed.topics : [],
sentiment: ["positive", "neutral", "negative"].includes(
- parsed.sentiment
+ parsed.sentiment ?? ""
)
- ? parsed.sentiment
+ ? (parsed.sentiment as "positive" | "neutral" | "negative")
: "neutral",
};
} catch (parseError) {
// Fallback: extract from text response
- logger.warn("Failed to parse LLM JSON response, using fallback", {
- parseError,
+ logger.warn("Using fallback for article summary due to parse error", {
+ model: this.model,
});
return {
@@ -155,12 +159,18 @@ Respond with a JSON array of strings.`;
});
try {
- const parsed = JSON.parse(response.content);
+ const parsed = parseJSONFromLLM(response.content, {
+ model: this.model,
+ operation: "extractKeyPoints",
+ }) as string[] | unknown;
if (Array.isArray(parsed)) {
return parsed.slice(0, count);
}
} catch (parseError) {
// Fallback: split by newlines and filter
+ logger.warn("Using fallback for key points extraction", {
+ model: this.model,
+ });
const lines = response.content
.split("\n")
.map((line) => line.trim())
@@ -200,7 +210,10 @@ Respond with a JSON array of lowercase strings.`;
});
try {
- const parsed = JSON.parse(response.content);
+ const parsed = parseJSONFromLLM(response.content, {
+ model: this.model,
+ operation: "detectTopics",
+ }) as string[] | unknown;
if (Array.isArray(parsed)) {
return parsed
.map((topic) => String(topic).toLowerCase().trim())
@@ -209,6 +222,9 @@ Respond with a JSON array of lowercase strings.`;
}
} catch (parseError) {
// Fallback: extract comma-separated values
+ logger.warn("Using fallback for topic detection", {
+ model: this.model,
+ });
const topics = response.content
.split(/[,\n]/)
.map((topic) => topic.toLowerCase().trim())
diff --git a/src/lib/llm/openai-provider.ts b/src/lib/llm/openai-provider.ts
index 648d62b..9aba1d9 100644
--- a/src/lib/llm/openai-provider.ts
+++ b/src/lib/llm/openai-provider.ts
@@ -5,6 +5,7 @@
import { env } from "@/env";
import { logger } from "../logger";
+import { parseJSONFromLLM } from "./json-parser";
import type {
LLMProviderInterface,
LLMCompletionRequest,
@@ -85,8 +86,24 @@ export class OpenAILLMProvider implements LLMProviderInterface {
}
const data = await response.json();
+
+ // Log the full API response for debugging
+ logger.debug("Raw LLM API response", {
+ model: data.model,
+ choicesCount: data.choices?.length,
+ hasUsage: !!data.usage,
+ fullResponse: JSON.stringify(data).substring(0, 2000), // First 2000 chars
+ });
+
const choice = data.choices[0];
+ // Log the message content specifically
+ logger.debug("LLM message content", {
+ contentLength: choice.message?.content?.length ?? 0,
+ content: choice.message?.content?.substring(0, 500) ?? "(empty)", // First 500 chars
+ hasContent: !!choice.message?.content,
+ });
+
logger.debug("LLM response received", {
model: data.model,
promptTokens: data.usage.prompt_tokens,
@@ -144,23 +161,23 @@ You MUST respond ONLY with valid JSON in this exact format:
// Try to parse JSON response
try {
- const parsed = JSON.parse(response.content);
+ const parsed = parseJSONFromLLM(response.content, {
+ model: this.model,
+ operation: "summarizeArticle",
+ }) as { summary?: string; keyPoints?: string[]; topics?: string[]; sentiment?: string };
return {
summary: parsed.summary || "",
keyPoints: Array.isArray(parsed.keyPoints) ? parsed.keyPoints : [],
topics: Array.isArray(parsed.topics) ? parsed.topics : [],
sentiment: ["positive", "neutral", "negative"].includes(
- parsed.sentiment
+ parsed.sentiment ?? ""
)
- ? parsed.sentiment
+ ? (parsed.sentiment as "positive" | "neutral" | "negative")
: "neutral",
};
} catch (parseError) {
// Fallback: extract from text response
- logger.error("Failed to parse LLM JSON response, using fallback", {
- parseError: parseError instanceof Error ? parseError.message : String(parseError),
- fullRawResponse: response.content,
- responseLength: response.content.length,
+ logger.warn("Using fallback for article summary due to parse error", {
model: this.model,
});
@@ -199,12 +216,18 @@ You MUST respond ONLY with a valid JSON array of strings like: ["point 1", "poin
});
try {
- const parsed = JSON.parse(response.content);
+ const parsed = parseJSONFromLLM(response.content, {
+ model: this.model,
+ operation: "extractKeyPoints",
+ }) as string[] | unknown;
if (Array.isArray(parsed)) {
return parsed.slice(0, count);
}
} catch (parseError) {
// Fallback: split by newlines and filter
+ logger.warn("Using fallback for key points extraction", {
+ model: this.model,
+ });
const lines = response.content
.split("\n")
.map((line) => line.trim())
@@ -244,7 +267,10 @@ You MUST respond ONLY with a valid JSON array of lowercase strings like: ["topic
});
try {
- const parsed = JSON.parse(response.content);
+ const parsed = parseJSONFromLLM(response.content, {
+ model: this.model,
+ operation: "detectTopics",
+ }) as string[] | unknown;
if (Array.isArray(parsed)) {
return parsed
.map((topic) => String(topic).toLowerCase().trim())
@@ -253,6 +279,9 @@ You MUST respond ONLY with a valid JSON array of lowercase strings like: ["topic
}
} catch (parseError) {
// Fallback: extract comma-separated values
+ logger.warn("Using fallback for topic detection", {
+ model: this.model,
+ });
const topics = response.content
.split(/[,\n]/)
.map((topic) => topic.toLowerCase().trim())
diff --git a/src/lib/services/__tests__/saved-search-execution.test.ts b/src/lib/services/__tests__/saved-search-execution.test.ts
index 10c7fed..d476843 100644
--- a/src/lib/services/__tests__/saved-search-execution.test.ts
+++ b/src/lib/services/__tests__/saved-search-execution.test.ts
@@ -4,7 +4,7 @@
* Tests the search execution and scoring algorithms
*/
-import { describe, it, expect, beforeAll, afterAll, beforeEach } from '@jest/globals';
+// Vitest globals are enabled in vitest.config.ts
import { executeSearch, matchArticle } from '../saved-search-execution';
import { prisma } from '@/lib/db';
import { nanoid } from 'nanoid';
diff --git a/src/lib/services/__tests__/saved-search-integration.test.ts b/src/lib/services/__tests__/saved-search-integration.test.ts
index 7e89211..e5f3359 100644
--- a/src/lib/services/__tests__/saved-search-integration.test.ts
+++ b/src/lib/services/__tests__/saved-search-integration.test.ts
@@ -4,7 +4,7 @@
* Tests the complete flow from creation to matching to notifications
*/
-import { describe, it, expect, beforeAll, afterAll } from '@jest/globals';
+// Vitest globals are enabled in vitest.config.ts
import { prisma } from '@/lib/db';
import { nanoid } from 'nanoid';
import { createSavedSearch } from '../saved-search-service';
diff --git a/src/lib/services/__tests__/saved-search-matcher.test.ts b/src/lib/services/__tests__/saved-search-matcher.test.ts
index 245a377..283ba21 100644
--- a/src/lib/services/__tests__/saved-search-matcher.test.ts
+++ b/src/lib/services/__tests__/saved-search-matcher.test.ts
@@ -4,7 +4,7 @@
* Tests the automatic matching of articles to saved searches
*/
-import { describe, it, expect, beforeAll, afterAll } from '@jest/globals';
+// Vitest globals are enabled in vitest.config.ts
import { matchNewArticles, rematchSavedSearch } from '../saved-search-matcher';
import { createSavedSearch } from '../saved-search-service';
import { prisma } from '@/lib/db';
diff --git a/src/lib/services/__tests__/saved-search-service.test.ts b/src/lib/services/__tests__/saved-search-service.test.ts
index c0e04ea..838a9d6 100644
--- a/src/lib/services/__tests__/saved-search-service.test.ts
+++ b/src/lib/services/__tests__/saved-search-service.test.ts
@@ -4,7 +4,7 @@
* Tests CRUD operations and business logic for saved searches
*/
-import { describe, it, expect, beforeAll, afterAll, beforeEach } from '@jest/globals';
+// Vitest globals are enabled in vitest.config.ts
import {
createSavedSearch,
updateSavedSearch,
diff --git a/src/lib/services/admin-settings-service.ts b/src/lib/services/admin-settings-service.ts
index 475f485..3290021 100644
--- a/src/lib/services/admin-settings-service.ts
+++ b/src/lib/services/admin-settings-service.ts
@@ -131,15 +131,20 @@ export async function getEmbeddingConfiguration(): Promise<{
provider: string;
providerSource: "database" | "environment";
model: string;
+ modelSource: "database" | "environment";
batchSize: number;
}> {
- const autoGenerateSetting = await prisma.admin_settings.findUnique({
- where: { key: "embedding_auto_generate" },
- });
-
- const providerSetting = await prisma.admin_settings.findUnique({
- where: { key: "embedding_provider" },
- });
+ const [autoGenerateSetting, providerSetting, modelSetting] = await Promise.all([
+ prisma.admin_settings.findUnique({
+ where: { key: "embedding_auto_generate" },
+ }),
+ prisma.admin_settings.findUnique({
+ where: { key: "embedding_provider" },
+ }),
+ prisma.admin_settings.findUnique({
+ where: { key: "system_llm_embedding_model" },
+ }),
+ ]);
const autoGenerate = autoGenerateSetting
? (autoGenerateSetting.value as boolean)
@@ -151,12 +156,18 @@ export async function getEmbeddingConfiguration(): Promise<{
: env.EMBEDDING_PROVIDER;
const providerSource = providerSetting ? "database" : "environment";
+ const model = modelSetting
+ ? (modelSetting.value as string)
+ : env.EMBEDDING_MODEL;
+ const modelSource = modelSetting ? "database" : "environment";
+
return {
autoGenerate,
autoGenerateSource,
provider,
providerSource,
- model: env.EMBEDDING_MODEL,
+ model,
+ modelSource,
batchSize: env.EMBEDDING_BATCH_SIZE,
};
}
diff --git a/src/lib/services/feed-health-service.ts b/src/lib/services/feed-health-service.ts
index a440dea..bb016fe 100644
--- a/src/lib/services/feed-health-service.ts
+++ b/src/lib/services/feed-health-service.ts
@@ -50,7 +50,7 @@ export async function getFeedHealth(feedId: string): Promise<FeedHealth | null>
   return feeds.map((feed) => ({
feedId: feed.id,
healthStatus: feed.healthStatus as "healthy" | "warning" | "error" | "disabled",
- consecutiveFailures: feed.consecutiveFailures,
+ consecutiveFailures: feed.consecutiveFailures ?? 0,
lastSuccessfulFetch: feed.lastSuccessfulFetch,
lastError: feed.feed_error_log[0]?.errorMessage || null,
httpStatus: feed.httpStatus,
@@ -123,8 +123,8 @@ export async function recordFeedFailure(
if (!feed) return;
- const newFailureCount = feed.consecutiveFailures + 1;
- const shouldDisable = newFailureCount >= feed.autoDisableThreshold;
+ const newFailureCount = (feed.consecutiveFailures ?? 0) + 1;
+ const shouldDisable = newFailureCount >= (feed.autoDisableThreshold ?? 10);
// Update feed health status
await prisma.feeds.update({
@@ -227,7 +227,7 @@ export async function getUnhealthyFeeds(
return feeds.map((feed) => ({
feedId: feed.id,
healthStatus: feed.healthStatus as "healthy" | "warning" | "error" | "disabled",
- consecutiveFailures: feed.consecutiveFailures,
+ consecutiveFailures: feed.consecutiveFailures ?? 0,
lastSuccessfulFetch: feed.lastSuccessfulFetch,
lastError: feed.feed_error_log[0]?.errorMessage || null,
httpStatus: feed.httpStatus,
diff --git a/src/lib/services/feed-refresh-service.ts b/src/lib/services/feed-refresh-service.ts
index 493931f..0a3b45a 100644
--- a/src/lib/services/feed-refresh-service.ts
+++ b/src/lib/services/feed-refresh-service.ts
@@ -72,7 +72,20 @@ export async function refreshFeed(
let extractionUsed = false;
// Parse feed (always try RSS first)
+ // TODO: Pass etag/lastModified from feed record for conditional requests
const parsedFeed = await parseFeedUrl(feed.url);
+
+ // Handle 304 Not Modified - feed hasn't changed
+ if (!parsedFeed) {
+ logger.info(`[FeedRefresh] Feed ${feedId} returned 304 Not Modified, skipping`);
+ return {
+ feedId,
+ success: true,
+ newArticles: 0,
+ updatedArticles: 0,
+ duration: Date.now() - startTime,
+ };
+ }
// If feed has extraction settings and method is not RSS, try content extraction
if (settings && settings.method !== "rss") {
diff --git a/src/lib/services/feed-service.ts b/src/lib/services/feed-service.ts
index 49ec87c..54771f9 100644
--- a/src/lib/services/feed-service.ts
+++ b/src/lib/services/feed-service.ts
@@ -13,6 +13,7 @@ export interface CreateFeedInput {
imageUrl?: string;
categoryIds?: string[];
fetchInterval?: number;
+ extractionMethod?: "rss" | "readability" | "playwright" | "custom";
}
export interface UpdateFeedInput {
@@ -54,7 +55,8 @@ export async function createFeed(data: CreateFeedInput): Promise<Feed> {
throw new Error("Feed already exists");
}
- // Create feed with default extraction method set to "readability"
+ // Create feed with extraction method (default to "readability")
+ const extractionMethod = data.extractionMethod || "readability";
const feed = await prisma.feeds.create({
data: {
id: `feed_${Date.now()}_${Math.random().toString(36).substring(7)}`,
@@ -66,7 +68,7 @@ export async function createFeed(data: CreateFeedInput): Promise<Feed> {
fetchInterval: data.fetchInterval || 60,
settings: {
extraction: {
- method: "readability",
+ method: extractionMethod,
},
},
// updatedAt is auto-managed by Prisma via @updatedAt directive
@@ -97,7 +99,8 @@ export async function createFeed(data: CreateFeedInput): Promise<Feed> {
export async function validateAndCreateFeed(
url: string,
name?: string,
- categoryIds?: string[]
+ categoryIds?: string[],
+ settings?: { method?: "rss" | "readability" | "playwright" | "custom" }
): Promise<Feed> {
// Normalize URL
const normalizedUrl = normalizeFeedUrl(url);
@@ -115,6 +118,11 @@ export async function validateAndCreateFeed(
// Parse feed to get metadata
const parsedFeed = await parseFeedUrl(normalizedUrl);
+
+ // Handle case where feed returns null (304 Not Modified - shouldn't happen for new feeds)
+ if (!parsedFeed) {
+ throw new Error("Unable to parse feed content");
+ }
// Ensure imageUrl is a string (handle array case)
let imageUrl = parsedFeed.imageUrl;
@@ -130,6 +138,7 @@ export async function validateAndCreateFeed(
siteUrl: parsedFeed.link,
imageUrl: imageUrl,
categoryIds,
+ extractionMethod: settings?.method,
});
}
diff --git a/src/lib/services/semantic-search-service.ts b/src/lib/services/semantic-search-service.ts
index 2238ec9..b75ab03 100644
--- a/src/lib/services/semantic-search-service.ts
+++ b/src/lib/services/semantic-search-service.ts
@@ -230,7 +230,8 @@ export async function findRelatedArticles(
throw new Error("Article not found");
}
if (!article.hasEmbedding) {
- throw new Error("Article has no embedding");
+ logger.info("Article has no embedding, returning empty related articles", { articleId });
+ return [];
}
// Now fetch the actual embedding using the article's embedding directly in the query
diff --git a/src/lib/services/summarization-service.ts b/src/lib/services/summarization-service.ts
index eb9f181..b3d2e69 100644
--- a/src/lib/services/summarization-service.ts
+++ b/src/lib/services/summarization-service.ts
@@ -8,6 +8,7 @@ import { env } from "@/env";
import { logger } from "../logger";
import { OpenAILLMProvider } from "../llm/openai-provider";
import { OllamaLLMProvider } from "../llm/ollama-provider";
+import { parseJSONFromLLM } from "../llm/json-parser";
import { cacheGetOrSet } from "../cache/cache-service";
import { CacheKeys, CacheTTL } from "../cache/cache-keys";
import {
@@ -511,7 +512,10 @@ Respond in JSON format with keys: summary, keyPoints (array), topics (array), se
// Parse the response
let summary: ArticleSummary;
try {
- const parsed = JSON.parse(response.content);
+ const parsed = parseJSONFromLLM(response.content, {
+ model: response.model,
+ operation: "summarizeArticleWithTracking",
+ }) as { summary?: string; keyPoints?: string[]; topics?: string[]; sentiment?: string };
summary = {
summary: parsed.summary || "",
keyPoints:
@@ -522,13 +526,13 @@ Respond in JSON format with keys: summary, keyPoints (array), topics (array), se
options?.includeTopics && Array.isArray(parsed.topics)
? parsed.topics
: [],
- sentiment: ["positive", "neutral", "negative"].includes(parsed.sentiment)
- ? parsed.sentiment
+ sentiment: ["positive", "neutral", "negative"].includes(parsed.sentiment ?? "")
+ ? (parsed.sentiment as "positive" | "neutral" | "negative")
: "neutral",
};
} catch (parseError) {
- logger.warn("Failed to parse LLM JSON response, using fallback", {
- parseError,
+ logger.warn("Using fallback for article summary with tracking", {
+ model: response.model,
});
summary = {
summary: response.content.substring(0, 500),
diff --git a/src/lib/services/user-feed-service.ts b/src/lib/services/user-feed-service.ts
index b6ef108..a2849c9 100644
--- a/src/lib/services/user-feed-service.ts
+++ b/src/lib/services/user-feed-service.ts
@@ -9,7 +9,14 @@ import { assignFeedToCategory } from "./user-category-service";
export async function getUserFeeds(userId: string): Promise {
return await prisma.user_feeds.findMany({
where: { userId },
- include: { feeds: true },
+ include: {
+ feeds: true,
+ user_feed_categories: {
+ include: {
+ user_categories: true,
+ },
+ },
+ },
orderBy: { subscribedAt: "desc" },
});
}
diff --git a/src/lib/services/user-preferences-service.ts b/src/lib/services/user-preferences-service.ts
index 9143502..bd37e32 100644
--- a/src/lib/services/user-preferences-service.ts
+++ b/src/lib/services/user-preferences-service.ts
@@ -188,7 +188,7 @@ export async function getDefaultPreferences(): Promise<
articlesPerPage: 20,
defaultView: "expanded",
showReadArticles: true,
- autoMarkAsRead: false,
+ autoMarkAsRead: true,
showRelatedExcerpts: false,
bounceThreshold: 0.25,
showLowRelevanceArticles: true,
diff --git a/src/types/fumadocs.d.ts b/src/types/fumadocs.d.ts
new file mode 100644
index 0000000..e4fe97f
--- /dev/null
+++ b/src/types/fumadocs.d.ts
@@ -0,0 +1,12 @@
+/**
+ * Type declarations for fumadocs-mdx generated modules
+ * These modules are generated at build time in the .source directory
+ */
+
+declare module '../../.source/server' {
+ export const docs: any;
+}
+
+declare module 'fumadocs-mdx:collections/server' {
+ export const docs: any;
+}
diff --git a/tests/components/PreferencesModal.snapshot.test.tsx b/tests/components/PreferencesModal.snapshot.test.tsx
index 8d700bc..253b1f6 100644
--- a/tests/components/PreferencesModal.snapshot.test.tsx
+++ b/tests/components/PreferencesModal.snapshot.test.tsx
@@ -19,7 +19,7 @@ vi.mock('@/hooks/queries/use-user-preferences', () => ({
articlesPerPage: 20,
defaultView: 'expanded',
showReadArticles: true,
- autoMarkAsRead: false,
+ autoMarkAsRead: true,
},
isLoading: false,
error: null,
diff --git a/tsconfig.json b/tsconfig.json
index 609d23e..cd860c5 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -38,5 +38,5 @@
"**/*.mts",
"tests/**/*.ts"
],
- "exclude": ["node_modules", "scripts/tests", "**/__tests__/**", "**/*.test.ts", "**/*.test.tsx"]
+ "exclude": ["node_modules", "scripts/tests", "**/__tests__/**", "**/*.test.ts", "**/*.test.tsx", ".source"]
}