Skip to content

Commit 77a20db

Browse files
committed
Precompute frame timestamps for filtering
Add _get_all_frame_timestamps to load and sort all frame timestamps per episode from Elasticsearch, and include it in the async gather tasks. Modify _segment_passes_all to accept the precomputed timestamps and use bisect to find the next frame timestamp (avoiding per-segment sorting). Import bisect and RequestError, handle ES RequestError by returning an empty map, and log the number of episodes loaded. This optimizes segment overlap checks and reduces repeated sorting work.
1 parent cb55fc0 commit 77a20db

1 file changed

Lines changed: 57 additions & 10 deletions

File tree

bot/search/filter_applicator.py

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import bisect
23
import logging
34
import math
45
from typing import (
@@ -10,6 +11,8 @@
1011
Tuple,
1112
)
1213

14+
from elasticsearch import RequestError
15+
1316
from bot.responses.not_sending_videos.emotions_handler_responses import map_emotion_to_en
1417
from bot.search.infra.elastic_search_manager import ElasticSearchManager
1518
from bot.search.video_frames.frames_finder import _build_index
@@ -55,19 +58,20 @@ async def apply_to_text_segments(
5558
for seg in segments
5659
}
5760

58-
tasks = []
61+
tasks = [FilterApplicator._get_all_frame_timestamps(episode_keys, series_name, logger)]
5962
for char_group in character_groups:
6063
tasks.append(FilterApplicator._get_character_frame_keys(char_group, episode_keys, series_name, logger))
6164
if emotions:
6265
tasks.append(FilterApplicator._get_emotion_frame_keys(emotions, episode_keys, series_name, logger))
6366
for obj_group in object_groups:
6467
tasks.append(FilterApplicator._get_object_frame_keys(obj_group, episode_keys, series_name, logger))
6568

66-
frame_key_sets = await asyncio.gather(*tasks)
69+
results = await asyncio.gather(*tasks)
70+
all_timestamps, *frame_key_sets = results
6771

6872
filtered = [
6973
seg for seg in segments
70-
if FilterApplicator._segment_passes_all(seg, frame_key_sets)
74+
if FilterApplicator._segment_passes_all(seg, frame_key_sets, all_timestamps)
7175
]
7276
await log_system_message(
7377
logging.INFO,
@@ -80,21 +84,21 @@ async def apply_to_text_segments(
8084
def _segment_passes_all(
8185
segment: SegmentWithScore,
8286
frame_key_sets: List[Set[Tuple[Optional[int], Optional[int], float]]],
87+
all_timestamps: Dict[Tuple[Optional[int], Optional[int]], List[float]],
8388
) -> bool:
8489
meta = segment.get(EpisodeMetadataKeys.EPISODE_METADATA, {})
8590
season = meta.get(EpisodeMetadataKeys.SEASON)
8691
episode = meta.get(EpisodeMetadataKeys.EPISODE_NUMBER)
8792
start = segment.get(SegmentKeys.START_TIME, 0.0)
8893
end = segment.get(SegmentKeys.END_TIME, 0.0)
94+
ep_timestamps = all_timestamps.get((season, episode), [])
8995

9096
def __scene_overlaps(frame_keys: Set[Tuple[Optional[int], Optional[int], float]]) -> bool:
91-
timestamps = sorted(
92-
fk_ts
93-
for fk_season, fk_episode, fk_ts in frame_keys
94-
if fk_season == season and fk_episode == episode
95-
)
96-
for i, ts in enumerate(timestamps):
97-
next_ts = timestamps[i + 1] if i + 1 < len(timestamps) else math.inf
97+
for fk_season, fk_episode, ts in frame_keys:
98+
if fk_season != season or fk_episode != episode:
99+
continue
100+
idx = bisect.bisect_right(ep_timestamps, ts)
101+
next_ts = ep_timestamps[idx] if idx < len(ep_timestamps) else math.inf
98102
if ts <= end and next_ts >= start:
99103
return True
100104
return False
@@ -267,6 +271,49 @@ async def _get_object_frame_keys(
267271

268272
return frame_keys
269273

274+
@staticmethod
275+
async def _get_all_frame_timestamps(
276+
episode_keys: Set[Tuple[Optional[int], Optional[int]]],
277+
series_name: str,
278+
logger: logging.Logger,
279+
) -> Dict[Tuple[Optional[int], Optional[int]], List[float]]:
280+
es = await ElasticSearchManager.connect_to_elasticsearch(logger)
281+
season_list = list({k[0] for k in episode_keys if k[0] is not None})
282+
filter_clauses = []
283+
if season_list:
284+
filter_clauses.append({ElasticsearchQueryKeys.TERMS: {EpisodeMetadataKeys.SEASON_FIELD: season_list}})
285+
query = {
286+
ElasticsearchQueryKeys.QUERY: {
287+
ElasticsearchQueryKeys.BOOL: {
288+
ElasticsearchQueryKeys.FILTER: filter_clauses,
289+
},
290+
},
291+
ElasticsearchQueryKeys.SOURCE: [
292+
EpisodeMetadataKeys.SEASON_FIELD,
293+
EpisodeMetadataKeys.EPISODE_NUMBER_FIELD,
294+
VideoFrameKeys.TIMESTAMP,
295+
],
296+
ElasticsearchQueryKeys.SIZE: 100000,
297+
}
298+
try:
299+
resp = await es.search(index=_build_index(series_name), body=query)
300+
except RequestError:
301+
return {}
302+
result = {}
303+
for hit in resp[ElasticsearchKeys.HITS][ElasticsearchKeys.HITS]:
304+
src = hit[ElasticsearchKeys.SOURCE]
305+
meta = src.get(EpisodeMetadataKeys.EPISODE_METADATA, {})
306+
key = (meta.get(EpisodeMetadataKeys.SEASON), meta.get(EpisodeMetadataKeys.EPISODE_NUMBER))
307+
result.setdefault(key, []).append(src.get(VideoFrameKeys.TIMESTAMP, 0.0))
308+
for timestamps in result.values():
309+
timestamps.sort()
310+
await log_system_message(
311+
logging.INFO,
312+
f"FilterApplicator: loaded all-frame timestamps for {len(result)} episodes.",
313+
logger,
314+
)
315+
return result
316+
270317
@staticmethod
271318
def __frame_passes_object_group(
272319
detected: List[Dict[str, Any]],

0 commit comments

Comments
 (0)