Skip to content

Commit 84543f6

Browse files
committed
Match episode pairs in ES query, reduce size
Build per-episode (season, episode) bool SHOULD clauses instead of a broad season-only TERMS filter. Ignore entries missing either season or episode and return early if none are valid. Wrap the SHOULD clauses in a FILTER with minimum_should_match=1. Also reduce the Elasticsearch result size from 100000 to 10000 to limit returned hits.
1 parent 77a20db commit 84543f6

1 file changed

Lines changed: 23 additions & 6 deletions

File tree

bot/search/filter_applicator.py

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -278,22 +278,39 @@ async def _get_all_frame_timestamps(
278278
logger: logging.Logger,
279279
) -> Dict[Tuple[Optional[int], Optional[int]], List[float]]:
280280
es = await ElasticSearchManager.connect_to_elasticsearch(logger)
281-
season_list = list({k[0] for k in episode_keys if k[0] is not None})
282-
filter_clauses = []
283-
if season_list:
284-
filter_clauses.append({ElasticsearchQueryKeys.TERMS: {EpisodeMetadataKeys.SEASON_FIELD: season_list}})
281+
valid_keys = [(s, e) for s, e in episode_keys if s is not None and e is not None]
282+
episode_should = [
283+
{
284+
ElasticsearchQueryKeys.BOOL: {
285+
ElasticsearchQueryKeys.FILTER: [
286+
{ElasticsearchQueryKeys.TERM: {EpisodeMetadataKeys.SEASON_FIELD: s}},
287+
{ElasticsearchQueryKeys.TERM: {EpisodeMetadataKeys.EPISODE_NUMBER_FIELD: e}},
288+
],
289+
},
290+
}
291+
for s, e in valid_keys
292+
]
293+
if not episode_should:
294+
return {}
285295
query = {
286296
ElasticsearchQueryKeys.QUERY: {
287297
ElasticsearchQueryKeys.BOOL: {
288-
ElasticsearchQueryKeys.FILTER: filter_clauses,
298+
ElasticsearchQueryKeys.FILTER: [
299+
{
300+
ElasticsearchQueryKeys.BOOL: {
301+
ElasticsearchQueryKeys.SHOULD: episode_should,
302+
ElasticsearchQueryKeys.MINIMUM_SHOULD_MATCH: 1,
303+
},
304+
},
305+
],
289306
},
290307
},
291308
ElasticsearchQueryKeys.SOURCE: [
292309
EpisodeMetadataKeys.SEASON_FIELD,
293310
EpisodeMetadataKeys.EPISODE_NUMBER_FIELD,
294311
VideoFrameKeys.TIMESTAMP,
295312
],
296-
ElasticsearchQueryKeys.SIZE: 100000,
313+
ElasticsearchQueryKeys.SIZE: 10000,
297314
}
298315
try:
299316
resp = await es.search(index=_build_index(series_name), body=query)

0 commit comments

Comments
 (0)