Skip to content

Commit 4b9aaab

Browse files
committed
Added a way to remove scripts
1 parent 787c642 commit 4b9aaab

File tree

9 files changed

+268
-71
lines changed

9 files changed

+268
-71
lines changed

crates/common/src/html_processor.rs

Lines changed: 119 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ use lol_html::{element, html_content::ContentType, text, Settings as RewriterSet
88
use regex::Regex;
99

1010
use crate::integrations::{
11-
IntegrationAttributeContext, IntegrationRegistry, IntegrationScriptContext,
11+
AttributeRewriteOutcome, IntegrationAttributeContext, IntegrationRegistry,
12+
IntegrationScriptContext, ScriptRewriteAction,
1213
};
1314
use crate::settings::Settings;
1415
use crate::streaming_processor::{HtmlRewriterAdapter, StreamProcessor};
@@ -156,6 +157,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
156157
let original_href = href.clone();
157158
if rewrite_prebid && is_prebid_script_url(&href) {
158159
el.remove();
160+
return Ok(());
159161
} else {
160162
let new_href = href
161163
.replace(&patterns.https_origin(), &patterns.replacement_url())
@@ -165,7 +167,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
165167
}
166168
}
167169

168-
if let Some(integration_href) = integrations.rewrite_attribute(
170+
match integrations.rewrite_attribute(
169171
"href",
170172
&href,
171173
&IntegrationAttributeContext {
@@ -175,7 +177,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
175177
origin_host: &patterns.origin_host,
176178
},
177179
) {
178-
href = integration_href;
180+
AttributeRewriteOutcome::Unchanged => {}
181+
AttributeRewriteOutcome::Replaced(integration_href) => {
182+
href = integration_href;
183+
}
184+
AttributeRewriteOutcome::RemoveElement => {
185+
el.remove();
186+
return Ok(());
187+
}
179188
}
180189

181190
if href != original_href {
@@ -195,6 +204,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
195204
let original_src = src.clone();
196205
if rewrite_prebid && is_prebid_script_url(&src) {
197206
el.remove();
207+
return Ok(());
198208
} else {
199209
let new_src = src
200210
.replace(&patterns.https_origin(), &patterns.replacement_url())
@@ -204,7 +214,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
204214
}
205215
}
206216

207-
if let Some(integration_src) = integrations.rewrite_attribute(
217+
match integrations.rewrite_attribute(
208218
"src",
209219
&src,
210220
&IntegrationAttributeContext {
@@ -214,7 +224,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
214224
origin_host: &patterns.origin_host,
215225
},
216226
) {
217-
src = integration_src;
227+
AttributeRewriteOutcome::Unchanged => {}
228+
AttributeRewriteOutcome::Replaced(integration_src) => {
229+
src = integration_src;
230+
}
231+
AttributeRewriteOutcome::RemoveElement => {
232+
el.remove();
233+
return Ok(());
234+
}
218235
}
219236

220237
if src != original_src {
@@ -238,7 +255,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
238255
action = new_action;
239256
}
240257

241-
if let Some(integration_action) = integrations.rewrite_attribute(
258+
match integrations.rewrite_attribute(
242259
"action",
243260
&action,
244261
&IntegrationAttributeContext {
@@ -248,7 +265,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
248265
origin_host: &patterns.origin_host,
249266
},
250267
) {
251-
action = integration_action;
268+
AttributeRewriteOutcome::Unchanged => {}
269+
AttributeRewriteOutcome::Replaced(integration_action) => {
270+
action = integration_action;
271+
}
272+
AttributeRewriteOutcome::RemoveElement => {
273+
el.remove();
274+
return Ok(());
275+
}
252276
}
253277

254278
if action != original_action {
@@ -277,7 +301,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
277301
srcset = new_srcset;
278302
}
279303

280-
if let Some(integration_srcset) = integrations.rewrite_attribute(
304+
match integrations.rewrite_attribute(
281305
"srcset",
282306
&srcset,
283307
&IntegrationAttributeContext {
@@ -287,7 +311,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
287311
origin_host: &patterns.origin_host,
288312
},
289313
) {
290-
srcset = integration_srcset;
314+
AttributeRewriteOutcome::Unchanged => {}
315+
AttributeRewriteOutcome::Replaced(integration_srcset) => {
316+
srcset = integration_srcset;
317+
}
318+
AttributeRewriteOutcome::RemoveElement => {
319+
el.remove();
320+
return Ok(());
321+
}
291322
}
292323

293324
if srcset != original_srcset {
@@ -315,7 +346,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
315346
imagesrcset = new_imagesrcset;
316347
}
317348

318-
if let Some(integration_imagesrcset) = integrations.rewrite_attribute(
349+
match integrations.rewrite_attribute(
319350
"imagesrcset",
320351
&imagesrcset,
321352
&IntegrationAttributeContext {
@@ -325,7 +356,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
325356
origin_host: &patterns.origin_host,
326357
},
327358
) {
328-
imagesrcset = integration_imagesrcset;
359+
AttributeRewriteOutcome::Unchanged => {}
360+
AttributeRewriteOutcome::Replaced(integration_imagesrcset) => {
361+
imagesrcset = integration_imagesrcset;
362+
}
363+
AttributeRewriteOutcome::RemoveElement => {
364+
el.remove();
365+
return Ok(());
366+
}
329367
}
330368

331369
if imagesrcset != original_imagesrcset {
@@ -351,8 +389,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
351389
request_scheme: &patterns.request_scheme,
352390
origin_host: &patterns.origin_host,
353391
};
354-
if let Some(rewritten) = rewriter.rewrite(text.as_str(), &ctx) {
355-
text.replace(&rewritten, ContentType::Text);
392+
match rewriter.rewrite(text.as_str(), &ctx) {
393+
ScriptRewriteAction::Keep => {}
394+
ScriptRewriteAction::Replace(rewritten) => {
395+
text.replace(&rewritten, ContentType::Text);
396+
}
397+
ScriptRewriteAction::RemoveNode => {
398+
text.remove();
399+
}
356400
}
357401
Ok(())
358402
}
@@ -399,8 +443,10 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
399443
#[cfg(test)]
400444
mod tests {
401445
use super::*;
446+
use crate::integrations::{AttributeRewriteAction, IntegrationAttributeRewriter};
402447
use crate::streaming_processor::{Compression, PipelineConfig, StreamingPipeline};
403448
use std::io::Cursor;
449+
use std::sync::Arc;
404450

405451
fn create_test_config() -> HtmlProcessorConfig {
406452
HtmlProcessorConfig {
@@ -414,6 +460,66 @@ mod tests {
414460
}
415461
}
416462

463+
#[test]
464+
fn integration_attribute_rewriter_can_remove_elements() {
465+
struct RemovingLinkRewriter;
466+
467+
impl IntegrationAttributeRewriter for RemovingLinkRewriter {
468+
fn integration_id(&self) -> &'static str {
469+
"removing"
470+
}
471+
472+
fn handles_attribute(&self, attribute: &str) -> bool {
473+
attribute == "href"
474+
}
475+
476+
fn rewrite(
477+
&self,
478+
_attr_name: &str,
479+
attr_value: &str,
480+
_ctx: &IntegrationAttributeContext<'_>,
481+
) -> AttributeRewriteAction {
482+
if attr_value.contains("remove-me") {
483+
AttributeRewriteAction::remove_element()
484+
} else {
485+
AttributeRewriteAction::keep()
486+
}
487+
}
488+
}
489+
490+
let html = r#"<html><body>
491+
<a href="https://origin.example.com/remove-me">remove</a>
492+
<a href="https://origin.example.com/keep-me">keep</a>
493+
</body></html>"#;
494+
495+
let mut config = create_test_config();
496+
config.integrations =
497+
IntegrationRegistry::from_rewriters(vec![Arc::new(RemovingLinkRewriter)], Vec::new());
498+
499+
let processor = create_html_processor(config);
500+
let pipeline_config = PipelineConfig {
501+
input_compression: Compression::None,
502+
output_compression: Compression::None,
503+
chunk_size: 8192,
504+
};
505+
let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
506+
507+
let mut output = Vec::new();
508+
pipeline
509+
.process(Cursor::new(html.as_bytes()), &mut output)
510+
.unwrap();
511+
let processed = String::from_utf8(output).unwrap();
512+
513+
assert!(
514+
processed.contains("keep-me"),
515+
"Expected keep link to remain"
516+
);
517+
assert!(
518+
!processed.contains("remove-me"),
519+
"Removing rewriter should drop matching elements"
520+
);
521+
}
522+
417523
#[test]
418524
fn test_injects_unified_bundle_and_removes_prebid_refs() {
419525
let html = r#"<html><head>

crates/common/src/integrations/mod.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@ mod registry;
66
pub mod testlight;
77

88
pub use registry::{
9-
IntegrationAttributeContext, IntegrationAttributeRewriter, IntegrationEndpoint,
10-
IntegrationMetadata, IntegrationProxy, IntegrationRegistration, IntegrationRegistrationBuilder,
11-
IntegrationRegistry, IntegrationScriptContext, IntegrationScriptRewriter,
9+
AttributeRewriteAction, AttributeRewriteOutcome, IntegrationAttributeContext,
10+
IntegrationAttributeRewriter, IntegrationEndpoint, IntegrationMetadata, IntegrationProxy,
11+
IntegrationRegistration, IntegrationRegistrationBuilder, IntegrationRegistry,
12+
IntegrationScriptContext, IntegrationScriptRewriter, ScriptRewriteAction,
1213
};
1314

1415
type IntegrationBuilder = fn(&Settings) -> Option<IntegrationRegistration>;

0 commit comments

Comments
 (0)