Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/vcr_cleaner/cleaners/uri.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import json
import re


def clean_uri(old: str, new: str):
"""Returns a cleaner function that replaces the request URI
string with all occurrences of substring old replaced by new.
Expand All @@ -16,3 +20,26 @@ def clean_uri(request: dict, response: dict):
clean_uri.__doc__ = f"Replaces the request URI string with all " \
f"occurrences of substring '{old}' replaced by '{new}'."
return clean_uri


def _regex_sub_dict(message: dict, rule: str, replacement: str):
    """Replace every match of ``rule`` in the strings of ``message``, in place.

    The substitution is applied to each string separately (dictionary keys
    and values, at any nesting depth inside dicts, lists and tuples) instead
    of to a serialized copy of the whole message. A match can therefore never
    run across the boundary between two values and swallow keys, quotes or
    other parts of the structure.

    Values that are neither strings nor containers (numbers, ``None``,
    bytes, ...) are kept unchanged.

    Args:
        message: Dictionary to clean; its contents are replaced in place.
        rule: Regular expression to search for.
        replacement: Replacement text, in ``re.sub`` template syntax.
    """
    pattern = re.compile(rule)

    def scrub(value):
        # Strings are the only leaves that get rewritten.
        if isinstance(value, str):
            return pattern.sub(replacement, value)
        if isinstance(value, dict):
            return {scrub(key): scrub(item) for key, item in value.items()}
        if isinstance(value, list):
            return [scrub(item) for item in value]
        if isinstance(value, tuple):
            return tuple(scrub(item) for item in value)
        return value

    cleaned = scrub(message)

    # Update the original dict so callers holding a reference see the result.
    message.clear()
    message.update(cleaned)


def clean_domains(domain: str, replacement: str = 'cleaned.example.edu'):
    """Return a cleaner that replaces the given domain and its sub-domains.

    A match starts at a ``/`` and runs up to and including ``domain``, so
    ``https://illinois.edu`` and ``https://foo.bar.illinois.edu`` both become
    ``https://<replacement>``. The URL scheme is left untouched.

    Args:
        domain: Domain to look for; it is matched literally.
        replacement: Host name that is written in place of each match.

    Returns:
        A function taking the ``request`` and ``response`` dictionaries and
        cleaning both of them in place.
    """
    # Escape outside the f-string: a backslash inside an f-string expression
    # is a syntax error before Python 3.12. re.escape also neutralises any
    # other regex metacharacter that may appear in the domain.
    escaped_domain = re.escape(domain)
    rule = f"/[^/]*{escaped_domain}"
    rep = f"/{replacement}"

    def wrapper(request: dict, response: dict):
        _regex_sub_dict(request, rule, rep)
        _regex_sub_dict(response, rule, rep)

    wrapper.__doc__ = f"Replaces anything that looks like the domain " \
                      f"'{domain}' or one of its sub-domains with " \
                      f"'{replacement}'."

    return wrapper
29 changes: 28 additions & 1 deletion tests/test_uri.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from vcr_cleaner.cleaners.uri import clean_uri
from vcr_cleaner.cleaners.uri import (
clean_domains,
clean_uri,
)


def test_simple_clean_uri():
Expand All @@ -13,3 +16,27 @@ def test_simple_clean_uri():
assert str(cleaner.__doc__) != 'None'
assert 'example' in str(cleaner.__doc__)
assert 'foo' in str(cleaner.__doc__)


def test_clean_domain():
    """clean_domains should scrub the domain and every sub-domain URL."""
    fixture = {
        'sub-domain': 'https://foo.illinois.edu',
        'sub-sub-domain': 'https://foo.bar.illinois.edu',
        'uri': 'https://illinois.edu',
        'insecure': 'http://illinois.edu',
        'essay': 'Lorum ipsum https://illinois.edu, and so on...',
    }
    # Request and response start out as independent, equal dictionaries.
    request, response = dict(fixture), dict(fixture)

    scrub = clean_domains('illinois.edu')
    scrub(request, response)

    # The returned cleaner has to carry a docstring.
    assert str(scrub.__doc__) != 'None'

    # The scheme is preserved; only the host part is replaced.
    expected_urls = {
        'uri': 'https://cleaned.example.edu',
        'sub-domain': 'https://cleaned.example.edu',
        'sub-sub-domain': 'https://cleaned.example.edu',
        'insecure': 'http://cleaned.example.edu',
    }
    for field, cleaned_url in expected_urls.items():
        assert request[field] == cleaned_url

    # Domains embedded in free text are scrubbed as well.
    assert 'illinois.edu' not in request['essay']

    # Both messages must have been cleaned the same way.
    assert request == response