diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 64d4a2f..ec790fe 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -24,7 +24,6 @@ jobs:
matrix:
tox_env:
- py36
- - py37
- py38
- py39
- py310
diff --git a/lxml_html_clean/clean.py b/lxml_html_clean/clean.py
index 37cf3e0..3eeda47 100644
--- a/lxml_html_clean/clean.py
+++ b/lxml_html_clean/clean.py
@@ -90,15 +90,19 @@ def _has_javascript_scheme(s):
# - 0A - Line Feed
# - 0B - Vertical tab
# - 0D - Carriage Return
-_ascii_control_characters = re.compile(r"[\x00-\x08\x0C\x0E-\x1F\x7F]")
+_ascii_control_characters_str = re.compile(r"[\x00-\x08\x0C\x0E-\x1F\x7F]")
+_ascii_control_characters_bytes = re.compile(rb"[\x00-\x08\x0C\x0E-\x1F\x7F]")
-def fromstring(string):
+def fromstring(data):
     """
-    Enhanced fromstring function that removes ASCII control chars
-    before passing the input to the original lxml.html.fromstring.
+    Strip ASCII control chars from ``data`` (str or bytes), then parse
+    the cleaned input with the original lxml.html.fromstring.
     """
-    return lxml_fromstring(_ascii_control_characters.sub("", string))
+    # A compiled pattern only matches input of its own type (str vs bytes).
+    if isinstance(data, bytes):
+        return lxml_fromstring(_ascii_control_characters_bytes.sub(b"", data))
+    return lxml_fromstring(_ascii_control_characters_str.sub("", data))
# This regular expression is inspired by the one in urllib3.
diff --git a/tests/test_clean.py b/tests/test_clean.py
index b22548b..11cc102 100644
--- a/tests/test_clean.py
+++ b/tests/test_clean.py
@@ -355,6 +355,12 @@ def test_ascii_control_chars_removed(self):
cleaner = Cleaner()
self.assertEqual(expected, cleaner.clean_html(html))
+    def test_ascii_control_chars_removed_from_bytes(self):
+        # \x1b must be stripped from bytes input so the javascript: href is caught.
+        html = b"""<a href="java\x1bscript:alert()">Link</a>"""
+        expected = b"""<a href="">Link</a>"""
+        self.assertEqual(expected, Cleaner().clean_html(html))
+
def test_memory_usage_many_elements_with_long_tails(self):
comment = "\n"
empty_line = "\t" * 10 + "\n"
diff --git a/tox.ini b/tox.ini
index ce95032..8706ef5 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py36,py37,py38,py39,py310,py311,py312,py313,mypy
+envlist = py36,py38,py39,py310,py311,py312,py313,mypy
skipsdist = True
[testenv]