Skip to content

Commit 704ac07

Browse files
committed
parser: add parts_handler_fn for flexible multipart handling
Replaces max_part_size and skip_large_parts? with a more flexible callback-based approach. The parts_handler_fn receives part metadata and parsed headers, allowing conditional parsing based on size, content-type, or any other criteria.
1 parent 1c9be97 commit 704ac07

File tree

2 files changed

+195
-31
lines changed

2 files changed

+195
-31
lines changed

lib/mail/parsers/rfc_2822.ex

Lines changed: 81 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -45,32 +45,88 @@ defmodule Mail.Parsers.RFC2822 do
4545
4646
* `:charset_handler` - A function that takes a charset and binary and returns a binary. Defaults to return the string as is.
4747
* `:header_only` - Whether to parse only the headers. Defaults to false.
48-
* `:max_part_size` - The maximum size of a part in bytes. Defaults to 10MB.
49-
* `:skip_large_parts?` - Whether to skip parts larger than `max_part_size`. Defaults to false.
48+
* `:parts_handler_fn` - A 3-arity function invoked for each part of a multipart message. Receives `part_info`, `message` (with parsed headers), and `opts`, and must return either `:parse` or a `%Mail.Message{}`. Defaults to nil (normal parsing).
5049
50+
## Parts Handler Function
51+
52+
The `:parts_handler_fn` option allows custom handling of each part in a multipart message:
53+
54+
parts_handler_fn: fn part_info, message, opts ->
55+
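# part_info = %{size: 46_000, start: 1234, index: 0} (size in bytes, start = byte offset of the part within the multipart body, index = zero-based position of the part)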
56+
# message = %Mail.Message{} (with headers already parsed)
57+
# opts = keyword list of options passed to parse/2
58+
59+
:parse # Continue with normal parsing (default behavior)
60+
# or
61+
%{message | headers: Map.put(message.headers, "x-custom", "true")}
62+
end
63+
64+
### Handler Return Values
65+
66+
* `:parse` - Continues with normal parsing of the part's body
67+
* `%Mail.Message{}` - Uses the returned message as the part as-is; the part's body is not parsed (headers are already parsed, you provide the body)
5168
"""
5269
@spec parse(binary() | nonempty_maybe_improper_list(), keyword()) :: Mail.Message.t()
5370
def parse(content, opts \\ []) when is_binary(content) do
54-
{headers, body_offset, has_body} = extract_headers_and_body_offset(content)
71+
parse_part(%{
72+
part_info: %{size: byte_size(content), start: 0, index: 0},
73+
body_content: content,
74+
parts_handler_fn: nil,
75+
opts: opts
76+
})
77+
end
78+
79+
defp parse_part(%{
80+
part_info: part_info,
81+
body_content: body_content,
82+
parts_handler_fn: parts_handler_fn,
83+
opts: opts
84+
}) do
85+
part_data = binary_part(body_content, part_info.start, part_info.size)
86+
{headers, body_offset, has_body} = extract_headers_and_body_offset(part_data)
5587

5688
message =
5789
%Mail.Message{}
5890
|> parse_headers(headers, opts)
5991
|> mark_multipart()
6092

61-
if has_body and not Keyword.get(opts, :header_only, false) do
62-
# Extract body portion using offset
63-
body_content =
64-
if body_offset < byte_size(content) do
65-
binary_part(content, body_offset, byte_size(content) - body_offset)
66-
else
67-
""
68-
end
69-
70-
parse_body_binary(message, body_content, opts)
71-
else
72-
message
93+
cond do
94+
parts_handler_fn == nil -> :parse
95+
is_function(parts_handler_fn, 3) -> parts_handler_fn.(part_info, message, opts)
7396
end
97+
|> apply_handler_result(%{
98+
message: message,
99+
part_info: part_info,
100+
has_body: has_body,
101+
body_offset: body_offset,
102+
part_data: part_data,
103+
opts: opts
104+
})
105+
end
106+
107+
defp apply_handler_result(%Mail.Message{} = message, _context) do
108+
message
109+
end
110+
111+
defp apply_handler_result(:parse, %{message: message, has_body: false}) do
112+
message
113+
end
114+
115+
defp apply_handler_result(:parse, %{
116+
message: message,
117+
body_offset: body_offset,
118+
part_data: part_data,
119+
opts: opts
120+
}) do
121+
# Extract body portion using offset
122+
body_content =
123+
if body_offset < byte_size(part_data) do
124+
binary_part(part_data, body_offset, byte_size(part_data) - body_offset)
125+
else
126+
""
127+
end
128+
129+
parse_body_binary(message, body_content, opts)
74130
end
75131

76132
defp extract_headers_and_body_offset(content) do
@@ -721,15 +777,19 @@ defmodule Mail.Parsers.RFC2822 do
721777
boundary = Mail.Proplist.get(content_type, "boundary")
722778
part_ranges = extract_parts_ranges(body_content, boundary)
723779

724-
size_threshold = Keyword.get(opts, :max_part_size, 10_000_000)
725-
skip_large_parts? = Keyword.get(opts, :skip_large_parts?, false)
780+
parts_handler_fn = Keyword.get(opts, :parts_handler_fn, nil)
726781

727782
parsed_parts =
728783
part_ranges
729-
|> Enum.map(fn {start, size} ->
730-
if skip_large_parts? and size > size_threshold do
731-
# Don't extract or parse large parts: return placeholder
732-
%Mail.Message{body: "[Part skipped: #{size} bytes - too large to parse]"}
784+
|> Enum.with_index()
785+
|> Enum.map(fn {{start, size}, index} ->
786+
if parts_handler_fn do
787+
parse_part(%{
788+
part_info: %{size: size, start: start, index: index},
789+
body_content: body_content,
790+
parts_handler_fn: parts_handler_fn,
791+
opts: opts
792+
})
733793
else
734794
String.trim_trailing(binary_part(body_content, start, size), "\r\n")
735795
|> parse(opts)

test/mail/parsers/rfc_2822_test.exs

Lines changed: 114 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,7 +1292,7 @@ defmodule Mail.Parsers.RFC2822Test do
12921292
end
12931293
end
12941294

1295-
describe "Large part handling" do
1295+
describe "parts_handler_fn" do
12961296
@multi_part_message """
12971297
To: Test User <[email protected]>, Other User <[email protected]>
12981298
CC: The Dude <[email protected]>, Batman <[email protected]>
@@ -1318,24 +1318,128 @@ defmodule Mail.Parsers.RFC2822Test do
13181318
--foobar--
13191319
"""
13201320

1321-
test "large parts are skipped" do
1321+
test "handler returning :parse parses parts normally" do
13221322
message =
13231323
parse_email(
13241324
@multi_part_message,
1325-
skip_large_parts?: true,
1326-
max_part_size: 1
1325+
parts_handler_fn: fn _part_info, _message, _opts ->
1326+
:parse
1327+
end
1328+
)
1329+
1330+
assert message.body == nil
1331+
[text_part, html_part, headers_only_part] = message.parts
1332+
1333+
assert text_part.headers["content-type"] == ["text/plain", {"charset", "us-ascii"}]
1334+
assert text_part.body == "This is some text"
1335+
1336+
assert html_part.headers["content-type"] == ["text/html", {"charset", "us-ascii"}]
1337+
assert html_part.body == "<h1>This is some HTML</h1>"
1338+
1339+
assert headers_only_part.headers["x-my-header"] == "no body!"
1340+
assert headers_only_part.body == ""
1341+
end
1342+
1343+
test "handler returning custom message with skipped body" do
1344+
message =
1345+
parse_email(
1346+
@multi_part_message,
1347+
parts_handler_fn: fn part_info, message, _opts ->
1348+
Map.put(message, :body, "[Headers only - body skipped: #{part_info.size} bytes]")
1349+
end
1350+
)
1351+
1352+
assert message.body == nil
1353+
[text_part, html_part, headers_only_part] = message.parts
1354+
1355+
# Headers are still parsed
1356+
assert text_part.headers["content-type"] == ["text/plain", {"charset", "us-ascii"}]
1357+
# Body is replaced with placeholder
1358+
assert text_part.body =~ "[Headers only - body skipped:"
1359+
1360+
assert html_part.headers["content-type"] == ["text/html", {"charset", "us-ascii"}]
1361+
assert html_part.body =~ "[Headers only - body skipped:"
1362+
1363+
assert headers_only_part.headers["x-my-header"] == "no body!"
1364+
assert headers_only_part.body =~ "[Headers only - body skipped:"
1365+
end
1366+
1367+
test "handler returning custom message" do
1368+
message =
1369+
parse_email(
1370+
@multi_part_message,
1371+
parts_handler_fn: fn part_info, message, _opts ->
1372+
%Mail.Message{
1373+
body: "Custom body for part #{part_info.index}",
1374+
headers: Map.put(message.headers, "x-custom", "true")
1375+
}
1376+
end
13271377
)
13281378

13291379
assert message.body == nil
1380+
[text_part, html_part, headers_only_part] = message.parts
1381+
1382+
assert text_part.body == "Custom body for part 0"
1383+
assert text_part.headers["x-custom"] == "true"
1384+
assert text_part.headers["content-type"] == ["text/plain", {"charset", "us-ascii"}]
1385+
1386+
assert html_part.body == "Custom body for part 1"
1387+
assert html_part.headers["x-custom"] == "true"
1388+
1389+
assert headers_only_part.body == "Custom body for part 2"
1390+
assert headers_only_part.headers["x-custom"] == "true"
1391+
end
1392+
1393+
test "handler can access part_info and skip based on size" do
1394+
message =
1395+
parse_email(
1396+
@multi_part_message,
1397+
parts_handler_fn: fn part_info, message, _opts ->
1398+
if part_info.size > 25 do
1399+
Map.put(message, :body, "[Headers only - body skipped: #{part_info.size} bytes]")
1400+
else
1401+
:parse
1402+
end
1403+
end
1404+
)
1405+
1406+
[text_part, html_part, headers_only_part] = message.parts
1407+
1408+
# Text part is large, should be skipped
1409+
assert text_part.body =~ "[Headers only - body skipped:"
1410+
1411+
# HTML part is large, should be skipped
1412+
assert html_part.body =~ "[Headers only - body skipped:"
1413+
1414+
# Headers-only part is small, should be parsed
1415+
assert headers_only_part.body == ""
1416+
end
1417+
1418+
test "handler can access message and conditionally skip based on content-type" do
1419+
message =
1420+
parse_email(
1421+
@multi_part_message,
1422+
parts_handler_fn: fn part_info, message, _opts ->
1423+
content_type = message.headers["content-type"]
1424+
# Skip HTML parts
1425+
if List.first(content_type || []) == "text/html" do
1426+
Map.put(message, :body, "[Headers only - body skipped: #{part_info.size} bytes]")
1427+
else
1428+
:parse
1429+
end
1430+
end
1431+
)
13301432

13311433
[text_part, html_part, headers_only_part] = message.parts
13321434

1333-
assert text_part.parts == []
1334-
assert text_part.headers == %{}
1335-
assert html_part.parts == []
1336-
assert html_part.headers == %{}
1337-
assert headers_only_part.parts == []
1338-
assert headers_only_part.headers == %{}
1435+
# Text part should be parsed
1436+
assert text_part.body == "This is some text"
1437+
1438+
# HTML part should be skipped
1439+
assert html_part.body =~ "[Headers only - body skipped:"
1440+
1441+
# Headers-only part should be parsed
1442+
assert headers_only_part.body == ""
13391443
end
13401444
end
13411445

0 commit comments

Comments
 (0)