diff --git a/markdown/extractor.go b/markdown/extractor.go index bff5965..89cf913 100644 --- a/markdown/extractor.go +++ b/markdown/extractor.go @@ -33,7 +33,7 @@ func (e *Extractor) PlainText(input string) (*string, error) { for _, fullTag := range listFullTag { var plain = fullTag if tag.StartRegex != nil { - plain = tag.StartRegex.ReplaceAll(plain, nil) + plain = tag.StartRegex.ReplaceAll(plain, tag.StartReplacement) } if tag.EndRegex != nil { plain = tag.EndRegex.ReplaceAll(plain, nil) diff --git a/markdown/extractor_test.go b/markdown/extractor_test.go index 2f9d670..f4adabe 100644 --- a/markdown/extractor_test.go +++ b/markdown/extractor_test.go @@ -17,6 +17,7 @@ func TestExtract(t *testing.T) { {"~~strikethrough~~", "strikethrough"}, {"# H1 \n*italic* **bold** `code` `not code [link](https://example.com)  ~~strikethrough~~", "H1 \nitalic bold code `not code link image strikethrough"}, {"# H1 \n new line", "H1 \n new line"}, + {"# H1 \n new line \n## H2 \n new line", "H1 \n new line \nH2 \n new line"}, {"*italic*", "italic"}, {"**bold**", "bold"}, {"`code`", "code"}, diff --git a/markdown/tag.go b/markdown/tag.go index 37b707b..0f0695f 100644 --- a/markdown/tag.go +++ b/markdown/tag.go @@ -6,14 +6,16 @@ type Tag struct { Name string FullRegex *regexp.Regexp StartRegex *regexp.Regexp + StartReplacement []byte EndRegex *regexp.Regexp } var tags = []Tag{ { Name: "Header", - FullRegex: regexp.MustCompile(`^#{1,6}\s+(.*)`), - StartRegex: regexp.MustCompile(`^#{1,6}\s+`), + FullRegex: regexp.MustCompile(`(^|\n)#{1,6}\s+(.*)`), + StartRegex: regexp.MustCompile(`(^|\n)#{1,6}\s+`), + StartReplacement: []byte("$1"), EndRegex: nil, }, { diff --git a/parser_test.go b/parser_test.go index fcbaec0..1464d4e 100644 --- a/parser_test.go +++ b/parser_test.go @@ -1,10 +1,11 @@ package plaintext import ( + "testing" + "github.com/huantt/plaintext-extractor/html" "github.com/huantt/plaintext-extractor/markdown" "github.com/stretchr/testify/assert" - "testing" ) func TestParseHtml(t *testing.T) { @@ -12,7 +13,7 @@ func TestParseHtml(t *testing.T) { input string expected string }{ - {`