From 3854a6fa6c323d1c09aa71a0626c9eef62709294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Wed, 25 May 2022 10:56:14 +0200 Subject: [PATCH] Fix Plainify edge cases This commit replaces the main part of `helpers.StripHTML` with Go's implementation in its html/template package. It's a little slower, but correctness is more important: ```bash BenchmarkStripHTMLOld-10 680316 1764 ns/op 728 B/op 4 allocs/op BenchmarkStripHTMLNew-10 384520 3099 ns/op 2089 B/op 10 allocs/op ``` Fixes #9199 Fixes #9909 Closes #9410 --- helpers/content.go | 40 ---------------- helpers/content_test.go | 38 --------------- hugolib/page__per_output.go | 2 +- hugolib/page_test.go | 5 +- .../htmltemplate/hugo_template.go | 5 ++ tpl/strings/strings.go | 7 +-- tpl/template.go | 46 +++++++++++++++++++ tpl/template_test.go | 41 +++++++++++++++++ tpl/transform/transform.go | 3 +- tpl/transform/transform_test.go | 1 + 10 files changed, 103 insertions(+), 85 deletions(-) diff --git a/helpers/content.go b/helpers/content.go index 835663b76..d04e34a07 100644 --- a/helpers/content.go +++ b/helpers/content.go @@ -34,7 +34,6 @@ import ( "github.com/gohugoio/hugo/markup" - bp "github.com/gohugoio/hugo/bufferpool" "github.com/gohugoio/hugo/config" ) @@ -104,45 +103,6 @@ func NewContentSpec(cfg config.Provider, logger loggers.Logger, contentFs afero. return spec, nil } -var stripHTMLReplacer = strings.NewReplacer("\n", " ", "</p>", "\n", "<br>", "\n", "<br />", "\n") - -// StripHTML accepts a string, strips out all HTML tags and returns it. -func StripHTML(s string) string { - // Shortcut strings with no tags in them - if !strings.ContainsAny(s, "<>") { - return s - } - s = stripHTMLReplacer.Replace(s) - - // Walk through the string removing all tags - b := bp.GetBuffer() - defer bp.PutBuffer(b) - var inTag, isSpace, wasSpace bool - for _, r := range s { - if !inTag { - isSpace = false - } - - switch { - case r == '<': - inTag = true - case r == '>': - inTag = false - case unicode.IsSpace(r): - isSpace = true - fallthrough - default: - if !inTag && (!isSpace || (isSpace && !wasSpace)) { - b.WriteRune(r) - } - } - - wasSpace = isSpace - - } - return b.String() -} - // stripEmptyNav strips out empty