From 6049ba99f006389c444aaecee57ae88c1f21abfa Mon Sep 17 00:00:00 2001 From: Joe Mooring Date: Sat, 13 Apr 2024 21:17:39 -0700 Subject: [PATCH] helpers: Fix TrimShortHTML when used with AsciiDoc content Fixes #12369 --- helpers/content.go | 30 ++++++++++++++++-------------- helpers/content_test.go | 31 +++++++++++++++++-------------- hugolib/page__content.go | 2 +- hugolib/page__per_output.go | 8 +++++--- tpl/transform/transform.go | 2 +- 5 files changed, 40 insertions(+), 33 deletions(-) diff --git a/helpers/content.go b/helpers/content.go index 889294382..be79ad540 100644 --- a/helpers/content.go +++ b/helpers/content.go @@ -36,11 +36,6 @@ import ( "github.com/gohugoio/hugo/config" ) -var ( - openingPTag = []byte("<p>") - closingPTag = []byte("</p>") -) - // ContentSpec provides functionality to render markdown content. type ContentSpec struct { Converters markup.ConverterProvider @@ -242,19 +237,26 @@ func (c *ContentSpec) TruncateWordsToWholeSentence(s string) (string, bool) { return strings.TrimSpace(s[:endIndex]), endIndex < len(s) } -// TrimShortHTML removes the

<p>/</p> tags from HTML input in the situation -// where said tags are the only <p>

tags in the input and enclose the content -// of the input (whitespace excluded). -func (c *ContentSpec) TrimShortHTML(input []byte) []byte { - if bytes.Count(input, openingPTag) == 1 { +// TrimShortHTML removes the outer tags from HTML input where (a) the opening +// tag is present only once within the input, and (b) the opening and closing +// tags wrap the input after white space removal. +func (c *ContentSpec) TrimShortHTML(input []byte, markup string) []byte { + openingTag := []byte("<p>") + closingTag := []byte("</p>") + + if markup == "asciidocext" { + openingTag = []byte("<div class=\"paragraph\">\n<p>") + closingTag = []byte("</p>\n</div>
") + } + + if bytes.Count(input, openingTag) == 1 { input = bytes.TrimSpace(input) - if bytes.HasPrefix(input, openingPTag) && bytes.HasSuffix(input, closingPTag) { - input = bytes.TrimPrefix(input, openingPTag) - input = bytes.TrimSuffix(input, closingPTag) + if bytes.HasPrefix(input, openingTag) && bytes.HasSuffix(input, closingTag) { + input = bytes.TrimPrefix(input, openingTag) + input = bytes.TrimSuffix(input, closingTag) input = bytes.TrimSpace(input) } } - return input } diff --git a/helpers/content_test.go b/helpers/content_test.go index 53e18e727..f1cbfad04 100644 --- a/helpers/content_test.go +++ b/helpers/content_test.go @@ -26,24 +26,27 @@ import ( func TestTrimShortHTML(t *testing.T) { tests := []struct { - input, output []byte + markup string + input []byte + output []byte }{ - {[]byte(""), []byte("")}, - {[]byte("Plain text"), []byte("Plain text")}, - // This seems wrong. Why touch it if it doesn't have p tag? - // {[]byte(" \t\n Whitespace text\n\n"), []byte("Whitespace text")}, - {[]byte("

<p>Simple paragraph</p>

"), []byte("Simple paragraph")}, - {[]byte("\n \n \t

<p>\t Whitespace\nHTML \n\t</p>

\n\t"), []byte("Whitespace\nHTML")}, - {[]byte("

<p>Multiple</p><p>paragraphs</p>

"), []byte("

<p>Multiple</p><p>paragraphs</p>

")}, - {[]byte("

<p>Nested<p>paragraphs</p></p>

"), []byte("

<p>Nested<p>paragraphs</p></p>

")}, - {[]byte("

Hello

\n"), []byte("

Hello

\n")}, - // Issue #11698 - {[]byte("

b

\n\n

c

"), []byte("

b

\n\n

c

")}, + {"markdown", []byte(""), []byte("")}, + {"markdown", []byte("Plain text"), []byte("Plain text")}, + {"markdown", []byte("

<p>Simple paragraph</p>

"), []byte("Simple paragraph")}, + {"markdown", []byte("\n \n \t

<p>\t Whitespace\nHTML \n\t</p>

\n\t"), []byte("Whitespace\nHTML")}, + {"markdown", []byte("

<p>Multiple</p><p>paragraphs</p>

"), []byte("

<p>Multiple</p><p>paragraphs</p>

")}, + {"markdown", []byte("

<p>Nested<p>paragraphs</p></p>

"), []byte("

<p>Nested<p>paragraphs</p></p>

")}, + {"markdown", []byte("

Hello

\n"), []byte("

Hello

\n")}, + // Issue 11698 + {"markdown", []byte("

b

\n\n

c

"), []byte("

b

\n\n

c

")}, + // Issue 12369 + {"markdown", []byte("
<div class=\"paragraph\">\n<p>foo</p>\n</div>
"), []byte("
<div class=\"paragraph\">\n<p>foo</p>\n</div>
")}, + {"asciidocext", []byte("
<div class=\"paragraph\">\n<p>foo</p>\n</div>
"), []byte("foo")}, } c := newTestContentSpec(nil) for i, test := range tests { - output := c.TrimShortHTML(test.input) + output := c.TrimShortHTML(test.input, test.markup) if !bytes.Equal(test.output, output) { t.Errorf("Test %d failed. Expected %q got %q", i, test.output, output) } @@ -54,7 +57,7 @@ func BenchmarkTrimShortHTML(b *testing.B) { c := newTestContentSpec(nil) b.ResetTimer() for i := 0; i < b.N; i++ { - c.TrimShortHTML([]byte("

<p>Simple paragraph</p>

")) + c.TrimShortHTML([]byte("

<p>Simple paragraph</p>

"), "markdown") } } diff --git a/hugolib/page__content.go b/hugolib/page__content.go index 54f7be961..799fc89b6 100644 --- a/hugolib/page__content.go +++ b/hugolib/page__content.go @@ -778,7 +778,7 @@ func (c *cachedContent) contentPlain(ctx context.Context, cp *pageContentOutput) if err != nil { return nil, err } - html := cp.po.p.s.ContentSpec.TrimShortHTML(b.Bytes()) + html := cp.po.p.s.ContentSpec.TrimShortHTML(b.Bytes(), cp.po.p.m.pageConfig.Markup) result.summary = helpers.BytesToHTML(html) } else { var summary string diff --git a/hugolib/page__per_output.go b/hugolib/page__per_output.go index 7220c86fe..fac719ea9 100644 --- a/hugolib/page__per_output.go +++ b/hugolib/page__per_output.go @@ -363,9 +363,11 @@ func (pco *pageContentOutput) RenderString(ctx context.Context, args ...any) (te } if opts.Display == "inline" { - // We may have to rethink this in the future when we get other - // renderers. - rendered = pco.po.p.s.ContentSpec.TrimShortHTML(rendered) + markup := pco.po.p.m.pageConfig.Markup + if opts.Markup != "" { + markup = pco.po.p.s.ContentSpec.ResolveMarkup(opts.Markup) + } + rendered = pco.po.p.s.ContentSpec.TrimShortHTML(rendered, markup) } return template.HTML(string(rendered)), nil diff --git a/tpl/transform/transform.go b/tpl/transform/transform.go index 7054c6988..5ef9bff21 100644 --- a/tpl/transform/transform.go +++ b/tpl/transform/transform.go @@ -167,7 +167,7 @@ func (ns *Namespace) Markdownify(ctx context.Context, s any) (template.HTML, err } // Strip if this is a short inline type of text. - bb := ns.deps.ContentSpec.TrimShortHTML([]byte(ss)) + bb := ns.deps.ContentSpec.TrimShortHTML([]byte(ss), "markdown") return helpers.BytesToHTML(bb), nil }