From 004fcddc806d5a05a6d1a908f6ef902ef76aba06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Sun, 21 Jun 2015 13:08:30 +0200 Subject: [PATCH] Remove superfluous p-tags around shortcodes This commit replaces the regexp driven `replaceShortcodeTokens` with a handwritten one. It wasnt't possible to handle the p-tags case without breaking performance. This fix actually improves in that area: ``` benchmark old ns/op new ns/op delta BenchmarkParsePage 142738 142667 -0.05% BenchmarkReplaceShortcodeTokens 665590 575645 -13.51% BenchmarkShortcodeLexer 176038 181074 +2.86% benchmark old allocs new allocs delta BenchmarkParsePage 87 87 +0.00% BenchmarkReplaceShortcodeTokens 9631 9424 -2.15% BenchmarkShortcodeLexer 274 274 +0.00% benchmark old bytes new bytes delta BenchmarkParsePage 141830 141830 +0.00% BenchmarkReplaceShortcodeTokens 52275 35219 -32.63% BenchmarkShortcodeLexer 30177 30178 +0.00% ``` Fixes #1148 --- hugolib/handler_page.go | 10 ++--- hugolib/page.go | 2 +- hugolib/shortcode.go | 88 +++++++++++++++++++++++++-------------- hugolib/shortcode_test.go | 47 ++++++++++++--------- hugolib/site_test.go | 9 +++- 5 files changed, 97 insertions(+), 59 deletions(-) diff --git a/hugolib/handler_page.go b/hugolib/handler_page.go index 24c81021f..f925976f7 100644 --- a/hugolib/handler_page.go +++ b/hugolib/handler_page.go @@ -61,7 +61,7 @@ func (h markdownHandler) PageConvert(p *Page, t tpl.Template) HandledResult { tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.renderContent(helpers.RemoveSummaryDivider(p.rawContent))) if len(p.contentShortCodes) > 0 { - replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, true, p.contentShortCodes, + replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, p.contentShortCodes, tmpContent, tmpTableOfContents) if err != nil { jww.FATAL.Printf("Fail to replace shortcode tokens in %s:\n%s", p.BaseFileName(), err.Error()) @@ -88,7 +88,7 @@ func (h htmlHandler) PageConvert(p *Page, t tpl.Template) HandledResult { var err error if len(p.contentShortCodes) > 0 { - content, err = replaceShortcodeTokens(p.rawContent, shortcodePlaceholderPrefix, true, p.contentShortCodes) + content, err = replaceShortcodeTokens(p.rawContent, shortcodePlaceholderPrefix, p.contentShortCodes) if err != nil { jww.FATAL.Printf("Fail to replace shortcode tokens in %s:\n%s", p.BaseFileName(), err.Error()) @@ -114,7 +114,7 @@ func (h asciidocHandler) PageConvert(p *Page, t tpl.Template) HandledResult { tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.renderContent(helpers.RemoveSummaryDivider(p.rawContent))) if len(p.contentShortCodes) > 0 { - replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, true, p.contentShortCodes, + replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, p.contentShortCodes, tmpContent, tmpTableOfContents) if err != nil { jww.FATAL.Printf("Fail to replace shortcode tokens in %s:\n%s", p.BaseFileName(), err.Error()) @@ -142,7 +142,7 @@ func (h rstHandler) PageConvert(p *Page, t tpl.Template) HandledResult { tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.renderContent(helpers.RemoveSummaryDivider(p.rawContent))) if len(p.contentShortCodes) > 0 { - replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, true, p.contentShortCodes, + replaced, err := replaceShortcodeTokensInsources(shortcodePlaceholderPrefix, p.contentShortCodes, tmpContent, tmpTableOfContents) if err != nil { jww.FATAL.Printf("Fail to replace shortcode tokens in %s:\n%s", p.BaseFileName(), err.Error()) @@ -169,7 +169,7 @@ func (h mmarkHandler) PageConvert(p *Page, t tpl.Template) HandledResult { tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.renderContent(helpers.RemoveSummaryDivider(p.rawContent))) if len(p.contentShortCodes) > 0 { - tmpContentWithTokensReplaced, err := replaceShortcodeTokens(tmpContent, shortcodePlaceholderPrefix, true, p.contentShortCodes) + tmpContentWithTokensReplaced, err := replaceShortcodeTokens(tmpContent, shortcodePlaceholderPrefix, p.contentShortCodes) if err != nil { jww.FATAL.Printf("Fail to replace short code tokens in %s:\n%s", p.BaseFileName(), err.Error()) diff --git a/hugolib/page.go b/hugolib/page.go index 1f895cdcc..19408eee5 100644 --- a/hugolib/page.go +++ b/hugolib/page.go @@ -183,7 +183,7 @@ func (p *Page) setSummary() { renderedHeader := p.renderBytes(header) if len(p.contentShortCodes) > 0 { tmpContentWithTokensReplaced, err := - replaceShortcodeTokens(renderedHeader, shortcodePlaceholderPrefix, true, p.contentShortCodes) + replaceShortcodeTokens(renderedHeader, shortcodePlaceholderPrefix, p.contentShortCodes) if err != nil { jww.FATAL.Printf("Failed to replace short code tokens in Summary for %s:\n%s", p.BaseFileName(), err.Error()) } else { diff --git a/hugolib/shortcode.go b/hugolib/shortcode.go index 2a973f99b..69f712eb8 100644 --- a/hugolib/shortcode.go +++ b/hugolib/shortcode.go @@ -14,6 +14,8 @@ package hugolib import ( + "bytes" + "errors" "fmt" "html/template" "reflect" @@ -132,7 +134,7 @@ func HandleShortcodes(stringToParse string, page *Page, t tpl.Template) (string, } if len(tmpShortcodes) > 0 { - tmpContentWithTokensReplaced, err := replaceShortcodeTokens([]byte(tmpContent), shortcodePlaceholderPrefix, true, tmpShortcodes) + tmpContentWithTokensReplaced, err := replaceShortcodeTokens([]byte(tmpContent), shortcodePlaceholderPrefix, tmpShortcodes) if err != nil { return "", fmt.Errorf("Fail to replace short code tokens in %s:\n%s", page.BaseFileName(), err.Error()) @@ -436,10 +438,10 @@ Loop: } // replaceShortcodeTokensInsources calls replaceShortcodeTokens for every source given. -func replaceShortcodeTokensInsources(prefix string, wrapped bool, replacements map[string]string, sources ...[]byte) (b [][]byte, err error) { +func replaceShortcodeTokensInsources(prefix string, replacements map[string]string, sources ...[]byte) (b [][]byte, err error) { result := make([][]byte, len(sources)) for i, s := range sources { - b, err := replaceShortcodeTokens(s, prefix, wrapped, replacements) + b, err := replaceShortcodeTokens(s, prefix, replacements) if err != nil { return nil, err @@ -450,43 +452,65 @@ func replaceShortcodeTokensInsources(prefix string, wrapped bool, replacements m } // Replace prefixed shortcode tokens (HUGOSHORTCODE-1, HUGOSHORTCODE-2) with the real content. -// wrapped = true means that the token has been wrapped in {@{@/@}@} -func replaceShortcodeTokens(source []byte, prefix string, wrapped bool, replacements map[string]string) (b []byte, err error) { - var re *regexp.Regexp +func replaceShortcodeTokens(source []byte, prefix string, replacements map[string]string) ([]byte, error) { - if wrapped { - re, err = regexp.Compile(`\{@\{@` + regexp.QuoteMeta(prefix) + `-\d+@\}@\}`) - if err != nil { - return nil, err - } - } else { - re, err = regexp.Compile(regexp.QuoteMeta(prefix) + `-(\d+)`) - if err != nil { - return nil, err - } + if len(replacements) == 0 { + return source, nil } - // use panic/recover for reporting if an unknown - defer func() { - if r := recover(); r != nil { - var ok bool - b = nil - err, ok = r.(error) - if !ok { - err = fmt.Errorf("unexpected panic during replaceShortcodeTokens: %v", r) + var buff bytes.Buffer + + sourceLen := len(source) + width := 0 + start := 0 + + pre := []byte("{@{@" + prefix) + post := []byte("@}@}") + pStart := []byte("

") + pEnd := []byte("

") + + k := bytes.Index(source[start:], pre) + + for k != -1 { + j := start + k + postIdx := bytes.Index(source[j:], post) + if postIdx < 0 { + // this should never happen, but let the caller decide to panic or not + return nil, errors.New("illegal state in content; shortcode token missing end delim") + } + + end := j + postIdx + 4 + + newVal := []byte(replacements[string(source[j:end])]) + + // Issue #1148: Check for wrapping p-tags

+ if j >= 3 && bytes.Equal(source[j-3:j], pStart) { + if (k+4) < sourceLen && bytes.Equal(source[end:end+4], pEnd) { + j -= 3 + end += 4 } } - }() - b = re.ReplaceAllFunc(source, func(m []byte) []byte { - key := string(m) - if val, ok := replacements[key]; ok { - return []byte(val) + oldVal := source[j:end] + w, err := buff.Write(source[start:j]) + if err != nil { + return nil, errors.New("buff write failed") } - panic(fmt.Errorf("unknown shortcode token %q", key)) - }) + width += w + w, err = buff.Write(newVal) + if err != nil { + return nil, errors.New("buff write failed") + } + width += w + start = j + len(oldVal) - return b, err + k = bytes.Index(source[start:], pre) + } + _, err := buff.Write(source[start:]) + if err != nil { + return nil, errors.New("buff write failed") + } + return buff.Bytes(), nil } func getShortcodeTemplate(name string, t tpl.Template) *template.Template { diff --git a/hugolib/shortcode_test.go b/hugolib/shortcode_test.go index 6d9c2037b..d7afbc14e 100644 --- a/hugolib/shortcode_test.go +++ b/hugolib/shortcode_test.go @@ -332,16 +332,16 @@ func BenchmarkReplaceShortcodeTokens(b *testing.B) { replacements map[string]string expect interface{} }{ - {"Hello HUGOSHORTCODE-1.", map[string]string{"HUGOSHORTCODE-1": "World"}, "Hello World."}, - {strings.Repeat("A", 100) + " HUGOSHORTCODE-1.", map[string]string{"HUGOSHORTCODE-1": "Hello World"}, strings.Repeat("A", 100) + " Hello World."}, - {strings.Repeat("A", 500) + " HUGOSHORTCODE-1.", map[string]string{"HUGOSHORTCODE-1": "Hello World"}, strings.Repeat("A", 500) + " Hello World."}, - {strings.Repeat("ABCD ", 500) + " HUGOSHORTCODE-1.", map[string]string{"HUGOSHORTCODE-1": "Hello World"}, strings.Repeat("ABCD ", 500) + " Hello World."}, - {strings.Repeat("A", 500) + " HUGOSHORTCODE-1." + strings.Repeat("BC", 500) + " HUGOSHORTCODE-1.", map[string]string{"HUGOSHORTCODE-1": "Hello World"}, strings.Repeat("A", 500) + " Hello World." + strings.Repeat("BC", 500) + " Hello World."}, + {"Hello {@{@HUGOSHORTCODE-1@}@}.", map[string]string{"{@{@HUGOSHORTCODE-1@}@}": "World"}, "Hello World."}, + {strings.Repeat("A", 100) + " {@{@HUGOSHORTCODE-1@}@}.", map[string]string{"{@{@HUGOSHORTCODE-1@}@}": "Hello World"}, strings.Repeat("A", 100) + " Hello World."}, + {strings.Repeat("A", 500) + " {@{@HUGOSHORTCODE-1@}@}.", map[string]string{"{@{@HUGOSHORTCODE-1@}@}": "Hello World"}, strings.Repeat("A", 500) + " Hello World."}, + {strings.Repeat("ABCD ", 500) + " {@{@HUGOSHORTCODE-1@}@}.", map[string]string{"{@{@HUGOSHORTCODE-1@}@}": "Hello World"}, strings.Repeat("ABCD ", 500) + " Hello World."}, + {strings.Repeat("A", 500) + " {@{@HUGOSHORTCODE-1@}@}." + strings.Repeat("BC", 500) + " {@{@HUGOSHORTCODE-1@}@}.", map[string]string{"{@{@HUGOSHORTCODE-1@}@}": "Hello World"}, strings.Repeat("A", 500) + " Hello World." + strings.Repeat("BC", 500) + " Hello World."}, } b.ResetTimer() for i := 0; i < b.N; i++ { for i, this := range data { - results, err := replaceShortcodeTokens([]byte(this.input), "HUGOSHORTCODE", false, this.replacements) + results, err := replaceShortcodeTokens([]byte(this.input), "HUGOSHORTCODE", this.replacements) if expectSuccess, ok := this.expect.(bool); ok && !expectSuccess { if err == nil { @@ -367,21 +367,30 @@ func TestReplaceShortcodeTokens(t *testing.T) { input string prefix string replacements map[string]string - wrappedInDiv bool expect interface{} }{ - {"Hello PREFIX-1.", "PREFIX", map[string]string{"PREFIX-1": "World"}, false, "Hello World."}, - {"A {@{@A-1@}@} asdf {@{@A-2@}@}.", "A", map[string]string{"{@{@A-1@}@}": "v1", "{@{@A-2@}@}": "v2"}, true, "A v1 asdf v2."}, - {"Hello PREFIX2-1. Go PREFIX2-2, Go, Go PREFIX2-3 Go Go!.", "PREFIX2", map[string]string{"PREFIX2-1": "Europe", "PREFIX2-2": "Jonny", "PREFIX2-3": "Johnny"}, false, "Hello Europe. Go Jonny, Go, Go Johnny Go Go!."}, - {"A PREFIX-2 PREFIX-1.", "PREFIX", map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, false, "A B A."}, - {"A PREFIX-1 PREFIX-2", "PREFIX", map[string]string{"PREFIX-1": "A"}, false, false}, - {"A PREFIX-1 but not the second.", "PREFIX", map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, false, "A A but not the second."}, - {"An PREFIX-1.", "PREFIX", map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, false, "An A."}, - {"An PREFIX-1 PREFIX-2.", "PREFIX", map[string]string{"PREFIX-1": "A", "PREFIX-2": "B"}, false, "An A B."}, - {"A PREFIX-1 PREFIX-2 PREFIX-3 PREFIX-1 PREFIX-3.", "PREFIX", map[string]string{"PREFIX-1": "A", "PREFIX-2": "B", "PREFIX-3": "C"}, false, "A A B C A C."}, - {"A {@{@PREFIX-1@}@} {@{@PREFIX-2@}@} {@{@PREFIX-3@}@} {@{@PREFIX-1@}@} {@{@PREFIX-3@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B", "{@{@PREFIX-3@}@}": "C"}, true, "A A B C A C."}, + {"Hello {@{@PREFIX-1@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World"}, "Hello World."}, + {"A {@{@A-1@}@} asdf {@{@A-2@}@}.", "A", map[string]string{"{@{@A-1@}@}": "v1", "{@{@A-2@}@}": "v2"}, "A v1 asdf v2."}, + {"Hello {@{@PREFIX2-1@}@}. Go {@{@PREFIX2-2@}@}, Go, Go {@{@PREFIX2-3@}@} Go Go!.", "PREFIX2", map[string]string{"{@{@PREFIX2-1@}@}": "Europe", "{@{@PREFIX2-2@}@}": "Jonny", "{@{@PREFIX2-3@}@}": "Johnny"}, "Hello Europe. Go Jonny, Go, Go Johnny Go Go!."}, + {"A {@{@PREFIX-2@}@} {@{@PREFIX-1@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B"}, "A B A."}, + {"A {@{@PREFIX-1@}@} {@{@PREFIX-2", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A"}, false}, + {"A {@{@PREFIX-1@}@} but not the second.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B"}, "A A but not the second."}, + {"An {@{@PREFIX-1@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B"}, "An A."}, + {"An {@{@PREFIX-1@}@} {@{@PREFIX-2@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B"}, "An A B."}, + {"A {@{@PREFIX-1@}@} {@{@PREFIX-2@}@} {@{@PREFIX-3@}@} {@{@PREFIX-1@}@} {@{@PREFIX-3@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B", "{@{@PREFIX-3@}@}": "C"}, "A A B C A C."}, + {"A {@{@PREFIX-1@}@} {@{@PREFIX-2@}@} {@{@PREFIX-3@}@} {@{@PREFIX-1@}@} {@{@PREFIX-3@}@}.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "A", "{@{@PREFIX-2@}@}": "B", "{@{@PREFIX-3@}@}": "C"}, "A A B C A C."}, + // Issue #1148 remove p-tags 10 => + {"Hello

{@{@PREFIX-1@}@}

. END.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World"}, "Hello World. END."}, + {"Hello

{@{@PREFIX-1@}@}

.

{@{@PREFIX-2@}@}

END.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World", "{@{@PREFIX-2@}@}": "THE"}, "Hello World. THE END."}, + {"Hello

{@{@PREFIX-1@}@}. END

.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World"}, "Hello

World. END

."}, + {"

Hello {@{@PREFIX-1@}@}

. END.", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World"}, "

Hello World

. END."}, + {"Hello

{@{@PREFIX-1@}@}12", "PREFIX", map[string]string{"{@{@PREFIX-1@}@}": "World"}, "Hello

World12"}, + // Make sure the buffering expands as needed + {"Hello {@{@P-1@}@}. {@{@P-1@}@}-{@{@P-1@}@} {@{@P-1@}@} {@{@P-1@}@} {@{@P-1@}@} END", "P", map[string]string{"{@{@P-1@}@}": strings.Repeat("BC", 100)}, + fmt.Sprintf("Hello %s. %s-%s %s %s %s END", + strings.Repeat("BC", 100), strings.Repeat("BC", 100), strings.Repeat("BC", 100), strings.Repeat("BC", 100), strings.Repeat("BC", 100), strings.Repeat("BC", 100))}, } { - results, err := replaceShortcodeTokens([]byte(this.input), this.prefix, this.wrappedInDiv, this.replacements) + results, err := replaceShortcodeTokens([]byte(this.input), this.prefix, this.replacements) if b, ok := this.expect.(bool); ok && !b { if err == nil { @@ -393,7 +402,7 @@ func TestReplaceShortcodeTokens(t *testing.T) { continue } if !reflect.DeepEqual(results, []byte(this.expect.(string))) { - t.Errorf("[%d] replaceShortcodeTokens, got %q but expected %q", i, results, this.expect) + t.Errorf("[%d] replaceShortcodeTokens, got \n%q but expected \n%q", i, results, this.expect) } } diff --git a/hugolib/site_test.go b/hugolib/site_test.go index 22db1eaf2..5acff8c2a 100644 --- a/hugolib/site_test.go +++ b/hugolib/site_test.go @@ -320,8 +320,13 @@ func doTestCrossrefs(t *testing.T, relative, uglyUrls bool) { sources := []source.ByteSource{ {filepath.FromSlash("sect/doc1.md"), []byte(fmt.Sprintf(`Ref 2: {{< %s "sect/doc2.md" >}}`, refShortcode))}, + // Issue #1148: Make sure that no P-tags is added around shortcodes. {filepath.FromSlash("sect/doc2.md"), - []byte(fmt.Sprintf(`Ref 1: {{< %s "sect/doc1.md" >}}`, refShortcode))}, + []byte(fmt.Sprintf(`**Ref 1:** + +{{< %s "sect/doc1.md" >}} + +THE END.`, refShortcode))}, } s := &Site{ @@ -341,7 +346,7 @@ func doTestCrossrefs(t *testing.T, relative, uglyUrls bool) { expected string }{ {filepath.FromSlash(fmt.Sprintf("sect/doc1%s", expectedPathSuffix)), fmt.Sprintf("

Ref 2: %s/sect/doc2%s

\n", expectedBase, expectedUrlSuffix)}, - {filepath.FromSlash(fmt.Sprintf("sect/doc2%s", expectedPathSuffix)), fmt.Sprintf("

Ref 1: %s/sect/doc1%s

\n", expectedBase, expectedUrlSuffix)}, + {filepath.FromSlash(fmt.Sprintf("sect/doc2%s", expectedPathSuffix)), fmt.Sprintf("

Ref 1:

\n\n%s/sect/doc1%s\n\n

THE END.

\n", expectedBase, expectedUrlSuffix)}, } for _, test := range tests {