markdown: Pass emoji codes to yuin/goldmark-emoji

Removes emoji code conversion from the page and shortcode parsers. Emoji
codes in markdown are now passed to Goldmark, where the goldmark-emoji
extension converts them to hexadecimal numeric character references.

This disables emoji rendering for the alternate content formats: html,
asciidoc, org, pandoc, and rst.

Fixes #7332
Fixes #11587
Closes #11598
This commit is contained in:
Joe Mooring 2023-10-24 03:04:13 -07:00 committed by GitHub
parent de4e466036
commit 272484f8bf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 151 additions and 312 deletions

View file

@ -228,3 +228,7 @@ func (c ConfigLanguage) PaginatePath() string {
// StaticDirs returns whatever the underlying config's staticDirs method reports.
func (c ConfigLanguage) StaticDirs() []string {
return c.config.staticDirs()
}
// EnableEmoji reports the value of the EnableEmoji field on the underlying config.
func (c ConfigLanguage) EnableEmoji() bool {
return c.config.EnableEmoji
}

View file

@ -66,6 +66,7 @@ type AllProvider interface {
StaticDirs() []string
IgnoredErrors() map[string]bool
WorkingDir() string
EnableEmoji() bool
}
// Provider provides the configuration settings for Hugo.

1
go.mod
View file

@ -64,6 +64,7 @@ require (
github.com/tdewolff/minify/v2 v2.12.9
github.com/tdewolff/parse/v2 v2.6.8
github.com/yuin/goldmark v1.5.6
github.com/yuin/goldmark-emoji v1.0.2
go.uber.org/atomic v1.11.0
go.uber.org/automaxprocs v1.5.3
gocloud.dev v0.34.0

3
go.sum
View file

@ -444,9 +444,12 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.3.7/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark v1.5.6 h1:COmQAWTCcGetChm3Ig7G/t8AFAN00t+o8Mt4cf7JpwA=
github.com/yuin/goldmark v1.5.6/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark-emoji v1.0.2 h1:c/RgTShNgHTtc6xdz2KKI74jJr6rWi7FPgnP9GAsO5s=
github.com/yuin/goldmark-emoji v1.0.2/go.mod h1:RhP/RWpexdp+KHs7ghKnifRoIs/Bq4nDS7tRbCkOwKY=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=

View file

@ -13,13 +13,8 @@
package helpers
import (
"math"
"reflect"
"strings"
"testing"
"github.com/gohugoio/hugo/bufferpool"
"github.com/kyokomi/emoji/v2"
)
func TestEmojiCustom(t *testing.T) {
@ -68,76 +63,3 @@ func TestEmojiCustom(t *testing.T) {
}
}
// BenchmarkEmojiKyokomiFprint benchmarks emoji.Fprint writing into a pooled
// buffer, with the buffer contents copied into a fresh byte slice as the result.
//
// The Emoji benchmarks below are heavily skewed in Hugo's direction:
//
// Hugo has a byte slice, wants a byte slice, and doesn't mind if the original is modified.
func BenchmarkEmojiKyokomiFprint(b *testing.B) {
f := func(in []byte) []byte {
buff := bufferpool.GetBuffer()
defer bufferpool.PutBuffer(buff)
emoji.Fprint(buff, string(in))
// Copy the bytes out before the deferred PutBuffer hands the buffer back.
bc := make([]byte, buff.Len())
copy(bc, buff.Bytes())
return bc
}
doBenchmarkEmoji(b, f)
}
// BenchmarkEmojiKyokomiSprint benchmarks emoji.Sprint, converting the input
// to a string and the result back to a byte slice on every call.
func BenchmarkEmojiKyokomiSprint(b *testing.B) {
f := func(in []byte) []byte {
return []byte(emoji.Sprint(string(in)))
}
doBenchmarkEmoji(b, f)
}
// BenchmarkHugoEmoji benchmarks Emojify using the same driver and inputs as
// the kyokomi benchmarks.
func BenchmarkHugoEmoji(b *testing.B) {
doBenchmarkEmoji(b, Emojify)
}
// doBenchmarkEmoji is the shared driver for the emoji benchmarks. It runs f
// over a fixed set of sample strings b.N times and fails the benchmark if the
// length of a result differs from the length of the expected output by more
// than 30 bytes.
func doBenchmarkEmoji(b *testing.B, f func(in []byte) []byte) {
type input struct {
in []byte
expect []byte
}
// Sample inputs paired with the output of emoji.Sprint (or the unchanged
// text when the input contains no emoji codes).
data := []struct {
input string
expect string
}{
{"A :smile: a day", emoji.Sprint("A :smile: a day")},
{"A :smile: and a :beer: day keeps the doctor away", emoji.Sprint("A :smile: and a :beer: day keeps the doctor away")},
{"A :smile: a day and 10 " + strings.Repeat(":beer: ", 10), emoji.Sprint("A :smile: a day and 10 " + strings.Repeat(":beer: ", 10))},
{"No smiles today.", "No smiles today."},
{"No smiles for you or " + strings.Repeat("you ", 1000), "No smiles for you or " + strings.Repeat("you ", 1000)},
}
// Build a separate byte-slice copy of every sample for every iteration
// before the timer is reset, so the conversions are not measured.
// NOTE(review): presumably the per-iteration copies exist because f may
// modify its input in place — confirm.
in := make([]input, b.N*len(data))
cnt := 0
for i := 0; i < b.N; i++ {
for _, this := range data {
in[cnt] = input{[]byte(this.input), []byte(this.expect)}
cnt++
}
}
b.ResetTimer()
cnt = 0
for i := 0; i < b.N; i++ {
for j := range data {
currIn := in[cnt]
cnt++
result := f(currIn.in)
// The Emoji implementations give slightly different output, so only
// the absolute difference in length is checked, not the content.
diffLen := len(result) - len(currIn.expect)
diffLen = int(math.Abs(float64(diffLen)))
if diffLen > 30 {
b.Fatalf("[%d] emoji std, got \n%q but expected \n%q", j, result, currIn.expect)
}
}
}
}

View file

@ -150,7 +150,7 @@ func (m *pageMap) newPageFromContentNode(n *contentNode, parentBucket *pagesMapB
parseResult, err := pageparser.Parse(
r,
pageparser.Config{EnableEmoji: s.conf.EnableEmoji},
pageparser.Config{},
)
if err != nil {
return nil, err

View file

@ -794,11 +794,7 @@ Loop:
rn.AddShortcode(currShortcode)
case it.Type == pageparser.TypeEmoji:
if emoji := helpers.Emoji(it.ValStr(result.Input())); emoji != nil {
rn.AddReplacement(emoji, it)
} else {
rn.AddBytes(it)
}
rn.AddBytes(it)
case it.IsEOF():
break Loop
case it.IsError():

View file

@ -894,13 +894,13 @@ summary: Summary (zh)
b.Build(BuildCfg{})
b.AssertFileContent("public/index.html", `<html>
<body>
<h2>Translations</h2>
<ul>
<li>Title: Title (zh), Summary (zh)</li>
<li>Content: <p>这是一些内容</p>
</li>
@ -911,7 +911,7 @@ summary: Summary (zh)
<li>Truncated: false</li>
<li>FuzzyWordCount: 100</li>
<li>ReadingTime: 1</li>
<li>Len: 26</li>
<li>Len: 26</li>
</ul>
@ -921,7 +921,7 @@ summary: Summary (zh)
b.AssertFileContent("public/metadata.html", `<h2>Translations metadata</h2>
<ul>
<li>Title: Title (zh), Summary (zh)</li>
<li>Content: <p>这是一些内容</p>
</li>
@ -932,13 +932,13 @@ summary: Summary (zh)
<li>Truncated: false</li>
<li>FuzzyWordCount: 100</li>
<li>ReadingTime: 1</li>
<li>Len: 26</li>
<li>Len: 26</li>
</ul>`)
b.AssertFileContent("public/zh_cn/index.html", `<html>
<body>
<h2>Translations</h2>
<ul>
@ -953,7 +953,7 @@ summary: Summary (zh)
<li>Truncated: false</li>
<li>FuzzyWordCount: 100</li>
<li>ReadingTime: 1</li>
<li>Len: 29</li>
<li>Len: 29</li>
</ul>
@ -963,7 +963,7 @@ summary: Summary (zh)
b.AssertFileContent("public/zh_cn/metadata.html", `<h2>Translations metadata</h2>
<ul>
<li>Title: Title (en), Summary (en)</li>
<li>Content: <p>Here is some content.</p>
</li>
@ -974,7 +974,7 @@ summary: Summary (zh)
<li>Truncated: false</li>
<li>FuzzyWordCount: 100</li>
<li>ReadingTime: 1</li>
<li>Len: 29</li>
<li>Len: 29</li>
</ul>`)
}
@ -1234,60 +1234,6 @@ func TestChompBOM(t *testing.T) {
checkPageTitle(t, p, "Simple")
}
// TestPageWithEmoji builds the same markdown page once with enableEmoji set
// to true and once with it set to false. With the setting on, known codes
// such as :smile: are expected as emoji characters while unknown codes and
// bare colon runs stay as written; with it off, every code is expected
// verbatim in the output.
func TestPageWithEmoji(t *testing.T) {
for _, enableEmoji := range []bool{true, false} {
v := config.New()
v.Set("enableEmoji", enableEmoji)
b := newTestSitesBuilder(t).WithViper(v)
b.WithContent("page-emoji.md", `---
title: "Hugo Smile"
---
This is a :smile:.
<!--more-->
Another :smile: This is :not: :an: :emoji:.
O :christmas_tree:
Write me an :e-mail: or :email:?
Too many colons: :: ::: :::: :?: :!: :.:
If you dislike this video, you can hit that :-1: button :stuck_out_tongue_winking_eye:,
but if you like it, hit :+1: and get subscribed!
`)
b.CreateSites().Build(BuildCfg{})
if enableEmoji {
// Known codes are replaced; unknown codes and colon runs are untouched.
b.AssertFileContent("public/page-emoji/index.html",
"This is a 😄",
"Another 😄",
"This is :not: :an: :emoji:.",
"O 🎄",
"Write me an 📧 or ✉️?",
"Too many colons: :: ::: :::: :?: :!: :.:",
"you can hit that 👎 button 😜,",
"hit 👍 and get subscribed!",
)
} else {
// Nothing is replaced when the setting is off.
b.AssertFileContent("public/page-emoji/index.html",
"This is a :smile:",
"Another :smile:",
"This is :not: :an: :emoji:.",
"O :christmas_tree:",
"Write me an :e-mail: or :email:?",
"Too many colons: :: ::: :::: :?: :!: :.:",
"you can hit that :-1: button :stuck_out_tongue_winking_eye:,",
"hit :+1: and get subscribed!",
)
}
}
}
func TestPageHTMLContent(t *testing.T) {
b := newTestSitesBuilder(t)
b.WithSimpleConfigFile()
@ -1333,7 +1279,7 @@ func TestPageManualSummary(t *testing.T) {
title: "Hugo"
---
This is a {{< sc >}}.
<!--more-->
<!--more-->
Content.
`)
@ -1342,7 +1288,7 @@ Content.
title: "Hugo"
---
{{< sc >}}
<!--more-->
<!--more-->
{{< sc >}}
`)
@ -1363,7 +1309,7 @@ Summary<!--more-->{{< sc >}}
#+DESCRIPTION: D1
This is a {{< sc >}}.
# more
Content.
Content.
`)
b.WithContent("page-org-variant1.org", `#+TITLE: T1
@ -1371,7 +1317,7 @@ Summary.
# more
Content.
Content.
`)
b.WithTemplatesAdded("layouts/shortcodes/sc.html", "a shortcode")
@ -1665,7 +1611,7 @@ SUMMARY:{{ .Summary }}:{{ len .Summary }}:END
b := newTestSitesBuilder(t)
b.WithSimpleConfigFile().WithTemplatesAdded(single...).WithContent("p1.md", fmt.Sprintf(`---
title: p1
title: p1
---
%s

View file

@ -26,8 +26,6 @@ import (
"strings"
"sync"
"github.com/gohugoio/hugo/helpers"
"errors"
"github.com/gohugoio/hugo/common/herrors"
@ -632,14 +630,7 @@ Loop:
case currItem.IsText():
sc.inner = append(sc.inner, currItem.ValStr(source))
case currItem.Type == pageparser.TypeEmoji:
// TODO(bep) avoid the duplication of these "text cases", to prevent
// more of #6504 in the future.
val := currItem.ValStr(source)
if emoji := helpers.Emoji(val); emoji != nil {
sc.inner = append(sc.inner, string(emoji))
} else {
sc.inner = append(sc.inner, val)
}
sc.inner = append(sc.inner, currItem.ValStr(source))
case currItem.IsShortcodeName():
sc.name = currItem.ValStr(source)

View file

@ -599,7 +599,7 @@ weight: %d
c.Assert(len(s.RegularPages()), qt.Equals, 3)
builder.AssertFileContent("public/en/p1/index.html", `v1: 0 sgo: |v2: 1 sgo: 0|v3: 2 sgo: 1|v4: 3 sgo: 2|v5: 4 sgo: 3`)
builder.AssertFileContent("public/en/p1/index.html", `outer ordinal: 5 inner:
builder.AssertFileContent("public/en/p1/index.html", `outer ordinal: 5 inner:
ordinal: 0 scratch ordinal: 1 scratch get ordinal: 0
ordinal: 2 scratch ordinal: 3 scratch get ordinal: 2
ordinal: 4 scratch ordinal: 5 scratch get ordinal: 4`)
@ -754,33 +754,6 @@ title: "Hugo Rocks!"
)
}
// TestShortcodeEmoji checks that, with enableEmoji set to true, an emoji code
// inside the inner content of a {{< event >}} shortcode is rendered as the
// emoji character in the generated page.
//
// https://github.com/gohugoio/hugo/issues/6504
func TestShortcodeEmoji(t *testing.T) {
t.Parallel()
v := config.New()
v.Set("enableEmoji", true)
builder := newTestSitesBuilder(t).WithViper(v)
builder.WithContent("page.md", `---
title: "Hugo Rocks!"
---
# doc
{{< event >}}10:30-11:00 My :smile: Event {{< /event >}}
`).WithTemplatesAdded(
"layouts/shortcodes/event.html", `<div>{{ "\u29BE" }} {{ .Inner }} </div>`)
builder.Build(BuildCfg{})
builder.AssertFileContent("public/page/index.html",
"⦾ 10:30-11:00 My 😄 Event",
)
}
func TestShortcodeParams(t *testing.T) {
t.Parallel()
c := qt.New(t)

View file

@ -94,7 +94,6 @@ type siteConfigHolder struct {
taxonomiesConfig taxonomiesConfig
timeout time.Duration
hasCJKLanguage bool
enableEmoji bool
}
// Lazily loaded site dependencies.

View file

@ -28,6 +28,7 @@ import (
"github.com/gohugoio/hugo/markup/converter"
"github.com/gohugoio/hugo/markup/tableofcontents"
"github.com/yuin/goldmark"
emoji "github.com/yuin/goldmark-emoji"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
@ -149,6 +150,10 @@ func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
extensions = append(extensions, c)
}
if pcfg.Conf.EnableEmoji() {
extensions = append(extensions, emoji.Emoji)
}
if cfg.Parser.AutoHeadingID {
parserOptions = append(parserOptions, parser.WithAutoHeadingID())
}
@ -156,6 +161,7 @@ func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
if cfg.Parser.Attribute.Title {
parserOptions = append(parserOptions, parser.WithAttribute())
}
if cfg.Parser.Attribute.Block {
extensions = append(extensions, attributes.New())
}

View file

@ -410,7 +410,7 @@ func TestHookInfiniteRecursion(t *testing.T) {
files := `
-- config.toml --
-- layouts/_default/_markup/render-link.html --
<a href="{{ .Destination | safeURL }}">{{ .Text | RENDERFUNC }}</a>
<a href="{{ .Destination | safeURL }}">{{ .Text | RENDERFUNC }}</a>
-- layouts/_default/single.html --
{{ .Content }}
-- content/p1.md --
@ -421,8 +421,8 @@ title: "p1"
https://example.org
a@b.com
`
files = strings.ReplaceAll(files, "RENDERFUNC", renderFunc)
@ -578,3 +578,114 @@ a <!-- b --> c
"<li>This is a list item <!-- Comment: an innocent-looking comment --></li>",
)
}
// TestGoldmarkEmojiExtension builds a small site with enableEmoji = true and
// checks the rendered output of p1. It expects an emoji code inside a fenced
// code block to stay literal, and a code pulled in from p2 through the
// include shortcode to be rendered as hexadecimal numeric character
// references. For the three shortcodes it expects the inner code of sc1
// ({{< >}} notation, plain .Inner) to stay literal, and the inner codes of
// sc2 (passed through .Page.RenderString) and sc3 ({{% %}} notation) to be
// converted.
//
// Issue #7332
// Issue #11587
func TestGoldmarkEmojiExtension(t *testing.T) {
t.Parallel()
files := `
-- config.toml --
enableEmoji = true
-- content/p1.md --
---
title: "p1"
---
~~~text
:x:
~~~
{{% include "/p2" %}}
{{< sc1 >}}:smiley:{{< /sc1 >}}
{{< sc2 >}}:+1:{{< /sc2 >}}
{{% sc3 %}}:-1:{{% /sc3 %}}
-- content/p2.md --
---
title: "p2"
---
:heavy_check_mark:
-- layouts/shortcodes/include.html --
{{ $p := site.GetPage (.Get 0) }}
{{ $p.RenderShortcodes }}
-- layouts/shortcodes/sc1.html --
sc1_begin|{{ .Inner }}|sc1_end
-- layouts/shortcodes/sc2.html --
sc2_begin|{{ .Inner | .Page.RenderString }}|sc2_end
-- layouts/shortcodes/sc3.html --
sc3_begin|{{ .Inner }}|sc3_end
-- layouts/_default/single.html --
{{ .Content }}
`
b := hugolib.NewIntegrationTestBuilder(
hugolib.IntegrationTestConfig{
T: t,
TxtarString: files,
},
).Build()
b.AssertFileContentExact("public/p1/index.html",
// Issue #7332
"<span>:x:\n</span>",
// Issue #11587
"<p>&#x2714;&#xfe0f;</p>",
// Should not be converted to emoji
"sc1_begin|:smiley:|sc1_end",
// Should be converted to emoji
"sc2_begin|&#x1f44d;|sc2_end",
// Should be converted to emoji
"sc3_begin|&#x1f44e;|sc3_end",
)
}
// TestEmojiDisabled checks that with enableEmoji = false in the site config,
// an emoji code in markdown content is rendered unchanged as "<p>:x:</p>".
func TestEmojiDisabled(t *testing.T) {
t.Parallel()
files := `
-- config.toml --
enableEmoji = false
-- content/p1.md --
---
title: "p1"
---
:x:
-- layouts/_default/single.html --
{{ .Content }}
`
b := hugolib.NewIntegrationTestBuilder(
hugolib.IntegrationTestConfig{
T: t,
TxtarString: files,
},
).Build()
b.AssertFileContentExact("public/p1/index.html", "<p>:x:</p>")
}
// TestEmojiDefaultConfig checks that when the site supplies no config file,
// and so no enableEmoji setting, an emoji code in markdown content is
// rendered unchanged as "<p>:x:</p>".
func TestEmojiDefaultConfig(t *testing.T) {
t.Parallel()
files := `
-- content/p1.md --
---
title: "p1"
---
:x:
-- layouts/_default/single.html --
{{ .Content }}
`
b := hugolib.NewIntegrationTestBuilder(
hugolib.IntegrationTestConfig{
T: t,
TxtarString: files,
},
).Build()
b.AssertFileContentExact("public/p1/index.html", "<p>:x:</p>")
}

View file

@ -61,9 +61,7 @@ func (l *pageLexer) Input() []byte {
return l.input
}
type Config struct {
EnableEmoji bool
}
type Config struct{}
// note: the input position here is normally 0 (start), but
// can be set if position of first shortcode is known
@ -103,8 +101,6 @@ var (
delimOrg = []byte("#+")
htmlCommentStart = []byte("<!--")
htmlCommentEnd = []byte("-->")
emojiDelim = byte(':')
)
func (l *pageLexer) next() rune {
@ -276,34 +272,6 @@ func (l *pageLexer) consumeSpace() {
}
}
// lexEmoji lexes a candidate emoji code; the input at l.pos is expected to be
// the opening ":".
//
// It scans forward from the byte after l.pos. If it reaches a closing
// emojiDelim with at least one byte in between, and every rune decoded along
// the way satisfies isAlphaNumericOrHyphen or is '+', it moves l.pos just past
// the closing delimiter and emits a TypeEmoji item. Otherwise it advances
// l.pos by one and emits a tText item. It always returns lexMainSection.
func lexEmoji(l *pageLexer) stateFunc {
pos := l.pos + 1
valid := false
for i := pos; i < len(l.input); i++ {
// The i > pos guard rejects an empty code such as "::".
if i > pos && l.input[i] == emojiDelim {
pos = i + 1
valid = true
break
}
r, _ := utf8.DecodeRune(l.input[i:])
// Any other character ends the scan without a match.
if !(isAlphaNumericOrHyphen(r) || r == '+') {
break
}
}
if valid {
l.pos = pos
l.emit(TypeEmoji)
} else {
// Not an emoji code: step over one byte (presumably the ":") as text.
l.pos++
l.emit(tText)
}
return lexMainSection
}
type sectionHandlers struct {
l *pageLexer
@ -399,20 +367,6 @@ func createSectionHandlers(l *pageLexer) *sectionHandlers {
handlers := []*sectionHandler{shortCodeHandler, summaryDividerHandler}
if l.cfg.EnableEmoji {
emojiHandler := &sectionHandler{
l: l,
skipFunc: func(l *pageLexer) int {
return l.indexByte(emojiDelim)
},
lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
return lexEmoji, true
},
}
handlers = append(handlers, emojiHandler)
}
return &sectionHandlers{
l: l,
handlers: handlers,

View file

@ -1,42 +0,0 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pageparser
import (
"testing"
qt "github.com/frankban/quicktest"
)
// TestMain runs lexMainSection with Config{EnableEmoji: true} over a table of
// inputs and compares the emitted items with the expected ones. Well-formed
// ":emoji:" sequences are expected as TypeEmoji items; an unterminated
// ":emoji", a bare "::" and a ":" followed by text containing a space are
// expected as plain tText items, with each ":" emitted as its own item.
func TestMain(t *testing.T) {
t.Parallel()
c := qt.New(t)
mainTests := []lexerTest{
{"emoji #1", "Some text with :emoji:", []typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF}},
{"emoji #2", "Some text with :emoji: and some text.", []typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF}},
{"looks like an emoji #1", "Some text and then :emoji", []typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF}},
{"looks like an emoji #2", "Some text and then ::", []typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF}},
{"looks like an emoji #3", ":Some :text", []typeText{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF}},
}
for i, test := range mainTests {
items := collectWithConfig([]byte(test.input), false, lexMainSection, Config{EnableEmoji: true})
// Only build the readable string forms when the items differ.
if !equal(test.input, items, test.items) {
got := itemsToString(items, []byte(test.input))
expected := testItemsToString(test.items)
c.Assert(got, qt.Equals, expected, qt.Commentf("Test %d: %s", i, test.name))
}
}
}

View file

@ -24,7 +24,7 @@ import (
func BenchmarkParse(b *testing.B) {
start := `
---
title: "Front Matters"
@ -38,33 +38,7 @@ This is some summary. This is some summary. This is some summary. This is some s
`
input := []byte(start + strings.Repeat(strings.Repeat("this is text", 30)+"{{< myshortcode >}}This is some inner content.{{< /myshortcode >}}", 10))
cfg := Config{EnableEmoji: false}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if _, err := parseBytes(input, cfg, lexIntroSection); err != nil {
b.Fatal(err)
}
}
}
func BenchmarkParseWithEmoji(b *testing.B) {
start := `
---
title: "Front Matters"
description: "It really does"
---
This is some summary. This is some summary. This is some summary. This is some summary.
<!--more-->
`
input := []byte(start + strings.Repeat("this is not emoji: ", 50) + strings.Repeat("some text ", 70) + strings.Repeat("this is not: ", 50) + strings.Repeat("but this is a :smile: ", 3) + strings.Repeat("some text ", 70))
cfg := Config{EnableEmoji: true}
cfg := Config{}
b.ResetTimer()
for i := 0; i < b.N; i++ {