tpl/transform: Add transform.XMLEscape template function

Fixes #3268
This commit is contained in:
Joe Mooring 2023-11-24 15:51:27 -08:00 committed by Bjørn Erik Pedersen
parent ef12d169c8
commit b4c5df42ff
6 changed files with 105 additions and 2 deletions

View file

@ -0,0 +1,38 @@
---
title: transform.XMLEscape
description: Returns the given string, removing disallowed characters then escaping the result to its XML equivalent.
categories: []
keywords: []
action:
aliases: []
related: []
returnType: string
signatures: [transform.XMLEscape INPUT]
---
The `transform.XMLEscape` function removes [disallowed characters] as defined in the XML specification, then escapes the result by replacing the following characters with [HTML entities]:
- `"` → `&#34;`
- `'` → `&#39;`
- `&` → `&amp;`
- `<` → `&lt;`
- `>` → `&gt;`
- `\t` → `&#x9;`
- `\n` → `&#xA;`
- `\r` → `&#xD;`
For example:
```go-html-template
{{ transform.XMLEscape "<p>abc</p>" }} → &lt;p&gt;abc&lt;/p&gt;
```
When using `transform.XMLEscape` in a template rendered by Go's [html/template] package, declare the string to be safe HTML to avoid double escaping. For example, in an RSS template:
{{< code file="layouts/_default/rss.xml" >}}
<description>{{ .Summary | transform.XMLEscape | safeHTML }}</description>
{{< /code >}}
[disallowed characters]: https://www.w3.org/TR/xml/#charsets
[html entities]: https://developer.mozilla.org/en-us/docs/glossary/entity
[html/template]: https://pkg.go.dev/html/template

View file

@ -64,7 +64,7 @@
<pubDate>{{ .Date.Format "Mon, 02 Jan 2006 15:04:05 -0700" | safeHTML }}</pubDate>
{{- with $authorEmail }}<author>{{ . }}{{ with $authorName }} ({{ . }}){{ end }}</author>{{ end }}
<guid>{{ .Permalink }}</guid>
<description>{{ .Summary | html }}</description>
<description>{{ .Summary | transform.XMLEscape | safeHTML }}</description>
</item>
{{- end }}
</channel>

View file

@ -37,7 +37,7 @@ home=["HTML"]
-- files/README.txt --
Hugo Rocks!
-- content/blog/hugo-rocks.md --
---
---
title: "**BatMan**"
---
`
@ -65,6 +65,10 @@ title: "**BatMan**"
// This will fail the build, so skip for now.
continue
}
if strings.Contains(example[0], "transform.XMLEscape") {
// This will fail the build, so skip for now.
continue
}
templates = append(templates, example[0])
expected = append(expected, example[1])
}

View file

@ -112,6 +112,16 @@ func init() {
},
)
ns.AddMethodMapping(ctx.XMLEscape,
nil,
[][2]string{
{
`{{ transform.XMLEscape "<p>abc</p>" }}`,
`&lt;p&gt;abc&lt;/p&gt;`,
},
},
)
return ns
}

View file

@ -65,3 +65,23 @@ foo
b.AssertFileContent("public/p3/index.html", "_<h2 id=\"foo\">foo</h2>\n<p>bar</p>\n_")
b.AssertFileContent("public/p4/index.html", "_<p id=\"bar\">foo</p>\n_")
}
// TestXMLEscape builds a one-page test site whose content has a summary
// divider, then asserts that public/index.xml matches the expected
// <description> element holding the entity-escaped summary markup.
func TestXMLEscape(t *testing.T) {
	t.Parallel()

	// Site fixture: a minimal config plus a single content page.
	const site = `
-- config.toml --
disableKinds = ['section','sitemap','taxonomy','term']
-- content/p1.md --
---
title: p1
---
a **b** c
<!--more-->
`

	// The summary's HTML tags are expected to appear entity-escaped.
	const wantDescription = `
<description>&lt;p&gt;a &lt;strong&gt;b&lt;/strong&gt; c&lt;/p&gt;</description>
`

	hugolib.Test(t, site).AssertFileContent("public/index.xml", wantDescription)
}

View file

@ -15,9 +15,12 @@
package transform
import (
"bytes"
"context"
"encoding/xml"
"html"
"html/template"
"strings"
"github.com/gohugoio/hugo/cache/namedmemcache"
"github.com/gohugoio/hugo/markup/converter/hooks"
@ -118,6 +121,34 @@ func (ns *Namespace) HTMLUnescape(s any) (string, error) {
return html.UnescapeString(ss), nil
}
// XMLEscape converts s to a string, drops every character that the XML
// specification does not allow, and returns what remains escaped for XML.
// An error from the string conversion or from the escaping step is returned
// together with an empty string.
func (ns *Namespace) XMLEscape(s any) (string, error) {
	input, err := cast.ToStringE(s)
	if err != nil {
		return "", err
	}

	// keepValid passes through runes matching the Char production of the XML
	// specification; a negative result makes strings.Map drop the rune.
	// https://www.w3.org/TR/xml/#NT-Char
	keepValid := func(r rune) rune {
		switch {
		case r == 0x9, r == 0xA, r == 0xD:
			return r
		case 0x20 <= r && r <= 0xD7FF:
			return r
		case 0xE000 <= r && r <= 0xFFFD:
			return r
		case 0x10000 <= r && r <= 0x10FFFF:
			return r
		}
		return -1
	}

	var escaped bytes.Buffer
	if err := xml.EscapeText(&escaped, []byte(strings.Map(keepValid, input))); err != nil {
		return "", err
	}

	return escaped.String(), nil
}
// Markdownify renders s from Markdown to HTML.
func (ns *Namespace) Markdownify(ctx context.Context, s any) (template.HTML, error) {