Remove accents in URLs

So the taxonomy `Gérard Depardieu` gives paths of the form `gerard-depardieu`.

Unfortunately, this introduces two imports from `golang.org/x/text`, but Unicode normalization isn't something we'd want to write from scratch.

See https://blog.golang.org/normalization

See #1180
This commit is contained in:
bep 2015-05-30 14:46:58 +02:00
parent be0903c71a
commit 3ea4df35f2
2 changed files with 13 additions and 2 deletions

View file

@ -19,6 +19,8 @@ import (
"github.com/spf13/afero"
jww "github.com/spf13/jwalterweatherman"
"github.com/spf13/viper"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
"io"
"os"
"path/filepath"
@ -97,9 +99,18 @@ func UnicodeSanitize(s string) string {
}
}
// remove accents - see https://blog.golang.org/normalization
t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
result, _, _ := transform.String(t, string(target))
return result
return string(target)
}
// isMn reports whether r belongs to the Unicode general category Mn
// (nonspacing marks), such as the combining accents left behind after
// canonical decomposition.
func isMn(r rune) bool {
	nonspacingMarks := unicode.Mn
	return unicode.In(r, nonspacingMarks)
}
// ReplaceExtension takes a path and an extension, strips the old extension
// and returns the path with the new extension.
func ReplaceExtension(path string, newExt string) string {

View file

@ -27,7 +27,7 @@ func TestMakePath(t *testing.T) {
{"FOo/BaR.html", "FOo/BaR.html"},
{"трям/трям", "трям/трям"},
{"은행", "은행"},
{"Банковский кассир", "Банковский-кассир"},
{"Банковский кассир", "Банковскии-кассир"},
}
for _, test := range tests {
@ -717,7 +717,7 @@ func TestGetTempDir(t *testing.T) {
{testDir + "FOo/BaR.html", dir + testDir + "FOo/BaR.html" + FilePathSeparator},
{testDir + "трям/трям", dir + testDir + "трям/трям" + FilePathSeparator},
{testDir + "은행", dir + testDir + "은행" + FilePathSeparator},
{testDir + "Банковский кассир", dir + testDir + "Банковский-кассир" + FilePathSeparator},
{testDir + "Банковский кассир", dir + testDir + "Банковскии-кассир" + FilePathSeparator},
}
for _, test := range tests {