From 3ea4df35f2435f1cb371fa54f6fd89fd6d7d980f Mon Sep 17 00:00:00 2001 From: bep Date: Sat, 30 May 2015 14:46:58 +0200 Subject: [PATCH] Remove accents in URLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So the taxonomy `Gérard Depardieu` gives paths of the form `gerard-depardieu`. Unfortunately this introduces two imports from `golang.org/x/text`, but Unicode normalization isn't something we'd want to write from scratch. See https://blog.golang.org/normalization See #1180 --- helpers/path.go | 11 +++++++++++ helpers/path_test.go | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/helpers/path.go b/helpers/path.go index 9f47549d2..5e5e3fa89 100644 --- a/helpers/path.go +++ b/helpers/path.go @@ -19,6 +19,8 @@ import ( "github.com/spf13/afero" jww "github.com/spf13/jwalterweatherman" "github.com/spf13/viper" + "golang.org/x/text/transform" + "golang.org/x/text/unicode/norm" "io" "os" "path/filepath" @@ -97,9 +99,18 @@ func UnicodeSanitize(s string) string { } } + // remove accents - see https://blog.golang.org/normalization + t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC) + result, _, _ := transform.String(t, string(target)) + return result + return string(target) } +func isMn(r rune) bool { + return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks +} + // ReplaceExtension takes a path and an extension, strips the old extension // and returns the path with the new extension. 
func ReplaceExtension(path string, newExt string) string { diff --git a/helpers/path_test.go b/helpers/path_test.go index cb8714266..3fdb94a4f 100644 --- a/helpers/path_test.go +++ b/helpers/path_test.go @@ -27,7 +27,7 @@ func TestMakePath(t *testing.T) { {"FOo/BaR.html", "FOo/BaR.html"}, {"трям/трям", "трям/трям"}, {"은행", "은행"}, - {"Банковский кассир", "Банковский-кассир"}, + {"Банковский кассир", "Банковскии-кассир"}, } for _, test := range tests { @@ -717,7 +717,7 @@ func TestGetTempDir(t *testing.T) { {testDir + "FOo/BaR.html", dir + testDir + "FOo/BaR.html" + FilePathSeparator}, {testDir + "трям/трям", dir + testDir + "трям/трям" + FilePathSeparator}, {testDir + "은행", dir + testDir + "은행" + FilePathSeparator}, - {testDir + "Банковский кассир", dir + testDir + "Банковский-кассир" + FilePathSeparator}, + {testDir + "Банковский кассир", dir + testDir + "Банковскии-кассир" + FilePathSeparator}, } for _, test := range tests {