Chomp Unicode BOM if present

Useful when using or sharing files with users whose editors prepend a
Unicode byte order mark (BOM) to the file, such as Windows Notepad.

This will still assume files are UTF-8 encoded.

Closes #2075
This commit is contained in:
Mattias Wadman 2016-04-13 00:14:00 +02:00 committed by Bjørn Erik Pedersen
parent d48b986c45
commit 5d50c46482
2 changed files with 32 additions and 0 deletions

View file

@ -1052,6 +1052,19 @@ func TestPageSimpleMethods(t *testing.T) {
}
}
// TestChompBOM verifies that a page whose content starts with a UTF-8 encoded
// byte order mark is read as if the mark were absent: reading must succeed and
// the title from the front matter must still be found.
func TestChompBOM(t *testing.T) {
	p, err := NewPage("simple.md")
	if err != nil {
		t.Fatalf("Unable to create page: %s", err)
	}

	// "\xef\xbb\xbf" is the UTF-8 encoding of U+FEFF.
	const utf8BOM = "\xef\xbb\xbf"
	_, err = p.ReadFrom(strings.NewReader(utf8BOM + simplePage))
	// Check the read error before touching the page any further, so a failed
	// read is reported instead of being converted.
	if err != nil {
		t.Fatalf("Unable to create a page with BOM prefixed frontmatter and body content: %s", err)
	}
	p.Convert()

	checkPageTitle(t, p, "Simple")
}
func listEqual(left, right []string) bool {
if len(left) != len(right) {
return false

View file

@ -50,6 +50,8 @@ const (
HTMLCommentStart = "<!--"
// HTMLCommentEnd identifies the end of HTML comment.
HTMLCommentEnd = "-->"
// BOM is the Unicode byte order mark (U+FEFF).
BOM = '\ufeff'
)
var (
@ -101,6 +103,10 @@ func (p *page) Metadata() (meta interface{}, err error) {
func ReadFrom(r io.Reader) (p Page, err error) {
reader := bufio.NewReader(r)
// chomp BOM and assume UTF-8
if err = chompBOM(reader); err != nil && err != io.EOF {
return
}
if err = chompWhitespace(reader); err != nil && err != io.EOF {
return
}
@ -135,6 +141,19 @@ func ReadFrom(r io.Reader) (p Page, err error) {
return newp, nil
}
// chompBOM consumes every byte order mark (BOM) rune at the current position
// of r and leaves r positioned at the first rune that is not a BOM.
//
// It returns any error reported by r.ReadRune (including the end-of-input
// error when the input is empty or consists solely of BOM runes) and any error
// reported by r.UnreadRune when the first non-BOM rune is pushed back.
func chompBOM(r io.RuneScanner) (err error) {
	for {
		var c rune
		// Assign to the named result rather than shadowing it with ":=".
		if c, _, err = r.ReadRune(); err != nil {
			return err
		}
		if c != BOM {
			// The rune belongs to the content: push it back, and report a
			// failure to do so instead of silently dropping the rune.
			return r.UnreadRune()
		}
	}
}
func chompWhitespace(r io.RuneScanner) (err error) {
for {
c, _, err := r.ReadRune()