Convert the rest to new page parser code paths

And remove some now unused code.

See #5324
This commit is contained in:
Bjørn Erik Pedersen 2018-10-20 17:38:49 +02:00
parent 129c27ee6e
commit eb038cfa0a
No known key found for this signature in database
GPG key ID: 330E6E2BD4859D8F
14 changed files with 120 additions and 1467 deletions

View file

@ -16,6 +16,7 @@ package commands
import (
"bytes"
"fmt"
"io"
"strings"
"time"
@ -34,7 +35,6 @@ import (
"path/filepath"
"github.com/spf13/cast"
"github.com/spf13/cobra"
)
@ -156,7 +156,7 @@ func (cc *convertCmd) convertAndSavePage(p *hugolib.Page, site *hugolib.Site, ta
return nil
}
psr, err := pageparser.Parse(file)
pf, err := parseContentFile(file)
if err != nil {
site.Log.ERROR.Println(errMsg)
file.Close()
@ -165,53 +165,24 @@ func (cc *convertCmd) convertAndSavePage(p *hugolib.Page, site *hugolib.Site, ta
file.Close()
var sourceFormat, sourceContent []byte
var fromFormat metadecoders.Format
iter := psr.Iterator()
walkFn := func(item pageparser.Item) bool {
if sourceFormat != nil {
// The rest is content.
sourceContent = psr.Input()[item.Pos:]
// Done
return false
} else if item.IsFrontMatter() {
fromFormat = metadecoders.FormatFromFrontMatterType(item.Type)
sourceFormat = item.Val
}
return true
}
iter.PeekWalk(walkFn)
metadata, err := metadecoders.UnmarshalToMap(sourceFormat, fromFormat)
if err != nil {
site.Log.ERROR.Println(errMsg)
return err
}
// better handling of dates in formats that don't have support for them
if fromFormat == metadecoders.JSON || fromFormat == metadecoders.YAML || fromFormat == metadecoders.TOML {
newMetadata := cast.ToStringMap(metadata)
for k, v := range newMetadata {
if pf.frontMatterFormat == metadecoders.JSON || pf.frontMatterFormat == metadecoders.YAML || pf.frontMatterFormat == metadecoders.TOML {
for k, v := range pf.frontMatter {
switch vv := v.(type) {
case time.Time:
newMetadata[k] = vv.Format(time.RFC3339)
pf.frontMatter[k] = vv.Format(time.RFC3339)
}
}
metadata = newMetadata
}
var newContent bytes.Buffer
err = parser.InterfaceToFrontMatter2(metadata, targetFormat, &newContent)
err = parser.InterfaceToFrontMatter(pf.frontMatter, targetFormat, &newContent)
if err != nil {
site.Log.ERROR.Println(errMsg)
return err
}
newContent.Write(sourceContent)
newContent.Write(pf.content)
newFilename := p.Filename()
@ -229,3 +200,48 @@ func (cc *convertCmd) convertAndSavePage(p *hugolib.Page, site *hugolib.Site, ta
return nil
}
// parsedFile holds the pieces of a content file as split up by
// parseContentFile: the front matter (raw and decoded) and the content
// that follows it.
type parsedFile struct {
// Format of the front matter, derived from the front matter item type
// reported by the page parser.
frontMatterFormat metadecoders.Format
// Raw front matter bytes as found in the source, before decoding.
frontMatterSource []byte
// Front matter decoded into a map.
frontMatter map[string]interface{}
// Everything after Front Matter
content []byte
}
// parseContentFile reads a content file from r and splits it into front
// matter and content.
//
// The first front matter item found determines the format and the raw front
// matter source; the content is everything from the start of the item that
// follows it. The raw front matter is then decoded into a map. On a decode
// error the partially filled parsedFile is returned together with the error.
func parseContentFile(r io.Reader) (parsedFile, error) {
	var pf parsedFile

	parsed, err := pageparser.Parse(r)
	if err != nil {
		return pf, err
	}

	it := parsed.Iterator()

	it.PeekWalk(func(item pageparser.Item) bool {
		switch {
		case pf.frontMatterSource != nil:
			// Front matter has already been seen, so this item marks
			// the start of the content. Take the rest and stop walking.
			pf.content = parsed.Input()[item.Pos:]
			return false
		case item.IsFrontMatter():
			pf.frontMatterFormat = metadecoders.FormatFromFrontMatterType(item.Type)
			pf.frontMatterSource = item.Val
		}
		return true
	})

	decoded, err := metadecoders.UnmarshalToMap(pf.frontMatterSource, pf.frontMatterFormat)
	if err != nil {
		return pf, err
	}
	pf.frontMatter = decoded

	return pf, nil
}

View file

@ -16,6 +16,7 @@ package commands
import (
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
@ -264,7 +265,7 @@ func (i *importCmd) loadJekyllConfig(fs afero.Fs, jekyllRoot string) map[string]
return c
}
func (i *importCmd) createConfigFromJekyll(fs afero.Fs, inpath string, kind string, jekyllConfig map[string]interface{}) (err error) {
func (i *importCmd) createConfigFromJekyll(fs afero.Fs, inpath string, kind metadecoders.Format, jekyllConfig map[string]interface{}) (err error) {
title := "My New Hugo Site"
baseURL := "http://example.org/"
@ -290,15 +291,14 @@ func (i *importCmd) createConfigFromJekyll(fs afero.Fs, inpath string, kind stri
"languageCode": "en-us",
"disablePathToLower": true,
}
kind = parser.FormatSanitize(kind)
var buf bytes.Buffer
err = parser.InterfaceToConfig(in, parser.FormatToLeadRune(kind), &buf)
err = parser.InterfaceToConfig(in, kind, &buf)
if err != nil {
return err
}
return helpers.WriteToDisk(filepath.Join(inpath, "config."+kind), &buf, fs)
return helpers.WriteToDisk(filepath.Join(inpath, "config."+string(kind)), &buf, fs)
}
func copyFile(source string, dest string) error {
@ -447,39 +447,25 @@ func convertJekyllPost(s *hugolib.Site, path, relPath, targetDir string, draft b
return err
}
psr, err := parser.ReadFrom(bytes.NewReader(contentBytes))
pf, err := parseContentFile(bytes.NewReader(contentBytes))
if err != nil {
jww.ERROR.Println("Parse file error:", path)
return err
}
metadata, err := psr.Metadata()
if err != nil {
jww.ERROR.Println("Processing file error:", path)
return err
}
newmetadata, err := convertJekyllMetaData(metadata, postName, postDate, draft)
newmetadata, err := convertJekyllMetaData(pf.frontMatter, postName, postDate, draft)
if err != nil {
jww.ERROR.Println("Convert metadata error:", path)
return err
}
jww.TRACE.Println(newmetadata)
content := convertJekyllContent(newmetadata, string(psr.Content()))
content := convertJekyllContent(newmetadata, string(pf.content))
page, err := s.NewPage(filename)
if err != nil {
jww.ERROR.Println("New page error", filename)
return err
fs := hugofs.Os
if err := helpers.WriteToDisk(targetFile, strings.NewReader(content), fs); err != nil {
return fmt.Errorf("Failed to save file %q:", filename)
}
page.SetSourceContent([]byte(content))
page.SetSourceMetaData(newmetadata, parser.FormatToLeadRune("yaml"))
page.SaveSourceAs(targetFile)
jww.TRACE.Println("Target file:", targetFile)
return nil
}

View file

@ -19,6 +19,8 @@ import (
"path/filepath"
"strings"
"github.com/gohugoio/hugo/parser/metadecoders"
_errors "github.com/pkg/errors"
"github.com/gohugoio/hugo/create"
@ -131,10 +133,9 @@ func createConfig(fs *hugofs.Fs, inpath string, kind string) (err error) {
"title": "My New Hugo Site",
"languageCode": "en-us",
}
kind = parser.FormatSanitize(kind)
var buf bytes.Buffer
err = parser.InterfaceToConfig(in, parser.FormatToLeadRune(kind), &buf)
err = parser.InterfaceToConfig(in, metadecoders.FormatFromString(kind), &buf)
if err != nil {
return err
}

View file

@ -36,7 +36,6 @@ import (
"github.com/gohugoio/hugo/resource"
"github.com/gohugoio/hugo/output"
"github.com/gohugoio/hugo/parser"
"github.com/mitchellh/mapstructure"
"html/template"
@ -485,6 +484,7 @@ func (p *Page) MediaType() media.Type {
return media.OctetType
}
// TODO(bep) 2errors remove
type Source struct {
Frontmatter []byte
Content []byte
@ -1725,36 +1725,6 @@ func (p *Page) SetSourceContent(content []byte) {
p.Source.Content = content
}
// SetSourceMetaData encodes in as front matter, using the format selected by
// mark, and stores the result followed by a newline in p.Source.Frontmatter.
//
// A panic raised while encoding is recovered and reported through the
// returned error instead of propagating to the caller.
func (p *Page) SetSourceMetaData(in interface{}, mark rune) (err error) {
	// See https://github.com/gohugoio/hugo/issues/2458
	defer func() {
		if r := recover(); r != nil {
			var ok bool
			err, ok = r.(error)
			if !ok {
				err = fmt.Errorf("error from marshal: %v", r)
			}
		}
	}()

	buf := bp.GetBuffer()
	defer bp.PutBuffer(buf)

	err = parser.InterfaceToFrontMatter(in, mark, buf)
	if err != nil {
		return err
	}

	_, err = buf.WriteRune('\n')
	if err != nil {
		return err
	}

	// buf is handed back through bp.PutBuffer when this function returns,
	// and Bytes() exposes the buffer's own storage (presumably a pooled
	// bytes.Buffer -- confirm against bp). Keep a private copy so the stored
	// front matter cannot change if the buffer is reused.
	p.Source.Frontmatter = append([]byte(nil), buf.Bytes()...)

	return nil
}
// SafeSaveSourceAs saves the page source to path by delegating to
// saveSourceAs with its second argument set to true.
// NOTE(review): presumably that flag selects a non-overwriting ("safe")
// write -- confirm against saveSourceAs.
func (p *Page) SafeSaveSourceAs(path string) error {
return p.saveSourceAs(path, true)
}

View file

@ -13,14 +13,10 @@
package parser
// TODO(bep) archetype remove unused from this package.
import (
"bytes"
"encoding/json"
"errors"
"io"
"strings"
"github.com/gohugoio/hugo/parser/metadecoders"
@ -29,53 +25,12 @@ import (
"gopkg.in/yaml.v2"
)
// FrontmatterType represents a type of frontmatter.
type FrontmatterType struct {
// Parse decodes content into a Go interface.
Parse func([]byte) (map[string]interface{}, error)
const (
yamlDelimLf = "---\n"
tomlDelimLf = "+++\n"
)
markstart, markend []byte // starting and ending delimiters
includeMark bool // include start and end mark in output
}
// InterfaceToConfig serializes in to w. The encoding is picked by comparing
// mark with the first character of the YAML, TOML and JSON leads; any other
// mark, or a nil input, is rejected with an error. JSON output is indented
// and followed by a newline.
func InterfaceToConfig(in interface{}, mark rune, w io.Writer) error {
	if in == nil {
		return errors.New("input was nil")
	}

	switch mark {
	case rune(YAMLLead[0]):
		encoded, err := yaml.Marshal(in)
		if err == nil {
			_, err = w.Write(encoded)
		}
		return err

	case rune(TOMLLead[0]):
		return toml.NewEncoder(w).Encode(in)

	case rune(JSONLead[0]):
		encoded, err := json.MarshalIndent(in, "", " ")
		if err != nil {
			return err
		}
		if _, err = w.Write(encoded); err != nil {
			return err
		}
		// MarshalIndent does not end its output with a newline.
		_, err = w.Write([]byte{'\n'})
		return err
	}

	return errors.New("Unsupported Format provided")
}
func InterfaceToConfig2(in interface{}, format metadecoders.Format, w io.Writer) error {
func InterfaceToConfig(in interface{}, format metadecoders.Format, w io.Writer) error {
if in == nil {
return errors.New("input was nil")
}
@ -111,136 +66,42 @@ func InterfaceToConfig2(in interface{}, format metadecoders.Format, w io.Writer)
}
}
func InterfaceToFrontMatter2(in interface{}, format metadecoders.Format, w io.Writer) error {
func InterfaceToFrontMatter(in interface{}, format metadecoders.Format, w io.Writer) error {
if in == nil {
return errors.New("input was nil")
}
switch format {
case metadecoders.YAML:
_, err := w.Write([]byte(YAMLDelimUnix))
_, err := w.Write([]byte(yamlDelimLf))
if err != nil {
return err
}
err = InterfaceToConfig2(in, format, w)
err = InterfaceToConfig(in, format, w)
if err != nil {
return err
}
_, err = w.Write([]byte(YAMLDelimUnix))
_, err = w.Write([]byte(yamlDelimLf))
return err
case metadecoders.TOML:
_, err := w.Write([]byte(TOMLDelimUnix))
_, err := w.Write([]byte(tomlDelimLf))
if err != nil {
return err
}
err = InterfaceToConfig2(in, format, w)
err = InterfaceToConfig(in, format, w)
if err != nil {
return err
}
_, err = w.Write([]byte("\n" + TOMLDelimUnix))
_, err = w.Write([]byte("\n" + tomlDelimLf))
return err
default:
return InterfaceToConfig2(in, format, w)
return InterfaceToConfig(in, format, w)
}
}
// InterfaceToFrontMatter writes in to w as front matter in the format
// selected by mark. YAML and TOML output is wrapped in the matching
// delimiters (the closing TOML delimiter is preceded by a newline); every
// other mark is passed straight to InterfaceToConfig without delimiters.
// A nil input is rejected with an error.
func InterfaceToFrontMatter(in interface{}, mark rune, w io.Writer) error {
	if in == nil {
		return errors.New("input was nil")
	}

	var opening, closing []byte
	switch mark {
	case rune(YAMLLead[0]):
		opening = []byte(YAMLDelimUnix)
		closing = []byte(YAMLDelimUnix)
	case rune(TOMLLead[0]):
		opening = []byte(TOMLDelimUnix)
		closing = []byte("\n" + TOMLDelimUnix)
	default:
		// No delimiters for this format.
		return InterfaceToConfig(in, mark, w)
	}

	if _, err := w.Write(opening); err != nil {
		return err
	}
	if err := InterfaceToConfig(in, mark, w); err != nil {
		return err
	}
	_, err := w.Write(closing)
	return err
}
// FormatToLeadRune takes a given format kind and return the leading front
// matter delimiter.
func FormatToLeadRune(kind string) rune {
switch FormatSanitize(kind) {
case "yaml":
return rune([]byte(YAMLLead)[0])
case "json":
return rune([]byte(JSONLead)[0])
case "org":
return '#'
default:
return rune([]byte(TOMLLead)[0])
}
}
// FormatSanitize returns the canonical, lower-case format name for a given
// kind. Matching is case-insensitive:
//
//	"yaml", "yml" -> "yaml"
//	"toml", "tml" -> "toml"
//	"json", "js"  -> "json"
//	"org"         -> "org"
//
// Any other value, including the empty string, yields "toml".
//
// TODO(bep) move to helpers
func FormatSanitize(kind string) string {
	switch strings.ToLower(kind) {
	case "yaml", "yml":
		return "yaml"
	case "json", "js":
		return "json"
	case "org":
		// Return the literal rather than kind: the match above is
		// case-insensitive, so kind may be "ORG" or "Org", which callers
		// comparing against "org" would not recognize.
		return "org"
	default:
		// "toml", "tml" and everything unrecognized.
		return "toml"
	}
}
// removeTOMLIdentifier strips a leading and trailing TOML front matter
// delimiter from datum. Input shorter than 8 bytes, or input from which
// trimming the leading delimiter did not remove exactly 3 bytes, is returned
// unchanged. Carriage returns and newlines around the remainder are trimmed
// before the trailing delimiter is removed.
func removeTOMLIdentifier(datum []byte) []byte {
	if len(datum) < 8 {
		return datum
	}

	delim := []byte(TOMLDelim)
	rest := bytes.TrimPrefix(datum, delim)
	if len(datum)-len(rest) != 3 {
		// No TOML prefix trimmed, so bail out
		return datum
	}

	return bytes.TrimSuffix(bytes.Trim(rest, "\r\n"), delim)
}

View file

@ -15,55 +15,55 @@ package parser
import (
"bytes"
"fmt"
"reflect"
"strings"
"testing"
"github.com/gohugoio/hugo/parser/metadecoders"
)
func TestInterfaceToConfig(t *testing.T) {
cases := []struct {
input interface{}
mark byte
want []byte
isErr bool
input interface{}
format metadecoders.Format
want []byte
isErr bool
}{
// TOML
{map[string]interface{}{}, TOMLLead[0], nil, false},
{map[string]interface{}{}, metadecoders.TOML, nil, false},
{
map[string]interface{}{"title": "test 1"},
TOMLLead[0],
metadecoders.TOML,
[]byte("title = \"test 1\"\n"),
false,
},
// YAML
{map[string]interface{}{}, YAMLLead[0], []byte("{}\n"), false},
{map[string]interface{}{}, metadecoders.YAML, []byte("{}\n"), false},
{
map[string]interface{}{"title": "test 1"},
YAMLLead[0],
metadecoders.YAML,
[]byte("title: test 1\n"),
false,
},
// JSON
{map[string]interface{}{}, JSONLead[0], []byte("{}\n"), false},
{map[string]interface{}{}, metadecoders.JSON, []byte("{}\n"), false},
{
map[string]interface{}{"title": "test 1"},
JSONLead[0],
metadecoders.JSON,
[]byte("{\n \"title\": \"test 1\"\n}\n"),
false,
},
// Errors
{nil, TOMLLead[0], nil, true},
{map[string]interface{}{}, '$', nil, true},
{nil, metadecoders.TOML, nil, true},
{map[string]interface{}{}, "foo", nil, true},
}
for i, c := range cases {
var buf bytes.Buffer
err := InterfaceToConfig(c.input, rune(c.mark), &buf)
err := InterfaceToConfig(c.input, c.format, &buf)
if err != nil {
if c.isErr {
continue
@ -76,179 +76,3 @@ func TestInterfaceToConfig(t *testing.T) {
}
}
}
// TestInterfaceToFrontMatter checks the delimited output produced for TOML,
// YAML and JSON input, and that nil input and an unknown mark are rejected
// with an error.
func TestInterfaceToFrontMatter(t *testing.T) {
	cases := []struct {
		input interface{}
		mark  rune
		want  []byte
		isErr bool
	}{
		// TOML
		{map[string]interface{}{}, '+', []byte("+++\n\n+++\n"), false},
		{
			map[string]interface{}{"title": "test 1"},
			'+',
			[]byte("+++\ntitle = \"test 1\"\n\n+++\n"),
			false,
		},

		// YAML
		{map[string]interface{}{}, '-', []byte("---\n{}\n---\n"), false},
		{
			map[string]interface{}{"title": "test 1"},
			'-',
			[]byte("---\ntitle: test 1\n---\n"),
			false,
		},

		// JSON
		{map[string]interface{}{}, '{', []byte("{}\n"), false},
		{
			map[string]interface{}{"title": "test 1"},
			'{',
			[]byte("{\n \"title\": \"test 1\"\n}\n"),
			false,
		},

		// Errors
		{nil, '+', nil, true},
		{map[string]interface{}{}, '$', nil, true},
	}

	for i, c := range cases {
		var buf bytes.Buffer
		err := InterfaceToFrontMatter(c.input, c.mark, &buf)
		if err != nil {
			if c.isErr {
				continue
			}
			t.Fatalf("[%d] unexpected error value: %v", i, err)
		}

		// An error case that returns nil must not pass silently just
		// because nothing was written to the buffer.
		if c.isErr {
			t.Errorf("[%d] expected an error, got none (output %q)", i, buf.Bytes())
			continue
		}

		if !reflect.DeepEqual(buf.Bytes(), c.want) {
			t.Errorf("[%d] not equal:\nwant %q,\n got %q", i, c.want, buf.Bytes())
		}
	}
}
// TestFormatToLeadRune verifies the lead rune returned for each known format
// alias and for an unknown kind.
func TestFormatToLeadRune(t *testing.T) {
	cases := []struct {
		kind   string
		expect rune
	}{
		{kind: "yaml", expect: '-'},
		{kind: "yml", expect: '-'},
		{kind: "toml", expect: '+'},
		{kind: "tml", expect: '+'},
		{kind: "json", expect: '{'},
		{kind: "js", expect: '{'},
		{kind: "org", expect: '#'},
		{kind: "unknown", expect: '+'},
	}

	for idx, tc := range cases {
		if got := FormatToLeadRune(tc.kind); got != tc.expect {
			t.Errorf("[%d] got %q but expected %q", idx, got, tc.expect)
		}
	}
}
// TestRemoveTOMLIdentifier feeds removeTOMLIdentifier input with and without
// TOML delimiters, with LF and CRLF line endings, and with delimiter-like
// text inside values, and compares the result with the expected remainder.
func TestRemoveTOMLIdentifier(t *testing.T) {
	cases := []struct {
		input string
		want  string
	}{
		{input: "a = 1", want: "a = 1"},
		{input: "a = 1\r\n", want: "a = 1\r\n"},
		{input: "+++\r\na = 1\r\n+++\r\n", want: "a = 1\r\n"},
		{input: "+++\na = 1\n+++\n", want: "a = 1\n"},
		{input: "+++\nb = \"+++ oops +++\"\n+++\n", want: "b = \"+++ oops +++\"\n"},
		{input: "+++\nc = \"\"\"+++\noops\n+++\n\"\"\"\"\n+++\n", want: "c = \"\"\"+++\noops\n+++\n\"\"\"\"\n"},
		{input: "+++\nd = 1\n+++", want: "d = 1\n"},
	}

	for idx, tc := range cases {
		got := removeTOMLIdentifier([]byte(tc.input))
		if string(got) == tc.want {
			continue
		}
		t.Errorf("[%d] given %q\nwant: %q\n got: %q", idx, tc.input, tc.want, got)
	}
}
// BenchmarkFrontmatterTags runs doBenchmarkFrontmatter for each front matter
// flavour with 1, 21 and 41 tags.
func BenchmarkFrontmatterTags(b *testing.B) {
	formats := []string{"JSON", "YAML", "YAML2", "TOML"}
	tagCounts := []int{1, 21, 41}

	for _, format := range formats {
		for _, numTags := range tagCounts {
			doBenchmarkFrontmatter(b, format, numTags)
		}
	}
}
// doBenchmarkFrontmatter runs a sub-benchmark named "<fileformat>:<numTags>"
// that builds a front matter document with numTags generated tags in the
// given flavour ("TOML", "JSON", "YAML2"; anything else uses the YAML list
// template) and repeatedly calls Metadata on a page holding it.
func doBenchmarkFrontmatter(b *testing.B, fileformat string, numTags int) {
	const (
		yamlTemplate = `---
name: "Tags"
tags:
%s
---
`
		yaml2Template = `---
name: "Tags"
tags: %s
---
`
		tomlTemplate = `+++
name = "Tags"
tags = %s
+++
`
		jsonTemplate = `{
"name": "Tags",
"tags": [
%s
]
}`
	)

	benchName := fmt.Sprintf("%s:%d", fileformat, numTags)

	b.Run(benchName, func(b *testing.B) {
		tags := make([]string, numTags)
		for idx := range tags {
			tags[idx] = fmt.Sprintf("Hugo %d", idx+1)
		}

		// Renders the tags as a quoted list on a single line.
		inlineTags := func() string {
			return strings.Replace(fmt.Sprintf("%q", tags), " ", ", ", -1)
		}

		var tmpl, tagsStr string
		switch fileformat {
		case "TOML":
			tmpl, tagsStr = tomlTemplate, inlineTags()
		case "JSON":
			tmpl, tagsStr = jsonTemplate, inlineTags()
		case "YAML2":
			tmpl, tagsStr = yaml2Template, inlineTags()
		default:
			tmpl = yamlTemplate
			for _, tag := range tags {
				tagsStr += "\n- " + tag
			}
		}

		p := page{frontmatter: []byte(fmt.Sprintf(tmpl, tagsStr))}

		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			meta, err := p.Metadata()
			if err != nil {
				b.Fatal(err)
			}
			if meta == nil {
				b.Fatal("Meta is nil")
			}
		}
	})
}

View file

@ -1,263 +0,0 @@
---
title: The Git Book - Long Text
---
# Getting Started #
This chapter will be about getting started with Git. We will begin at the beginning by explaining some background on version control tools, then move on to how to get Git running on your system and finally how to get it setup to start working with. At the end of this chapter you should understand why Git is around, why you should use it and you should be all setup to do so.
## About Version Control ##
What is version control, and why should you care? Version control is a system that records changes to a file or set of files over time so that you can recall specific versions later. Even though the examples in this book show software source code as the files under version control, in reality any type of file on a computer can be placed under version control.
If you are a graphic or web designer and want to keep every version of an image or layout (which you certainly would), it is very wise to use a Version Control System (VCS). A VCS allows you to: revert files back to a previous state, revert the entire project back to a previous state, review changes made over time, see who last modified something that might be causing a problem, who introduced an issue and when, and more. Using a VCS also means that if you screw things up or lose files, you can generally recover easily. In addition, you get all this for very little overhead.
### Local Version Control Systems ###
Many people's version-control method of choice is to copy files into another directory (perhaps a time-stamped directory, if they're clever). This approach is very common because it is so simple, but it is also incredibly error prone. It is easy to forget which directory you're in and accidentally write to the wrong file or copy over files you don't mean to.
To deal with this issue, programmers long ago developed local VCSs that had a simple database that kept all the changes to files under revision control (see Figure 1-1).
Insert 18333fig0101.png
Figure 1-1. Local version control diagram.
One of the more popular VCS tools was a system called rcs, which is still distributed with many computers today. Even the popular Mac OS X operating system includes the rcs command when you install the Developer Tools. This tool basically works by keeping patch sets (that is, the differences between files) from one revision to another in a special format on disk; it can then recreate what any file looked like at any point in time by adding up all the patches.
### Centralized Version Control Systems ###
The next major issue that people encounter is that they need to collaborate with developers on other systems. To deal with this problem, Centralized Version Control Systems (CVCSs) were developed. These systems, such as CVS, Subversion, and Perforce, have a single server that contains all the versioned files, and a number of clients that check out files from that central place. For many years, this has been the standard for version control (see Figure 1-2).
Insert 18333fig0102.png
Figure 1-2. Centralized version control diagram.
This setup offers many advantages, especially over local VCSs. For example, everyone knows to a certain degree what everyone else on the project is doing. Administrators have fine-grained control over who can do what; and its far easier to administer a CVCS than it is to deal with local databases on every client.
However, this setup also has some serious downsides. The most obvious is the single point of failure that the centralized server represents. If that server goes down for an hour, then during that hour nobody can collaborate at all or save versioned changes to anything theyre working on. If the hard disk the central database is on becomes corrupted, and proper backups havent been kept, you lose absolutely everything—the entire history of the project except whatever single snapshots people happen to have on their local machines. Local VCS systems suffer from this same problem—whenever you have the entire history of the project in a single place, you risk losing everything.
### Distributed Version Control Systems ###
This is where Distributed Version Control Systems (DVCSs) step in. In a DVCS (such as Git, Mercurial, Bazaar or Darcs), clients dont just check out the latest snapshot of the files: they fully mirror the repository. Thus if any server dies, and these systems were collaborating via it, any of the client repositories can be copied back up to the server to restore it. Every checkout is really a full backup of all the data (see Figure 1-3).
Insert 18333fig0103.png
Figure 1-3. Distributed version control diagram.
Furthermore, many of these systems deal pretty well with having several remote repositories they can work with, so you can collaborate with different groups of people in different ways simultaneously within the same project. This allows you to set up several types of workflows that arent possible in centralized systems, such as hierarchical models.
## A Short History of Git ##
As with many great things in life, Git began with a bit of creative destruction and fiery controversy. The Linux kernel is an open source software project of fairly large scope. For most of the lifetime of the Linux kernel maintenance (1991–2002), changes to the software were passed around as patches and archived files. In 2002, the Linux kernel project began using a proprietary DVCS system called BitKeeper.
In 2005, the relationship between the community that developed the Linux kernel and the commercial company that developed BitKeeper broke down, and the tools free-of-charge status was revoked. This prompted the Linux development community (and in particular Linus Torvalds, the creator of Linux) to develop their own tool based on some of the lessons they learned while using BitKeeper. Some of the goals of the new system were as follows:
* Speed
* Simple design
* Strong support for non-linear development (thousands of parallel branches)
* Fully distributed
* Able to handle large projects like the Linux kernel efficiently (speed and data size)
Since its birth in 2005, Git has evolved and matured to be easy to use and yet retain these initial qualities. It's incredibly fast, it's very efficient with large projects, and it has an incredible branching system for non-linear development (See Chapter 3).
## Git Basics ##
So, what is Git in a nutshell? This is an important section to absorb, because if you understand what Git is and the fundamentals of how it works, then using Git effectively will probably be much easier for you. As you learn Git, try to clear your mind of the things you may know about other VCSs, such as Subversion and Perforce; doing so will help you avoid subtle confusion when using the tool. Git stores and thinks about information much differently than these other systems, even though the user interface is fairly similar; understanding those differences will help prevent you from becoming confused while using it.
### Snapshots, Not Differences ###
The major difference between Git and any other VCS (Subversion and friends included) is the way Git thinks about its data. Conceptually, most other systems store information as a list of file-based changes. These systems (CVS, Subversion, Perforce, Bazaar, and so on) think of the information they keep as a set of files and the changes made to each file over time, as illustrated in Figure 1-4.
Insert 18333fig0104.png
Figure 1-4. Other systems tend to store data as changes to a base version of each file.
Git doesnt think of or store its data this way. Instead, Git thinks of its data more like a set of snapshots of a mini filesystem. Every time you commit, or save the state of your project in Git, it basically takes a picture of what all your files look like at that moment and stores a reference to that snapshot. To be efficient, if files have not changed, Git doesnt store the file again—just a link to the previous identical file it has already stored. Git thinks about its data more like Figure 1-5.
Insert 18333fig0105.png
Figure 1-5. Git stores data as snapshots of the project over time.
This is an important distinction between Git and nearly all other VCSs. It makes Git reconsider almost every aspect of version control that most other systems copied from the previous generation. This makes Git more like a mini filesystem with some incredibly powerful tools built on top of it, rather than simply a VCS. Well explore some of the benefits you gain by thinking of your data this way when we cover Git branching in Chapter 3.
### Nearly Every Operation Is Local ###
Most operations in Git only need local files and resources to operate — generally no information is needed from another computer on your network. If youre used to a CVCS where most operations have that network latency overhead, this aspect of Git will make you think that the gods of speed have blessed Git with unworldly powers. Because you have the entire history of the project right there on your local disk, most operations seem almost instantaneous.
For example, to browse the history of the project, Git doesnt need to go out to the server to get the history and display it for you—it simply reads it directly from your local database. This means you see the project history almost instantly. If you want to see the changes introduced between the current version of a file and the file a month ago, Git can look up the file a month ago and do a local difference calculation, instead of having to either ask a remote server to do it or pull an older version of the file from the remote server to do it locally.
This also means that there is very little you cant do if youre offline or off VPN. If you get on an airplane or a train and want to do a little work, you can commit happily until you get to a network connection to upload. If you go home and cant get your VPN client working properly, you can still work. In many other systems, doing so is either impossible or painful. In Perforce, for example, you cant do much when you arent connected to the server; and in Subversion and CVS, you can edit files, but you cant commit changes to your database (because your database is offline). This may not seem like a huge deal, but you may be surprised what a big difference it can make.
### Git Has Integrity ###
Everything in Git is check-summed before it is stored and is then referred to by that checksum. This means its impossible to change the contents of any file or directory without Git knowing about it. This functionality is built into Git at the lowest levels and is integral to its philosophy. You cant lose information in transit or get file corruption without Git being able to detect it.
The mechanism that Git uses for this checksumming is called a SHA-1 hash. This is a 40-character string composed of hexadecimal characters (0–9 and a–f) and calculated based on the contents of a file or directory structure in Git. A SHA-1 hash looks something like this:
24b9da6552252987aa493b52f8696cd6d3b00373
You will see these hash values all over the place in Git because it uses them so much. In fact, Git stores everything not by file name but in the Git database addressable by the hash value of its contents.
### Git Generally Only Adds Data ###
When you do actions in Git, nearly all of them only add data to the Git database. It is very difficult to get the system to do anything that is not undoable or to make it erase data in any way. As in any VCS, you can lose or mess up changes you havent committed yet; but after you commit a snapshot into Git, it is very difficult to lose, especially if you regularly push your database to another repository.
This makes using Git a joy because we know we can experiment without the danger of severely screwing things up. For a more in-depth look at how Git stores its data and how you can recover data that seems lost, see Chapter 9.
### The Three States ###
Now, pay attention. This is the main thing to remember about Git if you want the rest of your learning process to go smoothly. Git has three main states that your files can reside in: committed, modified, and staged. Committed means that the data is safely stored in your local database. Modified means that you have changed the file but have not committed it to your database yet. Staged means that you have marked a modified file in its current version to go into your next commit snapshot.
This leads us to the three main sections of a Git project: the Git directory, the working directory, and the staging area.
Insert 18333fig0106.png
Figure 1-6. Working directory, staging area, and git directory.
The Git directory is where Git stores the metadata and object database for your project. This is the most important part of Git, and it is what is copied when you clone a repository from another computer.
The working directory is a single checkout of one version of the project. These files are pulled out of the compressed database in the Git directory and placed on disk for you to use or modify.
The staging area is a simple file, generally contained in your Git directory, that stores information about what will go into your next commit. Its sometimes referred to as the index, but its becoming standard to refer to it as the staging area.
The basic Git workflow goes something like this:
1. You modify files in your working directory.
2. You stage the files, adding snapshots of them to your staging area.
3. You do a commit, which takes the files as they are in the staging area and stores that snapshot permanently to your Git directory.
If a particular version of a file is in the git directory, its considered committed. If its modified but has been added to the staging area, it is staged. And if it was changed since it was checked out but has not been staged, it is modified. In Chapter 2, youll learn more about these states and how you can either take advantage of them or skip the staged part entirely.
## Installing Git ##
Lets get into using some Git. First things first—you have to install it. You can get it a number of ways; the two major ones are to install it from source or to install an existing package for your platform.
### Installing from Source ###
If you can, its generally useful to install Git from source, because youll get the most recent version. Each version of Git tends to include useful UI enhancements, so getting the latest version is often the best route if you feel comfortable compiling software from source. It is also the case that many Linux distributions contain very old packages; so unless youre on a very up-to-date distro or are using backports, installing from source may be the best bet.
To install Git, you need to have the following libraries that Git depends on: curl, zlib, openssl, expat, and libiconv. For example, if youre on a system that has yum (such as Fedora) or apt-get (such as a Debian based system), you can use one of these commands to install all of the dependencies:
$ yum install curl-devel expat-devel gettext-devel \
openssl-devel zlib-devel
$ apt-get install libcurl4-gnutls-dev libexpat1-dev gettext \
libz-dev libssl-dev
When you have all the necessary dependencies, you can go ahead and grab the latest snapshot from the Git web site:
http://git-scm.com/download
Then, compile and install:
$ tar -zxf git-1.7.2.2.tar.gz
$ cd git-1.7.2.2
$ make prefix=/usr/local all
$ sudo make prefix=/usr/local install
After this is done, you can also get Git via Git itself for updates:
$ git clone git://git.kernel.org/pub/scm/git/git.git
### Installing on Linux ###
If you want to install Git on Linux via a binary installer, you can generally do so through the basic package-management tool that comes with your distribution. If you're on Fedora, you can use yum:
$ yum install git-core
Or if you're on a Debian-based distribution like Ubuntu, try apt-get:
$ apt-get install git
### Installing on Mac ###
There are two easy ways to install Git on a Mac. The easiest is to use the graphical Git installer, which you can download from the Google Code page (see Figure 1-7):
http://code.google.com/p/git-osx-installer
Insert 18333fig0107.png
Figure 1-7. Git OS X installer.
The other major way is to install Git via MacPorts (`http://www.macports.org`). If you have MacPorts installed, install Git via
$ sudo port install git-core +svn +doc +bash_completion +gitweb
You don't have to add all the extras, but you'll probably want to include +svn in case you ever have to use Git with Subversion repositories (see Chapter 8).
### Installing on Windows ###
Installing Git on Windows is very easy. The msysGit project has one of the easier installation procedures. Simply download the installer exe file from the GitHub page, and run it:
http://msysgit.github.com/
After it's installed, you have both a command-line version (including an SSH client that will come in handy later) and the standard GUI.
Note on Windows usage: you should use Git with the provided msysGit shell (Unix style), as it allows you to use the complex command lines given in this book. If you need, for some reason, to use the native Windows shell / command-line console, you have to use double quotes instead of single quotes (for parameters with spaces in them), and you must quote parameters ending with the circumflex accent (^) if they are last on the line, as it is a continuation symbol in Windows.
## First-Time Git Setup ##
Now that you have Git on your system, you'll want to do a few things to customize your Git environment. You should have to do these things only once; they'll stick around between upgrades. You can also change them at any time by running through the commands again.
Git comes with a tool called git config that lets you get and set configuration variables that control all aspects of how Git looks and operates. These variables can be stored in three different places:
* `/etc/gitconfig` file: Contains values for every user on the system and all their repositories. If you pass the option `--system` to `git config`, it reads and writes from this file specifically.
* `~/.gitconfig` file: Specific to your user. You can make Git read and write to this file specifically by passing the `--global` option.
* config file in the git directory (that is, `.git/config`) of whatever repository you're currently using: Specific to that single repository. Each level overrides values in the previous level, so values in `.git/config` trump those in `/etc/gitconfig`.
On Windows systems, Git looks for the `.gitconfig` file in the `$HOME` directory (`%USERPROFILE%` in Windows environment), which is `C:\Documents and Settings\$USER` or `C:\Users\$USER` for most people, depending on version (`$USER` is `%USERNAME%` in Windows environment). It also still looks for `/etc/gitconfig`, although it's relative to the MSys root, which is wherever you decide to install Git on your Windows system when you run the installer.
### Your Identity ###
The first thing you should do when you install Git is to set your user name and e-mail address. This is important because every Git commit uses this information, and it's immutably baked into the commits you pass around:
$ git config --global user.name "John Doe"
$ git config --global user.email johndoe@example.com
Again, you need to do this only once if you pass the `--global` option, because then Git will always use that information for anything you do on that system. If you want to override this with a different name or e-mail address for specific projects, you can run the command without the `--global` option when you're in that project.
### Your Editor ###
Now that your identity is set up, you can configure the default text editor that will be used when Git needs you to type in a message. By default, Git uses your system's default editor, which is generally Vi or Vim. If you want to use a different text editor, such as Emacs, you can do the following:
$ git config --global core.editor emacs
### Your Diff Tool ###
Another useful option you may want to configure is the default diff tool to use to resolve merge conflicts. Say you want to use vimdiff:
$ git config --global merge.tool vimdiff
Git accepts kdiff3, tkdiff, meld, xxdiff, emerge, vimdiff, gvimdiff, ecmerge, and opendiff as valid merge tools. You can also set up a custom tool; see Chapter 7 for more information about doing that.
### Checking Your Settings ###
If you want to check your settings, you can use the `git config --list` command to list all the settings Git can find at that point:
$ git config --list
user.name=Scott Chacon
user.email=schacon@gmail.com
color.status=auto
color.branch=auto
color.interactive=auto
color.diff=auto
...
You may see keys more than once, because Git reads the same key from different files (`/etc/gitconfig` and `~/.gitconfig`, for example). In this case, Git uses the last value for each unique key it sees.
You can also check what Git thinks a specific key's value is by typing `git config {key}`:
$ git config user.name
Scott Chacon
## Getting Help ##
If you ever need help while using Git, there are three ways to get the manual page (manpage) help for any of the Git commands:
$ git help <verb>
$ git <verb> --help
$ man git-<verb>
For example, you can get the manpage help for the config command by running
$ git help config
These commands are nice because you can access them anywhere, even offline.
If the manpages and this book aren't enough and you need in-person help, you can try the `#git` or `#github` channel on the Freenode IRC server (irc.freenode.net). These channels are regularly filled with hundreds of people who are all very knowledgeable about Git and are often willing to help.
## Summary ##
You should have a basic understanding of what Git is and how it's different from the CVCS you may have been using. You should also now have a working version of Git on your system that's set up with your personal identity. It's now time to learn some Git basics.

View file

@ -1,408 +0,0 @@
// Copyright 2016 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package parser
import (
"bufio"
"bytes"
"fmt"
"io"
"regexp"
"strings"
"unicode"
"github.com/chaseadamsio/goorgeous"
)
const (
// TODO(bep) Do we really have to export these?
// HTMLLead identifies the start of HTML documents.
HTMLLead = "<"
// YAMLLead identifies the start of YAML frontmatter.
YAMLLead = "-"
// YAMLDelimUnix identifies the end of YAML front matter on Unix.
YAMLDelimUnix = "---\n"
// YAMLDelimDOS identifies the end of YAML front matter on Windows.
YAMLDelimDOS = "---\r\n"
// YAMLDelim identifies the YAML front matter delimiter.
YAMLDelim = "---"
// TOMLLead identifies the start of TOML front matter.
TOMLLead = "+"
// TOMLDelimUnix identifies the end of TOML front matter on Unix.
TOMLDelimUnix = "+++\n"
// TOMLDelimDOS identifies the end of TOML front matter on Windows.
TOMLDelimDOS = "+++\r\n"
// TOMLDelim identifies the TOML front matter delimiter.
TOMLDelim = "+++"
// JSONLead identifies the start of JSON frontmatter.
JSONLead = "{"
// HTMLCommentStart identifies the start of HTML comment.
HTMLCommentStart = "<!--"
// HTMLCommentEnd identifies the end of HTML comment.
HTMLCommentEnd = "-->"
// BOM Unicode byte order marker
BOM = '\ufeff'
)
var (
delims = regexp.MustCompile(
"^(" + regexp.QuoteMeta(YAMLDelim) + `\s*\n|` + regexp.QuoteMeta(TOMLDelim) + `\s*\n|` + regexp.QuoteMeta(JSONLead) + ")",
)
)
// Page represents a parsed content page.
type Page interface {
// FrontMatter contains the raw frontmatter with relevant delimiters.
FrontMatter() []byte
// Content contains the raw page content.
Content() []byte
// IsRenderable denotes that the page should be rendered.
IsRenderable() bool
// Metadata returns the unmarshalled frontmatter data.
Metadata() (map[string]interface{}, error)
}
// page implements the Page interface.
type page struct {
render bool
frontmatter []byte
content []byte
}
// Content returns the raw page content.
func (p *page) Content() []byte {
return p.content
}
// FrontMatter contains the raw frontmatter with relevant delimiters.
func (p *page) FrontMatter() []byte {
return p.frontmatter
}
// IsRenderable denotes that the page should be rendered.
func (p *page) IsRenderable() bool {
return p.render
}
// Metadata returns the unmarshalled frontmatter data.
func (p *page) Metadata() (meta map[string]interface{}, err error) {
return nil, nil
}
// ReadFrom reads the content from an io.Reader and constructs a page.
func ReadFrom(r io.Reader) (p Page, err error) {
reader := bufio.NewReader(r)
// chomp BOM and assume UTF-8
if err = chompBOM(reader); err != nil && err != io.EOF {
return
}
if err = chompWhitespace(reader); err != nil && err != io.EOF {
return
}
if err = chompFrontmatterStartComment(reader); err != nil && err != io.EOF {
return
}
firstLine, err := peekLine(reader)
if err != nil && err != io.EOF {
return
}
newp := new(page)
newp.render = shouldRender(firstLine)
if newp.render && isFrontMatterDelim(firstLine) {
left, right := determineDelims(firstLine)
fm, err := extractFrontMatterDelims(reader, left, right)
if err != nil {
return nil, err
}
newp.frontmatter = fm
} else if newp.render && goorgeous.IsKeyword(firstLine) {
fm, err := goorgeous.ExtractOrgHeaders(reader)
if err != nil {
return nil, err
}
newp.frontmatter = fm
}
content, err := extractContent(reader)
if err != nil {
return nil, err
}
newp.content = content
return newp, nil
}
// chompBOM scans any leading Unicode Byte Order Markers from r.
func chompBOM(r io.RuneScanner) (err error) {
for {
c, _, err := r.ReadRune()
if err != nil {
return err
}
if c != BOM {
r.UnreadRune()
return nil
}
}
}
// chompWhitespace scans any leading Unicode whitespace from r.
func chompWhitespace(r io.RuneScanner) (err error) {
for {
c, _, err := r.ReadRune()
if err != nil {
return err
}
if !unicode.IsSpace(c) {
r.UnreadRune()
return nil
}
}
}
// chompFrontmatterStartComment checks r for a leading HTML comment. If a
// comment is found, it is read from r and then whitespace is trimmed from the
// beginning of r.
func chompFrontmatterStartComment(r *bufio.Reader) (err error) {
candidate, err := r.Peek(32)
if err != nil {
return err
}
str := string(candidate)
if strings.HasPrefix(str, HTMLCommentStart) {
lineEnd := strings.IndexAny(str, "\n")
if lineEnd == -1 {
//TODO: if we can't find it, Peek more?
return nil
}
testStr := strings.TrimSuffix(str[0:lineEnd], "\r")
if strings.Contains(testStr, HTMLCommentEnd) {
return nil
}
buf := make([]byte, lineEnd)
if _, err = r.Read(buf); err != nil {
return
}
if err = chompWhitespace(r); err != nil {
return err
}
}
return nil
}
// chompFrontmatterEndComment checks r for a trailing HTML comment.
func chompFrontmatterEndComment(r *bufio.Reader) (err error) {
candidate, err := r.Peek(32)
if err != nil {
return err
}
str := string(candidate)
lineEnd := strings.IndexAny(str, "\n")
if lineEnd == -1 {
return nil
}
testStr := strings.TrimSuffix(str[0:lineEnd], "\r")
if strings.Contains(testStr, HTMLCommentStart) {
return nil
}
//TODO: if we can't find it, Peek more?
if strings.HasSuffix(testStr, HTMLCommentEnd) {
buf := make([]byte, lineEnd)
if _, err = r.Read(buf); err != nil {
return
}
if err = chompWhitespace(r); err != nil {
return err
}
}
return nil
}
func peekLine(r *bufio.Reader) (line []byte, err error) {
firstFive, err := r.Peek(5)
if err != nil {
return
}
idx := bytes.IndexByte(firstFive, '\n')
if idx == -1 {
return firstFive, nil
}
idx++ // include newline.
return firstFive[:idx], nil
}
func shouldRender(lead []byte) (frontmatter bool) {
if len(lead) <= 0 {
return
}
if bytes.Equal(lead[:1], []byte(HTMLLead)) {
return
}
return true
}
func isFrontMatterDelim(data []byte) bool {
return delims.Match(data)
}
func determineDelims(firstLine []byte) (left, right []byte) {
switch firstLine[0] {
case YAMLLead[0]:
return []byte(YAMLDelim), []byte(YAMLDelim)
case TOMLLead[0]:
return []byte(TOMLDelim), []byte(TOMLDelim)
case JSONLead[0]:
return []byte(JSONLead), []byte("}")
default:
panic(fmt.Sprintf("Unable to determine delims from %q", firstLine))
}
}
// extractFrontMatterDelims takes a frontmatter from the content bufio.Reader.
// Beginning white spaces of the bufio.Reader must be trimmed before call this
// function.
func extractFrontMatterDelims(r *bufio.Reader, left, right []byte) (fm []byte, err error) {
var (
c byte
buf bytes.Buffer
level int
sameDelim = bytes.Equal(left, right)
inQuote bool
escapeState int
)
// Frontmatter must start with a delimiter. To check it first,
// pre-reads beginning delimiter length - 1 bytes from Reader
for i := 0; i < len(left)-1; i++ {
if c, err = r.ReadByte(); err != nil {
return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s\n%.100s...", buf.Len(), err, buf.String())
}
if err = buf.WriteByte(c); err != nil {
return nil, err
}
}
// Reads a character from Reader one by one and checks it matches the
// last character of one of delimiters to find the last character of
// frontmatter. If it matches, makes sure it contains the delimiter
// and if so, also checks it is followed by CR+LF or LF when YAML,
// TOML case. In JSON case, nested delimiters must be parsed and it
// is expected that the delimiter only contains one character.
for {
if c, err = r.ReadByte(); err != nil {
return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s\n%.100s...", buf.Len(), err, buf.String())
}
if err = buf.WriteByte(c); err != nil {
return nil, err
}
switch c {
case '"':
if escapeState != 1 {
inQuote = !inQuote
}
case '\\':
escapeState++
case left[len(left)-1]:
if sameDelim { // YAML, TOML case
if bytes.HasSuffix(buf.Bytes(), left) && (buf.Len() == len(left) || buf.Bytes()[buf.Len()-len(left)-1] == '\n') {
nextByte:
c, err = r.ReadByte()
if err != nil {
// It is ok that the end delimiter ends with EOF
if err != io.EOF || level != 1 {
return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s\n%.100s...", buf.Len(), err, buf.String())
}
} else {
switch c {
case '\n':
// ok
case ' ':
// Consume this byte and try to match again
goto nextByte
case '\r':
if err = buf.WriteByte(c); err != nil {
return nil, err
}
if c, err = r.ReadByte(); err != nil {
return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s\n%.100s...", buf.Len(), err, buf.String())
}
if c != '\n' {
return nil, fmt.Errorf("frontmatter delimiter must be followed by CR+LF or LF but those can't be found at filepos %d", buf.Len())
}
default:
return nil, fmt.Errorf("frontmatter delimiter must be followed by CR+LF or LF but those can't be found at filepos %d", buf.Len())
}
if err = buf.WriteByte(c); err != nil {
return nil, err
}
}
if level == 0 {
level = 1
} else {
level = 0
}
}
} else { // JSON case
if !inQuote {
level++
}
}
case right[len(right)-1]: // JSON case only reaches here
if !inQuote {
level--
}
}
if level == 0 {
// Consumes white spaces immediately behind frontmatter
if err = chompWhitespace(r); err != nil && err != io.EOF {
return nil, err
}
if err = chompFrontmatterEndComment(r); err != nil && err != io.EOF {
return nil, err
}
return buf.Bytes(), nil
}
if c != '\\' {
escapeState = 0
}
}
}
func extractContent(r io.Reader) (content []byte, err error) {
wr := new(bytes.Buffer)
if _, err = wr.ReadFrom(r); err != nil {
return
}
return wr.Bytes(), nil
}

View file

@ -1 +0,0 @@
package parser

View file

@ -20,8 +20,8 @@ import (
type Item struct {
Type ItemType
Pos Pos
Val []byte
Pos int
Val []byte
}
type Items []Item

View file

@ -24,9 +24,6 @@ import (
"unicode/utf8"
)
// position (in bytes)
type Pos int
const eof = -1
// returns the next state in scanner.
@ -47,9 +44,9 @@ type pageLexer struct {
input []byte
stateStart stateFunc
state stateFunc
pos Pos // input position
start Pos // item start position
width Pos // width of last element
pos int // input position
start int // item start position
width int // width of last element
// Set when we have parsed any summary divider
summaryDividerChecked bool
@ -73,7 +70,7 @@ func (l *pageLexer) Input() []byte {
// note: the input position here is normally 0 (start), but
// can be set if position of first shortcode is known
// TODO(bep) 2errors byte
func newPageLexer(input []byte, inputPosition Pos, stateStart stateFunc) *pageLexer {
func newPageLexer(input []byte, inputPosition int, stateStart stateFunc) *pageLexer {
lexer := &pageLexer{
input: input,
pos: inputPosition,
@ -131,7 +128,7 @@ func (l *pageLexer) next() rune {
}
runeValue, runeWidth := utf8.DecodeRune(l.input[l.pos:])
l.width = Pos(runeWidth)
l.width = runeWidth
l.pos += l.width
return runeValue
}
@ -210,7 +207,7 @@ func lexMainSection(l *pageLexer) stateFunc {
l3 = l.index(leftDelimSc)
skip := minPositiveIndex(l1, l2, l3)
if skip > 0 {
l.pos += Pos(skip)
l.pos += skip
}
for {
@ -234,7 +231,7 @@ func lexMainSection(l *pageLexer) stateFunc {
l.emit(tText)
}
l.summaryDividerChecked = true
l.pos += Pos(len(summaryDivider))
l.pos += len(summaryDivider)
//l.consumeCRLF()
l.emit(TypeLeadSummaryDivider)
} else if l.hasPrefix(summaryDividerOrg) {
@ -242,7 +239,7 @@ func lexMainSection(l *pageLexer) stateFunc {
l.emit(tText)
}
l.summaryDividerChecked = true
l.pos += Pos(len(summaryDividerOrg))
l.pos += len(summaryDividerOrg)
//l.consumeCRLF()
l.emit(TypeSummaryDividerOrg)
}
@ -291,12 +288,12 @@ LOOP:
if right == -1 {
return l.errorf("starting HTML comment with no end")
}
l.pos += Pos(right) + Pos(len(htmlCOmmentEnd))
l.pos += right + len(htmlCOmmentEnd)
l.emit(TypeHTMLComment)
} else {
// Not need to look further. Hugo treats this as plain HTML,
// no front matter, no shortcodes, no nothing.
l.pos = Pos(len(l.input))
l.pos = len(l.input)
l.emit(TypeHTMLDocument)
}
}
@ -434,7 +431,7 @@ func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string,
}
func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
l.pos += Pos(len(l.currentLeftShortcodeDelim()))
l.pos += len(l.currentLeftShortcodeDelim())
if l.hasPrefix(leftComment) {
return lexShortcodeComment
}
@ -451,20 +448,20 @@ func lexShortcodeComment(l *pageLexer) stateFunc {
}
// we emit all as text, except the comment markers
l.emit(tText)
l.pos += Pos(len(leftComment))
l.pos += len(leftComment)
l.ignore()
l.pos += Pos(posRightComment - len(leftComment))
l.pos += posRightComment - len(leftComment)
l.emit(tText)
l.pos += Pos(len(rightComment))
l.pos += len(rightComment)
l.ignore()
l.pos += Pos(len(l.currentRightShortcodeDelim()))
l.pos += len(l.currentRightShortcodeDelim())
l.emit(tText)
return lexMainSection
}
func lexShortcodeRightDelim(l *pageLexer) stateFunc {
l.closingState = 0
l.pos += Pos(len(l.currentRightShortcodeDelim()))
l.pos += len(l.currentRightShortcodeDelim())
l.emit(l.currentRightShortcodeDelimItem())
return lexMainSection
}

View file

@ -48,7 +48,7 @@ func Parse(r io.Reader) (Result, error) {
}
func parseMainSection(input []byte, from int) Result {
lexer := newPageLexer(input, Pos(from), lexMainSection) // TODO(bep) 2errors
lexer := newPageLexer(input, from, lexMainSection) // TODO(bep) 2errors
lexer.run()
return lexer
}
@ -57,7 +57,7 @@ func parseMainSection(input []byte, from int) Result {
// if needed.
type Iterator struct {
l *pageLexer
lastPos Pos // position of the last item returned by nextItem
lastPos int // position of the last item returned by nextItem
}
// consumes and returns the next item
@ -69,7 +69,7 @@ func (t *Iterator) Next() Item {
var errIndexOutOfBounds = Item{tError, 0, []byte("no more tokens")}
func (t *Iterator) current() Item {
if t.lastPos >= Pos(len(t.l.items)) {
if t.lastPos >= len(t.l.items) {
return errIndexOutOfBounds
}
return t.l.items[t.lastPos]
@ -98,7 +98,7 @@ func (t *Iterator) Peek() Item {
// PeekWalk will feed the next items in the iterator to walkFn
// until it returns false.
func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
for i := t.lastPos + 1; i < Pos(len(t.l.items)); i++ {
for i := t.lastPos + 1; i < len(t.l.items); i++ {
item := t.l.items[i]
if !walkFn(item) {
break

View file

@ -1,324 +0,0 @@
// Copyright 2015 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package parser
// TODO Support Mac Encoding (\r)
import (
"bufio"
"bytes"
"io"
"os"
"path/filepath"
"strings"
"testing"
)
const (
contentNoFrontmatter = "a page with no front matter"
contentWithFrontmatter = "---\ntitle: front matter\n---\nContent with front matter"
contentHTMLNoDoctype = "<html>\n\t<body>\n\t</body>\n</html>"
contentHTMLWithDoctype = "<!doctype html><html><body></body></html>"
contentHTMLWithFrontmatter = "---\ntitle: front matter\n---\n<!doctype><html><body></body></html>"
contentHTML = " <html><body></body></html>"
contentLinefeedAndHTML = "\n<html><body></body></html>"
contentIncompleteEndFrontmatterDelim = "---\ntitle: incomplete end fm delim\n--\nincomplete frontmatter delim"
contentMissingEndFrontmatterDelim = "---\ntitle: incomplete end fm delim\nincomplete frontmatter delim"
contentSlugWorking = "---\ntitle: slug doc 2\nslug: slug-doc-2\n\n---\nslug doc 2 content"
contentSlugWorkingVariation = "---\ntitle: slug doc 3\nslug: slug-doc 3\n---\nslug doc 3 content"
contentSlugBug = "---\ntitle: slug doc 2\nslug: slug-doc-2\n---\nslug doc 2 content"
contentSlugWithJSONFrontMatter = "{\n \"categories\": \"d\",\n \"tags\": [\n \"a\", \n \"b\", \n \"c\"\n ]\n}\nJSON Front Matter with tags and categories"
contentWithJSONLooseFrontmatter = "{\n \"categories\": \"d\"\n \"tags\": [\n \"a\" \n \"b\" \n \"c\"\n ]\n}\nJSON Front Matter with tags and categories"
contentSlugWithJSONFrontMatterFirstLineOnly = "{\"categories\":\"d\",\"tags\":[\"a\",\"b\",\"c\"]}\nJSON Front Matter with tags and categories"
contentSlugWithJSONFrontMatterFirstLine = "{\"categories\":\"d\",\n \"tags\":[\"a\",\"b\",\"c\"]}\nJSON Front Matter with tags and categories"
)
var lineEndings = []string{"\n", "\r\n"}
var delimiters = []string{"---", "+++"}
func pageMust(p Page, err error) *page {
if err != nil {
panic(err)
}
return p.(*page)
}
func TestDegenerateCreatePageFrom(t *testing.T) {
tests := []struct {
content string
}{
{contentMissingEndFrontmatterDelim},
{contentIncompleteEndFrontmatterDelim},
}
for _, test := range tests {
for _, ending := range lineEndings {
test.content = strings.Replace(test.content, "\n", ending, -1)
_, err := ReadFrom(strings.NewReader(test.content))
if err == nil {
t.Errorf("Content should return an err:\n%q\n", test.content)
}
}
}
}
func checkPageRender(t *testing.T, p *page, expected bool) {
if p.render != expected {
t.Errorf("page.render should be %t, got: %t", expected, p.render)
}
}
func checkPageFrontMatterIsNil(t *testing.T, p *page, content string, expected bool) {
if bool(p.frontmatter == nil) != expected {
t.Logf("\n%q\n", content)
t.Errorf("page.frontmatter == nil? %t, got %t", expected, p.frontmatter == nil)
}
}
func checkPageFrontMatterContent(t *testing.T, p *page, frontMatter string) {
if p.frontmatter == nil {
return
}
if !bytes.Equal(p.frontmatter, []byte(frontMatter)) {
t.Errorf("frontmatter mismatch\nexp: %q\ngot: %q", frontMatter, p.frontmatter)
}
}
func checkPageContent(t *testing.T, p *page, expected string) {
if !bytes.Equal(p.content, []byte(expected)) {
t.Errorf("content mismatch\nexp: %q\ngot: %q", expected, p.content)
}
}
func TestStandaloneCreatePageFrom(t *testing.T) {
tests := []struct {
content string
expectedMustRender bool
frontMatterIsNil bool
frontMatter string
bodycontent string
}{
{contentNoFrontmatter, true, true, "", "a page with no front matter"},
{contentWithFrontmatter, true, false, "---\ntitle: front matter\n---\n", "Content with front matter"},
{contentHTMLNoDoctype, false, true, "", "<html>\n\t<body>\n\t</body>\n</html>"},
{contentHTMLWithDoctype, false, true, "", "<!doctype html><html><body></body></html>"},
{contentHTMLWithFrontmatter, true, false, "---\ntitle: front matter\n---\n", "<!doctype><html><body></body></html>"},
{contentHTML, false, true, "", "<html><body></body></html>"},
{contentLinefeedAndHTML, false, true, "", "<html><body></body></html>"},
{contentSlugWithJSONFrontMatter, true, false, "{\n \"categories\": \"d\",\n \"tags\": [\n \"a\", \n \"b\", \n \"c\"\n ]\n}", "JSON Front Matter with tags and categories"},
{contentWithJSONLooseFrontmatter, true, false, "{\n \"categories\": \"d\"\n \"tags\": [\n \"a\" \n \"b\" \n \"c\"\n ]\n}", "JSON Front Matter with tags and categories"},
{contentSlugWithJSONFrontMatterFirstLineOnly, true, false, "{\"categories\":\"d\",\"tags\":[\"a\",\"b\",\"c\"]}", "JSON Front Matter with tags and categories"},
{contentSlugWithJSONFrontMatterFirstLine, true, false, "{\"categories\":\"d\",\n \"tags\":[\"a\",\"b\",\"c\"]}", "JSON Front Matter with tags and categories"},
{contentSlugWorking, true, false, "---\ntitle: slug doc 2\nslug: slug-doc-2\n\n---\n", "slug doc 2 content"},
{contentSlugWorkingVariation, true, false, "---\ntitle: slug doc 3\nslug: slug-doc 3\n---\n", "slug doc 3 content"},
{contentSlugBug, true, false, "---\ntitle: slug doc 2\nslug: slug-doc-2\n---\n", "slug doc 2 content"},
}
for _, test := range tests {
for _, ending := range lineEndings {
test.content = strings.Replace(test.content, "\n", ending, -1)
test.frontMatter = strings.Replace(test.frontMatter, "\n", ending, -1)
test.bodycontent = strings.Replace(test.bodycontent, "\n", ending, -1)
p := pageMust(ReadFrom(strings.NewReader(test.content)))
checkPageRender(t, p, test.expectedMustRender)
checkPageFrontMatterIsNil(t, p, test.content, test.frontMatterIsNil)
checkPageFrontMatterContent(t, p, test.frontMatter)
checkPageContent(t, p, test.bodycontent)
}
}
}
func BenchmarkLongFormRender(b *testing.B) {
tests := []struct {
filename string
buf []byte
}{
{filename: "long_text_test.md"},
}
for i, test := range tests {
path := filepath.FromSlash(test.filename)
f, err := os.Open(path)
if err != nil {
b.Fatalf("Unable to open %s: %s", path, err)
}
defer f.Close()
membuf := new(bytes.Buffer)
if _, err := io.Copy(membuf, f); err != nil {
b.Fatalf("Unable to read %s: %s", path, err)
}
tests[i].buf = membuf.Bytes()
}
b.ResetTimer()
for i := 0; i <= b.N; i++ {
for _, test := range tests {
ReadFrom(bytes.NewReader(test.buf))
}
}
}
func TestPageShouldRender(t *testing.T) {
tests := []struct {
content []byte
expected bool
}{
{[]byte{}, false},
{[]byte{'<'}, false},
{[]byte{'-'}, true},
{[]byte("--"), true},
{[]byte("---"), true},
{[]byte("---\n"), true},
{[]byte{'a'}, true},
}
for _, test := range tests {
for _, ending := range lineEndings {
test.content = bytes.Replace(test.content, []byte("\n"), []byte(ending), -1)
if render := shouldRender(test.content); render != test.expected {
t.Errorf("Expected %s to shouldRender = %t, got: %t", test.content, test.expected, render)
}
}
}
}
func TestPageHasFrontMatter(t *testing.T) {
tests := []struct {
content []byte
expected bool
}{
{[]byte{'-'}, false},
{[]byte("--"), false},
{[]byte("---"), false},
{[]byte("---\n"), true},
{[]byte("---\n"), true},
{[]byte("--- \n"), true},
{[]byte("--- \n"), true},
{[]byte{'a'}, false},
{[]byte{'{'}, true},
{[]byte("{\n "), true},
{[]byte{'}'}, false},
}
for _, test := range tests {
for _, ending := range lineEndings {
test.content = bytes.Replace(test.content, []byte("\n"), []byte(ending), -1)
if isFrontMatterDelim := isFrontMatterDelim(test.content); isFrontMatterDelim != test.expected {
t.Errorf("Expected %q isFrontMatterDelim = %t, got: %t", test.content, test.expected, isFrontMatterDelim)
}
}
}
}
func TestExtractFrontMatter(t *testing.T) {
tests := []struct {
frontmatter string
extracted []byte
errIsNil bool
}{
{"", nil, false},
{"-", nil, false},
{"---\n", nil, false},
{"---\nfoobar", nil, false},
{"---\nfoobar\nbarfoo\nfizbaz\n", nil, false},
{"---\nblar\n-\n", nil, false},
{"---\nralb\n---\n", []byte("---\nralb\n---\n"), true},
{"---\neof\n---", []byte("---\neof\n---"), true},
{"--- \neof\n---", []byte("---\neof\n---"), true},
{"---\nminc\n---\ncontent", []byte("---\nminc\n---\n"), true},
{"---\nminc\n--- \ncontent", []byte("---\nminc\n---\n"), true},
{"--- \nminc\n--- \ncontent", []byte("---\nminc\n---\n"), true},
{"---\ncnim\n---\ncontent\n", []byte("---\ncnim\n---\n"), true},
{"---\ntitle: slug doc 2\nslug: slug-doc-2\n---\ncontent\n", []byte("---\ntitle: slug doc 2\nslug: slug-doc-2\n---\n"), true},
{"---\npermalink: '/blog/title---subtitle.html'\n---\ncontent\n", []byte("---\npermalink: '/blog/title---subtitle.html'\n---\n"), true},
}
for _, test := range tests {
for _, ending := range lineEndings {
test.frontmatter = strings.Replace(test.frontmatter, "\n", ending, -1)
test.extracted = bytes.Replace(test.extracted, []byte("\n"), []byte(ending), -1)
for _, delim := range delimiters {
test.frontmatter = strings.Replace(test.frontmatter, "---", delim, -1)
test.extracted = bytes.Replace(test.extracted, []byte("---"), []byte(delim), -1)
line, err := peekLine(bufio.NewReader(strings.NewReader(test.frontmatter)))
if err != nil {
continue
}
l, r := determineDelims(line)
fm, err := extractFrontMatterDelims(bufio.NewReader(strings.NewReader(test.frontmatter)), l, r)
if (err == nil) != test.errIsNil {
t.Logf("\n%q\n", string(test.frontmatter))
t.Errorf("Expected err == nil => %t, got: %t. err: %s", test.errIsNil, err == nil, err)
continue
}
if !bytes.Equal(fm, test.extracted) {
t.Errorf("Frontmatter did not match:\nexp: %q\ngot: %q", string(test.extracted), fm)
}
}
}
}
}
func TestExtractFrontMatterDelim(t *testing.T) {
var (
noErrExpected = true
errExpected = false
)
tests := []struct {
frontmatter string
extracted string
errIsNil bool
}{
{"", "", errExpected},
{"{", "", errExpected},
{"{}", "{}", noErrExpected},
{"{} ", "{}", noErrExpected},
{"{ } ", "{ }", noErrExpected},
{"{ { }", "", errExpected},
{"{ { } }", "{ { } }", noErrExpected},
{"{ { } { } }", "{ { } { } }", noErrExpected},
{"{\n{\n}\n}\n", "{\n{\n}\n}", noErrExpected},
{"{\n \"categories\": \"d\",\n \"tags\": [\n \"a\", \n \"b\", \n \"c\"\n ]\n}\nJSON Front Matter with tags and categories", "{\n \"categories\": \"d\",\n \"tags\": [\n \"a\", \n \"b\", \n \"c\"\n ]\n}", noErrExpected},
{"{\n \"categories\": \"d\"\n \"tags\": [\n \"a\" \n \"b\" \n \"c\"\n ]\n}\nJSON Front Matter with tags and categories", "{\n \"categories\": \"d\"\n \"tags\": [\n \"a\" \n \"b\" \n \"c\"\n ]\n}", noErrExpected},
// Issue #3511
{`{ "title": "{" }`, `{ "title": "{" }`, noErrExpected},
{`{ "title": "{}" }`, `{ "title": "{}" }`, noErrExpected},
// Issue #3661
{`{ "title": "\"" }`, `{ "title": "\"" }`, noErrExpected},
{`{ "title": "\"{", "other": "\"{}" }`, `{ "title": "\"{", "other": "\"{}" }`, noErrExpected},
{`{ "title": "\"Foo\"" }`, `{ "title": "\"Foo\"" }`, noErrExpected},
{`{ "title": "\"Foo\"\"" }`, `{ "title": "\"Foo\"\"" }`, noErrExpected},
{`{ "url": "http:\/\/example.com\/play\/url?id=1" }`, `{ "url": "http:\/\/example.com\/play\/url?id=1" }`, noErrExpected},
{`{ "test": "\"New\r\nString\"" }`, `{ "test": "\"New\r\nString\"" }`, noErrExpected},
{`{ "test": "RTS\/RPG" }`, `{ "test": "RTS\/RPG" }`, noErrExpected},
}
for i, test := range tests {
fm, err := extractFrontMatterDelims(bufio.NewReader(strings.NewReader(test.frontmatter)), []byte("{"), []byte("}"))
if (err == nil) != test.errIsNil {
t.Logf("\n%q\n", string(test.frontmatter))
t.Errorf("[%d] Expected err == nil => %t, got: %t. err: %s", i, test.errIsNil, err == nil, err)
continue
}
if !bytes.Equal(fm, []byte(test.extracted)) {
t.Logf("\n%q\n", string(test.frontmatter))
t.Errorf("[%d] Frontmatter did not match:\nexp: %q\ngot: %q", i, string(test.extracted), fm)
}
}
}

View file

@ -49,18 +49,12 @@ func (ns *Namespace) Remarshal(format string, data interface{}) (string, error)
return result.String(), nil
}
func toFormatMark(format string) (rune, error) {
// TODO(bep) the parser package needs a cleaning.
switch format {
case "yaml":
return rune(parser.YAMLLead[0]), nil
case "toml":
return rune(parser.TOMLLead[0]), nil
case "json":
return rune(parser.JSONLead[0]), nil
func toFormatMark(format string) (metadecoders.Format, error) {
if f := metadecoders.FormatFromString(format); f != "" {
return f, nil
}
return 0, errors.New("failed to detect target data serialization format")
return "", errors.New("failed to detect target data serialization format")
}
func detectFormat(data string) (metadecoders.Format, error) {