hugo/helpers/content.go

293 lines
7.6 KiB
Go
Raw Normal View History

// Copyright 2019 The Hugo Authors. All rights reserved.
//
2015-11-24 03:16:36 +00:00
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
2015-11-24 03:16:36 +00:00
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package helpers implements general utility functions that work with
// and on content. The helper functions defined here lay down the
// foundation of how Hugo works with files and filepaths, and perform
// string operations on content.
package helpers
import (
"bytes"
"html/template"
"strings"
"unicode"
"unicode/utf8"
"github.com/gohugoio/hugo/common/hexec"
"github.com/gohugoio/hugo/common/loggers"
"github.com/spf13/afero"
"github.com/gohugoio/hugo/markup/converter"
"github.com/gohugoio/hugo/markup/converter/hooks"
"github.com/gohugoio/hugo/markup"
Add support for theme composition and inheritance This commit adds support for theme composition and inheritance in Hugo. With this, it helps thinking about a theme as a set of ordered components: ```toml theme = ["my-shortcodes", "base-theme", "hyde"] ``` The theme definition example above in `config.toml` creates a theme with the 3 components with presedence from left to right. So, Hugo will, for any given file, data entry etc., look first in the project, and then in `my-shortcode`, `base-theme` and lastly `hyde`. Hugo uses two different algorithms to merge the filesystems, depending on the file type: * For `i18n` and `data` files, Hugo merges deeply using the translation id and data key inside the files. * For `static`, `layouts` (templates) and `archetypes` files, these are merged on file level. So the left-most file will be chosen. The name used in the `theme` definition above must match a folder in `/your-site/themes`, e.g. `/your-site/themes/my-shortcodes`. There are plans to improve on this and get a URL scheme so this can be resolved automatically. Also note that a component that is part of a theme can have its own configuration file, e.g. `config.toml`. There are currently some restrictions to what a theme component can configure: * `params` (global and per language) * `menu` (global and per language) * `outputformats` and `mediatypes` The same rules apply here: The left-most param/menu etc. with the same ID will win. There are some hidden and experimental namespace support in the above, which we will work to improve in the future, but theme authors are encouraged to create their own namespaces to avoid naming conflicts. A final note: Themes/components can also have a `theme` definition in their `config.toml` and similar, which is the "inheritance" part of this commit's title. This is currently not supported by the Hugo theme site. We will have to wait for some "auto dependency" feature to be implemented for that to happen, but this can be a powerful feature if you want to create your own theme-variant based on others. Fixes #4460 Fixes #4450
2018-03-01 14:01:25 +00:00
"github.com/gohugoio/hugo/config"
)
var (
openingPTag = []byte("<p>")
closingPTag = []byte("</p>")
paragraphIndicator = []byte("<p")
closingIndicator = []byte("</")
)
// ContentSpec provides functionality to render markdown content.
type ContentSpec struct {
Converters markup.ConverterProvider
anchorNameSanitizer converter.AnchorNameSanitizer
getRenderer func(t hooks.RendererType, id any) any
Cfg config.AllProvider
}
// NewContentSpec returns a ContentSpec initialized
// with the appropriate fields from the given config.Provider.
func NewContentSpec(cfg config.AllProvider, logger loggers.Logger, contentFs afero.Fs, ex *hexec.Exec) (*ContentSpec, error) {
spec := &ContentSpec{
Cfg: cfg,
}
converterProvider, err := markup.NewConverterProvider(converter.ProviderConfig{
Conf: cfg,
ContentFs: contentFs,
Logger: logger,
Exec: ex,
})
if err != nil {
return nil, err
}
spec.Converters = converterProvider
p := converterProvider.Get("markdown")
conv, err := p.New(converter.DocumentContext{})
if err != nil {
return nil, err
}
if as, ok := conv.(converter.AnchorNameSanitizer); ok {
spec.anchorNameSanitizer = as
} else {
// Use Goldmark's sanitizer
p := converterProvider.Get("goldmark")
conv, err := p.New(converter.DocumentContext{})
if err != nil {
return nil, err
}
spec.anchorNameSanitizer = conv.(converter.AnchorNameSanitizer)
}
return spec, nil
}
2016-03-14 16:27:15 +00:00
// stripEmptyNav strips out empty <nav> tags from content.
func stripEmptyNav(in []byte) []byte {
return bytes.Replace(in, []byte("<nav>\n</nav>\n\n"), []byte(``), -1)
}
// BytesToHTML converts bytes to type template.HTML.
func BytesToHTML(b []byte) template.HTML {
return template.HTML(string(b))
}
// ExtractTOC extracts Table of Contents from content.
func ExtractTOC(content []byte) (newcontent []byte, toc []byte) {
if !bytes.Contains(content, []byte("<nav>")) {
return content, nil
}
origContent := make([]byte, len(content))
copy(origContent, content)
first := []byte(`<nav>
<ul>`)
last := []byte(`</ul>
</nav>`)
replacement := []byte(`<nav id="TableOfContents">
<ul>`)
startOfTOC := bytes.Index(content, first)
peekEnd := len(content)
if peekEnd > 70+startOfTOC {
peekEnd = 70 + startOfTOC
}
if startOfTOC < 0 {
2016-03-14 16:27:15 +00:00
return stripEmptyNav(content), toc
}
// Need to peek ahead to see if this nav element is actually the right one.
correctNav := bytes.Index(content[startOfTOC:peekEnd], []byte(`<li><a href="#`))
if correctNav < 0 { // no match found
return content, toc
}
lengthOfTOC := bytes.Index(content[startOfTOC:], last) + len(last)
endOfTOC := startOfTOC + lengthOfTOC
newcontent = append(content[:startOfTOC], content[endOfTOC:]...)
toc = append(replacement, origContent[startOfTOC+len(first):endOfTOC]...)
return
}
func (c *ContentSpec) SanitizeAnchorName(s string) string {
return c.anchorNameSanitizer.SanitizeAnchorName(s)
}
func (c *ContentSpec) ResolveMarkup(in string) string {
if c == nil {
panic("nil ContentSpec")
}
in = strings.ToLower(in)
switch in {
case "md", "markdown", "mdown":
return "markdown"
case "html", "htm":
return "html"
default:
if conv := c.Converters.Get(in); conv != nil {
return conv.Name()
}
}
return ""
}
// TotalWords counts instance of one or more consecutive white space
// characters, as defined by unicode.IsSpace, in s.
// This is a cheaper way of word counting than the obvious len(strings.Fields(s)).
func TotalWords(s string) int {
n := 0
inWord := false
for _, r := range s {
wasInWord := inWord
inWord = !unicode.IsSpace(r)
if inWord && !wasInWord {
n++
}
}
return n
}
// TruncateWordsByRune truncates words by runes.
func (c *ContentSpec) TruncateWordsByRune(in []string) (string, bool) {
words := make([]string, len(in))
copy(words, in)
2015-09-03 10:22:20 +00:00
count := 0
for index, word := range words {
if count >= c.Cfg.SummaryLength() {
return strings.Join(words[:index], " "), true
}
2015-09-03 10:22:20 +00:00
runeCount := utf8.RuneCountInString(word)
if len(word) == runeCount {
count++
} else if count+runeCount < c.Cfg.SummaryLength() {
count += runeCount
2015-09-03 10:22:20 +00:00
} else {
2016-02-06 11:29:13 +00:00
for ri := range word {
if count >= c.Cfg.SummaryLength() {
truncatedWords := append(words[:index], word[:ri])
return strings.Join(truncatedWords, " "), true
2015-09-03 10:22:20 +00:00
}
count++
2015-09-03 10:22:20 +00:00
}
}
}
return strings.Join(words, " "), false
}
// TruncateWordsToWholeSentence takes content and truncates to whole sentence
// limited by max number of words. It also returns whether it is truncated.
func (c *ContentSpec) TruncateWordsToWholeSentence(s string) (string, bool) {
var (
wordCount = 0
lastWordIndex = -1
)
for i, r := range s {
if unicode.IsSpace(r) {
wordCount++
lastWordIndex = i
if wordCount >= c.Cfg.SummaryLength() {
break
}
}
}
if lastWordIndex == -1 {
return s, false
}
endIndex := -1
for j, r := range s[lastWordIndex:] {
if isEndOfSentence(r) {
endIndex = j + lastWordIndex + utf8.RuneLen(r)
break
}
}
if endIndex == -1 {
return s, false
}
return strings.TrimSpace(s[:endIndex]), endIndex < len(s)
}
// TrimShortHTML removes the <p>/</p> tags from HTML input in the situation
// where said tags are the only <p> tags in the input and enclose the content
// of the input (whitespace excluded).
func (c *ContentSpec) TrimShortHTML(input []byte) []byte {
firstOpeningP := bytes.Index(input, paragraphIndicator)
lastOpeningP := bytes.LastIndex(input, paragraphIndicator)
lastClosingP := bytes.LastIndex(input, closingPTag)
lastClosing := bytes.LastIndex(input, closingIndicator)
if firstOpeningP == lastOpeningP && lastClosingP == lastClosing {
input = bytes.TrimSpace(input)
input = bytes.TrimPrefix(input, openingPTag)
input = bytes.TrimSuffix(input, closingPTag)
input = bytes.TrimSpace(input)
}
return input
}
func isEndOfSentence(r rune) bool {
return r == '.' || r == '?' || r == '!' || r == '"' || r == '\n'
}
// Kept only for benchmark.
func (c *ContentSpec) truncateWordsToWholeSentenceOld(content string) (string, bool) {
words := strings.Fields(content)
if c.Cfg.SummaryLength() >= len(words) {
return strings.Join(words, " "), false
}
for counter, word := range words[c.Cfg.SummaryLength():] {
if strings.HasSuffix(word, ".") ||
strings.HasSuffix(word, "?") ||
strings.HasSuffix(word, ".\"") ||
strings.HasSuffix(word, "!") {
upper := c.Cfg.SummaryLength() + counter + 1
return strings.Join(words[:upper], " "), (upper < len(words))
}
}
return strings.Join(words[:c.Cfg.SummaryLength()], " "), true
}