// Copyright 2019 The Hugo Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package helpers implements general utility functions that work with // and on content. The helper functions defined here lay down the // foundation of how Hugo works with files and filepaths, and perform // string operations on content. package helpers import ( "bytes" "fmt" "html/template" "os/exec" "runtime" "unicode" "unicode/utf8" "github.com/gohugoio/hugo/common/maps" "github.com/niklasfasching/go-org/org" bp "github.com/gohugoio/hugo/bufferpool" "github.com/gohugoio/hugo/config" "github.com/miekg/mmark" "github.com/mitchellh/mapstructure" "github.com/russross/blackfriday" jww "github.com/spf13/jwalterweatherman" "strings" ) // SummaryDivider denotes where content summarization should end. The default is "". var SummaryDivider = []byte("") var ( openingPTag = []byte("

") closingPTag = []byte("

") paragraphIndicator = []byte("", "\n", "
", "\n", "
", "\n") var mmarkExtensionMap = map[string]int{ "tables": mmark.EXTENSION_TABLES, "fencedCode": mmark.EXTENSION_FENCED_CODE, "autolink": mmark.EXTENSION_AUTOLINK, "laxHtmlBlocks": mmark.EXTENSION_LAX_HTML_BLOCKS, "spaceHeaders": mmark.EXTENSION_SPACE_HEADERS, "hardLineBreak": mmark.EXTENSION_HARD_LINE_BREAK, "footnotes": mmark.EXTENSION_FOOTNOTES, "noEmptyLineBeforeBlock": mmark.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK, "headerIds": mmark.EXTENSION_HEADER_IDS, "autoHeaderIds": mmark.EXTENSION_AUTO_HEADER_IDS, } // StripHTML accepts a string, strips out all HTML tags and returns it. func StripHTML(s string) string { // Shortcut strings with no tags in them if !strings.ContainsAny(s, "<>") { return s } s = stripHTMLReplacer.Replace(s) // Walk through the string removing all tags b := bp.GetBuffer() defer bp.PutBuffer(b) var inTag, isSpace, wasSpace bool for _, r := range s { if !inTag { isSpace = false } switch { case r == '<': inTag = true case r == '>': inTag = false case unicode.IsSpace(r): isSpace = true fallthrough default: if !inTag && (!isSpace || (isSpace && !wasSpace)) { b.WriteRune(r) } } wasSpace = isSpace } return b.String() } // stripEmptyNav strips out empty