From f7691fe9652aa12b6c582dea0ae2555e772d1a5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?=
 <bjorn.erik.pedersen@gmail.com>
Date: Mon, 17 Dec 2018 17:42:46 +0100
Subject: [PATCH] transform/urlreplacers: Simplify implementation

---
 transform/urlreplacers/absurlreplacer.go | 146 ++++++-----------------
 1 file changed, 36 insertions(+), 110 deletions(-)

diff --git a/transform/urlreplacers/absurlreplacer.go b/transform/urlreplacers/absurlreplacer.go
index 45b98f821..27f32b200 100644
--- a/transform/urlreplacers/absurlreplacer.go
+++ b/transform/urlreplacers/absurlreplacer.go
@@ -22,15 +22,6 @@ import (
 	"github.com/gohugoio/hugo/transform"
 )
 
-type matchState int
-
-const (
-	matchStateNone matchState = iota
-	matchStateWhitespace
-	matchStatePartial
-	matchStateFull
-)
-
 type absurllexer struct {
 	// the source to absurlify
 	content []byte
@@ -42,31 +33,24 @@ type absurllexer struct {
 
 	pos   int // input position
 	start int // item start position
-	width int // width of last element
 
 	quotes [][]byte
-
-	ms      matchState
-	matches [3]bool // track matches of the 3 prefixes
-	idx     int     // last index in matches checked
-
 }
 
 type stateFunc func(*absurllexer) stateFunc
 
-// prefix is how to identify and which func to handle the replacement.
 type prefix struct {
-	r []rune
-	f func(l *absurllexer)
+	disabled bool
+	b        []byte
+	f        func(l *absurllexer)
 }
 
-// new prefixes can be added below, but note:
-// - the matches array above must be expanded.
-// - the prefix must with the current logic end with '='
-var prefixes = []*prefix{
-	{r: []rune{'s', 'r', 'c', '='}, f: checkCandidateBase},
-	{r: []rune{'h', 'r', 'e', 'f', '='}, f: checkCandidateBase},
-	{r: []rune{'s', 'r', 'c', 's', 'e', 't', '='}, f: checkCandidateSrcset},
+func newPrefixState() []*prefix {
+	return []*prefix{
+		{b: []byte("src="), f: checkCandidateBase},
+		{b: []byte("href="), f: checkCandidateBase},
+		{b: []byte("srcset="), f: checkCandidateSrcset},
+	}
 }
 
 type absURLMatcher struct {
@@ -74,68 +58,6 @@ type absURLMatcher struct {
 	quote []byte
 }
 
-// match check rune inside word. Will be != ' '.
-func (l *absurllexer) match(r rune) {
-
-	var found bool
-
-	// note, the prefixes can start off on the same foot, i.e.
-	// src and srcset.
-	if l.ms == matchStateWhitespace {
-		l.idx = 0
-		for j, p := range prefixes {
-			if r == p.r[l.idx] {
-				l.matches[j] = true
-				found = true
-				// checkMatchState will only return true when r=='=', so
-				// we can safely ignore the return value here.
-				l.checkMatchState(r, j)
-			}
-		}
-
-		if !found {
-			l.ms = matchStateNone
-		}
-
-		return
-	}
-
-	l.idx++
-	for j, m := range l.matches {
-		// still a match?
-		if m {
-			if prefixes[j].r[l.idx] == r {
-				found = true
-				if l.checkMatchState(r, j) {
-					return
-				}
-			} else {
-				l.matches[j] = false
-			}
-		}
-	}
-
-	if !found {
-		l.ms = matchStateNone
-	}
-}
-
-func (l *absurllexer) checkMatchState(r rune, idx int) bool {
-	if r == '=' {
-		l.ms = matchStateFull
-		for k := range l.matches {
-			if k != idx {
-				l.matches[k] = false
-			}
-		}
-		return true
-	}
-
-	l.ms = matchStatePartial
-
-	return false
-}
-
 func (l *absurllexer) emit() {
 	l.w.Write(l.content[l.start:l.pos])
 	l.start = l.pos
@@ -255,36 +177,40 @@ func checkCandidateSrcset(l *absurllexer) {
 // main loop
 func (l *absurllexer) replace() {
 	contentLength := len(l.content)
-	var r rune
+
+	prefixes := newPrefixState()
 
 	for {
 		if l.pos >= contentLength {
-			l.width = 0
 			break
 		}
 
-		var width = 1
-		r = rune(l.content[l.pos])
-		if r >= utf8.RuneSelf {
-			r, width = utf8.DecodeRune(l.content[l.pos:])
-		}
-		l.width = width
-		l.pos += l.width
-		if r == ' ' {
-			l.ms = matchStateWhitespace
-		} else if l.ms != matchStateNone {
-			l.match(r)
-			if l.ms == matchStateFull {
-				var p *prefix
-				for i, m := range l.matches {
-					if m {
-						p = prefixes[i]
-						l.matches[i] = false
-					}
-				}
-				l.ms = matchStateNone
-				p.f(l)
+		nextPos := -1
+
+		var match *prefix
+
+		for _, p := range prefixes {
+			if p.disabled {
+				continue
 			}
+			idx := bytes.Index(l.content[l.pos:], p.b)
+
+			if idx == -1 {
+				p.disabled = true
+				// Find the closest match
+			} else if nextPos == -1 || idx < nextPos {
+				nextPos = idx
+				match = p
+			}
+		}
+
+		if nextPos == -1 {
+			// Done!
+			l.pos = contentLength
+			break
+		} else {
+			l.pos += nextPos + len(match.b)
+			match.f(l)
 		}
 	}