241 lines
5.6 KiB
Go
241 lines
5.6 KiB
Go
// Package stringutil exports common rune and string utilities for parsing and emitting JavaScript.
|
|
package stringutil
|
|
|
|
import (
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
func IsWhiteSpaceLike(ch rune) bool {
|
|
return IsWhiteSpaceSingleLine(ch) || IsLineBreak(ch)
|
|
}
|
|
|
|
// IsWhiteSpaceSingleLine reports whether ch is a white space character that
// does not terminate a line.
//
// Note: nextLine (U+0085) is treated as white space here. It is explicitly not
// a line break, because it is not in the exact set of line terminators
// specified by ECMAScript.
func IsWhiteSpaceSingleLine(ch rune) bool {
	// U+2000 through U+200B are contiguous: enQuad, emQuad, enSpace, emSpace,
	// threePerEmSpace, fourPerEmSpace, sixPerEmSpace, figureSpace,
	// punctuationEmSpace, thinSpace, hairSpace, zeroWidthSpace.
	if 0x2000 <= ch && ch <= 0x200B {
		return true
	}
	switch ch {
	case ' ', '\t', '\v', '\f': // space, tab, verticalTab, formFeed
		return true
	case 0x0085, // nextLine
		0x00A0, // nonBreakingSpace
		0x1680, // ogham
		0x202F, // narrowNoBreakSpace
		0x205F, // mathematicalSpace
		0x3000, // ideographicSpace
		0xFEFF: // byteOrderMark
		return true
	default:
		return false
	}
}
|
|
|
|
// IsLineBreak reports whether ch is an ECMAScript line terminator.
//
// ES5 7.3, Table 3 (Line Terminator Characters):
//
//	Code Unit Value   Name                  Formal Name
//	\u000A            Line Feed             <LF>
//	\u000D            Carriage Return       <CR>
//	\u2028            Line separator        <LS>
//	\u2029            Paragraph separator   <PS>
//
// Only the characters in Table 3 are treated as line terminators. Other new
// line or line breaking characters are treated as white space but not as line
// terminators.
func IsLineBreak(ch rune) bool {
	return ch == '\n' || // lineFeed
		ch == '\r' || // carriageReturn
		ch == 0x2028 || // lineSeparator
		ch == 0x2029 // paragraphSeparator
}
|
|
|
|
// IsDigit reports whether ch is an ASCII decimal digit, '0' through '9'.
func IsDigit(ch rune) bool {
	return '0' <= ch && ch <= '9'
}
|
|
|
|
// IsOctalDigit reports whether ch is an ASCII octal digit, '0' through '7'.
func IsOctalDigit(ch rune) bool {
	return '0' <= ch && ch <= '7'
}
|
|
|
|
// IsHexDigit reports whether ch is an ASCII hexadecimal digit: '0'-'9',
// 'A'-'F' or 'a'-'f'.
func IsHexDigit(ch rune) bool {
	switch {
	case '0' <= ch && ch <= '9',
		'A' <= ch && ch <= 'F',
		'a' <= ch && ch <= 'f':
		return true
	}
	return false
}
|
|
|
|
// IsASCIILetter reports whether ch is an ASCII letter, 'A'-'Z' or 'a'-'z'.
func IsASCIILetter(ch rune) bool {
	if 'a' <= ch && ch <= 'z' {
		return true
	}
	return 'A' <= ch && ch <= 'Z'
}
|
|
|
|
// SplitLines splits text into lines at "\r\n", "\r" and "\n". The separators
// are not included in the returned lines. Text following the last separator is
// returned as a final line only when it is non-empty, so a trailing separator
// does not produce a trailing empty string, and empty input yields an empty
// (non-nil) slice.
func SplitLines(text string) []string {
	result := make([]string, 0, strings.Count(text, "\n")+1) // preallocate
	lineStart := 0
	for i := 0; i < len(text); i++ {
		c := text[i]
		if c != '\r' && c != '\n' {
			continue
		}
		result = append(result, text[lineStart:i])
		if c == '\r' && i+1 < len(text) && text[i+1] == '\n' {
			// "\r\n" counts as a single separator: step over the '\n' too.
			i++
		}
		lineStart = i + 1
	}
	if lineStart < len(text) {
		result = append(result, text[lineStart:])
	}
	return result
}
|
|
|
|
func GuessIndentation(lines []string) int {
|
|
const MAX_SMI_X86 int = 0x3fff_ffff
|
|
indentation := MAX_SMI_X86
|
|
for _, line := range lines {
|
|
if len(line) == 0 {
|
|
continue
|
|
}
|
|
i := 0
|
|
for i < len(line) && i < indentation {
|
|
ch, size := utf8.DecodeRuneInString(line[i:])
|
|
if !IsWhiteSpaceLike(ch) {
|
|
break
|
|
}
|
|
i += size
|
|
}
|
|
if i < indentation {
|
|
indentation = i
|
|
}
|
|
if indentation == 0 {
|
|
return 0
|
|
}
|
|
}
|
|
if indentation == MAX_SMI_X86 {
|
|
return 0
|
|
}
|
|
return indentation
|
|
}
|
|
|
|
// encodeURIFixups rewrites the output of url.QueryEscape into the form
// ECMAScript's encodeURI produces. QueryEscape differs in two ways: it encodes
// a space as "+" rather than "%20", and it percent-encodes the marks
// ! ' ( ) * which encodeURI leaves untouched.
//
// The rewrite is unambiguous: QueryEscape never emits a literal "+" for
// anything but a space (a real '+' becomes "%2B"), and every '%' in its output
// starts a three-byte escape (a real '%' becomes "%25").
var encodeURIFixups = strings.NewReplacer(
	"+", "%20",
	"%21", "!",
	"%27", "'",
	"%28", "(",
	"%29", ")",
	"%2A", "*",
)

// EncodeURI percent-encodes s the way ECMAScript's encodeURI does: the
// reserved characters ; / ? : @ & = + $ , # and the unreserved characters
// (ASCII letters, digits and - _ . ! ~ * ' ( )) are copied through unchanged,
// and every other byte of the UTF-8 encoding is written as %XX with uppercase
// hex digits.
//
// https://tc39.es/ecma262/multipage/global-object.html#sec-encodeuri-uri
func EncodeURI(s string) string {
	// Characters that are passed through verbatim and split s into segments.
	const reserved = ";/?:@&=+$,#"

	var builder strings.Builder
	start := 0
	pos := indexAny(s, reserved, 0)
	for pos >= 0 {
		builder.WriteString(encodeURIFixups.Replace(url.QueryEscape(s[start:pos])))
		builder.WriteByte(s[pos])
		start = pos + 1
		pos = indexAny(s, reserved, start)
	}
	if start < len(s) {
		builder.WriteString(encodeURIFixups.Replace(url.QueryEscape(s[start:])))
	}
	return builder.String()
}

// indexAny returns the byte index of the first occurrence in s, at or after
// start, of any character from chars. It returns -1 if there is none or if
// start is outside [0, len(s)).
func indexAny(s, chars string, start int) int {
	if start < 0 || start >= len(s) {
		return -1
	}
	index := strings.IndexAny(s[start:], chars)
	if index < 0 {
		return -1
	}
	return start + index
}
|
|
|
|
// getByteOrderMarkLength returns the number of leading bytes of text that form
// a byte order mark: 3 for the UTF-8 mark (EF BB BF), 2 for the UTF-16
// big-endian (FE FF) or little-endian (FF FE) mark, and 0 when text starts
// with none of these.
func getByteOrderMarkLength(text string) int {
	switch {
	case len(text) >= 3 && text[:3] == "\xef\xbb\xbf":
		return 3 // utf8
	case len(text) >= 2 && text[:2] == "\xfe\xff":
		return 2 // utf16be
	case len(text) >= 2 && text[:2] == "\xff\xfe":
		return 2 // utf16le
	}
	return 0
}
|
|
|
|
func RemoveByteOrderMark(text string) string {
|
|
length := getByteOrderMarkLength(text)
|
|
if length > 0 {
|
|
return text[length:]
|
|
}
|
|
return text
|
|
}
|
|
|
|
func AddUTF8ByteOrderMark(text string) string {
|
|
if getByteOrderMarkLength(text) == 0 {
|
|
return "\xEF\xBB\xBF" + text
|
|
}
|
|
return text
|
|
}
|
|
|
|
// StripQuotes removes one pair of matching quotes from name. If name is at
// least two bytes long and both starts and ends with the same quote character
// (single quote, double quote or backtick), the text between them is returned.
// Otherwise name is returned unchanged.
func StripQuotes(name string) string {
	// A lone quote character is both the first and the last rune; without this
	// guard it would reach name[1:0] below and panic.
	if len(name) < 2 {
		return name
	}
	firstChar, _ := utf8.DecodeRuneInString(name)
	lastChar, _ := utf8.DecodeLastRuneInString(name)
	if firstChar == lastChar && (firstChar == '\'' || firstChar == '"' || firstChar == '`') {
		return name[1 : len(name)-1]
	}
	return name
}
|
|
|
|
// matchSlashSomething matches a backslash followed by any single character
// other than a newline. It mirrors the JavaScript pattern /\\./g. The
// backslash must itself be escaped in the pattern: a bare `\.` would match a
// literal dot instead.
var matchSlashSomething = regexp.MustCompile(`\\.`)

// matchSlashReplacer returns in without its first byte. Applied to a match of
// matchSlashSomething, that drops the leading backslash and keeps the
// character that followed it.
func matchSlashReplacer(in string) string {
	return in[1:]
}
|
|
|
|
func UnquoteString(str string) string {
|
|
// strconv.Unquote is insufficient as that only handles a single character inside single quotes, as those are character literals in go
|
|
inner := StripQuotes(str)
|
|
// In strada we do str.replace(/\\./g, s => s.substring(1)) - which is to say, replace all backslash-something with just something
|
|
// That's replicated here faithfully, but it seems wrong! This should probably be an actual unquote operation?
|
|
return matchSlashSomething.ReplaceAllStringFunc(inner, matchSlashReplacer)
|
|
}
|
|
|
|
// LowerFirstChar returns str with its first rune mapped through
// unicode.ToLower and the remainder left as is. An empty string is returned
// unchanged.
func LowerFirstChar(str string) string {
	first, width := utf8.DecodeRuneInString(str)
	if width <= 0 {
		// Nothing was decoded, so there is nothing to lower-case.
		return str
	}
	lowered := unicode.ToLower(first)
	return string(lowered) + str[width:]
}
|