// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// A Glob is an LSP-compliant glob pattern, as defined by the spec:
// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#documentFilter
//
// NOTE: this implementation is currently only intended for testing. In order
// to make it production ready, we'd need to:
//   - verify it against the VS Code implementation
//   - add more tests
//   - microbenchmark, likely avoiding the element interface
//   - resolve the question of what is meant by "character". If it's a UTF-16
//     code (as we suspect) it'll be a bit more work.
//
// Quoting from the spec:
// Glob patterns can have the following syntax:
//   - `*` to match one or more characters in a path segment
//   - `?` to match on one character in a path segment
//   - `**` to match any number of path segments, including none
//   - `{}` to group sub patterns into an OR expression. (e.g. `**/*.{ts,js}`
//     matches all TypeScript and JavaScript files)
//   - `[]` to declare a range of characters to match in a path segment
//     (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
//   - `[!...]` to negate a range of characters to match in a path segment
//     (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but
//     not `example.0`)
//
// Expanding on this:
//   - '/' matches one or more literal slashes.
//   - `*` matches zero or more characters here (so `f*o` matches `fo`), and
//     never crosses a '/'.
//   - `?` and `[x-y]` each consume exactly one UTF-8 encoded rune.
//   - any other character matches itself literally.
type Glob struct {
	elems []element // pattern elements
}

// Parse builds a Glob for the given pattern, returning an error if the pattern
// is invalid.
func Parse(pattern string) (*Glob, error) {
	g, _, err := parse(pattern, false)
	return g, err
}

// parse consumes pattern and returns the parsed Glob together with the
// unconsumed remainder of the pattern.
//
// When nested is true the parser is inside a '{...}' group: it stops (without
// consuming) at the first top-level ',' or '}' and returns the rest of the
// pattern starting at that byte. When nested is false those two bytes are
// ordinary literal characters and the returned remainder is always empty.
func parse(pattern string, nested bool) (*Glob, string, error) {
	g := new(Glob)
	for len(pattern) > 0 {
		switch pattern[0] {
		case '/':
			pattern = pattern[1:]
			g.elems = append(g.elems, slash{})

		case '*':
			if !strings.HasPrefix(pattern, "**") {
				pattern = pattern[1:]
				g.elems = append(g.elems, star{})
				break
			}
			// "**" must form a whole path segment: whatever precedes it (if
			// anything) must be a slash, and so must whatever follows it.
			badBefore := len(g.elems) > 0 && g.elems[len(g.elems)-1] != (slash{})
			badAfter := len(pattern) > 2 && pattern[2] != '/'
			if badBefore || badAfter {
				return nil, "", errors.New("** may only be adjacent to '/'")
			}
			pattern = pattern[2:]
			g.elems = append(g.elems, starStar{})

		case '?':
			pattern = pattern[1:]
			g.elems = append(g.elems, anyChar{})

		case '{':
			var alts group
			// On entry pattern[0] is '{'; after each alternative it is the
			// ',' or '}' at which the nested parse stopped.
			for pattern[0] != '}' {
				sub, rest, err := parse(pattern[1:], true)
				if err != nil {
					return nil, "", err
				}
				if len(rest) == 0 {
					// The nested parse ran off the end without seeing '}'.
					return nil, "", errors.New("unmatched '{'")
				}
				pattern = rest
				alts = append(alts, sub)
			}
			pattern = pattern[1:] // consume '}'
			g.elems = append(g.elems, alts)

		case '}', ',':
			if nested {
				return g, pattern, nil
			}
			// Outside of a group these are plain literal characters.
			pattern = g.parseLiteral(pattern, false)

		case '[':
			r, rest, err := parseRange(pattern[1:])
			if err != nil {
				return nil, "", err
			}
			pattern = rest
			g.elems = append(g.elems, r)

		default:
			pattern = g.parseLiteral(pattern, nested)
		}
	}
	return g, "", nil
}

// parseRange parses the body of a character range. The input is the pattern
// text immediately after the opening '[', and must have the form "x-y]" or
// "!x-y]". It returns the range and the pattern text following the ']'.
func parseRange(pattern string) (charRange, string, error) {
	if len(pattern) == 0 {
		return charRange{}, "", errBadRange
	}
	negate := false
	if pattern[0] == '!' {
		negate = true
		pattern = pattern[1:]
	}

	low, sz, err := readRangeRune(pattern)
	if err != nil {
		return charRange{}, "", err
	}
	pattern = pattern[sz:]
	if len(pattern) == 0 || pattern[0] != '-' {
		return charRange{}, "", errBadRange
	}
	pattern = pattern[1:]

	high, sz, err := readRangeRune(pattern)
	if err != nil {
		return charRange{}, "", err
	}
	pattern = pattern[sz:]
	if len(pattern) == 0 || pattern[0] != ']' {
		return charRange{}, "", errBadRange
	}
	return charRange{negate: negate, low: low, high: high}, pattern[1:], nil
}

// readRangeRune decodes the rune at the start of input, as used for the
// endpoints of a range element such as [a-z]. It returns the rune and its
// encoded width. Empty input yields errBadRange and an invalid encoding
// yields errInvalidUTF8.
func readRangeRune(input string) (rune, int, error) {
	r, sz := utf8.DecodeRuneInString(input)
	var err error
	if r == utf8.RuneError {
		// See the documentation for DecodeRuneInString: a width of 3 means
		// the input really contained an encoded U+FFFD, which is not an error.
		switch sz {
		case 0:
			err = errBadRange
		case 1:
			err = errInvalidUTF8
		}
	}
	return r, sz, err
}

var (
	errBadRange    = errors.New("'[' patterns must be of the form [x-y]")
	errInvalidUTF8 = errors.New("invalid UTF-8 encoding")
)

// parseLiteral appends to g the literal formed by the longest prefix of
// pattern that contains no special character after its first byte position
// search, and returns the rest of the pattern. Inside a group (nested), '}'
// and ',' also terminate the literal.
func (g *Glob) parseLiteral(pattern string, nested bool) string {
	specialChars := "*?{[/"
	if nested {
		specialChars = "*?{[/},"
	}
	end := strings.IndexAny(pattern, specialChars)
	if end == -1 {
		end = len(pattern)
	}
	g.elems = append(g.elems, literal(pattern[:end]))
	return pattern[end:]
}

// String returns the pattern text reassembled from the parsed elements.
func (g *Glob) String() string {
	var b strings.Builder
	for _, e := range g.elems {
		b.WriteString(e.String())
	}
	return b.String()
}

// element holds a glob pattern element, as defined below.
type element fmt.Stringer

// element types.
type (
	slash     struct{} // One or more '/' separators
	literal   string   // string literal, not containing /, *, ?, {}, or []
	star      struct{} // *
	anyChar   struct{} // ?
	starStar  struct{} // **
	group     []*Glob  // {foo, bar, ...} grouping
	charRange struct { // [a-z] character range
		negate    bool
		low, high rune
	}
)

func (s slash) String() string    { return "/" }
func (l literal) String() string  { return string(l) }
func (s star) String() string     { return "*" }
func (a anyChar) String() string  { return "?" }
func (s starStar) String() string { return "**" }

func (g group) String() string {
	parts := make([]string, 0, len(g))
	for _, alt := range g {
		parts = append(parts, alt.String())
	}
	return "{" + strings.Join(parts, ",") + "}"
}

func (r charRange) String() string {
	bang := ""
	if r.negate {
		bang = "!"
	}
	return "[" + bang + string(r.low) + "-" + string(r.high) + "]"
}

// Match reports whether the input string matches the glob pattern.
func (g *Glob) Match(input string) bool {
	return match(g.elems, input)
}

// match reports whether input is matched in full by the element sequence
// elems. It is a simple recursive backtracking matcher.
func match(elems []element, input string) bool {
	for len(elems) > 0 {
		head := elems[0]
		elems = elems[1:]

		switch elem := head.(type) {
		case slash:
			if len(input) == 0 || input[0] != '/' {
				return false
			}
			// A slash element absorbs a whole run of separators. TrimLeft is
			// safe when the run reaches the end of the input.
			input = strings.TrimLeft(input, "/")

		case starStar:
			// Special cases:
			//  - **/a matches "a"
			//  - **/ matches everything
			//
			// Note that if ** is followed by anything, it must be '/' (this is
			// enforced by Parse), so that slash is skipped here.
			if len(elems) > 0 {
				elems = elems[1:]
			}

			// A trailing ** matches anything.
			if len(elems) == 0 {
				return true
			}

			// Backtracking: drop leading path segments one at a time until
			// the remaining pattern elements match.
			for len(input) != 0 {
				if match(elems, input) {
					return true
				}
				_, input = split(input)
			}
			return false

		case literal:
			if !strings.HasPrefix(input, string(elem)) {
				return false
			}
			input = input[len(elem):]

		case star:
			// A star may absorb any prefix of the current path segment,
			// including the empty one, but never a '/'. The separator is left
			// in the input so that a following slash element can match it.
			seg := input
			if i := strings.IndexByte(input, '/'); i >= 0 {
				seg = input[:i]
			}
			// Backtracking: try each rune boundary inside the segment...
			for i := range seg {
				if match(elems, input[i:]) {
					return true
				}
			}
			// ...and finally let the star swallow the whole segment.
			return match(elems, input[len(seg):])

		case anyChar:
			if len(input) == 0 || input[0] == '/' {
				return false
			}
			// Consume a whole rune, consistent with charRange.
			_, sz := utf8.DecodeRuneInString(input)
			input = input[sz:]

		case group:
			// Append remaining pattern elements to each group member looking
			// for a match.
			var branch []element
			for _, alt := range elem {
				branch = branch[:0]
				branch = append(branch, alt.elems...)
				branch = append(branch, elems...)
				if match(branch, input) {
					return true
				}
			}
			return false

		case charRange:
			if len(input) == 0 || input[0] == '/' {
				return false
			}
			c, sz := utf8.DecodeRuneInString(input)
			inRange := elem.low <= c && c <= elem.high
			// A plain range needs the rune inside it; a negated one needs it
			// outside.
			if inRange == elem.negate {
				return false
			}
			input = input[sz:]

		default:
			panic(fmt.Sprintf("segment type %T not implemented", elem))
		}
	}

	return len(input) == 0
}

// split returns the portion before and after the first slash
// (or sequence of consecutive slashes). If there is no slash
// it returns (input, "").
func split(input string) (first, rest string) {
	i := strings.IndexByte(input, '/')
	if i < 0 {
		return input, ""
	}
	return input[:i], strings.TrimLeft(input[i:], "/")
}