465 lines
16 KiB
Go
465 lines
16 KiB
Go
package vfs
|
|
|
|
import (
|
|
"fmt"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
|
|
"efprojects.com/kitten-ipc/kitcom/internal/tsgo/collections"
|
|
"efprojects.com/kitten-ipc/kitcom/internal/tsgo/core"
|
|
"efprojects.com/kitten-ipc/kitcom/internal/tsgo/stringutil"
|
|
"efprojects.com/kitten-ipc/kitcom/internal/tsgo/tspath"
|
|
"github.com/dlclark/regexp2"
|
|
)
|
|
|
|
type FileMatcherPatterns struct {
|
|
// One pattern for each "include" spec.
|
|
includeFilePatterns []string
|
|
// One pattern matching one of any of the "include" specs.
|
|
includeFilePattern string
|
|
includeDirectoryPattern string
|
|
excludePattern string
|
|
basePaths []string
|
|
}
|
|
|
|
type usage string
|
|
|
|
const (
|
|
usageFiles usage = "files"
|
|
usageDirectories usage = "directories"
|
|
usageExclude usage = "exclude"
|
|
)
|
|
|
|
func GetRegularExpressionsForWildcards(specs []string, basePath string, usage usage) []string {
|
|
if len(specs) == 0 {
|
|
return nil
|
|
}
|
|
return core.Map(specs, func(spec string) string {
|
|
return getSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage])
|
|
})
|
|
}
|
|
|
|
func GetRegularExpressionForWildcard(specs []string, basePath string, usage usage) string {
|
|
patterns := GetRegularExpressionsForWildcards(specs, basePath, usage)
|
|
if len(patterns) == 0 {
|
|
return ""
|
|
}
|
|
|
|
mappedPatterns := make([]string, len(patterns))
|
|
for i, pattern := range patterns {
|
|
mappedPatterns[i] = fmt.Sprintf("(%s)", pattern)
|
|
}
|
|
pattern := strings.Join(mappedPatterns, "|")
|
|
|
|
// If excluding, match "foo/bar/baz...", but if including, only allow "foo".
|
|
var terminator string
|
|
if usage == "exclude" {
|
|
terminator = "($|/)"
|
|
} else {
|
|
terminator = "$"
|
|
}
|
|
return fmt.Sprintf("^(%s)%s", pattern, terminator)
|
|
}
|
|
|
|
func replaceWildcardCharacter(match string, singleAsteriskRegexFragment string) string {
|
|
if match == "*" {
|
|
return singleAsteriskRegexFragment
|
|
} else {
|
|
if match == "?" {
|
|
return "[^/]"
|
|
} else {
|
|
return "\\" + match
|
|
}
|
|
}
|
|
}
|
|
|
|
// An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension,
|
|
// and does not contain any glob characters itself.
|
|
func IsImplicitGlob(lastPathComponent string) bool {
|
|
return !strings.ContainsAny(lastPathComponent, ".*?")
|
|
}
|
|
|
|
// Reserved characters, forces escaping of any non-word (or digit), non-whitespace character.
|
|
// It may be inefficient (we could just match (/[-[\]{}()*+?.,\\^$|#\s]/g), but this is future
|
|
// proof.
|
|
var (
|
|
reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[^\w\s/]`)
|
|
wildcardCharCodes = []rune{'*', '?'}
|
|
)
|
|
|
|
var (
|
|
commonPackageFolders = []string{"node_modules", "bower_components", "jspm_packages"}
|
|
implicitExcludePathRegexPattern = "(?!(" + strings.Join(commonPackageFolders, "|") + ")(/|$))"
|
|
)
|
|
|
|
type WildcardMatcher struct {
|
|
singleAsteriskRegexFragment string
|
|
doubleAsteriskRegexFragment string
|
|
replaceWildcardCharacter func(match string) string
|
|
}
|
|
|
|
const (
|
|
// Matches any single directory segment unless it is the last segment and a .min.js file
|
|
// Breakdown:
|
|
//
|
|
// [^./] # matches everything up to the first . character (excluding directory separators)
|
|
// (\\.(?!min\\.js$))? # matches . characters but not if they are part of the .min.js file extension
|
|
singleAsteriskRegexFragmentFilesMatcher = "([^./]|(\\.(?!min\\.js$))?)*"
|
|
singleAsteriskRegexFragment = "[^/]*"
|
|
)
|
|
|
|
var filesMatcher = WildcardMatcher{
|
|
singleAsteriskRegexFragment: singleAsteriskRegexFragmentFilesMatcher,
|
|
// Regex for the ** wildcard. Matches any number of subdirectories. When used for including
|
|
// files or directories, does not match subdirectories that start with a . character
|
|
doubleAsteriskRegexFragment: "(/" + implicitExcludePathRegexPattern + "[^/.][^/]*)*?",
|
|
replaceWildcardCharacter: func(match string) string {
|
|
return replaceWildcardCharacter(match, singleAsteriskRegexFragmentFilesMatcher)
|
|
},
|
|
}
|
|
|
|
var directoriesMatcher = WildcardMatcher{
|
|
singleAsteriskRegexFragment: singleAsteriskRegexFragment,
|
|
// Regex for the ** wildcard. Matches any number of subdirectories. When used for including
|
|
// files or directories, does not match subdirectories that start with a . character
|
|
doubleAsteriskRegexFragment: "(/" + implicitExcludePathRegexPattern + "[^/.][^/]*)*?",
|
|
replaceWildcardCharacter: func(match string) string {
|
|
return replaceWildcardCharacter(match, singleAsteriskRegexFragment)
|
|
},
|
|
}
|
|
|
|
var excludeMatcher = WildcardMatcher{
|
|
singleAsteriskRegexFragment: singleAsteriskRegexFragment,
|
|
doubleAsteriskRegexFragment: "(/.+?)?",
|
|
replaceWildcardCharacter: func(match string) string {
|
|
return replaceWildcardCharacter(match, singleAsteriskRegexFragment)
|
|
},
|
|
}
|
|
|
|
var wildcardMatchers = map[usage]WildcardMatcher{
|
|
usageFiles: filesMatcher,
|
|
usageDirectories: directoriesMatcher,
|
|
usageExclude: excludeMatcher,
|
|
}
|
|
|
|
func GetPatternFromSpec(
|
|
spec string,
|
|
basePath string,
|
|
usage usage,
|
|
) string {
|
|
pattern := getSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage])
|
|
if pattern == "" {
|
|
return ""
|
|
}
|
|
ending := core.IfElse(usage == "exclude", "($|/)", "$")
|
|
return fmt.Sprintf("^(%s)%s", pattern, ending)
|
|
}
|
|
|
|
func getSubPatternFromSpec(
|
|
spec string,
|
|
basePath string,
|
|
usage usage,
|
|
matcher WildcardMatcher,
|
|
) string {
|
|
matcher = wildcardMatchers[usage]
|
|
|
|
replaceWildcardCharacter := matcher.replaceWildcardCharacter
|
|
|
|
var subpattern strings.Builder
|
|
hasWrittenComponent := false
|
|
components := tspath.GetNormalizedPathComponents(spec, basePath)
|
|
lastComponent := core.LastOrNil(components)
|
|
if usage != "exclude" && lastComponent == "**" {
|
|
return ""
|
|
}
|
|
|
|
// getNormalizedPathComponents includes the separator for the root component.
|
|
// We need to remove to create our regex correctly.
|
|
components[0] = tspath.RemoveTrailingDirectorySeparator(components[0])
|
|
|
|
if IsImplicitGlob(lastComponent) {
|
|
components = append(components, "**", "*")
|
|
}
|
|
|
|
optionalCount := 0
|
|
for _, component := range components {
|
|
if component == "**" {
|
|
subpattern.WriteString(matcher.doubleAsteriskRegexFragment)
|
|
} else {
|
|
if usage == "directories" {
|
|
subpattern.WriteString("(")
|
|
optionalCount++
|
|
}
|
|
|
|
if hasWrittenComponent {
|
|
subpattern.WriteRune(tspath.DirectorySeparator)
|
|
}
|
|
|
|
if usage != "exclude" {
|
|
var componentPattern strings.Builder
|
|
if strings.HasPrefix(component, "*") {
|
|
componentPattern.WriteString("([^./]" + matcher.singleAsteriskRegexFragment + ")?")
|
|
component = component[1:]
|
|
} else if strings.HasPrefix(component, "?") {
|
|
componentPattern.WriteString("[^./]")
|
|
component = component[1:]
|
|
}
|
|
componentPattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter))
|
|
|
|
// Patterns should not include subfolders like node_modules unless they are
|
|
// explicitly included as part of the path.
|
|
//
|
|
// As an optimization, if the component pattern is the same as the component,
|
|
// then there definitely were no wildcard characters and we do not need to
|
|
// add the exclusion pattern.
|
|
if componentPattern.String() != component {
|
|
subpattern.WriteString(implicitExcludePathRegexPattern)
|
|
}
|
|
subpattern.WriteString(componentPattern.String())
|
|
} else {
|
|
subpattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter))
|
|
}
|
|
}
|
|
hasWrittenComponent = true
|
|
}
|
|
|
|
for optionalCount > 0 {
|
|
subpattern.WriteString(")?")
|
|
optionalCount--
|
|
}
|
|
|
|
return subpattern.String()
|
|
}
|
|
|
|
func getIncludeBasePath(absolute string) string {
|
|
wildcardOffset := strings.IndexAny(absolute, string(wildcardCharCodes))
|
|
if wildcardOffset < 0 {
|
|
// No "*" or "?" in the path
|
|
if !tspath.HasExtension(absolute) {
|
|
return absolute
|
|
} else {
|
|
return tspath.RemoveTrailingDirectorySeparator(tspath.GetDirectoryPath(absolute))
|
|
}
|
|
}
|
|
return absolute[:max(strings.LastIndex(absolute[:wildcardOffset], string(tspath.DirectorySeparator)), 0)]
|
|
}
|
|
|
|
// getBasePaths computes the unique non-wildcard base paths amongst the provided include patterns.
|
|
func getBasePaths(path string, includes []string, useCaseSensitiveFileNames bool) []string {
|
|
// Storage for our results in the form of literal paths (e.g. the paths as written by the user).
|
|
basePaths := []string{path}
|
|
|
|
if len(includes) > 0 {
|
|
// Storage for literal base paths amongst the include patterns.
|
|
includeBasePaths := []string{}
|
|
for _, include := range includes {
|
|
// We also need to check the relative paths by converting them to absolute and normalizing
|
|
// in case they escape the base path (e.g "..\somedirectory")
|
|
var absolute string
|
|
if tspath.IsRootedDiskPath(include) {
|
|
absolute = include
|
|
} else {
|
|
absolute = tspath.NormalizePath(tspath.CombinePaths(path, include))
|
|
}
|
|
// Append the literal and canonical candidate base paths.
|
|
includeBasePaths = append(includeBasePaths, getIncludeBasePath(absolute))
|
|
}
|
|
|
|
// Sort the offsets array using either the literal or canonical path representations.
|
|
stringComparer := stringutil.GetStringComparer(!useCaseSensitiveFileNames)
|
|
sort.SliceStable(includeBasePaths, func(i, j int) bool {
|
|
return stringComparer(includeBasePaths[i], includeBasePaths[j]) < 0
|
|
})
|
|
|
|
// Iterate over each include base path and include unique base paths that are not a
|
|
// subpath of an existing base path
|
|
for _, includeBasePath := range includeBasePaths {
|
|
if core.Every(basePaths, func(basepath string) bool {
|
|
return !tspath.ContainsPath(basepath, includeBasePath, tspath.ComparePathsOptions{CurrentDirectory: path, UseCaseSensitiveFileNames: !useCaseSensitiveFileNames})
|
|
}) {
|
|
basePaths = append(basePaths, includeBasePath)
|
|
}
|
|
}
|
|
}
|
|
|
|
return basePaths
|
|
}
|
|
|
|
// getFileMatcherPatterns generates file matching patterns based on the provided path,
|
|
// includes, excludes, and other parameters. path is the directory of the tsconfig.json file.
|
|
func getFileMatcherPatterns(path string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string) FileMatcherPatterns {
|
|
path = tspath.NormalizePath(path)
|
|
currentDirectory = tspath.NormalizePath(currentDirectory)
|
|
absolutePath := tspath.CombinePaths(currentDirectory, path)
|
|
|
|
return FileMatcherPatterns{
|
|
includeFilePatterns: core.Map(GetRegularExpressionsForWildcards(includes, absolutePath, "files"), func(pattern string) string { return "^" + pattern + "$" }),
|
|
includeFilePattern: GetRegularExpressionForWildcard(includes, absolutePath, "files"),
|
|
includeDirectoryPattern: GetRegularExpressionForWildcard(includes, absolutePath, "directories"),
|
|
excludePattern: GetRegularExpressionForWildcard(excludes, absolutePath, "exclude"),
|
|
basePaths: getBasePaths(path, includes, useCaseSensitiveFileNames),
|
|
}
|
|
}
|
|
|
|
type regexp2CacheKey struct {
|
|
pattern string
|
|
opts regexp2.RegexOptions
|
|
}
|
|
|
|
var (
|
|
regexp2CacheMu sync.RWMutex
|
|
regexp2Cache = make(map[regexp2CacheKey]*regexp2.Regexp)
|
|
)
|
|
|
|
func GetRegexFromPattern(pattern string, useCaseSensitiveFileNames bool) *regexp2.Regexp {
|
|
flags := regexp2.ECMAScript
|
|
if !useCaseSensitiveFileNames {
|
|
flags |= regexp2.IgnoreCase
|
|
}
|
|
opts := regexp2.RegexOptions(flags)
|
|
|
|
key := regexp2CacheKey{pattern, opts}
|
|
|
|
regexp2CacheMu.RLock()
|
|
re, ok := regexp2Cache[key]
|
|
regexp2CacheMu.RUnlock()
|
|
if ok {
|
|
return re
|
|
}
|
|
|
|
regexp2CacheMu.Lock()
|
|
defer regexp2CacheMu.Unlock()
|
|
|
|
re, ok = regexp2Cache[key]
|
|
if ok {
|
|
return re
|
|
}
|
|
|
|
// Avoid infinite growth; may cause thrashing but no worse than not caching at all.
|
|
if len(regexp2Cache) > 1000 {
|
|
clear(regexp2Cache)
|
|
}
|
|
|
|
// Avoid holding onto the pattern string, since this may pin a full config file in memory.
|
|
pattern = strings.Clone(pattern)
|
|
key.pattern = pattern
|
|
|
|
re = regexp2.MustCompile(pattern, opts)
|
|
regexp2Cache[key] = re
|
|
return re
|
|
}
|
|
|
|
type visitor struct {
|
|
includeFileRegexes []*regexp2.Regexp
|
|
excludeRegex *regexp2.Regexp
|
|
includeDirectoryRegex *regexp2.Regexp
|
|
extensions []string
|
|
useCaseSensitiveFileNames bool
|
|
host FS
|
|
visited collections.Set[string]
|
|
results [][]string
|
|
}
|
|
|
|
func (v *visitor) visitDirectory(
|
|
path string,
|
|
absolutePath string,
|
|
depth *int,
|
|
) {
|
|
canonicalPath := tspath.GetCanonicalFileName(absolutePath, v.useCaseSensitiveFileNames)
|
|
if v.visited.Has(canonicalPath) {
|
|
return
|
|
}
|
|
v.visited.Add(canonicalPath)
|
|
systemEntries := v.host.GetAccessibleEntries(absolutePath)
|
|
files := systemEntries.Files
|
|
directories := systemEntries.Directories
|
|
|
|
for _, current := range files {
|
|
name := tspath.CombinePaths(path, current)
|
|
absoluteName := tspath.CombinePaths(absolutePath, current)
|
|
if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(name, v.extensions) {
|
|
continue
|
|
}
|
|
if v.excludeRegex != nil && core.Must(v.excludeRegex.MatchString(absoluteName)) {
|
|
continue
|
|
}
|
|
if v.includeFileRegexes == nil {
|
|
(v.results)[0] = append((v.results)[0], name)
|
|
} else {
|
|
includeIndex := core.FindIndex(v.includeFileRegexes, func(re *regexp2.Regexp) bool { return core.Must(re.MatchString(absoluteName)) })
|
|
if includeIndex != -1 {
|
|
(v.results)[includeIndex] = append((v.results)[includeIndex], name)
|
|
}
|
|
}
|
|
}
|
|
|
|
if depth != nil {
|
|
newDepth := *depth - 1
|
|
if newDepth == 0 {
|
|
return
|
|
}
|
|
depth = &newDepth
|
|
}
|
|
|
|
for _, current := range directories {
|
|
name := tspath.CombinePaths(path, current)
|
|
absoluteName := tspath.CombinePaths(absolutePath, current)
|
|
if (v.includeDirectoryRegex == nil || core.Must(v.includeDirectoryRegex.MatchString(absoluteName))) && (v.excludeRegex == nil || !core.Must(v.excludeRegex.MatchString(absoluteName))) {
|
|
v.visitDirectory(name, absoluteName, depth)
|
|
}
|
|
}
|
|
}
|
|
|
|
// path is the directory of the tsconfig.json
|
|
func matchFiles(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host FS) []string {
|
|
path = tspath.NormalizePath(path)
|
|
currentDirectory = tspath.NormalizePath(currentDirectory)
|
|
|
|
patterns := getFileMatcherPatterns(path, excludes, includes, useCaseSensitiveFileNames, currentDirectory)
|
|
var includeFileRegexes []*regexp2.Regexp
|
|
if patterns.includeFilePatterns != nil {
|
|
includeFileRegexes = core.Map(patterns.includeFilePatterns, func(pattern string) *regexp2.Regexp { return GetRegexFromPattern(pattern, useCaseSensitiveFileNames) })
|
|
}
|
|
var includeDirectoryRegex *regexp2.Regexp
|
|
if patterns.includeDirectoryPattern != "" {
|
|
includeDirectoryRegex = GetRegexFromPattern(patterns.includeDirectoryPattern, useCaseSensitiveFileNames)
|
|
}
|
|
var excludeRegex *regexp2.Regexp
|
|
if patterns.excludePattern != "" {
|
|
excludeRegex = GetRegexFromPattern(patterns.excludePattern, useCaseSensitiveFileNames)
|
|
}
|
|
|
|
// Associate an array of results with each include regex. This keeps results in order of the "include" order.
|
|
// If there are no "includes", then just put everything in results[0].
|
|
var results [][]string
|
|
if len(includeFileRegexes) > 0 {
|
|
tempResults := make([][]string, len(includeFileRegexes))
|
|
for i := range includeFileRegexes {
|
|
tempResults[i] = []string{}
|
|
}
|
|
results = tempResults
|
|
} else {
|
|
results = [][]string{{}}
|
|
}
|
|
v := visitor{
|
|
useCaseSensitiveFileNames: useCaseSensitiveFileNames,
|
|
host: host,
|
|
includeFileRegexes: includeFileRegexes,
|
|
excludeRegex: excludeRegex,
|
|
includeDirectoryRegex: includeDirectoryRegex,
|
|
extensions: extensions,
|
|
results: results,
|
|
}
|
|
for _, basePath := range patterns.basePaths {
|
|
v.visitDirectory(basePath, tspath.CombinePaths(currentDirectory, basePath), depth)
|
|
}
|
|
|
|
return core.Flatten(results)
|
|
}
|
|
|
|
func ReadDirectory(host FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string {
|
|
return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host)
|
|
}
|