package vfs

import (
	"fmt"
	"regexp"
	"sort"
	"strings"
	"sync"

	"efprojects.com/kitten-ipc/kitcom/internal/tsgo/collections"
	"efprojects.com/kitten-ipc/kitcom/internal/tsgo/core"
	"efprojects.com/kitten-ipc/kitcom/internal/tsgo/stringutil"
	"efprojects.com/kitten-ipc/kitcom/internal/tsgo/tspath"
	"github.com/dlclark/regexp2"
)

// FileMatcherPatterns holds the regular-expression source strings and the base
// directories computed from a set of "include" and "exclude" specs.
type FileMatcherPatterns struct {
	// One pattern for each "include" spec.
	includeFilePatterns []string
	// One pattern matching one of any of the "include" specs.
	includeFilePattern      string
	includeDirectoryPattern string
	excludePattern          string
	basePaths               []string
}

// usage names the purpose a wildcard spec is compiled for; it is the key used
// to look up the matching WildcardMatcher.
type usage string

const (
	usageFiles       usage = "files"
	usageDirectories usage = "directories"
	usageExclude     usage = "exclude"
)

// GetRegularExpressionsForWildcards converts every spec into an unanchored
// regular-expression fragment for the given usage. It returns nil when specs
// is empty.
func GetRegularExpressionsForWildcards(specs []string, basePath string, usage usage) []string {
	if len(specs) == 0 {
		return nil
	}
	toSubPattern := func(spec string) string {
		return getSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage])
	}
	return core.Map(specs, toSubPattern)
}

// GetRegularExpressionForWildcard builds a single anchored regular expression
// that is the alternation of the fragments produced for each spec. It returns
// the empty string when there are no fragments.
func GetRegularExpressionForWildcard(specs []string, basePath string, usage usage) string {
	subPatterns := GetRegularExpressionsForWildcards(specs, basePath, usage)
	if len(subPatterns) == 0 {
		return ""
	}

	var combined strings.Builder
	combined.WriteString("^(")
	for i, subPattern := range subPatterns {
		if i > 0 {
			combined.WriteByte('|')
		}
		// Each alternative gets its own group.
		combined.WriteByte('(')
		combined.WriteString(subPattern)
		combined.WriteByte(')')
	}
	combined.WriteByte(')')

	// If excluding, match "foo/bar/baz...", but if including, only allow "foo".
	if usage == usageExclude {
		combined.WriteString("($|/)")
	} else {
		combined.WriteString("$")
	}
	return combined.String()
}

// replaceWildcardCharacter maps one matched reserved character to its regex
// form: "*" becomes the supplied single-asterisk fragment, "?" becomes a
// single non-separator character class, and anything else is backslash-escaped.
func replaceWildcardCharacter(match string, singleAsteriskRegexFragment string) string {
	switch match {
	case "*":
		return singleAsteriskRegexFragment
	case "?":
		return "[^/]"
	default:
		return "\\" + match
	}
}

// An "includes" path "foo" is implicitly a glob "foo/** /*" (without the space) if its last component has no extension,
// and does not contain any glob characters itself.
func IsImplicitGlob(lastPathComponent string) bool { return !strings.ContainsAny(lastPathComponent, ".*?") } // Reserved characters, forces escaping of any non-word (or digit), non-whitespace character. // It may be inefficient (we could just match (/[-[\]{}()*+?.,\\^$|#\s]/g), but this is future // proof. var ( reservedCharacterPattern *regexp.Regexp = regexp.MustCompile(`[^\w\s/]`) wildcardCharCodes = []rune{'*', '?'} ) var ( commonPackageFolders = []string{"node_modules", "bower_components", "jspm_packages"} implicitExcludePathRegexPattern = "(?!(" + strings.Join(commonPackageFolders, "|") + ")(/|$))" ) type WildcardMatcher struct { singleAsteriskRegexFragment string doubleAsteriskRegexFragment string replaceWildcardCharacter func(match string) string } const ( // Matches any single directory segment unless it is the last segment and a .min.js file // Breakdown: // // [^./] # matches everything up to the first . character (excluding directory separators) // (\\.(?!min\\.js$))? # matches . characters but not if they are part of the .min.js file extension singleAsteriskRegexFragmentFilesMatcher = "([^./]|(\\.(?!min\\.js$))?)*" singleAsteriskRegexFragment = "[^/]*" ) var filesMatcher = WildcardMatcher{ singleAsteriskRegexFragment: singleAsteriskRegexFragmentFilesMatcher, // Regex for the ** wildcard. Matches any number of subdirectories. When used for including // files or directories, does not match subdirectories that start with a . character doubleAsteriskRegexFragment: "(/" + implicitExcludePathRegexPattern + "[^/.][^/]*)*?", replaceWildcardCharacter: func(match string) string { return replaceWildcardCharacter(match, singleAsteriskRegexFragmentFilesMatcher) }, } var directoriesMatcher = WildcardMatcher{ singleAsteriskRegexFragment: singleAsteriskRegexFragment, // Regex for the ** wildcard. Matches any number of subdirectories. When used for including // files or directories, does not match subdirectories that start with a . 
character doubleAsteriskRegexFragment: "(/" + implicitExcludePathRegexPattern + "[^/.][^/]*)*?", replaceWildcardCharacter: func(match string) string { return replaceWildcardCharacter(match, singleAsteriskRegexFragment) }, } var excludeMatcher = WildcardMatcher{ singleAsteriskRegexFragment: singleAsteriskRegexFragment, doubleAsteriskRegexFragment: "(/.+?)?", replaceWildcardCharacter: func(match string) string { return replaceWildcardCharacter(match, singleAsteriskRegexFragment) }, } var wildcardMatchers = map[usage]WildcardMatcher{ usageFiles: filesMatcher, usageDirectories: directoriesMatcher, usageExclude: excludeMatcher, } func GetPatternFromSpec( spec string, basePath string, usage usage, ) string { pattern := getSubPatternFromSpec(spec, basePath, usage, wildcardMatchers[usage]) if pattern == "" { return "" } ending := core.IfElse(usage == "exclude", "($|/)", "$") return fmt.Sprintf("^(%s)%s", pattern, ending) } func getSubPatternFromSpec( spec string, basePath string, usage usage, matcher WildcardMatcher, ) string { matcher = wildcardMatchers[usage] replaceWildcardCharacter := matcher.replaceWildcardCharacter var subpattern strings.Builder hasWrittenComponent := false components := tspath.GetNormalizedPathComponents(spec, basePath) lastComponent := core.LastOrNil(components) if usage != "exclude" && lastComponent == "**" { return "" } // getNormalizedPathComponents includes the separator for the root component. // We need to remove to create our regex correctly. 
components[0] = tspath.RemoveTrailingDirectorySeparator(components[0]) if IsImplicitGlob(lastComponent) { components = append(components, "**", "*") } optionalCount := 0 for _, component := range components { if component == "**" { subpattern.WriteString(matcher.doubleAsteriskRegexFragment) } else { if usage == "directories" { subpattern.WriteString("(") optionalCount++ } if hasWrittenComponent { subpattern.WriteRune(tspath.DirectorySeparator) } if usage != "exclude" { var componentPattern strings.Builder if strings.HasPrefix(component, "*") { componentPattern.WriteString("([^./]" + matcher.singleAsteriskRegexFragment + ")?") component = component[1:] } else if strings.HasPrefix(component, "?") { componentPattern.WriteString("[^./]") component = component[1:] } componentPattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter)) // Patterns should not include subfolders like node_modules unless they are // explicitly included as part of the path. // // As an optimization, if the component pattern is the same as the component, // then there definitely were no wildcard characters and we do not need to // add the exclusion pattern. if componentPattern.String() != component { subpattern.WriteString(implicitExcludePathRegexPattern) } subpattern.WriteString(componentPattern.String()) } else { subpattern.WriteString(reservedCharacterPattern.ReplaceAllStringFunc(component, replaceWildcardCharacter)) } } hasWrittenComponent = true } for optionalCount > 0 { subpattern.WriteString(")?") optionalCount-- } return subpattern.String() } func getIncludeBasePath(absolute string) string { wildcardOffset := strings.IndexAny(absolute, string(wildcardCharCodes)) if wildcardOffset < 0 { // No "*" or "?" 
in the path if !tspath.HasExtension(absolute) { return absolute } else { return tspath.RemoveTrailingDirectorySeparator(tspath.GetDirectoryPath(absolute)) } } return absolute[:max(strings.LastIndex(absolute[:wildcardOffset], string(tspath.DirectorySeparator)), 0)] } // getBasePaths computes the unique non-wildcard base paths amongst the provided include patterns. func getBasePaths(path string, includes []string, useCaseSensitiveFileNames bool) []string { // Storage for our results in the form of literal paths (e.g. the paths as written by the user). basePaths := []string{path} if len(includes) > 0 { // Storage for literal base paths amongst the include patterns. includeBasePaths := []string{} for _, include := range includes { // We also need to check the relative paths by converting them to absolute and normalizing // in case they escape the base path (e.g "..\somedirectory") var absolute string if tspath.IsRootedDiskPath(include) { absolute = include } else { absolute = tspath.NormalizePath(tspath.CombinePaths(path, include)) } // Append the literal and canonical candidate base paths. includeBasePaths = append(includeBasePaths, getIncludeBasePath(absolute)) } // Sort the offsets array using either the literal or canonical path representations. 
stringComparer := stringutil.GetStringComparer(!useCaseSensitiveFileNames) sort.SliceStable(includeBasePaths, func(i, j int) bool { return stringComparer(includeBasePaths[i], includeBasePaths[j]) < 0 }) // Iterate over each include base path and include unique base paths that are not a // subpath of an existing base path for _, includeBasePath := range includeBasePaths { if core.Every(basePaths, func(basepath string) bool { return !tspath.ContainsPath(basepath, includeBasePath, tspath.ComparePathsOptions{CurrentDirectory: path, UseCaseSensitiveFileNames: !useCaseSensitiveFileNames}) }) { basePaths = append(basePaths, includeBasePath) } } } return basePaths } // getFileMatcherPatterns generates file matching patterns based on the provided path, // includes, excludes, and other parameters. path is the directory of the tsconfig.json file. func getFileMatcherPatterns(path string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string) FileMatcherPatterns { path = tspath.NormalizePath(path) currentDirectory = tspath.NormalizePath(currentDirectory) absolutePath := tspath.CombinePaths(currentDirectory, path) return FileMatcherPatterns{ includeFilePatterns: core.Map(GetRegularExpressionsForWildcards(includes, absolutePath, "files"), func(pattern string) string { return "^" + pattern + "$" }), includeFilePattern: GetRegularExpressionForWildcard(includes, absolutePath, "files"), includeDirectoryPattern: GetRegularExpressionForWildcard(includes, absolutePath, "directories"), excludePattern: GetRegularExpressionForWildcard(excludes, absolutePath, "exclude"), basePaths: getBasePaths(path, includes, useCaseSensitiveFileNames), } } type regexp2CacheKey struct { pattern string opts regexp2.RegexOptions } var ( regexp2CacheMu sync.RWMutex regexp2Cache = make(map[regexp2CacheKey]*regexp2.Regexp) ) func GetRegexFromPattern(pattern string, useCaseSensitiveFileNames bool) *regexp2.Regexp { flags := regexp2.ECMAScript if !useCaseSensitiveFileNames 
{ flags |= regexp2.IgnoreCase } opts := regexp2.RegexOptions(flags) key := regexp2CacheKey{pattern, opts} regexp2CacheMu.RLock() re, ok := regexp2Cache[key] regexp2CacheMu.RUnlock() if ok { return re } regexp2CacheMu.Lock() defer regexp2CacheMu.Unlock() re, ok = regexp2Cache[key] if ok { return re } // Avoid infinite growth; may cause thrashing but no worse than not caching at all. if len(regexp2Cache) > 1000 { clear(regexp2Cache) } // Avoid holding onto the pattern string, since this may pin a full config file in memory. pattern = strings.Clone(pattern) key.pattern = pattern re = regexp2.MustCompile(pattern, opts) regexp2Cache[key] = re return re } type visitor struct { includeFileRegexes []*regexp2.Regexp excludeRegex *regexp2.Regexp includeDirectoryRegex *regexp2.Regexp extensions []string useCaseSensitiveFileNames bool host FS visited collections.Set[string] results [][]string } func (v *visitor) visitDirectory( path string, absolutePath string, depth *int, ) { canonicalPath := tspath.GetCanonicalFileName(absolutePath, v.useCaseSensitiveFileNames) if v.visited.Has(canonicalPath) { return } v.visited.Add(canonicalPath) systemEntries := v.host.GetAccessibleEntries(absolutePath) files := systemEntries.Files directories := systemEntries.Directories for _, current := range files { name := tspath.CombinePaths(path, current) absoluteName := tspath.CombinePaths(absolutePath, current) if len(v.extensions) > 0 && !tspath.FileExtensionIsOneOf(name, v.extensions) { continue } if v.excludeRegex != nil && core.Must(v.excludeRegex.MatchString(absoluteName)) { continue } if v.includeFileRegexes == nil { (v.results)[0] = append((v.results)[0], name) } else { includeIndex := core.FindIndex(v.includeFileRegexes, func(re *regexp2.Regexp) bool { return core.Must(re.MatchString(absoluteName)) }) if includeIndex != -1 { (v.results)[includeIndex] = append((v.results)[includeIndex], name) } } } if depth != nil { newDepth := *depth - 1 if newDepth == 0 { return } depth = &newDepth } for 
_, current := range directories { name := tspath.CombinePaths(path, current) absoluteName := tspath.CombinePaths(absolutePath, current) if (v.includeDirectoryRegex == nil || core.Must(v.includeDirectoryRegex.MatchString(absoluteName))) && (v.excludeRegex == nil || !core.Must(v.excludeRegex.MatchString(absoluteName))) { v.visitDirectory(name, absoluteName, depth) } } } // path is the directory of the tsconfig.json func matchFiles(path string, extensions []string, excludes []string, includes []string, useCaseSensitiveFileNames bool, currentDirectory string, depth *int, host FS) []string { path = tspath.NormalizePath(path) currentDirectory = tspath.NormalizePath(currentDirectory) patterns := getFileMatcherPatterns(path, excludes, includes, useCaseSensitiveFileNames, currentDirectory) var includeFileRegexes []*regexp2.Regexp if patterns.includeFilePatterns != nil { includeFileRegexes = core.Map(patterns.includeFilePatterns, func(pattern string) *regexp2.Regexp { return GetRegexFromPattern(pattern, useCaseSensitiveFileNames) }) } var includeDirectoryRegex *regexp2.Regexp if patterns.includeDirectoryPattern != "" { includeDirectoryRegex = GetRegexFromPattern(patterns.includeDirectoryPattern, useCaseSensitiveFileNames) } var excludeRegex *regexp2.Regexp if patterns.excludePattern != "" { excludeRegex = GetRegexFromPattern(patterns.excludePattern, useCaseSensitiveFileNames) } // Associate an array of results with each include regex. This keeps results in order of the "include" order. // If there are no "includes", then just put everything in results[0]. 
var results [][]string if len(includeFileRegexes) > 0 { tempResults := make([][]string, len(includeFileRegexes)) for i := range includeFileRegexes { tempResults[i] = []string{} } results = tempResults } else { results = [][]string{{}} } v := visitor{ useCaseSensitiveFileNames: useCaseSensitiveFileNames, host: host, includeFileRegexes: includeFileRegexes, excludeRegex: excludeRegex, includeDirectoryRegex: includeDirectoryRegex, extensions: extensions, results: results, } for _, basePath := range patterns.basePaths { v.visitDirectory(basePath, tspath.CombinePaths(currentDirectory, basePath), depth) } return core.Flatten(results) } func ReadDirectory(host FS, currentDir string, path string, extensions []string, excludes []string, includes []string, depth *int) []string { return matchFiles(path, extensions, excludes, includes, host.UseCaseSensitiveFileNames(), currentDir, depth, host) }