Skip to content

Commit e0ec95f

Browse files
jpinzCopilot
andauthored
Address follow-up review feedback for matcher reuse and null-pattern validation (#1770)
* Migrate to use FileSystemName.MatchesSimpleExpression for matching glob patterns efficiently. And align on one implementation of pattern/file matching. * Address Copilot comment about the patterns array * Address pattern matching review thread feedback * Optimize pattern matcher reuse and validate null pattern elements --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: jpinz <8357054+jpinz@users.noreply.github.com>
1 parent c3ef82c commit e0ec95f

10 files changed

Lines changed: 261 additions & 141 deletions

File tree

src/Microsoft.ComponentDetection.Common/FastDirectoryWalkerFactory.cs

Lines changed: 40 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,12 @@ public IObservable<FileSystemInfo> GetDirectoryScanner(DirectoryInfo root, Concu
3939
return Task.CompletedTask;
4040
}
4141

42-
PatternMatchingUtility.FilePatternMatcher fileIsMatch = null;
42+
PatternMatchingUtility.CompiledMatcher fileIsMatch = null;
43+
var patternsArray = filePatterns?.ToArray();
4344

44-
if (filePatterns == null || !filePatterns.Any())
45+
if (patternsArray is { Length: > 0 })
4546
{
46-
fileIsMatch = span => true;
47-
}
48-
else
49-
{
50-
fileIsMatch = PatternMatchingUtility.GetFilePatternMatcher(filePatterns);
47+
fileIsMatch = PatternMatchingUtility.Compile(patternsArray);
5148
}
5249

5350
var sw = Stopwatch.StartNew();
@@ -100,7 +97,7 @@ public IObservable<FileSystemInfo> GetDirectoryScanner(DirectoryInfo root, Concu
10097
{
10198
ShouldIncludePredicate = (ref FileSystemEntry entry) =>
10299
{
103-
if (!entry.IsDirectory && fileIsMatch(entry.FileName))
100+
if (!entry.IsDirectory && (fileIsMatch == null || fileIsMatch.IsMatch(entry.FileName)))
104101
{
105102
return true;
106103
}
@@ -210,46 +207,26 @@ public void Initialize(DirectoryInfo root, ExcludeDirectoryPredicate directoryEx
210207

211208
public IObservable<FileSystemInfo> Subscribe(DirectoryInfo root, IEnumerable<string> patterns)
212209
{
213-
var patternArray = patterns.ToArray();
214-
215-
if (this.pendingScans.TryGetValue(root, out var scannerObservable))
216-
{
217-
this.logger.LogDebug("Logging patterns {Patterns} for {Root}", string.Join(":", patterns), root.FullName);
218-
219-
var inner = scannerObservable.Value.Where(fsi =>
220-
{
221-
if (fsi is FileInfo fi)
222-
{
223-
return this.MatchesAnyPattern(fi, patternArray);
224-
}
225-
else
226-
{
227-
return true;
228-
}
229-
});
230-
231-
return inner;
232-
}
233-
234-
throw new InvalidOperationException("Subscribe called without initializing scanner");
210+
var patternsArray = patterns as string[] ?? patterns.ToArray();
211+
var compiled = PatternMatchingUtility.Compile(patternsArray);
212+
return this.Subscribe(root, patternsArray, compiled);
235213
}
236214

237215
public IObservable<ProcessRequest> GetFilteredComponentStreamObservable(DirectoryInfo root, IEnumerable<string> patterns, IComponentRecorder componentRecorder)
238216
{
239-
var observable = this.Subscribe(root, patterns).OfType<FileInfo>().SelectMany(f => patterns.Select(sp => new
240-
{
241-
SearchPattern = sp,
242-
File = f,
243-
})).Where(x =>
244-
{
245-
var searchPattern = x.SearchPattern;
246-
var fileName = x.File.Name;
217+
var patternsArray = patterns as string[] ?? patterns.ToArray();
218+
var compiled = PatternMatchingUtility.Compile(patternsArray);
247219

248-
return this.pathUtilityService.MatchesPattern(searchPattern, fileName);
249-
}).Where(x => x.File.Exists)
220+
var observable = this.Subscribe(root, patternsArray, compiled).OfType<FileInfo>()
221+
.Select(f => new
222+
{
223+
File = f,
224+
MatchedPattern = compiled.GetMatchingPattern(f.Name),
225+
})
226+
.Where(x => x.MatchedPattern != null && x.File.Exists)
250227
.Select(x =>
251228
{
252-
var lazyComponentStream = new LazyComponentStream(x.File, x.SearchPattern, this.logger);
229+
var lazyComponentStream = new LazyComponentStream(x.File, x.MatchedPattern, this.logger);
253230
return new ProcessRequest
254231
{
255232
ComponentStream = lazyComponentStream,
@@ -280,14 +257,31 @@ private FileSystemInfo Transform(ref FileSystemEntry entry)
280257
return entry.ToFileSystemInfo();
281258
}
282259

283-
private IObservable<FileSystemInfo> CreateDirectoryWalker(DirectoryInfo di, ExcludeDirectoryPredicate directoryExclusionPredicate, int minimumConnectionCount, IEnumerable<string> filePatterns)
260+
private IObservable<FileSystemInfo> Subscribe(DirectoryInfo root, string[] patterns, PatternMatchingUtility.CompiledMatcher compiled)
284261
{
285-
return this.GetDirectoryScanner(di, new ConcurrentDictionary<string, bool>(), directoryExclusionPredicate, filePatterns, true).Replay() // Returns a replay subject which will republish anything found to new subscribers.
286-
.AutoConnect(minimumConnectionCount); // Specifies that this connectable observable should start when minimumConnectionCount subscribe.
262+
if (this.pendingScans.TryGetValue(root, out var scannerObservable))
263+
{
264+
this.logger.LogDebug("Logging patterns {Patterns} for {Root}", string.Join(":", patterns), root.FullName);
265+
266+
var inner = scannerObservable.Value.Where(fsi =>
267+
{
268+
if (fsi is FileInfo fi)
269+
{
270+
return compiled.IsMatch(fi.Name.AsSpan());
271+
}
272+
273+
return true;
274+
});
275+
276+
return inner;
277+
}
278+
279+
throw new InvalidOperationException("Subscribe called without initializing scanner");
287280
}
288281

289-
private bool MatchesAnyPattern(FileInfo fi, params string[] searchPatterns)
282+
private IObservable<FileSystemInfo> CreateDirectoryWalker(DirectoryInfo di, ExcludeDirectoryPredicate directoryExclusionPredicate, int minimumConnectionCount, IEnumerable<string> filePatterns)
290283
{
291-
return searchPatterns != null && searchPatterns.Any(sp => this.pathUtilityService.MatchesPattern(sp, fi.Name));
284+
return this.GetDirectoryScanner(di, new ConcurrentDictionary<string, bool>(), directoryExclusionPredicate, filePatterns, true).Replay() // Returns a replay subject which will republish anything found to new subscribers.
285+
.AutoConnect(minimumConnectionCount); // Specifies that this connectable observable should start when minimumConnectionCount subscribe.
292286
}
293287
}

src/Microsoft.ComponentDetection.Common/PathUtilityService.cs

Lines changed: 3 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ namespace Microsoft.ComponentDetection.Common;
33

44
using System;
55
using System.IO;
6-
using System.IO.Enumeration;
76
using Microsoft.ComponentDetection.Contracts;
87
using Microsoft.Extensions.Logging;
98

@@ -22,21 +21,6 @@ internal class PathUtilityService : IPathUtilityService
2221

2322
public PathUtilityService(ILogger<PathUtilityService> logger) => this.logger = logger;
2423

25-
public static bool MatchesPattern(string searchPattern, ref FileSystemEntry fse)
26-
{
27-
if (searchPattern.StartsWith('*') && fse.FileName.EndsWith(searchPattern.AsSpan()[1..], StringComparison.OrdinalIgnoreCase))
28-
{
29-
return true;
30-
}
31-
32-
if (searchPattern.EndsWith('*') && fse.FileName.StartsWith(searchPattern.AsSpan()[..^1], StringComparison.OrdinalIgnoreCase))
33-
{
34-
return true;
35-
}
36-
37-
return fse.FileName.Equals(searchPattern.AsSpan(), StringComparison.OrdinalIgnoreCase);
38-
}
39-
4024
public string GetParentDirectory(string path) => Path.GetDirectoryName(path);
4125

4226
public bool IsFileBelowAnother(string aboveFilePath, string belowFilePath)
@@ -48,21 +32,6 @@ public bool IsFileBelowAnother(string aboveFilePath, string belowFilePath)
4832
return (aboveDirectoryPath.Length != belowDirectoryPath.Length) && belowDirectoryPath.StartsWith(aboveDirectoryPath);
4933
}
5034

51-
public bool MatchesPattern(string searchPattern, string fileName)
52-
{
53-
if (searchPattern.StartsWith('*') && fileName.EndsWith(searchPattern[1..], StringComparison.OrdinalIgnoreCase))
54-
{
55-
return true;
56-
}
57-
58-
if (searchPattern.EndsWith('*') && fileName.StartsWith(searchPattern[..^1], StringComparison.OrdinalIgnoreCase))
59-
{
60-
return true;
61-
}
62-
63-
return searchPattern.Equals(fileName, StringComparison.OrdinalIgnoreCase);
64-
}
65-
6635
public string ResolvePhysicalPath(string path)
6736
{
6837
var directoryInfo = new DirectoryInfo(path);
@@ -75,6 +44,9 @@ public string ResolvePhysicalPath(string path)
7544
return fileInfo.Exists ? this.ResolvePathFromInfo(fileInfo) : null;
7645
}
7746

47+
[Obsolete("Use PatternMatchingUtility.MatchesPattern instead.")]
48+
public bool MatchesPattern(string pattern, string fileName) => PatternMatchingUtility.MatchesPattern(pattern, fileName);
49+
7850
private string ResolvePathFromInfo(FileSystemInfo info) => info.LinkTarget ?? info.FullName;
7951

8052
public string NormalizePath(string path)

src/Microsoft.ComponentDetection.Common/PatternMatchingUtility.cs

Lines changed: 76 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,38 +2,91 @@ namespace Microsoft.ComponentDetection.Common;
22

33
using System;
44
using System.Collections.Generic;
5+
using System.IO.Enumeration;
56
using System.Linq;
67

78
public static class PatternMatchingUtility
89
{
9-
public delegate bool FilePatternMatcher(ReadOnlySpan<char> span);
10+
public static bool MatchesPattern(string pattern, string fileName)
11+
{
12+
ArgumentNullException.ThrowIfNull(pattern);
13+
ArgumentNullException.ThrowIfNull(fileName);
14+
15+
return IsPatternMatch(pattern, fileName.AsSpan());
16+
}
1017

11-
public static FilePatternMatcher GetFilePatternMatcher(IEnumerable<string> patterns)
18+
/// <summary>
19+
/// Returns the first matching pattern for <paramref name="fileName"/>.
20+
/// Earlier patterns in <paramref name="patterns"/> have higher priority when multiple match.
21+
/// </summary>
22+
/// <returns>The first matching pattern, or <see langword="null"/> if no patterns match.</returns>
23+
public static string? GetMatchingPattern(string fileName, IEnumerable<string> patterns)
1224
{
13-
var matchers = patterns.Select<string, FilePatternMatcher>(pattern => pattern switch
14-
{
15-
_ when pattern.StartsWith('*') && pattern.EndsWith('*') =>
16-
pattern.Length <= 2
17-
? _ => true
18-
: span => span.Contains(pattern.AsSpan(1, pattern.Length - 2), StringComparison.Ordinal),
19-
_ when pattern.StartsWith('*') =>
20-
span => span.EndsWith(pattern.AsSpan(1), StringComparison.Ordinal),
21-
_ when pattern.EndsWith('*') =>
22-
span => span.StartsWith(pattern.AsSpan(0, pattern.Length - 1), StringComparison.Ordinal),
23-
_ => span => span.Equals(pattern.AsSpan(), StringComparison.Ordinal),
24-
}).ToList();
25-
26-
return span =>
25+
ArgumentNullException.ThrowIfNull(fileName);
26+
ArgumentNullException.ThrowIfNull(patterns);
27+
28+
return Compile(patterns).GetMatchingPattern(fileName.AsSpan());
29+
}
30+
31+
public static CompiledMatcher Compile(IEnumerable<string> patterns)
32+
{
33+
ArgumentNullException.ThrowIfNull(patterns);
34+
return patterns is string[] array ? Compile(array) : new(patterns.ToArray());
35+
}
36+
37+
public static CompiledMatcher Compile(string[] patterns)
38+
{
39+
ArgumentNullException.ThrowIfNull(patterns);
40+
return new(patterns);
41+
}
42+
43+
private static string? GetFirstMatchingPattern(ReadOnlySpan<char> fileName, string[] patterns)
44+
{
45+
foreach (var pattern in patterns)
2746
{
28-
foreach (var matcher in matchers)
47+
if (IsPatternMatch(pattern, fileName))
2948
{
30-
if (matcher(span))
31-
{
32-
return true;
33-
}
49+
return pattern;
3450
}
51+
}
3552

36-
return false;
37-
};
53+
return null;
54+
}
55+
56+
private static bool IsPatternMatch(string pattern, ReadOnlySpan<char> fileName) =>
57+
FileSystemName.MatchesSimpleExpression(pattern, fileName, ignoreCase: true);
58+
59+
public sealed class CompiledMatcher
60+
{
61+
private readonly string[] patterns;
62+
63+
public CompiledMatcher(IEnumerable<string> patterns)
64+
: this(patterns is string[] arr ? arr : (patterns ?? throw new ArgumentNullException(nameof(patterns))).ToArray())
65+
{
66+
}
67+
68+
internal CompiledMatcher(string[] patterns)
69+
{
70+
ArgumentNullException.ThrowIfNull(patterns);
71+
this.patterns = (string[])patterns.Clone();
72+
ValidatePatternElements(this.patterns);
73+
}
74+
75+
public bool IsMatch(ReadOnlySpan<char> fileName) => GetFirstMatchingPattern(fileName, this.patterns) is not null;
76+
77+
/// <summary>
78+
/// Returns the first matching pattern for <paramref name="fileName"/>.
79+
/// Earlier patterns in the compiled set have higher priority when multiple match.
80+
/// </summary>
81+
/// <returns>The first matching pattern, or <see langword="null"/> if no patterns match.</returns>
82+
public string? GetMatchingPattern(ReadOnlySpan<char> fileName) => GetFirstMatchingPattern(fileName, this.patterns);
83+
84+
private static void ValidatePatternElements(string[] patterns)
85+
{
86+
foreach (var pattern in patterns)
87+
{
88+
ArgumentNullException.ThrowIfNull(pattern);
89+
}
90+
}
3891
}
3992
}

src/Microsoft.ComponentDetection.Common/SafeFileEnumerable.cs

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@ namespace Microsoft.ComponentDetection.Common;
1111

1212
public class SafeFileEnumerable : IEnumerable<MatchedFile>
1313
{
14-
private readonly IEnumerable<string> searchPatterns;
1514
private readonly ExcludeDirectoryPredicate directoryExclusionPredicate;
1615
private readonly DirectoryInfo directory;
1716
private readonly IPathUtilityService pathUtilityService;
1817
private readonly bool recursivelyScanDirectories;
1918
private readonly Func<FileInfo, bool> fileMatchingPredicate;
19+
private readonly PatternMatchingUtility.CompiledMatcher compiledMatcher;
2020

2121
private readonly EnumerationOptions enumerationOptions;
2222

@@ -27,11 +27,11 @@ public SafeFileEnumerable(DirectoryInfo directory, IEnumerable<string> searchPat
2727
{
2828
this.directory = directory;
2929
this.logger = logger;
30-
this.searchPatterns = searchPatterns;
3130
this.directoryExclusionPredicate = directoryExclusionPredicate;
3231
this.recursivelyScanDirectories = recursivelyScanDirectories;
3332
this.pathUtilityService = pathUtilityService;
3433
this.enumeratedDirectories = previouslyEnumeratedDirectories;
34+
this.compiledMatcher = PatternMatchingUtility.Compile(searchPatterns);
3535

3636
this.enumerationOptions = new EnumerationOptions()
3737
{
@@ -58,14 +58,10 @@ public IEnumerator<MatchedFile> GetEnumerator()
5858
throw new InvalidOperationException("Encountered directory when expecting a file");
5959
}
6060

61-
var foundPattern = entry.FileName.ToString();
62-
foreach (var searchPattern in this.searchPatterns)
63-
{
64-
if (PathUtilityService.MatchesPattern(searchPattern, ref entry))
65-
{
66-
foundPattern = searchPattern;
67-
}
68-
}
61+
// Pattern priority is first-match-wins: earlier entries in searchPatterns
62+
// are treated as higher priority when multiple patterns match.
63+
var foundPattern = this.compiledMatcher.GetMatchingPattern(entry.FileName)
64+
?? entry.FileName.ToString();
6965

7066
return new MatchedFile() { File = fi, Pattern = foundPattern };
7167
},
@@ -78,15 +74,7 @@ public IEnumerator<MatchedFile> GetEnumerator()
7874
return false;
7975
}
8076

81-
foreach (var searchPattern in this.searchPatterns)
82-
{
83-
if (PathUtilityService.MatchesPattern(searchPattern, ref entry))
84-
{
85-
return true;
86-
}
87-
}
88-
89-
return false;
77+
return this.compiledMatcher.IsMatch(entry.FileName);
9078
},
9179
ShouldRecursePredicate = (ref FileSystemEntry entry) =>
9280
{

src/Microsoft.ComponentDetection.Contracts/FileComponentDetector.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,10 @@ public abstract class FileComponentDetector : IComponentDetector
3434
/// <inheritdoc />
3535
public abstract string Id { get; }
3636

37-
/// <summary> Gets the search patterns used to produce the list of valid folders to scan. These patterns are evaluated with .Net's Directory.EnumerateFiles function. </summary>
37+
/// <summary>
38+
/// Gets the search patterns used to produce the list of valid files to scan.
39+
/// The first pattern that matches a given file will be used to determine how that file is processed, so more specific patterns should be listed before more general ones. Wildcards are accepted.
40+
/// </summary>
3841
public abstract IList<string> SearchPatterns { get; }
3942

4043
/// <summary>Gets the categories this detector is considered a member of. Used by the DetectorCategories arg to include detectors.</summary>

0 commit comments

Comments
 (0)