1 // Copyright (c) 2017-2018 Matthew Brennan Jones <matthew.brennan.jones@gmail.com>
2 // Boost Software License - Version 1.0
3 // Search file systems with glob patterns using the D programming language
4 // https://github.com/workhorsy/d-glob
5 
6 /++
7 Search file systems with glob patterns using the D programming language
8 
9 See Glob  $(LINK https://en.wikipedia.org/wiki/Glob_(programming))
10 
11 Home page:
12 $(LINK https://github.com/workhorsy/d-glob)
13 
14 Version: 0.4.0
15 
16 License:
17 Boost Software License - Version 1.0
18 +/
19 
20 // https://en.wikipedia.org/wiki/Glob_%28programming%29
21 
22 module glob;
23 
24 
/++
Return every path on the file system that matches a glob pattern.
Params:
 path_name = The path containing the glob patterns to match.
Returns: The matching paths. They are relative to the current working
directory when path_name does not start with a path separator.
+/
string[] glob(string path_name) {
	// Plain glob syntax: regex matching stays switched off
	enum bool use_regex = false;

	string[] found = getGlobMatches(path_name, use_regex);
	return found;
}
33 
///
unittest {
	// Documented example showing each supported wildcard.
	// The paths listed in the block comments are illustrative only: the
	// real results depend on what exists on the file system being searched.
	import glob : glob;

	// Use * to match zero or more characters
	string[] entries = glob("/usr/bin/python*");
	/*
	entries would contain:
	/usr/bin/python2
	/usr/bin/python2.7
	/usr/bin/python3
	/usr/bin/python3.5
	*/

	// Use ? to match exactly one character
	entries = glob("/usr/bin/python2.?");
	/*
	entries would contain:
	/usr/bin/python2.6
	/usr/bin/python2.7
	*/

	// Use [] to match exactly one of the characters between the brackets
	entries = glob("/usr/bin/python[23]");
	/*
	entries would contain:
	/usr/bin/python2
	/usr/bin/python3
	*/

	// Use [!] to match exactly one character that is NOT between the brackets
	entries = glob("/usr/bin/python[!3]");
	/*
	entries would contain:
	/usr/bin/python2
	*/

	// Use {} to match any one of the comma separated strings
	entries = glob("/usr/bin/{python,ruby}");
	/*
	entries would contain:
	/usr/bin/python
	/usr/bin/ruby
	*/
}
79 
/++
Return all the paths that match the regex pattern.

The pattern is split on path separators and every segment is matched as its
own anchored regex against the entry names of one directory level.
Params:
 path_regex = The path with regex patterns to match. It must start with ^
 and end with $.
Returns: The matching paths. They are relative to the current working
directory when the pattern (without the leading ^) does not start with a
path separator.
Throws: Exception if path_regex does not start with ^ and end with $.
+/
string[] globRegex(string path_regex) {
	import std.string : startsWith, endsWith;

	// Reject anything that is not wrapped in ^...$ (this also covers "")
	if (! path_regex.startsWith('^') || ! path_regex.endsWith('$')) {
		throw new Exception("The regex must start with ^ and end with $.");
	}

	// Remove the regex ^ and $ from the start and end
	string path_name = path_regex[1 .. $-1];

	return getGlobMatches(path_name, true);
}
97 
///
unittest {
	// Documented example for globRegex.
	// The paths listed in the block comment are illustrative only: the
	// real results depend on what exists on the file system being searched.
	import glob : globRegex;

	// Use a regex to match all the number files in /proc/
	// (the pattern has to be wrapped in ^ and $)
	string[] entries = globRegex(`^/proc/[0-9]*$`);
	/*
	entries would contain:
	/proc/111
	/proc/245
	/proc/19533
	/proc/1
	*/
}
112 
// Walks the file system one path segment at a time and returns every path
// whose segments all match the corresponding pattern.
//  path_name = the path whose segments are glob (or regex) patterns
//  is_regex  = true to match each segment as a regex, false to match as a glob
// Returns the matching paths. When path_name does not start with a path
// separator the search starts at the current working directory and the
// results are returned relative to it.
private string[] getGlobMatches(string path_name, bool is_regex) {
	import std.algorithm : map, filter;
	import std.array : array, replace;
	import std.string : split, startsWith, endsWith;
	import std.file : getcwd;

	// Convert the Windows path to a posix path
	path_name = path_name.replace("\\", "/");

	// Break the path into its non-empty segments, one pattern per directory level
	string[] patterns = path_name.split("/").filter!(n => n != "").array();

	// Figure out if using a relative path
	string cwd = getcwd();
	bool is_relative_path = ! path_name.startsWith("/");

	// Make the first path to search the cwd or /
	string[] paths = [is_relative_path ? cwd : "/"];

	// For each pattern, narrow the candidates down to the directory
	// entries one level deeper that match the pattern
	foreach (pattern ; patterns) {
		paths = getMatches(paths, pattern, is_regex);
	}

	// Convert from an absolute path to a relative one, if path_name is relative
	if (is_relative_path) {
		// The prefix to drop is the cwd plus one separator. When the cwd
		// already ends with a separator (e.g. the root directory "/") the
		// entries look like "/bin", so no extra character may be dropped.
		bool cwd_has_separator = cwd.endsWith("/") || cwd.endsWith("\\");
		size_t len = cwd_has_separator ? cwd.length : cwd.length + 1;
		paths =
			paths
				.filter!(n => n.length > len) // Remove paths that are just the prefix
				.map!(n => n[len .. $]) // Remove the path prefix
				.array();
	}

	return paths;
}
157 
// Returns the shallow entries of every directory in path_candidates whose
// base name matches pattern.
//  path_candidates = the directories to list
//  pattern         = a glob pattern, or a regex when is_regex is true
//  is_regex        = selects regex matching instead of glob matching
// The regex has to match the whole base name.
private string[] getMatches(string[] path_candidates, string pattern, bool is_regex=false) {
	import std.path : baseName, globMatch;
	import std.regex : matchFirst, regex, Regex;

	string[] matches;

	// Compile the regex once, and only when it is needed.
	// The pattern is wrapped in a non-capturing group so the anchors apply
	// to the whole pattern: without it "a|b" would become "^a|b$", which
	// only anchors "a" at the start and "b" at the end.
	Regex!char r;
	if (is_regex) {
		r = regex("^(?:" ~ pattern ~ ")$");
	}

	// Iterate through all the entries in the paths
	// and return the ones that match the pattern
	foreach (path ; path_candidates) {
		foreach (entry ; getEntries(path)) {
			string name = baseName(entry);
			bool is_match = is_regex
				? ! matchFirst(name, r).empty
				: globMatch(name, pattern);

			if (is_match) {
				matches ~= entry;
			}
		}
	}

	return matches;
}
191 
// Lists the direct (non recursive) entries of the directory path_name.
// Returns the entry names sorted in ascending order, using / as the path
// separator. A path that cannot be listed yields an empty array.
private string[] getEntries(string path_name) {
	import std.file : dirEntries, SpanMode, FileException;
	import std.algorithm : map, sort;
	import std.array : array, replace;

	string[] names;
	try {
		// Collect into a scratch array first, so that a failure part way
		// through the listing leaves the result empty
		string[] listed;
		foreach (item ; dirEntries(path_name, SpanMode.shallow)) {
			listed ~= item.name;
		}
		names = listed;
	} catch (FileException) {
		// Best effort: anything that cannot be listed has no entries
	}

	// Convert Windows file paths to posix format
	version (Windows) {
		string[] converted;
		foreach (name ; names) {
			converted ~= name.replace("\\", "/");
		}
		names = converted;
	}

	// Default ordering is ascending (a < b)
	sort(names);
	return names;
}