1%> \brief
2%> Find all files and directory names matching the input pattern by expanding wildcards.<br>
4%> \details
5%> This function performs pattern matching of file and directory names, based on wildcard characters.<br>
%>  This function is similar to wildcard expansion performed by the Unix shell
%>  and Python ``glob.glob`` function, but it can handle more types of wildcards.<br>
9%> The following list highlights the key differences between
10%> this function and the MATLAB intrinsic ``dir()``.<br>
11%> <ol>
12%> <li> ``glob()`` supports wildcards for directories.<br>
13%> <li> ``glob()`` returns the directory part of ``pattern``.<br>
14%> <li> ``glob()`` returns a cell array of matching names.<br>
15%> <li> ``glob()`` does not return hidden files and directories that
16%> start with ``'.'`` unless explicitly specified in ``pattern``.<br>
17%> <li> ``glob()`` does not return ``'.'`` and ``'..'`` unless explicitly specified in ``pattern``.<br>
18%> <li> ``glob()`` adds a trailing file separator to directory names.<br>
19%> <li> ``glob()`` does not return the contents of a directory when a directory is specified.<br>
20%> To return contents of a directory, add a trailing ``'/*'``.<br>
21%> <li> ``glob()`` returns only directory names when a trailing file separator is specified.<br>
22%> <li> On Windows, ``glob()`` is not case sensitive, but it returns matching
23%> names exactly in the case as they are defined on the filesystem.<br>
24%> Case of host and sharename of a UNC path and case of drive
25%> letters will be returned as specified in ``pattern``.<br>
26%> </ol>
28%> \param[in] pattern : The input scalar MATLAB string, containing the search pattern.<br>
29%> Wildcards may be used for basenames and for the directory parts.<br>
30%> If pattern contains directory parts, then these will be included in the output ``pathList``.<br>
31%> Following wildcards can be used:<br>
32%> <ol>
33%> <li> ``*`` match zero or more characters.
34%> <li> ``?`` match any single character.
35%> <li> ``[ab12]`` match one of the specified characters.
36%> <li> ``[^ab12]`` match none of the specified characters
37%> <li> ``[a-z]`` match one character in range of characters
38%> <li> ``{a,b,c}`` matches any one of strings a, b or c<br>
39%> <li> All above wildcards do not match a file separator.<br>
40%> <li> ``**`` match zero or more characters including file separators.<br>
41%> This can be used to match zero or more directory parts
42%> and will recursively list matching names.<br>
43%> Beware that **symbolic linked directories or
44%> junctions may cause an infinite loop** when using the ``**``.<br>
45%> </ol>
46%> \param[in] anycase : The input scalar MATLAB logical.<br>
%>                          If ``true``, the search will be case-insensitive.<br>
%>                          If ``false``, the search will be case-sensitive.<br>
49%> On Windows, ``anycase`` is always reset to ``true`` even if user-specified.<br>
50%> (**optional**. default = ``false`` on Unix and ``true`` on Windows.)
52%> \return
%>  ``pathList``        :   The output MATLAB string array containing the files
%>                          or directories that match the path specified by string ``pattern``.<br>
%>  ``isdirList``       :   The output MATLAB logical array of the same size as ``pathList``,
%>                          each element of which is ``true`` if
%>                          and only if the corresponding element of ``pathList`` is a directory.<br>
59%> \interface{glob}
60%> \code{.m}
62%> [pathList, isdirList] = pm.sys.path.glob(pattern)
63%> [pathList, isdirList] = pm.sys.path.glob(pattern, anycase)
65%> \endcode
67%> \example{glob-raw}
68%> \code{.m}
70%> pm.sys.path.glob("*.m") % list all .m files in current directory.
71%> pm.sys.path.glob("baz/*") % list all files and directories in subdirectory "baz".
72%> pm.sys.path.glob("b*/*.m") % list all .m files in subdirectory names starting with "b".
73%> % The list will include the names of the matching subdirectories.
74%> pm.sys.path.glob("?z*.m") % list all .m files where the second character is 'z'.
75%> pm.sys.path.glob("baz.[ch]") % matches baz.c and baz.h
76%> pm.sys.path.glob("test.[^ch]") % matches test.a but not test.c or test.h
77%> pm.sys.path.glob("demo.[a-c]") % matches demo.a, demo.b, and demo.c
%>      pm.sys.path.glob("test.{foo,bar,baz}") % matches test.foo, test.bar, and test.baz
79%> pm.sys.path.glob(".*") % list all hidden files in current directory, excluding '.' and '..'
80%> pm.sys.path.glob("*/") % list all subdirectories.
81%> pm.sys.path.glob("**") % recursively list all files and directories,
82%> % starting in current directory (current directory name,
83%> % hidden files and hidden directories are excluded).
84%> pm.sys.path.glob("**.m") % list all m-files anywhere in directory tree,
85%> % including m-files in current directory. This
86%> % is equivalent with '**/*.m'.
87%> pm.sys.path.glob("foo/**/") % recursively list all directories, starting in directory 'foo'.
88%> pm.sys.path.glob("**/.svn/") % list all .svn directories in directory tree.
89%> pm.sys.path.glob("**/.*/**") % recursively list all files in hidden directories only.
90%> [paths, isdir] = pm.sys.path.glob('**'); paths(~isdir) % get all files in directory tree.
92%> \endcode
94%> \example{glob}
95%> \include{lineno} example/sys/path/glob/main.m
96%> \output{glob}
97%> \include{lineno} example/sys/path/glob/main.out.m
99%> \final{glob}
125%> \author
126%> \JoshuaOsborne, May 21 2024, 5:24 AM, University of Texas at Arlington<br>
127%> \FatemehBagheri, May 20 2024, 1:25 PM, NASA Goddard Space Flight Center (GSFC), Washington, D.C.<br>
128%> \AmirShahmoradi, May 16 2016, 9:03 AM, Oden Institute for Computational Engineering and Sciences (ICES), UT Austin<br>
function [pathList, isdirList] = glob(pattern, anycase)
    % Expand the wildcard ``pattern`` into the matching file and directory names.
    %
    %   pattern   : char vector or scalar string holding the search pattern.
    %   anycase   : optional logical. ``true`` selects case-insensitive matching
    %               (REGEXPI), ``false`` selects case-sensitive matching (REGEXP).
    %               When omitted it is set to ISPC.
    %
    %   pathList  : string array of matching names (an empty cell when ``pattern``
    %               is empty). Directory names carry a trailing file separator.
    %   isdirList : logical array, true where the match ends with a file separator.
    %               It is computed only when two outputs are requested.
    %
    %   Errors with identifier ``glob:invalidInput`` when ``pattern`` is not text,
    %   has more than one row, or names a Windows drive without a following ':\'.
    %   When called without outputs the matches are displayed instead of returned.

    if isstring(pattern)
        pattern = convertStringsToChars(pattern);
    end

    %%%%
    %%%% check pattern input
    %%%%

    if ischar(pattern)
        if isempty(pattern)
            % return when pattern is empty
            pathList = cell(0);
            isdirList = false(0);
            return
        elseif size(pattern,1)>1
            error('glob:invalidInput', 'pattern must be a single string.')
        end
    else
        error('glob:invalidInput', 'pattern must be a string.')
    end

    %%%%
    %%%% check anycase option
    %%%%

    if nargin == 2
        pm.introspection.verify(anycase, "logical", 1, "anycase");
    else
        % Windows is not case sensitive
        % Unix is case sensitive
        anycase = ispc;
    end

    %%%%
    %%%% define function handle to regular expression function for the specified case sensitivity
    %%%%

    if anycase
        regexp_fhandle = @regexpi;
    else
        regexp_fhandle = @regexp;
    end

    %%%%
    %%%% only use forward slashes as file separator to prevent escaping backslashes in regular expressions
    %%%%

    filespec = strrep(pattern, '\', '/');

    %%%%
    %%%% split pathroot part from pattern
    %%%%

    if strncmp(filespec, '//', 2)
        if ispc
            % pattern specifies a UNC path
            % It is not allowed to get a directory listing of share names of a
            % host with the DIR command.
            % pathroot will contain e.g. //host/share/
            pathroot = regexprep(filespec, '(^//+[^/]+/[^/]+/)(.*)', '$1');
            filespec = regexprep(filespec, '(^//+[^/]+/[^/]+/)(.*)', '$2');
        else
            % for Unix, multiple leading file separators are equivalent with a
            % single file separator: treat the pattern as an absolute path.
            % (pathroot must be assigned here, otherwise it is undefined below.)
            pathroot = '/';
            filespec = regexprep(filespec, '^/+', '');
        end
    elseif strncmp(filespec, '/', 1)
        % pattern specifies an absolute path
        pathroot = '/';
        filespec(1) = [];
    elseif ispc && numel(filespec)>=2 && filespec(2) == ':'
        % pattern specifies an absolute path starting with a drive letter
        % check for a fileseparator after ':'. e.g. 'C:\'
        if numel(filespec)<3 || filespec(3)~='/'
            error('glob:invalidInput','Drive letter must be followed by '':\''.')
        end
        pathroot = filespec(1:3);
        filespec(1:3) = [];
    else
        % pattern specifies a relative path
        pathroot = './';
    end

    %%%% replace multiple file separators by a single file separator

    filespec = regexprep(filespec, '/+', '/');

    %%%% replace 'a**' with 'a*/**', where 'a' can be any character but not '/'
    %%%% (filespec is still a glob here, so the literal text to look for is '**')

    filespec = regexprep(filespec, '([^/])(\*\*)', '$1\*/$2');

    %%%% replace '**a' with '**/*a', where a can be any character but not '/'

    filespec = regexprep(filespec, '(\*\*)([^/])', '$1/\*$2');

    %%%% split filespec into chunks at file separator;
    %%%% a trailing file separator yields a trailing empty chunk

    if isempty(filespec)
        chunks = {};
    else
        chunks = regexp(filespec, '/', 'split');
    end

    %%%% translate chunks to regular expressions

    for i = 1:numel(chunks)
        chunks{i} = glob2regexp(chunks{i});
    end

    %%%% determine file list using LS_REGEXP
    %%%% this function requires that PATHROOT does not contain any wildcards

    if ~isempty(chunks)
        list = ls_regexp(regexp_fhandle, pathroot, chunks{1:end});
    else
        list = {pathroot};
    end
    if strcmp(pathroot, './')
        % remove relative pathroot from result
        list = regexprep(list, '^\./', '');
    end
    if nargout == 2
        % determine directories by checking for '/' at the end
        I = regexp(list', '/$');
        isdirList = ~cellfun('isempty', I);
    end

    %%%%
    %%%% convert to standard file separators for PC
    %%%%

    if ispc
        list = strrep(list, '/', '\');
    end

    %%%%
    %%%% return output
    %%%%

    if nargout == 0
        if ~isempty(list)
            % display list
            disp(string(list))
        else
            disp(['''' pattern ''' not found.']);
        end
    else
        pathList = string(list');
    end
282 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function regexp_str = glob2regexp(glob_str)
    % Translate one glob chunk (text between file separators) into a regular
    % expression string.
    %
    %   *        -> [^/]*      (a doubled '**' ends up as '.*.*')
    %   ?        -> [^/]
    %   {a,b}    -> (a|b)
    %   [abc]    -> [abc]      members are copied literally
    %   [^abc]   -> [^/abc]    negation; '/' is added so it never matches a separator
    %   . ( ) | + ^ $ @ %      are escaped with a backslash outside of [...]
    %
    % An unterminated '[' is copied through unchanged.

    regexp_str = '';
    in_curlies = 0;     % is > 0 within curly braces
    class_state = 0;    % 0: outside [...], 1: right after '[', 2: right after '[^', 3: inside with members

    %%%%
    %%%% handle characters in glob_str one-by-one
    %%%%
    for c = glob_str

        if class_state > 0
            if class_state == 1 && c == '^'
                % leading '^' negates the class; keep it unescaped
                regexp_str = [regexp_str '^/']; %#ok<AGROW>
                class_state = 2;
            elseif class_state == 3 && c == ']'
                regexp_str = [regexp_str ']']; %#ok<AGROW>
                class_state = 0;
            else
                % ordinary class member; wildcards have no special meaning here
                if any(c == '\[]^')
                    regexp_str = [regexp_str '\']; %#ok<AGROW>
                end
                regexp_str = [regexp_str c]; %#ok<AGROW>
                class_state = 3;
            end

        elseif c == '['
            regexp_str = [regexp_str '[']; %#ok<AGROW>
            class_state = 1;

        elseif any(c=='.()|+^$@%')
            % escape simple special characters
            regexp_str = [regexp_str '\' c]; %#ok<AGROW>

        elseif c=='*'
            % '*' should not match '/'
            regexp_str = [regexp_str '[^/]*']; %#ok<AGROW>

        elseif c=='?'
            % '?' should not match '/'
            regexp_str = [regexp_str '[^/]']; %#ok<AGROW>

        elseif c=='{'
            regexp_str = [regexp_str '(']; %#ok<AGROW>
            in_curlies = in_curlies+1;
        elseif c=='}' && in_curlies
            regexp_str = [regexp_str ')']; %#ok<AGROW>
            in_curlies = in_curlies-1;
        elseif c==',' && in_curlies
            regexp_str = [regexp_str '|']; %#ok<AGROW>

        else
            regexp_str = [regexp_str c]; %#ok<AGROW>
        end
    end
    % replace original '**' (that has now become '[^/]*[^/]*') with '.*.*'
    regexp_str = strrep(regexp_str, '[^/]*[^/]*', '.*.*');
end
function L = ls_regexp(regexp_fhandle, path, varargin)
    % List files that match PATH/r1/r2/r3/... where PATH is a string without
    % any wildcards and r1..rn are regular expressions that contain the parts of
    % a filespec between the file separators.
    % L is a cell array with matching file or directory names; directory names
    % get a trailing '/'.
    % REGEXP_FHANDLE contains a function handle to REGEXP or REGEXPI depending
    % on specified case sensitivity.

    % if first regular expression contains '**', examine complete file tree
    if nargin >= 3 && any(regexp(varargin{1}, '\.\*\.\*'))
        L = ls_regexp_tree(regexp_fhandle, path, varargin{:});
        return
    end

    % get contents of path
    entries = dir(path);

    if nargin >= 3
        if strcmp(varargin{1}, '\.') || strcmp(varargin{1}, '\.\.')
            % keep explicitly specified '.' or '..' in first regular expression
            if ispc && ~any(strcmp({entries.name}, '.'))
                % fix strange windows behaviour: root of a volume has no '.' and '..'
                entries(end+1).name = '.';
                entries(end).isdir = true;
                entries(end+1).name = '..';
                entries(end).isdir = true;
            end
        else
            % remove '.' and '..'
            entries(strcmp({entries.name}, '.')) = [];
            entries(strcmp({entries.name}, '..')) = [];

            % keep names starting with '.' only when the first regular
            % expression itself starts with an (escaped) '.'
            if ~strncmp(varargin{1}, '\.', 2)
                entries(strncmp({entries.name}, '.', 1)) = [];
            end
        end
    end

    % define shortcuts
    list_isdir = [entries.isdir];
    list_name = {entries.name};

    L = {}; % initialize
    if nargin==2 % no regular expressions
        % return filename
        if ~isempty(list_name)
            % add a trailing slash to directories
            trailing_fsep = repmat({''}, size(list_name));
            trailing_fsep(list_isdir) = {'/'};
            L = strcat(path, list_name, trailing_fsep);
        end

    elseif nargin==3 % last regular expression
        % return list_name matching regular expression
        isMatch = ~cellfun('isempty', regexp_fhandle(list_name, ['^' varargin{1} '$']));
        list_name = list_name(isMatch);
        list_isdir = list_isdir(isMatch);
        if ~isempty(list_name)
            % add a trailing slash to directories
            trailing_fsep = repmat({''}, size(list_name));
            trailing_fsep(list_isdir) = {'/'};
            L = strcat(path, list_name, trailing_fsep);
        end

    elseif nargin==4 && isempty(varargin{2})
        % only return directories when last regexp is empty
        % return list_name matching regular expression and that are directories
        isMatch = ~cellfun('isempty', regexp_fhandle(list_name, ['^' varargin{1} '$']));
        list_name = list_name(isMatch);
        list_isdir = list_isdir(isMatch);
        if any(list_isdir)
            % add a trailing file separator
            L = strcat(path, list_name(list_isdir), '/');
        end

    else
        % traverse for list_name matching regular expression
        isMatch = ~cellfun('isempty', regexp_fhandle(list_name, ['^' varargin{1} '$']));
        for name = list_name(isMatch)
            L = [L ls_regexp(regexp_fhandle, [path char(name) '/'], varargin{2:end})]; %#ok<AGROW>
        end
    end
end
function L = ls_regexp_tree(regexp_fhandle, path, varargin)
    % Use this function when the first argument of varargin contains '**'.
    % It builds the list of the complete directory tree below PATH and keeps
    % the names that match PATH followed by the regular expressions in
    % varargin joined with '/'.
    % L is a cell array with the matching names.

    % if any regexp starts with '\.', keep hidden files and directories
    keep_hidden = any(~cellfun('isempty', regexp(varargin, '^\\\.')));

    % names of everything below path (directories carry a trailing '/')
    tree = dir_recur(path, keep_hidden);
    L = {tree.name};

    % make one regular expression of all individual regexps
    expression = [regexptranslate('escape', path) strjoin(varargin, '/')];

    % note that /**/ must also match zero directories
    % replace '/**/' (now '/.*.*/') with '(/.*.*/|/)'
    expression = regexprep(expression, '/\.\*\.\*/', '(/\.\*\.\*/|/)');

    % return matching names
    if ~isempty(varargin{end})
        % determine matching names ignoring trailing '/'
        candidates = regexprep(L, '/$', '');
    else
        % determine matching names including trailing '/'
        candidates = L;
    end
    isMiss = cellfun('isempty', regexp_fhandle(candidates, ['^' expression '$']));
    L(isMiss) = [];
end
function d = dir_recur(startdir, keep_hidden)
    % Determine recursive directory contents.
    %
    %   startdir    : directory to list; it is prefixed verbatim to every name,
    %                 so it is expected to end with a file separator.
    %   keep_hidden : when true only '.' and '..' are dropped, otherwise every
    %                 entry whose name starts with '.' is dropped.
    %
    %   d : struct array as returned by DIR, with each ``name`` replaced by
    %       startdir + name (+ '/' for directories), followed by the entries of
    %       all subdirectories.

    % get directory contents
    d = dir(startdir);

    % remove hidden files
    entry_names = {d.name};
    if keep_hidden
        % only remove '.' and '..'
        d(strcmp(entry_names, '.') | strcmp(entry_names, '..')) = [];
    else
        % remove all hidden files and directories
        d(strncmp(entry_names, '.', 1)) = [];
    end

    if ~isempty(d)
        % add trailing fileseparator to directories
        trailing_fsep = repmat({''}, 1, numel(d));
        trailing_fsep([d.isdir]) = {'/'};

        % prefix startdir to name and postfix fileseparator for directories
        dname = strcat(startdir, {d.name}, trailing_fsep);
        [d(:).name] = deal(dname{:});

        % recurse into subdirectories
        % (the list of subdirectories is evaluated once, before d grows)
        for subd = {d([d.isdir]).name}
            d = [d; dir_recur(char(subd), keep_hidden)]; %#ok<AGROW>
        end
    end
end
