001    /*
002     * Modified by Qin Zhang, originally from Ant source code.
003     * Modifications:
004     *   (1) class package and class name changed.
005     *   (2) one method is commented out.
006     *   (3) class access right changed from public to package private.
007     */
008    
009    /*
010     * Copyright  2002-2004 The Apache Software Foundation
011     *
012     *  Licensed under the Apache License, Version 2.0 (the "License");
013     *  you may not use this file except in compliance with the License.
014     *  You may obtain a copy of the License at
015     *
016     *      http://www.apache.org/licenses/LICENSE-2.0
017     *
018     *  Unless required by applicable law or agreed to in writing, software
019     *  distributed under the License is distributed on an "AS IS" BASIS,
020     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
021     *  See the License for the specific language governing permissions and
022     *  limitations under the License.
023     *
024     */
025    package org.hackystat.sensorbase.uripattern;
026    
027    import java.io.File;
028    import java.util.StringTokenizer;
029    import java.util.Vector;
030    
031    
032    //import org.apache.tools.ant.types.Resource;
033    
034    /**
035     * <p><b>For package internal use only. Copied from Ant 1.6.</b></p>
036     *
037     * <p>This is a utility class used by selectors and DirectoryScanner. The
038     * functionality more properly belongs just to selectors, but unfortunately
039     * DirectoryScanner exposed these as protected methods. Thus we have to
040     * support any subclasses of DirectoryScanner that may access these methods.
041     * </p>
042     * <p>This is a Singleton.</p>
043     *
044     * @author Arnout J. Kuiper
045     * <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
046     * @author Magesh Umasankar
047     * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
048     * @since 1.5
049     *
050     * @version Ant 1.6
051     */
052    final class PatternMatcherImpl {
053    
054      private static PatternMatcherImpl instance = new PatternMatcherImpl();
055    
056      /**
057       * Private Constructor.
058       */
059      private PatternMatcherImpl() {
060      }
061    
062      /**
063       * Retrieves the instance of the Singleton.
064       * @return singleton instance
065       */
066      public static PatternMatcherImpl getInstance() {
067        return instance;
068      }
069      
070      private static String separator = "/";
071      private static char separatorChar = '/';
072    
073      /**
074       * Tests whether or not a given path matches the start of a given
075       * pattern up to the first "**".
076       * <p>
077       * This is not a general purpose test and should only be used if you
078       * can live with false positives. For example, <code>pattern=**\a</code>
079       * and <code>str=b</code> will yield <code>true</code>.
080       *
081       * @param pattern The pattern to match against. Must not be
082       *                <code>null</code>.
083       * @param str     The path to match, as a String. Must not be
084       *                <code>null</code>.
085       *
086       * @return whether or not a given path matches the start of a given
087       * pattern up to the first "**".
088       */
089      public static boolean matchPatternStart(String pattern, String str) {
090        return matchPatternStart(pattern, str, true);
091      }
092    
093      /**
094       * Tests whether or not a given path matches the start of a given
095       * pattern up to the first "**".
096       * <p>
097       * This is not a general purpose test and should only be used if you
098       * can live with false positives. For example, <code>pattern=**\a</code>
099       * and <code>str=b</code> will yield <code>true</code>.
100       *
101       * @param pattern The pattern to match against. Must not be
102       *                <code>null</code>.
103       * @param str     The path to match, as a String. Must not be
104       *                <code>null</code>.
105       * @param isCaseSensitive Whether or not matching should be performed
106       *                        case sensitively.
107       *
108       * @return whether or not a given path matches the start of a given
109       * pattern up to the first "**".
110       */
111      public static boolean matchPatternStart(String pattern, String str,
112                                              boolean isCaseSensitive) {
113        // When str starts with a File.separator, pattern has to start with a
114        // File.separator.
115        // When pattern starts with a File.separator, str has to start with a
116        // File.separator.
117        if (str.startsWith(separator)
118            != pattern.startsWith(separator)) {
119          return false;
120        }
121    
122        String[] patDirs = tokenizePathAsArray(pattern);
123        String[] strDirs = tokenizePathAsArray(str);
124    
125        int patIdxStart = 0;
126        int patIdxEnd = patDirs.length - 1;
127        int strIdxStart = 0;
128        int strIdxEnd = strDirs.length - 1;
129    
130        // up to first '**'
131        while (patIdxStart <= patIdxEnd && strIdxStart <= strIdxEnd) {
132          String patDir = patDirs[patIdxStart];
133          if ("**".equals(patDir)) {
134            break;
135          }
136          if (!match(patDir, strDirs[strIdxStart], isCaseSensitive)) {
137            return false;
138          }
139          patIdxStart++;
140          strIdxStart++;
141        }
142    
143    
144    //    if (strIdxStart > strIdxEnd) {
145    //      // String is exhausted
146    //      return true;
147    //    }
148    //    else if (patIdxStart > patIdxEnd) {
149    //      // String not exhausted, but pattern is. Failure.
150    //      return false;
151    //    }
152    //    else {
153    //      // pattern now holds ** while string is not exhausted
154    //      // this will generate false positives but we can live with that.
155    //      return true;
156    //    }
157    
158        //check style is too stupid, compains, modify to please it.
159        boolean theResult;
160        if (strIdxStart > strIdxEnd) {
161          theResult = true;
162        }
163        else if (patIdxStart > patIdxEnd) {
164          theResult = false;
165        }
166        else {
167          theResult = true;
168        }
169        return theResult;
170      }
171    
172      /**
173       * Tests whether or not a given path matches a given pattern.
174       *
175       * @param pattern The pattern to match against. Must not be
176       *                <code>null</code>.
177       * @param str     The path to match, as a String. Must not be
178       *                <code>null</code>.
179       *
180       * @return <code>true</code> if the pattern matches against the string,
181       *         or <code>false</code> otherwise.
182       */
183      public static boolean matchPath(String pattern, String str) {
184        return matchPath(pattern, str, true);
185      }
186    
187      /**
188       * Tests whether or not a given path matches a given pattern.
189       *
190       * @param pattern The pattern to match against. Must not be
191       *                <code>null</code>.
192       * @param str     The path to match, as a String. Must not be
193       *                <code>null</code>.
194       * @param isCaseSensitive Whether or not matching should be performed
195       *                        case sensitively.
196       *
197       * @return <code>true</code> if the pattern matches against the string,
198       *         or <code>false</code> otherwise.
199       */
200      public static boolean matchPath(String pattern, String str, boolean isCaseSensitive) {
201        // I DON'T THINK THIS APPLIES TO HACKYSTAT 8 (pmj 6/2007)
202        // When str starts with a File.separator, pattern has to start with a
203        // File.separator.
204        // When pattern starts with a File.separator, str has to start with a
205        // File.separator.
206        //    if (str.startsWith(File.separator)
207        //        != pattern.startsWith(File.separator)) {
208        //      return false;
209        //    }
210    
211        String[] patDirs = tokenizePathAsArray(pattern);
212        String[] strDirs = tokenizePathAsArray(str);
213    
214        int patIdxStart = 0;
215        int patIdxEnd = patDirs.length - 1;
216        int strIdxStart = 0;
217        int strIdxEnd = strDirs.length - 1;
218    
219        // up to first '**'
220        while (patIdxStart <= patIdxEnd && strIdxStart <= strIdxEnd) {
221          String patDir = patDirs[patIdxStart];
222          if ("**".equals(patDir)) {
223            break;
224          }
225          if (!match(patDir, strDirs[strIdxStart], isCaseSensitive)) {
226            patDirs = null;
227            strDirs = null;
228            return false;
229          }
230          patIdxStart++;
231          strIdxStart++;
232        }
233        if (strIdxStart > strIdxEnd) {
234          // String is exhausted
235          for (int i = patIdxStart; i <= patIdxEnd; i++) {
236            if (!patDirs[i].equals("**")) {
237              patDirs = null;
238              strDirs = null;
239              return false;
240            }
241          }
242          return true;
243        }
244        else {
245          if (patIdxStart > patIdxEnd) {
246            // String not exhausted, but pattern is. Failure.
247            patDirs = null;
248            strDirs = null;
249            return false;
250          }
251        }
252    
253        // up to last '**'
254        while (patIdxStart <= patIdxEnd && strIdxStart <= strIdxEnd) {
255          String patDir = patDirs[patIdxEnd];
256          if ("**".equals(patDir)) {
257            break;
258          }
259          if (!match(patDir, strDirs[strIdxEnd], isCaseSensitive)) {
260            patDirs = null;
261            strDirs = null;
262            return false;
263          }
264          patIdxEnd--;
265          strIdxEnd--;
266        }
267        if (strIdxStart > strIdxEnd) {
268          // String is exhausted
269          for (int i = patIdxStart; i <= patIdxEnd; i++) {
270            if (!patDirs[i].equals("**")) {
271              patDirs = null;
272              strDirs = null;
273              return false;
274            }
275          }
276          return true;
277        }
278    
279        while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
280          int patIdxTmp = -1;
281          for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
282            if (patDirs[i].equals("**")) {
283              patIdxTmp = i;
284              break;
285            }
286          }
287          if (patIdxTmp == patIdxStart + 1) {
288            // '**/**' situation, so skip one
289            patIdxStart++;
290            continue;
291          }
292          // Find the pattern between padIdxStart & padIdxTmp in str between
293          // strIdxStart & strIdxEnd
294          int patLength = (patIdxTmp - patIdxStart - 1);
295          int strLength = (strIdxEnd - strIdxStart + 1);
296          int foundIdx = -1;
297          strLoop:
298              for (int i = 0; i <= strLength - patLength; i++) {
299            for (int j = 0; j < patLength; j++) {
300              String subPat = patDirs[patIdxStart + j + 1];
301              String subStr = strDirs[strIdxStart + i + j];
302              if (!match(subPat, subStr, isCaseSensitive)) {
303                continue strLoop;
304              }
305            }
306    
307            foundIdx = strIdxStart + i;
308            break;
309          }
310    
311          if (foundIdx == -1) {
312            patDirs = null;
313            strDirs = null;
314            return false;
315          }
316    
317          patIdxStart = patIdxTmp;
318          strIdxStart = foundIdx + patLength;
319        }
320    
321        for (int i = patIdxStart; i <= patIdxEnd; i++) {
322          if (!patDirs[i].equals("**")) {
323            patDirs = null;
324            strDirs = null;
325            return false;
326          }
327        }
328    
329        return true;
330      }
331    
332      /**
333       * Tests whether or not a string matches against a pattern.
334       * The pattern may contain two special characters:<br>
335       * '*' means zero or more characters<br>
336       * '?' means one and only one character
337       *
338       * @param pattern The pattern to match against.
339       *                Must not be <code>null</code>.
340       * @param str     The string which must be matched against the pattern.
341       *                Must not be <code>null</code>.
342       *
343       * @return <code>true</code> if the string matches against the pattern,
344       *         or <code>false</code> otherwise.
345       */
346      public static boolean match(String pattern, String str) {
347        return match(pattern, str, true);
348      }
349    
350      /**
351       * Tests whether or not a string matches against a pattern.
352       * The pattern may contain two special characters:<br>
353       * '*' means zero or more characters<br>
354       * '?' means one and only one character
355       *
356       * @param pattern The pattern to match against.
357       *                Must not be <code>null</code>.
358       * @param str     The string which must be matched against the pattern.
359       *                Must not be <code>null</code>.
360       * @param isCaseSensitive Whether or not matching should be performed
361       *                        case sensitively.
362       *
363       *
364       * @return <code>true</code> if the string matches against the pattern,
365       *         or <code>false</code> otherwise.
366       */
367      public static boolean match(String pattern, String str,
368                                  boolean isCaseSensitive) {
369        char[] patArr = pattern.toCharArray();
370        char[] strArr = str.toCharArray();
371        int patIdxStart = 0;
372        int patIdxEnd = patArr.length - 1;
373        int strIdxStart = 0;
374        int strIdxEnd = strArr.length - 1;
375        char ch;
376    
377        boolean containsStar = false;
378        for (int i = 0; i < patArr.length; i++) {
379          if (patArr[i] == '*') {
380            containsStar = true;
381            break;
382          }
383        }
384    
385        if (!containsStar) {
386          // No '*'s, so we make a shortcut
387          if (patIdxEnd != strIdxEnd) {
388            return false; // Pattern and string do not have the same size
389          }
390          for (int i = 0; i <= patIdxEnd; i++) {
391            ch = patArr[i];
392            if (ch != '?') {
393              if (isCaseSensitive && ch != strArr[i]) {
394                return false; // Character mismatch
395              }
396              if (!isCaseSensitive && Character.toUpperCase(ch)
397                  != Character.toUpperCase(strArr[i])) {
398                return false; // Character mismatch
399              }
400            }
401          }
402          return true; // String matches against pattern
403        }
404    
405        if (patIdxEnd == 0) {
406          return true; // Pattern contains only '*', which matches anything
407        }
408    
409        // Process characters before first star
410        while ((ch = patArr[patIdxStart]) != '*' && strIdxStart <= strIdxEnd) {
411          if (ch != '?') {
412            if (isCaseSensitive && ch != strArr[strIdxStart]) {
413              return false; // Character mismatch
414            }
415            if (!isCaseSensitive && Character.toUpperCase(ch)
416                != Character.toUpperCase(strArr[strIdxStart])) {
417              return false; // Character mismatch
418            }
419          }
420          patIdxStart++;
421          strIdxStart++;
422        }
423        if (strIdxStart > strIdxEnd) {
424          // All characters in the string are used. Check if only '*'s are
425          // left in the pattern. If so, we succeeded. Otherwise failure.
426          for (int i = patIdxStart; i <= patIdxEnd; i++) {
427            if (patArr[i] != '*') {
428              return false;
429            }
430          }
431          return true;
432        }
433    
434        // Process characters after last star
435        while ((ch = patArr[patIdxEnd]) != '*' && strIdxStart <= strIdxEnd) {
436          if (ch != '?') {
437            if (isCaseSensitive && ch != strArr[strIdxEnd]) {
438              return false; // Character mismatch
439            }
440            if (!isCaseSensitive && Character.toUpperCase(ch)
441                != Character.toUpperCase(strArr[strIdxEnd])) {
442              return false; // Character mismatch
443            }
444          }
445          patIdxEnd--;
446          strIdxEnd--;
447        }
448        if (strIdxStart > strIdxEnd) {
449          // All characters in the string are used. Check if only '*'s are
450          // left in the pattern. If so, we succeeded. Otherwise failure.
451          for (int i = patIdxStart; i <= patIdxEnd; i++) {
452            if (patArr[i] != '*') {
453              return false;
454            }
455          }
456          return true;
457        }
458    
459        // process pattern between stars. padIdxStart and patIdxEnd point
460        // always to a '*'.
461        while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
462          int patIdxTmp = -1;
463          for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
464            if (patArr[i] == '*') {
465              patIdxTmp = i;
466              break;
467            }
468          }
469          if (patIdxTmp == patIdxStart + 1) {
470            // Two stars next to each other, skip the first one.
471            patIdxStart++;
472            continue;
473          }
474          // Find the pattern between padIdxStart & padIdxTmp in str between
475          // strIdxStart & strIdxEnd
476          int patLength = (patIdxTmp - patIdxStart - 1);
477          int strLength = (strIdxEnd - strIdxStart + 1);
478          int foundIdx = -1;
479          strLoop:
480              for (int i = 0; i <= strLength - patLength; i++) {
481            for (int j = 0; j < patLength; j++) {
482              ch = patArr[patIdxStart + j + 1];
483              if (ch != '?') {
484                if (isCaseSensitive && ch != strArr[strIdxStart + i
485                    + j]) {
486                  continue strLoop;
487                }
488                if (!isCaseSensitive
489                    && Character.toUpperCase(ch)
490                    != Character.toUpperCase(strArr[strIdxStart + i + j])) {
491                  continue strLoop;
492                }
493              }
494            }
495    
496            foundIdx = strIdxStart + i;
497            break;
498          }
499    
500          if (foundIdx == -1) {
501            return false;
502          }
503    
504          patIdxStart = patIdxTmp;
505          strIdxStart = foundIdx + patLength;
506        }
507    
508        // All characters in the string are used. Check if only '*'s are left
509        // in the pattern. If so, we succeeded. Otherwise failure.
510        for (int i = patIdxStart; i <= patIdxEnd; i++) {
511          if (patArr[i] != '*') {
512            return false;
513          }
514        }
515        return true;
516      }
517    
518      /**
519       * Breaks a path up into a Vector of path elements, tokenizing on
520       * <code>File.separator</code>.
521       *
522       * @param path Path to tokenize. Must not be <code>null</code>.
523       *
524       * @return a Vector of path elements from the tokenized path
525       */
526      public static Vector<String> tokenizePath(String path) { //NOPMD
527        return tokenizePath(path, separator);
528      }
529    
530      /**
531       * Breaks a path up into a Vector of path elements.  It tokenizes on
532       *
533       * @param path Path to tokenize. Must not be <code>null</code>.
534       * @param separator the separator against which to tokenize.
535       *
536       * @return a Vector of path elements from the tokenized path
537       * @since Ant 1.6
538       */
539      public static Vector<String> tokenizePath(String path, String separator) {  //NOPMD
540        Vector<String> ret = new Vector<String>();
541        StringTokenizer st = new StringTokenizer(path, separator);
542        while (st.hasMoreTokens()) {
543          ret.addElement(st.nextToken());
544        }
545        return ret;
546      }
547    
548      /**
549       * Same as {@link #tokenizePath tokenizePath} but hopefully faster.
550       *
551       * @param path Path to tokenize. Must not be <code>null</code>.
552       * @return An array of path elements from the tokenized path.
553       */
554      private static String[] tokenizePathAsArray(String path) {
555        char sep = separatorChar;
556        int start = 0;
557        int len = path.length();
558        int count = 0;
559        for (int pos = 0; pos < len; pos++) {
560          if (path.charAt(pos) == sep) {
561            if (pos != start) {
562              count++;
563            }
564            start = pos + 1;
565          }
566        }
567        if (len != start) {
568          count++;
569        }
570        String[] l = new String[count];
571        count = 0;
572        start = 0;
573        for (int pos = 0; pos < len; pos++) {
574          if (path.charAt(pos) == sep) {
575            if (pos != start) {
576              String tok = path.substring(start, pos);
577              l[count++] = tok;
578            }
579            start = pos + 1;
580          }
581        }
582        if (len != start) {
583          String tok = path.substring(start);
584          l[count /*++*/] = tok;
585        }
586        return l;
587      }
588    
589      /**
590       * Returns dependency information on these two files. If src has been
591       * modified later than target, it returns true. If target doesn't exist,
592       * it likewise returns true. Otherwise, target is newer than src and
593       * is not out of date, thus the method returns false. It also returns
594       * false if the src file doesn't even exist, since how could the
595       * target then be out of date.
596       *
597       * @param src the original file
598       * @param target the file being compared against
599       * @param granularity the amount in seconds of slack we will give in
600       *        determining out of dateness
601       * @return whether the target is out of date
602       */
603      public static boolean isOutOfDate(File src, File target, int granularity) {
604        if (!src.exists()) {
605          return false;
606        }
607        if (!target.exists()) {
608          return true;
609        }
610        if ((src.lastModified() - granularity) > target.lastModified()) {
611          return true;
612        }
613        return false;
614      }
615    
616    //  /**
617    //   * Returns dependency information on these two resources. If src has been
618    //   * modified later than target, it returns true. If target doesn't exist,
619    //   * it likewise returns true. Otherwise, target is newer than src and
620    //   * is not out of date, thus the method returns false. It also returns
621    //   * false if the src file doesn't even exist, since how could the
622    //   * target then be out of date.
623    //   *
624    //   * @param src the original resource
625    //   * @param target the resource being compared against
626    //   * @param granularity the amount in seconds of slack we will give in
627    //   *        determining out of dateness
628    //   * @return whether the target is out of date
629    //   */
630    //    public static boolean isOutOfDate(Resource src, Resource target,
631    //                                      int granularity) {
632    //        if (!src.isExists()) {
633    //            return false;
634    //        }
635    //        if (!target.isExists()) {
636    //            return true;
637    //        }
638    //        if ((src.getLastModified() - granularity) > target.getLastModified()) {
639    //            return true;
640    //        }
641    //        return false;
642    //    }
643    
644      /**
645       * "Flattens" a string by removing all whitespace (space, tab, linefeed,
646       * carriage return, and formfeed). This uses StringTokenizer and the
647       * default set of tokens as documented in the single arguement constructor.
648       *
649       * @param input a String to remove all whitespace.
650       * @return a String that has had all whitespace removed.
651       */
652      public static String removeWhitespace(String input) {
653        StringBuffer result = new StringBuffer();
654        if (input != null) {
655          StringTokenizer st = new StringTokenizer(input);
656          while (st.hasMoreTokens()) {
657            result.append(st.nextToken());
658          }
659        }
660        return result.toString();
661      }
662    
663      /**
664       * Tests if a string contains stars or question marks.
665       * @param input a String which one wants to test for containing wildcard
666       * @return true if the string contains at least a star or a question mark
667       */
668      public static boolean hasWildcards(String input) {
669        return (input.indexOf('*') != -1 || input.indexOf('?') != -1);
670      }
671    
672      /**
673       * Removes from a pattern all tokens to the right containing wildcards.
674       * @param input the input string
675       * @return the leftmost part of the pattern without wildcards
676       */
677      public static String rtrimWildcardTokens(String input) {
678        Vector<String> v = tokenizePath(input, separator);
679        StringBuffer sb = new StringBuffer();
680        for (int counter = 0; counter < v.size(); counter++) {
681          if (hasWildcards(v.elementAt(counter))) {
682            break;
683          }
684          if (counter > 0) {
685            sb.append(separator);
686          }
687          sb.append(v.elementAt(counter));
688        }
689        return sb.toString();
690      }
691    }