001    package org.hackystat.sensorbase.uripattern;
002    
003    import java.util.ArrayList;
004    import java.util.List;
005    import java.util.regex.Matcher;
006    
007    import org.hackystat.sensorbase.resource.projects.jaxb.Project;
008    
009    
010    /**
011     * Implements a UriPattern, such as "file://foo/*.java", which can then be matched against a 
012     * concrete URI string, such as "file://foo/Bar.java".  UriPatterns can be "atomic" or "compound".
013     * <p> 
014     * An atomic UriPattern matches against a single string that can contain wildcard characters
015     * like "*", "**", or "?".  
016     * <p>
017     * A compound UriPattern consists of atomic UriPatterns that are composed
018     * together using the "+" and "-" operators. 
019     * For example, (UriPattern1) + (UriPattern2) means (UriPattern1 OR UriPattern2). 
020     * (UriPattern1) - (UriPattern2) means (UriPattern1 AND (NOT UriPattern2)).
021     * Note that in compound UriPatterns, all atomic patterns must be enclosed in parentheses, and
022     * only one level of parentheses is supported. 
023     * <p>
024     * Note: Matching is case-sensitive, and only the forward slash is supported as a path separator.
025     * So, Windows-based sensors must convert their file paths before sending them!
026     * 
027     * @author Philip Johnson (adapted from code originally written for Hackystat 7 by Qin Zhang).
028     *
029     */
030     
031    public class UriPattern implements Comparable<UriPattern> {
032    
033      /** The string provided by the User defining this UriPattern. */
034      private String rawPattern; 
035    
036      /** The processed Pattern instance created from the rawPattern. */
037      private Pattern pattern = null;
038    
039      /** The pattern used to split a rawPattern representing a compound pattern into its components.*/
040      private java.util.regex.Pattern splitPattern = java.util.regex.Pattern
041          .compile("\\(([^\\(\\)]*)\\)");
042      
043      /** True if this UriPattern is a "top-level" UriPattern, which simplifies the matching process. */
044      private boolean isTopLevel; 
045      
046      /** True if this UriPattern is the "matchAll" UriPattern ("**"). */
047      private boolean isMatchAll;
048    
049      /**
050       * Create a UriPattern instance. There are three possible wildcard characters:
051       * <ul>
052       * <li>'**': matches all directories.</li>
053       * <li>'*': zero or more characters. </li>
054       * <li>'?': one and only one character.</li>
055       * </ul>
056       * 
057       * @param pattern The UriPattern. If null is passed, the pattern defaults to "**".
058       */
059      public UriPattern(String pattern) {
060        this.rawPattern = (pattern == null) ? "**" : pattern;
061        this.isTopLevel = determineTopLevel();
062        this.isMatchAll = "**".equals(pattern);
063        
064        Matcher matcher = this.splitPattern.matcher(this.rawPattern);
065        int searchStartIndex = 0;
066        while (matcher.find()) {
067    
068          if (this.pattern == null) {
069            if (this.rawPattern.substring(searchStartIndex, matcher.start()).trim().length() != 0) {
070              throw new RuntimeException("Illegal pattern.");
071            }
072            this.pattern = new AtomicPattern(matcher.group(1));
073          }
074          else {
075            String strOperator = this.rawPattern.substring(searchStartIndex, matcher.start()).trim();
076            if ("+".equals(strOperator)) {
077              this.pattern = new CompoundPattern(Operator.OR, new Pattern[] { this.pattern,
078                  new AtomicPattern(matcher.group(1)) });
079            }
080            else if ("-".equals(strOperator)) {
081              Pattern second = new AtomicPattern(matcher.group(1));
082              CompoundPattern temp = new CompoundPattern(Operator.NOT, new Pattern[] { second });
083              this.pattern = new CompoundPattern(Operator.AND, new Pattern[] { this.pattern, temp });
084            }
085            else {
086              throw new RuntimeException("Illegal pattern.");
087            }
088          }
089    
090          searchStartIndex = matcher.end();
091        }
092    
093        if (this.pattern == null) {
094          this.pattern = new AtomicPattern(this.rawPattern);
095        }
096        else {
097          if (this.rawPattern.substring(searchStartIndex).trim().length() != 0) {
098            throw new RuntimeException("Illegal pattern.");
099          }
100        }
101      }
102      
103      /**
104       * Returns true if resource matches any of the UriPatterns.
105       * @param resource The resource of interest. 
106       * @param uriPatterns The list of UriPatterns.
107       * @return True if there is a match. 
108       */
109      public static boolean matches(String resource, List<UriPattern> uriPatterns) {
110        for (UriPattern pattern : uriPatterns) {
111          if (pattern.matches(resource)) {
112            return true;
113          }
114        }
115        return false;
116      }
117    
118      /**
119       * Returns a List of UriPatterns extracted from the passed Project.
120       * @param project The project containing a list of UriPattern strings. 
121       * @return The List of UriPattern instances. 
122       */
123      public static List<UriPattern> getPatterns(Project project) {
124        List<UriPattern> patterns = new ArrayList<UriPattern>();
125        for (String uriPatternString : project.getUriPatterns().getUriPattern()) {
126          patterns.add(new UriPattern(uriPatternString));
127        }
128        return patterns;
129      }
130    
131      /**
132       * Returns true if the passed path matches this UriPattern.
133       * <p>
134       * Matching is case sensitive.
135       * <p>
136       *  
137       * This implemementation is optimized for contexts in which a high percentage of the
138       * UriPatterns in use are "top-level". A "top-level" UriPattern is a UriPattern like
139       * "file://hackyCore_Kernel/**", where the only wildcard is a trailing "/**".
140       * This implementation tests to see if this UriPattern is a top-level, and if so
141       * determines the match without recourse to the underlying Ant-based pattern matching machinery.
142       * The overhead of checking for top-level is not high, but the performance advantages of this 
143       * implementation are significant whenthere are a high number of calls to "matches()" with
144       * top-level UriPatterns.
145       * <p>
146       * For interesting information on File: URLs, see http://www.cs.tut.fi/~jkorpela/fileurl.html.
147       * 
148       * 
149       * @param path The path to be tested against this UriPattern.
150       * @return True if it matches, false otherwise.
151       */
152      public boolean matches(String path) {
153        // Take care of the case where this UriPattern is "**" right away.
154        if (this.isMatchAll) {
155          return true;
156        }
157    
158        // Top-level processing is a little complicated, so I'm surrounding this with a try-catch block 
159        // just in case there's a weird boundary condition I didn't think of.
160        try {
161          if (this.isTopLevel) {
162            // The path has to be at least as long as the top-level UriPattern w/o wildcard.
163            if (path.length() < this.rawPattern.length() - 3) {
164              return false;
165            }
166    
167            // If UriPattern is top-level, then the path must match exactly except for last 3 chars
168            for (int i = 0; i < this.rawPattern.length() - 3; i++) {
169              if (path.charAt(i) != this.rawPattern.charAt(i)) {
170                return false;
171              }
172            }
173            // Now make sure that either the path ends at the end of the top-level UriPattern
174            // or else that it has a separator right then.
175            return ((path.length() == (this.rawPattern.length() - 3))
176                || (path.charAt(this.rawPattern.length() - 3) == '/')); 
177          }
178          // Else this UriPattern is not top-level, so do it the normal way.
179          else {
180            // If not top-level, do the match the hard way.
181            return this.pattern.matches(path);
182          }
183        }
184        catch (Exception e) {
185          // OK, something bad happened, so try it again the normal way.
186          return this.pattern.matches(path);
187        }
188      }
189    
190      /**
191       * Returns true if this UriPattern is "top-level", such as "file://hackyCore_Kernel/**". 
192       * UriPatterns in which the characters * or ? appear before the final three characters
193       * are not considered "top-level". The final three characters must be "/**" for the UriPattern
194       * to be "top-level".
195       * Called by the constructor and cached in this.isTopLevel.
196       * 
197       * @return True if the UriPattern is "top-level", false otherwise.
198       */
199      private boolean determineTopLevel() {
200        int length = this.rawPattern.length();
201        if (length < 4) {
202          return false;
203        }
204        // A top-level file pattern does not have * or ? until the final three characters.
205        for (int i = 0; i < length - 3; i++) {
206          if ((this.rawPattern.charAt(i) == '*') || (this.rawPattern.charAt(i) == '?')) {
207            return false;
208          }
209        }
210        if (this.rawPattern.charAt(length - 1) == '*' && 
211            this.rawPattern.charAt(length - 2) == '*' && 
212            this.rawPattern.charAt(length - 3) == '/') {
213          return true;
214        }
215        return false;
216      }
217      
218      /**
219       * Returns true if this UriPattern is top-level.
220       * Package private because this method exists for testing purposes only.
221       * @return True if the UriPattern is top-level.
222       */
223      boolean isTopLevel() {
224        return this.isTopLevel;
225      }
226    
227      /**
228       * Compares two objects.
229       * 
230       * @param another The other object.
231       * @return An integer value indicates the relative magnitude of two objects compared.
232       */
233      public int compareTo(UriPattern another) {
234        return this.rawPattern.compareTo(another.rawPattern);
235      }
236    
237      /**
238       * Tests whether two objects contain the same pattern.
239       * 
240       * @param o The other object.
241       * 
242       * @return True if they are equal.
243       */
244      @Override
245      public boolean equals(Object o) {
246        if (!(o instanceof UriPattern)) {
247          return false;
248        }
249        UriPattern another = (UriPattern) o;
250        return this.rawPattern.equals(another.rawPattern);
251      }
252    
253      /**
254       * Gets the hash code of this object.
255       * 
256       * @return The hash code.
257       */
258      @Override
259      public int hashCode() {
260        return this.rawPattern.hashCode();
261      }
262    
263      /**
264       * Returns the 'raw' pattern, which some clients may find a better string representation.
265       * 
266       * @return The 'raw' pattern as a string.
267       */
268      public String getRawPattern() {
269        return this.rawPattern;
270      }
271    
272      /**
273       * Gets the string representation of this file path pattern.
274       * 
275       * @return The string representation.
276       */
277      @Override
278      public String toString() {
279        return "<UriPattern: " + this.rawPattern + ">";
280      }
281    
282    }