package org.hackystat.sensorbase.uripattern;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;

import org.hackystat.sensorbase.resource.projects.jaxb.Project;


/**
 * Implements a UriPattern, such as "file://foo/*.java", which can then be matched against a
 * concrete URI string, such as "file://foo/Bar.java". UriPatterns can be "atomic" or "compound".
 * <p>
 * An atomic UriPattern matches against a single string that can contain wildcard characters
 * like "*", "**", or "?".
 * <p>
 * A compound UriPattern consists of atomic UriPatterns that are composed
 * together using the "+" and "-" operators.
 * For example, (UriPattern1) + (UriPattern2) means (UriPattern1 OR UriPattern2).
 * (UriPattern1) - (UriPattern2) means (UriPattern1 AND (NOT UriPattern2)).
 * Note that in compound UriPatterns, all atomic patterns must be enclosed in parentheses, and
 * only one level of parentheses is supported.
 * <p>
 * Note: Matching is case-sensitive, and only the forward slash is supported as a path separator.
 * So, Windows-based sensors must convert their file paths before sending them!
 *
 * @author Philip Johnson (adapted from code originally written for Hackystat 7 by Qin Zhang).
 *
 */

public class UriPattern implements Comparable<UriPattern> {

  /** The raw pattern string that matches every path. */
  private static final String MATCH_ALL = "**";

  /** The suffix that a "top-level" UriPattern must end with. */
  private static final String TOP_LEVEL_SUFFIX = "/**";

  /**
   * The regular expression used to split a rawPattern representing a compound pattern into its
   * components. Each match is one parenthesized group containing no nested parentheses; group 1
   * is the text between the parentheses. Compiled once and shared by all instances.
   */
  private static final java.util.regex.Pattern SPLIT_PATTERN = java.util.regex.Pattern
      .compile("\\(([^\\(\\)]*)\\)");

  /** The string provided by the User defining this UriPattern ("**" if null was provided). */
  private final String rawPattern;

  /** The processed Pattern instance created from the rawPattern. */
  private final Pattern pattern;

  /** True if this UriPattern is a "top-level" UriPattern, which simplifies the matching process. */
  private final boolean isTopLevel;

  /** True if this UriPattern is the "matchAll" UriPattern ("**"). */
  private final boolean isMatchAll;

  /**
   * Create a UriPattern instance. There are three possible wildcard characters:
   * <ul>
   * <li>'**': matches all directories.</li>
   * <li>'*': zero or more characters. </li>
   * <li>'?': one and only one character.</li>
   * </ul>
   *
   * @param pattern The UriPattern. If null is passed, the pattern defaults to "**".
   * @throws IllegalArgumentException If the pattern contains parenthesized groups but is not a
   *         well-formed compound pattern.
   */
  public UriPattern(String pattern) {
    this.rawPattern = (pattern == null) ? MATCH_ALL : pattern;
    this.isTopLevel = determineTopLevel(this.rawPattern);
    // Test the normalized rawPattern (not the argument) so that a null argument, which defaults
    // to "**", is also recognized as the match-all pattern.
    this.isMatchAll = MATCH_ALL.equals(this.rawPattern);
    this.pattern = parse(this.rawPattern);
  }

  /**
   * Converts a raw pattern string into a Pattern instance.
   * <p>
   * If the string contains no parenthesized groups, the entire string becomes a single
   * AtomicPattern. Otherwise each parenthesized group becomes an AtomicPattern, and the groups
   * are combined left to right according to the "+" (OR) or "-" (AND NOT) operator found
   * between them. Whitespace around operators is ignored.
   *
   * @param raw The raw pattern string; must not be null.
   * @return The Pattern built from the raw string.
   * @throws IllegalArgumentException If there is text before the first group, text after the
   *         last group, or anything other than "+" or "-" between two groups.
   */
  private static Pattern parse(String raw) {
    Matcher matcher = SPLIT_PATTERN.matcher(raw);
    Pattern result = null;
    int searchStartIndex = 0;

    while (matcher.find()) {
      // The text between the end of the previous group (or the start of the string) and the
      // start of this group: must be empty for the first group, an operator for later ones.
      String separator = raw.substring(searchStartIndex, matcher.start()).trim();

      if (result == null) {
        if (separator.length() != 0) {
          throw new IllegalArgumentException("Illegal pattern.");
        }
        result = new AtomicPattern(matcher.group(1));
      }
      else if ("+".equals(separator)) {
        result = new CompoundPattern(Operator.OR, new Pattern[] { result,
            new AtomicPattern(matcher.group(1)) });
      }
      else if ("-".equals(separator)) {
        // A - B is represented as A AND (NOT B).
        Pattern excluded = new AtomicPattern(matcher.group(1));
        CompoundPattern negated = new CompoundPattern(Operator.NOT, new Pattern[] { excluded });
        result = new CompoundPattern(Operator.AND, new Pattern[] { result, negated });
      }
      else {
        throw new IllegalArgumentException("Illegal pattern.");
      }

      searchStartIndex = matcher.end();
    }

    if (result == null) {
      // No parenthesized groups were found, so the whole string is one atomic pattern.
      return new AtomicPattern(raw);
    }
    if (raw.substring(searchStartIndex).trim().length() != 0) {
      // Trailing text after the last group is not allowed.
      throw new IllegalArgumentException("Illegal pattern.");
    }
    return result;
  }

  /**
   * Returns true if resource matches any of the UriPatterns.
   * @param resource The resource of interest.
   * @param uriPatterns The list of UriPatterns.
   * @return True if there is a match.
   */
  public static boolean matches(String resource, List<UriPattern> uriPatterns) {
    for (UriPattern pattern : uriPatterns) {
      if (pattern.matches(resource)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns a List of UriPatterns extracted from the passed Project.
   * @param project The project containing a list of UriPattern strings.
   * @return The List of UriPattern instances.
   */
  public static List<UriPattern> getPatterns(Project project) {
    List<UriPattern> patterns = new ArrayList<UriPattern>();
    for (String uriPatternString : project.getUriPatterns().getUriPattern()) {
      patterns.add(new UriPattern(uriPatternString));
    }
    return patterns;
  }

  /**
   * Returns true if the passed path matches this UriPattern.
   * <p>
   * Matching is case sensitive.
   * <p>
   *
   * This implementation is optimized for contexts in which a high percentage of the
   * UriPatterns in use are "top-level". A "top-level" UriPattern is a UriPattern like
   * "file://hackyCore_Kernel/**", where the only wildcard is a trailing "/**".
   * This implementation tests to see if this UriPattern is a top-level, and if so
   * determines the match with a plain prefix comparison, without delegating to the parsed
   * Pattern. The overhead of checking for top-level is not high, but the performance advantages
   * of this implementation are significant when there are a high number of calls to "matches()"
   * with top-level UriPatterns.
   * <p>
   * For interesting information on File: URLs, see http://www.cs.tut.fi/~jkorpela/fileurl.html.
   *
   *
   * @param path The path to be tested against this UriPattern.
   * @return True if it matches, false otherwise.
   */
  public boolean matches(String path) {
    // Take care of the case where this UriPattern is "**" right away.
    if (this.isMatchAll) {
      return true;
    }

    // A null path cannot be handled by the prefix comparison, so it falls through to the
    // parsed pattern below.
    if (this.isTopLevel && path != null) {
      // Length of the top-level UriPattern without its trailing "/**".
      int prefixLength = this.rawPattern.length() - TOP_LEVEL_SUFFIX.length();

      // The path must start with the literal prefix. regionMatches returns false (rather than
      // throwing) when the path is shorter than the prefix.
      if (!path.regionMatches(0, this.rawPattern, 0, prefixLength)) {
        return false;
      }
      // Either the path ends exactly at the end of the prefix, or the prefix is followed by a
      // separator. This prevents "file://foo/**" from matching "file://foobar".
      return (path.length() == prefixLength) || (path.charAt(prefixLength) == '/');
    }

    // Not top-level, so do the match the hard way.
    return this.pattern.matches(path);
  }

  /**
   * Returns true if the passed raw pattern is "top-level", such as "file://hackyCore_Kernel/**".
   * UriPatterns in which the characters * or ? appear before the final three characters
   * are not considered "top-level". The final three characters must be "/**" for the UriPattern
   * to be "top-level", and at least one character must precede them.
   * Called by the constructor and cached in this.isTopLevel.
   *
   * @param raw The raw pattern string; must not be null.
   * @return True if the UriPattern is "top-level", false otherwise.
   */
  private static boolean determineTopLevel(String raw) {
    int prefixLength = raw.length() - TOP_LEVEL_SUFFIX.length();
    if (prefixLength < 1 || !raw.endsWith(TOP_LEVEL_SUFFIX)) {
      return false;
    }
    // A top-level file pattern does not have * or ? until the final three characters.
    for (int i = 0; i < prefixLength; i++) {
      char c = raw.charAt(i);
      if (c == '*' || c == '?') {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns true if this UriPattern is top-level.
   * Package private because this method exists for testing purposes only.
   * @return True if the UriPattern is top-level.
   */
  boolean isTopLevel() {
    return this.isTopLevel;
  }

  /**
   * Compares this UriPattern to another by the natural (lexicographic) ordering of their raw
   * pattern strings.
   *
   * @param another The other UriPattern.
   * @return A negative integer, zero, or a positive integer as this raw pattern is less than,
   *         equal to, or greater than the other raw pattern.
   */
  public int compareTo(UriPattern another) {
    return this.rawPattern.compareTo(another.rawPattern);
  }

  /**
   * Tests whether two objects contain the same pattern. Two UriPatterns are equal if and only
   * if their raw pattern strings are equal.
   *
   * @param o The other object.
   *
   * @return True if they are equal.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof UriPattern)) {
      return false;
    }
    UriPattern another = (UriPattern) o;
    return this.rawPattern.equals(another.rawPattern);
  }

  /**
   * Gets the hash code of this object, which is the hash code of the raw pattern string.
   *
   * @return The hash code.
   */
  @Override
  public int hashCode() {
    return this.rawPattern.hashCode();
  }

  /**
   * Returns the 'raw' pattern, which some clients may find a better string representation.
   *
   * @return The 'raw' pattern as a string.
   */
  public String getRawPattern() {
    return this.rawPattern;
  }

  /**
   * Gets the string representation of this file path pattern.
   *
   * @return The string representation.
   */
  @Override
  public String toString() {
    return "<UriPattern: " + this.rawPattern + ">";
  }

}