001    package org.hackystat.dailyprojectdata.resource.snapshot;
002    
003    import java.util.Collections;
004    import java.util.HashSet;
005    import java.util.Iterator;
006    import java.util.List;
007    import java.util.Set;
008    import javax.xml.datatype.DatatypeConstants;
009    import javax.xml.datatype.XMLGregorianCalendar;
010    
011    import org.hackystat.sensorbase.client.SensorBaseClient;
012    import org.hackystat.sensorbase.client.SensorBaseClientException;
013    import org.hackystat.sensorbase.resource.sensordata.jaxb.SensorData;
014    import org.hackystat.sensorbase.resource.sensordata.jaxb.SensorDataIndex;
015    import org.hackystat.sensorbase.resource.sensordata.jaxb.SensorDataRef;
016    import org.hackystat.utilities.time.period.Day;
017    import org.hackystat.utilities.tstamp.Tstamp;
018    
019    /**
020     * This class is deprecated, you should use the SensorBase REST API to retrieve snapshots. 
021     * 
022     * SensorDataSnapshot provides a generic means to retrieve and return the set of SensorData of a
023     * given SDT with the most recent 'runtime' value. It does this by retrieving "buckets" of data for
024     * a given interval size (defaulting to 30 minutes), starting at 11:30pm on the given day and
025     * working backward until a complete 'snapshot' has been achieved.
026     * 
027     * The Snapshot algorithm is based upon a number of assumptions that may or may not be true in
028     * practice:
029     * <ul>
030     * <li> When sending data with a given runtime, that "batch" will provide all of the data required
031     * for all projects. The snapshot algorithm does not provide any way to "combine" data from multiple
032     * runs with different timestamps, each for a separate resource in the project.
033     * 
034     * <li> This algorithm depends upon the runtime and tstamp fields being "covariant". More precisely,
035     * given two SensorData instances for a given day called A and B, if runtime(A) is greater than
036     * runtime(B), then tstamp(A) must also be greater than tstamp(B). This assumption enables us to
037     * retrieve SensorData by timestamp in reverse chronological order, and stop as soon as we have a
038     * complete snapshot of runtime values.
039     * 
040     * <li> Finally, this Snapshot is based upon the combined SensorData from all users in a Project.
041     * Thus, results can be unpredictable when SensorData from multiple users (with overlapping
042     * timestamps) is sent at around the same time.
043     * </ul>
044     * 
045     * An alternative algorithm would return a data structure that provides a single entry for every
046     * resource found across all of the users, but using only the latest runtime when multiple entries
047     * for a single resource exist. The problem with this approach is that if a resource is deleted
048     * during a day, it will still be in the snapshot. This can cause bad data when a major refactoring
049     * occurs, resulting in many resources being renamed. In this case, many resources will be
050     * represented twice in the resulting snapshot--once with their old name and once with their new
051     * name.
052     * 
053     * A potentially helpful extension would be to enable retrieval of a snapshot using only data that
054     * contains a property called "DailyProjectDataSnapshot" with a user defined value. This would
055     * enable users to define a daily process that creates "complete" versions of project data with a
056     * common runtime. This process could run at any time and all other data from other users,
057     * regardless of their runtime value, would be ignored.
058     * 
059     * @author jsakuda
060     */
061    public class SensorDataSnapshot implements Iterable<SensorData> {
062      /** If the snapshot has seen older data. */
063      private boolean seenOlderData = false;
064    
065      /** The bucket size in minutes. */
066      private int bucketSize = 30;
067    
068      /** The runtime for this snapshot used to determine if older data has been seen. */
069      private XMLGregorianCalendar snapshotRuntime = null;
070    
071      /** Holds sensor data associated with the latest snapshot. */
072      private Set<SensorData> latestSnapshot = new HashSet<SensorData>();
073    
074      /** The last bucket that was processed. */
075      private SnapshotBucket prevBucket = null;
076    
077      /** Start of the snapshot day. */
078      private XMLGregorianCalendar startOfDay;
079    
080      /** End of the snapshot day. */
081      private XMLGregorianCalendar endOfDay;
082    
083      /** The number of buckets retrieved to create the snapshot. */
084      private int bucketsRetrieved = 0;
085      
086      /**
087       * Creates a new snapshot.
088       * 
089       * @param client The <code>SensorBaseClient</code> to be used for querying for
090       *          <code>SensorData</code>.
091       * @param user The Hackystat user that owns the Project whose data is being retrieved.
092       * @param project The project to obtain data for.
093       * @param sdt The sensor data type to get data for.
094       * @param day The day to get the latest snapshot for.
095       * @throws SensorBaseClientException Thrown if there is an error while communication with the
096       *           sensorbase server.
097       */
098      public SensorDataSnapshot(SensorBaseClient client, String user, String project, String sdt,
099          Day day) throws SensorBaseClientException {
100        this.setUpStartEndDay(day);
101        this.createLatestSnapshot(client, user, project, sdt);
102      }
103    
104      /**
105       * Creates a new snapshot.
106       * 
107       * @param client The <code>SensorBaseClient</code> to be used for querying for
108       *          <code>SensorData</code>.
109       * @param user The Hackystat user that owns the Project whose data is being retrieved.
110       * @param project The project to obtain data for.
111       * @param sdt The sensor data type to get data for.
112       * @param day The day to get the latest snapshot for.
113       * @param bucketSize The interval of time (in minutes) in which data should be retrieved from
114       *          the server.
115       * @throws SensorBaseClientException Thrown if there is an error while communication with the
116       *           sensorbase server.
117       */
118      public SensorDataSnapshot(SensorBaseClient client, String user, String project, String sdt,
119          Day day, int bucketSize) throws SensorBaseClientException {
120        this.bucketSize = bucketSize;
121        this.setUpStartEndDay(day);
122        this.createLatestSnapshot(client, user, project, sdt);
123      }
124    
125      /**
126       * Creates a new snapshot.
127       * 
128       * @param client The <code>SensorBaseClient</code> to be used for querying for
129       *          <code>SensorData</code>.
130       * @param user The Hackystat user that owns the Project whose data is being retrieved.
131       * @param project The project to obtain data for.
132       * @param sdt The sensor data type to get data for.
133       * @param day The day to get the latest snapshot for.
134       * @param bucketSize The interval of time (in minutes) in which data should be retrieved from
135       *          the server.
136       * @param tool The tool that data should be retrieved for.
137       * @throws SensorBaseClientException Thrown if there is an error while communication with the
138       *           sensorbase server.
139       */
140      public SensorDataSnapshot(SensorBaseClient client, String user, String project, String sdt,
141          Day day, int bucketSize, String tool) throws SensorBaseClientException {
142        this.bucketSize = bucketSize;
143        this.setUpStartEndDay(day);
144        this.createLatestToolSnapshot(client, user, project, sdt, tool);
145      }
146    
147      /**
148       * Sets up the start and end day time variables used by the snapshot.
149       * 
150       * @param day The day to use at when setting the start and end of day.
151       */
152      private void setUpStartEndDay(Day day) {
153        long lastTickOfTheDay = day.getLastTickOfTheDay();
154        long firstTickOfTheDay = day.getFirstTickOfTheDay();
155        this.startOfDay = Tstamp.makeTimestamp(firstTickOfTheDay);
156        this.endOfDay = Tstamp.makeTimestamp(lastTickOfTheDay);
157      }
158    
159      /**
160       * Iterates over intervals of time and queries the server for sensordata to create the latest
161       * snapshot for the specified tool.
162       * 
163       * @param client The <code>SensorBaseClient</code> to be used for querying for
164       *          <code>SensorData</code>.
165       * @param user The Hackystat user to obtain data for.
166       * @param project The project to obtain data for.
167       * @param sdt The sensor data type to get data for.
168       * @param tool The tool that the snapshot is for.
169       * @throws SensorBaseClientException Thrown if there is an error while communication with the
170       *           sensorbase server.
171       */
172      private void createLatestToolSnapshot(SensorBaseClient client, String user, String project,
173          String sdt, String tool) throws SensorBaseClientException {
174        SnapshotBucket bucket = this.getNextBucket();
175    
176        // stop checking if older data is seen, or if
177        // bucket becomes null when the entire day has been iterated through
178        while (!this.seenOlderData && bucket != null) {
179          
180          SensorDataIndex index = client.getProjectSensorData(user, project,
181              bucket.getStartTime(), bucket.getEndTime(), sdt);
182    
183          List<SensorDataRef> sensorDataRefList = index.getSensorDataRef();
184          // sort the list by timestamps, newest first
185          Collections.sort(sensorDataRefList, new SensorDataRefComparator(false));
186          for (SensorDataRef sensorDataRef : sensorDataRefList) {
187            if (this.seenOlderData) {
188              // stop iterating, older data found
189              break;
190            }
191            else {
192              SensorData sensorData = client.getSensorData(sensorDataRef);
193              // check that the data is for the correct tool
194              if (sensorData.getTool().equals(tool)) {
195                // tool matches, try to add the data
196                this.addData(sensorData);
197              }
198            }
199          }
200          bucket = this.getNextBucket();
201        }
202    
203      }
204    
205      /**
206       * Iterates over intervals of time and queries the server for sensordata to create the
207       * snapshot.
208       * 
209       * @param client The <code>SensorBaseClient</code> to be used for querying for
210       *          <code>SensorData</code>.
211       * @param user The Hackystat user to obtain data for.
212       * @param project The project to obtain data for.
213       * @param sdt The sensor data type to get data for.
214       * @throws SensorBaseClientException Thrown if there is an error while communication with the
215       *           sensorbase server.
216       */
217      private void createLatestSnapshot(SensorBaseClient client, String user, String project,
218          String sdt) throws SensorBaseClientException {
219    
220        SnapshotBucket bucket = this.getNextBucket();
221    
222        // stop checking if older data is seen, or if
223        // bucket becomes null when the entire day has been iterated through
224        while (!this.seenOlderData && bucket != null) {
225          SensorDataIndex index = client.getProjectSensorData(user, project,
226              bucket.getStartTime(), bucket.getEndTime(), sdt);
227    
228          List<SensorDataRef> sensorDataRefList = index.getSensorDataRef();
229          // sort the list by timestamps, newest first
230          Collections.sort(sensorDataRefList, new SensorDataRefComparator(false));
231          for (SensorDataRef sensorDataRef : sensorDataRefList) {
232            if (this.seenOlderData) {
233              // stop iterating, older data found
234              break;
235            }
236            else {
237              SensorData sensorData = client.getSensorData(sensorDataRef);
238              this.addData(sensorData);
239            }
240          }
241          if (!this.seenOlderData) {
242            bucket = this.getNextBucket();
243          }
244        }
245      }
246    
247      /**
248       * Gets the next bucket of time that should be checked for sensor data.
249       * 
250       * @return Returns the next snapshot bucket or null if all buckets of time for the given day
251       *         have been checked.
252       */
253      private SnapshotBucket getNextBucket() {
254        if (this.prevBucket == null) {
255          // return first bucket, which starts at the end of the day
256          XMLGregorianCalendar startTime = this.getStartTime(this.endOfDay);
257          SnapshotBucket snapshotBucket = new SnapshotBucket(startTime, this.endOfDay);
258          this.prevBucket = snapshotBucket;
259          this.bucketsRetrieved++;
260          return snapshotBucket;
261        }
262        else if (this.prevBucket.getStartTime().compare(this.startOfDay) == DatatypeConstants.GREATER) {
263          // previous bucket did not start at the beginning of the day so,
264          // more buckets can still be obtained
265          // decrement old start time by 1 millisecond to prevent overlap
266          XMLGregorianCalendar newEndTime = Tstamp.incrementMilliseconds(
267              this.prevBucket.getStartTime(), -1);
268          XMLGregorianCalendar newStartTime = this.getStartTime(newEndTime);
269    
270          SnapshotBucket snapshotBucket = new SnapshotBucket(newStartTime, newEndTime);
271          this.prevBucket = snapshotBucket;
272          this.bucketsRetrieved++;
273          return snapshotBucket;
274        }
275        // previous bucket started at the start of day so it was the last bucket
276        return null;
277      }
278    
279      /**
280       * Gets the start time for a bucket based on the end time of the bucket.
281       * 
282       * @param bucketEnd The end time for the bucket.
283       * @return Returns the bucket end time minus the given bucket size or the start of the day if
284       *         subtracting the bucket size overshoots the beginning of the day.
285       */
286      private XMLGregorianCalendar getStartTime(XMLGregorianCalendar bucketEnd) {
287        XMLGregorianCalendar startTime = Tstamp.incrementMinutes(bucketEnd, -this.bucketSize);
288    
289        if (startTime.compare(this.startOfDay) == DatatypeConstants.LESSER) {
290          // calculated start time is before the start of the day
291          // set start time to the start of the day so the remaining data is still retrieved
292          startTime = this.startOfDay;
293        }
294        return startTime;
295      }
296    
297      /**
298       * Adds sensor data to the collection of sensor data only if the runtime is valid.
299       * 
300       * @param data The data to be added to the sensor data collection if it meets all criteria.
301       */
302      private void addData(SensorData data) {
303        XMLGregorianCalendar runtime = data.getRuntime();
304    
305        if (this.snapshotRuntime == null) {
306          // first entry, use that runtime as the snapshot runtime
307          this.snapshotRuntime = runtime;
308        }
309    
310        if (runtime.compare(this.snapshotRuntime) == DatatypeConstants.LESSER) {
311          // new runtime is less than the snapshot runtime, this is older data
312          this.seenOlderData = true;
313        }
314        else {
315          this.latestSnapshot.add(data);
316        }
317      }
318    
319      /**
320       * Returns an iterator over the last <code>SensorData</code> snapshot.
321       * 
322       * @return Returns an iterator over the last <code>SensorData</code> snapshot.
323       */
324      public Iterator<SensorData> iterator() {
325        return this.latestSnapshot.iterator();
326      }
327    
328      /**
329       * Returns the Owner responsible for the Snapshot data, or null if there is no Snapshot data.
330       * @return The owner, or null. 
331       */
332      public String getOwner() {
333        for (SensorData data : this.latestSnapshot) {
334          return data.getOwner();
335        }
336        return null;
337      }
338    
339      /**
340       * Returns the Tool responsible for the Snapshot data, or null if there is no Snapshot data.
341       * @return The owner, or null. 
342       */
343      public String getTool() {
344        for (SensorData data : this.latestSnapshot) {
345          return data.getTool();
346        }
347        return null;
348      }
349      
350      /**
351       * Returns true if the Snapshot contains no data. 
352       * @return True if the Snapshot contains no data. 
353       */
354      public boolean isEmpty() {
355        return this.latestSnapshot.isEmpty();
356      }
357      
358      /**
359       * Gets the number of buckets that were retrieved to create the snapshot. This method is only
360       * meant for testing purposes.
361       * 
362       * @return Returns the number of buckets retrieved during snapshot creation.
363       */
364      int getNumberOfBucketsRetrieved() {
365        return this.bucketsRetrieved;
366      }
367    }