001    package org.hackystat.sensorshell;
002    
003    import java.util.ArrayList;
004    import java.util.List;
005    import java.util.Map;
006    import java.util.Random;
007    
008    import org.hackystat.sensorbase.resource.sensordata.jaxb.SensorData;
009    
010    /**
011     * MultiSensorShell is a wrapper around SingleSensorShell that is designed for high performance
012     * transmission of sensor data instances from a client to a server. Prior research has determined
013     * that when a single SensorShell is used to transmit a large amount of data in a short period of
014     * time, it can spend a substantial portion of its time blocked while waiting for an HTTP PUT to
015     * complete. MultiSensorShell overcomes this problem by instantiating multiple SensorShell instances
016     * internally and then passing sensor data to them in a round-robin fashion. Each SensorShell is
017     * passed an autoSendTimeInterval value, which results in a separate thread for each SensorShell
018     * instance that will concurrently send any buffered data at regular time intervals. This
019     * significantly reduces blocked time for MultiSensorShell, because when any individual SensorShell
020     * instance is performing its HTTP PUT call, the MultiSensorShell can be concurrently adding data to
021     * one of the other SensorShell instances.
022     * <p>
023     * The sensorshell.properties file provides a number of tuning parameters for MultiSensorShell
024     * processing. We currently recommend the following settings for best performance:
025     * <ul>
026     * <li>sensorshell.multishell.enabled = true
027     * <li>sensorshell.multishell.numshells = 10
028     * <li>sensorshell.multishell.batchsize = 250
029     * <li>sensorshell.multishell.autosend.timeinterval = 0.05
030     * <li>sensorshell.autosend.maxbuffer = 1000
031     * <li>sensorshell.timeout = 30
032     * </ul>
033     * <p>
034     * Note that offline storage and recovery are automatically disabled when multishell is enabled.
035     * <p>
036     * The TestMultiSensorShell class provides a main() method that we have used to do some simple
037     * performance evaluation, which we report on next. All results were obtained using a MacBook Pro
038     * with a 2.33 Ghz Intel Core Duo processor and 3 GB of 667 Mhz DDR2 SDRAM. Both the client and
039     * SensorBase server were running on this computer to minimize network latency issues.
040     * <p>
041     * If you instantiate a MultiSensorShell with the number of SensorShells set to 1, you effectively
042     * get the default case. In this situation, we have found that the average time to send a single
043     * SensorData instance is approximately 6 milliseconds, almost independent of the settings for
044     * batchSize and the autoSendInterval. Increasing the number of SensorShell instances to 5 doubles
045     * the throughput, to approximately 3 milliseconds per instance. At this point, some kind of
046     * performance plateau is reached, with further tweaking of the tuning parameters seeming to have
047     * little effect. We do not know whether this is a "real" limit or an artificial limit based upon
048     * some environmental feature.
049     * <p>
050     * With the sensorshell.properties settings listed above, we have 
051     * achieved throughput of 2.8 milliseconds per instance (which is equivalent to 360 instances per
052     * second and 1.2M instances per hour.)
053     * <p>
054     * We have also found that we can store around 350,000 sensor data instances per GB of disk space.
055     * <p>
056     * Note that we are effectively disabling autosend.batchsize by setting it
057     * to a high value (30,000). This is because 
058     * reaching the batchSize limit forces a blocking send() of the data, which is precisely what we
059     * want to avoid in MultiSensorShell.  Instead, we try to tune the autosend.timeinterval so that as
060     * many of our send() invocations as possible occur asynchronously in a separate thread. 
061     * <p>
062     * Note that a single SensorShell instance is simpler, creates less processing overhead, and has
063     * equivalent performance to MultiSensorShell for transmission loads up to a dozen or so sensor data
064     * instances per second. We recommend using a single SensorShell instance rather than
065     * MultiSensorShell unless optimizing data transmission throughput is an important requirement.
066     * 
067     * @author Philip Johnson
068     */
069    public class MultiSensorShell implements Shell {
070      /** The internal SensorShells managed by this MultiSensorShell. */
071      private List<SingleSensorShell> shells;
072      /** The total number of shells. */
073      private int numShells;
074      /** The number of SensorData instances to be sent to a single Shell before going to the next. */
075      private int batchSize;
076      /** A counter that indicates how many instances have been sent to the current SensorShell. */
077      private int batchCounter = 0;
078      /** A pointer to the current Shell that is receiving SensorData instances. */
079      private int currShellIndex = 0;
080      /** Used when batchSize == 0. */
081      private Random generator = new Random(0L);
082    
083      /**
084       * Creates a new MultiSensorShell for multi-threaded transmission of SensorData instances to a
085       * SensorBase.
086       * 
087       * @param properties A SensorProperties instance.
088       * @param toolName The name of the tool, used to name the log file.
089       */
090      public MultiSensorShell(SensorShellProperties properties, String toolName) {
091        this.shells = new ArrayList<SingleSensorShell>(properties.getMultiShellNumShells());
092        this.numShells = properties.getMultiShellNumShells();
093        this.batchSize = properties.getMultiShellBatchSize();
094        properties.switchToMultiShellMode();
095        for (int i = 0; i < numShells; i++) { 
096          // MultiSensorShells must always be non-interactive.
097          boolean isInteractive = false;
098          // Each subshell in a multishell goes to its own log file.  
099          String multiToolName = toolName + "-multishell-" + i;
100          SingleSensorShell shell = new SingleSensorShell(properties, isInteractive, multiToolName);
101          this.shells.add(shell);
102        }
103      }
104    
105      /** {@inheritDoc} */
106      public void add(SensorData sensorData) throws SensorShellException {
107        this.shells.get(getCurrShellIndex()).add(sensorData);
108      }
109    
110      /** {@inheritDoc} */
111      public void add(Map<String, String> keyValMap) throws SensorShellException {
112        this.shells.get(getCurrShellIndex()).add(keyValMap);
113      }
114    
115      /**
116       * Returns an index to the current SensorShell index to be used for data transmission. Internally
117       * updates the batchCounter.
118       * If batchSize is 0, then an index is returned at random. In our initial trials, this was found
119       * to be a suboptimal strategy; it is better to set the batchSize to something like 200.
120       * 
121       * @return The index to the current SensorShell instance.
122       */
123      private int getCurrShellIndex() {
124        // If batchSize is 0, then we return a shell index chosen randomly.
125        if (batchSize == 0) {
126          return generator.nextInt(numShells);
127        }
128        // Now, update the batchCounter and change the currShellIndex if necessary.
129        // batchCounter goes from 1 to batchSize.
130        // currShellIndex goes from 0 to numShells -1
131        batchCounter++;
132        if (this.batchCounter > batchSize) {
133          batchCounter = 0;
134          this.currShellIndex++;
135          if (this.currShellIndex >= this.numShells) {
136            this.currShellIndex = 0;
137          }
138        }
139        return currShellIndex;
140      }
141    
142      /** {@inheritDoc} */
143      public boolean ping() {
144        return this.shells.get(0).ping();
145      }
146    
147      /** {@inheritDoc} */
148      public int send() throws SensorShellException {
149        int totalSent = 0;
150        for (int i = 0; i < numShells; i++) {
151          totalSent += this.shells.get(i).send();
152        }
153        return totalSent;
154      }
155    
156      /** {@inheritDoc} */
157      public void quit() throws SensorShellException {
158        for (int i = 0; i < numShells; i++) {
159          this.shells.get(i).quit();
160        }
161      }
162      
163      /** {@inheritDoc} */
164      public boolean hasOfflineData() {
165        // MultiSensorShells can never have offline data. 
166        return false;
167      }
168      
169      /** {@inheritDoc} */
170      public void statechange(long resourceCheckSum, Map<String, String> keyValMap) throws Exception {
171        // The same SingleSensorShell always has to process statechange events.  
172        this.shells.get(0).statechange(resourceCheckSum, keyValMap);
173      }
174      
175      /** {@inheritDoc} */
176      public SensorShellProperties getProperties() {
177        return this.shells.get(0).getProperties();
178      }
179    }