001    package org.hackystat.sensor.xmldata.util;
002    
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
007    
008    /**
009     * Provides an encoder for transforming a List instance containing strings into a single string
010     * (suitable for transmission via SOAP) and a decoder for reconstructing the List on the other side.
011     * Individual strings must be less than MAX_STRING_LENGTH, and the number of strings in the list
012     * must be less than MAX_NUM_STRINGS. Throws an exception if these maxima are exceeded, or if the
013     * List to be decoded contains a non-String element, or if the String to be decoded is not correctly
014     * formatted for decoding, or if the decoded string is not completely consumed by the decoding
015     * process.
016     * 
017     * @author Philip M. Johnson
018     * @version $Id: StringListCodec.java,v 1.1.1.1 2005/10/20 23:56:44 johnson Exp $
019     */
020    public class StringListCodec {
021      /** The maximum length of any individual string to be encoded. */
022      public static final int MAX_STRING_LENGTH = 99999;
023      /** The DecimalFormat pattern for this max size. */
024      private static final String STRING_LENGTH_PATTERN = "00000";
025      /** The number of characters used to represent length field. */
026      private static final int STRING_LENGTH_FIELD_LENGTH = StringListCodec.STRING_LENGTH_PATTERN
027          .length();
028    
029      /** The maximum number of strings that can be encoded. */
030      public static final int MAX_NUM_STRINGS = 9999;
031      /** The DecimalFormat pattern for this max strings. */
032      private static final String NUM_STRINGS_PATTERN = "0000";
033      /** The number of characters used to represent the total number of encoded strings. */
034      private static final int NUM_STRINGS_FIELD_LENGTH = StringListCodec.NUM_STRINGS_PATTERN.length();
035    
036      /**
037       * Provides a thread-local version of DecimalFormat to support multi-threading.
038       * 
039       * @author Philip Johnson
040       */
041      private static class ThreadLocalDecimalFormat extends ThreadLocal<Object> {
042        /**
043         * The initialization function.
044         * 
045         * @return The DecimalFormat instance for this thread.
046         */
047        @Override
048        public Object initialValue() {
049          return NumberFormat.getInstance();
050        }
051      }
052    
053      /** The thread-local instance wrapper for the decimalFormat object. */
054      private static ThreadLocalDecimalFormat decimalFormat = new ThreadLocalDecimalFormat();
055    
056      /**
057       * Gets the thread-local DecimalFormat instance.
058       * 
059       * @return The DecimalFormat instance for this thread.
060       */
061      private static DecimalFormat getDecimalFormat() {
062        return (DecimalFormat) decimalFormat.get();
063      }
064    
065      /**
066       * Encodes the passed list of strings into a single string and returns it.
067       * 
068       * @param stringList a <code>List</code> value
069       * @return a <code>String</code> value
070       * @exception StringListCodecException If the list contains a non-String, or if the number of
071       *            strings in the list exceeds MAX_NUM_STRINGS, or if the length of any individual
072       *            string exceeds MAX_STRING_LENGTH.
073       */
074      public static String encode(List<String> stringList) throws StringListCodecException {
075        // Make sure we don't have too many list elements.
076        if (stringList.size() > MAX_NUM_STRINGS) {
077          // Make darn sure someone hears about this even if the following exception is swallowed.
078          System.out.println("ERROR: StringListCodec max num strings exceeded.");
079          throw new StringListCodecException("String List exceeds " + MAX_NUM_STRINGS + " elements: "
080              + stringList);
081        }
082    
083        StringBuffer buff = new StringBuffer(computeBufferLength(stringList));
084    
085        // Encode the total number of list elements at the beginning of the string.
086        getDecimalFormat().applyPattern(NUM_STRINGS_PATTERN);
087        buff.append(((DecimalFormat) decimalFormat.get()).format(stringList.size()));
088    
089        // From now on, we encode using the following pattern.
090        getDecimalFormat().applyPattern(STRING_LENGTH_PATTERN);
091    
092        // Loop through the elements and add them to the string buffer.
093        for (String element : stringList) {
094          // replace all occurences of "\r", "\r\n" with "\n"
095          element = element.replaceAll("\r\n", "\n").replace('\r', '\n');
096    
097          // Second, make sure it's not too long.
098          if (element.length() > MAX_STRING_LENGTH) {
099            // Make darn sure someone hears about this even if the following exception is swallowed.
100            System.out.println("ERROR: StringListCodec found a too long string.");
101            throw new StringListCodecException("String list contains too long string: " + stringList);
102          }
103    
104          // Now we add its size and the string itself to our buffer.
105          buff.append(getDecimalFormat().format(element.length()));
106          buff.append(element);
107        }
108        return buff.toString();
109      }
110    
111      /**
112       * Computes the exact length of the StringBuffer to allocate for this encoded string. This is
113       * worth the expense since StringBuffers are 16 chars by default and double each time they're
114       * exceeded, throwing away the old char array. A typical encoded string is gonna be 100 chars or
115       * more, which means throwing away 4-5 char arrays each time if we don't figure out the size in
116       * advance.
117       * 
118       * @param stringList a <code>List</code> value
119       * @return an <code>int</code> value
120       */
121      private static int computeBufferLength(List<String> stringList) {
122        int length = NUM_STRINGS_FIELD_LENGTH;
123        for (String element : stringList) {
124          length += element.length() + STRING_LENGTH_FIELD_LENGTH;
125        }
126        return length;
127      }
128    
129      /**
130       * Decodes the passed string, returning a List of strings.
131       * 
132       * @param encodedString The encoded list of strings.
133       * @return A new list of strings.
134       * @exception StringListCodecException If the passed encodedString is not encoded properly.
135       */
136      public static List<String> decode(String encodedString) throws StringListCodecException {
137        // replace all occurences of "\r", "\r\n" with "\n"
138        String newEncodedString = encodedString.replaceAll("\r\n", "\n").replace('\r', '\n');
139    
140        // Get the number of fields to be decoded.
141        int numFields;
142        try {
143          numFields = Integer.parseInt(newEncodedString.substring(0, NUM_STRINGS_FIELD_LENGTH));
144        }
145        catch (Exception e) {
146          throw new StringListCodecException("Error decoding numFields: " + newEncodedString, e);
147        }
148        // Make an array list to hold this number of elements.
149        ArrayList<String> stringList = new ArrayList<String>(numFields);
150        // Cursor always holds the index of next character to be processed in string.
151        int cursor = NUM_STRINGS_FIELD_LENGTH;
152        // Loop through the specified number of fields, extracting the field length and string,
153        // and incrementing cursor.
154        for (int i = 0; i < numFields; i++) {
155          // First, get the field length.
156          int fieldLength;
157          String field;
158          try {
159            fieldLength = Integer.parseInt(newEncodedString.substring(cursor, cursor
160                + STRING_LENGTH_FIELD_LENGTH));
161          }
162          catch (Exception e) {
163            throw new StringListCodecException("Parse failed for field " + i + " and string "
164                + newEncodedString, e);
165          }
166    
167          // Second, extract that substring
168          cursor += STRING_LENGTH_FIELD_LENGTH;
169          try {
170            field = newEncodedString.substring(cursor, cursor + fieldLength);
171          }
172          catch (Exception e) {
173            throw new StringListCodecException("Could not extract field " + i + "from string "
174                + newEncodedString, e);
175          }
176    
177          // Third, add the field to the list, and increment the cursor.
178          stringList.add(field);
179          cursor += fieldLength;
180        }
181    
182        // Make sure we've consumed the entire string.
183        if (cursor != newEncodedString.length()) {
184          throw new StringListCodecException("Encoded string too long: " + newEncodedString);
185        }
186    
187        // We've extracted all of the fields, so now return the list.
188        return stringList;
189      }
190    }