View Javadoc

1   // MyDiscretize.java, created Nov 10, 2004 10:24:08 PM by jwhaley
2   // Copyright (C) 2004 jwhaley
3   // Licensed under the terms of the GNU LGPL; see COPYING for details.
4   package net.sf.bddbddb.order;
5   
6   import java.util.Enumeration;
7   import java.util.Vector;
8   import weka.core.Instances;
9   import weka.core.Option;
10  import weka.core.Utils;
11  import weka.filters.Filter;
12  import weka.filters.unsupervised.attribute.Discretize;
13  
14  /***
15   * Discretizes numeric attributes using equal frequency binning where the
16   * number of bins is equal to the square root of the number of non-missing
17   * values.<p>
18   *
19   * Valid filter-specific options are: <p>
20   *
21   * -R col1,col2-col4,... <br>
22   * Specifies list of columns to Discretize. First
23   * and last are valid indexes. (default: first-last) <p>
24   *
25   * -V <br>
26   * Invert matching sense.<p>
27   *
28   * -D <br>
29   * Make binary nominal attributes. <p>
30   * 
31   * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
32   * @version $Revision: 531 $
33   */
34  public class MyDiscretize extends Discretize {
35  
36      /***
37       * Version ID for serialization.
38       */
39      private static final long serialVersionUID = 3833183631698114354L;
40      
41      double power;
42      
43      public MyDiscretize() {
44          this(0.5);
45      }
46      public MyDiscretize(double p) {
47          this.power = p;
48      }
49      
50    /***
51     * Sets the format of the input instances.
52     *
53     * @param instanceInfo an Instances object containing the input instance
54     * structure (any instances contained in the object are ignored - only the
55     * structure is required).
56     * @return true if the outputFormat may be collected immediately
57     * @exception Exception if the input format can't be set successfully
58     */
59    public boolean setInputFormat(Instances instanceInfo) throws Exception {
60  
61      // alter child behaviour to do what we want
62      m_FindNumBins = true;
63      return super.setInputFormat(instanceInfo);
64    }
65  
66    /***
67     * Finds the number of bins to use and creates the cut points.
68     *
69     * @param index the attribute index
70     */
71   
72    protected void findNumBins(int index) {
73  
74      Instances toFilter = getInputFormat();
75  
76      // Find number of instances for attribute where not missing
77      int numOfInstances = toFilter.numInstances();
78      for (int i = 0; i < toFilter.numInstances(); i++) {
79        if (toFilter.instance(i).isMissing(index))
80      numOfInstances--;
81      }
82  
83      m_NumBins = (int)(Math.pow(numOfInstances, power));
84      System.out.println("Actual num instances: " + numOfInstances);
85      if (m_NumBins > 0) {
86          
87        calculateCutPointsByEqualFrequencyBinning(index);
88      }
89    }
90  
91    /***
92     * Gets an enumeration describing the available options.
93     *
94     * @return an enumeration of all the available options.
95     */
96    public Enumeration listOptions() {
97  
98      Vector newVector = new Vector(7);
99  
100     newVector.addElement(new Option(
101               "\tSpecifies list of columns to Discretize. First"
102           + " and last are valid indexes.\n"
103           + "\t(default: first-last)",
104               "R", 1, "-R <col1,col2-col4,...>"));
105 
106     newVector.addElement(new Option(
107               "\tInvert matching sense of column indexes.",
108               "V", 0, "-V"));
109 
110     newVector.addElement(new Option(
111               "\tOutput binary attributes for discretized attributes.",
112               "D", 0, "-D"));
113 
114     return newVector.elements();
115   }
116 
117 
118   /***
119    * Parses the options for this object. Valid options are: <p>
120    *
121    * -R col1,col2-col4,... <br>
122    * Specifies list of columns to Discretize. First
123    * and last are valid indexes. (default none) <p>
124    *
125    * -V <br>
126    * Invert matching sense.<p>
127    *
128    * -D <br>
129    * Make binary nominal attributes. <p>
130    * 
131    * @param options the list of options as an array of strings
132    * @exception Exception if an option is not supported
133    */
134   public void setOptions(String[] options) throws Exception {
135 
136     setMakeBinary(Utils.getFlag('D', options));
137     setInvertSelection(Utils.getFlag('V', options));
138     
139     String convertList = Utils.getOption('R', options);
140     if (convertList.length() != 0) {
141       setAttributeIndices(convertList);
142     } else {
143       setAttributeIndices("first-last");
144     }
145 
146     if (getInputFormat() != null) {
147       setInputFormat(getInputFormat());
148     }
149   }
150   /***
151    * Gets the current settings of the filter.
152    *
153    * @return an array of strings suitable for passing to setOptions
154    */
155   public String [] getOptions() {
156 
157     String [] options = new String [12];
158     int current = 0;
159 
160     if (getMakeBinary()) {
161       options[current++] = "-D";
162     }
163     if (getInvertSelection()) {
164       options[current++] = "-V";
165     }
166     if (!getAttributeIndices().equals("")) {
167       options[current++] = "-R"; options[current++] = getAttributeIndices();
168     }
169     while (current < options.length) {
170       options[current++] = "";
171     }
172     return options;
173   }
174 
175   /***
176    * Returns a string describing this filter
177    *
178    * @return a description of the filter suitable for
179    * displaying in the explorer/experimenter gui
180    */
181   public String globalInfo() {
182 
183     return "Discretizes numeric attributes using equal frequency binning,"
184       + " where the number of bins is equal to the square root of the"
185       + " number of non-missing values.";
186   }
187   
188   /***
189    * Returns the tip text for this property
190    *
191    * @return tip text for this property suitable for
192    * displaying in the explorer/experimenter gui
193    */
194   public String findNumBinsTipText() {
195 
196     return "Ignored.";
197   }
198 
199   /***
200    * Get the value of FindNumBins.
201    *
202    * @return Value of FindNumBins.
203    */
204   public boolean getFindNumBins() {
205     
206     return false;
207   }
208   
209   /***
210    * Set the value of FindNumBins.
211    *
212    * @param newFindNumBins Value to assign to FindNumBins.
213    */
214   public void setFindNumBins(boolean newFindNumBins) {
215     
216   }
217   
218   /***
219    * Returns the tip text for this property
220    *
221    * @return tip text for this property suitable for
222    * displaying in the explorer/experimenter gui
223    */
224   public String useEqualFrequencyTipText() {
225 
226     return "Always true.";
227   }
228 
229   /***
230    * Get the value of UseEqualFrequency.
231    *
232    * @return Value of UseEqualFrequency.
233    */
234   public boolean getUseEqualFrequency() {
235     
236     return true;
237   }
238   
239   /***
240    * Set the value of UseEqualFrequency.
241    *
242    * @param newUseEqualFrequency Value to assign to UseEqualFrequency.
243    */
244   public void setUseEqualFrequency(boolean newUseEqualFrequency) {
245     
246   }
247 
248   /***
249    * Returns the tip text for this property
250    *
251    * @return tip text for this property suitable for
252    * displaying in the explorer/experimenter gui
253    */
254   public String binsTipText() {
255 
256     return "Ignored.";
257   }
258 
259   /***
260    * Ignored
261    *
262    * @return the number of bins.
263    */
264   public int getBins() {
265 
266     return 0;
267   }
268 
269   /***
270    * Ignored
271    *
272    * @param numBins the number of bins
273    */
274   public void setBins(int numBins) {
275   }
276 
277   /***
278    * Main method for testing this class.
279    *
280    * @param argv should contain arguments to the filter: use -h for help
281    */
282   public static void main(String [] argv) {
283 
284     try {
285       if (Utils.getFlag('b', argv)) {
286     Filter.batchFilterFile(new MyDiscretize(), argv);
287       } else {
288     Filter.filterFile(new MyDiscretize(), argv);
289       }
290     } catch (Exception ex) {
291       System.out.println(ex.getMessage());
292     }
293   }
294 }
295