1
2
3
4 package net.sf.bddbddb.order;
5
6 import java.util.Enumeration;
7 import java.util.Vector;
8 import weka.core.Instances;
9 import weka.core.Option;
10 import weka.core.Utils;
11 import weka.filters.Filter;
12 import weka.filters.unsupervised.attribute.Discretize;
13
14 /***
15 * Discretizes numeric attributes using equal frequency binning where the
16 * number of bins is equal to the square root of the number of non-missing
17 * values.<p>
18 *
19 * Valid filter-specific options are: <p>
20 *
21 * -R col1,col2-col4,... <br>
22 * Specifies list of columns to Discretize. First
23 * and last are valid indexes. (default: first-last) <p>
24 *
25 * -V <br>
26 * Invert matching sense.<p>
27 *
28 * -D <br>
29 * Make binary nominal attributes. <p>
30 *
31 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
32 * @version $Revision: 531 $
33 */
34 public class MyDiscretize extends Discretize {
35
36 /***
37 * Version ID for serialization.
38 */
39 private static final long serialVersionUID = 3833183631698114354L;
40
41 double power;
42
43 public MyDiscretize() {
44 this(0.5);
45 }
46 public MyDiscretize(double p) {
47 this.power = p;
48 }
49
50 /***
51 * Sets the format of the input instances.
52 *
53 * @param instanceInfo an Instances object containing the input instance
54 * structure (any instances contained in the object are ignored - only the
55 * structure is required).
56 * @return true if the outputFormat may be collected immediately
57 * @exception Exception if the input format can't be set successfully
58 */
59 public boolean setInputFormat(Instances instanceInfo) throws Exception {
60
61
62 m_FindNumBins = true;
63 return super.setInputFormat(instanceInfo);
64 }
65
66 /***
67 * Finds the number of bins to use and creates the cut points.
68 *
69 * @param index the attribute index
70 */
71
72 protected void findNumBins(int index) {
73
74 Instances toFilter = getInputFormat();
75
76
77 int numOfInstances = toFilter.numInstances();
78 for (int i = 0; i < toFilter.numInstances(); i++) {
79 if (toFilter.instance(i).isMissing(index))
80 numOfInstances--;
81 }
82
83 m_NumBins = (int)(Math.pow(numOfInstances, power));
84 System.out.println("Actual num instances: " + numOfInstances);
85 if (m_NumBins > 0) {
86
87 calculateCutPointsByEqualFrequencyBinning(index);
88 }
89 }
90
91 /***
92 * Gets an enumeration describing the available options.
93 *
94 * @return an enumeration of all the available options.
95 */
96 public Enumeration listOptions() {
97
98 Vector newVector = new Vector(7);
99
100 newVector.addElement(new Option(
101 "\tSpecifies list of columns to Discretize. First"
102 + " and last are valid indexes.\n"
103 + "\t(default: first-last)",
104 "R", 1, "-R <col1,col2-col4,...>"));
105
106 newVector.addElement(new Option(
107 "\tInvert matching sense of column indexes.",
108 "V", 0, "-V"));
109
110 newVector.addElement(new Option(
111 "\tOutput binary attributes for discretized attributes.",
112 "D", 0, "-D"));
113
114 return newVector.elements();
115 }
116
117
118 /***
119 * Parses the options for this object. Valid options are: <p>
120 *
121 * -R col1,col2-col4,... <br>
122 * Specifies list of columns to Discretize. First
123 * and last are valid indexes. (default none) <p>
124 *
125 * -V <br>
126 * Invert matching sense.<p>
127 *
128 * -D <br>
129 * Make binary nominal attributes. <p>
130 *
131 * @param options the list of options as an array of strings
132 * @exception Exception if an option is not supported
133 */
134 public void setOptions(String[] options) throws Exception {
135
136 setMakeBinary(Utils.getFlag('D', options));
137 setInvertSelection(Utils.getFlag('V', options));
138
139 String convertList = Utils.getOption('R', options);
140 if (convertList.length() != 0) {
141 setAttributeIndices(convertList);
142 } else {
143 setAttributeIndices("first-last");
144 }
145
146 if (getInputFormat() != null) {
147 setInputFormat(getInputFormat());
148 }
149 }
150 /***
151 * Gets the current settings of the filter.
152 *
153 * @return an array of strings suitable for passing to setOptions
154 */
155 public String [] getOptions() {
156
157 String [] options = new String [12];
158 int current = 0;
159
160 if (getMakeBinary()) {
161 options[current++] = "-D";
162 }
163 if (getInvertSelection()) {
164 options[current++] = "-V";
165 }
166 if (!getAttributeIndices().equals("")) {
167 options[current++] = "-R"; options[current++] = getAttributeIndices();
168 }
169 while (current < options.length) {
170 options[current++] = "";
171 }
172 return options;
173 }
174
175 /***
176 * Returns a string describing this filter
177 *
178 * @return a description of the filter suitable for
179 * displaying in the explorer/experimenter gui
180 */
181 public String globalInfo() {
182
183 return "Discretizes numeric attributes using equal frequency binning,"
184 + " where the number of bins is equal to the square root of the"
185 + " number of non-missing values.";
186 }
187
188 /***
189 * Returns the tip text for this property
190 *
191 * @return tip text for this property suitable for
192 * displaying in the explorer/experimenter gui
193 */
194 public String findNumBinsTipText() {
195
196 return "Ignored.";
197 }
198
199 /***
200 * Get the value of FindNumBins.
201 *
202 * @return Value of FindNumBins.
203 */
204 public boolean getFindNumBins() {
205
206 return false;
207 }
208
209 /***
210 * Set the value of FindNumBins.
211 *
212 * @param newFindNumBins Value to assign to FindNumBins.
213 */
214 public void setFindNumBins(boolean newFindNumBins) {
215
216 }
217
218 /***
219 * Returns the tip text for this property
220 *
221 * @return tip text for this property suitable for
222 * displaying in the explorer/experimenter gui
223 */
224 public String useEqualFrequencyTipText() {
225
226 return "Always true.";
227 }
228
229 /***
230 * Get the value of UseEqualFrequency.
231 *
232 * @return Value of UseEqualFrequency.
233 */
234 public boolean getUseEqualFrequency() {
235
236 return true;
237 }
238
239 /***
240 * Set the value of UseEqualFrequency.
241 *
242 * @param newUseEqualFrequency Value to assign to UseEqualFrequency.
243 */
244 public void setUseEqualFrequency(boolean newUseEqualFrequency) {
245
246 }
247
248 /***
249 * Returns the tip text for this property
250 *
251 * @return tip text for this property suitable for
252 * displaying in the explorer/experimenter gui
253 */
254 public String binsTipText() {
255
256 return "Ignored.";
257 }
258
259 /***
260 * Ignored
261 *
262 * @return the number of bins.
263 */
264 public int getBins() {
265
266 return 0;
267 }
268
269 /***
270 * Ignored
271 *
272 * @param numBins the number of bins
273 */
274 public void setBins(int numBins) {
275 }
276
277 /***
278 * Main method for testing this class.
279 *
280 * @param argv should contain arguments to the filter: use -h for help
281 */
282 public static void main(String [] argv) {
283
284 try {
285 if (Utils.getFlag('b', argv)) {
286 Filter.batchFilterFile(new MyDiscretize(), argv);
287 } else {
288 Filter.filterFile(new MyDiscretize(), argv);
289 }
290 } catch (Exception ex) {
291 System.out.println(ex.getMessage());
292 }
293 }
294 }
295