1
2
3
4
5
6
7 package net.sf.bddbddb.order;
8
9 import java.lang.reflect.Field;
10 import java.util.Enumeration;
11 import java.util.Random;
12
13 import jwutil.util.Assert;
14 import net.sf.bddbddb.FindBestDomainOrder;
15 import weka.core.FastVector;
16 import weka.core.Instance;
17 import weka.core.Instances;
18 import weka.filters.Filter;
19 import weka.filters.unsupervised.attribute.Discretize;
20
21
22 public class TrialInstances extends Instances {
23
24 /***
25 * Version ID for serialization.
26 */
27 private static final long serialVersionUID = 4049922649398589241L;
28
29 /***
30 * @param name
31 * @param attInfo
32 * @param capacity
33 */
34 public TrialInstances(String name, FastVector attInfo, int capacity) {
35 super(name, attInfo, capacity);
36 }
37
38 public Discretization threshold(double thres) {
39 return threshold(thres, this.classIndex());
40 }
41
42 public Discretization threshold(double thres, int index) {
43 if (numInstances() == 0) return null;
44 FastVector clusterValues = new FastVector(2);
45 TrialInstances[] buckets = new TrialInstances[2];
46 FastVector origAttributes = (FastVector) this.m_Attributes.copy();
47
48 buckets[0] = new TrialInstances(this.m_RelationName + "_bucket_0", origAttributes, 30);
49 buckets[0].setClassIndex(classIndex());
50 buckets[1] = new TrialInstances(this.m_RelationName + "_bucket_1", origAttributes, 30);
51 buckets[1].setClassIndex(classIndex());
52 double[] cutPoint = new double[1];
53 cutPoint[0] = thres;
54
55 clusterValues.addElement("<" + FindBestDomainOrder.format(thres));
56 clusterValues.addElement(">" + FindBestDomainOrder.format(thres));
57 weka.core.Attribute a = new weka.core.Attribute("costThres" + FindBestDomainOrder.format(thres), clusterValues);
58 m_Attributes.setElementAt(a, index);
59 setIndex(a, index);
60 Enumeration f = m_Instances.elements();
61 while (f.hasMoreElements()) {
62 TrialInstance old_i = (TrialInstance) f.nextElement();
63 double oldVal = old_i.value(index);
64 double val = oldVal < thres ? 0 : 1;
65
66 double[] old_i_arr = old_i.toDoubleArray();
67 double[] old_i_copy = new double[old_i_arr.length];
68 System.arraycopy(old_i_arr, 0, old_i_copy, 0, old_i_arr.length);
69 buckets[(int) val].add(new TrialInstance(old_i.weight(), old_i_copy, old_i.getOrder(), old_i.getTrialInfo()));
70 old_i.setValue(index, val);
71 }
72
73 return new Discretization(cutPoint, buckets);
74 }
75
76 public Discretization discretize(double power) {
77 int numBins = (int) Math.pow(numInstances(), power);
78 return discretize(new MyDiscretize(power), numBins, this.classIndex());
79 }
80
81 public Discretization discretize(Discretize d, int numBins, int index) {
82 if (numInstances() <= 1) return null;
83 try {
84 int classIndex = this.classIndex();
85 Assert._assert(classIndex >= 0);
86 setClassIndex(-1);
87 d.setAttributeIndices(Integer.toString(index+1));
88 d.setInputFormat(this);
89 Instances newInstances;
90 newInstances = Filter.useFilter(this, d);
91
92 if (d.getFindNumBins())
93 numBins = d.getBins();
94
95 TrialInstances[] buckets = new TrialInstances[numBins];
96 System.out.println("Num trials: " + numInstances() + " Num bins: " + numBins);
97
98 FastVector origAttributes = (FastVector) this.m_Attributes.copy();
99
100 for (int i = 0; i < numBins; ++i) {
101 buckets[i] = new TrialInstances(this.m_RelationName + "_bucket_" + i, origAttributes, this.numInstances() / numBins);
102 buckets[i].setClassIndex(classIndex);
103 }
104 double[] result = d.getCutPoints(index);
105
106 weka.core.Attribute a = WekaInterface.makeBucketAttribute(numBins);
107 m_Attributes.setElementAt(a, index);
108 setIndex(a, index);
109 Enumeration e = newInstances.enumerateInstances();
110 Enumeration f = m_Instances.elements();
111
112
113 while (e.hasMoreElements()) {
114 Instance new_i = (Instance) e.nextElement();
115 TrialInstance old_i = (TrialInstance) f.nextElement();
116 double val = new_i.value(index);
117 double[] old_i_arr = old_i.toDoubleArray();
118 double[] old_i_copy = new double[old_i_arr.length];
119 System.arraycopy(old_i_arr, 0, old_i_copy, 0, old_i_arr.length);
120 buckets[(int) val].add(new TrialInstance(old_i.weight(), old_i_copy, old_i.getOrder(), old_i.getTrialInfo()));
121 old_i.setValue(index, val);
122 }
123 Assert._assert(!f.hasMoreElements());
124 setClassIndex(classIndex);
125 return new Discretization(result, buckets);
126 } catch (Exception x) {
127 System.out.flush();
128 x.printStackTrace();
129 System.exit(-1);
130 return null;
131 }
132 }
133
134
135 public static void setIndex(weka.core.Attribute a, int i) {
136 try {
137 Class c = Class.forName("weka.core.Attribute");
138 Field f = c.getDeclaredField("m_Index");
139 f.setAccessible(true);
140 f.setInt(a, i);
141 } catch (Exception x) {
142 Assert.UNREACHABLE("weka sucks: " + x);
143 }
144 }
145 public TrialInstances infoClone(){
146 return new TrialInstances(this.m_RelationName, (FastVector) this.m_Attributes.copy(), this.numInstances());
147 }
148
149 public Instances resample(Random random) {
150 TrialInstances newData = infoClone();
151 while (newData.numInstances() < numInstances()) {
152 newData.add(instance(random.nextInt(numInstances())));
153 }
154 newData.setClassIndex(classIndex());
155 return newData;
156 }
157
158
159 public TrialInstances copy(){
160 TrialInstances newInstances = infoClone();
161 for (Enumeration e = enumerateInstances(); e.hasMoreElements();) {
162 TrialInstance instance = (TrialInstance) e.nextElement();
163 TrialInstance newInstance = TrialInstance.cloneInstance(instance);
164 newInstance.setDataset(newInstances);
165 newInstances.add(newInstance);
166 }
167 newInstances.setClassIndex(this.classIndex());
168 return newInstances;
169 }
170
171 }