View Javadoc

1   /*
2    * Created on Jan 15, 2005
3    *
4    * TODO To change the template for this generated file go to
5    * Window - Preferences - Java - Code Style - Code Templates
6    */
7   package net.sf.bddbddb.order;
8   
9   import java.lang.reflect.Field;
10  import java.util.Enumeration;
11  import java.util.Random;
12  
13  import jwutil.util.Assert;
14  import net.sf.bddbddb.FindBestDomainOrder;
15  import weka.core.FastVector;
16  import weka.core.Instance;
17  import weka.core.Instances;
18  import weka.filters.Filter;
19  import weka.filters.unsupervised.attribute.Discretize;
20  
21  
22  public class TrialInstances extends Instances {
23  
24      /***
25       * Version ID for serialization.
26       */
27      private static final long serialVersionUID = 4049922649398589241L;
28  
29      /***
30       * @param name
31       * @param attInfo
32       * @param capacity
33       */
34      public TrialInstances(String name, FastVector attInfo, int capacity) {
35          super(name, attInfo, capacity);
36      }
37  
38      public Discretization threshold(double thres) {
39          return threshold(thres, this.classIndex());
40      }
41  
42      public Discretization threshold(double thres, int index) {
43          if (numInstances() == 0) return null;
44          FastVector clusterValues = new FastVector(2);
45          TrialInstances[] buckets = new TrialInstances[2];
46          FastVector origAttributes = (FastVector) this.m_Attributes.copy(); //shared across all buckets
47  
48          buckets[0] = new TrialInstances(this.m_RelationName + "_bucket_0", origAttributes, 30);
49          buckets[0].setClassIndex(classIndex());
50          buckets[1] = new TrialInstances(this.m_RelationName + "_bucket_1", origAttributes, 30);
51          buckets[1].setClassIndex(classIndex());
52          double[] cutPoint = new double[1];
53          cutPoint[0] = thres;
54  
55          clusterValues.addElement("<" + FindBestDomainOrder.format(thres));
56          clusterValues.addElement(">" + FindBestDomainOrder.format(thres));
57          weka.core.Attribute a = new weka.core.Attribute("costThres" + FindBestDomainOrder.format(thres), clusterValues);
58          m_Attributes.setElementAt(a, index);
59          setIndex(a, index);
60          Enumeration f = m_Instances.elements();
61          while (f.hasMoreElements()) {
62              TrialInstance old_i = (TrialInstance) f.nextElement();
63              double oldVal = old_i.value(index);
64              double val = oldVal < thres ? 0 : 1;
65              //deep copy order and trial?
66              double[] old_i_arr = old_i.toDoubleArray();
67              double[] old_i_copy = new double[old_i_arr.length];
68              System.arraycopy(old_i_arr, 0, old_i_copy, 0, old_i_arr.length);
69              buckets[(int) val].add(new TrialInstance(old_i.weight(), old_i_copy, old_i.getOrder(), old_i.getTrialInfo()));
70              old_i.setValue(index, val);
71          }
72  
73          return new Discretization(cutPoint, buckets);
74      }
75  
76      public Discretization discretize(double power) {
77          int numBins = (int) Math.pow(numInstances(), power);
78          return discretize(new MyDiscretize(power), numBins, this.classIndex());
79      }
80  
81      public Discretization discretize(Discretize d, int numBins, int index) {
82          if (numInstances() <= 1) return null;
83          try {
84              int classIndex = this.classIndex();
85              Assert._assert(classIndex >= 0);
86              setClassIndex(-1); // clear class instance for discretization.
87              d.setAttributeIndices(Integer.toString(index+1)); // RANGE IS 1-BASED!!!
88              d.setInputFormat(this); // NOTE: this must be LAST because it calls setUpper
89              Instances newInstances;
90              newInstances = Filter.useFilter(this, d);
91              
92              if (d.getFindNumBins()) 
93                  numBins = d.getBins();
94              
95              TrialInstances[] buckets = new TrialInstances[numBins];
96              System.out.println("Num trials: " + numInstances() + " Num bins: " + numBins);
97              //System.out.println("me: " + this);
98              FastVector origAttributes = (FastVector) this.m_Attributes.copy(); //shared across all buckets
99          
100             for (int i = 0; i < numBins; ++i) {
101                 buckets[i] = new TrialInstances(this.m_RelationName + "_bucket_" + i, origAttributes, this.numInstances() / numBins);
102                 buckets[i].setClassIndex(classIndex);
103             }
104             double[] result = d.getCutPoints(index);
105            
106             weka.core.Attribute a = WekaInterface.makeBucketAttribute(numBins);
107             m_Attributes.setElementAt(a, index);
108             setIndex(a, index);
109             Enumeration e = newInstances.enumerateInstances();
110             Enumeration f = m_Instances.elements();
111            
112            // System.out.println("New Instances: " + newInstances);
113             while (e.hasMoreElements()) {
114                 Instance new_i = (Instance) e.nextElement();
115                 TrialInstance old_i = (TrialInstance) f.nextElement();
116                 double val = new_i.value(index);
117                 double[] old_i_arr = old_i.toDoubleArray();
118                 double[] old_i_copy = new double[old_i_arr.length];
119                 System.arraycopy(old_i_arr, 0, old_i_copy, 0, old_i_arr.length);
120                 buckets[(int) val].add(new TrialInstance(old_i.weight(), old_i_copy, old_i.getOrder(), old_i.getTrialInfo()));
121                 old_i.setValue(index, val);
122             }
123             Assert._assert(!f.hasMoreElements());
124             setClassIndex(classIndex); // reset class index.
125             return new Discretization(result, buckets);
126         } catch (Exception x) {
127             System.out.flush();
128             x.printStackTrace();
129             System.exit(-1);
130             return null;
131         }
132     }
133 
134 
135     public static void setIndex(weka.core.Attribute a, int i) {
136         try {
137             Class c = Class.forName("weka.core.Attribute");
138             Field f = c.getDeclaredField("m_Index");
139             f.setAccessible(true);
140             f.setInt(a, i);
141         } catch (Exception x) {
142             Assert.UNREACHABLE("weka sucks: " + x);
143         }
144     }
145     public TrialInstances infoClone(){
146         return new TrialInstances(this.m_RelationName, (FastVector) this.m_Attributes.copy(), this.numInstances());
147     }
148     
149     public Instances resample(Random random) {
150         TrialInstances newData = infoClone();
151         while (newData.numInstances() < numInstances()) {
152           newData.add(instance(random.nextInt(numInstances())));
153         }
154         newData.setClassIndex(classIndex());
155         return newData;
156       }
157     
158     /* Deep copy attributes and instances */
159     public TrialInstances copy(){
160         TrialInstances newInstances = infoClone();
161         for (Enumeration e = enumerateInstances(); e.hasMoreElements();) {
162             TrialInstance instance = (TrialInstance) e.nextElement();
163             TrialInstance newInstance = TrialInstance.cloneInstance(instance);
164             newInstance.setDataset(newInstances);
165             newInstances.add(newInstance);
166         }
167         newInstances.setClassIndex(this.classIndex());
168         return newInstances;
169     }
170     
171 }