View Javadoc

1   /*
2    * Created on Jan 18, 2005
3    *
4    * TODO To change the template for this generated file go to
5    * Window - Preferences - Java - Code Style - Code Templates
6    */
7   package net.sf.bddbddb.order;
8   
9   import java.io.PrintStream;
10  import java.util.Collection;
11  import java.util.Iterator;
12  import java.util.LinkedHashSet;
13  import java.util.LinkedList;
14  import java.util.Random;
15  
16  import net.sf.bddbddb.FindBestDomainOrder;
17  import net.sf.bddbddb.InferenceRule;
18  import net.sf.bddbddb.order.WekaInterface.OrderInstance;
19  
20  /***
21   * @author mcarbin
22   *
23   * TODO To change the template for this generated type comment go to
24   * Window - Preferences - Java - Code Style - Code Templates
25   */
26  public  abstract class CandidateSampler {
27      //probably should do something else for these, but it's better than not moving the code
28      static int TRACE = FindBestDomainOrder.TRACE;
29      static PrintStream out = FindBestDomainOrder.out;
30      static Random random = new Random(System.currentTimeMillis());
31      int sampleSize;
32      public static String CPE = "net.sf.bddbddb.order.BaggedId3";
33      
34      public static Collection sample(int sampleSize, Order [] orders, double [] distribution, double normalFact){
35          Collection ordersToTry = new LinkedHashSet();
36          for(int i = 0; i < sampleSize; ++i){
37              double choice = random.nextDouble() * normalFact;
38              double current = 0.;
39              for(int j = 0; j < distribution.length; ++j) {
40                  current += distribution[j];
41                  if (current > choice) {
42                      ordersToTry.add(orders[j]);
43                      break;
44                  }
45              }
46          }
47          return ordersToTry;
48      }
49      
50      public abstract Collection sample(Collection orders,
51              TrialInstances vData, TrialInstances aData, TrialInstances dData, InferenceRule ir, boolean force);
52      
53      public static class UncertaintySampler extends CandidateSampler{
54          double uncertaintyThreshold;
55          double vCenter, aCenter, dCenter;
56          double maxScore;
57          public UncertaintySampler(int sampleSize, double uncertaintyThreshold, double vCenter, double aCenter, double dCenter){
58              this.sampleSize = sampleSize;
59              this.uncertaintyThreshold = uncertaintyThreshold;
60              this.vCenter = vCenter;
61              this.aCenter = aCenter;
62              this.dCenter = dCenter;
63              double vFurthest = Math.max(vCenter, 1 - vCenter);
64              double aFurthest = Math.max(aCenter, 1 - aCenter);
65              double dFurthest = Math.max(dCenter, 1 - dCenter);
66              this.maxScore = RMS(vFurthest, vCenter, aFurthest, aCenter, dFurthest, dCenter);
67              
68          }
69          
70          
71          public static double RMS(double vProb, double vCent, double aProb, double aCent, double dProb, double dCent) {
72              double vDiff = Math.abs(vProb - vCent);
73              double aDiff = Math.abs(aProb - aCent);
74              double dDiff = Math.abs(dProb - dCent);
75              return Math.sqrt(((vDiff * vDiff) + (aDiff * aDiff) + (dDiff * dDiff)) / 3);
76              
77          }
78          
79          public Collection sample(Collection orders,
80                  TrialInstances vData, TrialInstances aData, TrialInstances dData, InferenceRule ir, boolean force) {
81              ClassProbabilityEstimator vCPE = null, aCPE  = null, dCPE = null;
82              vCPE = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, vData));
83              aCPE = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, aData));
84              dCPE = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, dData));
85              
86              OrderTranslator v2a = new VarToAttribTranslator(ir);
87              OrderTranslator a2d = AttribToDomainTranslator.INSTANCE;
88              
89              Order best = null;
90              double bestScore = Double.POSITIVE_INFINITY;
91              double [] distribution = new double[orders.size()];
92              double normalFact = 0;
93              Order [] orderArr = new Order[orders.size()];
94              int i = 0;
95              for (Iterator it = orders.iterator(); it.hasNext(); ++i){
96                  Order o_v = (Order) it.next();
97                  orderArr[i] = o_v;
98                  OrderInstance vInstance = TrialInstance.construct(o_v, vData);
99                  Order o_a = v2a.translate(o_v);
100                 OrderInstance aInstance = TrialInstance.construct(o_a, aData);
101                 Order o_d = a2d.translate(o_a);
102                 OrderInstance dInstance = TrialInstance.construct(o_d, dData);
103                 
104                 double vScore = vCPE != null ? vCPE.classProbability(vInstance, 0) : vCenter;
105                 double aScore = aCPE != null ? aCPE.classProbability(aInstance, 0) : aCenter;
106                 double dScore = dCPE != null ? dCPE.classProbability(dInstance, 0) : dCenter;
107                 
108                 double score = RMS(vScore, vCenter, aScore, aCenter, dScore, dCenter);
109                 distribution[i] = maxScore - score;
110                 normalFact += maxScore - score;
111                 
112                 if (score < bestScore) {
113                     if (TRACE > 1){ 
114                         out.println("Uncertain order "+o_v+" score: "+FindBestDomainOrder.format(score)
115                                 + " (v="+FindBestDomainOrder.format(vScore)+",a="
116                                 + FindBestDomainOrder.format(aScore)+",d="
117                                 + FindBestDomainOrder.format(dScore)+")");
118                         double vVariance = vCPE != null ? vCPE.classVariance(vInstance, 0) : 0;
119                         double aVariance = aCPE != null ? aCPE.classVariance(aInstance, 0) : 0;
120                         double dVariance = dCPE != null ? dCPE.classVariance(dInstance, 0) : 0;
121                         /*out.println("\tVariances: var:" + FindBestDomainOrder.format(vVariance) 
122                          + " attrib:"
123                          + FindBestDomainOrder.format(aVariance)
124                          + " dom:" +dVariance);
125                          */
126                     }
127                     
128                     bestScore = score;
129                     best = o_v;
130                 }
131             }
132             Collection ordersToTry = new LinkedList();
133             if (force || bestScore < uncertaintyThreshold) {
134                 if (sampleSize > 1) {
135                     return sample(sampleSize, orderArr, distribution, normalFact);
136                 }
137                 if(best != null) //don't add null order to list
138                     ordersToTry.add(best);
139             }
140             return ordersToTry;
141         }
142         
143     }
144     
145     public static class LocalVarianceSampler{
146         int numEstimators;
147         int sampleSize;
148         
149         /***
150          * @param numEstimators
151          */
152         public LocalVarianceSampler(int sampleSize, int numEstimators) {
153             this.sampleSize = sampleSize;
154             this.numEstimators = numEstimators;
155         }
156         public Collection localVariance(Collection orders, TrialInstances vData, TrialInstances aData, TrialInstances dData, InferenceRule ir) {
157             ClassProbabilityEstimator [] vEstimators = new ClassProbabilityEstimator[numEstimators];
158             ClassProbabilityEstimator [] aEstimators = new ClassProbabilityEstimator[numEstimators];
159             ClassProbabilityEstimator [] dEstimators = new ClassProbabilityEstimator[numEstimators];
160             
161             for(int i = 0; i < numEstimators; ++i){
162                 TrialInstances vBootData = (TrialInstances) vData.resample(random);
163                 TrialInstances aBootData = (TrialInstances) aData.resample(random);
164                 TrialInstances dBootData = (TrialInstances) dData.resample(random);
165                 vEstimators[i] = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, vBootData));
166                 aEstimators[i] = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, aBootData));
167                 dEstimators[i] = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, dBootData));
168             }
169             
170             double [] distribution = new double[orders.size()];
171             Order[] orderArr = new Order[orders.size()];
172             
173             double normalFact = 0;
174             double [][] estimates = new double[numEstimators + 1][3];
175             
176             OrderTranslator v2a = new VarToAttribTranslator(ir);
177             OrderTranslator a2d = AttribToDomainTranslator.INSTANCE;
178             
179             int i = 0;
180             for(Iterator it = orders.iterator(); it.hasNext(); ++i){
181                 Order o_v = (Order) it.next();
182                 //a little sketchy, since this is different data but it should work
183                 OrderInstance vInstance = TrialInstance.construct(o_v, vData);
184                 Order o_a = v2a.translate(o_v);
185                 OrderInstance aInstance = TrialInstance.construct(o_a, aData);
186                 Order o_d = a2d.translate(o_a);
187                 OrderInstance dInstance = TrialInstance.construct(o_d, dData);
188                 
189                 orderArr[i] = o_v;
190                 estimates[numEstimators][0] = 0;
191                 estimates[numEstimators][1] = 0;
192                 estimates[numEstimators][2] = 0;
193                 for(int j = 0; j < numEstimators; ++j){
194                     estimates[j][0] = vEstimators[j] != null ? vEstimators[j].classProbability(vInstance,0) : 0.5;
195                     estimates[j][1] = aEstimators[j] != null ? aEstimators[j].classProbability(aInstance,0) : 0.5;
196                     estimates[j][2] = dEstimators[j] != null ? dEstimators[j].classProbability(dInstance,0) : 0.5;
197                     estimates[numEstimators][0] += vEstimators[j] != null ? estimates[j][0] : 0;
198                     estimates[numEstimators][1] += aEstimators[j] != null ? estimates[j][1] : 0;
199                     estimates[numEstimators][2] += dEstimators[j] != null ? estimates[j][2] : 0;
200                 }
201                 estimates[numEstimators][0] /= numEstimators;
202                 estimates[numEstimators][1] /= numEstimators;
203                 estimates[numEstimators][2] /= numEstimators;
204                 
205                 for(int j = 0; j < numEstimators; ++j){
206                     double vDiff = estimates[j][0] - estimates[numEstimators][0];
207                     double aDiff = estimates[j][1] - estimates[numEstimators][1];
208                     double dDiff = estimates[j][2] - estimates[numEstimators][2];
209                     distribution[i] += (vDiff * vDiff) + (aDiff * aDiff) + (dDiff * dDiff);
210                 }
211                 normalFact += distribution[i];
212             }
213             
214             /*   Collection ordersToTry = new LinkedHashSet();
215              for(i = 0; i < SAMPLE_SIZE; ++i){
216              double choice = random.nextDouble() * normalFact;
217              double current = 0.;
218              for(int j = 0; j < distribution.length; ++j) {
219              current += distribution[j];
220              if (current > choice) {
221              ordersToTry.add(orderArr[j]);
222              break;
223              }
224              }
225              }
226              return ordersToTry;*/
227             return sample(sampleSize, orderArr, distribution, normalFact);
228         }
229     }
230 }
231