1
2
3
4
5
6
7 package net.sf.bddbddb.order;
8
9 import java.io.PrintStream;
10 import java.util.Collection;
11 import java.util.Iterator;
12 import java.util.LinkedHashSet;
13 import java.util.LinkedList;
14 import java.util.Random;
15
16 import net.sf.bddbddb.FindBestDomainOrder;
17 import net.sf.bddbddb.InferenceRule;
18 import net.sf.bddbddb.order.WekaInterface.OrderInstance;
19
20 /***
21 * @author mcarbin
22 *
23 * TODO To change the template for this generated type comment go to
24 * Window - Preferences - Java - Code Style - Code Templates
25 */
26 public abstract class CandidateSampler {
27
28 static int TRACE = FindBestDomainOrder.TRACE;
29 static PrintStream out = FindBestDomainOrder.out;
30 static Random random = new Random(System.currentTimeMillis());
31 int sampleSize;
32 public static String CPE = "net.sf.bddbddb.order.BaggedId3";
33
34 public static Collection sample(int sampleSize, Order [] orders, double [] distribution, double normalFact){
35 Collection ordersToTry = new LinkedHashSet();
36 for(int i = 0; i < sampleSize; ++i){
37 double choice = random.nextDouble() * normalFact;
38 double current = 0.;
39 for(int j = 0; j < distribution.length; ++j) {
40 current += distribution[j];
41 if (current > choice) {
42 ordersToTry.add(orders[j]);
43 break;
44 }
45 }
46 }
47 return ordersToTry;
48 }
49
50 public abstract Collection sample(Collection orders,
51 TrialInstances vData, TrialInstances aData, TrialInstances dData, InferenceRule ir, boolean force);
52
53 public static class UncertaintySampler extends CandidateSampler{
54 double uncertaintyThreshold;
55 double vCenter, aCenter, dCenter;
56 double maxScore;
57 public UncertaintySampler(int sampleSize, double uncertaintyThreshold, double vCenter, double aCenter, double dCenter){
58 this.sampleSize = sampleSize;
59 this.uncertaintyThreshold = uncertaintyThreshold;
60 this.vCenter = vCenter;
61 this.aCenter = aCenter;
62 this.dCenter = dCenter;
63 double vFurthest = Math.max(vCenter, 1 - vCenter);
64 double aFurthest = Math.max(aCenter, 1 - aCenter);
65 double dFurthest = Math.max(dCenter, 1 - dCenter);
66 this.maxScore = RMS(vFurthest, vCenter, aFurthest, aCenter, dFurthest, dCenter);
67
68 }
69
70
71 public static double RMS(double vProb, double vCent, double aProb, double aCent, double dProb, double dCent) {
72 double vDiff = Math.abs(vProb - vCent);
73 double aDiff = Math.abs(aProb - aCent);
74 double dDiff = Math.abs(dProb - dCent);
75 return Math.sqrt(((vDiff * vDiff) + (aDiff * aDiff) + (dDiff * dDiff)) / 3);
76
77 }
78
79 public Collection sample(Collection orders,
80 TrialInstances vData, TrialInstances aData, TrialInstances dData, InferenceRule ir, boolean force) {
81 ClassProbabilityEstimator vCPE = null, aCPE = null, dCPE = null;
82 vCPE = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, vData));
83 aCPE = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, aData));
84 dCPE = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, dData));
85
86 OrderTranslator v2a = new VarToAttribTranslator(ir);
87 OrderTranslator a2d = AttribToDomainTranslator.INSTANCE;
88
89 Order best = null;
90 double bestScore = Double.POSITIVE_INFINITY;
91 double [] distribution = new double[orders.size()];
92 double normalFact = 0;
93 Order [] orderArr = new Order[orders.size()];
94 int i = 0;
95 for (Iterator it = orders.iterator(); it.hasNext(); ++i){
96 Order o_v = (Order) it.next();
97 orderArr[i] = o_v;
98 OrderInstance vInstance = TrialInstance.construct(o_v, vData);
99 Order o_a = v2a.translate(o_v);
100 OrderInstance aInstance = TrialInstance.construct(o_a, aData);
101 Order o_d = a2d.translate(o_a);
102 OrderInstance dInstance = TrialInstance.construct(o_d, dData);
103
104 double vScore = vCPE != null ? vCPE.classProbability(vInstance, 0) : vCenter;
105 double aScore = aCPE != null ? aCPE.classProbability(aInstance, 0) : aCenter;
106 double dScore = dCPE != null ? dCPE.classProbability(dInstance, 0) : dCenter;
107
108 double score = RMS(vScore, vCenter, aScore, aCenter, dScore, dCenter);
109 distribution[i] = maxScore - score;
110 normalFact += maxScore - score;
111
112 if (score < bestScore) {
113 if (TRACE > 1){
114 out.println("Uncertain order "+o_v+" score: "+FindBestDomainOrder.format(score)
115 + " (v="+FindBestDomainOrder.format(vScore)+",a="
116 + FindBestDomainOrder.format(aScore)+",d="
117 + FindBestDomainOrder.format(dScore)+")");
118 double vVariance = vCPE != null ? vCPE.classVariance(vInstance, 0) : 0;
119 double aVariance = aCPE != null ? aCPE.classVariance(aInstance, 0) : 0;
120 double dVariance = dCPE != null ? dCPE.classVariance(dInstance, 0) : 0;
121
122
123
124
125
126 }
127
128 bestScore = score;
129 best = o_v;
130 }
131 }
132 Collection ordersToTry = new LinkedList();
133 if (force || bestScore < uncertaintyThreshold) {
134 if (sampleSize > 1) {
135 return sample(sampleSize, orderArr, distribution, normalFact);
136 }
137 if(best != null)
138 ordersToTry.add(best);
139 }
140 return ordersToTry;
141 }
142
143 }
144
145 public static class LocalVarianceSampler{
146 int numEstimators;
147 int sampleSize;
148
149 /***
150 * @param numEstimators
151 */
152 public LocalVarianceSampler(int sampleSize, int numEstimators) {
153 this.sampleSize = sampleSize;
154 this.numEstimators = numEstimators;
155 }
156 public Collection localVariance(Collection orders, TrialInstances vData, TrialInstances aData, TrialInstances dData, InferenceRule ir) {
157 ClassProbabilityEstimator [] vEstimators = new ClassProbabilityEstimator[numEstimators];
158 ClassProbabilityEstimator [] aEstimators = new ClassProbabilityEstimator[numEstimators];
159 ClassProbabilityEstimator [] dEstimators = new ClassProbabilityEstimator[numEstimators];
160
161 for(int i = 0; i < numEstimators; ++i){
162 TrialInstances vBootData = (TrialInstances) vData.resample(random);
163 TrialInstances aBootData = (TrialInstances) aData.resample(random);
164 TrialInstances dBootData = (TrialInstances) dData.resample(random);
165 vEstimators[i] = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, vBootData));
166 aEstimators[i] = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, aBootData));
167 dEstimators[i] = (ClassProbabilityEstimator) WekaInterface.buildClassifier(CPE, WekaInterface.binarize(0, dBootData));
168 }
169
170 double [] distribution = new double[orders.size()];
171 Order[] orderArr = new Order[orders.size()];
172
173 double normalFact = 0;
174 double [][] estimates = new double[numEstimators + 1][3];
175
176 OrderTranslator v2a = new VarToAttribTranslator(ir);
177 OrderTranslator a2d = AttribToDomainTranslator.INSTANCE;
178
179 int i = 0;
180 for(Iterator it = orders.iterator(); it.hasNext(); ++i){
181 Order o_v = (Order) it.next();
182
183 OrderInstance vInstance = TrialInstance.construct(o_v, vData);
184 Order o_a = v2a.translate(o_v);
185 OrderInstance aInstance = TrialInstance.construct(o_a, aData);
186 Order o_d = a2d.translate(o_a);
187 OrderInstance dInstance = TrialInstance.construct(o_d, dData);
188
189 orderArr[i] = o_v;
190 estimates[numEstimators][0] = 0;
191 estimates[numEstimators][1] = 0;
192 estimates[numEstimators][2] = 0;
193 for(int j = 0; j < numEstimators; ++j){
194 estimates[j][0] = vEstimators[j] != null ? vEstimators[j].classProbability(vInstance,0) : 0.5;
195 estimates[j][1] = aEstimators[j] != null ? aEstimators[j].classProbability(aInstance,0) : 0.5;
196 estimates[j][2] = dEstimators[j] != null ? dEstimators[j].classProbability(dInstance,0) : 0.5;
197 estimates[numEstimators][0] += vEstimators[j] != null ? estimates[j][0] : 0;
198 estimates[numEstimators][1] += aEstimators[j] != null ? estimates[j][1] : 0;
199 estimates[numEstimators][2] += dEstimators[j] != null ? estimates[j][2] : 0;
200 }
201 estimates[numEstimators][0] /= numEstimators;
202 estimates[numEstimators][1] /= numEstimators;
203 estimates[numEstimators][2] /= numEstimators;
204
205 for(int j = 0; j < numEstimators; ++j){
206 double vDiff = estimates[j][0] - estimates[numEstimators][0];
207 double aDiff = estimates[j][1] - estimates[numEstimators][1];
208 double dDiff = estimates[j][2] - estimates[numEstimators][2];
209 distribution[i] += (vDiff * vDiff) + (aDiff * aDiff) + (dDiff * dDiff);
210 }
211 normalFact += distribution[i];
212 }
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227 return sample(sampleSize, orderArr, distribution, normalFact);
228 }
229 }
230 }
231