1 | package felix.optimizer; |
2 | |
3 | import java.util.ArrayList; |
4 | import java.util.HashSet; |
5 | import java.util.regex.Matcher; |
6 | import java.util.regex.Pattern; |
7 | |
8 | import tuffy.db.RDB; |
9 | import tuffy.mln.Literal; |
10 | import tuffy.ra.ConjunctiveQuery; |
11 | import tuffy.util.Config; |
12 | import felix.dstruct.FelixPredicate; |
13 | import felix.main.Felix; |
14 | import felix.util.FelixConfig; |
15 | import felix.util.FelixUIMan; |
16 | |
17 | /** |
18 | * An object of the cost model used by the optimizer. This |
19 | * class works as a bridge from PostgreSQL's optimizer to |
20 | * Felix's optimizer. |
21 | * |
22 | * @author Ce Zhang |
23 | * |
24 | */ |
25 | public class CostModel { |
26 | |
27 | /** |
28 | * Felix object in which this CostModel is used. |
29 | */ |
30 | Felix parentFelix; |
31 | |
32 | /** |
33 | * Database connection. |
34 | */ |
35 | RDB db; |
36 | |
37 | /** |
38 | * The ratio of postgreSQL's unit cost to page IO cost. |
39 | * This is estimated by some sample queries. |
40 | */ |
41 | public double postgreUnit = -1; |
42 | public double memoryTradeOff = 200; |
43 | |
44 | /** |
45 | * Return the number of rows of the given table. |
46 | * @param tableName |
47 | * @return |
48 | */ |
49 | public double getTableSize(String tableName){ |
50 | String sql = "SELECT * FROM " + tableName; |
51 | return getRows(sql); |
52 | } |
53 | |
54 | /** |
55 | * Return the width (in bits) of the given table. |
56 | * @param tableName |
57 | * @return |
58 | */ |
59 | public double getTableWidth(String tableName){ |
60 | String sql = "SELECT * FROM " + tableName; |
61 | return getWidth(sql); |
62 | } |
63 | |
64 | /** |
65 | * Return a new {@link resultTuple}. |
66 | * @return |
67 | */ |
68 | public resultTuple getNewEmptyResultTuple(){ |
69 | return new resultTuple(); |
70 | } |
71 | |
72 | /** |
73 | * Return the cost of the given sql query. |
74 | * @param sql |
75 | * @return |
76 | */ |
77 | public double getCost(String sql){ |
78 | // System.out.println(sql); |
79 | String explain = db.estimateCost(sql); |
80 | if(explain == null){ |
81 | return Double.MAX_VALUE; |
82 | } |
83 | Pattern pa = Pattern.compile("cost=(.*?)\\.\\.(.*?) rows=(.*?) width=(.*?)\\)"); |
84 | Matcher m = pa.matcher(explain); |
85 | double cost = -1; |
86 | if(m.find()){ |
87 | cost = Double.valueOf(m.group(2)); |
88 | } |
89 | return cost; |
90 | } |
91 | |
92 | /** |
93 | * Return the number of rows returned by the given sql query. |
94 | * @param sql |
95 | * @return |
96 | */ |
97 | public double getRows(String sql){ |
98 | // System.out.println(sql); |
99 | String explain = db.estimateCost(sql); |
100 | if(explain == null){ |
101 | return Double.MAX_VALUE; |
102 | } |
103 | Pattern pa = Pattern.compile("cost=(.*?)\\.\\.(.*?) rows=(.*?) width=(.*?)\\)"); |
104 | Matcher m = pa.matcher(explain); |
105 | double row = -1; |
106 | if(m.find()){ |
107 | row = Double.valueOf(m.group(3)); |
108 | } |
109 | return row; |
110 | } |
111 | |
112 | /** |
113 | * Return the width (in bits) returned by the given sql query. |
114 | * @param sql |
115 | * @return |
116 | */ |
117 | public double getWidth(String sql){ |
118 | String explain = db.estimateCost(sql); |
119 | if(explain == null){ |
120 | return Double.MAX_VALUE; |
121 | } |
122 | Pattern pa = Pattern.compile("cost=(.*?)\\.\\.(.*?) rows=(.*?) width=(.*?)\\)"); |
123 | Matcher m = pa.matcher(explain); |
124 | double width = -1; |
125 | if(m.find()){ |
126 | width = Double.valueOf(m.group(4)); |
127 | } |
128 | return width; |
129 | } |
130 | |
131 | /** |
132 | * Estimate {@link CostModel#postgreUnit}. |
133 | */ |
134 | public void getPostgreUnit(){ |
135 | if(postgreUnit > 0){ |
136 | return; |
137 | } |
138 | int ct = 0; |
139 | double sum = 0; |
140 | for(FelixPredicate p : parentFelix.getFelixQuery().getAllPred()){ |
141 | String sql = "SELECT * FROM " + p.getRelName(); |
142 | double cost = this.getCost(sql); |
143 | double row = this.getRows(sql); |
144 | double width = this.getWidth(sql); |
145 | if(row > 0){ |
146 | sum += cost/((row*width)/8192); |
147 | ct ++; |
148 | } |
149 | } |
150 | postgreUnit = sum/ct; |
151 | } |
152 | |
153 | /** |
154 | * The constructor of CostModel. This function will load the evidence |
155 | * file if it is not loaded before. |
156 | * @param _felix |
157 | */ |
158 | public CostModel(Felix _felix){ |
159 | |
160 | parentFelix = _felix; |
161 | if(!parentFelix.hasLoadedEvidence){ |
162 | try { |
163 | parentFelix.loadEvidence(); |
164 | } catch (InterruptedException e) { |
165 | e.printStackTrace(); |
166 | } |
167 | } |
168 | |
169 | db = RDB.getRDBbyConfig(Config.db_schema); |
170 | |
171 | this.getPostgreUnit(); |
172 | FelixUIMan.println(2, 0, ">>> PostgreSQL's cost per page is " + |
173 | "esitmated as " + this.postgreUnit + "\n"); |
174 | } |
175 | |
176 | /** |
177 | * Close the database connection used by CostModel. |
178 | */ |
179 | public void close(){ |
180 | db.close(); |
181 | } |
182 | |
183 | /** |
184 | * Return the cost of Fully-Materialization plan for q. |
185 | * @param q |
186 | * @param numberOfBB |
187 | * @param numberOfBF |
188 | * @param numberOfFF |
189 | * @param whichToBound |
190 | * @return |
191 | */ |
192 | public double getFullMatCost(ConjunctiveQuery q, double numberOfBB, double numberOfBF, double numberOfFF, HashSet<String> whichToBound){ |
193 | |
194 | double rs = 0.0; |
195 | |
196 | Double fullCost = Double.MAX_VALUE; |
197 | Double incCost = Double.MAX_VALUE; |
198 | Double nOfResultTuples = Double.MAX_VALUE; |
199 | Double nOfTableTuples = Double.MAX_VALUE; |
200 | Double width = 1.0; |
201 | |
202 | if(q.getBoundedSQL(new HashSet<String>()) == null){ |
203 | return Double.MAX_VALUE; |
204 | } |
205 | |
206 | fullCost = this.getCost(q.getBoundedSQL(new HashSet<String>())); |
207 | nOfTableTuples = this.getRows(q.getBoundedSQL(new HashSet<String>())); |
208 | width = this.getWidth(q.getBoundedSQL(new HashSet<String>())); |
209 | |
210 | String sql = q.getBoundedSQL(whichToBound); |
211 | if(sql == null){ |
212 | return Double.MAX_VALUE; |
213 | } |
214 | |
215 | nOfResultTuples = this.getRows(sql); |
216 | |
217 | double wholeTableFitInMemory = 1.0; |
218 | |
219 | if(nOfTableTuples * width < 10000000){ |
220 | wholeTableFitInMemory = memoryTradeOff; |
221 | } |
222 | |
223 | incCost = (1 + (Math.log( nOfTableTuples)/Math.log(512) + 1)/wholeTableFitInMemory + nOfResultTuples/wholeTableFitInMemory) * postgreUnit; |
224 | rs = nOfTableTuples/512 * postgreUnit + fullCost + numberOfBF * incCost; |
225 | |
226 | return rs; |
227 | } |
228 | |
229 | |
230 | /** |
231 | * Return the cost of Fully-View plan for q. |
232 | * @param q |
233 | * @param numberOfBB |
234 | * @param numberOfBF |
235 | * @param numberOfFF |
236 | * @param whichToBound |
237 | * @return |
238 | */ |
239 | public double getFullViewCost(ConjunctiveQuery q, double numberOfBB, double numberOfBF, double numberOfFF, HashSet<String> whichToBound){ |
240 | |
241 | q.buildIndexes(db, null, null, null, false, new ArrayList<String>()); |
242 | |
243 | double rs = Double.MAX_VALUE; |
244 | |
245 | // if there are no BF and BB invocations at all, this query should not |
246 | // be regarded as view. |
247 | if(numberOfBF == 0 && numberOfBB == 0){ |
248 | if(numberOfFF == 1){ |
249 | return getFullMatCost(q, numberOfBB, numberOfBF, numberOfFF, whichToBound); |
250 | } |
251 | return rs; |
252 | } |
253 | |
254 | // current setting is not fair to pure view mat. |
255 | double maxTableMemorySize = Double.MIN_VALUE; |
256 | double minTableMemorySize = Double.MAX_VALUE; |
257 | double isEveryTableFitInMemory = 1; |
258 | for(Literal l : q.body){ |
259 | double v = this.getTableSize(l.getPred().getRelName()) * |
260 | this.getTableWidth(l.getPred().getRelName()); |
261 | if(v > maxTableMemorySize){ |
262 | maxTableMemorySize = v; |
263 | } |
264 | if(v < minTableMemorySize){ |
265 | minTableMemorySize = v; |
266 | } |
267 | } |
268 | |
269 | if(maxTableMemorySize < 10000000){ |
270 | isEveryTableFitInMemory = memoryTradeOff; |
271 | } |
272 | |
273 | String sql = q.getBoundedSQL(whichToBound); |
274 | |
275 | if(sql == null){ |
276 | return Double.MAX_VALUE; |
277 | } |
278 | |
279 | Double incCost = Double.MAX_VALUE; |
280 | incCost = this.getCost(sql); |
281 | Double nResultTuples = this.getRows(sql); |
282 | |
283 | //TODO |
284 | |
285 | if(minTableMemorySize < 10000000){ |
286 | incCost -= postgreUnit * nResultTuples; |
287 | if(incCost < 0){ |
288 | incCost = postgreUnit; |
289 | } |
290 | } |
291 | |
292 | |
293 | rs = numberOfBF * incCost / isEveryTableFitInMemory; |
294 | |
295 | return rs; |
296 | } |
297 | |
298 | /** |
299 | * Return the cost of Hybrid-Materialization plan for query {head :- q1.head, q2.head}. |
300 | * @param q |
301 | * @param numberOfBB |
302 | * @param numberOfBF |
303 | * @param numberOfFF |
304 | * @param whichToBound |
305 | * @return |
306 | */ |
307 | public resultTuple getJoinCostBetweenTwoMaterializedTable(Literal head, ConjunctiveQuery q1, |
308 | ConjunctiveQuery q2, double numberOfBB, double numberOfBF, double numberOfFF, HashSet<String> whichToBound ){ |
309 | |
310 | resultTuple rsTuple = new resultTuple(); |
311 | rsTuple.totalCost = Double.MAX_VALUE; |
312 | |
313 | // if there are no BF and BB invocations at all, this query should not |
314 | // be regarded as view. |
315 | if(numberOfBF == 0 && numberOfBB == 0){ |
316 | return rsTuple; |
317 | } |
318 | |
319 | ConjunctiveQuery q = new ConjunctiveQuery(); |
320 | q.addBodyLit(q1.head); |
321 | q.addBodyLit(q2.head); |
322 | q.head = head; |
323 | |
324 | //Direct Join (BF) |
325 | String sql = q.getJoinSQL(whichToBound); |
326 | if(sql == null || q1.getBoundedSQL(new HashSet<String>()) == null |
327 | || q2.getBoundedSQL(new HashSet<String>()) == null){ |
328 | return null; |
329 | } |
330 | sql = sql.replaceAll(q1.head.getPred().getRelName(), "\n(" + q1.getBoundedSQL(new HashSet<String>()) + ")"); |
331 | sql = sql.replaceAll(q2.head.getPred().getRelName(), "\n(" + q2.getBoundedSQL(new HashSet<String>()) + ")"); |
332 | |
333 | double n1 = this.getRows(sql); |
334 | |
335 | //Join with Index |
336 | double costOfQ2 = this.getCost("select * from ( " + q2.getBoundedSQL(new HashSet<String>()) + " ) ppppp;"); |
337 | double rowOfQ2 = this.getRows("select * from ( " + q2.getBoundedSQL(new HashSet<String>()) + " ) ppppp;"); |
338 | double width3 = this.getWidth("select * from ( " + q2.getBoundedSQL(new HashSet<String>()) + " ) ppppp;"); |
339 | |
340 | double costOfQ1 = this.getCost("select * from ( " + q1.getBoundedSQL(new HashSet<String>()) + " ) ppppp;"); |
341 | double rowOfQ1 = this.getRows("select * from ( " + q1.getBoundedSQL(new HashSet<String>()) + " ) ppppp;"); |
342 | double width2 = this.getWidth("select * from ( " + q1.getBoundedSQL(new HashSet<String>()) + " ) ppppp;"); |
343 | double n2 = this.getRows("select * from ( " + q1.getBoundedSQL(whichToBound) + " ) ppppp;"); |
344 | |
345 | double leftTableFitInMemory = 1; |
346 | double rightTableFitInMemory = 1; |
347 | |
348 | if(rowOfQ1 * width2 < 10000000){ |
349 | leftTableFitInMemory = memoryTradeOff; |
350 | } |
351 | |
352 | if(rowOfQ2 * width3 < 10000000){ |
353 | rightTableFitInMemory = memoryTradeOff; |
354 | } |
355 | |
356 | double metaCost = postgreUnit; |
357 | |
358 | //newCost = n2 * (n1/n2+1) / rightTableFitInMemory * metaCost ; |
359 | double newCost = n2 * (n1/n2/(8192/width3)+1) / rightTableFitInMemory * metaCost ; |
360 | double cost = newCost + n2 * (Math.log(rowOfQ2)/Math.log(512) + 1)/rightTableFitInMemory * metaCost; |
361 | cost += (Math.log(rowOfQ1)/Math.log(512) + 1) / leftTableFitInMemory * metaCost; |
362 | //cost /= 1.5; //cache |
363 | |
364 | rsTuple.eachCostInAir = cost; |
365 | |
366 | cost = cost*numberOfBF; |
367 | |
368 | rsTuple.totalCostInAir = cost; |
369 | |
370 | rsTuple.costOfQ1 = costOfQ1; |
371 | rsTuple.costOfQ2 = costOfQ2; |
372 | rsTuple.sizeOfQ1 = rowOfQ1; |
373 | rsTuple.sizeOfQ2 = rowOfQ2; |
374 | rsTuple.totalCost = cost + costOfQ2 + costOfQ1; |
375 | |
376 | return rsTuple; |
377 | |
378 | } |
379 | |
380 | |
381 | /** |
382 | * Class for a set of different costs. |
383 | * @author Ce Zhang |
384 | * |
385 | */ |
386 | public class resultTuple{ |
387 | |
388 | double costOfQ1; |
389 | double costOfQ2; |
390 | double totalCost; |
391 | double totalCostInAir; |
392 | double eachCostInAir; |
393 | double oriCostInAir; |
394 | double sizeOfQ1; |
395 | double sizeOfQ2; |
396 | |
397 | public String toString(){ |
398 | String content = ""; |
399 | content += "/////////////////////////////////\n"; |
400 | content += "Total Cost = " + totalCost + "\n"; |
401 | content += "Mat. Cost Of Q1 = " + costOfQ1 + "\n"; |
402 | content += "Mat. Cost Of Q2 = " + costOfQ2 + "\n"; |
403 | content += "Size Of Q1 = " + sizeOfQ1 + "\n"; |
404 | content += "Size Of Q2 = " + sizeOfQ2 + "\n"; |
405 | content += "Total Inc. Cost = " + totalCostInAir + "\n"; |
406 | content += "Each Inc. Cost = " + eachCostInAir + "\n"; |
407 | content += "/////////////////////////////////\n"; |
408 | return content; |
409 | } |
410 | |
411 | } |
412 | |
413 | |
414 | } |