1 | package felix.dstruct; |
2 | |
3 | import java.io.FileInputStream; |
4 | import java.io.IOException; |
5 | import java.util.ArrayList; |
6 | import java.util.HashMap; |
7 | import java.util.HashSet; |
8 | |
9 | import org.postgresql.PGConnection; |
10 | |
11 | import felix.dstruct.StatOperator.OPType; |
12 | import felix.util.FelixConfig; |
13 | import felix.util.FelixUIMan; |
14 | |
15 | import tuffy.db.RDB; |
16 | import tuffy.mln.Atom; |
17 | import tuffy.mln.Literal; |
18 | import tuffy.mln.Predicate; |
19 | import tuffy.util.ExceptionMan; |
20 | import tuffy.util.FileMan; |
21 | import tuffy.util.StringMan; |
22 | import tuffy.util.UIMan; |
23 | |
24 | |
25 | /** |
26 | * The predicate object used in Felix, which extends |
27 | * the Predicate class in Tuffy to contain Felix-related |
28 | * fields and methods. |
29 | * |
30 | * @author Ce Zhang |
31 | * |
32 | */ |
33 | public class FelixPredicate extends Predicate{ |
34 | |
35 | /** |
36 | * @deprecated |
37 | */ |
38 | String embeddedPythonCode = ""; |
39 | |
40 | /** |
41 | * Properties that can be assigned to each predicate. |
42 | * Example properties include: SYMM, REPLEX, TRANS etc. |
43 | * |
44 | */ |
45 | public enum FPProperty {SYMM, REFLEX, TRANS, CHAIN_RECUR, |
46 | OTHER_RECUR, OTHER_RECUR_WITHOTHER_OPENPRED, KEY_CONSTRAINT, NON_RECUR, EMBED_WEIGHT_RULE}; |
47 | |
48 | /** |
49 | * Path the HDFS file that this predicate depends on. |
50 | */ |
51 | public String dependencyFile = null; |
52 | |
53 | /** |
54 | * Type of dependencies, e.g., "hdfs", "jdbc" etc. |
55 | */ |
56 | public String dependencyName = null; |
57 | |
58 | /** |
59 | * Python script for MAP. |
60 | */ |
61 | public String mapScript = null; |
62 | |
63 | /** |
64 | * Python script for running before all MAPs. |
65 | */ |
66 | public String mapinitScript = ""; |
67 | |
68 | /** |
69 | * Python script for running before all REDUCEs. |
70 | */ |
71 | public String reduceinitScript = ""; |
72 | |
73 | /** |
74 | * Python script for MAP. |
75 | */ |
76 | public String reduceScript = "\tfor v in _inputvalues:\n"+ |
77 | "\t\tfelixio_push(_inputkey, v)"; |
78 | |
79 | /** |
80 | * Do we need to extract features from HDFS for this relation? |
81 | */ |
82 | public boolean needExtractFeatures = false; |
83 | |
84 | /** |
85 | * If the input is XML tag, which <xmltag></xmltag> |
86 | * should we send to MAP as a unit? |
87 | */ |
88 | public String xmltag = null; |
89 | |
90 | /** |
91 | * Whether the extraction of this relation relies on other relations. |
92 | */ |
93 | public String jdbcdep = null; |
94 | |
95 | /** |
96 | * The name of MAP's input variable. |
97 | */ |
98 | public String mapinputvar = "_input"; |
99 | |
100 | /** |
101 | * The name of REDUCE's input key variable. |
102 | */ |
103 | public String reduceinputkeyvar = "_inputkey"; |
104 | |
105 | /** |
106 | * The name of REDUCE's input value variable. |
107 | */ |
108 | public String reduceinputvaluesvar = "_inputvalues"; |
109 | |
110 | /** |
111 | * Whether the evidence file of this relation exists |
112 | * in some relational table instead of input evid. file. |
113 | */ |
114 | public boolean loadFromDatabase = false; |
115 | |
116 | /** |
117 | * see {@link #loadFromDatabase} |
118 | */ |
119 | public String loadingSchema = null; |
120 | |
121 | /** |
122 | * see {@link #loadFromDatabase} |
123 | */ |
124 | public String loadingTable = null; |
125 | |
126 | |
127 | /** |
128 | * Adds atom as evidence to this predicate (Override Felix version) - |
129 | * the difference is that we sometimes flush it to file |
130 | * directly. |
131 | * @param a |
132 | */ |
133 | public void addEvidence(Atom a) { |
134 | |
135 | hasEvid = true; |
136 | |
137 | if (a.isSoftEvidence()) |
138 | setHasSoftEvidence(true); |
139 | |
140 | if(FelixConfig.mixturedLoading == true){ |
141 | |
142 | ArrayList<String> towrite = new ArrayList<String>(); |
143 | |
144 | try { |
145 | |
146 | this.loadingFileWriter.append((a.truth == true? "True" : "False")); |
147 | this.loadingFileWriter.append("\t"); |
148 | this.loadingFileWriter.append(a.prior == null? "1" : a.prior.toString());; |
149 | this.loadingFileWriter.append("\t"); |
150 | |
151 | this.loadingFileWriter.append(StringMan.joinAndEscape("\t", a.sargs)); |
152 | this.loadingFileWriter.append("\n"); |
153 | } catch (IOException e) { |
154 | e.printStackTrace(); |
155 | } |
156 | |
157 | }else{ |
158 | addEvidenceTuple(a); |
159 | } |
160 | } |
161 | |
162 | /** |
163 | * The type of operator this predicate must be assigned to. |
164 | */ |
165 | public OPType mustbe = null; |
166 | |
167 | /** |
168 | * The parent {@link ConcurrentOperatorsBucket}. |
169 | */ |
170 | public ConcurrentOperatorsBucket belongsTo = null; |
171 | |
172 | /** |
173 | * Whether this predicate is the view-based representation of |
174 | * a coref operator. This predicate must with a name suffix ``_map''. |
175 | */ |
176 | public boolean isCorefMapPredicate = false; |
177 | |
178 | /** |
179 | * Whether this predicate is a coref operator. |
180 | */ |
181 | public boolean isCorefPredicate = false; |
182 | |
183 | /** |
184 | * If {@link FelixPredicate#isCorefMapPredicate} is true, to which this |
185 | * predicate serves. |
186 | */ |
187 | public FelixPredicate oriCorefPredicate = null; |
188 | |
189 | /** |
190 | * If {@link FelixPredicate#isCorefPredicate} is true, which |
191 | * relation serves as the linear-view-representation of it? |
192 | */ |
193 | public FelixPredicate corefMAPPredicate = null; |
194 | |
195 | /** |
196 | * Map from predicate properties ({@link FPProperty}) to |
197 | * clauses satisfying the corresponding property. |
198 | */ |
199 | HashMap<FPProperty, HashSet<FelixClause>> properities |
200 | = new HashMap<FPProperty, HashSet<FelixClause>>(); |
201 | |
202 | /** |
203 | * Clauses related to this predicate. |
204 | */ |
205 | HashSet<FelixClause> registeredClauses = new HashSet<FelixClause>(); |
206 | |
207 | /** |
208 | * If this predicate has key constraints, this set records the position |
209 | * of keys. |
210 | */ |
211 | HashSet<Integer> labelPositions = new HashSet<Integer>(); |
212 | |
213 | /** |
214 | * If this relation is defined as a view instead of |
215 | * a table, what is its view definition? |
216 | */ |
217 | public String viewDef = null; |
218 | |
219 | /** |
220 | * Map from chain-rule clauses to possible partitions of sequence. |
221 | */ |
222 | HashMap<FelixClause, ArrayList<String>> chainRulePartitions = |
223 | new HashMap<FelixClause, ArrayList<String>>(); |
224 | |
225 | /** |
226 | * Get the label position if this predicate is LR or CRF. |
227 | * @return |
228 | */ |
229 | public ArrayList<Integer> getLabelPositions(){ |
230 | ArrayList<Integer> ret = new ArrayList<Integer>(); |
231 | for(int i=0;i<this.arity();i++){ |
232 | if(!this.labelPositions.contains(i)){ |
233 | continue; |
234 | } |
235 | ret.add(i); |
236 | } |
237 | return ret; |
238 | } |
239 | |
240 | /** |
241 | * Sets embedded python code for this predicate |
242 | * @param content |
243 | */ |
244 | public void setEmbeddedPythonCode(String content){ |
245 | |
246 | this.embeddedPythonCode = content; |
247 | |
248 | } |
249 | |
250 | /** |
251 | * Get partitioning fields of sequence if this predicate is CRF. |
252 | * @return |
253 | */ |
254 | public ArrayList<String> getCRFPartitionFields(){ |
255 | |
256 | if(chainRulePartitions.keySet().size() != 1){ |
257 | return null; |
258 | } |
259 | |
260 | FelixClause key = chainRulePartitions.keySet().iterator().next(); |
261 | |
262 | if(chainRulePartitions.get(key).size() == 0){ |
263 | return null; |
264 | } |
265 | |
266 | return chainRulePartitions.get(key); |
267 | |
268 | } |
269 | |
270 | /** |
271 | * Get the key position if this predicate is LR or CRF. |
272 | * @return |
273 | */ |
274 | public ArrayList<String> getKeyFieldsArgs(){ |
275 | ArrayList<String> ret = new ArrayList<String>(); |
276 | for(int i=0;i<this.arity();i++){ |
277 | if(!labelPositions.contains(i)){ |
278 | ret.add(this.getArgs().get(i)); |
279 | } |
280 | } |
281 | return ret; |
282 | } |
283 | |
284 | /** |
285 | * Get the label types if this predicate is LR or CRF. |
286 | * @return |
287 | */ |
288 | public ArrayList<String> getLabelFieldsTypeTable(){ |
289 | ArrayList<String> ret = new ArrayList<String>(); |
290 | for(int i=0;i<this.arity();i++){ |
291 | if(!labelPositions.contains(i)){ |
292 | continue; |
293 | } |
294 | ret.add(this.getTypeAt(i).getRelName()); |
295 | } |
296 | return ret; |
297 | } |
298 | |
299 | /** |
300 | * Get the label fields' name if this predicate is LR or CRF. |
301 | * @return |
302 | */ |
303 | public ArrayList<String> getLabelFieldsArgs(){ |
304 | ArrayList<String> ret = new ArrayList<String>(); |
305 | for(int i=0;i<this.arity();i++){ |
306 | if(!labelPositions.contains(i)){ |
307 | continue; |
308 | } |
309 | ret.add(this.getArgs().get(i)); |
310 | } |
311 | return ret; |
312 | } |
313 | |
314 | /** |
315 | * Global counter for temporary predicates. |
316 | */ |
317 | static int tmpPredCounter = 0; |
318 | |
319 | /** |
320 | * Get the name of the next temporary predicate. |
321 | * @return |
322 | */ |
323 | public static String getNextTmpPredicateName(){ |
324 | return "tmp_predicate_" + (tmpPredCounter++); |
325 | } |
326 | |
327 | /** |
328 | * Whether this predicate is the view-based representation of |
329 | * a coref operator. |
330 | * @return |
331 | */ |
332 | public boolean isCorefMap(){ |
333 | return this.isCorefMapPredicate; |
334 | } |
335 | |
336 | /** |
337 | * If {@link FelixPredicate#isCorefMapPredicate} is true, to which this |
338 | * predicate serves. |
339 | * @return |
340 | */ |
341 | public FelixPredicate getOriCorefPredicate(){ |
342 | return this.oriCorefPredicate; |
343 | } |
344 | |
345 | /** |
346 | * Get clauses associated to the given property. |
347 | * @param prop |
348 | * @return |
349 | */ |
350 | public HashSet<FelixClause> getPropertyClauses(FPProperty prop){ |
351 | if(this.hasProperty(prop)){ |
352 | return new HashSet<FelixClause>(properities.get(prop)); |
353 | }else{ |
354 | return new HashSet<FelixClause>(); |
355 | } |
356 | } |
357 | |
358 | /** |
359 | * Whether this predicate has the given property. |
360 | * @param prop |
361 | * @return |
362 | */ |
363 | public boolean hasProperty(FPProperty prop){ |
364 | return properities.containsKey(prop); |
365 | } |
366 | |
367 | /** |
368 | * The constructor. |
369 | * @param aname |
370 | * @param aClosedWorld |
371 | */ |
372 | public FelixPredicate(String aname, boolean aClosedWorld) { |
373 | super(null, aname, aClosedWorld); |
374 | } |
375 | |
376 | /** |
377 | * Get {@link FelixPredicate#keyPositions}. |
378 | * @return |
379 | */ |
380 | public HashSet<Integer> getKeyPositions(){ |
381 | HashSet<Integer> ret = new HashSet<Integer>(); |
382 | for(int i=0;i<this.arity();i++){ |
383 | if(this.labelPositions.contains(i)){ |
384 | continue; |
385 | } |
386 | ret.add(i); |
387 | } |
388 | return ret; |
389 | } |
390 | |
391 | /** |
392 | * Flushes string-based evidence to database. |
393 | * @param rName |
394 | */ |
395 | public void flushStrEvidence(String rName) { |
396 | try { |
397 | |
398 | //create table |
399 | ArrayList<String> tableColumn = new ArrayList<String>(); |
400 | tableColumn.add("truth BOOL"); |
401 | tableColumn.add("prior FLOAT"); |
402 | for(String arg : this.getArgs()){ |
403 | tableColumn.add(arg + " TEXT"); |
404 | } |
405 | String sql = "CREATE TABLE " + rName + " ( " + StringMan.commaList(tableColumn) + ");"; |
406 | RDB db = RDB.getRDBbyConfig(FelixConfig.db_schema); |
407 | db.dropTable(rName); |
408 | db.execute(sql); |
409 | db.commit(); |
410 | |
411 | // flush the file |
412 | loadingFileWriter.close(); |
413 | loadingFileWriter = null; |
414 | // copy into DB |
415 | ArrayList<String> cols = new ArrayList<String>(); |
416 | cols.add("truth"); |
417 | cols.add("prior"); |
418 | cols.addAll(this.getArgs()); |
419 | FileInputStream in = new FileInputStream(loadingFile); |
420 | PGConnection con = (PGConnection) db.getConnection(); |
421 | sql = "COPY " + rName + |
422 | StringMan.commaListParen(cols) + " FROM STDIN"; |
423 | con.getCopyAPI().copyIn(sql, in); |
424 | in.close(); |
425 | db.commit(); |
426 | db.analyze(rName); |
427 | FileMan.removeFile(loadingFile.getAbsolutePath()); |
428 | |
429 | db.close(); |
430 | |
431 | } catch (Exception e) { |
432 | ExceptionMan.handle(e); |
433 | } |
434 | } |
435 | |
436 | /** |
437 | * Get all clauses related to this predicate. |
438 | * @return |
439 | */ |
440 | public HashSet<FelixClause> getRelevantClauses(){ |
441 | return registeredClauses; |
442 | } |
443 | |
444 | /** |
445 | * Add a property to this predicate, along with the clause with this property. |
446 | * @param prop |
447 | * @param evid |
448 | * @param _pos if this property is key constraint, this parameter is the position |
449 | * of keys. |
450 | */ |
451 | public void registerProperty(FPProperty prop, FelixClause evid, int... _pos){ |
452 | |
453 | FelixUIMan.println(2, 0, "\nRegister property {" + prop + "} to Predicate {" |
454 | + this.getName() + "(" + StringMan.join(",", this.getArgs()) + ")" |
455 | + "} with evidence {" + evid + "} {" |
456 | + FelixUIMan.joinArray(_pos) + "}"); |
457 | |
458 | if(prop == FPProperty.EMBED_WEIGHT_RULE){ |
459 | if(!this.properities.containsKey(FPProperty.EMBED_WEIGHT_RULE)){ |
460 | this.properities.put(FPProperty.EMBED_WEIGHT_RULE, new HashSet<FelixClause>()); |
461 | } |
462 | this.properities.get(FPProperty.EMBED_WEIGHT_RULE).add(evid); |
463 | return; |
464 | } |
465 | |
466 | if(prop == FPProperty.KEY_CONSTRAINT){ |
467 | |
468 | if(_pos.length == 0){ |
469 | ExceptionMan.die("Cannot register key constriant without " + |
470 | "any given positions"); |
471 | } |
472 | |
473 | if(this.labelPositions.size() != 0){ |
474 | UIMan.warn("Ignore second key constraints of predicate " |
475 | + this.getName()); |
476 | }else{ |
477 | for(int i=0;i<_pos.length;i++){ |
478 | this.labelPositions.add(_pos[i]); |
479 | } |
480 | } |
481 | } |
482 | |
483 | if(this.hasDependentAttributes() == false && prop != FPProperty.SYMM |
484 | && prop != FPProperty.REFLEX && evid == null){ |
485 | ExceptionMan.die("NULL clause assigned to non key_constraint predicate"); |
486 | } |
487 | |
488 | if(registeredClauses.contains(evid)){ |
489 | FelixUIMan.println(2, 0, "Ignore replicated clause registering for " |
490 | + "property {" + prop + "} to Predicate {" |
491 | + this.getName() + "(" + StringMan.join(",", this.getArgs()) + ")" |
492 | + "} with evidence {" + evid + "} {" |
493 | + FelixUIMan.joinArray(_pos) + "}"); |
494 | }else{ |
495 | if(evid != null){ |
496 | if(!properities.containsKey(prop)){ |
497 | properities.put(prop, new HashSet<FelixClause>()); |
498 | } |
499 | properities.get(prop).add(evid); |
500 | registeredClauses.add(evid); |
501 | |
502 | if(prop == FPProperty.CHAIN_RECUR){ |
503 | this.tryToExtractSeqFromChainRule(evid); |
504 | } |
505 | }else{ |
506 | if(!properities.containsKey(prop)){ |
507 | properities.put(prop, new HashSet<FelixClause>()); |
508 | } |
509 | } |
510 | } |
511 | } |
512 | |
513 | /** |
514 | * If the given clause is a CRF chain rule, try to extract the |
515 | * partitions of sequence. |
516 | * @param fc |
517 | */ |
518 | public void tryToExtractSeqFromChainRule(FelixClause fc){ |
519 | |
520 | ArrayList<String> ret = new ArrayList<String>(); |
521 | |
522 | Literal l1 = null; |
523 | Literal l2 = null; |
524 | |
525 | for(Literal l : fc.getRegLiterals()){ |
526 | if(l.getPred().getName().equals(this.getName())){ |
527 | if(l1 == null){ |
528 | l1 = l; |
529 | }else{ |
530 | l2 = l; |
531 | } |
532 | } |
533 | } |
534 | |
535 | for(int i=0;i<this.arity();i++){ |
536 | // seq partition must be on keys |
537 | if(this.labelPositions.contains(i)){ |
538 | continue; |
539 | } |
540 | |
541 | if(l1.getTerms().get(i).toString().equals( |
542 | l2.getTerms().get(i).toString())){ |
543 | ret.add(this.getArgs().get(i)); |
544 | } |
545 | } |
546 | |
547 | this.chainRulePartitions.put(fc, ret); |
548 | |
549 | } |
550 | |
551 | /** |
552 | * Returns string representation of this predicate. |
553 | */ |
554 | public String toString(){ |
555 | String ret = ""; |
556 | |
557 | ret = this.getName(); |
558 | ret += "("; |
559 | ret += StringMan.commaList(this.getArgs()); |
560 | ret += ")"; |
561 | |
562 | return ret; |
563 | } |
564 | |
565 | |
566 | } |