1 | package felix.util; |
2 | |
3 | import java.io.BufferedReader; |
4 | import java.io.BufferedWriter; |
5 | import java.io.File; |
6 | import java.io.FileReader; |
7 | import java.io.IOException; |
8 | import java.math.BigInteger; |
9 | import java.security.MessageDigest; |
10 | import java.security.NoSuchAlgorithmException; |
11 | import java.sql.DatabaseMetaData; |
12 | import java.sql.ResultSet; |
13 | import java.sql.ResultSetMetaData; |
14 | import java.util.ArrayList; |
15 | import java.util.Arrays; |
16 | import java.util.HashMap; |
17 | import java.util.HashSet; |
18 | |
19 | import tuffy.db.RDB; |
20 | import tuffy.mln.Literal; |
21 | import tuffy.mln.Predicate; |
22 | import tuffy.mln.Term; |
23 | import tuffy.util.Config; |
24 | import tuffy.util.ExceptionMan; |
25 | import tuffy.util.FileMan; |
26 | import tuffy.util.StringMan; |
27 | import tuffy.util.UIMan; |
28 | import felix.dstruct.FelixClause; |
29 | import felix.dstruct.FelixPredicate; |
30 | import felix.dstruct.FelixQuery; |
31 | import felix.parser.FelixCommandOptions; |
32 | |
33 | public class ActiveLearner { |
34 | |
35 | FelixCommandOptions opt; |
36 | |
37 | RDB db; |
38 | |
39 | // TODO: make this general |
40 | /** |
41 | * Generates xml file containing relevant info for query. |
42 | * @throws Exception |
43 | */ |
44 | public void getTopCandidates() throws Exception{ |
45 | |
46 | FelixUIMan.println(">>> Connecting to RDBMS at " + FelixConfig.db_url); |
47 | |
48 | if(FelixConfig.evidDBSchema == null){ |
49 | ExceptionMan.die("Active learning mode needs the evidence schema specified"); |
50 | } |
51 | |
52 | if(FelixConfig.evidDBSchema != null){ |
53 | FelixConfig.db_schema = FelixConfig.evidDBSchema; |
54 | } |
55 | |
56 | db = RDB.getRDBbyConfig(); |
57 | db.resetSchema(FelixConfig.db_schema); |
58 | |
59 | RDB db2 = RDB.getRDBbyConfig(); |
60 | db2.resetSchema(FelixConfig.db_schema); |
61 | |
62 | HashMap<Long, String> id2str = db.loadIdSymbolMapFromTable(); |
63 | HashMap<String, Integer> str2id = db.loadSymbolIdMapFromTable(); |
64 | |
65 | FelixQuery fq = new FelixQuery(); |
66 | |
67 | String[] progFiles = opt.fprog.split(","); |
68 | fq.loadPrograms(progFiles); |
69 | |
70 | if(opt.fquery != null){ |
71 | String[] queryFiles = opt.fquery.split(","); |
72 | fq.loadQueries(queryFiles); |
73 | } |
74 | |
75 | if(opt.queryAtoms != null){ |
76 | FelixUIMan.println(">>> Parsing query atoms in command line"); |
77 | fq.parseQueryCommaList(opt.queryAtoms); |
78 | } |
79 | |
80 | // Predicate -> (attr -> value) |
81 | /* |
82 | predicate | property | value |
83 | -----------+------------+------------- |
84 | el | optype | lr |
85 | el | provtable | pred_DMO_43 |
86 | el | labelfield | eid2 |
87 | */ |
88 | HashMap<FelixPredicate, HashMap<String, String>> attrs = new |
89 | HashMap<FelixPredicate, HashMap<String, String>>(); |
90 | |
91 | // TODO: use this |
92 | //String[] viewDesps = {"QueryWord(queryword) := el(query, entityid), query( query, queryWord, docID)", |
93 | //"EntityWord(wikiTitle, entityWord) := el(query, entityid), entity( entityid, wikiTitle, entityWord)"}; |
94 | |
95 | // TODO: use this! |
96 | // db.execute("CREATE TEMPORARY VIEW _active_view1 AS SELECT " + |
97 | // " t0.queryid1 AS queryid1, t0.entityid2 as entityid2, ARRAY[t1.queryword2] as viewcontent "+ |
98 | // " FROM pred_el t0, pred_query t1 WHERE t0.queryid1 = t1.queryid1"); |
99 | // |
100 | // db.execute("CREATE TEMPORARY VIEW _active_view2 AS SELECT " + |
101 | // " t0.queryid1 AS queryid1, t0.entityid2 as entityid2, ARRAY[t1.wikititle2, t1.entityword3] as viewcontent "+ |
102 | // " FROM pred_el t0, pred_entity t1 WHERE t0.entityid2 = t1.entityid1"); |
103 | |
104 | // load meta information |
105 | ResultSet rs = db.query("SELECT * from _prov_metainfo;"); |
106 | while(rs.next()){ |
107 | String pred = rs.getString(1); |
108 | String attr = rs.getString(2); |
109 | String value = rs.getString(3); |
110 | |
111 | FelixPredicate p = fq.getPredByName(pred); |
112 | |
113 | if(!attrs.containsKey(p)){ |
114 | attrs.put(p, new HashMap<String, String>()); |
115 | } |
116 | attrs.get(p).put(attr, value); |
117 | } |
118 | |
119 | BufferedWriter bw = FileMan.getBufferedWriterMaybeGZ(opt.fout); |
120 | |
121 | bw.append("<?xml version=\"1.0\"?>\n"); |
122 | bw.append("<root>\n\n"); |
123 | |
124 | |
125 | // TODO: make this work for predicates with arity != 2 |
126 | HashMap<Integer, Integer> gt = new HashMap<Integer, Integer>(); |
127 | |
128 | String predicate_name = ""; |
129 | |
130 | // parse gt file |
131 | // if(opt.gtFile != null) { |
132 | // System.out.println("Parsing gt file " + opt.gtFile); |
133 | // try { |
134 | // BufferedReader in = new BufferedReader(new FileReader(opt.gtFile)); |
135 | // String str; |
136 | // while ((str = in.readLine()) != null) { |
137 | // predicate_name = str.substring(0, str.indexOf('(')); |
138 | // String rest = str.substring(str.indexOf('(')+1, str.lastIndexOf(')') - str.indexOf('(') + 1); |
139 | // String[] args = rest.split(","); |
140 | // |
141 | // // Remove leading/trailing quotes |
142 | // for(int i = 0; i < args.length; i++) { |
143 | // args[i] = args[i].trim(); |
144 | // if(args[i].startsWith("\"")) { |
145 | // args[i] = args[i].substring(1); |
146 | // } |
147 | // if(args[i].endsWith("\"")) { |
148 | // args[i] = args[i].substring(0, args[i].length() - 1); |
149 | // } |
150 | // } |
151 | // //System.out.println(args[0] + "\t" + args[1]); |
152 | // //System.out.println("\t" + str2id.get(args[0]) + "\t" + str2id.get(args[1])); |
153 | // gt.put(str2id.get(args[0]), str2id.get(args[1])); |
154 | // } |
155 | // in.close(); |
156 | // } catch (IOException e) { |
157 | // ExceptionMan.die("Error reading from file " + opt.gtFile); |
158 | // } |
159 | // } |
160 | |
161 | // Go through each gt predicate |
162 | //for(int h = 0; h < gt.size(); h++) { |
163 | |
164 | |
165 | |
166 | FelixPredicate p = fq.getPredByName(predicate_name); |
167 | ArrayList<String> arg_names = p.getArgs(); |
168 | |
169 | //for(FelixPredicate p : fq.getAllPred()){ |
170 | if(p.hasQuery()){ |
171 | |
172 | if(!attrs.get(p).get("optype").equals("lr")){ |
173 | ExceptionMan.die("Currently, I can only deal with LR!"); |
174 | } |
175 | |
176 | // this contains: eid2 |
177 | ArrayList<String> labelfields = |
178 | new ArrayList<String>(Arrays.asList(attrs.get(p).get("labelfield").split(","))); |
179 | |
180 | // this works for ouput of every gt link |
181 | // String where = arg_names.get(0) +y "=" + str2id.get(gt_args[0]); |
182 | // for(int i = 1; i < arg_names.size(); i++) { |
183 | // where += " and " + arg_names.get(i) + "=" + str2id.get(gt_args[i]); |
184 | // } |
185 | // rs = db.query("select * from " + p.getRelName() + " where " + where); |
186 | |
187 | //String select = "id, truth, max(prior), club, atomid"; |
188 | //for(int i = 1; i < arg_names.size(); i++) { |
189 | // select += "," + arg_names.get(i); |
190 | //} |
191 | |
192 | //ArrayList<Integer> temp = new ArrayList<Integer>(gt.keySet()); |
193 | //ArrayList<String> where = new ArrayList<String>(); |
194 | //for(int i = 0; i < temp.size(); i++) { |
195 | // where.add(arg_names.get(0) + "=" + temp.get(i)); |
196 | //} |
197 | |
198 | // TODO: make this general (check key constraint?) |
199 | // TODO: make this faster (use GROUP BY?) |
200 | // TODO: use JOIN (create table for gt) |
201 | //String query = "SELECT * FROM " + p.getRelName() + " WHERE " + StringMan.join(" OR ", where) + " ORDER BY " + arg_names.get(0) + " ASC, prior DESC"; |
202 | String query = "SELECT * FROM " + p.getRelName() + " ORDER BY " + arg_names.get(0) + " ASC, prior DESC"; |
203 | System.out.println("rs query: " + query); |
204 | rs = db.query(query); |
205 | |
206 | /* |
207 | id | truth | prior | club | atomid | mid1 | eid2 |
208 | ----+-------+----------------------+------+--------+------+--------- |
209 | 1 | t | 1 | 2 | | 5 | 1203067 |
210 | 2 | t | 3.72007597602084e-44 | 2 | | 5 | 1 |
211 | 3 | t | 2.67863696180808e-33 | 2 | | 5 | 1199200 |
212 | 4 | t | 6.71418428821159e-79 | 2 | | 9 | 909864 |
213 | 5 | t | 2.05388455404083e-85 | 2 | | 9 | 1 |
214 | 6 | t | 6.71418428821159e-79 | 2 | | 9 | 1744854 |
215 | 7 | t | 6.71418428821159e-79 | 2 | | 9 | 1300787 |
216 | 8 | t | 6.71418428821159e-79 | 2 | | 9 | 818955 |
217 | 9 | t | 1.80485138784542e-35 | 2 | | 9 | 1590722 |
218 | 10 | t | 6.71418428821159e-79 | 2 | | 9 | 299086 |
219 | */ |
220 | |
221 | int cur_id = 0; |
222 | int choice = 0; |
223 | int output_count = 0; |
224 | boolean found = false; |
225 | |
226 | while(rs.next() && output_count < 10) { |
227 | |
228 | // Mention id (felix id) |
229 | Integer id1 = rs.getInt(p.getArgs().get(0)); |
230 | |
231 | // Entity id (felix id) |
232 | Integer id2 = rs.getInt(p.getArgs().get(1)); |
233 | |
234 | // check if there's a gt for this result |
235 | if(gt.get(id1) == null) { |
236 | continue; |
237 | } |
238 | |
239 | // Mention id |
240 | String str1 = id2str.get(id1); |
241 | |
242 | // Entity id |
243 | String str2 = id2str.get(id2); |
244 | |
245 | // New id |
246 | if(id1 != cur_id) { |
247 | cur_id = id1; |
248 | choice = 0; |
249 | found = false; |
250 | } |
251 | |
252 | // check gt |
253 | boolean match = (id2.equals(gt.get(id1))); |
254 | |
255 | // Already found correct result |
256 | if(found) { |
257 | continue; |
258 | } |
259 | // result found |
260 | else if(match) { |
261 | found = true; |
262 | // correct |
263 | if(choice == 0) { |
264 | System.out.println(str1 + "\t" + str2); |
265 | System.out.println("\tCorrect"); |
266 | continue; |
267 | } |
268 | // otherwise output result |
269 | } |
270 | // incorrect |
271 | else { |
272 | if(choice != 0) { |
273 | choice++; |
274 | continue; |
275 | } |
276 | choice++; |
277 | System.out.println(str1 + "\t" + str2); |
278 | System.out.println("\tIncorrect\t" + id2str.get(gt.get(id1))); |
279 | } |
280 | |
281 | output_count++; |
282 | |
283 | //String mmmmmid = id2str.get(rs.getInt(p.getArgs().get(0))); |
284 | //System.out.println(mmmmmid); |
285 | |
286 | ArrayList<String> binds = new ArrayList<String>(); |
287 | ArrayList<String> joincond = new ArrayList<String>(); |
288 | ArrayList<String> smalllabelfields = new ArrayList<String>(); |
289 | |
290 | // Go through each column |
291 | for(int i=0;i<p.arity();i++){ |
292 | if(!labelfields.contains(p.getArgs().get(i))){ |
293 | joincond.add("t0." + p.getArgs().get(i) + "=" + rs.getString(p.getArgs().get(i))); |
294 | }else{ |
295 | smalllabelfields.add(id2str.get(rs.getInt(p.getArgs().get(i)))); |
296 | } |
297 | binds.add("t0." + p.getArgs().get(i) + "=" + rs.getInt(p.getArgs().get(i))); |
298 | } |
299 | |
300 | String label = StringMan.commaList(smalllabelfields); |
301 | Double prob = rs.getDouble("prior"); |
302 | String probability = UIMan.decimalRound(4, prob); |
303 | |
304 | //TODO: THINK ABOUT ``NO-KEY'' CASE |
305 | // SELECT * FROM pred_DMO_39 WHERE t0.mid1=163545 |
306 | // TODO: only do this once per mention |
307 | String sql = "SELECT * FROM " + attrs.get(p).get("provtable") + " t0 WHERE " + |
308 | StringMan.join(" AND ",joincond) + " ORDER BY t0.weight DESC"; |
309 | System.out.println("prov query: " + sql); |
310 | |
311 | ResultSet provrs = db2.query(sql); |
312 | // System.out.print(p.getName() + "("); |
313 | bw.append("<QUERY>\n"); |
314 | bw.append(" <NAME>" + p.getName() + "("); |
315 | |
316 | ArrayList<String> args = new ArrayList<String>(); |
317 | ArrayList<String> rawargs = new ArrayList<String>(); |
318 | for(int i=0;i<p.arity();i++){ |
319 | args.add(id2str.get(rs.getInt(p.getArgs().get(i)))); |
320 | rawargs.add(rs.getString(p.getArgs().get(i))); |
321 | } |
322 | bw.append(StringMan.commaList(args) + ")</NAME>\n"); |
323 | bw.append(" <TYPE>" + "LR" + "</TYPE>\n"); |
324 | bw.append(" <LABEL>" + label + "</LABEL>\n"); |
325 | bw.append(" <PROB>" + probability + "</PROB>\n"); |
326 | bw.append(" <CORRECT>" + found + "</CORRECT>\n"); |
327 | |
328 | |
329 | // TODO: make this general |
330 | // Get mention text |
331 | String sql1 = "SELECT t0.word4 FROM pred_mention t0 WHERE t0.mid1=" + id1; |
332 | System.out.println("mention query: " + sql1); |
333 | ResultSet mid = db.query(sql1); |
334 | String mention = "Missing"; |
335 | if(mid.next()) { |
336 | mention = id2str.get(mid.getInt(1)); |
337 | } |
338 | |
339 | // Get entity text |
340 | String sql2 = "SELECT t0.word2 FROM pred_entity t0 WHERE t0.eid1=" + id2; |
341 | System.out.println("entity query: " + sql2); |
342 | ResultSet eid = db.query(sql2); |
343 | String entity = "NIL"; |
344 | if(eid.next()) { |
345 | entity = id2str.get(eid.getInt(1)); |
346 | } |
347 | |
348 | // Get sentence text |
349 | String sql3 = "SELECT t0.sentencetext2 FROM pred_sentence t0 WHERE t0.mid1=" + id1; |
350 | System.out.println("sentence query: " + sql3); |
351 | ResultSet sent = db.query(sql3); |
352 | String sentence = ""; |
353 | if(sent.next()) { |
354 | sentence = id2str.get(sent.getInt(1)); |
355 | sentence = sentence.replaceAll("&", "&"); |
356 | sentence = sentence.replaceAll("<", "<"); |
357 | sentence = sentence.replaceAll(">", ">"); |
358 | } |
359 | bw.append(" <COMMENT>\n"); |
360 | bw.append(" <DESP>Mention</DESP>\n"); |
361 | bw.append(" <RS><METARS>" + mention + "</METARS></RS>\n"); |
362 | bw.append(" </COMMENT>\n"); |
363 | bw.append(" <COMMENT>\n"); |
364 | bw.append(" <DESP>Entity</DESP>\n"); |
365 | bw.append(" <RS><METARS>" + entity + "</METARS></RS>\n"); |
366 | bw.append(" </COMMENT>\n"); |
367 | bw.append(" <COMMENT>\n"); |
368 | bw.append(" <DESP>Sentence</DESP>\n"); |
369 | bw.append(" <RS><METARS>" + sentence + "</METARS></RS>\n"); |
370 | bw.append(" </COMMENT>\n"); |
371 | |
372 | |
373 | // for(int i=1;i<=2;i++){ |
374 | // bw.append(" <COMMENT>\n"); |
375 | // bw.append(" <DESP>" + viewDesps[i-1] + "</DESP>\n"); |
376 | // String viewname = "_active_view" + i; |
377 | // //ResultSet viewrs = db.query("SELECT * FROM " + viewname + " t0 WHERE " + |
378 | // // StringMan.join(" AND ", binds)); |
379 | // //while(viewrs.next()){ |
380 | // // Integer[] columns = (Integer[])viewrs.getArray("viewcontent").getArray(); |
381 | // // bw.append(" <RS>\n"); |
382 | // // for(Integer ec : columns){ |
383 | // // bw.append(" <METARS>" + id2str.get(ec) + "</METARS>\n"); |
384 | // // } |
385 | // // bw.append(" </RS>\n"); |
386 | // bw.append("<RS><METARS></METARS></RS>\n"); |
387 | // //} |
388 | // bw.append(" </COMMENT>\n"); |
389 | // } |
390 | |
391 | //bw.append(" <COMMENT>" + StringMan.commaList(rawargs) + "</COMMENT>\n"); |
392 | |
393 | // Go through each rule |
394 | while(provrs.next()){ |
395 | |
396 | // This contains entity id |
397 | smalllabelfields = new ArrayList<String>(); |
398 | for(int i=0;i<p.arity();i++){ |
399 | if(labelfields.contains(p.getArgs().get(i))){ |
400 | smalllabelfields.add(id2str.get(provrs.getInt(p.getArgs().get(i)))); |
401 | } |
402 | } |
403 | String thislabel = StringMan.commaList(smalllabelfields); |
404 | |
405 | int clauseID = provrs.getInt("prov"); |
406 | double weight = provrs.getFloat("weight"); |
407 | |
408 | |
409 | |
410 | // NOTE: deepprov doesn't match up with terms |
411 | |
412 | String deepProv = provrs.getString("deepprov"); //{qid:160,docID:264066,queryWord:155} |
413 | deepProv = deepProv.replaceAll("\\{|\\}", ""); |
414 | |
415 | HashMap<String, Integer> term2constant = new HashMap<String, Integer>(); |
416 | String[] smalls = deepProv.split(","); |
417 | //int num = 1; |
418 | for(String small : smalls){ |
419 | String[] metamap = small.split(":"); |
420 | //String temp = metamap[0] + num; |
421 | //term2constant.put(temp, Integer.valueOf(metamap[1])); |
422 | //num++; |
423 | term2constant.put(metamap[0], Integer.valueOf(metamap[1])); |
424 | } |
425 | |
426 | // TODO: Be carefully with the consistency of clause id between different runs |
427 | FelixClause clause = fq.id2clause.get(clauseID); |
428 | |
429 | // Get source clause/comment |
430 | String source_clause = clause.toString(); |
431 | String name = ""; |
432 | String content = ""; |
433 | |
434 | String[] lines = source_clause.split("\n"); |
435 | for(String line : lines) { |
436 | if(!line.equals("")) { |
437 | if(line.contains(">")) { |
438 | line = line.replaceAll(">", ">"); |
439 | } |
440 | if(line.contains("<")) { |
441 | line = line.replaceAll("<", "<"); |
442 | } |
443 | if(line.startsWith("//")) { |
444 | line = line.replaceFirst("//", ""); |
445 | line = line.trim(); |
446 | if(name.length() != 0) { |
447 | name += "\n"; |
448 | } |
449 | name += line; |
450 | } else { |
451 | if(content.length() != 0) { |
452 | content += "\n"; |
453 | } |
454 | content += line; |
455 | } |
456 | } |
457 | } |
458 | |
459 | bw.append(" <PROV>\n"); |
460 | bw.append(" <SOURCECLAUSE>\n"); |
461 | bw.append(" <NAME>" + name + "</NAME>\n"); |
462 | bw.append(" <CONTENT>" + content + "</CONTENT>\n"); |
463 | bw.append(" </SOURCECLAUSE>\n"); |
464 | //bw.append(" <UNAME>" + clause.g) |
465 | bw.append(" <WEIGHT>" + weight + "</WEIGHT>\n"); |
466 | bw.append(" <LABEL>" + thislabel + "</LABEL>\n"); |
467 | bw.append(" <CLAUSEID>" + clauseID + "</CLAUSEID>\n"); |
468 | |
469 | // Go through each literal |
470 | for(Literal l2 : clause.getRegLiterals()){ |
471 | Literal l = (Literal) l2.clone(); |
472 | |
473 | // Rule for this predicate |
474 | if(l.getPred().equals(p)){ |
475 | bw.append(" <LITERAL>\n"); |
476 | bw.append(" <SENSE>" + (l.getSense()==true ? "POS" : "NEG") + "</SENSE>\n"); |
477 | bw.append(" <NAME>" + l.getPred().getName() + "</NAME>\n"); |
478 | bw.append(" <STRINGFORM>=>"); |
479 | }else{ |
480 | l.setSense(!l.getSense()); |
481 | bw.append(" <LITERAL>\n"); |
482 | bw.append(" <SENSE>" + (l.getSense()==true ? "POS" : "NEG") + "</SENSE>\n"); |
483 | bw.append(" <NAME>" + l.getPred().getName() + "</NAME>\n"); |
484 | bw.append(" <STRINGFORM>"); |
485 | } |
486 | |
487 | // output predicate name |
488 | bw.append(l.getSense()==true? "" : "!"); |
489 | bw.append(l.getPred().getName() + "("); |
490 | |
491 | ArrayList<String> terms = new ArrayList<String>(); |
492 | ArrayList<Integer> arguments = new ArrayList<Integer>(); |
493 | |
494 | for(Term t : l.getTerms()){ |
495 | if(t.isConstant()){ |
496 | terms.add("\"" + id2str.get(Integer.valueOf(t.toString())) + "\""); |
497 | }else{ |
498 | terms.add("\"" + id2str.get(term2constant.get(t.toString())) + "\""); |
499 | arguments.add(term2constant.get(t.toString())); |
500 | } |
501 | } |
502 | bw.append(StringMan.join(", ", terms) + ")</STRINGFORM>\n"); |
503 | |
504 | |
505 | // Go through each argument |
506 | for(Integer id : arguments) { |
507 | String str = "" + id2str.get(id); |
508 | bw.append(" <ARG><NAME>" + str + "</NAME>"); |
509 | |
510 | // TODO: make this general |
511 | if(str.startsWith("MEN_")) { |
512 | String sql_men = "SELECT t0.word4 FROM pred_mention t0 WHERE t0.mid1=" + id; |
513 | ResultSet men = db.query(sql_men); |
514 | if(men.next()) { |
515 | String temp = id2str.get(men.getInt(1)); |
516 | bw.append("<VIEW><METARS>" + temp + "</METARS>"); |
517 | } |
518 | String sql_sent = "SELECT t0.sentencetext2 FROM pred_sentence t0 WHERE t0.mid1=" + id; |
519 | ResultSet sen = db.query(sql_sent); |
520 | if(sen.next()) { |
521 | String temp = id2str.get(sen.getInt(1)); |
522 | bw.append("<METARS>" + temp + "</METARS>"); |
523 | } |
524 | bw.append("</VIEW>"); |
525 | } else if(str.startsWith("E_m_")) { |
526 | String sql_ent = "SELECT t0.word2 FROM pred_entity t0 WHERE t0.eid1=" + id; |
527 | ResultSet ent = db.query(sql_ent); |
528 | if(ent.next()) { |
529 | String temp = id2str.get(ent.getInt(1)); |
530 | temp = temp.replaceAll("&", "&"); |
531 | temp = temp.replaceAll("<", "<"); |
532 | temp = temp.replaceAll(">", ">"); |
533 | bw.append("<VIEW><METARS>" + temp + "</METARS></VIEW>"); |
534 | } |
535 | } |
536 | bw.append("</ARG>\n"); |
537 | } |
538 | bw.append(" </LITERAL>\n"); |
539 | } |
540 | bw.append(" </PROV>\n"); |
541 | |
542 | //System.out.print("\t" + provrs.getString("prov") + "\t" + provrs.getString("weight") + "\t" + provrs.getString("deepprov")+"\n\n"); |
543 | |
544 | } |
545 | |
546 | bw.append("</QUERY>\n"); |
547 | } |
548 | } |
549 | bw.append("\n\n</root>\n\n"); |
550 | bw.close(); |
551 | |
552 | } |
553 | |
554 | // TODO: make this general |
555 | /** |
556 | * Creates tables/views in public schema for use by django feedback tool. |
557 | * @throws Exception |
558 | */ |
559 | public void DjangoSetup() throws Exception { |
560 | |
561 | HashMap<FelixPredicate, HashMap<String, String>> attrs = new |
562 | HashMap<FelixPredicate, HashMap<String, String>>(); |
563 | FelixQuery fq = new FelixQuery(); |
564 | ResultSet rs; |
565 | String query = ""; |
566 | |
567 | // For generating django models.py, admin,py |
568 | ArrayList<String> models = new ArrayList<String>(); |
569 | ArrayList<String> admin = new ArrayList<String>(); |
570 | |
571 | // setup stuff |
572 | db = RDB.getRDBbyConfig(opt.evidDBSchema); |
573 | db.resetSchema(opt.evidDBSchema); |
574 | //db.resetSchema(FelixConfig.db_schema); |
575 | String[] progFiles = opt.fprog.split(","); |
576 | fq.loadPrograms(progFiles); |
577 | |
578 | |
579 | HashMap<Long, String> id2str = db.loadIdSymbolMapFromTable(); |
580 | HashMap<String, Integer> str2id = db.loadSymbolIdMapFromTable(); |
581 | |
582 | if(opt.fquery != null){ |
583 | String[] queryFiles = opt.fquery.split(","); |
584 | fq.loadQueries(queryFiles); |
585 | } |
586 | |
587 | if(opt.queryAtoms != null){ |
588 | FelixUIMan.println(">>> Parsing query atoms in command line"); |
589 | fq.parseQueryCommaList(opt.queryAtoms); |
590 | } |
591 | |
592 | HashMap<Integer, Integer> gt = new HashMap<Integer, Integer>(); |
593 | |
594 | // TODO: get this somewhere |
595 | String predicate_name = "el"; |
596 | |
597 | // gets metainfo |
598 | rs = db.query("SELECT * FROM _prov_metainfo;"); |
599 | while(rs.next()){ |
600 | String pred = rs.getString(1); |
601 | String attr = rs.getString(2); |
602 | String value = rs.getString(3); |
603 | |
604 | FelixPredicate p = fq.getPredByName(pred); |
605 | |
606 | if(!attrs.containsKey(p)){ |
607 | attrs.put(p, new HashMap<String, String>()); |
608 | } |
609 | attrs.get(p).put(attr, value); |
610 | } |
611 | |
612 | FelixPredicate p = fq.getPredByName(predicate_name); |
613 | ArrayList<String> arg_names = p.getArgs(); |
614 | |
615 | // TODO: allow multiple predicates |
616 | //for(FelixPredicate p : fq.getAllPred()){ |
617 | if(p.hasQuery()) { |
618 | |
619 | if(!attrs.get(p).get("optype").equals("lr")){ |
620 | ExceptionMan.die("Currently, I can only deal with LR!"); |
621 | } |
622 | |
623 | // this contains: eid2 |
624 | ArrayList<String> labelfields = new ArrayList<String>(Arrays.asList(attrs.get(p).get("labelfield").split(","))); |
625 | |
626 | |
627 | // TODO: allow gt with different fields/key constraints |
628 | // parse gt file |
629 | String gtTable = "public." + p.getRelName() + "_gt"; |
630 | String gtTableTmp = "public." + p.getRelName() + "_gt_tmp"; |
631 | |
632 | if(opt.gt != null) { |
633 | // TODO: make this general |
634 | // create tmp table for gt predicate |
635 | db.dropTable(gtTableTmp); |
636 | query = "CREATE TABLE " + gtTableTmp + " (mid1 integer, eid2 integer, mid1_value text, eid2_value text)"; |
637 | System.out.println(query); |
638 | db.execute(query); |
639 | |
640 | |
641 | System.out.println("Parsing gt file " + opt.gt); |
642 | try { |
643 | BufferedReader in = new BufferedReader(new FileReader(opt.gt)); |
644 | String str; |
645 | while ((str = in.readLine()) != null) { |
646 | if(predicate_name.equals(str.substring(0, str.indexOf('(')))) { |
647 | |
648 | String rest = str.substring(str.indexOf('(')+1, str.lastIndexOf(')') - str.indexOf('(') + 1); |
649 | String[] args = rest.split(","); |
650 | |
651 | // Remove leading/trailing quotes |
652 | for(int i = 0; i < args.length; i++) { |
653 | args[i] = args[i].trim(); |
654 | if(args[i].startsWith("\"")) { |
655 | args[i] = args[i].substring(1); |
656 | } |
657 | if(args[i].endsWith("\"")) { |
658 | args[i] = args[i].substring(0, args[i].length() - 1); |
659 | } |
660 | } |
661 | //System.out.println(args[0] + "\t" + args[1]); |
662 | //System.out.println("\t" + str2id.get(args[0]) + "\t" + str2id.get(args[1])); |
663 | |
664 | // add to gt table |
665 | Integer id1 = str2id.get(args[0]); |
666 | Integer id2 = str2id.get(args[1]); |
667 | |
668 | query = "INSERT INTO " + gtTableTmp + " VALUES (" + id1 + ", " + id2 + ", '" + args[0] + "', 'n/" + args[1] + "')"; |
669 | System.out.println(query); |
670 | db.execute(query); |
671 | } |
672 | } |
673 | in.close(); |
674 | |
675 | // TODO: make this general |
676 | // create table for gt predicate |
677 | db.dropTable(gtTable); |
678 | query = "CREATE TABLE " + gtTable + " AS " + |
679 | "SELECT t1.id AS id, t0.mid1 AS mid1, t0.eid2 AS eid2, t0.mid1_value AS mid1_value, t0.eid2_value AS eid2_value " + |
680 | "FROM " + gtTableTmp + " AS t0, " + p.getRelName() + " AS t1 " + |
681 | "WHERE t0.mid1 = t1.mid1 AND t0.eid2 = t1.eid2"; |
682 | System.out.println(query); |
683 | db.execute(query); |
684 | |
685 | // drop tmp gt table |
686 | query = "DROP TABLE " + gtTableTmp; |
687 | System.out.println(query); |
688 | db.execute(query); |
689 | } catch (IOException e) { |
690 | ExceptionMan.die("Error reading from file " + opt.gt); |
691 | } |
692 | } |
693 | |
694 | // TODO: make this general |
695 | // Creates public view for this predicate |
696 | db.dropView("public." + p.getRelName()); |
697 | if(gt != null) { |
698 | query = "CREATE VIEW public." + p.getRelName() + " AS SELECT t0.*, t1.constantvalue as mid1_value, t2.constantvalue as eid2_value FROM " + p.getRelName() + " AS t0, type_mid AS t1, type_eid AS t2 WHERE t0.mid1 = t1.constantid AND t0.eid2 = t2.constantid AND mid1 IN (SELECT mid1 FROM " + gtTable + ")"; |
699 | System.out.println(query); |
700 | db.execute(query); |
701 | query = "ALTER TABLE " + gtTable + " ADD FOREIGN KEY (id) REFERENCES " + p.getRelName() + "(id)"; |
702 | System.out.println(query); |
703 | db.execute(query); |
704 | } else { |
705 | query = "CREATE VIEW public." + p.getRelName() + " AS SELECT * FROM " + p.getRelName(); |
706 | System.out.println(query); |
707 | db.execute(query); |
708 | } |
709 | |
710 | |
711 | String prov_table = attrs.get(p).get("provtable"); |
712 | |
713 | if(prov_table == null) { |
714 | ExceptionMan.die("Unable to find provtable for query predicate"); |
715 | } |
716 | |
717 | // TODO: make this general |
718 | if(prov_table != null) { |
719 | db.dropTable("public." + prov_table); |
720 | if(opt.gt != null) { |
721 | // TODO: make this step faster! (takes about 10 mins) |
722 | // maybe have command line option specifying if rules should be updated? |
723 | query = "CREATE TABLE public." + prov_table + " AS SELECT t0.mid1, t0.eid2, t0.weight, t0.prov, array_to_string(t0.deepprov, ',') as deepprov, t1.id AS lid FROM " + prov_table + " AS t0, public." + p.getRelName() + " AS t1 WHERE t0.mid1 = t1.mid1 and t0.eid2 = t1.eid2"; |
724 | System.out.println(query); |
725 | db.execute(query); |
726 | query = "ALTER TABLE public." + prov_table + " ADD FOREIGN KEY (lid) REFERENCES " + p.getRelName() + "(id)"; |
727 | System.out.println(query); |
728 | db.execute(query); |
729 | query = "ALTER TABLE public." + prov_table + " ADD id SERIAL"; |
730 | System.out.println(query); |
731 | db.update(query); |
732 | } else { |
733 | System.out.println("CREATE TABLE public." + prov_table + " AS SELECT * FROM " + prov_table); |
734 | db.copyTable(prov_table, "public." + prov_table); |
735 | query = "ALTER TABLE public." + prov_table + " ADD id SERIAL"; |
736 | System.out.println(query); |
737 | db.update(query); |
738 | } |
739 | } |
740 | } |
741 | |
742 | // Constants |
743 | db.dropView("public.constants"); |
744 | query = "CREATE VIEW public.constants AS SELECT * FROM constants"; |
745 | System.out.println(query); |
746 | db.execute(query); |
747 | |
748 | // Clauses |
749 | db.dropTable("public.clauses"); |
750 | query = "CREATE TABLE public.clauses(id serial, comment text, content text)"; |
751 | System.out.println(query); |
752 | db.execute(query); |
753 | for(Integer id : fq.id2clause.keySet()) { |
754 | FelixClause clause = fq.id2clause.get(id); |
755 | |
756 | int clauseID = clause.getId(); |
757 | // Get source clause/comment |
758 | String source_clause = clause.toString(); |
759 | String comment = ""; |
760 | String content = ""; |
761 | |
762 | String[] lines = source_clause.split("\n"); |
763 | for(String line : lines) { |
764 | if(!line.equals("")) { |
765 | line = line.replaceAll("'", ""); |
766 | if(line.startsWith("//")) { |
767 | line = line.replaceFirst("//", ""); |
768 | line = line.trim(); |
769 | if(comment.length() != 0) { |
770 | comment += "\n"; |
771 | } |
772 | comment += line; |
773 | } else { |
774 | if(content.length() != 0) { |
775 | content += "\n"; |
776 | } |
777 | content += line; |
778 | } |
779 | } |
780 | } |
781 | |
782 | query = "INSERT INTO public.clauses VALUES (" + clauseID + ", '" + comment + "', '" + content + "')"; |
783 | System.out.println(query); |
784 | db.update(query); |
785 | } |
786 | |
787 | // // TODO: go through all evidence files, create views (join with type tables) |
788 | // for (Predicate pred : fq.getAllPred()) { |
789 | // if(pred.hasEvid) { |
790 | // String pred_name = pred.getRelName(); |
791 | // System.out.println("pred: " + pred_name); |
792 | // } |
793 | // |
794 | // } |
795 | |
796 | |
797 | // Entity |
798 | db.dropView("public.pred_entity"); |
799 | query = "CREATE VIEW public.pred_entity AS " + |
800 | "SELECT t0.*, t1.constantvalue AS eid1_value, t2.constantvalue AS word2_value " + |
801 | "FROM pred_entity AS t0, type_eid AS t1, type_word AS t2 " + |
802 | "WHERE t0.eid1 = t1.constantid AND t0.word2 = t2.constantid"; |
803 | System.out.println(query); |
804 | db.execute(query); |
805 | |
806 | // Mention |
807 | db.dropView("public.pred_mention"); |
808 | query = "CREATE VIEW public.pred_mention AS " + |
809 | "SELECT t0.*, t1.constantvalue AS mid1_value, t2.constantvalue AS sentid2_value, t3.constantvalue AS docid3_value, t4.constantvalue AS word4_value " + |
810 | "FROM pred_mention AS t0, type_mid AS t1, type_sentid AS t2, type_docid AS t3, type_word AS t4 " + |
811 | "WHERE t0.mid1 = t1.constantid AND t0.sentid2 = t2.constantid AND t0.docid3 = t3.constantid AND t0.word4 = t4.constantid"; |
812 | System.out.println(query); |
813 | db.execute(query); |
814 | |
815 | // Sentence |
816 | db.dropView("public.pred_sentence"); |
817 | query = "CREATE VIEW public.pred_sentence AS " + |
818 | "SELECT t0.*, t1.constantvalue AS mid1_value, t2.constantvalue AS sentencetext2_value " + |
819 | "FROM pred_sentence AS t0, type_mid AS t1, type_sentencetext AS t2 " + |
820 | "WHERE t0.mid1 = t1.constantid AND t0.sentencetext2 = t2.constantid"; |
821 | System.out.println(query); |
822 | db.execute(query); |
823 | |
824 | // TODO: auto-generate models.py |
825 | |
826 | // TODO: auto-generate admin.py |
827 | } |
828 | |
829 | /** |
830 | * The constructor. |
831 | * @param _opt |
832 | */ |
833 | public ActiveLearner(FelixCommandOptions _opt){ |
834 | opt = _opt; |
835 | } |
836 | |
837 | |
838 | } |