import javax.servlet.*;
import javax.servlet.http.*;
import java.sql.*;
import java.util.*;
import java.net.*;
/**
* Performs what is probably the most intersting search, the motif
* search. The motif search takes a string of words from the user
* and finds the set of synonyms for all of those words. Then it
* finds the occurrences of each of these words, and assigns scores
* to pages in the database which match the words and their synonyms.
* It also gives additional weight to words which are less common.
*/
public class MotifSearch extends LitsearchBase {
public String drawPage(HttpServletRequest request,
HttpServletResponse response) throws Exception {
String searchQuery = request.getParameter("query");
//If there is no query, show the search page.
if (searchQuery == null) {
Template t = new Template(TEMPLATE_DIR + "motifsearch.html");
return t.toString();
}
searchQuery = searchQuery.trim();
String text = "";
String[] stripes = new String[2];
stripes[0] = "
";
stripes[1] = "
";
int curColor = 0;
//Parse the query into individual words, the stem those words.
ArrayList terms = new ArrayList();
while (true) {
int i = searchQuery.indexOf(' ');
if (i == -1) {
terms.add(PorterStemmer.stem(searchQuery));
break;
}
terms.add(PorterStemmer.stem(searchQuery.substring(0, i)));
searchQuery = searchQuery.substring(i + 1).trim();
}
//Call the search algorithm to get a list of results and scores
Hashtable results = doMotifSearch(terms);
text += "";
text += stripes[curColor % 2] + "";
text += "Found " + results.size() + " results for your query: \"" +
request.getParameter("query") + "\" | \n";
text += stripes[curColor++ %2] +
"Title | Author | Page (click to view) | Score | \n";
if (results.size() == 0) return text + "
";
//Look up the full information on each of the search results.
ConnectionWrapper wrapper = null;
try {
wrapper = DatabaseHook.getConnection();
Connection c = wrapper.connection();
Statement stmt = c.createStatement();
ResultSet rs;
String query;
ArrayList finalResults = new ArrayList();
Enumeration enum = results.keys();
String pageList = "-1";
while (enum.hasMoreElements()) {
String key = (String) enum.nextElement();
query = "SELECT w.id, w.title, b.name, u.page, u.id AS pageid " +
"FROM UniquePage u, Work w, WrittenBy b " +
"WHERE u.id =" + key +
" AND w.id = u.work AND b.work = w.id";
rs = stmt.executeQuery(query);
if (!rs.next()) break;
int score = Integer.parseInt((String) results.get(rs.getString(5)));
ResultStore r = new ResultStore(rs.getInt(1), rs.getString(2), rs.getString(3),
rs.getString(4), rs.getString(5), score);
finalResults.add(r);
}
//Sort the results in order of score
Collections.sort(finalResults);
int max = 1;
ResultStore maxRS = (ResultStore) Collections.max(finalResults);
if (maxRS != null) max = maxRS.score;
//Display the results in pretty tabular format
for (int i = finalResults.size() - 1; i >= 0; i--) {
ResultStore r = (ResultStore) finalResults.get(i);
int widthGood = (int) (r.score * 60.0 / max);
int widthBad = 60 - widthGood;
text += stripes[curColor++ % 2];
text += "" + r.title + " | ";
text += "" + r.name + " | \n";
text += "page " + r.page + " | \n";
text += " " +
" " +
r.score + " |
\n";
}
text += stripes[curColor++ % 2];
text += "Search Again\n";
text += " | ";
wrapper.checkIn();
return text;
} catch (Exception e) {
wrapper.checkIn();
throw e;
}
}
/**
* Performs the motif search. Comments in the code describe the process
* Note: This function used to make much more use of "interesting"
* unions and subqueries, but these were rewritten to preserve MySQL
* compatibility. The query is still pretty "interesting," though.
*/
public Hashtable doMotifSearch(ArrayList terms) throws Exception {
int numSynPerWord = 12 / terms.size();
ConnectionWrapper wrapper = null;
try {
wrapper = DatabaseHook.getConnection();
Connection c = wrapper.connection();
Statement stmt = c.createStatement();
ResultSet rs = null;
String query;
//Step 1: Find all of the words
query = "SELECT id FROM Word WHERE word IN (-1";
Iterator itr = terms.iterator();
while (itr.hasNext()) {
query += ", " + formatString((String) itr.next());
}
query += ")";
rs = stmt.executeQuery(query);
int[] roots = new int[terms.size()];
int[] freqs = new int[terms.size()];
int i = 0;
while (rs.next()) {
roots[i++] = rs.getInt(1);
}
Arrays.sort(roots); //sort roots inascending order
//Step 1.5: Get the frequencies of all of the roots
query = "SELECT word, count(*) score FROM PageContainsWord where word IN(-1";
for (i = 0; i < roots.length; i++) {
query += ", " + roots[i];
}
query += ") GROUP BY word ORDER BY word";
rs = stmt.executeQuery(query);
i = 0;
int maxFreq = 0;
while (rs.next()) {
int f = rs.getInt(1);
if (f > maxFreq) maxFreq = f;
freqs[i++] = f;
}
//Step 2: Get X synonyms from each description.
String allSyn = "";
for (i = 0; i < terms.size(); i++) {
if (freqs[i] > 0) {
//get the frequency of each word and use to determine the num to accept
query = "SELECT child FROM Syn WHERE root = " + roots[i] +
" LIMIT " + numSynPerWord * maxFreq / freqs[i];
;
rs = stmt.executeQuery(query);
while (rs.next()) {
allSyn += rs.getString(1) + ",";
}
allSyn += roots[i] + ",";
}
}
//Step 3: Do the large query, group the results by the
//page they appear on, and assign each page a score
//based on these results.
allSyn = allSyn.substring(0, allSyn.length() - 1);
query =
"SELECT page, COUNT(*) AS score " +
"FROM PageContainsWord WHERE word IN (" + allSyn + ") " +
"GROUP BY page " +
"ORDER BY score DESC, page " +
"LIMIT 40";
Hashtable pageKeys = new Hashtable();
rs = stmt.executeQuery(query);
while(rs.next()) {
pageKeys.put(rs.getString(1), rs.getString(2));
}
wrapper.checkIn();
return pageKeys;
} catch (Exception e) {
wrapper.checkIn();
throw e;
}
}
class ResultStore implements Comparable {
public int id;
public String title;
public String name;
public String page;
public String pageid;
public int score;
public ResultStore(int i, String t, String n, String p, String pi, int s) {
id = i; title = t; name = n; page = p; pageid = pi; score = s;
}
public int compareTo(Object o) {
ResultStore s = (ResultStore) o;
if (s.score == score) return 0;
if (s.score > score) return -1;
return 1;
}
}
}