package votorola.a.diff.harvest.cache; // Copyright 2010-2012. Christian Weilbach. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Votorola Software"), to deal in the Votorola Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicence, and/or sell copies of the Votorola Software, and to permit persons to whom the Votorola Software is furnished to do so, subject to the following conditions: The preceding copyright notice and this permission notice shall be included in all copies or substantial portions of the Votorola Software. THE VOTOROLA SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE VOTOROLA SOFTWARE OR THE USE OR OTHER DEALINGS IN THE VOTOROLA SOFTWARE.
import java.io.IOException;
import java.io.File;
import java.net.URISyntaxException;
import java.security.NoSuchAlgorithmException;
import java.sql.SQLException;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.script.ScriptException;
import votorola.a.VoteServer;
import votorola.a.diff.DiffKeyParse;
import votorola.a.diff.DraftPair;
import votorola.a.diff.harvest.Message;
import votorola.a.diff.harvest.MessageContext;
import votorola.a.diff.harvest.auth.Authenticator;
import votorola.a.diff.harvest.kick.Kicker;
import votorola.a.diff.harvest.kick.UpdateKick;
import votorola.a.count.Vote;
import votorola.a.count.XCastRelation;
import votorola.g.lang.ThreadSafe;
import votorola.g.logging.LoggerX;
import votorola.g.script.JavaScriptIncluder;
import votorola.g.sql.Database;
/**
* Pass your messages from the communication media to the cache to find and store
* valid difference messages. The overall processing of messages with regard
* to this cache is outlined here:
*
* <pre>
*
* {@linkplain HarvestCache#check(MessageContext, Authenticator) HarvestCache}                    {@linkplain HarvestCache#store(Message, Authenticator) HarvestCache}
*      ^                                   ^
*      |                                   |
*      | {@linkplain MessageContext}                    | {@linkplain Message}
*      |                                   |
*      |                                   |
*  Detectors                 Parse message from HTML (harvester)
*
*                  {@linkplain UpdateKick} (via {@linkplain Kicker})
* 1. Message detection ------------> 2. Archive harvesting
*
* </pre>
* Steps 1 and 2 are completely separated, since for some media we have to
* parse the information from the web anyway to obtain valid URLs. If the URL
* of the message can be parsed in the detector, no harvesting (web scraping) of
* the archive is necessary and Message objects can be used directly. In fact,
* MessageContext and Message differ only in this one crucial piece of information.
*/
public @ThreadSafe
class HarvestCache {

    /**
     * Initializes the singleton on first call and returns it. The cache needs
     * access to a VoteServer.Run and must be initialized before usage.
     * Subsequent calls return the already constructed instance and ignore
     * their argument.
     *
     * If construction fails the error is logged at SEVERE level and the JVM
     * is terminated with exit status 1.
     *
     * @param vsRun
     *            run of the vote-server that backs this cache.
     * @return the singleton instance.
     */
    public static HarvestCache init(final VoteServer.Run vsRun) {
        HarvestCache cache = instance;
        if (cache == null) {
            synchronized (HarvestCache.class) {
                // Re-check under the lock: another thread may have finished
                // the construction while this one was waiting.
                cache = instance;
                if (cache == null) {
                    try {
                        cache = new HarvestCache(vsRun);
                        instance = cache;
                    } catch (Exception e) {
                        LOGGER.log(Level.SEVERE,
                                "Could not initialize HarvestCache.", e);
                        System.exit(1);
                    }
                }
            }
        }
        return cache;
    }

    /**
     * Implemented as singleton.
     *
     * @return the singleton instance, or null if
     *         {@linkplain #init(VoteServer.Run) init} has not completed yet.
     */
    public static HarvestCache i() {
        return instance;
    }

    /**
     * The singleton. Volatile so that the unsynchronized reads in
     * {@linkplain #init(VoteServer.Run) init} and {@linkplain #i() i} only
     * ever observe a fully constructed instance.
     */
    private static volatile HarvestCache instance;

    /**
     * Global logger object.
     */
    private static final Logger LOGGER = LoggerX.i(HarvestCache.class);

    /**
     * Maximum length of the communication excerpt cut out of the message body.
     */
    public static final int CHARLIMIT = 150;

    /**
     * Names of the numeric query parameters read from a difference URL.
     */
    private static final String[] DIFF_PARAM_NAMES = { "a", "b", "aR", "bR" };

    /**
     * One precompiled pattern per entry of {@link #DIFF_PARAM_NAMES}, in the
     * same order.
     */
    private static final Pattern[] DIFF_PARAM_PATTERNS = compileParamPatterns();

    /**
     * Compiles the extraction pattern for each difference URL parameter. The
     * leading word boundary keeps a short name such as "a" from matching the
     * tail of a longer parameter name such as "extra".
     */
    private static Pattern[] compileParamPatterns() {
        final Pattern[] patterns = new Pattern[DIFF_PARAM_NAMES.length];
        for (int i = 0; i < patterns.length; i++) {
            patterns[i] = Pattern.compile("\\b" + DIFF_PARAM_NAMES[i]
                    + "=(\\d+)");
        }
        return patterns;
    }

    /**
     * This VoteServer.Run instance is needed to query authorship information
     * from the VoteServer.
     */
    private final VoteServer.Run vsRun;

    /**
     * Table in which the authenticated difference messages are stored.
     */
    private final DiffMessageTable table;

    /**
     * Database which contains the {@linkplain #getTable() table}.
     */
    private final Database db;

    /**
     * Access the database of this HarvestCache instance. This can be used to
     * initialize other services with this db internally, so you only have to
     * set up HarvestCache once and all services have access to the db.
     *
     * @return db
     */
    public Database getDatabase() {
        return db;
    }

    /**
     * Patterns for difference bridge URLs. If those match links in the message
     * body, these matched difference links are then checked for valid
     * difference messages. The list is unmodifiable.
     */
    private final List<Pattern> diffPatterns;

    /**
     * A context for configuring the construction of the harvest-cache. If a
     * file named "harvest-cache.js" exists in the votorola directory of the
     * vote-server, it is loaded as a script (s) and an instance of this
     * context (cc) is passed to it via s::constructingHarvestCache(cc).
     */
    public static @ThreadSafe
    final class ConstructionContext {

        /**
         * Constructs the complete configuration of the harvest-cache.
         *
         * @param s
         *            the complete startup configuration script.
         * @return the context after the script function
         *         "constructingHarvestCache" has been invoked on it.
         */
        private static ConstructionContext configure(final JavaScriptIncluder s)
                throws ScriptException, URISyntaxException {
            final ConstructionContext cc = new ConstructionContext(s);
            s.invokeKnownFunction("constructingHarvestCache", cc);
            return cc;
        }

        private ConstructionContext(final JavaScriptIncluder s) {
        }

        private final List<String> patternList = new LinkedList<>();

        /**
         * The live, mutable list of regular expressions for difference bridge
         * URLs. Each entry is later compiled with
         * {@link Pattern#compile(String)} by the cache constructor.
         *
         * @return the list of bridge URL patterns.
         */
        public List<String> getBridgePatternList() {
            return patternList;
        }
    }

    /**
     * Construct a cache to which you pass your messages from the communication
     * media. Communicates with a VoteServer.
     *
     * The bridge URL patterns are taken from the configuration script
     * "harvest-cache.js" in the votorola directory if that file exists;
     * otherwise a single default pattern is derived from the server name and
     * the vote-server name. The message table is created in the database if
     * it does not exist yet.
     *
     * @see votorola.a.VoteServer
     *
     * @param vsRun
     *            run of the vote-server that provides the configuration
     *            directory, the database and the pollwiki.
     * @throws URISyntaxException
     *             VoteServer initialisation fails.
     * @throws NoSuchAlgorithmException
     *             SHA hashing not available. This should never happen.
     * @throws SQLException
     *             Problem while initializing the database connection.
     * @throws ScriptException
     *             Cannot load configuration file (script) for VoteServer.
     * @throws IOException
     *             VoteServer has some IO-Problem.
     */
    private HarvestCache(final VoteServer.Run vsRun) throws URISyntaxException,
            NoSuchAlgorithmException, IOException, ScriptException,
            SQLException {
        final List<Pattern> tempDiffPatterns = new LinkedList<>();
        final VoteServer v = vsRun.voteServer();
        final File configFile = new File(v.votorolaDirectory(),
                "harvest-cache.js");
        if (configFile.exists()) {
            final ConstructionContext cc = ConstructionContext
                    .configure(new JavaScriptIncluder(configFile));
            for (final String pat : cc.getBridgePatternList()) {
                tempDiffPatterns.add(Pattern.compile(pat));
                LOGGER.finest("Added pattern for bridge: " + pat);
            }
        } else {
            final String bridgeUrl = "http://" + v.serverName() + ":8080/"
                    + v.name() + "/w/D";
            LOGGER.info("BridgeURL(s) not configured, using default: "
                    + bridgeUrl);
            // Quote the URL so that its dots are matched literally.
            tempDiffPatterns.add(Pattern.compile(Pattern.quote(bridgeUrl)
                    + "\\S+"));
        }
        this.diffPatterns = Collections.unmodifiableList(tempDiffPatterns);
        this.vsRun = vsRun;
        this.db = vsRun.database();
        this.table = new DiffMessageTable(db);
        if (!table.exists()) {
            table.create();
        }
    }

    /**
     * Return the VoteServer instance for this Cache.
     *
     */
    public VoteServer voteServer() {
        return vsRun.voteServer();
    }

    /**
     * TODO Examine the message for difference-urls and return whether this
     * message contains valid difference information. This includes search for
     * difference-urls, comparing the message sender to author and candidate of
     * the difference and verifying that the used forum is linked on the
     * candidate's position page. Use this method in detectors to decide whether
     * to raise a {@linkplain votorola.a.diff.harvest.kick.UpdateKick}.
     *
     * Not implemented yet: the current body ignores its arguments.
     *
     * @param mc
     *            Context of the message to check.
     * @param av
     *            Verifier for forum identities.
     * @return Currently always false.
     */
    public boolean check(final MessageContext mc, final Authenticator av) {
        return false;
    }

    /**
     * Call this once you have a valid message including
     * {@linkplain Message#path() url} to the web, to store it permanently in
     * the database.
     *
     * This is the main public API call which processes messages:
     * <ul>
     * <li>Separate each {@linkplain DiffMessage} for each Url</li>
     * <li>Authenticate message to {@linkplain AuthDiffMessage}</li>
     * <li>TODO store relation in {@linkplain RelAuthDiffMessage}</li>
     * <li>Store message with {@linkplain DiffMessageTable}</li>
     * </ul>
     *
     * Every difference message expanded from msg is verified and stored in
     * turn. Processing stops at the first one that fails, so messages that
     * were stored before the failure remain in the database.
     *
     * @param msg
     *            Message to process.
     * @param av
     *            Verifier for this communication medium's identities.
     * @return False if no difference message could be expanded from msg or if
     *         any of the steps failed; true if all of them were stored.
     */
    public boolean store(final Message msg, final Authenticator av) {
        final List<DiffMessage> potMsgs = expandDiffMessages(msg);
        if (potMsgs.isEmpty()) {
            return false;
        }
        for (final DiffMessage dmsg : potMsgs) {
            try {
                final AuthDiffMessage authMsg = av.verify(dmsg);
                if (authMsg == null) {
                    return false;
                }
                LOGGER.finest("Trying to store message: " + msg.mc().sentDate()
                        + " " + authMsg.author() + " " + authMsg.addressee());
                // Only bail out on failure; on success go on with the next
                // difference message instead of returning after the first.
                if (!put(authMsg)) {
                    return false;
                }
            } catch (Exception e) {
                LOGGER.log(Level.WARNING,
                        "Could not verify message: " + msg.content()
                                + " with date: " + msg.mc().sentDate(), e);
                return false;
            }
        }
        return true;
    }

    /**
     * Access this table to
     * {@linkplain DiffMessageTable#get(String, String[], int)} messages.
     */
    public DiffMessageTable getTable() {
        return table;
    }

    /**
     * Stores the DiffMessage finally into the database.
     *
     * @param dmsg
     *            Message to store in the database.
     * @return False if storing threw an exception, which is logged at
     *         WARNING level; true otherwise.
     */
    private boolean put(final AuthDiffMessage dmsg) {
        try {
            table.put(dmsg);
        } catch (Exception e) {
            LOGGER.log(Level.WARNING, "Putting message "
                    + dmsg.toString()
                    + " to DB failed. ", e);
            return false;
        }
        return true;
    }

    /**
     * Lists all interesting difference-urls. Interest is determined by the
     * difference patterns configured for this cache. Matches are listed
     * pattern by pattern, each in order of occurrence.
     *
     * @param searchedString
     *            Message body or content to search; null is treated like an
     *            empty string.
     * @return A list of matched difference URLs, empty if there is none.
     *
     * @see #expandDiffMessages(Message)
     * @see #diffPatterns
     */
    public List<String> findDiffUrls(final String searchedString) {
        final List<String> urlList = new LinkedList<>();
        if (searchedString == null) {
            return urlList;
        }
        for (final Pattern diffPattern : diffPatterns) {
            final Matcher diffMatcher = diffPattern.matcher(searchedString);
            while (diffMatcher.find()) {
                urlList.add(diffMatcher.group());
            }
        }
        return urlList;
    }

    /**
     * Generate a list of
     * {@linkplain votorola.a.diff.harvest.cache.DiffMessage} by examining all
     * difference-urls of the message. A URL contributes at most one
     * difference message, no matter how many of the configured patterns it
     * matches. URLs that match no pattern, or for which no draft pair can be
     * constructed, are skipped.
     *
     * @param msg
     *            Message whose difference-urls are expanded.
     * @return A list of difference messages cloned from msg and decorated with
     *         the difference authorship information.
     */
    private List<DiffMessage> expandDiffMessages(final Message msg) {
        final List<DiffMessage> potMsgs = new LinkedList<>();
        for (final String diffUrl : msg.mc().diffUrls()) {
            if (!matchesAnyDiffPattern(diffUrl)) {
                continue;
            }
            final HashMap<String, Integer> pMap;
            try {
                pMap = parseDiffParams(diffUrl);
            } catch (NumberFormatException e) {
                // The URL stems from message content; a digit run too long
                // for an int must not abort the processing of the message.
                LOGGER.log(Level.CONFIG,
                        "DraftPair construction for " + msg.path() + " failed.", e);
                continue;
            }
            try {
                final DraftPair draftPair = DraftPair.newDraftPair(
                        new DiffKeyParse(pMap.get("a"), pMap.get("aR"), // CWFIX obsolete form
                                pMap.get("b"), pMap.get("bR")),
                        vsRun.voteServer().pollwiki());
                potMsgs.add(new DiffMessage(msg, draftPair));
            } catch (IOException e) { // CWFIX review this change
                LOGGER.log(Level.CONFIG,
                        "DraftPair construction for " + msg.path() + " failed.", e);
            }
        }
        return potMsgs;
    }

    /**
     * Tells whether at least one of the configured difference patterns is
     * found in the given URL.
     */
    private boolean matchesAnyDiffPattern(final String diffUrl) {
        for (final Pattern diffPattern : diffPatterns) {
            if (diffPattern.matcher(diffUrl).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads the numeric parameters "a", "b", "aR" and "bR" from a difference
     * URL. A parameter that does not occur is mapped to -1.
     *
     * @throws NumberFormatException
     *             if a parameter value does not fit into an int.
     */
    private static HashMap<String, Integer> parseDiffParams(
            final String diffUrl) {
        final HashMap<String, Integer> params = new HashMap<>();
        for (int i = 0; i < DIFF_PARAM_NAMES.length; i++) {
            final Matcher mat = DIFF_PARAM_PATTERNS[i].matcher(diffUrl);
            params.put(DIFF_PARAM_NAMES[i],
                    mat.find() ? Integer.parseInt(mat.group(1)) : -1);
        }
        return params;
    }

    /**
     * TODO was private, privatize again when vote history is available. Used by
     * votorola/s/wap/HarvestWAP.java for now.
     *
     * Looks up the votes of both users in the given poll. If the addressee's
     * candidate is the author the result is CANDIDATE; otherwise, if the
     * author's candidate is the addressee, it is VOTER. In all other cases,
     * including a failed lookup (which is logged at WARNING level), it is
     * UNKNOWN.
     *
     * @param author
     *            identifier of the message author, compared against the
     *            candidate email of the addressee's vote.
     * @param addressee
     *            identifier of the message addressee, compared against the
     *            candidate email of the author's vote.
     * @param pollName
     *            name of the poll in which the votes are looked up.
     * @return current relation between author and addressee
     */
    public XCastRelation relation(final String author,
            final String addressee, final String pollName) {
        try {
            final Vote authorVote = new Vote(author, vsRun.scopePoll()
                    .ensurePoll(pollName).voterInputTable());
            final Vote addresseeVote = new Vote(addressee, vsRun.scopePoll()
                    .ensurePoll(pollName).voterInputTable());
            if (addresseeVote.getCandidateEmail() != null
                    && addresseeVote.getCandidateEmail().equals(author)) {
                return XCastRelation.CANDIDATE;
            }
            if (authorVote.getCandidateEmail() != null
                    && authorVote.getCandidateEmail().equals(addressee)) {
                return XCastRelation.VOTER;
            }
        } catch (IOException | SQLException | ScriptException e) {
            LOGGER.log(Level.WARNING,
                    "Cannot determine author/voter relationship.", e);
        }
        return XCastRelation.UNKNOWN;
    }
}