MediawikiConnection.java: Difference between revisions
From Knot Atlas
Jump to navigation · Jump to search
No edit summary |
(despammifying) |
||
(11 intermediate revisions by 3 users not shown) | |||
Line 1: | Line 1: | ||
/* |
|||
package mediawiki; |
|||
This page contains the source code for MediawikiConnection.java, the java component of Scott Morrison's [[WikiLink` package]]. |
|||
<pre> |
|||
*/ |
|||
package wikilink; |
|||
import java.io.File; |
|||
import java.io.IOException; |
import java.io.IOException; |
||
import java.io.InputStream; |
import java.io.InputStream; |
||
import java.util.Arrays; |
|||
import java.util.HashMap; |
|||
import java.util.HashSet; |
|||
import java.util.Iterator; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
import java.util.Set; |
|||
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; |
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; |
||
import org.apache.commons.httpclient.HttpClient; |
import org.apache.commons.httpclient.HttpClient; |
||
import org.apache.commons.httpclient.HttpException; |
import org.apache.commons.httpclient.HttpException; |
||
import org.apache.commons.httpclient.HttpMethodBase; |
|||
import org.apache.commons.httpclient.NameValuePair; |
import org.apache.commons.httpclient.NameValuePair; |
||
import org.apache.commons.httpclient.methods.GetMethod; |
import org.apache.commons.httpclient.methods.GetMethod; |
||
import org.apache.commons.httpclient.methods.PostMethod; |
import org.apache.commons.httpclient.methods.PostMethod; |
||
import org.apache.commons.httpclient.methods.multipart.FilePart; |
|||
import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity; |
|||
import org.apache.commons.httpclient.methods.multipart.Part; |
|||
import org.apache.commons.httpclient.methods.multipart.StringPart; |
|||
import org.apache.commons.httpclient.params.HttpMethodParams; |
import org.apache.commons.httpclient.params.HttpMethodParams; |
||
import org.apache.commons.lang.StringEscapeUtils; |
|||
import org.jdom.Document; |
import org.jdom.Document; |
||
import org.jdom.Element; |
import org.jdom.Element; |
||
Line 18: | Line 37: | ||
/** |
/** |
||
* @author Scott Morrison |
* @author Scott Morrison |
||
* copyright 2005, available under either the MIT or GPL license. |
|||
*/ |
*/ |
||
public class MediawikiConnection { |
public class MediawikiConnection { |
||
private String baseURL; |
private String baseURL; |
||
private |
private String username, password; |
||
// public only for debugging! |
|||
public HttpClient client; |
|||
public HttpMethodBase currentMethod; |
|||
public MediawikiConnection(String baseURL) { |
public MediawikiConnection(String baseURL) { |
||
initialise(baseURL, "", ""); |
|||
} |
} |
||
public MediawikiConnection(String baseURL, String username, String password) { |
public MediawikiConnection(String baseURL, String username, String password) { |
||
initialise(baseURL, username, password); |
|||
} |
|||
private void initialise(String baseURL, String username, String password) { |
|||
this.baseURL = baseURL; |
this.baseURL = baseURL; |
||
this.username = username; |
|||
this.password = password; |
|||
renewClient(); |
|||
} |
|||
private boolean renewClient() { |
|||
if(client == null) { |
|||
client = new HttpClient(); |
|||
client.getHttpConnectionManager().getParams().setConnectionTimeout(5000); |
|||
return doLogin(username, password); |
|||
} |
|||
return true; |
|||
} |
|||
private void disposeClient() { |
|||
client = null; |
|||
} |
|||
private boolean forceRenewClient() { |
|||
disposeClient(); |
|||
return renewClient(); |
|||
} |
|||
private void sleep() { |
|||
try { |
|||
Thread.sleep(5000); |
|||
} catch (InterruptedException e) { |
|||
// yawn!!! Better get back to work. |
|||
e.printStackTrace(); |
|||
} |
|||
} |
} |
||
private boolean doLogin(String username, String password) { |
private boolean doLogin(String username, String password) { |
||
if(username == "") return true; |
|||
PostMethod post = new PostMethod(baseURL + "?title=Special:Userlogin&action=submit"); |
PostMethod post = new PostMethod(baseURL + "?title=Special:Userlogin&action=submit"); |
||
// post.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); |
|||
currentMethod = post; |
|||
// enable automatic retrying |
// enable automatic retrying |
||
Line 45: | Line 103: | ||
new NameValuePair("wpPassword", password), |
new NameValuePair("wpPassword", password), |
||
new NameValuePair("wpRemember", "1"), |
new NameValuePair("wpRemember", "1"), |
||
// ugh... this changed between 1.4beta5 and 1.4.7 |
|||
new NameValuePair("wpLoginAttempt", "1") |
|||
// for 1.4beta5 |
|||
new NameValuePair("wpLoginAttempt", "1"), |
|||
// for 1.4.7 |
|||
new NameValuePair("wpLoginattempt", "Log in") |
|||
}; |
}; |
||
post.setRequestBody(data); |
post.setRequestBody(data); |
||
int status; |
int status = 1; |
||
try { |
try { |
||
status = client.executeMethod(post); |
status = client.executeMethod(post); |
||
} catch (HttpException e) { |
} catch (HttpException e) { |
||
// uhoh... the HttpClient object is probably in a bad state. |
|||
return false; |
|||
// deal with this!!! I can't just dispose here... or can I? |
|||
e.printStackTrace(); |
|||
disposeClient(); |
|||
sleep(); |
|||
} catch (IOException e) { |
} catch (IOException e) { |
||
// who cares |
|||
e.printStackTrace(); |
|||
disposeClient(); |
|||
sleep(); |
|||
} finally { |
} finally { |
||
post.releaseConnection(); |
post.releaseConnection(); |
||
currentMethod = null; |
|||
} |
} |
||
return (status == 302); |
return (status == 302); |
||
Line 64: | Line 134: | ||
public String getPageText(String title) throws HttpException, IOException, JDOMException { |
public String getPageText(String title) throws HttpException, IOException, JDOMException { |
||
renewClient(); |
|||
String URL = baseURL + "?title=Special:Export/" + title; |
String URL = baseURL + "?title=Special:Export/" + title; |
||
GetMethod get = new GetMethod(URL); |
GetMethod get = new GetMethod(URL); |
||
currentMethod = get; |
|||
// enable automatic retrying |
// enable automatic retrying |
||
Line 72: | Line 145: | ||
client.executeMethod(get); |
client.executeMethod(get); |
||
InputStream is = get.getResponseBodyAsStream(); |
InputStream is = get.getResponseBodyAsStream(); |
||
SAXBuilder builder = new SAXBuilder(); |
SAXBuilder builder = new SAXBuilder(); |
||
Document doc = builder.build(is); |
Document doc = builder.build(is); |
||
Element |
Element page = doc.getRootElement().getChild("page"); |
||
String text = ""; |
|||
if(page != null) |
|||
text = page.getChild("revision").getChild("text").getText(); |
|||
// finally, make sure we release the connection! |
// finally, make sure we release the connection! |
||
get.releaseConnection(); |
get.releaseConnection(); |
||
currentMethod = null; |
|||
return text |
return text; |
||
} |
} |
||
private String[][] mapToArray(Map map) { |
|||
public boolean setPageText(String title, String text) throws IOException { |
|||
String[][] array = new String[map.size()][2]; |
|||
return setPageText(title, text, ""); |
|||
Iterator iter = map.keySet().iterator(); |
|||
String title; |
|||
int i = 0; |
|||
while(iter.hasNext()) { |
|||
title = (String)iter.next(); |
|||
array[i][0] = title; |
|||
array[i][1] = (String)map.get(title); |
|||
i++; |
|||
} |
|||
return array; |
|||
} |
} |
||
private Map arrayToMap(String[][] array) { |
|||
public boolean setPageText(String title, String text, String summary) throws IOException { |
|||
Map map = new HashMap(); |
|||
for(int i = 0; i < array.length; i++) { |
|||
map.put(array[i][0], array[i][1]); |
|||
} |
|||
return map; |
|||
} |
|||
public String[][] getPageTexts(String[] titles) { |
|||
return mapToArray(getPageTexts(new HashSet(Arrays.asList(titles)))); |
|||
} |
|||
public Map getPageTexts(Set titles) { |
|||
renewClient(); |
|||
Map pageTexts = new HashMap(); |
|||
String titlesString = ""; |
|||
Iterator iter = titles.iterator(); |
|||
while(iter.hasNext()) { |
|||
titlesString = titlesString + (String)iter.next() + "\r\n"; |
|||
} |
|||
String URL = baseURL + "?title=Special:Export"; |
|||
PostMethod post = new PostMethod(URL); |
|||
currentMethod = post; |
|||
post.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, |
|||
new DefaultHttpMethodRetryHandler(3, false)); |
|||
post.getParams().setIntParameter(HttpMethodParams.SO_TIMEOUT, 5000); |
|||
NameValuePair[] data = { |
|||
new NameValuePair("action", "submit"), |
|||
new NameValuePair("curonly", "true"), |
|||
new NameValuePair("pages", titlesString), |
|||
}; |
|||
post.setRequestBody(data); |
|||
InputStream is = null; |
|||
int status = 1; |
|||
try { |
|||
status = client.executeMethod(post); |
|||
is = post.getResponseBodyAsStream(); |
|||
} catch (HttpException e) { |
|||
// uhoh... the HttpClient object is probably in a bad state. |
|||
e.printStackTrace(); |
|||
disposeClient(); |
|||
post.releaseConnection(); |
|||
return pageTexts; |
|||
} catch (IOException e) { |
|||
// who cares |
|||
e.printStackTrace(); |
|||
disposeClient(); |
|||
post.releaseConnection(); |
|||
return pageTexts; |
|||
} |
|||
SAXBuilder builder = new SAXBuilder(); |
|||
try { |
|||
Document doc = builder.build(is); |
|||
Element page; |
|||
String title; |
|||
String text; |
|||
List pages = doc.getRootElement().getChildren("page"); |
|||
iter = pages.iterator(); |
|||
while(iter.hasNext()) { |
|||
page = (Element)iter.next(); |
|||
text = page.getChild("revision").getChild("text").getText(); |
|||
title = page.getChild("title").getText(); |
|||
pageTexts.put(title, text); |
|||
} |
|||
} catch (IOException e) { |
|||
e.printStackTrace(); |
|||
disposeClient(); |
|||
} catch (JDOMException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
post.releaseConnection(); |
|||
return pageTexts; |
|||
} |
|||
public String[][] filterSetPageTexts(String[][] edits) { |
|||
return mapToArray(filterSetPageTexts(arrayToMap(edits))); |
|||
} |
|||
public Map filterSetPageTexts(Map edits) { |
|||
Map pageTexts = getPageTexts(edits.keySet()); |
|||
Iterator iter = pageTexts.keySet().iterator(); |
|||
String title; |
|||
while(iter.hasNext()) { |
|||
title = (String)iter.next(); |
|||
if( ((String)edits.get(title)).equals((String)pageTexts.get(title)) ) { |
|||
edits.remove(title); |
|||
} |
|||
} |
|||
return edits; |
|||
} |
|||
private Map setPageTextsOnce(Map edits) { |
|||
Map failedEdits = new HashMap(); |
|||
Iterator iter = edits.keySet().iterator(); |
|||
String title, text; |
|||
while(iter.hasNext()) { |
|||
title = (String)iter.next(); |
|||
text = (String)edits.get(title); |
|||
if(!setPageText(title, text)) { |
|||
failedEdits.put(title, text); |
|||
} |
|||
} |
|||
return failedEdits; |
|||
} |
|||
private Map setFilteredPageTexts(Map edits) { |
|||
return setPageTextsOnce(filterSetPageTexts(edits)); |
|||
} |
|||
public Map setPageTexts(Map edits) { |
|||
return setFilteredPageTexts(setFilteredPageTexts(edits)); |
|||
} |
|||
public String[][] setPageTexts(String[][] edits) { |
|||
return mapToArray(setPageTexts(arrayToMap(edits))); |
|||
} |
|||
public boolean setPageText(String title, String text) { |
|||
return setPageText(title, text, ""); |
|||
} |
|||
public boolean setPageText(String title, String text, String summary) { |
|||
renewClient(); |
|||
String URL = baseURL + "?title=" + title + "&action=edit"; |
String URL = baseURL + "?title=" + title + "&action=edit"; |
||
// first, load the edit page, to get an 'edittime' |
// first, load the edit page, to get an 'edittime' and 'edittoken' |
||
String editTime = ""; |
String editTime = ""; |
||
String editToken = ""; |
|||
String mungedWikiSource = ""; |
|||
GetMethod get = new GetMethod(URL); |
GetMethod get = new GetMethod(URL); |
||
currentMethod = get; |
|||
// enable automatic retrying |
// enable automatic retrying |
||
get.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, |
get.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, |
||
new DefaultHttpMethodRetryHandler(3, false)); |
new DefaultHttpMethodRetryHandler(3, false)); |
||
client.executeMethod(get); |
|||
String response = ""; |
|||
String response = get.getResponseBodyAsString(); |
|||
boolean failure = false; |
|||
get.releaseConnection(); |
|||
try { |
|||
java.util.regex.Pattern regex = java.util.regex.Pattern.compile( |
|||
client.executeMethod(get); |
|||
response = get.getResponseBodyAsString(); |
|||
} catch (HttpException e) { |
|||
// uhoh... the HttpClient object is probably in a bad state. |
|||
e.printStackTrace(); |
|||
failure = true; |
|||
} catch (IOException e) { |
|||
// who cares |
|||
e.printStackTrace(); |
|||
failure = true; |
|||
} finally { |
|||
get.releaseConnection(); |
|||
currentMethod = null; |
|||
if(failure) { |
|||
disposeClient(); |
|||
return false; |
|||
} |
|||
} |
|||
// now sift through the response looking for the editime and edittoken |
|||
java.util.regex.Pattern regex1 = java.util.regex.Pattern.compile( |
|||
"value=\"([0-9]+)\" name=\"wpEdittime\""); |
"value=\"([0-9]+)\" name=\"wpEdittime\""); |
||
java.util.regex.Matcher |
java.util.regex.Matcher matcher1 = regex1.matcher(response); |
||
if( |
if(matcher1.find()) editTime = matcher1.group(1); |
||
java.util.regex.Pattern regex2 = java.util.regex.Pattern.compile( |
|||
"value=\"([0-9a-z]+)\" name=\"wpEditToken\""); |
|||
java.util.regex.Matcher matcher2 = regex2.matcher(response); |
|||
if(matcher2.find()) editToken = matcher2.group(1); |
|||
// while we're at it, let's get the (html-munged) wiki source as well. |
|||
java.util.regex.Pattern regex3 = java.util.regex.Pattern.compile( |
|||
"<textarea .*? name=\"wpTextbox1\" .*?>(.*?)</textarea>", java.util.regex.Pattern.DOTALL); |
|||
java.util.regex.Matcher matcher3 = regex3.matcher(response); |
|||
if(matcher3.find()) mungedWikiSource = matcher3.group(1); |
|||
String wikiSource = StringEscapeUtils.unescapeHtml(mungedWikiSource).trim(); |
|||
if(wikiSource.equals(text.trim())) { |
|||
// don't need to do anything |
|||
return true; |
|||
} |
|||
PostMethod post = new PostMethod(URL); |
PostMethod post = new PostMethod(URL); |
||
currentMethod = post; |
|||
post.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, |
|||
new DefaultHttpMethodRetryHandler(3, false)); |
|||
post.getParams().setIntParameter(HttpMethodParams.SO_TIMEOUT, 5000); |
|||
NameValuePair[] data = { |
NameValuePair[] data = { |
||
new NameValuePair("wpTextbox1", text), |
new NameValuePair("wpTextbox1", text), |
||
new NameValuePair("wpEdittime", editTime), |
new NameValuePair("wpEdittime", editTime), |
||
new NameValuePair("wpEditToken", editToken), |
|||
new NameValuePair("wpSummary", summary) |
new NameValuePair("wpSummary", summary) |
||
}; |
}; |
||
Line 117: | Line 383: | ||
post.setRequestBody(data); |
post.setRequestBody(data); |
||
int status; |
int status = 1; |
||
try { |
try { |
||
System.out.println("Starting post, name: " + title); |
|||
status = client.executeMethod(post); |
status = client.executeMethod(post); |
||
System.out.println("Finished setPageText post, status: " + status); |
|||
System.out.println(post.getResponseBodyAsString()); |
|||
} catch (HttpException e) { |
} catch (HttpException e) { |
||
// uhoh... the HttpClient object is probably in a bad state. |
|||
return false; |
|||
e.printStackTrace(); |
|||
disposeClient(); |
|||
} catch (IOException e) { |
} catch (IOException e) { |
||
// who cares |
|||
return false; |
|||
e.printStackTrace(); |
|||
disposeClient(); |
|||
} finally { |
} finally { |
||
// debug! |
|||
post.releaseConnection(); |
post.releaseConnection(); |
||
currentMethod = null; |
|||
} |
} |
||
Line 131: | Line 406: | ||
} |
} |
||
public boolean uploadFile(String filename, String description) throws IOException { |
|||
return uploadFile(new File(filename), description); |
|||
} |
|||
public boolean uploadFile(File f, String description) throws IOException { |
|||
renewClient(); |
|||
String URL = baseURL + "?title=Special:Upload"; |
|||
PostMethod upload = new PostMethod(URL); |
|||
currentMethod = upload; |
|||
Part[] parts = { |
|||
new StringPart("wpUploadDescription", description), |
|||
new StringPart("wpUploadAffirm", "1"), |
|||
new StringPart("wpIgnoreWarning", "1"), |
|||
new StringPart("wpUpload", "Upload file"), |
|||
new FilePart("wpUploadFile", f.getName(), f) |
|||
}; |
|||
upload.setRequestEntity( new MultipartRequestEntity(parts, upload.getParams()) ); |
|||
int status = 1; |
|||
try { |
|||
status = client.executeMethod(upload); |
|||
} catch (HttpException e) { |
|||
e.printStackTrace(); |
|||
disposeClient(); |
|||
} catch (IOException e) { |
|||
e.printStackTrace(); |
|||
disposeClient(); |
|||
} finally { |
|||
upload.releaseConnection(); |
|||
currentMethod = null; |
|||
} |
|||
return (status == 302); |
|||
} |
|||
// deleteFile doesn't work! The mediawiki software says that something is wrong with the login session. |
|||
public boolean deleteFile(String filename, String reason) throws HttpException, IOException { |
|||
String URL1 = baseURL + "?title=Image:" + filename + "&action=delete"; |
|||
// first, load the edit page, to get an 'edittime' and 'edittoken' |
|||
String editTime = ""; |
|||
String editToken = ""; |
|||
GetMethod get = new GetMethod(URL1); |
|||
currentMethod = get; |
|||
// enable automatic retrying |
|||
get.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, |
|||
new DefaultHttpMethodRetryHandler(3, false)); |
|||
client.executeMethod(get); |
|||
// now sift through the response looking for the editime and edittoken |
|||
String response = get.getResponseBodyAsString(); |
|||
get.releaseConnection(); |
|||
currentMethod = null; |
|||
java.util.regex.Pattern regex1 = java.util.regex.Pattern.compile( |
|||
"value=\"([0-9]+)\" name=\"wpEdittime\""); |
|||
java.util.regex.Matcher matcher1 = regex1.matcher(response); |
|||
if(matcher1.find()) editTime = matcher1.group(1); |
|||
java.util.regex.Pattern regex2 = java.util.regex.Pattern.compile( |
|||
"value=\"([0-9a-z]+)\" name=\"wpEditToken\""); |
|||
java.util.regex.Matcher matcher2 = regex2.matcher(response); |
|||
if(matcher2.find()) editToken = matcher2.group(1); |
|||
String URL2 = baseURL + "?title=Image:" + filename + "&action=delete" + "&image=" + filename; |
|||
PostMethod post = new PostMethod(URL2); |
|||
currentMethod = post; |
|||
// post.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); |
|||
NameValuePair[] data = { |
|||
new NameValuePair("wpConfirm", "1"), |
|||
// new NameValuePair("wpEdittime", editTime), |
|||
new NameValuePair("wpReason", reason), |
|||
new NameValuePair("wpConfirmB", "Confirm"), |
|||
new NameValuePair("wpEditToken", editToken) |
|||
}; |
|||
post.setRequestBody(data); |
|||
int status = 1; |
|||
try { |
|||
status = client.executeMethod(post); |
|||
} catch (HttpException e) { |
|||
e.printStackTrace(); |
|||
} catch (IOException e) { |
|||
e.printStackTrace(); |
|||
} finally { |
|||
post.releaseConnection(); |
|||
currentMethod = null; |
|||
} |
|||
return (status == 302); |
|||
} |
|||
public static class Test { |
public static class Test { |
||
public static void main(String[] args) throws HttpException, IOException, JDOMException { |
public static void main(String[] args) throws HttpException, IOException, JDOMException { |
||
MediawikiConnection |
MediawikiConnection conn1 = new MediawikiConnection( |
||
"http://math. |
"http://katlas.math.toronto.edu/w/index.php", "TestRobot", "******"); |
||
// MediawikiConnection conn2 = new MediawikiConnection( |
|||
System.out.println(conn.getPageText("Main_Page")); |
|||
// "http://katlas.math.toronto.edu/w/index.php", "TestRobot", "******"); |
|||
// MediawikiConnection conn3 = new MediawikiConnection( |
|||
// "http://katlas.math.toronto.edu/w/index.php", "TestRobot", "******"); |
|||
// MediawikiConnection conn2 = new MediawikiConnection( |
|||
// "http://math.berkeley.edu/~scott/w/index.php", "TestRobot", "******"); |
|||
// MediawikiConnection conn3 = new MediawikiConnection( |
|||
// "http://katlas.math.toronto.edu/w/index.php", "ScottSysopRobot", "******"); |
|||
// System.out.println(conn.getPageText("Main_Page")); |
|||
// System.out.println(conn.setPageText("foo", "crel3", "auto!")); |
|||
System.out.println(conn1.setPageText("foo","<math>1+2</math> "+Math.random())); |
|||
System.out.println(conn1.setPageText("foo","<math>1+2</math> ")); |
|||
System.out.println(conn1.setPageText("foo","<math>1+2</math> ")); |
|||
// System.out.println(conn.deleteFile("K7a4_m.gif","redundant, see [[Image:K7a4.gif]]")); |
|||
} |
} |
||
} |
} |
||
} |
} |
||
/* |
|||
</pre> |
|||
[[Category:Source Code]] |
|||
*/ |
Latest revision as of 20:26, 20 February 2007
/* This page contains the source code for MediawikiConnection.java, the java component of Scott Morrison's WikiLink` package.
*/ package wikilink; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpMethodBase; import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.methods.PostMethod; import org.apache.commons.httpclient.methods.multipart.FilePart; import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity; import org.apache.commons.httpclient.methods.multipart.Part; import org.apache.commons.httpclient.methods.multipart.StringPart; import org.apache.commons.httpclient.params.HttpMethodParams; import org.apache.commons.lang.StringEscapeUtils; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.input.SAXBuilder; /** * @author Scott Morrison */ public class MediawikiConnection { private String baseURL; private String username, password; // public only for debugging! 
public HttpClient client; public HttpMethodBase currentMethod; public MediawikiConnection(String baseURL) { initialise(baseURL, "", ""); } public MediawikiConnection(String baseURL, String username, String password) { initialise(baseURL, username, password); } private void initialise(String baseURL, String username, String password) { this.baseURL = baseURL; this.username = username; this.password = password; renewClient(); } private boolean renewClient() { if(client == null) { client = new HttpClient(); client.getHttpConnectionManager().getParams().setConnectionTimeout(5000); return doLogin(username, password); } return true; } private void disposeClient() { client = null; } private boolean forceRenewClient() { disposeClient(); return renewClient(); } private void sleep() { try { Thread.sleep(5000); } catch (InterruptedException e) { // yawn!!! Better get back to work. e.printStackTrace(); } } private boolean doLogin(String username, String password) { if(username == "") return true; PostMethod post = new PostMethod(baseURL + "?title=Special:Userlogin&action=submit"); // post.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); currentMethod = post; // enable automatic retrying post.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false)); NameValuePair[] data = { new NameValuePair("wpName", username), new NameValuePair("wpPassword", password), new NameValuePair("wpRemember", "1"), // ugh... this changed between 1.4beta5 and 1.4.7 // for 1.4beta5 new NameValuePair("wpLoginAttempt", "1"), // for 1.4.7 new NameValuePair("wpLoginattempt", "Log in") }; post.setRequestBody(data); int status = 1; try { status = client.executeMethod(post); } catch (HttpException e) { // uhoh... the HttpClient object is probably in a bad state. // deal with this!!! I can't just dispose here... or can I? 
e.printStackTrace(); disposeClient(); sleep(); } catch (IOException e) { // who cares e.printStackTrace(); disposeClient(); sleep(); } finally { post.releaseConnection(); currentMethod = null; } return (status == 302); } public String getPageText(String title) throws HttpException, IOException, JDOMException { renewClient(); String URL = baseURL + "?title=Special:Export/" + title; GetMethod get = new GetMethod(URL); currentMethod = get; // enable automatic retrying get.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false)); client.executeMethod(get); InputStream is = get.getResponseBodyAsStream(); SAXBuilder builder = new SAXBuilder(); Document doc = builder.build(is); Element page = doc.getRootElement().getChild("page"); String text = ""; if(page != null) text = page.getChild("revision").getChild("text").getText(); // finally, make sure we release the connection! get.releaseConnection(); currentMethod = null; return text; } private String[][] mapToArray(Map map) { String[][] array = new String[map.size()][2]; Iterator iter = map.keySet().iterator(); String title; int i = 0; while(iter.hasNext()) { title = (String)iter.next(); array[i][0] = title; array[i][1] = (String)map.get(title); i++; } return array; } private Map arrayToMap(String[][] array) { Map map = new HashMap(); for(int i = 0; i < array.length; i++) { map.put(array[i][0], array[i][1]); } return map; } public String[][] getPageTexts(String[] titles) { return mapToArray(getPageTexts(new HashSet(Arrays.asList(titles)))); } public Map getPageTexts(Set titles) { renewClient(); Map pageTexts = new HashMap(); String titlesString = ""; Iterator iter = titles.iterator(); while(iter.hasNext()) { titlesString = titlesString + (String)iter.next() + "\r\n"; } String URL = baseURL + "?title=Special:Export"; PostMethod post = new PostMethod(URL); currentMethod = post; post.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, 
false)); post.getParams().setIntParameter(HttpMethodParams.SO_TIMEOUT, 5000); NameValuePair[] data = { new NameValuePair("action", "submit"), new NameValuePair("curonly", "true"), new NameValuePair("pages", titlesString), }; post.setRequestBody(data); InputStream is = null; int status = 1; try { status = client.executeMethod(post); is = post.getResponseBodyAsStream(); } catch (HttpException e) { // uhoh... the HttpClient object is probably in a bad state. e.printStackTrace(); disposeClient(); post.releaseConnection(); return pageTexts; } catch (IOException e) { // who cares e.printStackTrace(); disposeClient(); post.releaseConnection(); return pageTexts; } SAXBuilder builder = new SAXBuilder(); try { Document doc = builder.build(is); Element page; String title; String text; List pages = doc.getRootElement().getChildren("page"); iter = pages.iterator(); while(iter.hasNext()) { page = (Element)iter.next(); text = page.getChild("revision").getChild("text").getText(); title = page.getChild("title").getText(); pageTexts.put(title, text); } } catch (IOException e) { e.printStackTrace(); disposeClient(); } catch (JDOMException e) { e.printStackTrace(); } post.releaseConnection(); return pageTexts; } public String[][] filterSetPageTexts(String[][] edits) { return mapToArray(filterSetPageTexts(arrayToMap(edits))); } public Map filterSetPageTexts(Map edits) { Map pageTexts = getPageTexts(edits.keySet()); Iterator iter = pageTexts.keySet().iterator(); String title; while(iter.hasNext()) { title = (String)iter.next(); if( ((String)edits.get(title)).equals((String)pageTexts.get(title)) ) { edits.remove(title); } } return edits; } private Map setPageTextsOnce(Map edits) { Map failedEdits = new HashMap(); Iterator iter = edits.keySet().iterator(); String title, text; while(iter.hasNext()) { title = (String)iter.next(); text = (String)edits.get(title); if(!setPageText(title, text)) { failedEdits.put(title, text); } } return failedEdits; } private Map setFilteredPageTexts(Map 
edits) { return setPageTextsOnce(filterSetPageTexts(edits)); } public Map setPageTexts(Map edits) { return setFilteredPageTexts(setFilteredPageTexts(edits)); } public String[][] setPageTexts(String[][] edits) { return mapToArray(setPageTexts(arrayToMap(edits))); } public boolean setPageText(String title, String text) { return setPageText(title, text, ""); } public boolean setPageText(String title, String text, String summary) { renewClient(); String URL = baseURL + "?title=" + title + "&action=edit"; // first, load the edit page, to get an 'edittime' and 'edittoken' String editTime = ""; String editToken = ""; String mungedWikiSource = ""; GetMethod get = new GetMethod(URL); currentMethod = get; // enable automatic retrying get.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false)); String response = ""; boolean failure = false; try { client.executeMethod(get); response = get.getResponseBodyAsString(); } catch (HttpException e) { // uhoh... the HttpClient object is probably in a bad state. e.printStackTrace(); failure = true; } catch (IOException e) { // who cares e.printStackTrace(); failure = true; } finally { get.releaseConnection(); currentMethod = null; if(failure) { disposeClient(); return false; } } // now sift through the response looking for the editime and edittoken java.util.regex.Pattern regex1 = java.util.regex.Pattern.compile( "value=\"([0-9]+)\" name=\"wpEdittime\""); java.util.regex.Matcher matcher1 = regex1.matcher(response); if(matcher1.find()) editTime = matcher1.group(1); java.util.regex.Pattern regex2 = java.util.regex.Pattern.compile( "value=\"([0-9a-z]+)\" name=\"wpEditToken\""); java.util.regex.Matcher matcher2 = regex2.matcher(response); if(matcher2.find()) editToken = matcher2.group(1); // while we're at it, let's get the (html-munged) wiki source as well. java.util.regex.Pattern regex3 = java.util.regex.Pattern.compile( "<textarea .*? 
name=\"wpTextbox1\" .*?>(.*?)</textarea>", java.util.regex.Pattern.DOTALL); java.util.regex.Matcher matcher3 = regex3.matcher(response); if(matcher3.find()) mungedWikiSource = matcher3.group(1); String wikiSource = StringEscapeUtils.unescapeHtml(mungedWikiSource).trim(); if(wikiSource.equals(text.trim())) { // don't need to do anything return true; } PostMethod post = new PostMethod(URL); currentMethod = post; post.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false)); post.getParams().setIntParameter(HttpMethodParams.SO_TIMEOUT, 5000); NameValuePair[] data = { new NameValuePair("wpTextbox1", text), new NameValuePair("wpEdittime", editTime), new NameValuePair("wpEditToken", editToken), new NameValuePair("wpSummary", summary) }; post.setRequestBody(data); int status = 1; try { System.out.println("Starting post, name: " + title); status = client.executeMethod(post); System.out.println("Finished setPageText post, status: " + status); System.out.println(post.getResponseBodyAsString()); } catch (HttpException e) { // uhoh... the HttpClient object is probably in a bad state. e.printStackTrace(); disposeClient(); } catch (IOException e) { // who cares e.printStackTrace(); disposeClient(); } finally { // debug! 
post.releaseConnection(); currentMethod = null; } return (status == 302); } public boolean uploadFile(String filename, String description) throws IOException { return uploadFile(new File(filename), description); } public boolean uploadFile(File f, String description) throws IOException { renewClient(); String URL = baseURL + "?title=Special:Upload"; PostMethod upload = new PostMethod(URL); currentMethod = upload; Part[] parts = { new StringPart("wpUploadDescription", description), new StringPart("wpUploadAffirm", "1"), new StringPart("wpIgnoreWarning", "1"), new StringPart("wpUpload", "Upload file"), new FilePart("wpUploadFile", f.getName(), f) }; upload.setRequestEntity( new MultipartRequestEntity(parts, upload.getParams()) ); int status = 1; try { status = client.executeMethod(upload); } catch (HttpException e) { e.printStackTrace(); disposeClient(); } catch (IOException e) { e.printStackTrace(); disposeClient(); } finally { upload.releaseConnection(); currentMethod = null; } return (status == 302); } // deleteFile doesn't work! The mediawiki software says that something is wrong with the login session. 
public boolean deleteFile(String filename, String reason) throws HttpException, IOException { String URL1 = baseURL + "?title=Image:" + filename + "&action=delete"; // first, load the edit page, to get an 'edittime' and 'edittoken' String editTime = ""; String editToken = ""; GetMethod get = new GetMethod(URL1); currentMethod = get; // enable automatic retrying get.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false)); client.executeMethod(get); // now sift through the response looking for the editime and edittoken String response = get.getResponseBodyAsString(); get.releaseConnection(); currentMethod = null; java.util.regex.Pattern regex1 = java.util.regex.Pattern.compile( "value=\"([0-9]+)\" name=\"wpEdittime\""); java.util.regex.Matcher matcher1 = regex1.matcher(response); if(matcher1.find()) editTime = matcher1.group(1); java.util.regex.Pattern regex2 = java.util.regex.Pattern.compile( "value=\"([0-9a-z]+)\" name=\"wpEditToken\""); java.util.regex.Matcher matcher2 = regex2.matcher(response); if(matcher2.find()) editToken = matcher2.group(1); String URL2 = baseURL + "?title=Image:" + filename + "&action=delete" + "&image=" + filename; PostMethod post = new PostMethod(URL2); currentMethod = post; // post.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); NameValuePair[] data = { new NameValuePair("wpConfirm", "1"), // new NameValuePair("wpEdittime", editTime), new NameValuePair("wpReason", reason), new NameValuePair("wpConfirmB", "Confirm"), new NameValuePair("wpEditToken", editToken) }; post.setRequestBody(data); int status = 1; try { status = client.executeMethod(post); } catch (HttpException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { post.releaseConnection(); currentMethod = null; } return (status == 302); } public static class Test { public static void main(String[] args) throws HttpException, IOException, JDOMException { MediawikiConnection conn1 = new 
MediawikiConnection( "http://katlas.math.toronto.edu/w/index.php", "TestRobot", "******"); // MediawikiConnection conn2 = new MediawikiConnection( // "http://katlas.math.toronto.edu/w/index.php", "TestRobot", "******"); // MediawikiConnection conn3 = new MediawikiConnection( // "http://katlas.math.toronto.edu/w/index.php", "TestRobot", "******"); // MediawikiConnection conn2 = new MediawikiConnection( // "http://math.berkeley.edu/~scott/w/index.php", "TestRobot", "******"); // MediawikiConnection conn3 = new MediawikiConnection( // "http://katlas.math.toronto.edu/w/index.php", "ScottSysopRobot", "******"); // System.out.println(conn.getPageText("Main_Page")); // System.out.println(conn.setPageText("foo", "crel3", "auto!")); System.out.println(conn1.setPageText("foo","<math>1+2</math> "+Math.random())); System.out.println(conn1.setPageText("foo","<math>1+2</math> ")); System.out.println(conn1.setPageText("foo","<math>1+2</math> ")); // System.out.println(conn.deleteFile("K7a4_m.gif","redundant, see [[Image:K7a4.gif]]")); } } } /*
- /