MediawikiConnection.java

From Knot Atlas
Revision as of 17:24, 19 February 2007 by 62.109.185.48 (talk)
Jump to navigationJump to search

/* This page contains the source code for MediawikiConnection.java, the Java component of Scott Morrison's WikiLink` package.

*/
package wikilink;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethodBase;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.methods.multipart.FilePart;
import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity;
import org.apache.commons.httpclient.methods.multipart.Part;
import org.apache.commons.httpclient.methods.multipart.StringPart;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.lang.StringEscapeUtils;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;

/**
 * @author Scott Morrison
 */
public class MediawikiConnection {
	
	private String baseURL;
	private String username, password;
	// public only for debugging!
	public HttpClient client;
	public HttpMethodBase currentMethod;
	
	public MediawikiConnection(String baseURL) {
		initialise(baseURL, "", "");
	}
	
	public MediawikiConnection(String baseURL, String username, String password) {
		initialise(baseURL, username, password);
	}
	
	private void initialise(String baseURL, String username, String password) {
		this.baseURL = baseURL;
		this.username = username;
		this.password = password;
		renewClient();
	}
		
	private boolean renewClient() {
		if(client == null) {
			client = new HttpClient();
			client.getHttpConnectionManager().getParams().setConnectionTimeout(5000);
			return doLogin(username, password);
		}
		return true;
	}
	
	private void disposeClient() {
		client = null;
	}
	
	private boolean forceRenewClient() {
		disposeClient();
		return renewClient();
	}
	
	private void sleep() {
		try {
			Thread.sleep(5000);
		} catch (InterruptedException e) {
			// yawn!!! Better get back to work.
			e.printStackTrace();
		}
	}
	
	private boolean doLogin(String username, String password) {
		if(username == "") return true;
		
		PostMethod post = new PostMethod(baseURL + "?title=Special:Userlogin&action=submit");
		// post.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
		currentMethod = post;
		
		// enable automatic retrying
		post.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, 
	    		new DefaultHttpMethodRetryHandler(3, false));
		
		NameValuePair[] data = {
			new NameValuePair("wpName", username),
		    new NameValuePair("wpPassword", password),
		    new NameValuePair("wpRemember", "1"),
		    // ugh... this changed between 1.4beta5 and 1.4.7
		    // for 1.4beta5
		    new NameValuePair("wpLoginAttempt", "1"),
		    // for 1.4.7
		    new NameValuePair("wpLoginattempt", "Log in")
		};
		
		post.setRequestBody(data);
		
		int status = 1;
		try {
			status = client.executeMethod(post);
		} catch (HttpException e) {
			// uhoh... the HttpClient object is probably in a bad state.
			// deal with this!!! I can't just dispose here... or can I?
			e.printStackTrace();
			disposeClient();
			sleep();
		} catch (IOException e) {
			// who cares
			e.printStackTrace();
			disposeClient();
			sleep();
		} finally {
			post.releaseConnection();
			currentMethod = null;
		}
		return (status == 302);
	}

	public String getPageText(String title) throws HttpException, IOException, JDOMException {
		renewClient();
		
		String URL = baseURL + "?title=Special:Export/" + title;
		GetMethod get = new GetMethod(URL);
		currentMethod = get;
		
		// enable automatic retrying
		get.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, 
	    		new DefaultHttpMethodRetryHandler(3, false));
		
		client.executeMethod(get);

		InputStream is = get.getResponseBodyAsStream();
		
		SAXBuilder builder = new SAXBuilder();
		Document doc = builder.build(is);
		Element page = doc.getRootElement().getChild("page");
		String text = "";
		if(page != null)
			text = page.getChild("revision").getChild("text").getText();

		// finally, make sure we release the connection!
		get.releaseConnection();
		currentMethod = null;
		
		return text;
	}
	
	private String[][] mapToArray(Map map) {
		String[][] array = new String[map.size()][2];
		Iterator iter = map.keySet().iterator();
		String title;
		int i = 0;
		while(iter.hasNext()) {
			title = (String)iter.next();
			array[i][0] = title;
			array[i][1] = (String)map.get(title);
			i++;
		}
		return array;
	}
	
	private Map arrayToMap(String[][] array) {
		Map map = new HashMap();
		for(int i = 0; i < array.length; i++) {
			map.put(array[i][0], array[i][1]);
		}
		return map;
	}
	
	public String[][] getPageTexts(String[] titles) {
		return mapToArray(getPageTexts(new HashSet(Arrays.asList(titles))));
	}
	
	public Map getPageTexts(Set titles) {
		renewClient();
		
		Map pageTexts = new HashMap();
		
		String titlesString = "";
		
		Iterator iter = titles.iterator();
		while(iter.hasNext()) {
			titlesString = titlesString + (String)iter.next() + "\r\n";
		}
		
		String URL = baseURL + "?title=Special:Export";
		PostMethod post = new PostMethod(URL);
		currentMethod = post;
		post.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, 
	    		new DefaultHttpMethodRetryHandler(3, false));
		post.getParams().setIntParameter(HttpMethodParams.SO_TIMEOUT, 5000);
		
		NameValuePair[] data = {
			new NameValuePair("action", "submit"),
			new NameValuePair("curonly", "true"),
			new NameValuePair("pages", titlesString),			
		};
		
		post.setRequestBody(data);
		
		InputStream is = null;
		int status = 1;
		try {
			status = client.executeMethod(post);
			is = post.getResponseBodyAsStream();
		} catch (HttpException e) {
			// uhoh... the HttpClient object is probably in a bad state.
			e.printStackTrace();
			disposeClient();
			post.releaseConnection();
			return pageTexts;
		} catch (IOException e) {
			// who cares		
			e.printStackTrace();
			disposeClient();
			post.releaseConnection();
			return pageTexts;
		}
		
		SAXBuilder builder = new SAXBuilder();
		try {
			Document doc = builder.build(is);
			Element page;
			String title;
			String text;
			List pages = doc.getRootElement().getChildren("page");
			iter = pages.iterator();
			while(iter.hasNext()) {
				page = (Element)iter.next();
				text = page.getChild("revision").getChild("text").getText();
				title = page.getChild("title").getText();
				pageTexts.put(title, text);
			}
		} catch (IOException e) {
			e.printStackTrace();
			disposeClient();
		} catch (JDOMException e) {
			e.printStackTrace();
		}
		
		post.releaseConnection();
		return pageTexts;
	}
	
	public String[][] filterSetPageTexts(String[][] edits) {
		return mapToArray(filterSetPageTexts(arrayToMap(edits)));
	}
	
	public Map filterSetPageTexts(Map edits) {
		Map pageTexts = getPageTexts(edits.keySet());
		Iterator iter = pageTexts.keySet().iterator();
		String title;
		while(iter.hasNext()) {
			title = (String)iter.next();
			if( ((String)edits.get(title)).equals((String)pageTexts.get(title)) ) {
				edits.remove(title);
			}
		}
		return edits;
	}
	
	private Map setPageTextsOnce(Map edits) {
		Map failedEdits = new HashMap();
		Iterator iter = edits.keySet().iterator();
		String title, text;
		while(iter.hasNext()) {
			title = (String)iter.next();
			text = (String)edits.get(title);
			if(!setPageText(title, text)) {
				failedEdits.put(title, text);
			}
		}
		return failedEdits;
	}
	
	private Map setFilteredPageTexts(Map edits) {
		return setPageTextsOnce(filterSetPageTexts(edits));
	}
	
	public Map setPageTexts(Map edits) {
		return setFilteredPageTexts(setFilteredPageTexts(edits));
	}
	
	public String[][] setPageTexts(String[][] edits) {
		return mapToArray(setPageTexts(arrayToMap(edits)));
	}
	
	public boolean setPageText(String title, String text) {
		return setPageText(title, text, "");		
	}
		
	public boolean setPageText(String title, String text, String summary) {
		renewClient();
		
		String URL = baseURL + "?title=" + title + "&action=edit";
		
		// first, load the edit page, to get an 'edittime' and 'edittoken'
		String editTime = "";
		String editToken = "";
		String mungedWikiSource = "";
		GetMethod get = new GetMethod(URL);
		currentMethod = get;
		// enable automatic retrying
		get.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, 
	    		new DefaultHttpMethodRetryHandler(3, false));
		
		String response = "";
		
		boolean failure = false;
		try {
			client.executeMethod(get);
			response = get.getResponseBodyAsString();			
		} catch (HttpException e) {
			// uhoh... the HttpClient object is probably in a bad state.
			e.printStackTrace();
			failure = true;
		} catch (IOException e) {
			// who cares
			e.printStackTrace();
			failure = true;
		} finally {
			get.releaseConnection();
			currentMethod = null;
			if(failure) {
				disposeClient();
				return false;
			}
		}
		
		// now sift through the response looking for the editime and edittoken
		java.util.regex.Pattern regex1 = java.util.regex.Pattern.compile(
				"value=\"([0-9]+)\" name=\"wpEdittime\"");
		java.util.regex.Matcher matcher1 = regex1.matcher(response);
		if(matcher1.find()) editTime = matcher1.group(1);

		java.util.regex.Pattern regex2 = java.util.regex.Pattern.compile(
			"value=\"([0-9a-z]+)\" name=\"wpEditToken\"");
		java.util.regex.Matcher matcher2 = regex2.matcher(response);
		if(matcher2.find()) editToken = matcher2.group(1);		
		
		// while we're at it, let's get the (html-munged) wiki source as well.
		java.util.regex.Pattern regex3 = java.util.regex.Pattern.compile(
			"<textarea .*? name=\"wpTextbox1\" .*?>(.*?)</textarea>", java.util.regex.Pattern.DOTALL);
		java.util.regex.Matcher matcher3 = regex3.matcher(response);
		if(matcher3.find()) mungedWikiSource = matcher3.group(1);			
		
		String wikiSource = StringEscapeUtils.unescapeHtml(mungedWikiSource).trim();
		
		if(wikiSource.equals(text.trim())) {
			// don't need to do anything
			return true;
		}
		
		PostMethod post = new PostMethod(URL);
		currentMethod = post;
		post.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, 
	    		new DefaultHttpMethodRetryHandler(3, false));
		post.getParams().setIntParameter(HttpMethodParams.SO_TIMEOUT, 5000);
		
		NameValuePair[] data = {
			new NameValuePair("wpTextbox1", text),
			new NameValuePair("wpEdittime", editTime),
			new NameValuePair("wpEditToken", editToken),			
		    new NameValuePair("wpSummary", summary)
		};
			
		post.setRequestBody(data);
		
		int status = 1;
		try {
			System.out.println("Starting post, name: " + title);	
			status = client.executeMethod(post);
			System.out.println("Finished setPageText post, status: " + status);			
			System.out.println(post.getResponseBodyAsString());
		} catch (HttpException e) {
			// uhoh... the HttpClient object is probably in a bad state.
			e.printStackTrace();
			disposeClient();
		} catch (IOException e) {
			// who cares		
			e.printStackTrace();
			disposeClient();
		} finally {
			// debug!
			post.releaseConnection();
			currentMethod = null;
		}
	
		return (status == 302);
	}
	
	public boolean uploadFile(String filename, String description) throws IOException {
		return uploadFile(new File(filename), description);
	}
	
	public boolean uploadFile(File f, String description) throws IOException {
		renewClient();
		
		String URL = baseURL + "?title=Special:Upload";
		
		PostMethod upload = new PostMethod(URL);
		currentMethod = upload;
		Part[] parts = {
				new StringPart("wpUploadDescription", description),
				new StringPart("wpUploadAffirm", "1"),
				new StringPart("wpIgnoreWarning", "1"),
				new StringPart("wpUpload", "Upload file"),
				new FilePart("wpUploadFile", f.getName(), f)
		};
		upload.setRequestEntity( new MultipartRequestEntity(parts, upload.getParams()) );

		int status = 1;
		try {
			status = client.executeMethod(upload);
		} catch (HttpException e) {
			e.printStackTrace();
			disposeClient();
		} catch (IOException e) {
			e.printStackTrace();
			disposeClient();
		} finally {
			upload.releaseConnection();
			currentMethod = null;
		}
	
		return (status == 302);
	}	
	
	// deleteFile doesn't work! The mediawiki software says that something is wrong with the login session.
	public boolean deleteFile(String filename, String reason) throws HttpException, IOException {
		String URL1 = baseURL + "?title=Image:" + filename + "&action=delete";
				
		// first, load the edit page, to get an 'edittime' and 'edittoken'
		String editTime = "";
		String editToken = "";
		GetMethod get = new GetMethod(URL1);
		currentMethod = get;
		// enable automatic retrying
		get.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, 
	    		new DefaultHttpMethodRetryHandler(3, false));
		client.executeMethod(get);
		
		// now sift through the response looking for the editime and edittoken
		String response = get.getResponseBodyAsString();
		get.releaseConnection();
		currentMethod = null;
		java.util.regex.Pattern regex1 = java.util.regex.Pattern.compile(
				"value=\"([0-9]+)\" name=\"wpEdittime\"");
		java.util.regex.Matcher matcher1 = regex1.matcher(response);
		if(matcher1.find()) editTime = matcher1.group(1);

		java.util.regex.Pattern regex2 = java.util.regex.Pattern.compile(
			"value=\"([0-9a-z]+)\" name=\"wpEditToken\"");
		java.util.regex.Matcher matcher2 = regex2.matcher(response);
		if(matcher2.find()) editToken = matcher2.group(1);		
		
		String URL2 = baseURL + "?title=Image:" + filename + "&action=delete" + "&image=" + filename;		
		PostMethod post = new PostMethod(URL2);
		currentMethod = post;
		// post.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
		
		NameValuePair[] data = {
			new NameValuePair("wpConfirm", "1"),
			// new NameValuePair("wpEdittime", editTime),			
		    new NameValuePair("wpReason", reason),
		    new NameValuePair("wpConfirmB", "Confirm"),
			new NameValuePair("wpEditToken", editToken)
		};
			
		post.setRequestBody(data);
		
		int status = 1;
		try {
			status = client.executeMethod(post);
		} catch (HttpException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			post.releaseConnection();
			currentMethod = null;
		}
		
		return (status == 302);
	}
		
	public static class Test {
		public static void main(String[] args) throws HttpException, IOException, JDOMException {
			MediawikiConnection conn1 = new MediawikiConnection(
					"http://katlas.math.toronto.edu/w/index.php", "TestRobot", "******");
//			MediawikiConnection conn2 = new MediawikiConnection(
//					"http://katlas.math.toronto.edu/w/index.php", "TestRobot", "******");
//			MediawikiConnection conn3 = new MediawikiConnection(
//					"http://katlas.math.toronto.edu/w/index.php", "TestRobot", "******");
//			MediawikiConnection conn2 = new MediawikiConnection(
//					"http://math.berkeley.edu/~scott/w/index.php", "TestRobot", "******");
//			MediawikiConnection conn3 = new MediawikiConnection(
//					"http://katlas.math.toronto.edu/w/index.php", "ScottSysopRobot", "******");
//			System.out.println(conn.getPageText("Main_Page"));
//			System.out.println(conn.setPageText("foo", "crel3", "auto!"));
			System.out.println(conn1.setPageText("foo","<math>1+2</math> "+Math.random()));
			System.out.println(conn1.setPageText("foo","<math>1+2</math> "));
			System.out.println(conn1.setPageText("foo","<math>1+2</math> "));
//			System.out.println(conn.deleteFile("K7a4_m.gif","redundant, see [[Image:K7a4.gif]]"));
		}
	}
}

/*
*/

Amateur Sex Busty Babes Facial Cumshot Free Amateur Porn Free Porn Free Porn Movies Group Sex Hardcore Porn Hardcore Sex Hot Babes Internal Cumshot Lesbian Sex Mature Sex Nude Teens Teen Lesbians Teen Porn