1   package org.christianschenk.titletest;
2   
3   import org.apache.commons.httpclient.HttpClient;
4   import org.apache.commons.httpclient.HttpMethod;
5   import org.apache.commons.httpclient.methods.GetMethod;
6   import org.christianschenk.titletest.util.UrlUtils;
7   
8   import static org.christianschenk.titletest.util.RegExUtils.findMatch;
9   import static org.christianschenk.titletest.util.StringUtils.between;
10  
11  /**
12   * Downloads the contents from a given URL and tries to follow HTTP redirects and meta refresh tags.
13   * 
14   * @author Christian Schenk
15   */
16  public class TitleTest {
17  
18  	private final HttpClient client;
19  
20  	public TitleTest() {
21  		this.client = new HttpClient();
22  		this.client.getHttpConnectionManager().getParams().setConnectionTimeout(3000);
23  	}
24  
25  	/**
26  	 * Checks whether the downloaded HTML contains the given title.
27  	 */
28  	public boolean assertUrlTitle(final String url, final String title) {
29  		final String response = this.doGet(url);
30  		final String found = findMatch("<title>.*</title>", response);
31  		if (found != null) {
32  			final String actualTitle = between(found, "<title>", "</title>");
33  			if (actualTitle.startsWith(title)) {
34  				return true;
35  			}
36  			System.err.println("Expected '" + title + "' but found '" + actualTitle + "'");
37  		} else {
38  			System.err.println("No title tag found for URL '" + url + "'");
39  		}
40  		return false;
41  	}
42  
43  	/**
44  	 * Downloads the contents from a URL with HTTP GET. It not only follows HTTP redirects but tries
45  	 * to follow meta refresh tags as well.
46  	 */
47  	private String doGet(final String url) {
48  		final HttpMethod method = new GetMethod(url);
49  		method.setFollowRedirects(true);
50  
51  		try {
52  			this.client.executeMethod(method);
53  			final String responseBody = method.getResponseBodyAsString();
54  
55  			/*
56  			 * Follow meta refresh
57  			 * 
58  			 * <meta http-equiv="Refresh" content="0;URL=foo.html">
59  			 */
60  			final String metaRefresh = findMatch("<meta http-equiv=\"refresh\" content=\".*\">", responseBody);
61  			if (metaRefresh != null) {
62  				final String refreshUrl = findMatch("URL=.*\"", metaRefresh);
63  				if (refreshUrl == null) {
64  					throw new RuntimeException("Couldn't extract refresh URL!");
65  				}
66  				return this.doGet(UrlUtils.buildUrl(method.getURI().toString(), between(refreshUrl, "URL=", "\"")));
67  			}
68  
69  			return responseBody;
70  		} catch (final Exception ex) {
71  			throw new RuntimeException(ex);
72  		} finally {
73  			method.releaseConnection();
74  		}
75  	}
76  }