View Javadoc
1   /* 
2    * Licensed under the Apache License, Version 2.0 (the "License");
3    * you may not use this file except in compliance with the License.
4    * You may obtain a copy of the License at
5    *
6    * http://www.apache.org/licenses/LICENSE-2.0
7    *
8    * Unless required by applicable law or agreed to in writing, software
9    * distributed under the License is distributed on an "AS IS" BASIS,
10   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11   * See the License for the specific language governing permissions and
12   * limitations under the License.
13   *
14   */
15  
16  package org.esigate.util;
17  
18  import java.net.URI;
19  import java.net.URISyntaxException;
20  import java.util.List;
21  import java.util.regex.Pattern;
22  
23  import org.apache.commons.lang3.StringUtils;
24  import org.apache.http.HttpHost;
25  import org.apache.http.NameValuePair;
26  import org.apache.http.client.utils.URIUtils;
27  import org.apache.http.client.utils.URLEncodedUtils;
28  import org.esigate.Parameters;
29  
30  /**
31   * Utility class to manipulate {@link URI} represented as a {@link String} or as a {@link URI}.
32   * 
33   * @author Francois-Xavier Bonnet
34   * 
35   */
36  public final class UriUtils {
37  
38      private static final int CONVERSION_TABLE_SIZE = 128;
39      private static final String RESERVED_CHARACTERS = ":/?&=#%";
40      private static final String[] CONVERSION_TABLE = new String[CONVERSION_TABLE_SIZE];
41  
42      static {
43          for (int i = 0; i < CONVERSION_TABLE_SIZE; i++) {
44              char character = (char) i;
45              String charString = Character.toString(character);
46              if (RESERVED_CHARACTERS.indexOf(i) == -1) {
47                  charString = encode(charString);
48              }
49              CONVERSION_TABLE[i] = charString;
50          }
51      }
52  
    /**
     * Private constructor: this class only exposes static methods.
     */
    private UriUtils() {
        // Do not instantiate
    }
56  
57      private static String encode(char character) {
58          return Character.toString(character);
59      }
60  
61      private static String encode(String charString) {
62          try {
63              return new URI(null, null, null, -1, charString, null, null).toASCIIString();
64          } catch (URISyntaxException e) {
65              throw new InvalidUriException(e);
66          }
67      }
68  
69      /**
70       * Fixes common mistakes in URI by replacing all illegal characters by their encoded value.
71       * 
72       * @param uri
73       *            the URI to fix
74       * @return the fixed URI
75       */
76      public static String encodeIllegalCharacters(String uri) {
77          StringBuilder result = new StringBuilder();
78          int length = uri.length();
79          for (int i = 0; i < length; i++) {
80              char character = uri.charAt(i);
81              if (character == '%') {
82                  // Encode invalid escape sequences
83                  if (i >= length - 2 || !isHex(uri.charAt(i + 1)) || !isHex(uri.charAt(i + 2))) {
84                      result.append("%25");
85                  } else {
86                      result.append('%');
87                  }
88              } else {
89                  int j = (int) character;
90                  if (j >= CONVERSION_TABLE_SIZE || j < 0) {
91                      result.append(encode(character));
92                  } else {
93                      result.append(CONVERSION_TABLE[j]);
94                  }
95              }
96          }
97          return result.toString();
98      }
99  
100     private static boolean isHex(char character) {
101         return character == '0' || character == '1' || character == '2' || character == '3' || character == '4'
102                 || character == '5' || character == '6' || character == '7' || character == '8' || character == '9'
103                 || character == 'a' || character == 'b' || character == 'c' || character == 'd' || character == 'e'
104                 || character == 'f' || character == 'A' || character == 'B' || character == 'C' || character == 'D'
105                 || character == 'E' || character == 'F';
106     }
107 
    /**
     * Unchecked exception wrapping the {@link URISyntaxException} raised while building a {@link URI}.
     */
    private static final class InvalidUriException extends RuntimeException {
        private static final long serialVersionUID = 7013885420191182730L;

        /**
         * @param cause
         *            the syntax error reported for the URI
         */
        private InvalidUriException(URISyntaxException cause) {
            super(cause);
        }

    }
116 
117     /**
118      * Creates an URI as a String.
119      * 
120      * @param scheme
121      *            the scheme
122      * @param host
123      *            the host
124      * @param port
125      *            the port
126      * @param path
127      *            the path
128      * @param query
129      *            the query
130      * @param fragment
131      *            the fragment
132      * @return the uri
133      */
134     public static String createURI(final String scheme, final String host, int port, final String path,
135             final String query, final String fragment) {
136         StringBuilder buffer = new StringBuilder(Parameters.SMALL_BUFFER_SIZE);
137         if (host != null) {
138             if (scheme != null) {
139                 buffer.append(scheme);
140                 buffer.append("://");
141             }
142             buffer.append(host);
143             if (port > 0) {
144                 buffer.append(':');
145                 buffer.append(port);
146             }
147         }
148         if (path == null || !path.startsWith("/")) {
149             buffer.append('/');
150         }
151         if (path != null) {
152             buffer.append(path);
153         }
154         if (query != null) {
155             buffer.append('?');
156             buffer.append(query);
157         }
158         if (fragment != null) {
159             buffer.append('#');
160             buffer.append(fragment);
161         }
162         return buffer.toString();
163     }
164 
165     /**
166      * Extracts the host name from a URI.
167      * 
168      * @param uri
169      *            the uri
170      * @return the host name
171      */
172     public static String extractHostName(final String uri) {
173         return extractHost(uri).getHostName();
174     }
175 
176     /**
177      * Extracts the {@link HttpHost} from a URI.
178      * 
179      * @param uri
180      *            the URI
181      * @return the {@link HttpHost}
182      */
183     public static HttpHost extractHost(final String uri) {
184         return URIUtils.extractHost(createURI(uri));
185     }
186 
187     /**
188      * Extracts the {@link HttpHost} from a URI.
189      * 
190      * @param uri
191      *            the {@link URI}
192      * @return the {@link HttpHost}
193      */
194     public static HttpHost extractHost(final URI uri) {
195         return URIUtils.extractHost(uri);
196     }
197 
198     /**
199      * Creates an {@link URI} after escaping some special characters in order to tolerate some incorrect URI types. If
200      * the uri contains a server name but no path, the path is set to "/" as a browser would do.
201      * 
202      * @param uri
203      *            the URI as a {@link String}
204      * @return the URI as a {@link URI} object
205      */
206     public static URI createURI(String uri) {
207         uri = encodeIllegalCharacters(uri);
208         URI result = URI.create(uri);
209         if (result.getHost() != null && StringUtils.isEmpty(result.getPath())) {
210             result =
211                     URI.create(createURI(result.getScheme(), result.getHost(), result.getPort(), "/",
212                             result.getRawQuery(), result.getRawFragment()));
213         }
214         return result;
215     }
216 
217     /**
218      * Replaces the scheme, host and port in a URI.
219      * 
220      * @param uri
221      *            the URI
222      * @param targetHost
223      *            the target host
224      * @return the rewritten URI
225      */
226     public static String rewriteURI(String uri, HttpHost targetHost) {
227         try {
228             return URIUtils.rewriteURI(createURI(uri), targetHost).toString();
229         } catch (URISyntaxException e) {
230             throw new InvalidUriException(e);
231         }
232     }
233 
234     /**
235      * Removes the jsessionid that may have been added to a URI on a java application server.
236      * 
237      * @param sessionId
238      *            the value of the sessionId that can also be found in a JSESSIONID cookie
239      * @param page
240      *            the html code of the page
241      * @return the fixed html
242      */
243     public static String removeSessionId(String sessionId, String page) {
244         String regexp = ";?jsessionid=" + Pattern.quote(sessionId);
245         return page.replaceAll(regexp, "");
246     }
247 
248     /**
249      * Extracts the scheme of a URI.
250      * 
251      * @param uri
252      *            the URI
253      * @return the scheme
254      */
255     public static String extractScheme(String uri) {
256         return extractHost(uri).getSchemeName();
257     }
258 
259     /**
260      * Returns the raw query component of this URI. The query component of a URI, if defined, only contains legal URI
261      * characters.
262      * 
263      * @param uri
264      *            the URI
265      * @return The raw query component of this URI, or null if the query is undefined
266      */
267     public static String getRawQuery(String uri) {
268         return createURI(uri).getRawQuery();
269     }
270 
271     /**
272      * Returns the decoded path component of this URI. The string returned by this method is equal to that returned by
273      * the getRawPath method except that all sequences of escaped octets are decoded.
274      * 
275      * @param uri
276      *            the uri to retrieve the path from
277      * @return The decoded path component of this URI, or null if the path is undefined
278      */
279 
280     public static String getPath(String uri) {
281         return createURI(uri).getPath();
282     }
283 
284     /**
285      * Returns a list of {@link NameValuePair NameValuePairs} as built from the URI's query portion. For example, a URI
286      * of http://example.org/path/to/file?a=1&amp;b=2&amp;c=3 would return a list of three NameValuePairs, one for a=1,
287      * one for b=2, and one for c=3. By convention, {@code '&amp;'} and {@code ';'} are accepted as parameter
288      * separators.
289      * <p>
290      * This is typically useful while parsing an HTTP PUT.
291      * 
292      * This API is currently only used for testing.
293      * 
294      * @param uri
295      *            URI to parse
296      * @param charset
297      *            Charset name to use while parsing the query
298      * @return a list of {@link NameValuePair} as built from the URI's query portion.
299      */
300     public static List<NameValuePair> parse(final String uri, final String charset) {
301         return URLEncodedUtils.parse(createURI(uri), charset);
302     }
303 
304     /**
305      * Checks if a URI starts with a protocol.
306      * 
307      * @param uri
308      *            the URI
309      * @return true if the URI starts with "http://" or "https://"
310      */
311     public static boolean isAbsolute(String uri) {
312         return (uri.startsWith("http://") || uri.startsWith("https://"));
313     }
314 
315     /**
316      * Concatenates 2 {@link URI} by taking the beginning of the first (up to the path) and the end of the other
317      * (starting from the path). While concatenating, checks that there is no doubled "/" character between the path
318      * fragments.
319      * 
320      * @param base
321      *            the base uri
322      * @param relPath
323      *            the path to concatenate with the base uri
324      * @return the concatenated uri
325      */
326     public static URI concatPath(URI base, String relPath) {
327         String resultPath = base.getPath() + StringUtils.stripStart(relPath, "/");
328         try {
329             return new URI(base.getScheme(), base.getUserInfo(), base.getHost(), base.getPort(), resultPath, null, null);
330         } catch (URISyntaxException e) {
331             throw new InvalidUriException(e);
332         }
333     }
334 
335     /**
336      * Removes the server information frome a {@link URI}.
337      * 
338      * @param uri
339      *            the {@link URI}
340      * @return a new {@link URI} with no scheme, host and port
341      */
342     public static URI removeServer(URI uri) {
343         try {
344             return new URI(null, null, null, -1, uri.getPath(), uri.getQuery(), uri.getFragment());
345         } catch (URISyntaxException e) {
346             throw new InvalidUriException(e);
347         }
348 
349     }
350 
351     /**
352      * Interpret the url relatively to the request url (may be relative). Due to a bug in {@link URI} class when using a
353      * relUri containing only a query string, we cannot use directly the method provided by {@link URI} class.
354      * 
355      * @param relUri
356      *            the relative URI
357      * @param base
358      *            the reference {@link URI}
359      * @return the resolved {@link URI}
360      */
361     public static URI resolve(String relUri, URI base) {
362         URI uri = createURI(relUri);
363         if (uri.getScheme() == null && uri.getUserInfo() == null && uri.getHost() == null && uri.getPort() == -1
364                 && StringUtils.isEmpty(uri.getPath()) && uri.getQuery() != null) {
365             try {
366                 return new URI(base.getScheme(), base.getUserInfo(), base.getHost(), base.getPort(), base.getPath(),
367                         uri.getQuery(), uri.getFragment());
368             } catch (URISyntaxException e) {
369                 throw new InvalidUriException(e);
370             }
371         } else {
372             return base.resolve(uri);
373         }
374     }
375 
376     /**
377      * Removes the query and fragment at the end of a URI.
378      * 
379      * @param uriString
380      *            the original URI as a String
381      * 
382      * @return the URI without querystring nor fragment
383      */
384     public static String removeQuerystring(String uriString) {
385         URI uri = createURI(uriString);
386         try {
387             return new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), uri.getPort(), uri.getPath(), null, null)
388                     .toASCIIString();
389         } catch (URISyntaxException e) {
390             throw new InvalidUriException(e);
391         }
392     }
393 
394 }