UriUtils class
Utility class containing static methods for validating and sanitizing URIs.
class UriUtils {
// /**
// * Characters that don't need %-escaping (minus letters and digits), according
// * to ECMAScript 5th edition for the {@code encodeURI} function.
// */
// static final String DONT_NEED_ENCODING = ";/?:@&=+\$,-_.!~*'()#[]"; // could be used in IPv6 addresses
//
// // used in conditional code in encode()
// static final RegExp ESCAPED_LBRACKET_RE = GWT.isScript() ? RegExp.compile("%5B", "g") : null;
// static final RegExp ESCAPED_RBRACKET_RE = GWT.isScript() ? RegExp.compile("%5D", "g") : null;
/**
* Encodes the URL.
* <p>
* In client code, this method delegates to {@link URL#encode(String)} and
* then unescapes brackets, as they might be used for IPv6 addresses.
*
* @param uri the URL to encode
* @return the %-escaped URL
*/
static String encode(String uri) {
// uri = URL.encode(uri);
// // Follow the same approach as SafeHtmlUtils.htmlEscape
// if (uri.indexOf("%5B") != -1) {
// uri = ESCAPED_LBRACKET_RE.replace(uri, "[");
// }
// if (uri.indexOf("%5D") != -1) {
// uri = ESCAPED_RBRACKET_RE.replace(uri, "]");
// }
// return uri;
// } else {
// StringBuilder sb = new StringBuilder();
// byte[] utf8bytes;
// try {
// utf8bytes = uri.getBytes("UTF-8");
// } catch (UnsupportedEncodingException e) {
// // UTF-8 is guaranteed to be implemented, this code won't ever run.
// return null;
// }
// for (byte b : utf8bytes) {
// int c = b & 0xFF;
// // This works because characters that don't need encoding are all
// // expressed as a single UTF-8 byte
// if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')
// || DONT_NEED_ENCODING.indexOf(c) != -1) {
// sb.append((char) c);
// } else {
// String hexByte = Integer.toHexString(c).toUpperCase();
// if (hexByte.length() == 1) {
// hexByte = "0" + hexByte;
// }
// sb.append('%').append(hexByte);
// }
// }
// return sb.toString();
return Uri.encodeFull(uri);
}
/**
* Encodes the URL, preserving existing %-escapes.
*
* @param uri the URL to encode
* @return the %-escaped URL
*/
static String encodeAllowEscapes(String uri) {
// StringBuilder escaped = new StringBuilder();
//
// bool firstSegment = true;
// for (String segment : uri.split("%", -1)) {
// if (firstSegment) {
// /*
// * The first segment is never part of a percent-escape, so we always
// * escape it. Note that if the input starts with a percent, we will get
// * an empty segment before that.
// */
// firstSegment = false;
// escaped.append(encode(segment));
// continue;
// }
//
// if (segment.length() >= 2 && segment.substring(0, 2).matches("[0-9a-fA-F]{2}")) {
// // Append the escape without encoding.
// escaped.append("%").append(segment.substring(0, 2));
//
// // Append the rest of the segment, escaped.
// escaped.append(encode(segment.substring(2)));
// } else {
// // The segment did not start with an escape, so encode the whole
// // segment.
// escaped.append("%25").append(encode(segment));
// }
// }
// return escaped.toString();
return Uri.encodeComponent(uri);
}
/**
* Extracts the scheme of a URI.
*
* @param uri the URI to extract the scheme from
* @return the URI's scheme, or {@code null} if the URI does not have one
*/
static String extractScheme(String uri) {
// int colonPos = uri.indexOf(':');
// if (colonPos < 0) {
// return null;
// }
// String scheme = uri.substring(0, colonPos);
// if (scheme.indexOf('/') >= 0 || scheme.indexOf('#') >= 0) {
// /*
// * The URI's prefix up to the first ':' contains other URI special
// * chars, and won't be interpreted as a scheme.
// *
// * TODO(xtof): Consider basing this on URL#isValidProtocol or similar;
// * however I'm worried that being too strict here will effectively
// * allow dangerous schemes accepted in loosely parsing browsers.
// */
// return null;
// }
// return scheme;
Uri u = Uri.parse(uri);
return u.scheme;
}
/**
* Returns a {@link SafeUri} constructed from a value that is fully under
* the control of the program, e.g., a constant.
*
* <p>
* The string is not sanitized and no checks are performed. The assumption
* that the resulting value adheres to the {@link SafeUri} type contract
* is entirely based on the argument being fully under program control and
* not being derived from a program input.
*
* <p>
* <strong>Convention of use:</strong> This method must only be invoked on
* values that are fully under the program's control, such as string literals.
*
* @param s the input String
* @return a SafeUri instance
*/
static SafeUri fromSafeConstant(String s) {
return new SafeUriString(s);
}
/**
* Returns a {@link SafeUri} obtained by sanitizing the provided string.
*
* <p>
* The input string is sanitized using {@link #sanitizeUri(String)}.
*
* @param s the input String
* @return a SafeUri instance
*/
static SafeUri fromString(String s) {
return new SafeUriString(sanitizeUri(s));
}
/**
* Returns a {@link SafeUri} constructed from a trusted string, i.e., without
* sanitizing the string. No checks are performed. The calling code should be
* carefully reviewed to ensure the argument meets the SafeUri contract.
*
* @param s the input String
* @return a SafeUri instance
*/
static SafeUri fromTrustedString(String s) {
// SafeUriHostedModeUtils.maybeCheckValidUri(s);
return new SafeUriString(s);
}
/**
* Determines if a {@link String} is safe to use as the value of a URI-valued
* HTML attribute such as {@code src} or {@code href}.
*
* <p>
* In this context, a URI is safe if it can be established that using it as
* the value of a URI-valued HTML attribute such as {@code src} or {@code
* href} cannot result in script execution. Specifically, this method deems a
* URI safe if it either does not have a scheme, or its scheme is one of
* {@code http, https, ftp, mailto}.
*
* @param uri the URI to validate
* @return {@code true} if {@code uri} is safe in the above sense; {@code
* false} otherwise
*/
static bool isSafeUri(String uri) {
String scheme = extractScheme(uri);
if (scheme == null) {
return true;
}
/*
* Special care is be taken with case-insensitive 'i' in the Turkish locale.
* i -> to upper in Turkish locale -> Д°
* I -> to lower in Turkish locale -> Д±
* For this reason there are two checks for mailto: "mailto" and "MAILTO"
* For details, see: http://www.i18nguy.com/unicode/turkish-i18n.html
*/
String schemeLc = scheme.toLowerCase();
return ("http" == schemeLc
|| "https" == schemeLc
|| "ftp" == schemeLc
|| "mailto" == schemeLc
|| "MAILTO" == scheme.toUpperCase());
}
/**
* Sanitizes a URI.
*
* <p>
* This method returns the URI provided if it is safe to use as the the value
* of a URI-valued HTML attribute according to {@link #isSafeUri}, or the URI
* "{@code #}" otherwise.
*
* @param uri the URI to sanitize
* @return a sanitized String
*/
static String sanitizeUri(String uri) {
if (isSafeUri(uri)) {
return encodeAllowEscapes(uri);
} else {
return "#";
}
}
/**
* Returns a {@link SafeUri} constructed from an untrusted string but without
* sanitizing it.
*
* <strong>Despite this method creating a SafeUri instance, no checks are
* performed, so the returned SafeUri is absolutely NOT guaranteed to be
* safe!</strong>
*
* @param s the input String
* @return a SafeUri instance
* @deprecated This method is intended only for use in APIs that use
* {@link SafeUri} to represent URIs, but for backwards
* compatibility have methods that accept URI parameters as plain
* strings.
*/
static SafeUri unsafeCastFromUntrustedString(String s) {
return new SafeUriString(s);
}
// prevent instantiation
UriUtils() ;
}
Static Methods
String encode(String uri) #
Encodes the URL. <p> In client code, this method delegates to {@link URL#encode(String)} and then unescapes brackets, as they might be used for IPv6 addresses.
@param uri the URL to encode @return the %-escaped URL
static String encode(String uri) {
// uri = URL.encode(uri);
// // Follow the same approach as SafeHtmlUtils.htmlEscape
// if (uri.indexOf("%5B") != -1) {
// uri = ESCAPED_LBRACKET_RE.replace(uri, "[");
// }
// if (uri.indexOf("%5D") != -1) {
// uri = ESCAPED_RBRACKET_RE.replace(uri, "]");
// }
// return uri;
// } else {
// StringBuilder sb = new StringBuilder();
// byte[] utf8bytes;
// try {
// utf8bytes = uri.getBytes("UTF-8");
// } catch (UnsupportedEncodingException e) {
// // UTF-8 is guaranteed to be implemented, this code won't ever run.
// return null;
// }
// for (byte b : utf8bytes) {
// int c = b & 0xFF;
// // This works because characters that don't need encoding are all
// // expressed as a single UTF-8 byte
// if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')
// || DONT_NEED_ENCODING.indexOf(c) != -1) {
// sb.append((char) c);
// } else {
// String hexByte = Integer.toHexString(c).toUpperCase();
// if (hexByte.length() == 1) {
// hexByte = "0" + hexByte;
// }
// sb.append('%').append(hexByte);
// }
// }
// return sb.toString();
return Uri.encodeFull(uri);
}
String encodeAllowEscapes(String uri) #
Encodes the URL, preserving existing %-escapes.
@param uri the URL to encode @return the %-escaped URL
static String encodeAllowEscapes(String uri) {
// StringBuilder escaped = new StringBuilder();
//
// bool firstSegment = true;
// for (String segment : uri.split("%", -1)) {
// if (firstSegment) {
// /*
// * The first segment is never part of a percent-escape, so we always
// * escape it. Note that if the input starts with a percent, we will get
// * an empty segment before that.
// */
// firstSegment = false;
// escaped.append(encode(segment));
// continue;
// }
//
// if (segment.length() >= 2 && segment.substring(0, 2).matches("[0-9a-fA-F]{2}")) {
// // Append the escape without encoding.
// escaped.append("%").append(segment.substring(0, 2));
//
// // Append the rest of the segment, escaped.
// escaped.append(encode(segment.substring(2)));
// } else {
// // The segment did not start with an escape, so encode the whole
// // segment.
// escaped.append("%25").append(encode(segment));
// }
// }
// return escaped.toString();
return Uri.encodeComponent(uri);
}
String extractScheme(String uri) #
Extracts the scheme of a URI.
@param uri the URI to extract the scheme from @return the URI's scheme, or {@code null} if the URI does not have one
static String extractScheme(String uri) {
// int colonPos = uri.indexOf(':');
// if (colonPos < 0) {
// return null;
// }
// String scheme = uri.substring(0, colonPos);
// if (scheme.indexOf('/') >= 0 || scheme.indexOf('#') >= 0) {
// /*
// * The URI's prefix up to the first ':' contains other URI special
// * chars, and won't be interpreted as a scheme.
// *
// * TODO(xtof): Consider basing this on URL#isValidProtocol or similar;
// * however I'm worried that being too strict here will effectively
// * allow dangerous schemes accepted in loosely parsing browsers.
// */
// return null;
// }
// return scheme;
Uri u = Uri.parse(uri);
return u.scheme;
}
SafeUri fromSafeConstant(String s) #
Returns a {@link SafeUri} constructed from a value that is fully under the control of the program, e.g., a constant.
The string is not sanitized and no checks are performed. The assumption that the resulting value adheres to the {@link SafeUri} type contract is entirely based on the argument being fully under program control and not being derived from a program input.
Convention of use: This method must only be invoked on values that are fully under the program's control, such as string literals.
@param s the input String @return a SafeUri instance
static SafeUri fromSafeConstant(String s) {
return new SafeUriString(s);
}
SafeUri fromString(String s) #
Returns a {@link SafeUri} obtained by sanitizing the provided string.
The input string is sanitized using {@link #sanitizeUri(String)}.
@param s the input String @return a SafeUri instance
static SafeUri fromString(String s) {
return new SafeUriString(sanitizeUri(s));
}
SafeUri fromTrustedString(String s) #
Returns a {@link SafeUri} constructed from a trusted string, i.e., without sanitizing the string. No checks are performed. The calling code should be carefully reviewed to ensure the argument meets the SafeUri contract.
@param s the input String @return a SafeUri instance
static SafeUri fromTrustedString(String s) {
// SafeUriHostedModeUtils.maybeCheckValidUri(s);
return new SafeUriString(s);
}
bool isSafeUri(String uri) #
Determines if a {@link String} is safe to use as the value of a URI-valued HTML attribute such as {@code src} or {@code href}.
In this context, a URI is safe if it can be established that using it as the value of a URI-valued HTML attribute such as {@code src} or {@code href} cannot result in script execution. Specifically, this method deems a URI safe if it either does not have a scheme, or its scheme is one of {@code http, https, ftp, mailto}.
@param uri the URI to validate @return {@code true} if {@code uri} is safe in the above sense; {@code
false} otherwise
static bool isSafeUri(String uri) {
String scheme = extractScheme(uri);
if (scheme == null) {
return true;
}
/*
* Special care is be taken with case-insensitive 'i' in the Turkish locale.
* i -> to upper in Turkish locale -> Д°
* I -> to lower in Turkish locale -> Д±
* For this reason there are two checks for mailto: "mailto" and "MAILTO"
* For details, see: http://www.i18nguy.com/unicode/turkish-i18n.html
*/
String schemeLc = scheme.toLowerCase();
return ("http" == schemeLc
|| "https" == schemeLc
|| "ftp" == schemeLc
|| "mailto" == schemeLc
|| "MAILTO" == scheme.toUpperCase());
}
String sanitizeUri(String uri) #
Sanitizes a URI.
This method returns the URI provided if it is safe to use as the the value of a URI-valued HTML attribute according to {@link #isSafeUri}, or the URI "{@code #}" otherwise.
@param uri the URI to sanitize @return a sanitized String
static String sanitizeUri(String uri) {
if (isSafeUri(uri)) {
return encodeAllowEscapes(uri);
} else {
return "#";
}
}
SafeUri unsafeCastFromUntrustedString(String s) #
Returns a {@link SafeUri} constructed from an untrusted string but without sanitizing it.
Despite this method creating a SafeUri instance, no checks are performed, so the returned SafeUri is absolutely NOT guaranteed to be safe!@param s the input String @return a SafeUri instance @deprecated This method is intended only for use in APIs that use
{@link SafeUri} to represent URIs, but for backwards
compatibility have methods that accept URI parameters as plain
strings.
static SafeUri unsafeCastFromUntrustedString(String s) {
return new SafeUriString(s);
}