init
This commit is contained in:
495
java/org/apache/tomcat/util/buf/UDecoder.java
Normal file
495
java/org/apache/tomcat/util/buf/UDecoder.java
Normal file
@@ -0,0 +1,495 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tomcat.util.buf;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.CharConversionException;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.apache.juli.logging.Log;
|
||||
import org.apache.juli.logging.LogFactory;
|
||||
import org.apache.tomcat.util.res.StringManager;
|
||||
|
||||
/**
|
||||
* All URL decoding happens here. This way we can reuse, review, optimize
|
||||
* without adding complexity to the buffers.
|
||||
*
|
||||
* The conversion will modify the original buffer.
|
||||
*
|
||||
* @author Costin Manolache
|
||||
*/
|
||||
public final class UDecoder {
|
||||
|
||||
private static final StringManager sm = StringManager.getManager(UDecoder.class);
|
||||
|
||||
private static final Log log = LogFactory.getLog(UDecoder.class);
|
||||
|
||||
public static final boolean ALLOW_ENCODED_SLASH =
|
||||
Boolean.parseBoolean(System.getProperty("org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH", "false"));
|
||||
|
||||
private static class DecodeException extends CharConversionException {
|
||||
private static final long serialVersionUID = 1L;
|
||||
public DecodeException(String s) {
|
||||
super(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized Throwable fillInStackTrace() {
|
||||
// This class does not provide a stack trace
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
/** Unexpected end of data. */
|
||||
private static final IOException EXCEPTION_EOF = new DecodeException(sm.getString("uDecoder.eof"));
|
||||
|
||||
/** %xx with not-hex digit */
|
||||
private static final IOException EXCEPTION_NOT_HEX_DIGIT = new DecodeException(
|
||||
"isHexDigit");
|
||||
|
||||
/** %-encoded slash is forbidden in resource path */
|
||||
private static final IOException EXCEPTION_SLASH = new DecodeException(
|
||||
"noSlash");
|
||||
|
||||
public UDecoder()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* URLDecode, will modify the source.
|
||||
* @param mb The URL encoded bytes
|
||||
* @param query <code>true</code> if this is a query string
|
||||
* @throws IOException Invalid %xx URL encoding
|
||||
*/
|
||||
public void convert( ByteChunk mb, boolean query )
|
||||
throws IOException
|
||||
{
|
||||
int start=mb.getOffset();
|
||||
byte buff[]=mb.getBytes();
|
||||
int end=mb.getEnd();
|
||||
|
||||
int idx= ByteChunk.findByte( buff, start, end, (byte) '%' );
|
||||
int idx2=-1;
|
||||
if( query ) {
|
||||
idx2= ByteChunk.findByte( buff, start, (idx >= 0 ? idx : end), (byte) '+' );
|
||||
}
|
||||
if( idx<0 && idx2<0 ) {
|
||||
return;
|
||||
}
|
||||
|
||||
// idx will be the smallest positive index ( first % or + )
|
||||
if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) {
|
||||
idx=idx2;
|
||||
}
|
||||
|
||||
final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
|
||||
|
||||
for( int j=idx; j<end; j++, idx++ ) {
|
||||
if( buff[ j ] == '+' && query) {
|
||||
buff[idx]= (byte)' ' ;
|
||||
} else if( buff[ j ] != '%' ) {
|
||||
buff[idx]= buff[j];
|
||||
} else {
|
||||
// read next 2 digits
|
||||
if( j+2 >= end ) {
|
||||
throw EXCEPTION_EOF;
|
||||
}
|
||||
byte b1= buff[j+1];
|
||||
byte b2=buff[j+2];
|
||||
if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) {
|
||||
throw EXCEPTION_NOT_HEX_DIGIT;
|
||||
}
|
||||
|
||||
j+=2;
|
||||
int res=x2c( b1, b2 );
|
||||
if (noSlash && (res == '/')) {
|
||||
throw EXCEPTION_SLASH;
|
||||
}
|
||||
buff[idx]=(byte)res;
|
||||
}
|
||||
}
|
||||
|
||||
mb.setEnd( idx );
|
||||
|
||||
}
|
||||
|
||||
// -------------------- Additional methods --------------------
|
||||
// XXX What do we do about charset ????
|
||||
|
||||
/**
|
||||
* In-buffer processing - the buffer will be modified.
|
||||
* @param mb The URL encoded chars
|
||||
* @param query <code>true</code> if this is a query string
|
||||
* @throws IOException Invalid %xx URL encoding
|
||||
*/
|
||||
public void convert( CharChunk mb, boolean query )
|
||||
throws IOException
|
||||
{
|
||||
// log( "Converting a char chunk ");
|
||||
int start=mb.getOffset();
|
||||
char buff[]=mb.getBuffer();
|
||||
int cend=mb.getEnd();
|
||||
|
||||
int idx= CharChunk.indexOf( buff, start, cend, '%' );
|
||||
int idx2=-1;
|
||||
if( query ) {
|
||||
idx2= CharChunk.indexOf( buff, start, (idx >= 0 ? idx : cend), '+' );
|
||||
}
|
||||
if( idx<0 && idx2<0 ) {
|
||||
return;
|
||||
}
|
||||
|
||||
// idx will be the smallest positive index ( first % or + )
|
||||
if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) {
|
||||
idx=idx2;
|
||||
}
|
||||
|
||||
final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
|
||||
|
||||
for( int j=idx; j<cend; j++, idx++ ) {
|
||||
if( buff[ j ] == '+' && query ) {
|
||||
buff[idx]=( ' ' );
|
||||
} else if( buff[ j ] != '%' ) {
|
||||
buff[idx]=buff[j];
|
||||
} else {
|
||||
// read next 2 digits
|
||||
if( j+2 >= cend ) {
|
||||
// invalid
|
||||
throw EXCEPTION_EOF;
|
||||
}
|
||||
char b1= buff[j+1];
|
||||
char b2=buff[j+2];
|
||||
if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) {
|
||||
throw EXCEPTION_NOT_HEX_DIGIT;
|
||||
}
|
||||
|
||||
j+=2;
|
||||
int res=x2c( b1, b2 );
|
||||
if (noSlash && (res == '/')) {
|
||||
throw EXCEPTION_SLASH;
|
||||
}
|
||||
buff[idx]=(char)res;
|
||||
}
|
||||
}
|
||||
mb.setEnd( idx );
|
||||
}
|
||||
|
||||
/**
|
||||
* URLDecode, will modify the source
|
||||
* @param mb The URL encoded String, bytes or chars
|
||||
* @param query <code>true</code> if this is a query string
|
||||
* @throws IOException Invalid %xx URL encoding
|
||||
*/
|
||||
public void convert(MessageBytes mb, boolean query)
|
||||
throws IOException
|
||||
{
|
||||
|
||||
switch (mb.getType()) {
|
||||
case MessageBytes.T_STR:
|
||||
String strValue=mb.toString();
|
||||
if( strValue==null ) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
mb.setString( convert( strValue, query ));
|
||||
} catch (RuntimeException ex) {
|
||||
throw new DecodeException(ex.getMessage());
|
||||
}
|
||||
break;
|
||||
case MessageBytes.T_CHARS:
|
||||
CharChunk charC=mb.getCharChunk();
|
||||
convert( charC, query );
|
||||
break;
|
||||
case MessageBytes.T_BYTES:
|
||||
ByteChunk bytesC=mb.getByteChunk();
|
||||
convert( bytesC, query );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* %xx decoding of a string. FIXME: this is inefficient.
|
||||
* @param str The URL encoded string
|
||||
* @param query <code>true</code> if this is a query string
|
||||
* @return the decoded string
|
||||
*/
|
||||
public final String convert(String str, boolean query)
|
||||
{
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if( (!query || str.indexOf( '+' ) < 0) && str.indexOf( '%' ) < 0 ) {
|
||||
return str;
|
||||
}
|
||||
|
||||
final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
|
||||
|
||||
StringBuilder dec = new StringBuilder(); // decoded string output
|
||||
int strPos = 0;
|
||||
int strLen = str.length();
|
||||
|
||||
dec.ensureCapacity(str.length());
|
||||
while (strPos < strLen) {
|
||||
int laPos; // lookahead position
|
||||
|
||||
// look ahead to next URLencoded metacharacter, if any
|
||||
for (laPos = strPos; laPos < strLen; laPos++) {
|
||||
char laChar = str.charAt(laPos);
|
||||
if ((laChar == '+' && query) || (laChar == '%')) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// if there were non-metacharacters, copy them all as a block
|
||||
if (laPos > strPos) {
|
||||
dec.append(str.substring(strPos,laPos));
|
||||
strPos = laPos;
|
||||
}
|
||||
|
||||
// shortcut out of here if we're at the end of the string
|
||||
if (strPos >= strLen) {
|
||||
break;
|
||||
}
|
||||
|
||||
// process next metacharacter
|
||||
char metaChar = str.charAt(strPos);
|
||||
if (metaChar == '+') {
|
||||
dec.append(' ');
|
||||
strPos++;
|
||||
continue;
|
||||
} else if (metaChar == '%') {
|
||||
// We throw the original exception - the super will deal with
|
||||
// it
|
||||
// try {
|
||||
char res = (char) Integer.parseInt(
|
||||
str.substring(strPos + 1, strPos + 3), 16);
|
||||
if (noSlash && (res == '/')) {
|
||||
throw new IllegalArgumentException(sm.getString("uDecoder.noSlash"));
|
||||
}
|
||||
dec.append(res);
|
||||
strPos += 3;
|
||||
}
|
||||
}
|
||||
|
||||
return dec.toString();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decode and return the specified URL-encoded String.
|
||||
* When the byte array is converted to a string, ISO-885901 is used. This
|
||||
* may be different than some other servers. It is assumed the string is not
|
||||
* a query string.
|
||||
*
|
||||
* @param str The url-encoded string
|
||||
* @return the decoded string
|
||||
* @exception IllegalArgumentException if a '%' character is not followed
|
||||
* by a valid 2-digit hexadecimal number
|
||||
*/
|
||||
public static String URLDecode(String str) {
|
||||
return URLDecode(str, StandardCharsets.ISO_8859_1);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decode and return the specified URL-encoded String. It is assumed the
|
||||
* string is not a query string.
|
||||
*
|
||||
* @param str The url-encoded string
|
||||
* @param enc The encoding to use; if null, ISO-885901 is used. If
|
||||
* an unsupported encoding is specified null will be returned
|
||||
* @return the decoded string
|
||||
* @exception IllegalArgumentException if a '%' character is not followed
|
||||
* by a valid 2-digit hexadecimal number
|
||||
*
|
||||
* @deprecated This method will be removed in Tomcat 9
|
||||
*/
|
||||
@Deprecated
|
||||
public static String URLDecode(String str, String enc) {
|
||||
return URLDecode(str, enc, false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decode and return the specified URL-encoded String. It is assumed the
|
||||
* string is not a query string.
|
||||
*
|
||||
* @param str The url-encoded string
|
||||
* @param charset The character encoding to use; if null, ISO-8859-1 is
|
||||
* used.
|
||||
* @return the decoded string
|
||||
* @exception IllegalArgumentException if a '%' character is not followed
|
||||
* by a valid 2-digit hexadecimal number
|
||||
*/
|
||||
public static String URLDecode(String str, Charset charset) {
|
||||
return URLDecode(str, charset, false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decode and return the specified URL-encoded String.
|
||||
*
|
||||
* @param str The url-encoded string
|
||||
* @param enc The encoding to use; if null, ISO-8859-1 is used. If
|
||||
* an unsupported encoding is specified null will be returned
|
||||
* @param isQuery Is this a query string being processed
|
||||
* @return the decoded string
|
||||
* @exception IllegalArgumentException if a '%' character is not followed
|
||||
* by a valid 2-digit hexadecimal number
|
||||
*
|
||||
* @deprecated This method will be removed in Tomcat 9
|
||||
*/
|
||||
@Deprecated
|
||||
public static String URLDecode(String str, String enc, boolean isQuery) {
|
||||
Charset charset = null;
|
||||
|
||||
if (enc != null) {
|
||||
try {
|
||||
charset = B2CConverter.getCharset(enc);
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug(sm.getString("uDecoder.urlDecode.uee", enc), uee);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return URLDecode(str, charset, isQuery);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decode and return the specified URL-encoded byte array.
|
||||
*
|
||||
* @param bytes The url-encoded byte array
|
||||
* @param enc The encoding to use; if null, ISO-8859-1 is used. If
|
||||
* an unsupported encoding is specified null will be returned
|
||||
* @param isQuery Is this a query string being processed
|
||||
* @return the decoded string
|
||||
* @exception IllegalArgumentException if a '%' character is not followed
|
||||
* by a valid 2-digit hexadecimal number
|
||||
*
|
||||
* @deprecated This method will be removed in Tomcat 9
|
||||
*/
|
||||
@Deprecated
|
||||
public static String URLDecode(byte[] bytes, String enc, boolean isQuery) {
|
||||
throw new IllegalArgumentException(sm.getString("udecoder.urlDecode.iae"));
|
||||
}
|
||||
|
||||
|
||||
private static String URLDecode(String str, Charset charset, boolean isQuery) {
|
||||
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (str.indexOf('%') == -1) {
|
||||
// No %nn sequences, so return string unchanged
|
||||
return str;
|
||||
}
|
||||
|
||||
if (charset == null) {
|
||||
charset = StandardCharsets.ISO_8859_1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decoding is required.
|
||||
*
|
||||
* Potential complications:
|
||||
* - The source String may be partially decoded so it is not valid to
|
||||
* assume that the source String is ASCII.
|
||||
* - Have to process as characters since there is no guarantee that the
|
||||
* byte sequence for '%' is going to be the same in all character
|
||||
* sets.
|
||||
* - We don't know how many '%nn' sequences are required for a single
|
||||
* character. It varies between character sets and some use a variable
|
||||
* length.
|
||||
*/
|
||||
|
||||
// This isn't perfect but it is a reasonable guess for the size of the
|
||||
// array required
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length() * 2);
|
||||
|
||||
OutputStreamWriter osw = new OutputStreamWriter(baos, charset);
|
||||
|
||||
char[] sourceChars = str.toCharArray();
|
||||
int len = sourceChars.length;
|
||||
int ix = 0;
|
||||
|
||||
try {
|
||||
while (ix < len) {
|
||||
char c = sourceChars[ix++];
|
||||
if (c == '%') {
|
||||
osw.flush();
|
||||
if (ix + 2 > len) {
|
||||
throw new IllegalArgumentException(
|
||||
sm.getString("uDecoder.urlDecode.missingDigit", str));
|
||||
}
|
||||
char c1 = sourceChars[ix++];
|
||||
char c2 = sourceChars[ix++];
|
||||
if (isHexDigit(c1) && isHexDigit(c2)) {
|
||||
baos.write(x2c(c1, c2));
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
sm.getString("uDecoder.urlDecode.missingDigit", str));
|
||||
}
|
||||
} else if (c == '+' && isQuery) {
|
||||
osw.append(' ');
|
||||
} else {
|
||||
osw.append(c);
|
||||
}
|
||||
}
|
||||
osw.flush();
|
||||
|
||||
return baos.toString(charset.name());
|
||||
} catch (IOException ioe) {
|
||||
throw new IllegalArgumentException(
|
||||
sm.getString("uDecoder.urlDecode.conversionError", str, charset.name()), ioe);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static boolean isHexDigit( int c ) {
|
||||
return ( ( c>='0' && c<='9' ) ||
|
||||
( c>='a' && c<='f' ) ||
|
||||
( c>='A' && c<='F' ));
|
||||
}
|
||||
|
||||
|
||||
private static int x2c( byte b1, byte b2 ) {
|
||||
int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 :
|
||||
(b1 -'0');
|
||||
digit*=16;
|
||||
digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 :
|
||||
(b2 -'0');
|
||||
return digit;
|
||||
}
|
||||
|
||||
|
||||
private static int x2c( char b1, char b2 ) {
|
||||
int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 :
|
||||
(b1 -'0');
|
||||
digit*=16;
|
||||
digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 :
|
||||
(b2 -'0');
|
||||
return digit;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user