/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tomcat.util.buf; import java.io.ByteArrayOutputStream; import java.io.CharConversionException; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import org.apache.juli.logging.Log; import org.apache.juli.logging.LogFactory; import org.apache.tomcat.util.res.StringManager; /** * All URL decoding happens here. This way we can reuse, review, optimize * without adding complexity to the buffers. * * The conversion will modify the original buffer. * * @author Costin Manolache */ public final class UDecoder { private static final StringManager sm = StringManager.getManager(UDecoder.class); private static final Log log = LogFactory.getLog(UDecoder.class); public static final boolean ALLOW_ENCODED_SLASH = Boolean.parseBoolean(System.getProperty("org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH", "false")); private static class DecodeException extends CharConversionException { private static final long serialVersionUID = 1L; public DecodeException(String s) { super(s); } @Override public synchronized Throwable fillInStackTrace() { // This class does not provide a stack trace return this; } } /** Unexpected end of data. */ private static final IOException EXCEPTION_EOF = new DecodeException(sm.getString("uDecoder.eof")); /** %xx with not-hex digit */ private static final IOException EXCEPTION_NOT_HEX_DIGIT = new DecodeException( "isHexDigit"); /** %-encoded slash is forbidden in resource path */ private static final IOException EXCEPTION_SLASH = new DecodeException( "noSlash"); public UDecoder() { } /** * URLDecode, will modify the source. * @param mb The URL encoded bytes * @param query true if this is a query string * @throws IOException Invalid %xx URL encoding */ public void convert( ByteChunk mb, boolean query ) throws IOException { int start=mb.getOffset(); byte buff[]=mb.getBytes(); int end=mb.getEnd(); int idx= ByteChunk.findByte( buff, start, end, (byte) '%' ); int idx2=-1; if( query ) { idx2= ByteChunk.findByte( buff, start, (idx >= 0 ? idx : end), (byte) '+' ); } if( idx<0 && idx2<0 ) { return; } // idx will be the smallest positive index ( first % or + ) if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) { idx=idx2; } final boolean noSlash = !(ALLOW_ENCODED_SLASH || query); for( int j=idx; j= end ) { throw EXCEPTION_EOF; } byte b1= buff[j+1]; byte b2=buff[j+2]; if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) { throw EXCEPTION_NOT_HEX_DIGIT; } j+=2; int res=x2c( b1, b2 ); if (noSlash && (res == '/')) { throw EXCEPTION_SLASH; } buff[idx]=(byte)res; } } mb.setEnd( idx ); } // -------------------- Additional methods -------------------- // XXX What do we do about charset ???? /** * In-buffer processing - the buffer will be modified. * @param mb The URL encoded chars * @param query true if this is a query string * @throws IOException Invalid %xx URL encoding */ public void convert( CharChunk mb, boolean query ) throws IOException { // log( "Converting a char chunk "); int start=mb.getOffset(); char buff[]=mb.getBuffer(); int cend=mb.getEnd(); int idx= CharChunk.indexOf( buff, start, cend, '%' ); int idx2=-1; if( query ) { idx2= CharChunk.indexOf( buff, start, (idx >= 0 ? idx : cend), '+' ); } if( idx<0 && idx2<0 ) { return; } // idx will be the smallest positive index ( first % or + ) if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) { idx=idx2; } final boolean noSlash = !(ALLOW_ENCODED_SLASH || query); for( int j=idx; j= cend ) { // invalid throw EXCEPTION_EOF; } char b1= buff[j+1]; char b2=buff[j+2]; if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) { throw EXCEPTION_NOT_HEX_DIGIT; } j+=2; int res=x2c( b1, b2 ); if (noSlash && (res == '/')) { throw EXCEPTION_SLASH; } buff[idx]=(char)res; } } mb.setEnd( idx ); } /** * URLDecode, will modify the source * @param mb The URL encoded String, bytes or chars * @param query true if this is a query string * @throws IOException Invalid %xx URL encoding */ public void convert(MessageBytes mb, boolean query) throws IOException { switch (mb.getType()) { case MessageBytes.T_STR: String strValue=mb.toString(); if( strValue==null ) { return; } try { mb.setString( convert( strValue, query )); } catch (RuntimeException ex) { throw new DecodeException(ex.getMessage()); } break; case MessageBytes.T_CHARS: CharChunk charC=mb.getCharChunk(); convert( charC, query ); break; case MessageBytes.T_BYTES: ByteChunk bytesC=mb.getByteChunk(); convert( bytesC, query ); break; } } /** * %xx decoding of a string. FIXME: this is inefficient. * @param str The URL encoded string * @param query true if this is a query string * @return the decoded string */ public final String convert(String str, boolean query) { if (str == null) { return null; } if( (!query || str.indexOf( '+' ) < 0) && str.indexOf( '%' ) < 0 ) { return str; } final boolean noSlash = !(ALLOW_ENCODED_SLASH || query); StringBuilder dec = new StringBuilder(); // decoded string output int strPos = 0; int strLen = str.length(); dec.ensureCapacity(str.length()); while (strPos < strLen) { int laPos; // lookahead position // look ahead to next URLencoded metacharacter, if any for (laPos = strPos; laPos < strLen; laPos++) { char laChar = str.charAt(laPos); if ((laChar == '+' && query) || (laChar == '%')) { break; } } // if there were non-metacharacters, copy them all as a block if (laPos > strPos) { dec.append(str.substring(strPos,laPos)); strPos = laPos; } // shortcut out of here if we're at the end of the string if (strPos >= strLen) { break; } // process next metacharacter char metaChar = str.charAt(strPos); if (metaChar == '+') { dec.append(' '); strPos++; continue; } else if (metaChar == '%') { // We throw the original exception - the super will deal with // it // try { char res = (char) Integer.parseInt( str.substring(strPos + 1, strPos + 3), 16); if (noSlash && (res == '/')) { throw new IllegalArgumentException(sm.getString("uDecoder.noSlash")); } dec.append(res); strPos += 3; } } return dec.toString(); } /** * Decode and return the specified URL-encoded String. * When the byte array is converted to a string, ISO-885901 is used. This * may be different than some other servers. It is assumed the string is not * a query string. * * @param str The url-encoded string * @return the decoded string * @exception IllegalArgumentException if a '%' character is not followed * by a valid 2-digit hexadecimal number */ public static String URLDecode(String str) { return URLDecode(str, StandardCharsets.ISO_8859_1); } /** * Decode and return the specified URL-encoded String. It is assumed the * string is not a query string. * * @param str The url-encoded string * @param enc The encoding to use; if null, ISO-885901 is used. If * an unsupported encoding is specified null will be returned * @return the decoded string * @exception IllegalArgumentException if a '%' character is not followed * by a valid 2-digit hexadecimal number * * @deprecated This method will be removed in Tomcat 9 */ @Deprecated public static String URLDecode(String str, String enc) { return URLDecode(str, enc, false); } /** * Decode and return the specified URL-encoded String. It is assumed the * string is not a query string. * * @param str The url-encoded string * @param charset The character encoding to use; if null, ISO-8859-1 is * used. * @return the decoded string * @exception IllegalArgumentException if a '%' character is not followed * by a valid 2-digit hexadecimal number */ public static String URLDecode(String str, Charset charset) { return URLDecode(str, charset, false); } /** * Decode and return the specified URL-encoded String. * * @param str The url-encoded string * @param enc The encoding to use; if null, ISO-8859-1 is used. If * an unsupported encoding is specified null will be returned * @param isQuery Is this a query string being processed * @return the decoded string * @exception IllegalArgumentException if a '%' character is not followed * by a valid 2-digit hexadecimal number * * @deprecated This method will be removed in Tomcat 9 */ @Deprecated public static String URLDecode(String str, String enc, boolean isQuery) { Charset charset = null; if (enc != null) { try { charset = B2CConverter.getCharset(enc); } catch (UnsupportedEncodingException uee) { if (log.isDebugEnabled()) { log.debug(sm.getString("uDecoder.urlDecode.uee", enc), uee); } } } return URLDecode(str, charset, isQuery); } /** * Decode and return the specified URL-encoded byte array. * * @param bytes The url-encoded byte array * @param enc The encoding to use; if null, ISO-8859-1 is used. If * an unsupported encoding is specified null will be returned * @param isQuery Is this a query string being processed * @return the decoded string * @exception IllegalArgumentException if a '%' character is not followed * by a valid 2-digit hexadecimal number * * @deprecated This method will be removed in Tomcat 9 */ @Deprecated public static String URLDecode(byte[] bytes, String enc, boolean isQuery) { throw new IllegalArgumentException(sm.getString("udecoder.urlDecode.iae")); } private static String URLDecode(String str, Charset charset, boolean isQuery) { if (str == null) { return null; } if (str.indexOf('%') == -1) { // No %nn sequences, so return string unchanged return str; } if (charset == null) { charset = StandardCharsets.ISO_8859_1; } /* * Decoding is required. * * Potential complications: * - The source String may be partially decoded so it is not valid to * assume that the source String is ASCII. * - Have to process as characters since there is no guarantee that the * byte sequence for '%' is going to be the same in all character * sets. * - We don't know how many '%nn' sequences are required for a single * character. It varies between character sets and some use a variable * length. */ // This isn't perfect but it is a reasonable guess for the size of the // array required ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length() * 2); OutputStreamWriter osw = new OutputStreamWriter(baos, charset); char[] sourceChars = str.toCharArray(); int len = sourceChars.length; int ix = 0; try { while (ix < len) { char c = sourceChars[ix++]; if (c == '%') { osw.flush(); if (ix + 2 > len) { throw new IllegalArgumentException( sm.getString("uDecoder.urlDecode.missingDigit", str)); } char c1 = sourceChars[ix++]; char c2 = sourceChars[ix++]; if (isHexDigit(c1) && isHexDigit(c2)) { baos.write(x2c(c1, c2)); } else { throw new IllegalArgumentException( sm.getString("uDecoder.urlDecode.missingDigit", str)); } } else if (c == '+' && isQuery) { osw.append(' '); } else { osw.append(c); } } osw.flush(); return baos.toString(charset.name()); } catch (IOException ioe) { throw new IllegalArgumentException( sm.getString("uDecoder.urlDecode.conversionError", str, charset.name()), ioe); } } private static boolean isHexDigit( int c ) { return ( ( c>='0' && c<='9' ) || ( c>='a' && c<='f' ) || ( c>='A' && c<='F' )); } private static int x2c( byte b1, byte b2 ) { int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 : (b1 -'0'); digit*=16; digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 : (b2 -'0'); return digit; } private static int x2c( char b1, char b2 ) { int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 : (b1 -'0'); digit*=16; digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 : (b2 -'0'); return digit; } }