This commit is contained in:
2024-11-30 19:03:49 +08:00
commit 1e6763c160
3806 changed files with 737676 additions and 0 deletions

View File

@@ -0,0 +1,181 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.io.Serializable;
/**
 * Common state and behaviour shared by the *Chunk implementations: a window
 * (<code>start</code> .. <code>end</code>) over a backing buffer, an optional
 * size limit and a cached hash code.
 */
public abstract class AbstractChunk implements Cloneable, Serializable {

    private static final long serialVersionUID = 1L;

    /*
     * JVMs may limit the maximum array size to slightly less than
     * Integer.MAX_VALUE. On markt's desktop the limit is MAX_VALUE - 2.
     * Comments in the JRE source code for ArrayList and other classes indicate
     * that it may be as low as MAX_VALUE - 8 on some systems.
     */
    public static final int ARRAY_MAX_SIZE = Integer.MAX_VALUE - 8;

    // Cached result of hash(); only meaningful while hasHashCode is true
    private int hashCode = 0;
    protected boolean hasHashCode = false;

    protected boolean isSet;

    // Values <= 0 mean "no explicit limit"
    private int limit = -1;

    protected int start;
    protected int end;


    /**
     * Maximum amount of data in this buffer. If -1 or not set, the buffer will
     * grow to {@link #ARRAY_MAX_SIZE}. Can be smaller than the current buffer
     * size ( which will not shrink ). When the limit is reached, the buffer
     * will be flushed (if out is set) or throw exception.
     *
     * @param limit The new limit
     */
    public void setLimit(int limit) {
        this.limit = limit;
    }


    /**
     * @return the limit exactly as it was configured (may be -1)
     */
    public int getLimit() {
        return limit;
    }


    /**
     * @return the configured limit if it is positive, otherwise
     *         {@link #ARRAY_MAX_SIZE}
     */
    protected int getLimitInternal() {
        return (limit > 0) ? limit : ARRAY_MAX_SIZE;
    }


    /**
     * @return the start position of the data in the buffer
     */
    public int getStart() {
        return start;
    }


    /**
     * @return the end position of the data in the buffer
     */
    public int getEnd() {
        return end;
    }


    /**
     * @param i the new end position of the data in the buffer
     */
    public void setEnd(int i) {
        end = i;
    }


    // TODO: Deprecate offset and use start
    /**
     * @return the start position of the data in the buffer
     */
    public int getOffset() {
        return start;
    }


    /**
     * Moves the start position. The end position is dragged along if it would
     * otherwise fall before the new start.
     *
     * @param off the new start position
     */
    public void setOffset(int off) {
        start = off;
        if (end < start) {
            end = start;
        }
    }


    /**
     * @return the length of the data in the buffer
     */
    public int getLength() {
        return end - start;
    }


    /**
     * @return <code>true</code> only if the end position is not positive and
     *         the chunk has not been flagged as set
     */
    public boolean isNull() {
        return end <= 0 && !isSet;
    }


    /**
     * Searches this chunk for a region of the given String.
     *
     * @param src    the String containing the region to look for
     * @param srcOff the offset of the first character of the region in src
     * @param srcLen the number of characters in the region
     * @param myOff  the offset, relative to the start of this chunk, at which
     *               to begin searching
     *
     * @return the position of the first match relative to the start of this
     *         chunk, or -1 if there is no match
     */
    public int indexOf(String src, int srcOff, int srcLen, int myOff) {
        final char head = src.charAt(srcOff);

        for (int candidate = myOff + start; candidate <= (end - srcLen); candidate++) {
            if (getBufferElement(candidate) == head) {
                // First character matches, compare the rest of the region
                int matched = 1;
                while (matched < srcLen
                        && getBufferElement(candidate + matched) == src.charAt(srcOff + matched)) {
                    matched++;
                }
                if (matched >= srcLen) {
                    return candidate - start;
                }
            }
        }
        return -1;
    }


    /**
     * Resets the chunk to an uninitialized state.
     */
    public void recycle() {
        start = 0;
        end = 0;
        isSet = false;
        hasHashCode = false;
    }


    /**
     * Returns the hash of the current content, computing it via
     * {@link #hash()} on first use and serving the cached value afterwards
     * until <code>hasHashCode</code> is cleared.
     */
    @Override
    public int hashCode() {
        if (!hasHashCode) {
            hashCode = hash();
            hasHashCode = true;
        }
        return hashCode;
    }


    /**
     * @return a freshly computed hash over the elements between start
     *         (inclusive) and end (exclusive)
     */
    public int hash() {
        int result = 0;
        int index = start;
        while (index < end) {
            result = result * 37 + getBufferElement(index);
            index++;
        }
        return result;
    }


    /**
     * @param index position in the backing buffer
     *
     * @return the element stored at that position
     */
    protected abstract int getBufferElement(int index);
}

View File

@@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
/**
 * Minimal ASCII helpers: table driven lower-casing and parsing of unsigned
 * decimal longs directly from bytes.
 *
 * @author dac@eng.sun.com
 * @author James Todd [gonzo@eng.sun.com]
 */
public final class Ascii {

    /*
     * Character translation tables.
     */
    private static final byte[] toLower = new byte[256];

    /*
     * Character type tables.
     */
    private static final boolean[] isDigit = new boolean[256];

    // Largest value that can still be multiplied by 10 without overflowing
    private static final long OVERFLOW_LIMIT = Long.MAX_VALUE / 10;

    /*
     * Initialize character translation and type tables.
     */
    static {
        // Identity mapping first, then override the upper case letters
        for (int code = 0; code < toLower.length; code++) {
            toLower[code] = (byte) code;
        }
        for (int upper = 'A'; upper <= 'Z'; upper++) {
            toLower[upper] = (byte) (upper - 'A' + 'a');
        }
        for (int digit = '0'; digit <= '9'; digit++) {
            isDigit[digit] = true;
        }
    }


    /**
     * Returns the lower case equivalent of the specified ASCII character.
     * Only the low eight bits of the argument are considered.
     *
     * @param c The char
     * @return the lower case equivalent char
     */
    public static int toLower(int c) {
        int index = c & 0xff;
        return toLower[index] & 0xff;
    }


    /**
     * @return <code>true</code> if the specified ASCII character is a digit.
     * @param c The char
     */
    private static boolean isDigit(int c) {
        int index = c & 0xff;
        return isDigit[index];
    }


    /**
     * Parses an unsigned long from the specified subarray of bytes.
     *
     * @param b the bytes to parse
     * @param off the start offset of the bytes
     * @param len the length of the bytes
     * @return the long value
     * @exception NumberFormatException if the long format was invalid
     */
    public static long parseLong(byte[] b, int off, int len)
            throws NumberFormatException {
        if (b == null || len <= 0) {
            throw new NumberFormatException();
        }

        int index = off;
        int current = b[index++];
        if (!isDigit(current)) {
            throw new NumberFormatException();
        }
        long value = current - '0';

        for (int remaining = len - 1; remaining > 0; remaining--) {
            current = b[index++];
            if (!isDigit(current)) {
                throw new NumberFormatException();
            }
            int digit = current - '0';
            // Long.MAX_VALUE ends in 7, so at the limit only 0-7 may follow
            boolean fits = value < OVERFLOW_LIMIT
                    || (value == OVERFLOW_LIMIT && digit < 8);
            if (!fits) {
                throw new NumberFormatException();
            }
            value = value * 10 + digit;
        }
        return value;
    }
}

View File

@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.math.BigInteger;
import org.apache.tomcat.util.res.StringManager;
/**
 * This is a very basic ASN.1 parser that provides the limited functionality
 * required by Tomcat. It is a long way from a complete parser.
 * <p>
 * The parser walks forward through a byte array; every parse method consumes
 * the bytes it reads.
 *
 * TODO: Consider extending this parser and refactoring the SpnegoTokenFixer to
 * use it.
 */
public class Asn1Parser {

    private static final StringManager sm = StringManager.getManager(Asn1Parser.class);

    private final byte[] source;

    // Index of the next byte to be consumed
    private int pos = 0;


    /**
     * @param source the encoded data to parse (used directly, not copied)
     */
    public Asn1Parser(byte[] source) {
        this.source = source;
    }


    /**
     * Consumes one byte and checks that it is the expected tag.
     *
     * @param tag the expected tag value
     *
     * @throws IllegalArgumentException if the byte read is not the expected tag
     */
    public void parseTag(int tag) {
        int actual = next();
        if (actual == tag) {
            return;
        }
        throw new IllegalArgumentException(sm.getString("asn1Parser.tagMismatch",
                Integer.valueOf(tag), Integer.valueOf(actual)));
    }


    /**
     * Consumes a length and checks that it accounts for exactly the bytes
     * remaining in the source.
     *
     * @throws IllegalArgumentException if the length does not match the
     *         remaining data
     */
    public void parseFullLength() {
        int declared = parseLength();
        int remaining = source.length - pos;
        if (declared != remaining) {
            throw new IllegalArgumentException(sm.getString("asn1Parser.lengthInvalid",
                    Integer.valueOf(declared), Integer.valueOf(remaining)));
        }
    }


    /**
     * Consumes a length in either short form (a single byte up to 127) or
     * long form (a byte of 128 + n followed by n big-endian length bytes).
     *
     * @return the decoded length
     */
    public int parseLength() {
        int first = next();
        if (first <= 127) {
            // Short form
            return first;
        }
        int lengthBytes = first - 128;
        int result = 0;
        for (int i = 0; i < lengthBytes; i++) {
            result = (result << 8) + next();
        }
        return result;
    }


    /**
     * Consumes an INTEGER (tag 0x02), its length and its content.
     *
     * @return the integer value
     */
    public BigInteger parseInt() {
        parseTag(0x02);
        int size = parseLength();
        byte[] content = new byte[size];
        System.arraycopy(source, pos, content, 0, size);
        pos += size;
        return new BigInteger(content);
    }


    /**
     * Consumes exactly enough bytes to fill the provided array.
     *
     * @param dest the array to fill
     */
    public void parseBytes(byte[] dest) {
        int count = dest.length;
        System.arraycopy(source, pos, dest, 0, count);
        pos += count;
    }


    // Consumes a single byte and returns it as an unsigned value
    private int next() {
        int value = source[pos] & 0xFF;
        pos++;
        return value;
    }
}

View File

@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
/**
 * Minimal writer for the small subset of DER encoded ASN.1 structures needed
 * here: SEQUENCE, INTEGER and OCTET STRING.
 */
public class Asn1Writer {

    /**
     * Encodes a SEQUENCE (tag 0x30) whose content is the concatenation of the
     * given, already encoded, components.
     *
     * @param components the encoded elements of the sequence, in order
     *
     * @return the encoded sequence
     */
    public static byte[] writeSequence(byte[]... components) {
        int len = 0;
        for (byte[] component : components) {
            len += component.length;
        }

        byte[] combined = new byte[len];
        int pos = 0;
        for (byte[] component : components) {
            System.arraycopy(component, 0, combined, pos, component.length);
            pos += component.length;
        }

        return writeTag((byte) 0x30, combined);
    }


    /**
     * Encodes an INTEGER (tag 0x02).
     * <p>
     * The content is the minimal big-endian two's complement form of the
     * value. That keeps multi-byte values in the right byte order, adds the
     * leading zero octet when the top bit of a positive value is set (so 128
     * is not read back as -128) and handles the full int range.
     *
     * @param value the value to encode
     *
     * @return the encoded integer
     */
    public static byte[] writeInteger(int value) {
        // BigInteger.toByteArray() is exactly the DER INTEGER content form
        byte[] valueBytes = java.math.BigInteger.valueOf(value).toByteArray();
        return writeTag((byte) 0x02, valueBytes);
    }


    /**
     * Encodes an OCTET STRING (tag 0x04).
     *
     * @param data the content of the octet string
     *
     * @return the encoded octet string
     */
    public static byte[] writeOctetString(byte[] data) {
        return writeTag((byte) 0x04, data);
    }


    /**
     * Encodes a tag, the definite length of the data and the data itself.
     * <p>
     * Lengths up to 127 use the single byte short form. Larger lengths use the
     * long form: one byte of 0x80 + n followed by n big-endian length bytes.
     *
     * @param tagId the tag to write
     * @param data  the content to write after the tag and length
     *
     * @return the encoded element
     */
    public static byte[] writeTag(byte tagId, byte[] data) {
        int dataSize = data.length;

        // Number of bytes that follow the initial length byte (0 = short form)
        int lengthOctets = 0;
        if (dataSize > 127) {
            for (int remaining = dataSize; remaining > 0; remaining >>>= 8) {
                lengthOctets++;
            }
        }

        // 1 for tag + 1 for initial length byte + any long form length bytes
        int headerSize = 2 + lengthOctets;
        byte[] result = new byte[headerSize + dataSize];
        result[0] = tagId;
        if (lengthOctets == 0) {
            result[1] = (byte) dataSize;
        } else {
            result[1] = (byte) (0x80 | lengthOctets);
            // Fill the length bytes from least to most significant, stopping
            // before the initial length byte at index 1
            int remaining = dataSize;
            for (int i = headerSize - 1; i > 1; i--) {
                result[i] = (byte) (remaining & 0xFF);
                remaining >>>= 8;
            }
        }
        System.arraycopy(data, 0, result, headerSize, dataSize);

        return result;
    }
}

View File

@@ -0,0 +1,284 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import org.apache.tomcat.util.res.StringManager;
/**
* NIO based character decoder.
* <p>
* Wraps a {@link CharsetDecoder} and carries the bytes of an incomplete
* multi-byte sequence (at most {@link #LEFTOVER_SIZE}) from one convert call
* to the next. The wrapping byte/char buffers are cached between calls and are
* only re-created when the backing array of the caller's buffer changes.
*/
public class B2CConverter {
private static final StringManager sm =
StringManager.getManager(Constants.Package);
private static final CharsetCache charsetCache = new CharsetCache();
// Protected so unit tests can use it
// Capacity, in bytes, of the leftover buffer
protected static final int LEFTOVER_SIZE = 9;
/**
* Obtain the Charset for the given encoding
*
* @param enc The name of the encoding for the required charset
*
* @return The Charset corresponding to the requested encoding
*
* @throws UnsupportedEncodingException If the requested Charset is not
* available
*/
public static Charset getCharset(String enc)
throws UnsupportedEncodingException {
// Encoding names should all be ASCII
String lowerCaseEnc = enc.toLowerCase(Locale.ENGLISH);
return getCharsetLower(lowerCaseEnc);
}
/**
* Only to be used when it is known that the encoding name is in lower case.
* The lookup is served by the shared charset cache; a cache miss is reported
* as an unsupported encoding.
*
* @param lowerCaseEnc The name of the encoding for the required charset in
* lower case
*
* @return The Charset corresponding to the requested encoding
*
* @throws UnsupportedEncodingException If the requested Charset is not
* available
*
* @deprecated Will be removed in Tomcat 9.0.x
*/
@Deprecated
public static Charset getCharsetLower(String lowerCaseEnc)
throws UnsupportedEncodingException {
Charset charset = charsetCache.getCharset(lowerCaseEnc);
if (charset == null) {
// Pre-population of the cache means this must be invalid
throw new UnsupportedEncodingException(
sm.getString("b2cConverter.unknownEncoding", lowerCaseEnc));
}
return charset;
}
private final CharsetDecoder decoder;
// Cached wrappers around the caller's input and output arrays
private ByteBuffer bb = null;
private CharBuffer cb = null;
/**
* Leftover buffer used for incomplete characters. A position greater than
* zero means bytes from a previous call are waiting to be completed.
*/
private final ByteBuffer leftovers;
/**
* Creates a converter that reports malformed input and unmappable characters
* as errors.
*
* @param charset The character set to decode
*/
public B2CConverter(Charset charset) {
this(charset, false);
}
/**
* Creates a converter for the given character set.
*
* @param charset The character set to decode
* @param replaceOnError <code>true</code> to replace malformed input and
* unmappable characters, <code>false</code> to report
* them
*/
public B2CConverter(Charset charset, boolean replaceOnError) {
byte[] left = new byte[LEFTOVER_SIZE];
leftovers = ByteBuffer.wrap(left);
CodingErrorAction action;
if (replaceOnError) {
action = CodingErrorAction.REPLACE;
} else {
action = CodingErrorAction.REPORT;
}
// Special case. Use the Apache Harmony based UTF-8 decoder because it
// - a) rejects invalid sequences that the JVM decoder does not
// - b) fails faster for some invalid sequences
if (charset.equals(StandardCharsets.UTF_8)) {
decoder = new Utf8Decoder();
} else {
decoder = charset.newDecoder();
}
decoder.onMalformedInput(action);
decoder.onUnmappableCharacter(action);
}
/**
* Reset the decoder state and discard any leftover bytes.
*/
public void recycle() {
decoder.reset();
leftovers.position(0);
}
/**
* Convert the given bytes to characters.
* <p>
* Decoded characters are written after the current end of the char chunk.
* On return the byte chunk offset and the char chunk end reflect what was
* consumed and produced. Trailing bytes that do not yet form a complete
* character are moved into the leftover buffer.
*
* @param bc byte input
* @param cc char output
* @param endOfInput Is this all of the available data
*
* @throws IOException If the conversion can not be completed
*/
public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
throws IOException {
if ((bb == null) || (bb.array() != bc.getBuffer())) {
// Create a new byte buffer if anything changed
bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
} else {
// Initialize the byte buffer
// Limit is set before position so the new position is never beyond
// the limit
bb.limit(bc.getEnd());
bb.position(bc.getStart());
}
if ((cb == null) || (cb.array() != cc.getBuffer())) {
// Create a new char buffer if anything changed
// Output is appended in the free space after the chunk's current end
cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
cc.getBuffer().length - cc.getEnd());
} else {
// Initialize the char buffer
cb.limit(cc.getBuffer().length);
cb.position(cc.getEnd());
}
CoderResult result = null;
// Parse leftover if any are present
if (leftovers.position() > 0) {
int pos = cb.position();
// Loop until one char is decoded or there is a decoder error
do {
// NOTE(review): substractB() presumably removes and returns the next
// byte of the chunk - confirm against ByteChunk
leftovers.put(bc.substractB());
// Expose only the bytes collected so far to the decoder
leftovers.flip();
result = decoder.decode(leftovers, cb, endOfInput);
// Switch back to append mode, keeping every byte collected so far
leftovers.position(leftovers.limit());
leftovers.limit(leftovers.array().length);
} while (result.isUnderflow() && (cb.position() == pos));
// Per the CoderResult API isError() also covers the malformed case
if (result.isError() || result.isMalformed()) {
result.throwException();
}
// Bytes were taken from the chunk above, so re-sync the wrapper
bb.position(bc.getStart());
leftovers.position(0);
}
// Do the decoding and get the results into the byte chunk and the char
// chunk
result = decoder.decode(bb, cb, endOfInput);
if (result.isError() || result.isMalformed()) {
result.throwException();
} else if (result.isOverflow()) {
// Propagate current positions to the byte chunk and char chunk, if
// this continues the char buffer will get resized
bc.setOffset(bb.position());
cc.setEnd(cb.position());
} else if (result.isUnderflow()) {
// Propagate current positions to the byte chunk and char chunk
bc.setOffset(bb.position());
cc.setEnd(cb.position());
// Put leftovers in the leftovers byte buffer
if (bc.getLength() > 0) {
leftovers.limit(leftovers.array().length);
// The position records how many leftover bytes are held
leftovers.position(bc.getLength());
// NOTE(review): substract(byte[], int, int) presumably copies the
// remaining bytes out of the chunk and consumes them - confirm
bc.substract(leftovers.array(), 0, bc.getLength());
}
}
}
/**
* Convert the given bytes to characters.
* <p>
* Decoded characters are written starting at the current limit of the char
* buffer, whose limit is then advanced past them. Trailing bytes that do not
* yet form a complete character are moved into the leftover buffer.
*
* @param bc byte input
* @param cc char output
* @param ic byte input channel
* @param endOfInput Is this all of the available data
*
* @throws IOException If the conversion can not be completed
*/
public void convert(ByteBuffer bc, CharBuffer cc, ByteChunk.ByteInputChannel ic, boolean endOfInput)
throws IOException {
if ((bb == null) || (bb.array() != bc.array())) {
// Create a new byte buffer if anything changed
bb = ByteBuffer.wrap(bc.array(), bc.arrayOffset() + bc.position(), bc.remaining());
} else {
// Initialize the byte buffer
bb.limit(bc.limit());
bb.position(bc.position());
}
if ((cb == null) || (cb.array() != cc.array())) {
// Create a new char buffer if anything changed
// Output is appended in the space between the limit and the capacity
cb = CharBuffer.wrap(cc.array(), cc.limit(), cc.capacity() - cc.limit());
} else {
// Initialize the char buffer
cb.limit(cc.capacity());
cb.position(cc.limit());
}
CoderResult result = null;
// Parse leftover if any are present
if (leftovers.position() > 0) {
int pos = cb.position();
// Loop until one char is decoded or there is a decoder error
do {
byte chr;
if (bc.remaining() == 0) {
// NOTE(review): realReadBytes() presumably refills bc and returns a
// negative value when no more data is available - confirm
int n = ic.realReadBytes();
// With no more data a -1 byte is fed to the decoder
chr = n < 0 ? -1 : bc.get();
} else {
chr = bc.get();
}
leftovers.put(chr);
// Expose only the bytes collected so far to the decoder
leftovers.flip();
result = decoder.decode(leftovers, cb, endOfInput);
// Switch back to append mode, keeping every byte collected so far
leftovers.position(leftovers.limit());
leftovers.limit(leftovers.array().length);
} while (result.isUnderflow() && (cb.position() == pos));
// Per the CoderResult API isError() also covers the malformed case
if (result.isError() || result.isMalformed()) {
result.throwException();
}
// Bytes were taken from bc above, so re-sync the wrapper
bb.position(bc.position());
leftovers.position(0);
}
// Do the decoding and get the results into the byte chunk and the char
// chunk
result = decoder.decode(bb, cb, endOfInput);
if (result.isError() || result.isMalformed()) {
result.throwException();
} else if (result.isOverflow()) {
// Propagate current positions to the byte chunk and char chunk, if
// this continues the char buffer will get resized
bc.position(bb.position());
cc.limit(cb.position());
} else if (result.isUnderflow()) {
// Propagate current positions to the byte chunk and char chunk
bc.position(bb.position());
cc.limit(cb.position());
// Put leftovers in the leftovers byte buffer
if (bc.remaining() > 0) {
leftovers.limit(leftovers.array().length);
// The position records how many leftover bytes are held
leftovers.position(bc.remaining());
// Bulk get copies the remaining bytes and consumes them from bc
bc.get(leftovers.array(), 0, bc.remaining());
}
}
}
/**
* @return the character set this converter decodes
*/
public Charset getCharset() {
return decoder.charset();
}
}

View File

@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.nio.ByteBuffer;
import java.util.concurrent.atomic.AtomicBoolean;
/**
 * Pairs a {@link ByteBuffer} with a flag recording whether it has already
 * been flipped, so that the flip is performed at most once through this
 * holder.
 */
public class ByteBufferHolder {

    private final ByteBuffer buf;
    private final AtomicBoolean flipped;


    /**
     * @param buf     the buffer to hold
     * @param flipped whether the buffer has already been flipped
     */
    public ByteBufferHolder(ByteBuffer buf, boolean flipped) {
        this.flipped = new AtomicBoolean(flipped);
        this.buf = buf;
    }


    /**
     * @return the held buffer
     */
    public ByteBuffer getBuf() {
        return buf;
    }


    /**
     * @return <code>true</code> if the buffer is recorded as flipped
     */
    public boolean isFlipped() {
        return flipped.get();
    }


    /**
     * Flips the buffer unless it is already recorded as flipped.
     *
     * @return <code>true</code> if this call flipped the buffer, otherwise
     *         <code>false</code>
     */
    public boolean flip() {
        // Only the caller that wins the false -> true transition flips
        boolean won = flipped.compareAndSet(false, true);
        if (won) {
            buf.flip();
        }
        return won;
    }
}

View File

@@ -0,0 +1,144 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import org.apache.juli.logging.Log;
import org.apache.juli.logging.LogFactory;
import org.apache.tomcat.util.compat.JreCompat;
import org.apache.tomcat.util.res.StringManager;
/**
 * Static helpers for growing {@link ByteBuffer}s and for eagerly releasing
 * direct buffers via reflection.
 */
public class ByteBufferUtils {

    private static final StringManager sm =
            StringManager.getManager(Constants.Package);
    private static final Log log = LogFactory.getLog(ByteBufferUtils.class);

    // Java 9+ route: Unsafe.invokeCleaner(ByteBuffer)
    private static final Object unsafe;
    private static final Method invokeCleanerMethod;
    // Pre Java 9 route: buffer.cleaner().clean()
    private static final Method cleanerMethod;
    private static final Method cleanMethod;

    static {
        // Each route is looked up and then tried once against a throw-away
        // buffer; if anything fails that route is left disabled (null)
        ByteBuffer probe = ByteBuffer.allocateDirect(0);
        Method cleanerLookup = null;
        Method cleanLookup = null;
        Object unsafeLookup = null;
        Method invokeCleanerLookup = null;

        if (!JreCompat.isJre9Available()) {
            try {
                cleanerLookup = probe.getClass().getMethod("cleaner");
                cleanerLookup.setAccessible(true);
                Object cleaner = cleanerLookup.invoke(probe);
                cleanLookup = cleaner.getClass().getMethod("clean");
                cleanLookup.invoke(cleaner);
            } catch (NoSuchMethodException | SecurityException | IllegalAccessException |
                    IllegalArgumentException | InvocationTargetException e) {
                log.warn(sm.getString("byteBufferUtils.cleaner"), e);
                cleanerLookup = null;
                cleanLookup = null;
            }
        } else {
            try {
                Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
                Field instanceField = unsafeClass.getDeclaredField("theUnsafe");
                instanceField.setAccessible(true);
                unsafeLookup = instanceField.get(null);
                invokeCleanerLookup = unsafeClass.getMethod("invokeCleaner", ByteBuffer.class);
                invokeCleanerLookup.invoke(unsafeLookup, probe);
            } catch (IllegalAccessException | IllegalArgumentException
                    | InvocationTargetException | NoSuchMethodException | SecurityException
                    | ClassNotFoundException | NoSuchFieldException e) {
                log.warn(sm.getString("byteBufferUtils.cleaner"), e);
                unsafeLookup = null;
                invokeCleanerLookup = null;
            }
        }

        cleanerMethod = cleanerLookup;
        cleanMethod = cleanLookup;
        unsafe = unsafeLookup;
        invokeCleanerMethod = invokeCleanerLookup;
    }


    private ByteBufferUtils() {
        // Hide the default constructor since this is a utility class.
    }


    /**
     * Expands buffer to the given size unless it is already as big or bigger.
     * Buffers are assumed to be in 'write to' mode since there would be no need
     * to expand a buffer while it was in 'read from' mode.
     * <p>
     * When a replacement is allocated the input buffer is flipped, its content
     * is copied and, if it is a direct buffer, it is cleaned.
     *
     * @param in        Buffer to expand
     * @param newSize   The size to which the buffer should be expanded
     * @return          The expanded buffer with any data from the input buffer
     *                  copied in to it or the original buffer if there was no
     *                  need for expansion
     */
    public static ByteBuffer expand(ByteBuffer in, int newSize) {
        if (in.capacity() >= newSize) {
            return in;
        }

        // The replacement is of the same kind (direct / heap) as the input
        final boolean direct = in.isDirect();
        ByteBuffer out = direct ? ByteBuffer.allocateDirect(newSize) : ByteBuffer.allocate(newSize);

        // Copy data
        in.flip();
        out.put(in);

        if (direct) {
            cleanDirectBuffer(in);
        }

        return out;
    }


    /**
     * Attempts to release the given buffer using whichever reflective route
     * was found during class initialisation. Failures are only logged, at
     * debug level. Does nothing if no route is available.
     *
     * @param buf the buffer to clean
     */
    public static void cleanDirectBuffer(ByteBuffer buf) {
        if (cleanMethod != null) {
            try {
                Object cleaner = cleanerMethod.invoke(buf);
                cleanMethod.invoke(cleaner);
            } catch (IllegalAccessException | IllegalArgumentException
                    | InvocationTargetException | SecurityException e) {
                logCleanFailure(e);
            }
            return;
        }

        if (invokeCleanerMethod == null) {
            return;
        }
        try {
            invokeCleanerMethod.invoke(unsafe, buf);
        } catch (IllegalAccessException | IllegalArgumentException
                | InvocationTargetException | SecurityException e) {
            logCleanFailure(e);
        }
    }


    // Reports a failed clean attempt when debug logging is enabled
    private static void logCleanFailure(Exception e) {
        if (log.isDebugEnabled()) {
            log.debug(sm.getString("byteBufferUtils.cleaner"), e);
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,192 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
/**
* NIO based character encoder.
* <p>
* Wraps a {@link CharsetEncoder} configured to replace unmappable characters
* and malformed input, and carries chars that could not be encoded yet from
* one convert call to the next. The wrapping byte/char buffers are cached
* between calls and are only re-created when the backing array of the
* caller's buffer changes.
*/
public final class C2BConverter {
private final CharsetEncoder encoder;
// Cached wrappers around the caller's output and input arrays
private ByteBuffer bb = null;
private CharBuffer cb = null;
/**
* Leftover buffer used for characters that span more than one char. A
* position greater than zero means chars from a previous call are waiting to
* be completed.
*/
private final CharBuffer leftovers;
/**
* Creates a converter for the given character set.
*
* @param charset The character set to encode to
*/
public C2BConverter(Charset charset) {
encoder = charset.newEncoder();
encoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
.onMalformedInput(CodingErrorAction.REPLACE);
char[] left = new char[4];
leftovers = CharBuffer.wrap(left);
}
/**
* Reset the encoder state and discard any leftover chars.
*/
public void recycle() {
encoder.reset();
leftovers.position(0);
}
/**
* @return <code>true</code> if chars from a previous call are still held in
* the leftover buffer
*/
public boolean isUndeflow() {
return (leftovers.position() > 0);
}
/**
* Convert the given characters to bytes.
* <p>
* Encoded bytes are written after the current end of the byte chunk. On
* return the char chunk offset and the byte chunk end reflect what was
* consumed and produced. Trailing chars that could not be encoded yet are
* moved into the leftover buffer.
*
* @param cc char input
* @param bc byte output
* @throws IOException An encoding error occurred
*/
public void convert(CharChunk cc, ByteChunk bc) throws IOException {
if ((bb == null) || (bb.array() != bc.getBuffer())) {
// Create a new byte buffer if anything changed
// Output is appended in the free space after the chunk's current end
bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(), bc.getBuffer().length - bc.getEnd());
} else {
// Initialize the byte buffer
bb.limit(bc.getBuffer().length);
bb.position(bc.getEnd());
}
if ((cb == null) || (cb.array() != cc.getBuffer())) {
// Create a new char buffer if anything changed
cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(), cc.getLength());
} else {
// Initialize the char buffer
// Limit is set before position so the new position is never beyond
// the limit
cb.limit(cc.getEnd());
cb.position(cc.getStart());
}
CoderResult result = null;
// Parse leftover if any are present
if (leftovers.position() > 0) {
int pos = bb.position();
// Loop until one char is encoded or there is a encoder error
do {
// NOTE(review): substract() presumably removes and returns the next
// char of the chunk - confirm against CharChunk
leftovers.put((char) cc.substract());
// Expose only the chars collected so far to the encoder
leftovers.flip();
result = encoder.encode(leftovers, bb, false);
// Switch back to append mode, keeping every char collected so far
leftovers.position(leftovers.limit());
leftovers.limit(leftovers.array().length);
} while (result.isUnderflow() && (bb.position() == pos));
// Per the CoderResult API isError() also covers the malformed case
if (result.isError() || result.isMalformed()) {
result.throwException();
}
// Chars were taken from the chunk above, so re-sync the wrapper
cb.position(cc.getStart());
leftovers.position(0);
}
// Do the encoding and get the results into the byte chunk and the char
// chunk
result = encoder.encode(cb, bb, false);
if (result.isError() || result.isMalformed()) {
result.throwException();
} else if (result.isOverflow()) {
// Propagate current positions to the byte chunk and char chunk
bc.setEnd(bb.position());
cc.setOffset(cb.position());
} else if (result.isUnderflow()) {
// Propagate current positions to the byte chunk and char chunk
bc.setEnd(bb.position());
cc.setOffset(cb.position());
// Put leftovers in the leftovers char buffer
if (cc.getLength() > 0) {
leftovers.limit(leftovers.array().length);
// The position records how many leftover chars are held
leftovers.position(cc.getLength());
// NOTE(review): substract(char[], int, int) presumably copies the
// remaining chars out of the chunk and consumes them - confirm
cc.substract(leftovers.array(), 0, cc.getLength());
}
}
}
/**
* Convert the given characters to bytes.
* <p>
* Encoded bytes are written starting at the current limit of the byte
* buffer, whose limit is then advanced past them. Trailing chars that could
* not be encoded yet are moved into the leftover buffer.
*
* @param cc char input
* @param bc byte output
* @throws IOException An encoding error occurred
*/
public void convert(CharBuffer cc, ByteBuffer bc) throws IOException {
if ((bb == null) || (bb.array() != bc.array())) {
// Create a new byte buffer if anything changed
// Output is appended in the space between the limit and the capacity
bb = ByteBuffer.wrap(bc.array(), bc.limit(), bc.capacity() - bc.limit());
} else {
// Initialize the byte buffer
bb.limit(bc.capacity());
bb.position(bc.limit());
}
if ((cb == null) || (cb.array() != cc.array())) {
// Create a new char buffer if anything changed
cb = CharBuffer.wrap(cc.array(), cc.arrayOffset() + cc.position(), cc.remaining());
} else {
// Initialize the char buffer
cb.limit(cc.limit());
cb.position(cc.position());
}
CoderResult result = null;
// Parse leftover if any are present
if (leftovers.position() > 0) {
int pos = bb.position();
// Loop until one char is encoded or there is a encoder error
do {
leftovers.put(cc.get());
// Expose only the chars collected so far to the encoder
leftovers.flip();
result = encoder.encode(leftovers, bb, false);
// Switch back to append mode, keeping every char collected so far
leftovers.position(leftovers.limit());
leftovers.limit(leftovers.array().length);
} while (result.isUnderflow() && (bb.position() == pos));
// Per the CoderResult API isError() also covers the malformed case
if (result.isError() || result.isMalformed()) {
result.throwException();
}
// Chars were taken from cc above, so re-sync the wrapper
cb.position(cc.position());
leftovers.position(0);
}
// Do the encoding and get the results into the byte buffer and the char
// buffer
result = encoder.encode(cb, bb, false);
if (result.isError() || result.isMalformed()) {
result.throwException();
} else if (result.isOverflow()) {
// Propagate current positions to the byte buffer and char buffer
bc.limit(bb.position());
cc.position(cb.position());
} else if (result.isUnderflow()) {
// Propagate current positions to the byte buffer and char buffer
bc.limit(bb.position());
cc.position(cb.position());
// Put leftovers in the leftovers char buffer
if (cc.remaining() > 0) {
leftovers.limit(leftovers.array().length);
// The position records how many leftover chars are held
leftovers.position(cc.remaining());
// Bulk get copies the remaining chars and consumes them from cc
cc.get(leftovers.array(), 0, cc.remaining());
}
}
}
/**
* @return the character set this converter encodes to
*/
public Charset getCharset() {
return encoder.charset();
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,230 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
/**
 * Cache of {@link Charset} instances keyed by lower-case name or alias.
 * <p>
 * The names in {@link #INITIAL_CHARSETS} are resolved when the cache is
 * constructed. The names in {@link #LAZY_CHARSETS} are registered with a
 * placeholder and only resolved through {@link Charset#forName(String)} the
 * first time they are requested. Names that appear in neither list (and are
 * not an alias of an already loaded charset) are never looked up and yield
 * {@code null}.
 */
public class CharsetCache {

    /* Note: Package private to enable testing without reflection */
    static final String[] INITIAL_CHARSETS = new String[] { "iso-8859-1", "utf-8" };

    /*
     * Note: Package private to enable testing without reflection
     */
    static final String[] LAZY_CHARSETS = new String[] {
            // Initial set from Oracle JDK 8 u192
            "037", "1006", "1025", "1026", "1046", "1047", "1089", "1097", "1098", "1112", "1122", "1123", "1124",
            "1140", "1141", "1142", "1143", "1144", "1145", "1146", "1147", "1148", "1149", "1166", "1364", "1381",
            "1383", "273", "277", "278", "280", "284", "285", "290", "297", "300", "33722", "420", "424", "437", "500",
            "5601", "646", "737", "775", "813", "834", "838", "850", "852", "855", "856", "857", "858", "860", "861",
            "862", "863", "864", "865", "866", "868", "869", "870", "871", "874", "875", "8859_13", "8859_15", "8859_2",
            "8859_3", "8859_4", "8859_5", "8859_6", "8859_7", "8859_8", "8859_9", "912", "913", "914", "915", "916",
            "918", "920", "921", "922", "923", "930", "933", "935", "937", "939", "942", "942c", "943", "943c", "948",
            "949", "949c", "950", "964", "970", "ansi-1251", "ansi_x3.4-1968", "ansi_x3.4-1986", "arabic", "ascii",
            "ascii7", "asmo-708", "big5", "big5-hkscs", "big5-hkscs", "big5-hkscs-2001", "big5-hkscs:unicode3.0",
            "big5_hkscs", "big5_hkscs_2001", "big5_solaris", "big5hk", "big5hk-2001", "big5hkscs", "big5hkscs-2001",
            "ccsid00858", "ccsid01140", "ccsid01141", "ccsid01142", "ccsid01143", "ccsid01144", "ccsid01145",
            "ccsid01146", "ccsid01147", "ccsid01148", "ccsid01149", "cesu-8", "cesu8", "cns11643", "compound_text",
            "cp-ar", "cp-gr", "cp-is", "cp00858", "cp01140", "cp01141", "cp01142", "cp01143", "cp01144", "cp01145",
            "cp01146", "cp01147", "cp01148", "cp01149", "cp037", "cp1006", "cp1025", "cp1026", "cp1046", "cp1047",
            "cp1089", "cp1097", "cp1098", "cp1112", "cp1122", "cp1123", "cp1124", "cp1140", "cp1141", "cp1142",
            "cp1143", "cp1144", "cp1145", "cp1146", "cp1147", "cp1148", "cp1149", "cp1166", "cp1250", "cp1251",
            "cp1252", "cp1253", "cp1254", "cp1255", "cp1256", "cp1257", "cp1258", "cp1364", "cp1381", "cp1383", "cp273",
            "cp277", "cp278", "cp280", "cp284", "cp285", "cp290", "cp297", "cp300", "cp33722", "cp367", "cp420",
            "cp424", "cp437", "cp500", "cp50220", "cp50221", "cp5346", "cp5347", "cp5348", "cp5349", "cp5350", "cp5353",
            "cp737", "cp775", "cp813", "cp833", "cp834", "cp838", "cp850", "cp852", "cp855", "cp856", "cp857", "cp858",
            "cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866", "cp868", "cp869", "cp870", "cp871", "cp874",
            "cp875", "cp912", "cp913", "cp914", "cp915", "cp916", "cp918", "cp920", "cp921", "cp922", "cp923", "cp930",
            "cp933", "cp935", "cp936", "cp937", "cp939", "cp942", "cp942c", "cp943", "cp943c", "cp948", "cp949",
            "cp949c", "cp950", "cp964", "cp970", "cpibm284", "cpibm285", "cpibm297", "cpibm37", "cs-ebcdic-cp-ca",
            "cs-ebcdic-cp-nl", "cs-ebcdic-cp-us", "cs-ebcdic-cp-wt", "csascii", "csbig5", "cscesu-8", "cseuckr",
            "cseucpkdfmtjapanese", "cshalfwidthkatakana", "csibm037", "csibm278", "csibm284", "csibm285", "csibm290",
            "csibm297", "csibm420", "csibm424", "csibm500", "csibm857", "csibm860", "csibm861", "csibm862", "csibm863",
            "csibm864", "csibm865", "csibm866", "csibm868", "csibm869", "csibm870", "csibm871", "csiso153gost1976874",
            "csiso159jisx02121990", "csiso2022cn", "csiso2022jp", "csiso2022jp2", "csiso2022kr", "csiso87jisx0208",
            "csisolatin0", "csisolatin2", "csisolatin3", "csisolatin4", "csisolatin5", "csisolatin9",
            "csisolatinarabic", "csisolatincyrillic", "csisolatingreek", "csisolatinhebrew", "csjisencoding", "cskoi8r",
            "cspc850multilingual", "cspc862latinhebrew", "cspc8codepage437", "cspcp852", "cspcp855", "csshiftjis",
            "cswindows31j", "cyrillic", "default", "ebcdic-cp-ar1", "ebcdic-cp-ar2", "ebcdic-cp-bh", "ebcdic-cp-ca",
            "ebcdic-cp-ch", "ebcdic-cp-fr", "ebcdic-cp-gb", "ebcdic-cp-he", "ebcdic-cp-is", "ebcdic-cp-nl",
            "ebcdic-cp-roece", "ebcdic-cp-se", "ebcdic-cp-us", "ebcdic-cp-wt", "ebcdic-cp-yu", "ebcdic-de-273+euro",
            "ebcdic-dk-277+euro", "ebcdic-es-284+euro", "ebcdic-fi-278+euro", "ebcdic-fr-277+euro", "ebcdic-gb",
            "ebcdic-gb-285+euro", "ebcdic-international-500+euro", "ebcdic-it-280+euro", "ebcdic-jp-kana",
            "ebcdic-no-277+euro", "ebcdic-s-871+euro", "ebcdic-se-278+euro", "ebcdic-sv", "ebcdic-us-037+euro",
            "ecma-114", "ecma-118", "elot_928", "euc-cn", "euc-jp", "euc-jp-linux", "euc-kr", "euc-tw", "euc_cn",
            "euc_jp", "euc_jp_linux", "euc_jp_solaris", "euc_kr", "euc_tw", "euccn", "eucjis", "eucjp", "eucjp-open",
            "euckr", "euctw", "extended_unix_code_packed_format_for_japanese", "gb18030", "gb18030-2000", "gb2312",
            "gb2312", "gb2312-1980", "gb2312-80", "gbk", "greek", "greek8", "hebrew", "ibm-037", "ibm-1006", "ibm-1025",
            "ibm-1026", "ibm-1046", "ibm-1047", "ibm-1089", "ibm-1097", "ibm-1098", "ibm-1112", "ibm-1122", "ibm-1123",
            "ibm-1124", "ibm-1166", "ibm-1364", "ibm-1381", "ibm-1383", "ibm-273", "ibm-277", "ibm-278", "ibm-280",
            "ibm-284", "ibm-285", "ibm-290", "ibm-297", "ibm-300", "ibm-33722", "ibm-33722_vascii_vpua", "ibm-37",
            "ibm-420", "ibm-424", "ibm-437", "ibm-500", "ibm-5050", "ibm-737", "ibm-775", "ibm-813", "ibm-833",
            "ibm-834", "ibm-838", "ibm-850", "ibm-852", "ibm-855", "ibm-856", "ibm-857", "ibm-860", "ibm-861",
            "ibm-862", "ibm-863", "ibm-864", "ibm-865", "ibm-866", "ibm-868", "ibm-869", "ibm-870", "ibm-871",
            "ibm-874", "ibm-875", "ibm-912", "ibm-913", "ibm-914", "ibm-915", "ibm-916", "ibm-918", "ibm-920",
            "ibm-921", "ibm-922", "ibm-923", "ibm-930", "ibm-933", "ibm-935", "ibm-937", "ibm-939", "ibm-942",
            "ibm-942c", "ibm-943", "ibm-943c", "ibm-948", "ibm-949", "ibm-949c", "ibm-950", "ibm-964", "ibm-970",
            "ibm-euckr", "ibm-thai", "ibm00858", "ibm01140", "ibm01141", "ibm01142", "ibm01143", "ibm01144", "ibm01145",
            "ibm01146", "ibm01147", "ibm01148", "ibm01149", "ibm037", "ibm037", "ibm1006", "ibm1025", "ibm1026",
            "ibm1026", "ibm1046", "ibm1047", "ibm1089", "ibm1097", "ibm1098", "ibm1112", "ibm1122", "ibm1123",
            "ibm1124", "ibm1166", "ibm1364", "ibm1381", "ibm1383", "ibm273", "ibm273", "ibm277", "ibm277", "ibm278",
            "ibm278", "ibm280", "ibm280", "ibm284", "ibm284", "ibm285", "ibm285", "ibm290", "ibm290", "ibm297",
            "ibm297", "ibm300", "ibm33722", "ibm367", "ibm420", "ibm420", "ibm424", "ibm424", "ibm437", "ibm437",
            "ibm500", "ibm500", "ibm737", "ibm775", "ibm775", "ibm813", "ibm833", "ibm834", "ibm838", "ibm850",
            "ibm850", "ibm852", "ibm852", "ibm855", "ibm855", "ibm856", "ibm857", "ibm857", "ibm860", "ibm860",
            "ibm861", "ibm861", "ibm862", "ibm862", "ibm863", "ibm863", "ibm864", "ibm864", "ibm865", "ibm865",
            "ibm866", "ibm866", "ibm868", "ibm868", "ibm869", "ibm869", "ibm870", "ibm870", "ibm871", "ibm871",
            "ibm874", "ibm875", "ibm912", "ibm913", "ibm914", "ibm915", "ibm916", "ibm918", "ibm920", "ibm921",
            "ibm922", "ibm923", "ibm930", "ibm933", "ibm935", "ibm937", "ibm939", "ibm942", "ibm942c", "ibm943",
            "ibm943c", "ibm948", "ibm949", "ibm949c", "ibm950", "ibm964", "ibm970", "iscii", "iscii91",
            "iso-10646-ucs-2", "iso-2022-cn", "iso-2022-cn-cns", "iso-2022-cn-gb", "iso-2022-jp", "iso-2022-jp-2",
            "iso-2022-kr", "iso-8859-11", "iso-8859-13", "iso-8859-15", "iso-8859-15", "iso-8859-2", "iso-8859-3",
            "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-9", "iso-ir-101",
            "iso-ir-109", "iso-ir-110", "iso-ir-126", "iso-ir-127", "iso-ir-138", "iso-ir-144", "iso-ir-148",
            "iso-ir-153", "iso-ir-159", "iso-ir-6", "iso-ir-87", "iso2022cn", "iso2022cn_cns", "iso2022cn_gb",
            "iso2022jp", "iso2022jp2", "iso2022kr", "iso646-us", "iso8859-13", "iso8859-15", "iso8859-2", "iso8859-3",
            "iso8859-4", "iso8859-5", "iso8859-6", "iso8859-7", "iso8859-8", "iso8859-9", "iso8859_11", "iso8859_13",
            "iso8859_15", "iso8859_15_fdis", "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5", "iso8859_6",
            "iso8859_7", "iso8859_8", "iso8859_9", "iso_646.irv:1983", "iso_646.irv:1991", "iso_8859-13", "iso_8859-15",
            "iso_8859-2", "iso_8859-2:1987", "iso_8859-3", "iso_8859-3:1988", "iso_8859-4", "iso_8859-4:1988",
            "iso_8859-5", "iso_8859-5:1988", "iso_8859-6", "iso_8859-6:1987", "iso_8859-7", "iso_8859-7:1987",
            "iso_8859-8", "iso_8859-8:1988", "iso_8859-9", "iso_8859-9:1989", "jis", "jis0201", "jis0208", "jis0212",
            "jis_c6226-1983", "jis_encoding", "jis_x0201", "jis_x0201", "jis_x0208-1983", "jis_x0212-1990",
            "jis_x0212-1990", "jisautodetect", "johab", "koi8", "koi8-r", "koi8-u", "koi8_r", "koi8_u",
            "ks_c_5601-1987", "ksc5601", "ksc5601-1987", "ksc5601-1992", "ksc5601_1987", "ksc5601_1992", "ksc_5601",
            "l2", "l3", "l4", "l5", "l9", "latin0", "latin2", "latin3", "latin4", "latin5", "latin9", "macarabic",
            "maccentraleurope", "maccroatian", "maccyrillic", "macdingbat", "macgreek", "machebrew", "maciceland",
            "macroman", "macromania", "macsymbol", "macthai", "macturkish", "macukraine", "ms-874", "ms1361", "ms50220",
            "ms50221", "ms874", "ms932", "ms936", "ms949", "ms950", "ms950_hkscs", "ms950_hkscs_xp", "ms_936", "ms_949",
            "ms_kanji", "pc-multilingual-850+euro", "pck", "shift-jis", "shift_jis", "shift_jis", "sjis",
            "st_sev_358-88", "sun_eu_greek", "tis-620", "tis620", "tis620.2533", "unicode", "unicodebig",
            "unicodebigunmarked", "unicodelittle", "unicodelittleunmarked", "us", "us-ascii", "utf-16", "utf-16be",
            "utf-16le", "utf-32", "utf-32be", "utf-32be-bom", "utf-32le", "utf-32le-bom", "utf16", "utf32", "utf_16",
            "utf_16be", "utf_16le", "utf_32", "utf_32be", "utf_32be_bom", "utf_32le", "utf_32le_bom", "windows-1250",
            "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256",
            "windows-1257", "windows-1258", "windows-31j", "windows-437", "windows-874", "windows-932", "windows-936",
            "windows-949", "windows-950", "windows-iso2022jp", "windows949", "x-big5-hkscs-2001", "x-big5-solaris",
            "x-compound-text", "x-compound_text", "x-euc-cn", "x-euc-jp", "x-euc-jp-linux", "x-euc-tw", "x-eucjp",
            "x-eucjp-open", "x-ibm1006", "x-ibm1025", "x-ibm1046", "x-ibm1097", "x-ibm1098", "x-ibm1112", "x-ibm1122",
            "x-ibm1123", "x-ibm1124", "x-ibm1166", "x-ibm1364", "x-ibm1381", "x-ibm1383", "x-ibm300", "x-ibm33722",
            "x-ibm737", "x-ibm833", "x-ibm834", "x-ibm856", "x-ibm874", "x-ibm875", "x-ibm921", "x-ibm922", "x-ibm930",
            "x-ibm933", "x-ibm935", "x-ibm937", "x-ibm939", "x-ibm942", "x-ibm942c", "x-ibm943", "x-ibm943c",
            "x-ibm948", "x-ibm949", "x-ibm949c", "x-ibm950", "x-ibm964", "x-ibm970", "x-iscii91", "x-iso-2022-cn-cns",
            "x-iso-2022-cn-gb", "x-iso-8859-11", "x-jis0208", "x-jisautodetect", "x-johab", "x-macarabic",
            "x-maccentraleurope", "x-maccroatian", "x-maccyrillic", "x-macdingbat", "x-macgreek", "x-machebrew",
            "x-maciceland", "x-macroman", "x-macromania", "x-macsymbol", "x-macthai", "x-macturkish", "x-macukraine",
            "x-ms932_0213", "x-ms950-hkscs", "x-ms950-hkscs-xp", "x-mswin-936", "x-pck", "x-sjis", "x-sjis_0213",
            "x-utf-16be", "x-utf-16le", "x-utf-16le-bom", "x-utf-32be", "x-utf-32be-bom", "x-utf-32le",
            "x-utf-32le-bom", "x-windows-50220", "x-windows-50221", "x-windows-874", "x-windows-949", "x-windows-950",
            "x-windows-iso2022jp", "x0201", "x0208", "x0212", "x11-compound_text",
            // Added from Oracle JDK 10.0.2
            "csiso885915", "csiso885916", "iso-8859-16", "iso-ir-226", "iso_8859-16", "iso_8859-16:2001", "l10",
            "latin-9", "latin10", "ms932-0213", "ms932:2004", "ms932_0213", "shift_jis:2004", "shift_jis_0213:2004",
            "sjis-0213", "sjis:2004", "sjis_0213", "sjis_0213:2004", "windows-932-0213", "windows-932:2004",
            // Added from OpenJDK 11.0.1
            "932", "cp932", "cpeuccn", "ibm-1252", "ibm-932", "ibm-euccn", "ibm1252", "ibm932", "ibmeuccn", "x-ibm932",
            // Added from OpenJDK 12 ea28
            "1129", "cp1129", "ibm-1129", "ibm-euctw", "ibm1129", "x-ibm1129",
            // Added from OpenJDK 13 ea15
            "29626c", "833", "cp29626c", "ibm-1140", "ibm-1141", "ibm-1142", "ibm-1143", "ibm-1144", "ibm-1145",
            "ibm-1146", "ibm-1147", "ibm-1148", "ibm-1149", "ibm-29626c", "ibm-858", "ibm-eucjp", "ibm1140", "ibm1141",
            "ibm1142", "ibm1143", "ibm1144", "ibm1145", "ibm1146", "ibm1147", "ibm1148", "ibm1149", "ibm29626c",
            "ibm858", "x-ibm29626c",
            // Added from HPE JVM 1.8.0.17-hp-ux
            "cp1051", "cp1386", "cshproman8", "hp-roman8", "ibm-1051", "r8", "roman8", "roman9"
    };

    /* Sentinel stored for names that are known but have not been resolved yet. */
    private static final Charset DUMMY_CHARSET = new DummyCharset("Dummy", null);

    /* Lower-case name or alias -> resolved Charset, or DUMMY_CHARSET. */
    private final ConcurrentMap<String,Charset> cache = new ConcurrentHashMap<>();


    /**
     * Creates a cache in which every name in {@link #INITIAL_CHARSETS} is
     * already resolved and every name in {@link #LAZY_CHARSETS} is registered
     * with a placeholder.
     */
    public CharsetCache() {
        // Pre-populate the cache
        for (String charsetName : INITIAL_CHARSETS) {
            Charset charset = Charset.forName(charsetName);
            addToCache(charsetName, charset);
        }

        for (String charsetName : LAZY_CHARSETS) {
            addToCache(charsetName, DUMMY_CHARSET);
        }
    }


    /*
     * Stores the charset under the given name and under each of its aliases
     * converted to lower case.
     */
    private void addToCache(String name, Charset charset) {
        cache.put(name, charset);
        for (String alias : charset.aliases()) {
            cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
        }
    }


    /**
     * Looks up a charset by name. The lookup is case-insensitive because the
     * name is converted to lower case before the cache is consulted.
     *
     * @param charsetName The name or alias of the required charset; must not
     *                    be {@code null}
     *
     * @return The matching charset, or {@code null} if the name is not known to
     *         this cache or the charset is not provided by the running JVM
     */
    public Charset getCharset(String charsetName) {
        String lcCharsetName = charsetName.toLowerCase(Locale.ENGLISH);

        Charset result = cache.get(lcCharsetName);

        if (result == DUMMY_CHARSET) {
            // Name is known but the Charset is not in the cache
            try {
                // Charset.forName() never returns null. It signals a charset
                // that this JVM does not provide by throwing.
                Charset charset = Charset.forName(lcCharsetName);
                // Charset is available - populate cache entry
                addToCache(lcCharsetName, charset);
                result = charset;
            } catch (UnsupportedCharsetException e) {
                // Charset not available in this JVM - remove cache entry so
                // later requests for the same name return null directly
                cache.remove(lcCharsetName);
                result = null;
            }
        }

        return result;
    }


    /*
     * Placeholder Charset implementation for entries that will be loaded lazily
     * into the cache.
     */
    private static class DummyCharset extends Charset {

        protected DummyCharset(String canonicalName, String[] aliases) {
            super(canonicalName, aliases);
        }

        @Override
        public boolean contains(Charset cs) {
            return false;
        }

        @Override
        public CharsetDecoder newDecoder() {
            return null;
        }

        @Override
        public CharsetEncoder newEncoder() {
            return null;
        }
    }
}

View File

@@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
/**
 * String constants for the buf package.
 */
public final class Constants {
// Fully qualified name of this package (org.apache.tomcat.util.buf).
public static final String Package = "org.apache.tomcat.util.buf";
}

View File

@@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import org.apache.tomcat.util.res.StringManager;
/**
 * Tables useful when converting byte arrays to and from strings of hexadecimal
 * digits.
 * Code from Ajp11, from Apache's JServ.
 *
 * @author Craig R. McClanahan
 */
public final class HexUtils {

    private static final StringManager sm =
            StringManager.getManager(Constants.Package);

    // -------------------------------------------------------------- Constants

    /**
     * Table for HEX to DEC byte translation. The table is indexed by
     * {@code character - '0'} and covers '0' to 'f'; positions that are not a
     * hexadecimal digit hold -1.
     */
    private static final int[] DEC = {
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
        -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, 10, 11, 12, 13, 14, 15,
    };


    /**
     * Table for DEC to HEX byte translation.
     */
    private static final byte[] HEX =
    { (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4', (byte) '5',
      (byte) '6', (byte) '7', (byte) '8', (byte) '9', (byte) 'a', (byte) 'b',
      (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f' };


    /**
     * Table for byte to hex string translation.
     */
    private static final char[] hex = "0123456789abcdef".toCharArray();


    // --------------------------------------------------------- Static Methods

    /**
     * Converts a character code to the value of the hexadecimal digit it
     * represents.
     *
     * @param index The character code to convert
     *
     * @return The digit value (0 to 15) for '0'-'9', 'A'-'F' and 'a'-'f', or
     *         -1 for any other input
     */
    public static int getDec(int index) {
        // Explicit range check instead of relying on a caught
        // ArrayIndexOutOfBoundsException. Same results, but invalid input no
        // longer pays for creating and unwinding an exception.
        int offset = index - '0';
        if (offset < 0 || offset >= DEC.length) {
            return -1;
        }
        return DEC[offset];
    }


    /**
     * Obtains the lower case hexadecimal digit for a value.
     *
     * @param index The value to convert; values outside 0 to 15 cause an
     *              {@link ArrayIndexOutOfBoundsException}
     *
     * @return The ASCII code of the matching digit ('0'-'9' or 'a'-'f')
     */
    public static byte getHex(int index) {
        return HEX[index];
    }


    /**
     * Renders a byte array as lower case hexadecimal text, two digits per
     * byte.
     *
     * @param bytes The bytes to render, may be {@code null}
     *
     * @return The hexadecimal text, or {@code null} if the input was
     *         {@code null}
     */
    public static String toHexString(byte[] bytes) {
        if (null == bytes) {
            return null;
        }

        StringBuilder sb = new StringBuilder(bytes.length << 1);

        for (byte b : bytes) {
            sb.append(hex[(b & 0xf0) >> 4])
                .append(hex[(b & 0x0f)]);
        }

        return sb.toString();
    }


    /**
     * Parses hexadecimal text into the bytes it represents.
     *
     * @param input The text to parse, may be {@code null}
     *
     * @return The decoded bytes, or {@code null} if the input was {@code null}
     *
     * @throws IllegalArgumentException if the input has an odd number of
     *         characters or contains a character that is not a hexadecimal
     *         digit
     */
    public static byte[] fromHexString(String input) {
        if (input == null) {
            return null;
        }

        if ((input.length() & 1) == 1) {
            // Odd number of characters
            throw new IllegalArgumentException(sm.getString("hexUtils.fromHex.oddDigits"));
        }

        char[] inputChars = input.toCharArray();
        byte[] result = new byte[input.length() >> 1];
        for (int i = 0; i < result.length; i++) {
            int upperNibble = getDec(inputChars[2*i]);
            int lowerNibble = getDec(inputChars[2*i + 1]);
            if (upperNibble < 0 || lowerNibble < 0) {
                // Non hex character
                throw new IllegalArgumentException(sm.getString("hexUtils.fromHex.nonHex"));
            }
            result[i] = (byte) ((upperNibble << 4) + lowerNibble);
        }
        return result;
    }
}

View File

@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
asn1Parser.lengthInvalid=Invalid length [{0}] bytes reported when the input data length is [{1}] bytes
asn1Parser.tagMismatch=Expected to find value [{0}] but found value [{1}]
b2cConverter.unknownEncoding=The character encoding [{0}] is not supported
byteBufferUtils.cleaner=Cannot use direct ByteBuffer cleaner, memory leaking may occur
c2bConverter.recycleFailed=Failed to recycle the C2B Converter. Creating new BufferedWriter, WriteConvertor and IntermediateOutputStream.
hexUtils.fromHex.nonHex=The input must consist only of hex digits
hexUtils.fromHex.oddDigits=The input must consist of an even number of hex digits
uDecoder.eof=End of file (EOF)
uDecoder.noSlash=The encoded slash character is not allowed
uDecoder.urlDecode.conversionError=Failed to decode [{0}] using character set [{1}]
uDecoder.urlDecode.missingDigit=Failed to decode [{0}] because the % character must be followed by two hexadecimal digits
uDecoder.urlDecode.uee=Unable to URL decode the specified input since the encoding [{0}] is not supported.
udecoder.urlDecode.iae=It is not practical to %nn decode a byte array since how the %nn is encoded will vary by character set

View File

@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
uDecoder.convertHexDigit.notHex=[{0}] ist keine hexadezimale Ziffer

View File

@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
b2cConverter.unknownEncoding=La codificación de carácter [{0}] no está soportada

View File

@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
asn1Parser.lengthInvalid=Une longueur d''octets invalide [{0}] a été rapportée alors que la longueur de données en entrée est de [{1}] octets
asn1Parser.tagMismatch=La valeur [{0}] était attendue mais la valeur [{1}] a été rencontrée
b2cConverter.unknownEncoding=L''encodage de caractères [{0}] n''est pas supporté
byteBufferUtils.cleaner=Impossible d'utiliser le nettoyeur de ByteBuffers directs, une fuite de mémoire peut se produire
hexUtils.fromHex.nonHex=L'entrée doit être uniquement des chiffres hexadécimaux
hexUtils.fromHex.oddDigits=L'entrée doit contenir un nombre pair de chiffres hexadécimaux
uDecoder.eof=Fin de fichier (EOF)
uDecoder.noSlash=Un caractère slash encodé n'est pas autorisé
uDecoder.urlDecode.conversionError=Echec de décodage [{0}] en utilisant le jeu de caractères [{1}]
uDecoder.urlDecode.missingDigit=Impossible de décoder [{0}] parce que le caractère % doit être suivi de deux chiffres hexadécimaux

View File

@@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
b2cConverter.unknownEncoding=文字エンコーディング [{0}] は未対応です。
byteBufferUtils.cleaner=直接ByteBufferクリーナーを使用することはできません、メモリリークが発生する可能性があります。
hexUtils.fromHex.nonHex=入力は16進数でなければなりません
hexUtils.fromHex.oddDigits=入力は、偶数の16進数で構成する必要があります。
uDecoder.eof=予期せぬ場所で終端に達しました。
uDecoder.noSlash="/" を符号化して含めることはできません。
uDecoder.urlDecode.conversionError=文字セット[{1}]を使用して[{0}]のデコードに失敗しました
uDecoder.urlDecode.missingDigit=%文字の後ろに2つの16進数字が続く必要があるため、[{0}]のデコードに失敗しました。

View File

@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
asn1Parser.lengthInvalid=입력 데이터의 바이트 길이가 [{1}]인데, 유효하지 않은 바이트 길이 [{0}](이)가 보고되었습니다.
asn1Parser.tagMismatch=[{0}] 값이 기대 되었는데, [{1}] 값이 발견되었습니다.
b2cConverter.unknownEncoding=문자 인코딩 [{0}]은(는) 지원되지 않습니다.
byteBufferUtils.cleaner=직접적인 ByteBuffer cleaner를 사용할 수 없습니다. 메모리 누수가 발생할 수 있습니다.
hexUtils.fromHex.nonHex=입력은 오직 16진수 숫자로만 이루어져야 합니다.
hexUtils.fromHex.oddDigits=입력은 반드시 짝수 개의 16진수 숫자들로 이루어져야 합니다.
uDecoder.eof=파일의 끝 (EOF)
uDecoder.noSlash=인코딩된 슬래시 문자는 허용되지 않습니다.
uDecoder.urlDecode.conversionError=문자셋 [{1}]을(를) 사용하여 [{0}]을(를) 디코드하지 못했습니다.
uDecoder.urlDecode.missingDigit=% 문자 뒤에 두 개의 16진수 숫자들이 이어져야 하기 때문에, [{0}]을(를) 디코드하지 못했습니다.

View File

@@ -0,0 +1,21 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
asn1Parser.lengthInvalid=无效长度 [{0}]字节报告,但是输入数据的长度是 [{1}]字节
asn1Parser.tagMismatch=期望找到值 [{0}]但是却找到值 [{1}]
hexUtils.fromHex.nonHex=输入只能由十六进制数字组成
uDecoder.urlDecode.conversionError=使用编码[{1}]解码[{0}]失败

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.util.Arrays;
import java.util.Collection;
/**
 * Helpers that concatenate a group of values (arrays, collections or any
 * {@link Iterable}, not just java.util.Set) into a separated list. The result
 * is either returned as a string or appended to a caller supplied
 * StringBuilder. A null or empty group produces an empty string, or leaves the
 * StringBuilder untouched.
 */
public final class StringUtils {

    private static final String EMPTY_STRING = "";

    /** Conversion used when the values being joined are already strings. */
    private static final Function<String> IDENTITY = new IdentityFunction();


    private StringUtils() {
        // Static helpers only - never instantiated
    }


    /**
     * Joins the array elements with a comma.
     *
     * @param array The values to join, may be {@code null}
     *
     * @return The joined values, or an empty string for {@code null}
     */
    public static String join(String[] array) {
        return (array == null) ? EMPTY_STRING : join(Arrays.asList(array));
    }


    /**
     * Appends the array elements, separated by the given character, to the
     * builder. Nothing is appended when the array is {@code null}.
     *
     * @param array     The values to join, may be {@code null}
     * @param separator The character placed between consecutive values
     * @param sb        The builder that receives the output
     */
    public static void join(String[] array, char separator, StringBuilder sb) {
        if (array != null) {
            join(Arrays.asList(array), separator, sb);
        }
    }


    /**
     * Joins the collection elements with a comma.
     *
     * @param collection The values to join, may be {@code null}
     *
     * @return The joined values, or an empty string for a {@code null} or
     *         empty collection
     */
    public static String join(Collection<String> collection) {
        return join(collection, ',');
    }


    /**
     * Joins the collection elements with the given separator.
     *
     * @param collection The values to join, may be {@code null}
     * @param separator  The character placed between consecutive values
     *
     * @return The joined values, or an empty string for a {@code null} or
     *         empty collection
     */
    public static String join(Collection<String> collection, char separator) {
        if (collection != null && !collection.isEmpty()) {
            StringBuilder joined = new StringBuilder();
            join(collection, separator, joined);
            return joined.toString();
        }
        // Nothing to join
        return EMPTY_STRING;
    }


    /**
     * Appends the values, separated by the given character, to the builder.
     *
     * @param iterable  The values to join, may be {@code null}
     * @param separator The character placed between consecutive values
     * @param sb        The builder that receives the output
     */
    public static void join(Iterable<String> iterable, char separator, StringBuilder sb) {
        join(iterable, separator, IDENTITY, sb);
    }


    /**
     * Appends the converted array elements, separated by the given character,
     * to the builder. Nothing is appended when the array is {@code null}.
     *
     * @param <T>       The element type
     * @param array     The values to join, may be {@code null}
     * @param separator The character placed between consecutive values
     * @param function  Converts each element to the text that is appended
     * @param sb        The builder that receives the output
     */
    public static <T> void join(T[] array, char separator, Function<T> function,
            StringBuilder sb) {
        if (array != null) {
            join(Arrays.asList(array), separator, function, sb);
        }
    }


    /**
     * Appends the converted values, separated by the given character, to the
     * builder. Nothing is appended when the iterable is {@code null}.
     *
     * @param <T>       The element type
     * @param iterable  The values to join, may be {@code null}
     * @param separator The character placed between consecutive values
     * @param function  Converts each element to the text that is appended
     * @param sb        The builder that receives the output
     */
    public static <T> void join(Iterable<T> iterable, char separator, Function<T> function,
            StringBuilder sb) {
        if (iterable != null) {
            boolean needSeparator = false;
            for (T item : iterable) {
                if (needSeparator) {
                    sb.append(separator);
                }
                needSeparator = true;
                sb.append(function.apply(item));
            }
        }
    }


    /**
     * Converts a value to the text used for it in the joined output.
     *
     * @param <T> The type of value converted
     */
    public interface Function<T> {
        String apply(T t);
    }


    /** Passes each string through unchanged. */
    private static final class IdentityFunction implements Function<String> {
        @Override
        public String apply(String t) {
            return t;
        }
    }
}

View File

@@ -0,0 +1,495 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.io.ByteArrayOutputStream;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.apache.juli.logging.Log;
import org.apache.juli.logging.LogFactory;
import org.apache.tomcat.util.res.StringManager;
/**
* All URL decoding happens here. This way we can reuse, review, optimize
* without adding complexity to the buffers.
*
* The conversion will modify the original buffer.
*
* @author Costin Manolache
*/
public final class UDecoder {
// Source of the localised messages used by this class.
private static final StringManager sm = StringManager.getManager(UDecoder.class);
// Logger obtained for this class.
private static final Log log = LogFactory.getLog(UDecoder.class);
/**
 * Set from the system property
 * <code>org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH</code> when
 * this class is initialised; <code>false</code> when the property is absent.
 */
public static final boolean ALLOW_ENCODED_SLASH =
Boolean.parseBoolean(System.getProperty("org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH", "false"));
/**
 * Conversion failure that carries a message only; no stack trace is recorded
 * for instances of this class.
 */
private static class DecodeException extends CharConversionException {
    private static final long serialVersionUID = 1L;

    public DecodeException(String message) {
        super(message);
    }

    @Override
    public synchronized Throwable fillInStackTrace() {
        // Skip the stack walk: hand back this instance without a trace
        return this;
    }
}
/** Unexpected end of data. */
private static final IOException EXCEPTION_EOF = new DecodeException(sm.getString("uDecoder.eof"));
/** %xx with not-hex digit */
private static final IOException EXCEPTION_NOT_HEX_DIGIT = new DecodeException(
"isHexDigit");
/** %-encoded slash is forbidden in resource path */
private static final IOException EXCEPTION_SLASH = new DecodeException(
"noSlash");
public UDecoder()
{
}
/**
* URLDecode, will modify the source.
* @param mb The URL encoded bytes
* @param query <code>true</code> if this is a query string
* @throws IOException Invalid %xx URL encoding
*/
public void convert( ByteChunk mb, boolean query )
throws IOException
{
int start=mb.getOffset();
byte buff[]=mb.getBytes();
int end=mb.getEnd();
int idx= ByteChunk.findByte( buff, start, end, (byte) '%' );
int idx2=-1;
if( query ) {
idx2= ByteChunk.findByte( buff, start, (idx >= 0 ? idx : end), (byte) '+' );
}
if( idx<0 && idx2<0 ) {
return;
}
// idx will be the smallest positive index ( first % or + )
if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) {
idx=idx2;
}
final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
for( int j=idx; j<end; j++, idx++ ) {
if( buff[ j ] == '+' && query) {
buff[idx]= (byte)' ' ;
} else if( buff[ j ] != '%' ) {
buff[idx]= buff[j];
} else {
// read next 2 digits
if( j+2 >= end ) {
throw EXCEPTION_EOF;
}
byte b1= buff[j+1];
byte b2=buff[j+2];
if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) {
throw EXCEPTION_NOT_HEX_DIGIT;
}
j+=2;
int res=x2c( b1, b2 );
if (noSlash && (res == '/')) {
throw EXCEPTION_SLASH;
}
buff[idx]=(byte)res;
}
}
mb.setEnd( idx );
}
// -------------------- Additional methods --------------------
// XXX What do we do about charset ????
/**
* In-buffer processing - the buffer will be modified.
* @param mb The URL encoded chars
* @param query <code>true</code> if this is a query string
* @throws IOException Invalid %xx URL encoding
*/
public void convert( CharChunk mb, boolean query )
throws IOException
{
// log( "Converting a char chunk ");
int start=mb.getOffset();
char buff[]=mb.getBuffer();
int cend=mb.getEnd();
int idx= CharChunk.indexOf( buff, start, cend, '%' );
int idx2=-1;
if( query ) {
idx2= CharChunk.indexOf( buff, start, (idx >= 0 ? idx : cend), '+' );
}
if( idx<0 && idx2<0 ) {
return;
}
// idx will be the smallest positive index ( first % or + )
if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) {
idx=idx2;
}
final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
for( int j=idx; j<cend; j++, idx++ ) {
if( buff[ j ] == '+' && query ) {
buff[idx]=( ' ' );
} else if( buff[ j ] != '%' ) {
buff[idx]=buff[j];
} else {
// read next 2 digits
if( j+2 >= cend ) {
// invalid
throw EXCEPTION_EOF;
}
char b1= buff[j+1];
char b2=buff[j+2];
if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) {
throw EXCEPTION_NOT_HEX_DIGIT;
}
j+=2;
int res=x2c( b1, b2 );
if (noSlash && (res == '/')) {
throw EXCEPTION_SLASH;
}
buff[idx]=(char)res;
}
}
mb.setEnd( idx );
}
/**
* URLDecode, will modify the source
* @param mb The URL encoded String, bytes or chars
* @param query <code>true</code> if this is a query string
* @throws IOException Invalid %xx URL encoding
*/
public void convert(MessageBytes mb, boolean query)
throws IOException
{
switch (mb.getType()) {
case MessageBytes.T_STR:
String strValue=mb.toString();
if( strValue==null ) {
return;
}
try {
mb.setString( convert( strValue, query ));
} catch (RuntimeException ex) {
throw new DecodeException(ex.getMessage());
}
break;
case MessageBytes.T_CHARS:
CharChunk charC=mb.getCharChunk();
convert( charC, query );
break;
case MessageBytes.T_BYTES:
ByteChunk bytesC=mb.getByteChunk();
convert( bytesC, query );
break;
}
}
/**
* %xx decoding of a string. FIXME: this is inefficient.
* @param str The URL encoded string
* @param query <code>true</code> if this is a query string
* @return the decoded string
*/
public final String convert(String str, boolean query)
{
if (str == null) {
return null;
}
if( (!query || str.indexOf( '+' ) < 0) && str.indexOf( '%' ) < 0 ) {
return str;
}
final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
StringBuilder dec = new StringBuilder(); // decoded string output
int strPos = 0;
int strLen = str.length();
dec.ensureCapacity(str.length());
while (strPos < strLen) {
int laPos; // lookahead position
// look ahead to next URLencoded metacharacter, if any
for (laPos = strPos; laPos < strLen; laPos++) {
char laChar = str.charAt(laPos);
if ((laChar == '+' && query) || (laChar == '%')) {
break;
}
}
// if there were non-metacharacters, copy them all as a block
if (laPos > strPos) {
dec.append(str.substring(strPos,laPos));
strPos = laPos;
}
// shortcut out of here if we're at the end of the string
if (strPos >= strLen) {
break;
}
// process next metacharacter
char metaChar = str.charAt(strPos);
if (metaChar == '+') {
dec.append(' ');
strPos++;
continue;
} else if (metaChar == '%') {
// We throw the original exception - the super will deal with
// it
// try {
char res = (char) Integer.parseInt(
str.substring(strPos + 1, strPos + 3), 16);
if (noSlash && (res == '/')) {
throw new IllegalArgumentException(sm.getString("uDecoder.noSlash"));
}
dec.append(res);
strPos += 3;
}
}
return dec.toString();
}
/**
* Decode and return the specified URL-encoded String.
* When the byte array is converted to a string, ISO-885901 is used. This
* may be different than some other servers. It is assumed the string is not
* a query string.
*
* @param str The url-encoded string
* @return the decoded string
* @exception IllegalArgumentException if a '%' character is not followed
* by a valid 2-digit hexadecimal number
*/
public static String URLDecode(String str) {
return URLDecode(str, StandardCharsets.ISO_8859_1);
}
/**
* Decode and return the specified URL-encoded String. It is assumed the
* string is not a query string.
*
* @param str The url-encoded string
* @param enc The encoding to use; if null, ISO-885901 is used. If
* an unsupported encoding is specified null will be returned
* @return the decoded string
* @exception IllegalArgumentException if a '%' character is not followed
* by a valid 2-digit hexadecimal number
*
* @deprecated This method will be removed in Tomcat 9
*/
@Deprecated
public static String URLDecode(String str, String enc) {
return URLDecode(str, enc, false);
}
/**
* Decode and return the specified URL-encoded String. It is assumed the
* string is not a query string.
*
* @param str The url-encoded string
* @param charset The character encoding to use; if null, ISO-8859-1 is
* used.
* @return the decoded string
* @exception IllegalArgumentException if a '%' character is not followed
* by a valid 2-digit hexadecimal number
*/
public static String URLDecode(String str, Charset charset) {
return URLDecode(str, charset, false);
}
/**
* Decode and return the specified URL-encoded String.
*
* @param str The url-encoded string
* @param enc The encoding to use; if null, ISO-8859-1 is used. If
* an unsupported encoding is specified null will be returned
* @param isQuery Is this a query string being processed
* @return the decoded string
* @exception IllegalArgumentException if a '%' character is not followed
* by a valid 2-digit hexadecimal number
*
* @deprecated This method will be removed in Tomcat 9
*/
@Deprecated
public static String URLDecode(String str, String enc, boolean isQuery) {
Charset charset = null;
if (enc != null) {
try {
charset = B2CConverter.getCharset(enc);
} catch (UnsupportedEncodingException uee) {
if (log.isDebugEnabled()) {
log.debug(sm.getString("uDecoder.urlDecode.uee", enc), uee);
}
}
}
return URLDecode(str, charset, isQuery);
}
/**
* Decode and return the specified URL-encoded byte array.
*
* @param bytes The url-encoded byte array
* @param enc The encoding to use; if null, ISO-8859-1 is used. If
* an unsupported encoding is specified null will be returned
* @param isQuery Is this a query string being processed
* @return the decoded string
* @exception IllegalArgumentException if a '%' character is not followed
* by a valid 2-digit hexadecimal number
*
* @deprecated This method will be removed in Tomcat 9
*/
@Deprecated
public static String URLDecode(byte[] bytes, String enc, boolean isQuery) {
throw new IllegalArgumentException(sm.getString("udecoder.urlDecode.iae"));
}
private static String URLDecode(String str, Charset charset, boolean isQuery) {
if (str == null) {
return null;
}
if (str.indexOf('%') == -1) {
// No %nn sequences, so return string unchanged
return str;
}
if (charset == null) {
charset = StandardCharsets.ISO_8859_1;
}
/*
* Decoding is required.
*
* Potential complications:
* - The source String may be partially decoded so it is not valid to
* assume that the source String is ASCII.
* - Have to process as characters since there is no guarantee that the
* byte sequence for '%' is going to be the same in all character
* sets.
* - We don't know how many '%nn' sequences are required for a single
* character. It varies between character sets and some use a variable
* length.
*/
// This isn't perfect but it is a reasonable guess for the size of the
// array required
ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length() * 2);
OutputStreamWriter osw = new OutputStreamWriter(baos, charset);
char[] sourceChars = str.toCharArray();
int len = sourceChars.length;
int ix = 0;
try {
while (ix < len) {
char c = sourceChars[ix++];
if (c == '%') {
osw.flush();
if (ix + 2 > len) {
throw new IllegalArgumentException(
sm.getString("uDecoder.urlDecode.missingDigit", str));
}
char c1 = sourceChars[ix++];
char c2 = sourceChars[ix++];
if (isHexDigit(c1) && isHexDigit(c2)) {
baos.write(x2c(c1, c2));
} else {
throw new IllegalArgumentException(
sm.getString("uDecoder.urlDecode.missingDigit", str));
}
} else if (c == '+' && isQuery) {
osw.append(' ');
} else {
osw.append(c);
}
}
osw.flush();
return baos.toString(charset.name());
} catch (IOException ioe) {
throw new IllegalArgumentException(
sm.getString("uDecoder.urlDecode.conversionError", str, charset.name()), ioe);
}
}
private static boolean isHexDigit( int c ) {
return ( ( c>='0' && c<='9' ) ||
( c>='a' && c<='f' ) ||
( c>='A' && c<='F' ));
}
private static int x2c( byte b1, byte b2 ) {
int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 :
(b1 -'0');
digit*=16;
digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 :
(b2 -'0');
return digit;
}
private static int x2c( char b1, char b2 ) {
int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 :
(b1 -'0');
digit*=16;
digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 :
(b2 -'0');
return digit;
}
}

View File

@@ -0,0 +1,167 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
/**
 * URL encoder that percent-encodes every character outside a configured safe
 * set, using the UTF-8 bytes of the character.
 *
 * Instances are not thread safe - use one encoder per thread. Internal buffers
 * are created on first use and recycled on later calls to avoid garbage.
 *
 * Extra characters that must be preserved (for example "/" while encoding a
 * URL path) are selected through {@link SafeCharsSet}.
 *
 * @author Costin Manolache
 */
public final class UEncoder {

    /** The available sets of characters that are passed through unencoded. */
    public enum SafeCharsSet {
        WITH_SLASH("/"), DEFAULT("");

        private final BitSet safeChars;

        private SafeCharsSet(String additionalSafeChars) {
            safeChars = initialSafeChars();
            // Add the extras on top of the common base set
            for (int i = 0; i < additionalSafeChars.length(); i++) {
                safeChars.set(additionalSafeChars.charAt(i));
            }
        }

        private BitSet getSafeChars() {
            return this.safeChars;
        }
    }

    // Not static - the set may differ ( it's better than adding
    // an extra check for "/", "+", etc
    private BitSet safeChars = null;
    private C2BConverter c2b = null;
    private ByteChunk bb = null;
    private CharChunk cb = null;
    private CharChunk output = null;

    /**
     * Create a UEncoder that uses the given safe character set.
     *
     * @param safeCharsSet safe characters for this encoder
     */
    public UEncoder(SafeCharsSet safeCharsSet) {
        this.safeChars = safeCharsSet.getSafeChars();
    }

    /**
     * URL encode part of a string. Characters in the safe set are copied
     * as-is; all others are written as the %xx form of their UTF-8 bytes.
     *
     * @param s     string to be encoded
     * @param start the beginning index, inclusive
     * @param end   the ending index, exclusive
     *
     * @return the CharChunk containing the URL encoded string; the same
     *         instance is reused by the next call on this encoder
     *
     * @throws IOException If an I/O error occurs
     */
    public CharChunk encodeURL(String s, int start, int end) throws IOException {
        if (c2b != null) {
            // Buffers exist from an earlier call: reset them
            bb.recycle();
            cb.recycle();
            output.recycle();
        } else {
            bb = new ByteChunk(8); // small enough.
            cb = new CharChunk(2); // small enough.
            output = new CharChunk(64); // small enough.
            c2b = new C2BConverter(StandardCharsets.UTF_8);
        }

        for (int pos = start; pos < end; pos++) {
            final char current = s.charAt(pos);
            if (safeChars.get(current)) {
                output.append(current);
                continue;
            }

            cb.append(current);
            c2b.convert(cb, bb);

            // A high surrogate followed by a low surrogate forms a single
            // code point: feed the second half to the converter as well.
            if (Character.isHighSurrogate(current) && (pos + 1) < end) {
                final char following = s.charAt(pos + 1);
                if (Character.isLowSurrogate(following)) {
                    cb.append(following);
                    c2b.convert(cb, bb);
                    pos++;
                }
            }

            urlEncode(output, bb);
            cb.recycle();
            bb.recycle();
        }

        return output;
    }

    /**
     * Append every byte of {@code bb} to {@code out} as '%' followed by two
     * lower case hex digits.
     *
     * @param out destination for the encoded text
     * @param bb  bytes to encode
     *
     * @throws IOException If appending to {@code out} fails
     */
    protected void urlEncode(CharChunk out, ByteChunk bb) throws IOException {
        final byte[] raw = bb.getBuffer();
        final int stop = bb.getEnd();
        for (int k = bb.getStart(); k < stop; k++) {
            final int value = raw[k];
            out.append('%');
            out.append(Character.forDigit((value >> 4) & 0xF, 16));
            out.append(Character.forDigit(value & 0xF, 16));
        }
    }

    // -------------------- Internal implementation --------------------

    /**
     * Build the base safe set: ASCII letters, digits and a fixed list of
     * punctuation characters.
     */
    private static BitSet initialSafeChars() {
        final BitSet base = new BitSet(128);
        base.set('a', 'z' + 1);
        base.set('A', 'Z' + 1);
        base.set('0', '9' + 1);

        // '+' is deliberately left out: someone may treat it as " ".
        // RFC1738 does allow it, it's not reserved.
        // First four are the "safe" characters, the rest are "extra".
        final String punctuation = "$-_.!*'(),";
        for (int i = 0; i < punctuation.length(); i++) {
            base.set(punctuation.charAt(i));
        }
        return base;
    }
}

View File

@@ -0,0 +1,197 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Pattern;
/**
 * Utility class for working with URIs and URLs.
 */
public final class UriUtil {

    private static final char[] HEX =
            {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    private static final Pattern PATTERN_EXCLAMATION_MARK = Pattern.compile("!/");
    private static final Pattern PATTERN_CARET = Pattern.compile("\\^/");
    private static final Pattern PATTERN_ASTERISK = Pattern.compile("\\*/");
    // Both are null unless a custom WAR separator has been configured
    private static final Pattern PATTERN_CUSTOM;
    private static final String REPLACE_CUSTOM;

    private static final String WAR_SEPARATOR;

    static {
        String custom = System.getProperty("org.apache.tomcat.util.buf.UriUtil.WAR_SEPARATOR");
        if (custom == null) {
            WAR_SEPARATOR = "*/";
            PATTERN_CUSTOM = null;
            REPLACE_CUSTOM = null;
        } else {
            WAR_SEPARATOR = custom + "/";
            PATTERN_CUSTOM = Pattern.compile(Pattern.quote(WAR_SEPARATOR));
            StringBuilder sb = new StringBuilder(custom.length() * 3 + 1);
            // Deliberately use the platform's default encoding
            byte[] ba = custom.getBytes();
            for (int j = 0; j < ba.length; j++) {
                // Converting each byte in the buffer
                byte toEncode = ba[j];
                sb.append('%');
                int low = toEncode & 0x0f;
                int high = (toEncode & 0xf0) >> 4;
                sb.append(HEX[high]);
                sb.append(HEX[low]);
            }
            // The pattern consumes the trailing '/', so the replacement has to
            // put it back (as the built-in "%21/", "%5e/", "%2a/" do).
            sb.append('/');
            REPLACE_CUSTOM = sb.toString();
        }
    }


    private UriUtil() {
        // Utility class. Hide default constructor
    }


    /**
     * Determine if the character is allowed in the scheme of a URI.
     * See RFC 2396, Section 3.1
     *
     * @param c The character to test
     *
     * @return {@code true} if the character is allowed, otherwise
     *         {@code false}
     */
    private static boolean isSchemeChar(char c) {
        return Character.isLetterOrDigit(c) || c == '+' || c == '-' || c == '.';
    }


    /**
     * Determine if a URI string has a <code>scheme</code> component.
     *
     * @param uri The URI to test
     *
     * @return {@code true} if a scheme is present, otherwise {@code false}
     */
    public static boolean hasScheme(CharSequence uri) {
        int len = uri.length();
        for (int i = 0; i < len; i++) {
            char c = uri.charAt(i);
            if (c == ':') {
                // A leading ':' means the scheme would be empty
                return i > 0;
            } else if (!UriUtil.isSchemeChar(c)) {
                return false;
            }
        }
        return false;
    }


    /**
     * Build the {@code jar:} URL for the root of the given JAR file.
     *
     * @param jarFile The JAR file
     * @return the URL {@code jar:<file URL>!/}
     * @throws MalformedURLException If the URL cannot be created
     */
    public static URL buildJarUrl(File jarFile) throws MalformedURLException {
        return buildJarUrl(jarFile, null);
    }


    /**
     * Build the {@code jar:} URL for an entry of the given JAR file.
     *
     * @param jarFile   The JAR file
     * @param entryPath Path of the entry inside the JAR, or {@code null} for
     *                  the root
     * @return the URL {@code jar:<file URL>!/<entryPath>}
     * @throws MalformedURLException If the URL cannot be created
     */
    public static URL buildJarUrl(File jarFile, String entryPath) throws MalformedURLException {
        return buildJarUrl(jarFile.toURI().toString(), entryPath);
    }


    /**
     * Build the {@code jar:} URL for the root of the JAR at the given URL.
     *
     * @param fileUrlString URL of the JAR file, as a string
     * @return the URL {@code jar:<fileUrlString>!/}
     * @throws MalformedURLException If the URL cannot be created
     */
    public static URL buildJarUrl(String fileUrlString) throws MalformedURLException {
        return buildJarUrl(fileUrlString, null);
    }


    /**
     * Build the {@code jar:} URL for an entry of the JAR at the given URL.
     * Sequences with a special meaning in JAR/WAR URLs are escaped in both
     * arguments.
     *
     * @param fileUrlString URL of the JAR file, as a string
     * @param entryPath     Path of the entry inside the JAR, or {@code null}
     *                      for the root
     * @return the URL {@code jar:<fileUrlString>!/<entryPath>}
     * @throws MalformedURLException If the URL cannot be created
     */
    public static URL buildJarUrl(String fileUrlString, String entryPath) throws MalformedURLException {
        String safeString = makeSafeForJarUrl(fileUrlString);
        StringBuilder sb = new StringBuilder();
        sb.append(safeString);
        sb.append("!/");
        if (entryPath != null) {
            sb.append(makeSafeForJarUrl(entryPath));
        }
        return new URL("jar", null, -1, sb.toString());
    }


    /**
     * Convert a file to its URL with the JAR/WAR separator sequences escaped.
     *
     * @param file The file
     * @return the escaped file URL
     * @throws MalformedURLException If the URL cannot be created
     */
    public static URL buildJarSafeUrl(File file) throws MalformedURLException {
        String safe = makeSafeForJarUrl(file.toURI().toString());
        return new URL(safe);
    }


    /*
     * When testing on markt's desktop each iteration was taking ~1420ns when
     * using String.replaceAll().
     *
     * Switching the implementation to use pre-compiled patterns and
     * Pattern.matcher(input).replaceAll(replacement) reduced this by ~10%.
     *
     * Note: Given the very small absolute time of a single iteration, even for
     *       a web application with 1000 JARs this is only going to add ~3ms.
     *       It is therefore unlikely that further optimisation will be
     *       necessary.
     */
    /*
     * Pulled out into a separate method in case we need to handle other unusual
     * sequences in the future.
     */
    private static String makeSafeForJarUrl(String input) {
        // Since "!/" has a special meaning in a JAR URL, make sure that the
        // sequence is properly escaped if present.
        String tmp = PATTERN_EXCLAMATION_MARK.matcher(input).replaceAll("%21/");
        // Tomcat's custom jar:war: URL handling treats */ and ^/ as special
        tmp = PATTERN_CARET.matcher(tmp).replaceAll("%5e/");
        tmp = PATTERN_ASTERISK.matcher(tmp).replaceAll("%2a/");
        if (PATTERN_CUSTOM != null) {
            // REPLACE_CUSTOM only contains '%', hex digits and '/', so it is
            // safe to use as a regex replacement string
            tmp = PATTERN_CUSTOM.matcher(tmp).replaceAll(REPLACE_CUSTOM);
        }
        return tmp;
    }


    /**
     * Convert a URL of the form <code>war:file:...</code> to
     * <code>jar:file:...</code>. Only the first separator found is replaced,
     * checking for "*&#47;" first, then "^/", then the custom separator.
     *
     * @param warUrl The WAR URL to convert
     *
     * @return The equivalent JAR URL
     *
     * @throws MalformedURLException If the conversion fails
     */
    public static URL warToJar(URL warUrl) throws MalformedURLException {
        // Assumes that the spec is absolute and starts war:file:/...
        String file = warUrl.getFile();
        if (file.contains("*/")) {
            file = PATTERN_ASTERISK.matcher(file).replaceFirst("!/");
        } else if (file.contains("^/")) {
            file = PATTERN_CARET.matcher(file).replaceFirst("!/");
        } else if (PATTERN_CUSTOM != null) {
            file = PATTERN_CUSTOM.matcher(file).replaceFirst("!/");
        }

        return new URL("jar", warUrl.getHost(), warUrl.getPort(), file);
    }


    /**
     * @return the separator between the WAR file and the entry in a WAR URL:
     *         the configured custom value followed by "/", or "*&#47;" when
     *         none is configured
     */
    public static String getWarSeparator() {
        return WAR_SEPARATOR;
    }
}

View File

@@ -0,0 +1,299 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
/**
 * Decodes UTF-8 encoded bytes to characters. Extracted from Apache Harmony and
 * modified to reject code points from U+D800 to U+DFFF as per RFC3629. The
 * standard Java decoder does not reject these. It has also been modified to
 * reject code points greater than U+10FFFF which the standard Java decoder
 * rejects but the harmony one does not.
 */
public class Utf8Decoder extends CharsetDecoder {

    // The next table contains information about UTF-8 charset and
    // correspondence of 1st byte to the length of sequence
    // For information please visit http://www.ietf.org/rfc/rfc3629.txt
    //
    // Please note, o means 0, actually.
    // -------------------------------------------------------------------
    // 0         1         2         3          Value
    // -------------------------------------------------------------------
    // oxxxxxxx                                 00000000 00000000 0xxxxxxx
    // 11oyyyyy  1oxxxxxx                       00000000 00000yyy yyxxxxxx
    // 111ozzzz  1oyyyyyy  1oxxxxxx             00000000 zzzzyyyy yyxxxxxx
    // 1111ouuu  1ouuzzzz  1oyyyyyy  1oxxxxxx   000uuuuu zzzzyyyy yyxxxxxx

    // Indexed by (first byte & 0x7F) for first bytes >= 0x80: number of
    // continuation bytes expected, or -1 if the byte cannot start a sequence
    private static final int remainingBytes[] = {
            // 1owwwwww
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            // 11oyyyyy
            -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            // 111ozzzz
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            // 1111ouuu
            3, 3, 3, 3, 3, -1, -1, -1,
            // > 11110111
            -1, -1, -1, -1, -1, -1, -1, -1};

    // Indexed by tail length: the accumulated marker bits to subtract after
    // the bytes have been shifted together
    private static final int remainingNumbers[] = {0, // 0 1 2 3
            4224, // (01o00000b << 6)+(1o000000b)
            401536, // (011o0000b << 12)+(1o000000b << 6)+(1o000000b)
            29892736 // (0111o000b << 18)+(1o000000b << 12)+(1o000000b <<
                     // 6)+(1o000000b)
    };

    // Indexed by tail length: smallest code point that legitimately needs
    // that many continuation bytes (anything lower is an overlong encoding)
    private static final int lowerEncodingLimit[] = {-1, 0x80, 0x800, 0x10000};


    public Utf8Decoder() {
        super(StandardCharsets.UTF_8, 1.0f, 1.0f);
    }


    @Override
    protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
        if (in.hasArray() && out.hasArray()) {
            return decodeHasArray(in, out);
        }
        return decodeNotHasArray(in, out);
    }


    /**
     * Decode using only the buffers' relative get/put methods. On return the
     * input position is always at the start of the first sequence that was
     * not completely decoded and written.
     */
    private CoderResult decodeNotHasArray(ByteBuffer in, CharBuffer out) {
        int outRemaining = out.remaining();
        // pos is the start of the sequence currently being decoded. It is
        // only advanced once that sequence has been written to out, so every
        // early return below leaves the input positioned at the sequence
        // start (including OVERFLOW and the unmappable results).
        int pos = in.position();
        int limit = in.limit();
        try {
            while (pos < limit) {
                if (outRemaining == 0) {
                    return CoderResult.OVERFLOW;
                }
                int jchar = in.get();
                int tail = 0;
                if (jchar < 0) {
                    jchar = jchar & 0x7F;
                    tail = remainingBytes[jchar];
                    if (tail == -1) {
                        return CoderResult.malformedForLength(1);
                    }
                    if (limit - pos < 1 + tail) {
                        // No early test for invalid sequences here as peeking
                        // at the next byte is harder
                        return CoderResult.UNDERFLOW;
                    }
                    int nextByte;
                    for (int i = 0; i < tail; i++) {
                        nextByte = in.get() & 0xFF;
                        if ((nextByte & 0xC0) != 0x80) {
                            return CoderResult.malformedForLength(1 + i);
                        }
                        jchar = (jchar << 6) + nextByte;
                    }
                    jchar -= remainingNumbers[tail];
                    if (jchar < lowerEncodingLimit[tail]) {
                        // Should have been encoded in a fewer octets
                        return CoderResult.malformedForLength(1);
                    }
                }
                // Apache Tomcat added test
                if (jchar >= 0xD800 && jchar <= 0xDFFF) {
                    return CoderResult.unmappableForLength(3);
                }
                // Apache Tomcat added test
                if (jchar > 0x10FFFF) {
                    return CoderResult.unmappableForLength(4);
                }
                if (jchar <= 0xffff) {
                    out.put((char) jchar);
                    outRemaining--;
                } else {
                    if (outRemaining < 2) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((char) ((jchar >> 0xA) + 0xD7C0));
                    out.put((char) ((jchar & 0x3FF) + 0xDC00));
                    outRemaining -= 2;
                }
                // Sequence fully consumed: first byte plus its tail
                pos += 1 + tail;
            }
            return CoderResult.UNDERFLOW;
        } finally {
            in.position(pos);
        }
    }


    /**
     * Decode by working directly on the backing arrays of both buffers. On
     * return the input position is always at the start of the first sequence
     * that was not completely decoded and written.
     */
    private CoderResult decodeHasArray(ByteBuffer in, CharBuffer out) {
        int outRemaining = out.remaining();
        final byte[] bArr = in.array();
        final char[] cArr = out.array();
        final int inIndexLimit = in.limit() + in.arrayOffset();
        // inIndex is the array index of the start of the current sequence; it
        // only moves past the tail once the character has been written
        int inIndex = in.position() + in.arrayOffset();
        int outIndex = out.position() + out.arrayOffset();
        // if someone would change the limit in process,
        // he would face consequences
        for (; inIndex < inIndexLimit && outRemaining > 0; inIndex++) {
            int jchar = bArr[inIndex];
            int tail = 0;
            if (jchar < 0) {
                jchar = jchar & 0x7F;
                // If first byte is invalid, tail will be set to -1
                tail = remainingBytes[jchar];
                if (tail == -1) {
                    syncPositions(in, inIndex, out, outIndex);
                    return CoderResult.malformedForLength(1);
                }
                // Additional checks to detect invalid sequences ASAP
                // Checks derived from Unicode 6.2, Chapter 3, Table 3-7
                int tailAvailable = inIndexLimit - inIndex - 1;
                // Check 2nd byte
                if (tailAvailable > 0 &&
                        !isValidSecondByte(jchar, bArr[inIndex + 1] & 0xFF)) {
                    syncPositions(in, inIndex, out, outIndex);
                    return CoderResult.malformedForLength(1);
                }
                // Check third byte if present and expected
                if (tailAvailable > 1 && tail > 1 &&
                        (bArr[inIndex + 2] & 0xC0) != 0x80) {
                    syncPositions(in, inIndex, out, outIndex);
                    return CoderResult.malformedForLength(2);
                }
                // Check fourth byte if present and expected
                if (tailAvailable > 2 && tail > 2 &&
                        (bArr[inIndex + 3] & 0xC0) != 0x80) {
                    syncPositions(in, inIndex, out, outIndex);
                    return CoderResult.malformedForLength(3);
                }
                if (tailAvailable < tail) {
                    // Incomplete sequence: wait for more input
                    break;
                }
                for (int i = 0; i < tail; i++) {
                    int nextByte = bArr[inIndex + i + 1] & 0xFF;
                    if ((nextByte & 0xC0) != 0x80) {
                        syncPositions(in, inIndex, out, outIndex);
                        return CoderResult.malformedForLength(1 + i);
                    }
                    jchar = (jchar << 6) + nextByte;
                }
                jchar -= remainingNumbers[tail];
                if (jchar < lowerEncodingLimit[tail]) {
                    // Should have been encoded in fewer octets
                    syncPositions(in, inIndex, out, outIndex);
                    return CoderResult.malformedForLength(1);
                }
            }
            // Apache Tomcat added test
            if (jchar >= 0xD800 && jchar <= 0xDFFF) {
                syncPositions(in, inIndex, out, outIndex);
                return CoderResult.unmappableForLength(3);
            }
            // Apache Tomcat added test
            if (jchar > 0x10FFFF) {
                syncPositions(in, inIndex, out, outIndex);
                return CoderResult.unmappableForLength(4);
            }
            if (jchar <= 0xffff) {
                cArr[outIndex++] = (char) jchar;
                outRemaining--;
            } else {
                if (outRemaining < 2) {
                    // No room for the surrogate pair. inIndex still points to
                    // the first byte of the 4 byte sequence.
                    syncPositions(in, inIndex, out, outIndex);
                    return CoderResult.OVERFLOW;
                }
                cArr[outIndex++] = (char) ((jchar >> 0xA) + 0xD7C0);
                cArr[outIndex++] = (char) ((jchar & 0x3FF) + 0xDC00);
                outRemaining -= 2;
            }
            // Skip the tail; the loop increment skips the first byte
            inIndex += tail;
        }
        syncPositions(in, inIndex, out, outIndex);
        return (outRemaining == 0 && inIndex < inIndexLimit) ?
                CoderResult.OVERFLOW :
                CoderResult.UNDERFLOW;
    }


    /**
     * Write the array indexes used by {@link #decodeHasArray} back to the
     * buffers as positions.
     */
    private static void syncPositions(ByteBuffer in, int inIndex, CharBuffer out,
            int outIndex) {
        in.position(inIndex - in.arrayOffset());
        out.position(outIndex - out.arrayOffset());
    }


    /**
     * Check the second byte of a multi-byte sequence against the ranges of
     * Unicode 6.2, Chapter 3, Table 3-7.
     *
     * @param lead   the first byte with the top bit removed (byte &amp; 0x7F);
     *               must be a value for which remainingBytes is not -1
     * @param second the second byte as an unsigned value
     * @return {@code true} if the second byte is allowed after that first byte
     */
    private static boolean isValidSecondByte(int lead, int second) {
        switch (lead) {
        case 0x60:
            // First byte E0, second byte A0..BF
            return second >= 0xA0 && second <= 0xBF;
        case 0x6D:
            // First byte ED, second byte 80..9F
            return second >= 0x80 && second <= 0x9F;
        case 0x70:
            // First byte F0, second byte 90..BF
            return second >= 0x90 && second <= 0xBF;
        case 0x74:
            // First byte F4, second byte 80..8F
            return second >= 0x80 && second <= 0x8F;
        default:
            // C2..DF, E1..EC, EE..EF, F1..F3: second byte 80..BF
            return (second & 0xC0) == 0x80;
        }
    }
}

View File

@@ -0,0 +1,235 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
/**
* Encodes characters as bytes using UTF-8. Extracted from Apache Harmony with
* some minor bug fixes applied.
*/
public class Utf8Encoder extends CharsetEncoder {
/**
 * Create an encoder for UTF-8. The values passed to the superclass are the
 * average (1.1) and maximum (4.0) number of bytes produced per input char.
 */
public Utf8Encoder() {
super(StandardCharsets.UTF_8, 1.1f, 4.0f);
}
/**
 * Dispatch to the array based implementation when both buffers expose a
 * backing array, otherwise to the implementation that uses get/put.
 */
@Override
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
    final boolean bothArrayBacked = in.hasArray() && out.hasArray();
    return bothArrayBacked ? encodeHasArray(in, out) : encodeNotHasArray(in, out);
}
/**
 * Encode by working directly on the backing arrays of both buffers.
 *
 * x and outPos are buffer positions; the buffers' array offsets are added
 * whenever the backing arrays are indexed so that sliced or offset-wrapped
 * buffers are handled correctly.
 *
 * @param in  array-backed source of chars
 * @param out array-backed destination for the UTF-8 bytes
 * @return UNDERFLOW when the input is used up (or ends with an unpaired high
 *         surrogate), OVERFLOW when out is full, or a malformed result of
 *         length 1 for an invalid surrogate sequence
 */
private CoderResult encodeHasArray(CharBuffer in, ByteBuffer out) {
    int outRemaining = out.remaining();
    final int pos = in.position();
    final int limit = in.limit();
    final byte[] bArr = out.array();
    final char[] cArr = in.array();
    final int inOffset = in.arrayOffset();
    final int outOffset = out.arrayOffset();
    int outPos = out.position();
    final int rem = in.remaining();
    int x;
    for (x = pos; x < pos + rem; x++) {
        int jchar = (cArr[inOffset + x] & 0xFFFF);

        if (jchar <= 0x7F) {
            if (outRemaining < 1) {
                in.position(x);
                out.position(outPos);
                return CoderResult.OVERFLOW;
            }
            bArr[outOffset + outPos++] = (byte) (jchar & 0xFF);
            outRemaining--;
        } else if (jchar <= 0x7FF) {
            if (outRemaining < 2) {
                in.position(x);
                out.position(outPos);
                return CoderResult.OVERFLOW;
            }
            bArr[outOffset + outPos++] = (byte) (0xC0 + ((jchar >> 6) & 0x1F));
            bArr[outOffset + outPos++] = (byte) (0x80 + (jchar & 0x3F));
            outRemaining -= 2;
        } else if (jchar >= 0xD800 && jchar <= 0xDFFF) {
            // in has to have one char more.
            if (limit <= x + 1) {
                in.position(x);
                out.position(outPos);
                return CoderResult.UNDERFLOW;
            }
            if (outRemaining < 4) {
                in.position(x);
                out.position(outPos);
                return CoderResult.OVERFLOW;
            }
            // The surrogate pair starts with a low-surrogate.
            if (jchar >= 0xDC00) {
                in.position(x);
                out.position(outPos);
                return CoderResult.malformedForLength(1);
            }
            int jchar2 = cArr[inOffset + x + 1] & 0xFFFF;
            // The high-surrogate is not followed by a low-surrogate
            // (U+DC00..U+DFFF).
            if (jchar2 < 0xDC00 || jchar2 > 0xDFFF) {
                in.position(x);
                out.position(outPos);
                return CoderResult.malformedForLength(1);
            }
            // Note, the Unicode scalar value n is defined
            // as follows:
            // n = (jchar-0xD800)*0x400+(jchar2-0xDC00)+0x10000
            // Where jchar is a high-surrogate,
            // jchar2 is a low-surrogate.
            // 0xFCA02400 is the int form of -(0xD800 << 10) - 0xDC00 + 0x10000
            int n = (jchar << 10) + jchar2 + 0xFCA02400;
            bArr[outOffset + outPos++] = (byte) (0xF0 + ((n >> 18) & 0x07));
            bArr[outOffset + outPos++] = (byte) (0x80 + ((n >> 12) & 0x3F));
            bArr[outOffset + outPos++] = (byte) (0x80 + ((n >> 6) & 0x3F));
            bArr[outOffset + outPos++] = (byte) (0x80 + (n & 0x3F));
            outRemaining -= 4;
            // Two chars were consumed
            x++;
        } else {
            if (outRemaining < 3) {
                in.position(x);
                out.position(outPos);
                return CoderResult.OVERFLOW;
            }
            bArr[outOffset + outPos++] = (byte) (0xE0 + ((jchar >> 12) & 0x0F));
            bArr[outOffset + outPos++] = (byte) (0x80 + ((jchar >> 6) & 0x3F));
            bArr[outOffset + outPos++] = (byte) (0x80 + (jchar & 0x3F));
            outRemaining -= 3;
        }
        if (outRemaining == 0) {
            in.position(x + 1);
            out.position(outPos);
            // If both input and output are exhausted, return UNDERFLOW
            if (x + 1 == limit) {
                return CoderResult.UNDERFLOW;
            } else {
                return CoderResult.OVERFLOW;
            }
        }
    }
    if (rem != 0) {
        in.position(x);
        out.position(outPos);
    }
    return CoderResult.UNDERFLOW;
}
private CoderResult encodeNotHasArray(CharBuffer in, ByteBuffer out) {
int outRemaining = out.remaining();
int pos = in.position();
int limit = in.limit();
try {
while (pos < limit) {
if (outRemaining == 0) {
return CoderResult.OVERFLOW;
}
int jchar = (in.get() & 0xFFFF);
if (jchar <= 0x7F) {
if (outRemaining < 1) {
return CoderResult.OVERFLOW;
}
out.put((byte) jchar);
outRemaining--;
} else if (jchar <= 0x7FF) {
if (outRemaining < 2) {
return CoderResult.OVERFLOW;
}
out.put((byte) (0xC0 + ((jchar >> 6) & 0x1F)));
out.put((byte) (0x80 + (jchar & 0x3F)));
outRemaining -= 2;
} else if (jchar >= 0xD800 && jchar <= 0xDFFF) {
// in has to have one byte more.
if (limit <= pos + 1) {
return CoderResult.UNDERFLOW;
}
if (outRemaining < 4) {
return CoderResult.OVERFLOW;
}
// The surrogate pair starts with a low-surrogate.
if (jchar >= 0xDC00) {
return CoderResult.malformedForLength(1);
}
int jchar2 = (in.get() & 0xFFFF);
// The surrogate pair ends with a high-surrogate.
if (jchar2 < 0xDC00) {
return CoderResult.malformedForLength(1);
}
// Note, the Unicode scalar value n is defined
// as follows:
// n = (jchar-0xD800)*0x400+(jchar2-0xDC00)+0x10000
// Where jchar is a high-surrogate,
// jchar2 is a low-surrogate.
int n = (jchar << 10) + jchar2 + 0xFCA02400;
out.put((byte) (0xF0 + ((n >> 18) & 0x07)));
out.put((byte) (0x80 + ((n >> 12) & 0x3F)));
out.put((byte) (0x80 + ((n >> 6) & 0x3F)));
out.put((byte) (0x80 + (n & 0x3F)));
outRemaining -= 4;
pos++;
} else {
if (outRemaining < 3) {
return CoderResult.OVERFLOW;
}
out.put((byte) (0xE0 + ((jchar >> 12) & 0x0F)));
out.put((byte) (0x80 + ((jchar >> 6) & 0x3F)));
out.put((byte) (0x80 + (jchar & 0x3F)));
outRemaining -= 3;
}
pos++;
}
} finally {
in.position(pos);
}
return CoderResult.UNDERFLOW;
}
}

View File

@@ -0,0 +1,37 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html><body>
<H2>Buffers and Encodings</h2>
This package contains buffers and utils to perform encoding/decoding of buffers. That includes byte to char
conversions, URL encodings, etc.
<p>
Encoding is a critical operation for performance. There are a few tricks in this package - the C2B and
B2C converters cache an ISReader/OSWriter and keep everything allocated so that conversions can be done
in any VM without generating any garbage.
<p>
This package must accommodate future extensions and additional converters ( most important: the nio.charset,
which should be detected and used if available ). Also, we do have one hand-written UTF8Decoder, and
other tuned encoders could be added.
<p>
My benchmarks ( I'm costin :-) ) show only small differences between C2B, B2C and hand-written coders/decoders,
so UTF8Decoder may be disabled.
</body></html>