init
This commit is contained in:
167
java/org/apache/tomcat/util/buf/UEncoder.java
Normal file
167
java/org/apache/tomcat/util/buf/UEncoder.java
Normal file
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tomcat.util.buf;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* Efficient implementation of an UTF-8 encoder.
|
||||
* This class is not thread safe - you need one encoder per thread.
|
||||
* The encoder will save and recycle the internal objects, avoiding
|
||||
* garbage.
|
||||
*
|
||||
* You can add extra characters that you want preserved, for example
|
||||
* while encoding a URL you can add "/".
|
||||
*
|
||||
* @author Costin Manolache
|
||||
*/
|
||||
public final class UEncoder {
|
||||
|
||||
public enum SafeCharsSet {
|
||||
WITH_SLASH("/"), DEFAULT("");
|
||||
private final BitSet safeChars;
|
||||
|
||||
private BitSet getSafeChars() {
|
||||
return this.safeChars;
|
||||
}
|
||||
|
||||
private SafeCharsSet(String additionalSafeChars) {
|
||||
safeChars = initialSafeChars();
|
||||
for (char c : additionalSafeChars.toCharArray()) {
|
||||
safeChars.set(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Not static - the set may differ ( it's better than adding
|
||||
// an extra check for "/", "+", etc
|
||||
private BitSet safeChars=null;
|
||||
private C2BConverter c2b=null;
|
||||
private ByteChunk bb=null;
|
||||
private CharChunk cb=null;
|
||||
private CharChunk output=null;
|
||||
|
||||
/**
|
||||
* Create a UEncoder with an unmodifiable safe character set.
|
||||
*
|
||||
* @param safeCharsSet safe characters for this encoder
|
||||
*/
|
||||
public UEncoder(SafeCharsSet safeCharsSet) {
|
||||
this.safeChars = safeCharsSet.getSafeChars();
|
||||
}
|
||||
|
||||
/**
|
||||
* URL Encode string, using a specified encoding.
|
||||
*
|
||||
* @param s string to be encoded
|
||||
* @param start the beginning index, inclusive
|
||||
* @param end the ending index, exclusive
|
||||
*
|
||||
* @return A new CharChunk contained the URL encoded string
|
||||
*
|
||||
* @throws IOException If an I/O error occurs
|
||||
*/
|
||||
public CharChunk encodeURL(String s, int start, int end)
|
||||
throws IOException {
|
||||
if (c2b == null) {
|
||||
bb = new ByteChunk(8); // small enough.
|
||||
cb = new CharChunk(2); // small enough.
|
||||
output = new CharChunk(64); // small enough.
|
||||
c2b = new C2BConverter(StandardCharsets.UTF_8);
|
||||
} else {
|
||||
bb.recycle();
|
||||
cb.recycle();
|
||||
output.recycle();
|
||||
}
|
||||
|
||||
for (int i = start; i < end; i++) {
|
||||
char c = s.charAt(i);
|
||||
if (safeChars.get(c)) {
|
||||
output.append(c);
|
||||
} else {
|
||||
cb.append(c);
|
||||
c2b.convert(cb, bb);
|
||||
|
||||
// "surrogate" - UTF is _not_ 16 bit, but 21 !!!!
|
||||
// ( while UCS is 31 ). Amazing...
|
||||
if (c >= 0xD800 && c <= 0xDBFF) {
|
||||
if ((i+1) < end) {
|
||||
char d = s.charAt(i+1);
|
||||
if (d >= 0xDC00 && d <= 0xDFFF) {
|
||||
cb.append(d);
|
||||
c2b.convert(cb, bb);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
urlEncode(output, bb);
|
||||
cb.recycle();
|
||||
bb.recycle();
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
protected void urlEncode(CharChunk out, ByteChunk bb)
|
||||
throws IOException {
|
||||
byte[] bytes = bb.getBuffer();
|
||||
for (int j = bb.getStart(); j < bb.getEnd(); j++) {
|
||||
out.append('%');
|
||||
char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16);
|
||||
out.append(ch);
|
||||
ch = Character.forDigit(bytes[j] & 0xF, 16);
|
||||
out.append(ch);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------- Internal implementation --------------------
|
||||
|
||||
private static BitSet initialSafeChars() {
|
||||
BitSet initialSafeChars=new BitSet(128);
|
||||
int i;
|
||||
for (i = 'a'; i <= 'z'; i++) {
|
||||
initialSafeChars.set(i);
|
||||
}
|
||||
for (i = 'A'; i <= 'Z'; i++) {
|
||||
initialSafeChars.set(i);
|
||||
}
|
||||
for (i = '0'; i <= '9'; i++) {
|
||||
initialSafeChars.set(i);
|
||||
}
|
||||
//safe
|
||||
initialSafeChars.set('$');
|
||||
initialSafeChars.set('-');
|
||||
initialSafeChars.set('_');
|
||||
initialSafeChars.set('.');
|
||||
|
||||
// Dangerous: someone may treat this as " "
|
||||
// RFC1738 does allow it, it's not reserved
|
||||
// initialSafeChars.set('+');
|
||||
//extra
|
||||
initialSafeChars.set('!');
|
||||
initialSafeChars.set('*');
|
||||
initialSafeChars.set('\'');
|
||||
initialSafeChars.set('(');
|
||||
initialSafeChars.set(')');
|
||||
initialSafeChars.set(',');
|
||||
return initialSafeChars;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user