init
This commit is contained in:
235
java/org/apache/tomcat/util/buf/Utf8Encoder.java
Normal file
235
java/org/apache/tomcat/util/buf/Utf8Encoder.java
Normal file
@@ -0,0 +1,235 @@
|
||||
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
||||
package org.apache.tomcat.util.buf;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
/**
 * Encodes characters as bytes using UTF-8. Extracted from Apache Harmony with
 * some minor bug fixes applied.
 * <p>
 * Two code paths are provided: a fast path that works directly on the backing
 * arrays when both buffers expose one, and a generic path that goes through
 * the relative {@code get}/{@code put} methods of the buffers.
 * <p>
 * Unpaired surrogates are reported as malformed input of length 1. A high
 * surrogate that is the last available char yields
 * {@link CoderResult#UNDERFLOW} with the surrogate left unconsumed so that the
 * caller can supply the second half of the pair.
 */
public class Utf8Encoder extends CharsetEncoder {

    /**
     * Creates an encoder for UTF-8 that advertises an average of 1.1 and a
     * maximum of 4 bytes per char.
     */
    public Utf8Encoder() {
        super(StandardCharsets.UTF_8, 1.1f, 4.0f);
    }

    /**
     * Encodes as many chars as possible from {@code in} into {@code out}.
     *
     * @param in  the chars to encode; its position is advanced past every char
     *            that was fully encoded
     * @param out the destination; its position is advanced past every byte
     *            written
     * @return {@link CoderResult#UNDERFLOW} when more input is needed (or all
     *         input was consumed), {@link CoderResult#OVERFLOW} when
     *         {@code out} has insufficient room for the next char, or a
     *         malformed-input result of length 1 for an unpaired surrogate
     */
    @Override
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        if (in.hasArray() && out.hasArray()) {
            return encodeHasArray(in, out);
        }
        return encodeNotHasArray(in, out);
    }

    /**
     * Fast path used when both buffers are backed by accessible arrays.
     * <p>
     * {@code x} and {@code outPos} are buffer positions; the array indices are
     * obtained by adding each buffer's {@code arrayOffset()}, which is
     * non-zero for sliced buffers.
     */
    private CoderResult encodeHasArray(CharBuffer in, ByteBuffer out) {
        final char[] cArr = in.array();
        final byte[] bArr = out.array();
        final int inOffset = in.arrayOffset();
        final int outOffset = out.arrayOffset();
        final int limit = in.limit();

        int outRemaining = out.remaining();
        int outPos = out.position();
        int x = in.position();

        for (; x < limit; x++) {
            // Widening a char to int always yields a value in 0..0xFFFF.
            int jchar = cArr[inOffset + x];

            if (jchar <= 0x7F) {
                if (outRemaining < 1) {
                    return finish(in, x, out, outPos, CoderResult.OVERFLOW);
                }
                bArr[outOffset + outPos++] = (byte) jchar;
                outRemaining--;

            } else if (jchar <= 0x7FF) {
                if (outRemaining < 2) {
                    return finish(in, x, out, outPos, CoderResult.OVERFLOW);
                }
                bArr[outOffset + outPos++] = (byte) (0xC0 + ((jchar >> 6) & 0x1F));
                bArr[outOffset + outPos++] = (byte) (0x80 + (jchar & 0x3F));
                outRemaining -= 2;

            } else if (jchar >= 0xD800 && jchar <= 0xDFFF) {
                // A surrogate needs one more char of input to form a pair.
                if (limit <= x + 1) {
                    return finish(in, x, out, outPos, CoderResult.UNDERFLOW);
                }
                if (outRemaining < 4) {
                    return finish(in, x, out, outPos, CoderResult.OVERFLOW);
                }
                // A pair must not start with a low surrogate.
                if (jchar >= 0xDC00) {
                    return finish(in, x, out, outPos, CoderResult.malformedForLength(1));
                }

                char second = cArr[inOffset + x + 1];
                // A high surrogate must be followed by a low surrogate
                // (0xDC00..0xDFFF); anything else leaves it unpaired.
                if (!Character.isLowSurrogate(second)) {
                    return finish(in, x, out, outPos, CoderResult.malformedForLength(1));
                }

                // n = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000
                int n = Character.toCodePoint((char) jchar, second);

                bArr[outOffset + outPos++] = (byte) (0xF0 + ((n >> 18) & 0x07));
                bArr[outOffset + outPos++] = (byte) (0x80 + ((n >> 12) & 0x3F));
                bArr[outOffset + outPos++] = (byte) (0x80 + ((n >> 6) & 0x3F));
                bArr[outOffset + outPos++] = (byte) (0x80 + (n & 0x3F));
                outRemaining -= 4;
                // Skip the low surrogate that was consumed with this pair.
                x++;

            } else {
                if (outRemaining < 3) {
                    return finish(in, x, out, outPos, CoderResult.OVERFLOW);
                }
                bArr[outOffset + outPos++] = (byte) (0xE0 + ((jchar >> 12) & 0x0F));
                bArr[outOffset + outPos++] = (byte) (0x80 + ((jchar >> 6) & 0x3F));
                bArr[outOffset + outPos++] = (byte) (0x80 + (jchar & 0x3F));
                outRemaining -= 3;
            }

            if (outRemaining == 0) {
                // Output is full. If the input is exhausted as well, report
                // UNDERFLOW; otherwise the caller must drain the output.
                CoderResult result = (x + 1 == limit) ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
                return finish(in, x + 1, out, outPos, result);
            }
        }

        return finish(in, x, out, outPos, CoderResult.UNDERFLOW);
    }

    /**
     * Publishes the locally tracked positions to both buffers and hands back
     * {@code result}, so every exit of the array path leaves the buffers in a
     * consistent state.
     */
    private static CoderResult finish(CharBuffer in, int inPos, ByteBuffer out, int outPos,
            CoderResult result) {
        in.position(inPos);
        out.position(outPos);
        return result;
    }

    /**
     * Generic path used when at least one buffer has no accessible array.
     * <p>
     * Chars are read with relative {@code get()} calls, which advance the
     * input position eagerly. {@code pos} tracks the position just past the
     * last fully encoded char, and the {@code finally} block restores the
     * input to it on every exit, rewinding over any char that was read but
     * could not be encoded.
     */
    private CoderResult encodeNotHasArray(CharBuffer in, ByteBuffer out) {
        int outRemaining = out.remaining();
        int pos = in.position();
        final int limit = in.limit();
        try {
            while (pos < limit) {
                if (outRemaining == 0) {
                    return CoderResult.OVERFLOW;
                }

                int jchar = in.get();

                if (jchar <= 0x7F) {
                    // outRemaining is known to be at least 1 here.
                    out.put((byte) jchar);
                    outRemaining--;

                } else if (jchar <= 0x7FF) {
                    if (outRemaining < 2) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) (0xC0 + ((jchar >> 6) & 0x1F)));
                    out.put((byte) (0x80 + (jchar & 0x3F)));
                    outRemaining -= 2;

                } else if (jchar >= 0xD800 && jchar <= 0xDFFF) {
                    // A surrogate needs one more char of input to form a pair.
                    if (limit <= pos + 1) {
                        return CoderResult.UNDERFLOW;
                    }
                    if (outRemaining < 4) {
                        return CoderResult.OVERFLOW;
                    }
                    // A pair must not start with a low surrogate.
                    if (jchar >= 0xDC00) {
                        return CoderResult.malformedForLength(1);
                    }

                    char second = in.get();
                    // A high surrogate must be followed by a low surrogate
                    // (0xDC00..0xDFFF); anything else leaves it unpaired.
                    if (!Character.isLowSurrogate(second)) {
                        return CoderResult.malformedForLength(1);
                    }

                    // n = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000
                    int n = Character.toCodePoint((char) jchar, second);

                    out.put((byte) (0xF0 + ((n >> 18) & 0x07)));
                    out.put((byte) (0x80 + ((n >> 12) & 0x3F)));
                    out.put((byte) (0x80 + ((n >> 6) & 0x3F)));
                    out.put((byte) (0x80 + (n & 0x3F)));
                    outRemaining -= 4;
                    // Account for the low surrogate consumed with this pair.
                    pos++;

                } else {
                    if (outRemaining < 3) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) (0xE0 + ((jchar >> 12) & 0x0F)));
                    out.put((byte) (0x80 + ((jchar >> 6) & 0x3F)));
                    out.put((byte) (0x80 + (jchar & 0x3F)));
                    outRemaining -= 3;
                }
                pos++;
            }
        } finally {
            in.position(pos);
        }
        return CoderResult.UNDERFLOW;
    }
}
|
||||
Reference in New Issue
Block a user