init
This commit is contained in:
678
test/org/apache/tomcat/util/buf/TestUtf8.java
Normal file
678
test/org/apache/tomcat/util/buf/TestUtf8.java
Normal file
@@ -0,0 +1,678 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tomcat.util.buf;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* These tests have been written with reference to
|
||||
* <a href="http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf">unicode 6.2,
|
||||
* chapter 3, section 3.9</a>.
|
||||
*/
|
||||
public class TestUtf8 {
|
||||
|
||||
// Indicates that at invalid sequence is detected one character later than
|
||||
// the earliest possible moment
|
||||
private static final int ERROR_POS_PLUS1 = 1;
|
||||
// Indicates that at invalid sequence is detected two characters later than
|
||||
// the earliest possible moment
|
||||
private static final int ERROR_POS_PLUS2 = 2;
|
||||
// Indicates that at invalid sequence is detected four characters later
|
||||
// than the earliest possible moment
|
||||
private static final int ERROR_POS_PLUS4 = 4;
|
||||
// Indicates that the trailing valid byte is included in replacement of the
|
||||
// previous error
|
||||
private static final int REPLACE_SWALLOWS_TRAILER = 8;
|
||||
// Indicates that one replacement character is missing
|
||||
private static final int REPLACE_MISSING1 = 16;
|
||||
// Indicates that two replacement characters are missing
|
||||
private static final int REPLACE_MISSING2 = 32;
|
||||
// Indicates that three replacement characters are missing
|
||||
private static final int REPLACE_MISSING4 = 64;
|
||||
|
||||
public static final List<Utf8TestCase> TEST_CASES;
|
||||
|
||||
private static int workAroundCount = 0;
|
||||
|
||||
static {
|
||||
// All known issues have been fixed in Java 8
|
||||
// https://bugs.openjdk.java.net/browse/JDK-8039751
|
||||
// Base assumption in Java 7
|
||||
int javaVersion = 7;
|
||||
try {
|
||||
Class.forName("java.util.stream.Collector");
|
||||
javaVersion = 8;
|
||||
} catch (Exception e) {
|
||||
// Ignore
|
||||
}
|
||||
|
||||
Utf8TestCase testCase = null;
|
||||
ArrayList<Utf8TestCase> testCases = new ArrayList<>();
|
||||
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Zero length input",
|
||||
new int[] {},
|
||||
-1,
|
||||
""));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Valid one byte sequence",
|
||||
new int[] {0x41},
|
||||
-1,
|
||||
"A"));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Valid two byte sequence",
|
||||
new int[] {0xC2, 0xA9},
|
||||
-1,
|
||||
"\u00A9"));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Valid three byte sequence",
|
||||
new int[] {0xE0, 0xA4, 0x87},
|
||||
-1,
|
||||
"\u0907"));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Valid four byte sequence",
|
||||
new int[] {0xF0, 0x90, 0x90, 0x80},
|
||||
-1,
|
||||
"\uD801\uDC00"));
|
||||
// Java 7 JVM decoder does not report error until all 4 bytes are
|
||||
// available
|
||||
testCase = new Utf8TestCase(
|
||||
"Invalid code point - out of range",
|
||||
new int[] {0x41, 0xF4, 0x90, 0x80, 0x80, 0x41},
|
||||
2,
|
||||
"A\uFFFD\uFFFD\uFFFD\uFFFDA");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS2);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
// Java 7 JVM decoder does not report error until all 2 bytes are available
|
||||
testCase = new Utf8TestCase(
|
||||
"Valid sequence padded from one byte to two",
|
||||
new int[] {0x41, 0xC0, 0xC1, 0x41},
|
||||
1,
|
||||
"A\uFFFD\uFFFDA");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
// Java 7 JVM decoder does not report error until all 3 bytes are available
|
||||
testCase = new Utf8TestCase(
|
||||
"Valid sequence padded from one byte to three",
|
||||
new int[] {0x41, 0xE0, 0x80, 0xC1, 0x41},
|
||||
2,
|
||||
"A\uFFFD\uFFFD\uFFFDA");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
// Java 7 JVM decoder does not report error until all 4 bytes are
|
||||
// available
|
||||
testCase = new Utf8TestCase(
|
||||
"Valid sequence padded from one byte to four",
|
||||
new int[] {0x41, 0xF0, 0x80, 0x80, 0xC1, 0x41},
|
||||
2,
|
||||
"A\uFFFD\uFFFD\uFFFD\uFFFDA");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS2);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Invalid one byte 1111 1111",
|
||||
new int[] {0x41, 0xFF, 0x41},
|
||||
1,
|
||||
"A\uFFFDA"));
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Invalid one byte 1111 0000",
|
||||
new int[] {0x41, 0xF0, 0x41},
|
||||
2,
|
||||
"A\uFFFDA");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(REPLACE_SWALLOWS_TRAILER);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Invalid one byte 1110 0000",
|
||||
new int[] {0x41, 0xE0, 0x41},
|
||||
2,
|
||||
"A\uFFFDA");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(REPLACE_SWALLOWS_TRAILER);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Invalid one byte 1100 0000",
|
||||
new int[] {0x41, 0xC0, 0x41},
|
||||
1,
|
||||
"A\uFFFDA");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Invalid one byte 1000 000",
|
||||
new int[] {0x41, 0x80, 0x41},
|
||||
1,
|
||||
"A\uFFFDA"));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Invalid sequence from unicode 6.2 spec, table 3-8",
|
||||
new int[] {0x61, 0xF1, 0x80, 0x80, 0xE1, 0x80, 0xC2, 0x62, 0x80,
|
||||
0x63, 0x80, 0xBF, 0x64},
|
||||
4,
|
||||
"a\uFFFD\uFFFD\uFFFDb\uFFFDc\uFFFD\uFFFDd"));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Valid 4-byte sequence truncated to 3 bytes",
|
||||
new int[] {0x61, 0xF0, 0x90, 0x90},
|
||||
3,
|
||||
"a\uFFFD"));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Valid 4-byte sequence truncated to 2 bytes",
|
||||
new int[] {0x61, 0xF0, 0x90},
|
||||
2,
|
||||
"a\uFFFD"));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Valid 4-byte sequence truncated to 1 byte",
|
||||
new int[] {0x61, 0xF0},
|
||||
1,
|
||||
"a\uFFFD"));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Valid 4-byte sequence truncated to 3 bytes with trailer",
|
||||
new int[] {0x61, 0xF0, 0x90, 0x90, 0x61},
|
||||
4,
|
||||
"a\uFFFDa"));
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Valid 4-byte sequence truncated to 2 bytes with trailer",
|
||||
new int[] {0x61, 0xF0, 0x90, 0x61},
|
||||
3,
|
||||
"a\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(REPLACE_SWALLOWS_TRAILER);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Valid 4-byte sequence truncated to 1 byte with trailer",
|
||||
new int[] {0x61, 0xF0, 0x61},
|
||||
2,
|
||||
"a\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(REPLACE_SWALLOWS_TRAILER);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"U+0000 zero-padded to two bytes",
|
||||
new int[] {0x61, 0xC0, 0x80, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"U+007F zero-padded to two bytes",
|
||||
new int[] {0x61, 0xC1, 0xBF, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Two bytes, all 1's",
|
||||
new int[] {0x61, 0xFF, 0xFF, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFDa"));
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Two bytes, 1110 first byte first nibble",
|
||||
new int[] {0x61, 0xE0, 0x80, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Two bytes, 101x first byte first nibble",
|
||||
new int[] {0x61, 0xA0, 0x80, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFDa"));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Two bytes, invalid second byte",
|
||||
new int[] {0x61, 0xC2, 0x00, 0x61},
|
||||
2,
|
||||
"a\uFFFD\u0000a"));
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Two bytes, invalid second byte",
|
||||
new int[] {0x61, 0xC2, 0xC0, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFDa"));
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Three bytes, U+0000 zero-padded",
|
||||
new int[] {0x61, 0xE0, 0x80, 0x80, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Three bytes, U+007F zero-padded",
|
||||
new int[] {0x61, 0xE0, 0x81, 0xBF, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Three bytes, U+07FF zero-padded",
|
||||
new int[] {0x61, 0xE0, 0x9F, 0xBF, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Three bytes, all 1's",
|
||||
new int[] {0x61, 0xFF, 0xFF, 0xFF, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFDa"));
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Three bytes, invalid first byte",
|
||||
new int[] {0x61, 0xF8, 0x80, 0x80, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(REPLACE_MISSING2).addForJvm(
|
||||
REPLACE_SWALLOWS_TRAILER);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Three bytes, invalid second byte",
|
||||
new int[] {0x61, 0xE0, 0xC0, 0x80, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Three bytes, invalid third byte",
|
||||
new int[] {0x61, 0xE1, 0x80, 0xC0, 0x61},
|
||||
3,
|
||||
"a\uFFFD\uFFFDa"));
|
||||
testCase = new Utf8TestCase(
|
||||
"Four bytes, U+0000 zero-padded",
|
||||
new int[] {0x61, 0xF0, 0x80, 0x80, 0x80, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS2);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Four bytes, U+007F zero-padded",
|
||||
new int[] {0x61, 0xF0, 0x80, 0x81, 0xBF, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS2);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Four bytes, U+07FF zero-padded",
|
||||
new int[] {0x61, 0xF0, 0x80, 0x9F, 0xBF, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS2);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Four bytes, U+FFFF zero-padded",
|
||||
new int[] {0x61, 0xF0, 0x8F, 0xBF, 0xBF, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS2);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Four bytes, all 1's",
|
||||
new int[] {0x61, 0xFF, 0xFF, 0xFF, 0xFF, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFDa"));
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Four bytes, invalid first byte",
|
||||
new int[] {0x61, 0xF8, 0x80, 0x80, 0x80, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(
|
||||
REPLACE_MISSING2).addForJvm(REPLACE_MISSING1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Four bytes, invalid second byte",
|
||||
new int[] {0x61, 0xF1, 0xC0, 0x80, 0x80, 0x61},
|
||||
2,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS2);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Four bytes, invalid third byte",
|
||||
new int[] {0x61, 0xF1, 0x80, 0xC0, 0x80, 0x61},
|
||||
3,
|
||||
"a\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCases.add(new Utf8TestCase(
|
||||
"Four bytes, invalid fourth byte",
|
||||
new int[] {0x61, 0xF1, 0x80, 0x80, 0xC0, 0x61},
|
||||
4,
|
||||
"a\uFFFD\uFFFDa"));
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Five bytes, U+0000 zero padded",
|
||||
new int[] {0x61, 0xF8, 0x80, 0x80, 0x80, 0x80, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(REPLACE_MISSING4);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Five bytes, U+007F zero padded",
|
||||
new int[] {0x61, 0xF8, 0x80, 0x80, 0x81, 0xBF, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(REPLACE_MISSING4);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Five bytes, U+07FF zero padded",
|
||||
new int[] {0x61, 0xF8, 0x80, 0x80, 0x9F, 0xBF, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(REPLACE_MISSING4);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Five bytes, U+FFFF zero padded",
|
||||
new int[] {0x61, 0xF8, 0x80, 0x8F, 0xBF, 0xBF, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(REPLACE_MISSING4);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Six bytes, U+0000 zero padded",
|
||||
new int[] {0x61, 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(
|
||||
ERROR_POS_PLUS1).addForJvm(REPLACE_MISSING4).addForJvm(
|
||||
REPLACE_MISSING1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Six bytes, U+007F zero padded",
|
||||
new int[] {0x61, 0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(
|
||||
ERROR_POS_PLUS1).addForJvm(REPLACE_MISSING4).addForJvm(
|
||||
REPLACE_MISSING1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Six bytes, U+07FF zero padded",
|
||||
new int[] {0x61, 0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(
|
||||
ERROR_POS_PLUS1).addForJvm(REPLACE_MISSING4).addForJvm(
|
||||
REPLACE_MISSING1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Six bytes, U+FFFF zero padded",
|
||||
new int[] {0x61, 0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF, 0x61},
|
||||
1,
|
||||
"a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(
|
||||
ERROR_POS_PLUS1).addForJvm(REPLACE_MISSING4).addForJvm(
|
||||
REPLACE_MISSING1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
testCase = new Utf8TestCase(
|
||||
"Original test case - derived from Autobahn?",
|
||||
new int[] {0xCE, 0xBA, 0xE1, 0xDB, 0xB9, 0xCF, 0x83, 0xCE,
|
||||
0xBC, 0xCE, 0xB5, 0xED, 0x80, 0x65, 0x64, 0x69,
|
||||
0x74, 0x65, 0x64},
|
||||
3,
|
||||
"\u03BA\uFFFD\u06F9\u03C3\u03BC\u03B5\uFFFDedited");
|
||||
if (javaVersion < 8) {
|
||||
testCase.addForJvm(ERROR_POS_PLUS1);
|
||||
}
|
||||
testCases.add(testCase);
|
||||
|
||||
TEST_CASES = Collections.unmodifiableList(testCases);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHarmonyDecoder() {
|
||||
CharsetDecoder decoder = new Utf8Decoder();
|
||||
for (Utf8TestCase testCase : TEST_CASES) {
|
||||
doTest(decoder, testCase, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testJvmDecoder() {
|
||||
CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
|
||||
int testCount = 0;
|
||||
try {
|
||||
for (Utf8TestCase testCase : TEST_CASES) {
|
||||
doTest(decoder, testCase, testCase.flagsJvm);
|
||||
testCount++;
|
||||
}
|
||||
} finally {
|
||||
System.err.println("Workarounds added to " + workAroundCount +
|
||||
" tests to account for known JVM bugs");
|
||||
if (testCount < TEST_CASES.size()) {
|
||||
System.err.println("Executed " + testCount + " of " +
|
||||
TEST_CASES.size() + " UTF-8 tests before " +
|
||||
"encountering a failure");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void doTest(CharsetDecoder decoder, Utf8TestCase testCase,
|
||||
int flags) {
|
||||
|
||||
int len = testCase.input.length;
|
||||
ByteBuffer bb = ByteBuffer.allocate(len);
|
||||
CharBuffer cb = CharBuffer.allocate(len);
|
||||
|
||||
// Configure decoder to fail on an error
|
||||
decoder.reset();
|
||||
decoder.onMalformedInput(CodingErrorAction.REPORT);
|
||||
decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
|
||||
// Add each byte one at a time. The decoder should fail as soon as
|
||||
// an invalid sequence has been provided
|
||||
for (int i = 0; i < len; i++) {
|
||||
bb.put((byte) testCase.input[i]);
|
||||
bb.flip();
|
||||
CoderResult cr = decoder.decode(bb, cb, false);
|
||||
if (cr.isError()) {
|
||||
int expected = testCase.invalidIndex;
|
||||
if ((flags & ERROR_POS_PLUS1) != 0) {
|
||||
expected += 1;
|
||||
}
|
||||
if ((flags & ERROR_POS_PLUS2) != 0) {
|
||||
expected += 2;
|
||||
}
|
||||
if ((flags & ERROR_POS_PLUS4) != 0) {
|
||||
expected += 4;
|
||||
}
|
||||
Assert.assertEquals(testCase.description, expected, i);
|
||||
break;
|
||||
}
|
||||
bb.compact();
|
||||
}
|
||||
|
||||
// Configure decoder to replace on an error
|
||||
decoder.reset();
|
||||
decoder.onMalformedInput(CodingErrorAction.REPLACE);
|
||||
decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||
|
||||
// Add each byte one at a time.
|
||||
bb.clear();
|
||||
cb.clear();
|
||||
for (int i = 0; i < len; i++) {
|
||||
bb.put((byte) testCase.input[i]);
|
||||
bb.flip();
|
||||
CoderResult cr = decoder.decode(bb, cb, false);
|
||||
if (cr.isError()) {
|
||||
Assert.fail(testCase.description);
|
||||
}
|
||||
bb.compact();
|
||||
}
|
||||
// For incomplete sequences at the end of the input need to tell
|
||||
// the decoder the input has ended
|
||||
bb.flip();
|
||||
CoderResult cr = decoder.decode(bb, cb, true);
|
||||
if (cr.isError()) {
|
||||
Assert.fail(testCase.description);
|
||||
}
|
||||
cb.flip();
|
||||
|
||||
String expected = testCase.outputReplaced;
|
||||
if ((flags & REPLACE_SWALLOWS_TRAILER) != 0) {
|
||||
expected = expected.substring(0, expected.length() - 1);
|
||||
}
|
||||
|
||||
if ((flags & REPLACE_MISSING1) != 0) {
|
||||
expected = expected.substring(0, 1) +
|
||||
expected.substring(2, expected.length());
|
||||
}
|
||||
|
||||
if ((flags & REPLACE_MISSING2) != 0) {
|
||||
expected = expected.substring(0, 1) +
|
||||
expected.substring(3, expected.length());
|
||||
}
|
||||
|
||||
if ((flags & REPLACE_MISSING4) != 0) {
|
||||
expected = expected.substring(0, 1) +
|
||||
expected.substring(5, expected.length());
|
||||
}
|
||||
|
||||
Assert.assertEquals(testCase.description, expected, cb.toString());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Encapsulates a single UTF-8 test case
|
||||
*/
|
||||
public static class Utf8TestCase {
|
||||
public final String description;
|
||||
public final int[] input;
|
||||
public final int invalidIndex;
|
||||
public final String outputReplaced;
|
||||
public int flagsJvm = 0;
|
||||
|
||||
public Utf8TestCase(String description, int[] input, int invalidIndex,
|
||||
String outputReplaced) {
|
||||
this.description = description;
|
||||
this.input = input;
|
||||
this.invalidIndex = invalidIndex;
|
||||
this.outputReplaced = outputReplaced;
|
||||
|
||||
}
|
||||
|
||||
public Utf8TestCase addForJvm(int flag) {
|
||||
if (this.flagsJvm == 0) {
|
||||
TestUtf8.workAroundCount++;
|
||||
}
|
||||
this.flagsJvm = this.flagsJvm | flag;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user