init

2024-11-30 19:03:49 +08:00
commit 1e6763c160
3806 changed files with 737676 additions and 0 deletions
--- a/test/org/apache/tomcat/util/buf/TestUtf8.java
+++ b/test/org/apache/tomcat/util/buf/TestUtf8.java
@@ -0,0 +1,678 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * These tests have been written with reference to
+ * <a href="http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf">unicode 6.2,
+ * chapter 3, section 3.9</a>.
+ */
+public class TestUtf8 {
+
+    // Flag: the invalid sequence is detected one input byte later than the
+    // earliest possible moment (doTest adds 1 to the expected error index)
+    private static final int ERROR_POS_PLUS1 = 1;
+    // Flag: the invalid sequence is detected two input bytes later than the
+    // earliest possible moment (doTest adds 2 to the expected error index)
+    private static final int ERROR_POS_PLUS2 = 2;
+    // Flag: the invalid sequence is detected four input bytes later than the
+    // earliest possible moment; the ERROR_POS_* offsets add up when combined
+    private static final int ERROR_POS_PLUS4 = 4;
+    // Flag: the trailing valid byte is included in the replacement of the
+    // previous error, so doTest drops the last expected output character
+    private static final int REPLACE_SWALLOWS_TRAILER = 8;
+    // Flag: one replacement character is missing from the decoder output
+    private static final int REPLACE_MISSING1 = 16;
+    // Flag: two replacement characters are missing from the decoder output
+    private static final int REPLACE_MISSING2 = 32;
+    // Flag: four replacement characters are missing from the decoder output
+    private static final int REPLACE_MISSING4 = 64;
+    // Unmodifiable list of all test cases, assigned by the static initializer
+    public static final List<Utf8TestCase> TEST_CASES;
+    // Incremented by Utf8TestCase.addForJvm when a case gets its first flag
+    private static int workAroundCount = 0;
+
+    static {
+        // Builds TEST_CASES. The JVM decoder bugs worked around below were fixed
+        // in Java 8: https://bugs.openjdk.java.net/browse/JDK-8039751
+        // Assume Java 7 unless a class that was added in Java 8 can be loaded
+        int javaVersion = 7;
+        try {
+            Class.forName("java.util.stream.Collector");
+            javaVersion = 8;
+        } catch (Exception e) {
+            // Ignore - the class could not be loaded, keep the Java 7 assumption
+        }
+
+        Utf8TestCase testCase = null;
+        ArrayList<Utf8TestCase> testCases = new ArrayList<>();
+        // Args: description, input bytes, error index (-1 = valid), replaced output
+        testCases.add(new Utf8TestCase(
+                "Zero length input",
+                new int[] {},
+                -1,
+                ""));
+        testCases.add(new Utf8TestCase(
+                "Valid one byte sequence",
+                new int[] {0x41},
+                -1,
+                "A"));
+        testCases.add(new Utf8TestCase(
+                "Valid two byte sequence",
+                new int[] {0xC2, 0xA9},
+                -1,
+                "\u00A9"));
+        testCases.add(new Utf8TestCase(
+                "Valid three byte sequence",
+                new int[] {0xE0, 0xA4, 0x87},
+                -1,
+                "\u0907"));
+        testCases.add(new Utf8TestCase(
+                "Valid four byte sequence",
+                new int[] {0xF0, 0x90, 0x90, 0x80},
+                -1,
+                "\uD801\uDC00"));
+        // Java 7 JVM decoder does not report error until all 4 bytes are
+        // available
+        testCase = new Utf8TestCase(
+                "Invalid code point - out of range",
+                new int[] {0x41, 0xF4, 0x90, 0x80, 0x80, 0x41},
+                2,
+                "A\uFFFD\uFFFD\uFFFD\uFFFDA");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS2);
+        }
+        testCases.add(testCase);
+
+        // Java 7 JVM decoder does not report error until both bytes are available
+        testCase = new Utf8TestCase(
+                "Valid sequence padded from one byte to two",
+                new int[] {0x41, 0xC0, 0xC1, 0x41},
+                1,
+                "A\uFFFD\uFFFDA");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        // Java 7 JVM decoder does not report error until all 3 bytes are available
+        testCase = new Utf8TestCase(
+                "Valid sequence padded from one byte to three",
+                new int[] {0x41, 0xE0, 0x80, 0xC1, 0x41},
+                2,
+                "A\uFFFD\uFFFD\uFFFDA");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        // Java 7 JVM decoder does not report error until all 4 bytes are
+        // available
+        testCase = new Utf8TestCase(
+                "Valid sequence padded from one byte to four",
+                new int[] {0x41, 0xF0, 0x80, 0x80, 0xC1, 0x41},
+                2,
+                "A\uFFFD\uFFFD\uFFFD\uFFFDA");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS2);
+        }
+        testCases.add(testCase);
+
+        testCases.add(new Utf8TestCase(
+                "Invalid one byte 1111 1111",
+                new int[] {0x41, 0xFF, 0x41},
+                1,
+                "A\uFFFDA"));
+
+        testCase = new Utf8TestCase(
+                "Invalid one byte 1111 0000",
+                new int[] {0x41, 0xF0, 0x41},
+                2,
+                "A\uFFFDA");
+        if (javaVersion < 8) {
+            testCase.addForJvm(REPLACE_SWALLOWS_TRAILER);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Invalid one byte 1110 0000",
+                new int[] {0x41, 0xE0, 0x41},
+                2,
+                "A\uFFFDA");
+        if (javaVersion < 8) {
+            testCase.addForJvm(REPLACE_SWALLOWS_TRAILER);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Invalid one byte 1100 0000",
+                new int[] {0x41, 0xC0, 0x41},
+                1,
+                "A\uFFFDA");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        testCases.add(new Utf8TestCase(
+                "Invalid one byte 1000 000",
+                new int[] {0x41, 0x80, 0x41},
+                1,
+                "A\uFFFDA"));
+        testCases.add(new Utf8TestCase(
+                "Invalid sequence from unicode 6.2 spec, table 3-8",
+                new int[] {0x61, 0xF1, 0x80, 0x80, 0xE1, 0x80, 0xC2, 0x62, 0x80,
+                        0x63, 0x80, 0xBF, 0x64},
+                4,
+                "a\uFFFD\uFFFD\uFFFDb\uFFFDc\uFFFD\uFFFDd"));
+        testCases.add(new Utf8TestCase(
+                "Valid 4-byte sequence truncated to 3 bytes",
+                new int[] {0x61, 0xF0, 0x90, 0x90},
+                3,
+                "a\uFFFD"));
+        testCases.add(new Utf8TestCase(
+                "Valid 4-byte sequence truncated to 2 bytes",
+                new int[] {0x61, 0xF0, 0x90},
+                2,
+                "a\uFFFD"));
+        testCases.add(new Utf8TestCase(
+                "Valid 4-byte sequence truncated to 1 byte",
+                new int[] {0x61, 0xF0},
+                1,
+                "a\uFFFD"));
+        testCases.add(new Utf8TestCase(
+                "Valid 4-byte sequence truncated to 3 bytes with trailer",
+                new int[] {0x61, 0xF0, 0x90, 0x90, 0x61},
+                4,
+                "a\uFFFDa"));
+
+        testCase = new Utf8TestCase(
+                "Valid 4-byte sequence truncated to 2 bytes with trailer",
+                new int[] {0x61, 0xF0, 0x90, 0x61},
+                3,
+                "a\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(REPLACE_SWALLOWS_TRAILER);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Valid 4-byte sequence truncated to 1 byte with trailer",
+                new int[] {0x61, 0xF0, 0x61},
+                2,
+                "a\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(REPLACE_SWALLOWS_TRAILER);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "U+0000 zero-padded to two bytes",
+                new int[] {0x61, 0xC0, 0x80, 0x61},
+                1,
+                "a\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "U+007F zero-padded to two bytes",
+                new int[] {0x61, 0xC1, 0xBF, 0x61},
+                1,
+                "a\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        testCases.add(new Utf8TestCase(
+                "Two bytes, all 1's",
+                new int[] {0x61, 0xFF, 0xFF, 0x61},
+                1,
+                "a\uFFFD\uFFFDa"));
+
+        testCase = new Utf8TestCase(
+                "Two bytes, 1110 first byte first nibble",
+                new int[] {0x61, 0xE0, 0x80, 0x61},
+                2,
+                "a\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        testCases.add(new Utf8TestCase(
+                "Two bytes, 101x first byte first nibble",
+                new int[] {0x61, 0xA0, 0x80, 0x61},
+                1,
+                "a\uFFFD\uFFFDa"));
+        testCases.add(new Utf8TestCase(
+                "Two bytes, invalid second byte",
+                new int[] {0x61, 0xC2, 0x00, 0x61},
+                2,
+                "a\uFFFD\u0000a"));
+        testCases.add(new Utf8TestCase(
+                "Two bytes, invalid second byte",
+                new int[] {0x61, 0xC2, 0xC0, 0x61},
+                2,
+                "a\uFFFD\uFFFDa"));
+
+        testCase = new Utf8TestCase(
+                "Three bytes, U+0000 zero-padded",
+                new int[] {0x61, 0xE0, 0x80, 0x80, 0x61},
+                2,
+                "a\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Three bytes, U+007F zero-padded",
+                new int[] {0x61, 0xE0, 0x81, 0xBF, 0x61},
+                2,
+                "a\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Three bytes, U+07FF zero-padded",
+                new int[] {0x61, 0xE0, 0x9F, 0xBF, 0x61},
+                2,
+                "a\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        testCases.add(new Utf8TestCase(
+                "Three bytes, all 1's",
+                new int[] {0x61, 0xFF, 0xFF, 0xFF, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFDa"));
+
+        testCase = new Utf8TestCase(
+                "Three bytes, invalid first byte",
+                new int[] {0x61, 0xF8, 0x80, 0x80, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(REPLACE_MISSING2).addForJvm(
+                    REPLACE_SWALLOWS_TRAILER);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Three bytes, invalid second byte",
+                new int[] {0x61, 0xE0, 0xC0, 0x80, 0x61},
+                2,
+                "a\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        testCases.add(new Utf8TestCase(
+                "Three bytes, invalid third byte",
+                new int[] {0x61, 0xE1, 0x80, 0xC0, 0x61},
+                3,
+                "a\uFFFD\uFFFDa"));
+        testCase = new Utf8TestCase(
+                "Four bytes, U+0000 zero-padded",
+                new int[] {0x61, 0xF0, 0x80, 0x80, 0x80, 0x61},
+                2,
+                "a\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS2);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Four bytes, U+007F zero-padded",
+                new int[] {0x61, 0xF0, 0x80, 0x81, 0xBF, 0x61},
+                2,
+                "a\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS2);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Four bytes, U+07FF zero-padded",
+                new int[] {0x61, 0xF0, 0x80, 0x9F, 0xBF, 0x61},
+                2,
+                "a\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS2);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Four bytes, U+FFFF zero-padded",
+                new int[] {0x61, 0xF0, 0x8F, 0xBF, 0xBF, 0x61},
+                2,
+                "a\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS2);
+        }
+        testCases.add(testCase);
+
+        testCases.add(new Utf8TestCase(
+                "Four bytes, all 1's",
+                new int[] {0x61, 0xFF, 0xFF, 0xFF, 0xFF, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFD\uFFFDa"));
+
+        testCase = new Utf8TestCase(
+                "Four bytes, invalid first byte",
+                new int[] {0x61, 0xF8, 0x80, 0x80, 0x80, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(
+                    REPLACE_MISSING2).addForJvm(REPLACE_MISSING1);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Four bytes, invalid second byte",
+                new int[] {0x61, 0xF1, 0xC0, 0x80, 0x80, 0x61},
+                2,
+                "a\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS2);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Four bytes, invalid third byte",
+                new int[] {0x61, 0xF1, 0x80, 0xC0, 0x80, 0x61},
+                3,
+                "a\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        testCases.add(new Utf8TestCase(
+                "Four bytes, invalid fourth byte",
+                new int[] {0x61, 0xF1, 0x80, 0x80, 0xC0, 0x61},
+                4,
+                "a\uFFFD\uFFFDa"));
+
+        testCase = new Utf8TestCase(
+                "Five bytes, U+0000 zero padded",
+                new int[] {0x61, 0xF8, 0x80, 0x80, 0x80, 0x80, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(REPLACE_MISSING4);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Five bytes, U+007F zero padded",
+                new int[] {0x61, 0xF8, 0x80, 0x80, 0x81, 0xBF, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(REPLACE_MISSING4);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Five bytes, U+07FF zero padded",
+                new int[] {0x61, 0xF8, 0x80, 0x80, 0x9F, 0xBF, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(REPLACE_MISSING4);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Five bytes, U+FFFF zero padded",
+                new int[] {0x61, 0xF8, 0x80, 0x8F, 0xBF, 0xBF, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(REPLACE_MISSING4);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Six bytes, U+0000 zero padded",
+                new int[] {0x61, 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(
+                    ERROR_POS_PLUS1).addForJvm(REPLACE_MISSING4).addForJvm(
+                            REPLACE_MISSING1);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Six bytes, U+007F zero padded",
+                new int[] {0x61, 0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(
+                    ERROR_POS_PLUS1).addForJvm(REPLACE_MISSING4).addForJvm(
+                            REPLACE_MISSING1);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Six bytes, U+07FF zero padded",
+                new int[] {0x61, 0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(
+                    ERROR_POS_PLUS1).addForJvm(REPLACE_MISSING4).addForJvm(
+                            REPLACE_MISSING1);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Six bytes, U+FFFF zero padded",
+                new int[] {0x61, 0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF, 0x61},
+                1,
+                "a\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDa");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS4).addForJvm(
+                    ERROR_POS_PLUS1).addForJvm(REPLACE_MISSING4).addForJvm(
+                            REPLACE_MISSING1);
+        }
+        testCases.add(testCase);
+
+        testCase = new Utf8TestCase(
+                "Original test case - derived from Autobahn?",
+                new int[] {0xCE, 0xBA, 0xE1, 0xDB, 0xB9, 0xCF, 0x83, 0xCE,
+                           0xBC, 0xCE, 0xB5, 0xED, 0x80, 0x65, 0x64, 0x69,
+                           0x74, 0x65, 0x64},
+                3,
+                "\u03BA\uFFFD\u06F9\u03C3\u03BC\u03B5\uFFFDedited");
+        if (javaVersion < 8) {
+            testCase.addForJvm(ERROR_POS_PLUS1);
+        }
+        testCases.add(testCase);
+
+        TEST_CASES = Collections.unmodifiableList(testCases);
+    }
+
+    /**
+     * Runs every entry of {@link #TEST_CASES} against a single
+     * {@code Utf8Decoder} instance. No workaround flags are passed, so the
+     * decoder has to match the expected results exactly.
+     */
+    @Test
+    public void testHarmonyDecoder() {
+        final CharsetDecoder harmonyDecoder = new Utf8Decoder();
+        final int total = TEST_CASES.size();
+        for (int idx = 0; idx < total; idx++) {
+            // Flags of 0 mean that no deviation from the expectations is allowed
+            doTest(harmonyDecoder, TEST_CASES.get(idx), 0);
+        }
+    }
+
+
+    /**
+     * Runs every entry of {@link #TEST_CASES} against the JVM's own UTF-8
+     * decoder, passing each case's {@code flagsJvm} so that known JVM
+     * deviations are tolerated. A summary is always written to stderr: the
+     * number of cases with workarounds and, if the run stopped early, how many
+     * cases completed.
+     */
+    @Test
+    public void testJvmDecoder() {
+        final CharsetDecoder jvmDecoder = StandardCharsets.UTF_8.newDecoder();
+        final int total = TEST_CASES.size();
+        int completed = 0;
+        try {
+            while (completed < total) {
+                final Utf8TestCase current = TEST_CASES.get(completed);
+                doTest(jvmDecoder, current, current.flagsJvm);
+                completed++;
+            }
+        } finally {
+            // Printed even when an assertion above aborts the loop
+            System.err.println("Workarounds added to " + workAroundCount +
+                    " tests to account for known JVM bugs");
+            if (completed < total) {
+                System.err.println("Executed " + completed + " of " +
+                        total + " UTF-8 tests before " +
+                        "encountering a failure");
+            }
+        }
+    }
+
+
+    private void doTest(CharsetDecoder decoder, Utf8TestCase testCase,
+            int flags) {
+
+        int len = testCase.input.length;
+        ByteBuffer bb = ByteBuffer.allocate(len);
+        CharBuffer cb = CharBuffer.allocate(len);
+
+        // Configure decoder to fail on an error
+        decoder.reset();
+        decoder.onMalformedInput(CodingErrorAction.REPORT);
+        decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+
+        // Add each byte one at a time. The decoder should fail as soon as
+        // an invalid sequence has been provided
+        for (int i = 0; i < len; i++) {
+            bb.put((byte) testCase.input[i]);
+            bb.flip();
+            CoderResult cr = decoder.decode(bb, cb, false);
+            if (cr.isError()) {
+                int expected =  testCase.invalidIndex;
+                if ((flags & ERROR_POS_PLUS1) != 0) {
+                    expected += 1;
+                }
+                if ((flags & ERROR_POS_PLUS2) != 0) {
+                    expected += 2;
+                }
+                if ((flags & ERROR_POS_PLUS4) != 0) {
+                    expected += 4;
+                }
+                Assert.assertEquals(testCase.description, expected, i);
+                break;
+            }
+            bb.compact();
+        }
+
+        // Configure decoder to replace on an error
+        decoder.reset();
+        decoder.onMalformedInput(CodingErrorAction.REPLACE);
+        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+
+        // Add each byte one at a time.
+        bb.clear();
+        cb.clear();
+        for (int i = 0; i < len; i++) {
+            bb.put((byte) testCase.input[i]);
+            bb.flip();
+            CoderResult cr = decoder.decode(bb, cb, false);
+            if (cr.isError()) {
+                Assert.fail(testCase.description);
+            }
+            bb.compact();
+        }
+        // For incomplete sequences at the end of the input need to tell
+        // the decoder the input has ended
+        bb.flip();
+        CoderResult cr = decoder.decode(bb, cb, true);
+        if (cr.isError()) {
+            Assert.fail(testCase.description);
+        }
+        cb.flip();
+
+        String expected = testCase.outputReplaced;
+        if ((flags & REPLACE_SWALLOWS_TRAILER) != 0) {
+            expected = expected.substring(0, expected.length() - 1);
+        }
+
+        if ((flags & REPLACE_MISSING1) != 0) {
+            expected = expected.substring(0, 1) +
+                    expected.substring(2, expected.length());
+        }
+
+        if ((flags & REPLACE_MISSING2) != 0) {
+            expected = expected.substring(0, 1) +
+                    expected.substring(3, expected.length());
+        }
+
+        if ((flags & REPLACE_MISSING4) != 0) {
+            expected = expected.substring(0, 1) +
+                    expected.substring(5, expected.length());
+        }
+
+        Assert.assertEquals(testCase.description, expected, cb.toString());
+    }
+
+
+    /**
+     * Encapsulates a single UTF-8 test case
+     */
+    public static class Utf8TestCase {
+        public final String description;
+        public final int[] input;
+        public final int invalidIndex;
+        public final String outputReplaced;
+        public int flagsJvm = 0;
+
+        public Utf8TestCase(String description, int[] input, int invalidIndex,
+                String outputReplaced) {
+            this.description = description;
+            this.input = input;
+            this.invalidIndex = invalidIndex;
+            this.outputReplaced = outputReplaced;
+
+        }
+
+        public Utf8TestCase addForJvm(int flag) {
+            if (this.flagsJvm == 0) {
+                TestUtf8.workAroundCount++;
+            }
+            this.flagsJvm = this.flagsJvm | flag;
+            return this;
+        }
+    }
+}