init

2024-11-30 19:03:49 +08:00
commit 1e6763c160
3806 changed files with 737676 additions and 0 deletions
--- a/java/org/apache/jasper/xmlparser/UTF8Reader.java
+++ b/java/org/apache/jasper/xmlparser/UTF8Reader.java
@@ -0,0 +1,639 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jasper.xmlparser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.UTFDataFormatException;
+
+import org.apache.jasper.compiler.Localizer;
+import org.apache.juli.logging.Log;
+import org.apache.juli.logging.LogFactory;
+
+/**
+ * @author Andy Clark, IBM
+ *
+ * @deprecated Will be removed in Tomcat 9.0.x onwards
+ */
+@Deprecated
+public class UTF8Reader
+    extends Reader {
+
+    private final Log log = LogFactory.getLog(UTF8Reader.class); // must not be static
+
+    // debugging
+
+    /** Debug read. */
+    private static final boolean DEBUG_READ = false;
+
+    //
+    // Data
+    //
+
+    /** Input stream. */
+    private final InputStream fInputStream;
+
+    /** Byte buffer. */
+    private final byte[] fBuffer;
+
+    /** Offset into buffer. */
+    private int fOffset;
+
+    /** Surrogate character. */
+    private int fSurrogate = -1;
+
+    //
+    // Constructors
+    //
+
+    /**
+     * Constructs a UTF-8 reader from the specified input stream,
+     * buffer size and MessageFormatter.
+     *
+     * @param inputStream The input stream.
+     * @param size        The initial buffer size.
+     */
+    public UTF8Reader(InputStream inputStream, int size) {
+        fInputStream = inputStream;
+        fBuffer = new byte[size];
+    }
+
+    //
+    // Reader methods
+    //
+
+    /**
+     * Read a single character.  This method will block until a character is
+     * available, an I/O error occurs, or the end of the stream is reached.
+     *
+     * <p> Subclasses that intend to support efficient single-character input
+     * should override this method.
+     *
+     * @return     The character read, as an integer in the range 0 to 16383
+     *             (<code>0x00-0xffff</code>), or -1 if the end of the stream has
+     *             been reached
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    @Override
+    public int read() throws IOException {
+
+        // decode character
+        int c = fSurrogate;
+        if (fSurrogate == -1) {
+            // NOTE: We use the index into the buffer if there are remaining
+            //       bytes from the last block read. -Ac
+            int index = 0;
+
+            // get first byte
+            int b0 = index == fOffset
+                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+            if (b0 == -1) {
+                return -1;
+            }
+
+            // UTF-8:   [0xxx xxxx]
+            // Unicode: [0000 0000] [0xxx xxxx]
+            if (b0 < 0x80) {
+                c = (char)b0;
+            }
+
+            // UTF-8:   [110y yyyy] [10xx xxxx]
+            // Unicode: [0000 0yyy] [yyxx xxxx]
+            else if ((b0 & 0xE0) == 0xC0) {
+                int b1 = index == fOffset
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b1 == -1) {
+                    expectedByte(2, 2);
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    invalidByte(2, 2);
+                }
+                c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
+            }
+
+            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
+            // Unicode: [zzzz yyyy] [yyxx xxxx]
+            else if ((b0 & 0xF0) == 0xE0) {
+                int b1 = index == fOffset
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b1 == -1) {
+                    expectedByte(2, 3);
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    invalidByte(2, 3);
+                }
+                int b2 = index == fOffset
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b2 == -1) {
+                    expectedByte(3, 3);
+                }
+                if ((b2 & 0xC0) != 0x80) {
+                    invalidByte(3, 3);
+                }
+                c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
+                    (b2 & 0x003F);
+            }
+
+            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
+            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
+            //          [1101 11yy] [yyxx xxxx] (low surrogate)
+            //          * uuuuu = wwww + 1
+            else if ((b0 & 0xF8) == 0xF0) {
+                int b1 = index == fOffset
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b1 == -1) {
+                    expectedByte(2, 4);
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    invalidByte(2, 3);
+                }
+                int b2 = index == fOffset
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b2 == -1) {
+                    expectedByte(3, 4);
+                }
+                if ((b2 & 0xC0) != 0x80) {
+                    invalidByte(3, 3);
+                }
+                int b3 = index == fOffset
+                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
+                if (b3 == -1) {
+                    expectedByte(4, 4);
+                }
+                if ((b3 & 0xC0) != 0x80) {
+                    invalidByte(4, 4);
+                }
+                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
+                if (uuuuu > 0x10) {
+                    invalidSurrogate(uuuuu);
+                }
+                int wwww = uuuuu - 1;
+                int hs = 0xD800 |
+                         ((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) |
+                         ((b2 >> 4) & 0x0003);
+                int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F);
+                c = hs;
+                fSurrogate = ls;
+            }
+
+            // error
+            else {
+                invalidByte(1, 1);
+            }
+        }
+
+        // use surrogate
+        else {
+            fSurrogate = -1;
+        }
+
+        // return character
+        if (DEBUG_READ) {
+            if (log.isDebugEnabled())
+                log.debug("read(): 0x"+Integer.toHexString(c));
+        }
+        return c;
+
+    } // read():int
+
+    /**
+     * Read characters into a portion of an array.  This method will block
+     * until some input is available, an I/O error occurs, or the end of the
+     * stream is reached.
+     *
+     * @param      ch     Destination buffer
+     * @param      offset Offset at which to start storing characters
+     * @param      length Maximum number of characters to read
+     *
+     * @return     The number of characters read, or -1 if the end of the
+     *             stream has been reached
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    @Override
+    public int read(char ch[], int offset, int length) throws IOException {
+
+        // handle surrogate
+        int out = offset;
+        if (fSurrogate != -1) {
+            ch[offset + 1] = (char)fSurrogate;
+            fSurrogate = -1;
+            length--;
+            out++;
+        }
+
+        // read bytes
+        int count = 0;
+        if (fOffset == 0) {
+            // adjust length to read
+            if (length > fBuffer.length) {
+                length = fBuffer.length;
+            }
+
+            // perform read operation
+            count = fInputStream.read(fBuffer, 0, length);
+            if (count == -1) {
+                return -1;
+            }
+            count += out - offset;
+        }
+
+        // skip read; last character was in error
+        // NOTE: Having an offset value other than zero means that there was
+        //       an error in the last character read. In this case, we have
+        //       skipped the read so we don't consume any bytes past the
+        //       error. By signaling the error on the next block read we
+        //       allow the method to return the most valid characters that
+        //       it can on the previous block read. -Ac
+        else {
+            count = fOffset;
+            fOffset = 0;
+        }
+
+        // convert bytes to characters
+        final int total = count;
+        for (int in = 0; in < total; in++) {
+            int b0 = fBuffer[in] & 0x00FF;
+
+            // UTF-8:   [0xxx xxxx]
+            // Unicode: [0000 0000] [0xxx xxxx]
+            if (b0 < 0x80) {
+                ch[out++] = (char)b0;
+                continue;
+            }
+
+            // UTF-8:   [110y yyyy] [10xx xxxx]
+            // Unicode: [0000 0yyy] [yyxx xxxx]
+            if ((b0 & 0xE0) == 0xC0) {
+                int b1 = -1;
+                if (++in < total) {
+                    b1 = fBuffer[in] & 0x00FF;
+                }
+                else {
+                    b1 = fInputStream.read();
+                    if (b1 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fOffset = 1;
+                            return out - offset;
+                        }
+                        expectedByte(2, 2);
+                    }
+                    count++;
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fOffset = 2;
+                        return out - offset;
+                    }
+                    invalidByte(2, 2);
+                }
+                int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
+                ch[out++] = (char)c;
+                count -= 1;
+                continue;
+            }
+
+            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
+            // Unicode: [zzzz yyyy] [yyxx xxxx]
+            if ((b0 & 0xF0) == 0xE0) {
+                int b1 = -1;
+                if (++in < total) {
+                    b1 = fBuffer[in] & 0x00FF;
+                }
+                else {
+                    b1 = fInputStream.read();
+                    if (b1 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fOffset = 1;
+                            return out - offset;
+                        }
+                        expectedByte(2, 3);
+                    }
+                    count++;
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fOffset = 2;
+                        return out - offset;
+                    }
+                    invalidByte(2, 3);
+                }
+                int b2 = -1;
+                if (++in < total) {
+                    b2 = fBuffer[in] & 0x00FF;
+                }
+                else {
+                    b2 = fInputStream.read();
+                    if (b2 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fBuffer[1] = (byte)b1;
+                            fOffset = 2;
+                            return out - offset;
+                        }
+                        expectedByte(3, 3);
+                    }
+                    count++;
+                }
+                if ((b2 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fBuffer[2] = (byte)b2;
+                        fOffset = 3;
+                        return out - offset;
+                    }
+                    invalidByte(3, 3);
+                }
+                int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
+                        (b2 & 0x003F);
+                ch[out++] = (char)c;
+                count -= 2;
+                continue;
+            }
+
+            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
+            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
+            //          [1101 11yy] [yyxx xxxx] (low surrogate)
+            //          * uuuuu = wwww + 1
+            if ((b0 & 0xF8) == 0xF0) {
+                int b1 = -1;
+                if (++in < total) {
+                    b1 = fBuffer[in] & 0x00FF;
+                }
+                else {
+                    b1 = fInputStream.read();
+                    if (b1 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fOffset = 1;
+                            return out - offset;
+                        }
+                        expectedByte(2, 4);
+                    }
+                    count++;
+                }
+                if ((b1 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fOffset = 2;
+                        return out - offset;
+                    }
+                    invalidByte(2, 4);
+                }
+                int b2 = -1;
+                if (++in < total) {
+                    b2 = fBuffer[in] & 0x00FF;
+                }
+                else {
+                    b2 = fInputStream.read();
+                    if (b2 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fBuffer[1] = (byte)b1;
+                            fOffset = 2;
+                            return out - offset;
+                        }
+                        expectedByte(3, 4);
+                    }
+                    count++;
+                }
+                if ((b2 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fBuffer[2] = (byte)b2;
+                        fOffset = 3;
+                        return out - offset;
+                    }
+                    invalidByte(3, 4);
+                }
+                int b3 = -1;
+                if (++in < total) {
+                    b3 = fBuffer[in] & 0x00FF;
+                }
+                else {
+                    b3 = fInputStream.read();
+                    if (b3 == -1) {
+                        if (out > offset) {
+                            fBuffer[0] = (byte)b0;
+                            fBuffer[1] = (byte)b1;
+                            fBuffer[2] = (byte)b2;
+                            fOffset = 3;
+                            return out - offset;
+                        }
+                        expectedByte(4, 4);
+                    }
+                    count++;
+                }
+                if ((b3 & 0xC0) != 0x80) {
+                    if (out > offset) {
+                        fBuffer[0] = (byte)b0;
+                        fBuffer[1] = (byte)b1;
+                        fBuffer[2] = (byte)b2;
+                        fBuffer[3] = (byte)b3;
+                        fOffset = 4;
+                        return out - offset;
+                    }
+                    invalidByte(4, 4);
+                }
+
+                // decode bytes into surrogate characters
+                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
+                if (uuuuu > 0x10) {
+                    invalidSurrogate(uuuuu);
+                }
+                int wwww = uuuuu - 1;
+                int zzzz = b1 & 0x000F;
+                int yyyyyy = b2 & 0x003F;
+                int xxxxxx = b3 & 0x003F;
+                int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);
+                int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;
+
+                // set characters
+                ch[out++] = (char)hs;
+                ch[out++] = (char)ls;
+                count -= 2;
+                continue;
+            }
+
+            // error
+            if (out > offset) {
+                fBuffer[0] = (byte)b0;
+                fOffset = 1;
+                return out - offset;
+            }
+            invalidByte(1, 1);
+        }
+
+        // return number of characters converted
+        if (DEBUG_READ) {
+            if (log.isDebugEnabled())
+                log.debug("read(char[],"+offset+','+length+"): count="+count);
+        }
+        return count;
+
+    } // read(char[],int,int)
+
+    /**
+     * Skip characters.  This method will block until some characters are
+     * available, an I/O error occurs, or the end of the stream is reached.
+     *
+     * @param  n  The number of characters to skip
+     *
+     * @return    The number of characters actually skipped
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    @Override
+    public long skip(long n) throws IOException {
+
+        long remaining = n;
+        final char[] ch = new char[fBuffer.length];
+        do {
+            int length = ch.length < remaining ? ch.length : (int)remaining;
+            int count = read(ch, 0, length);
+            if (count > 0) {
+                remaining -= count;
+            }
+            else {
+                break;
+            }
+        } while (remaining > 0);
+
+        long skipped = n - remaining;
+        return skipped;
+
+    } // skip(long):long
+
+    /**
+     * Tell whether this stream is ready to be read.
+     *
+     * @return True if the next read() is guaranteed not to block for input,
+     * false otherwise.  Note that returning false does not guarantee that the
+     * next read will block.
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    @Override
+    public boolean ready() throws IOException {
+        return false;
+    } // ready()
+
+    /**
+     * Tell whether this stream supports the mark() operation.
+     */
+    @Override
+    public boolean markSupported() {
+        return false;
+    } // markSupported()
+
+    /**
+     * Mark the present position in the stream.  Subsequent calls to reset()
+     * will attempt to reposition the stream to this point.  Not all
+     * character-input streams support the mark() operation.
+     *
+     * @param  readAheadLimit  Limit on the number of characters that may be
+     *                         read while still preserving the mark.  After
+     *                         reading this many characters, attempting to
+     *                         reset the stream may fail.
+     *
+     * @exception  IOException  If the stream does not support mark(),
+     *                          or if some other I/O error occurs
+     */
+    @Override
+    public void mark(int readAheadLimit) throws IOException {
+        throw new IOException(
+                Localizer.getMessage("jsp.error.xml.operationNotSupported",
+                                     "mark()", "UTF-8"));
+    }
+
+    /**
+     * Reset the stream.  If the stream has been marked, then attempt to
+     * reposition it at the mark.  If the stream has not been marked, then
+     * attempt to reset it in some way appropriate to the particular stream,
+     * for example by repositioning it to its starting point.  Not all
+     * character-input streams support the reset() operation, and some support
+     * reset() without supporting mark().
+     *
+     * @exception  IOException  If the stream has not been marked,
+     *                          or if the mark has been invalidated,
+     *                          or if the stream does not support reset(),
+     *                          or if some other I/O error occurs
+     */
+    @Override
+    public void reset() throws IOException {
+        fOffset = 0;
+        fSurrogate = -1;
+    } // reset()
+
+    /**
+     * Close the stream.  Once a stream has been closed, further read(),
+     * ready(), mark(), or reset() invocations will throw an IOException.
+     * Closing a previously-closed stream, however, has no effect.
+     *
+     * @exception  IOException  If an I/O error occurs
+     */
+    @Override
+    public void close() throws IOException {
+        fInputStream.close();
+    } // close()
+
+    //
+    // Private methods
+    //
+
+    /** Throws an exception for expected byte. */
+    private void expectedByte(int position, int count)
+        throws UTFDataFormatException {
+
+        throw new UTFDataFormatException(
+                Localizer.getMessage("jsp.error.xml.expectedByte",
+                                     Integer.toString(position),
+                                     Integer.toString(count)));
+
+    }
+
+    /** Throws an exception for invalid byte. */
+    private void invalidByte(int position, int count)
+        throws UTFDataFormatException {
+
+        throw new UTFDataFormatException(
+                Localizer.getMessage("jsp.error.xml.invalidByte",
+                                     Integer.toString(position),
+                                     Integer.toString(count)));
+    }
+
+    /** Throws an exception for invalid surrogate bits. */
+    private void invalidSurrogate(int uuuuu) throws UTFDataFormatException {
+
+        throw new UTFDataFormatException(
+                Localizer.getMessage("jsp.error.xml.invalidHighSurrogate",
+                                     Integer.toHexString(uuuuu)));
+    }
+
+} // class UTF8Reader