001    /****************************************************************
002     * Licensed to the Apache Software Foundation (ASF) under one   *
003     * or more contributor license agreements.  See the NOTICE file *
004     * distributed with this work for additional information        *
005     * regarding copyright ownership.  The ASF licenses this file   *
006     * to you under the Apache License, Version 2.0 (the            *
007     * "License"); you may not use this file except in compliance   *
008     * with the License.  You may obtain a copy of the License at   *
009     *                                                              *
010     *   http://www.apache.org/licenses/LICENSE-2.0                 *
011     *                                                              *
012     * Unless required by applicable law or agreed to in writing,   *
013     * software distributed under the License is distributed on an  *
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015     * KIND, either express or implied.  See the License for the    *
016     * specific language governing permissions and limitations      *
017     * under the License.                                           *
018     ****************************************************************/
019    
020    package org.apache.james.mime4j.codec;
021    
022    import java.io.IOException;
023    import java.io.InputStream;
024    
025    import org.apache.commons.logging.Log;
026    import org.apache.commons.logging.LogFactory;
027    
028    /**
029     * Performs Quoted-Printable decoding on an underlying stream.
030     */
031    public class QuotedPrintableInputStream extends InputStream {
032        private static Log log = LogFactory.getLog(QuotedPrintableInputStream.class);
033        
034        private InputStream stream;
035        ByteQueue byteq = new ByteQueue();
036        ByteQueue pushbackq = new ByteQueue();
037        private byte state = 0;
038        private boolean closed = false;
039    
040        public QuotedPrintableInputStream(InputStream stream) {
041            this.stream = stream;
042        }
043        
044        /**
045         * Terminates Quoted-Printable coded content. This method does NOT close 
046         * the underlying input stream.
047         * 
048         * @throws IOException on I/O errors.
049         */
050        @Override
051        public void close() throws IOException {
052            this.closed = true;
053        }
054    
055        @Override
056        public int read() throws IOException {
057            if (closed) {
058                throw new IOException("QuotedPrintableInputStream has been closed");
059            }
060            fillBuffer();
061            if (byteq.count() == 0)
062                return -1;
063            else {
064                byte val = byteq.dequeue();
065                if (val >= 0)
066                    return val;
067                else
068                    return val & 0xFF;
069            }
070        }
071    
072        /**
073         * Pulls bytes out of the underlying stream and places them in the
074         * pushback queue.  This is necessary (vs. reading from the
075         * underlying stream directly) to detect and filter out "transport
076         * padding" whitespace, i.e., all whitespace that appears immediately
077         * before a CRLF.
078         *
079         * @throws IOException Underlying stream threw IOException.
080         */
081        private void populatePushbackQueue() throws IOException {
082            //Debug.verify(pushbackq.count() == 0, "PopulatePushbackQueue called when pushback queue was not empty!");
083    
084            if (pushbackq.count() != 0)
085                return;
086    
087            while (true) {
088                int i = stream.read();
089                switch (i) {
090                    case -1:
091                        // stream is done
092                        pushbackq.clear();  // discard any whitespace preceding EOF
093                        return;
094                    case ' ':
095                    case '\t':
096                        pushbackq.enqueue((byte)i);
097                        break;
098                    case '\r':
099                    case '\n':
100                        pushbackq.clear();  // discard any whitespace preceding EOL
101                        pushbackq.enqueue((byte)i);
102                        return;
103                    default:
104                        pushbackq.enqueue((byte)i);
105                        return;
106                }
107            }
108        }
109    
110        /**
111         * Causes the pushback queue to get populated if it is empty, then
112         * consumes and decodes bytes out of it until one or more bytes are
113         * in the byte queue.  This decoding step performs the actual QP
114         * decoding.
115         *
116         * @throws IOException Underlying stream threw IOException.
117         */
118        private void fillBuffer() throws IOException {
119            byte msdChar = 0;  // first digit of escaped num
120            while (byteq.count() == 0) {
121                if (pushbackq.count() == 0) {
122                    populatePushbackQueue();
123                    if (pushbackq.count() == 0)
124                        return;
125                }
126    
127                byte b = pushbackq.dequeue();
128    
129                switch (state) {
130                    case 0:  // start state, no bytes pending
131                        if (b != '=') {
132                            byteq.enqueue(b);
133                            break;  // state remains 0
134                        } else {
135                            state = 1;
136                            break;
137                        }
138                    case 1:  // encountered "=" so far
139                        if (b == '\r') {
140                            state = 2;
141                            break;
142                        } else if ((b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')) {
143                            state = 3;
144                            msdChar = b;  // save until next digit encountered
145                            break;
146                        } else if (b == '=') {
147                            /*
148                             * Special case when == is encountered.
149                             * Emit one = and stay in this state.
150                             */
151                            if (log.isWarnEnabled()) {
152                                log.warn("Malformed MIME; got ==");
153                            }
154                            byteq.enqueue((byte)'=');
155                            break;
156                        } else {
157                            if (log.isWarnEnabled()) {
158                                log.warn("Malformed MIME; expected \\r or "
159                                        + "[0-9A-Z], got " + b);
160                            }
161                            state = 0;
162                            byteq.enqueue((byte)'=');
163                            byteq.enqueue(b);
164                            break;
165                        }
166                    case 2:  // encountered "=\r" so far
167                        if (b == '\n') {
168                            state = 0;
169                            break;
170                        } else {
171                            if (log.isWarnEnabled()) {
172                                log.warn("Malformed MIME; expected " 
173                                        + (int)'\n' + ", got " + b);
174                            }
175                            state = 0;
176                            byteq.enqueue((byte)'=');
177                            byteq.enqueue((byte)'\r');
178                            byteq.enqueue(b);
179                            break;
180                        }
181                    case 3:  // encountered =<digit> so far; expecting another <digit> to complete the octet
182                        if ((b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')) {
183                            byte msd = asciiCharToNumericValue(msdChar);
184                            byte low = asciiCharToNumericValue(b);
185                            state = 0;
186                            byteq.enqueue((byte)((msd << 4) | low));
187                            break;
188                        } else {
189                            if (log.isWarnEnabled()) {
190                                log.warn("Malformed MIME; expected "
191                                         + "[0-9A-Z], got " + b);
192                            }
193                            state = 0;
194                            byteq.enqueue((byte)'=');
195                            byteq.enqueue(msdChar);
196                            byteq.enqueue(b);
197                            break;
198                        }
199                    default:  // should never happen
200                        log.error("Illegal state: " + state);
201                        state = 0;
202                        byteq.enqueue(b);
203                        break;
204                }
205            }
206        }
207    
208        /**
209         * Converts '0' => 0, 'A' => 10, etc.
210         * @param c ASCII character value.
211         * @return Numeric value of hexadecimal character.
212         */
213        private byte asciiCharToNumericValue(byte c) {
214            if (c >= '0' && c <= '9') {
215                return (byte)(c - '0');
216            } else if (c >= 'A' && c <= 'Z') {
217                return (byte)(0xA + (c - 'A'));
218            } else if (c >= 'a' && c <= 'z') {
219                return (byte)(0xA + (c - 'a'));
220            } else {
221                /*
222                 * This should never happen since all calls to this method
223                 * are preceded by a check that c is in [0-9A-Za-z]
224                 */
225                throw new IllegalArgumentException((char) c 
226                        + " is not a hexadecimal digit");
227            }
228        }
229    
230    }