001 /**************************************************************** 002 * Licensed to the Apache Software Foundation (ASF) under one * 003 * or more contributor license agreements. See the NOTICE file * 004 * distributed with this work for additional information * 005 * regarding copyright ownership. The ASF licenses this file * 006 * to you under the Apache License, Version 2.0 (the * 007 * "License"); you may not use this file except in compliance * 008 * with the License. You may obtain a copy of the License at * 009 * * 010 * http://www.apache.org/licenses/LICENSE-2.0 * 011 * * 012 * Unless required by applicable law or agreed to in writing, * 013 * software distributed under the License is distributed on an * 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 015 * KIND, either express or implied. See the License for the * 016 * specific language governing permissions and limitations * 017 * under the License. * 018 ****************************************************************/ 019 020 package org.apache.james.mime4j.codec; 021 022 import java.io.IOException; 023 import java.io.InputStream; 024 025 import org.apache.commons.logging.Log; 026 import org.apache.commons.logging.LogFactory; 027 028 /** 029 * Performs Quoted-Printable decoding on an underlying stream. 030 */ 031 public class QuotedPrintableInputStream extends InputStream { 032 private static Log log = LogFactory.getLog(QuotedPrintableInputStream.class); 033 034 private InputStream stream; 035 ByteQueue byteq = new ByteQueue(); 036 ByteQueue pushbackq = new ByteQueue(); 037 private byte state = 0; 038 private boolean closed = false; 039 040 public QuotedPrintableInputStream(InputStream stream) { 041 this.stream = stream; 042 } 043 044 /** 045 * Terminates Quoted-Printable coded content. This method does NOT close 046 * the underlying input stream. 047 * 048 * @throws IOException on I/O errors. 049 */ 050 @Override 051 public void close() throws IOException { 052 this.closed = true; 053 } 054 055 @Override 056 public int read() throws IOException { 057 if (closed) { 058 throw new IOException("QuotedPrintableInputStream has been closed"); 059 } 060 fillBuffer(); 061 if (byteq.count() == 0) 062 return -1; 063 else { 064 byte val = byteq.dequeue(); 065 if (val >= 0) 066 return val; 067 else 068 return val & 0xFF; 069 } 070 } 071 072 /** 073 * Pulls bytes out of the underlying stream and places them in the 074 * pushback queue. This is necessary (vs. reading from the 075 * underlying stream directly) to detect and filter out "transport 076 * padding" whitespace, i.e., all whitespace that appears immediately 077 * before a CRLF. 078 * 079 * @throws IOException Underlying stream threw IOException. 080 */ 081 private void populatePushbackQueue() throws IOException { 082 //Debug.verify(pushbackq.count() == 0, "PopulatePushbackQueue called when pushback queue was not empty!"); 083 084 if (pushbackq.count() != 0) 085 return; 086 087 while (true) { 088 int i = stream.read(); 089 switch (i) { 090 case -1: 091 // stream is done 092 pushbackq.clear(); // discard any whitespace preceding EOF 093 return; 094 case ' ': 095 case '\t': 096 pushbackq.enqueue((byte)i); 097 break; 098 case '\r': 099 case '\n': 100 pushbackq.clear(); // discard any whitespace preceding EOL 101 pushbackq.enqueue((byte)i); 102 return; 103 default: 104 pushbackq.enqueue((byte)i); 105 return; 106 } 107 } 108 } 109 110 /** 111 * Causes the pushback queue to get populated if it is empty, then 112 * consumes and decodes bytes out of it until one or more bytes are 113 * in the byte queue. This decoding step performs the actual QP 114 * decoding. 115 * 116 * @throws IOException Underlying stream threw IOException. 117 */ 118 private void fillBuffer() throws IOException { 119 byte msdChar = 0; // first digit of escaped num 120 while (byteq.count() == 0) { 121 if (pushbackq.count() == 0) { 122 populatePushbackQueue(); 123 if (pushbackq.count() == 0) 124 return; 125 } 126 127 byte b = pushbackq.dequeue(); 128 129 switch (state) { 130 case 0: // start state, no bytes pending 131 if (b != '=') { 132 byteq.enqueue(b); 133 break; // state remains 0 134 } else { 135 state = 1; 136 break; 137 } 138 case 1: // encountered "=" so far 139 if (b == '\r') { 140 state = 2; 141 break; 142 } else if ((b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')) { 143 state = 3; 144 msdChar = b; // save until next digit encountered 145 break; 146 } else if (b == '=') { 147 /* 148 * Special case when == is encountered. 149 * Emit one = and stay in this state. 150 */ 151 if (log.isWarnEnabled()) { 152 log.warn("Malformed MIME; got =="); 153 } 154 byteq.enqueue((byte)'='); 155 break; 156 } else { 157 if (log.isWarnEnabled()) { 158 log.warn("Malformed MIME; expected \\r or " 159 + "[0-9A-Z], got " + b); 160 } 161 state = 0; 162 byteq.enqueue((byte)'='); 163 byteq.enqueue(b); 164 break; 165 } 166 case 2: // encountered "=\r" so far 167 if (b == '\n') { 168 state = 0; 169 break; 170 } else { 171 if (log.isWarnEnabled()) { 172 log.warn("Malformed MIME; expected " 173 + (int)'\n' + ", got " + b); 174 } 175 state = 0; 176 byteq.enqueue((byte)'='); 177 byteq.enqueue((byte)'\r'); 178 byteq.enqueue(b); 179 break; 180 } 181 case 3: // encountered =<digit> so far; expecting another <digit> to complete the octet 182 if ((b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')) { 183 byte msd = asciiCharToNumericValue(msdChar); 184 byte low = asciiCharToNumericValue(b); 185 state = 0; 186 byteq.enqueue((byte)((msd << 4) | low)); 187 break; 188 } else { 189 if (log.isWarnEnabled()) { 190 log.warn("Malformed MIME; expected " 191 + "[0-9A-Z], got " + b); 192 } 193 state = 0; 194 byteq.enqueue((byte)'='); 195 byteq.enqueue(msdChar); 196 byteq.enqueue(b); 197 break; 198 } 199 default: // should never happen 200 log.error("Illegal state: " + state); 201 state = 0; 202 byteq.enqueue(b); 203 break; 204 } 205 } 206 } 207 208 /** 209 * Converts '0' => 0, 'A' => 10, etc. 210 * @param c ASCII character value. 211 * @return Numeric value of hexadecimal character. 212 */ 213 private byte asciiCharToNumericValue(byte c) { 214 if (c >= '0' && c <= '9') { 215 return (byte)(c - '0'); 216 } else if (c >= 'A' && c <= 'Z') { 217 return (byte)(0xA + (c - 'A')); 218 } else if (c >= 'a' && c <= 'z') { 219 return (byte)(0xA + (c - 'a')); 220 } else { 221 /* 222 * This should never happen since all calls to this method 223 * are preceded by a check that c is in [0-9A-Za-z] 224 */ 225 throw new IllegalArgumentException((char) c 226 + " is not a hexadecimal digit"); 227 } 228 } 229 230 }