001 /**************************************************************** 002 * Licensed to the Apache Software Foundation (ASF) under one * 003 * or more contributor license agreements. See the NOTICE file * 004 * distributed with this work for additional information * 005 * regarding copyright ownership. The ASF licenses this file * 006 * to you under the Apache License, Version 2.0 (the * 007 * "License"); you may not use this file except in compliance * 008 * with the License. You may obtain a copy of the License at * 009 * * 010 * http://www.apache.org/licenses/LICENSE-2.0 * 011 * * 012 * Unless required by applicable law or agreed to in writing, * 013 * software distributed under the License is distributed on an * 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 015 * KIND, either express or implied. See the License for the * 016 * specific language governing permissions and limitations * 017 * under the License. * 018 ****************************************************************/ 019 020 package org.apache.james.mime4j.parser; 021 022 import java.io.IOException; 023 import java.util.BitSet; 024 025 import org.apache.commons.logging.Log; 026 import org.apache.commons.logging.LogFactory; 027 import org.apache.james.mime4j.MimeException; 028 import org.apache.james.mime4j.descriptor.BodyDescriptor; 029 import org.apache.james.mime4j.descriptor.DefaultBodyDescriptor; 030 import org.apache.james.mime4j.descriptor.MaximalBodyDescriptor; 031 import org.apache.james.mime4j.descriptor.MutableBodyDescriptor; 032 import org.apache.james.mime4j.io.LineReaderInputStream; 033 import org.apache.james.mime4j.io.MaxHeaderLimitException; 034 import org.apache.james.mime4j.io.MaxLineLimitException; 035 import org.apache.james.mime4j.util.ByteArrayBuffer; 036 import org.apache.james.mime4j.util.CharsetUtil; 037 038 /** 039 * Abstract MIME entity. 040 */ 041 public abstract class AbstractEntity implements EntityStateMachine { 042 043 protected final Log log; 044 045 protected final BodyDescriptor parent; 046 protected final int startState; 047 protected final int endState; 048 protected final MimeEntityConfig config; 049 protected final MutableBodyDescriptor body; 050 051 protected int state; 052 053 private final ByteArrayBuffer linebuf; 054 055 private int lineCount; 056 private Field field; 057 private boolean endOfHeader; 058 private int headerCount; 059 060 private static final BitSet fieldChars = new BitSet(); 061 062 static { 063 for (int i = 0x21; i <= 0x39; i++) { 064 fieldChars.set(i); 065 } 066 for (int i = 0x3b; i <= 0x7e; i++) { 067 fieldChars.set(i); 068 } 069 } 070 071 /** 072 * Internal state, not exposed. 073 */ 074 private static final int T_IN_BODYPART = -2; 075 /** 076 * Internal state, not exposed. 077 */ 078 private static final int T_IN_MESSAGE = -3; 079 080 AbstractEntity( 081 BodyDescriptor parent, 082 int startState, 083 int endState, 084 MimeEntityConfig config) { 085 this.log = LogFactory.getLog(getClass()); 086 this.parent = parent; 087 this.state = startState; 088 this.startState = startState; 089 this.endState = endState; 090 this.config = config; 091 this.body = newBodyDescriptor(parent); 092 this.linebuf = new ByteArrayBuffer(64); 093 this.lineCount = 0; 094 this.endOfHeader = false; 095 this.headerCount = 0; 096 } 097 098 public int getState() { 099 return state; 100 } 101 102 /** 103 * Creates a new instance of {@link BodyDescriptor}. Subclasses may override 104 * this in order to create body descriptors, that provide more specific 105 * information. 106 */ 107 protected MutableBodyDescriptor newBodyDescriptor(BodyDescriptor pParent) { 108 final MutableBodyDescriptor result; 109 if (config.isMaximalBodyDescriptor()) { 110 result = new MaximalBodyDescriptor(pParent); 111 } else { 112 result = new DefaultBodyDescriptor(pParent); 113 } 114 return result; 115 } 116 117 /** 118 * Returns the current line number or <code>-1</code> if line number 119 * information is not available. 120 */ 121 protected abstract int getLineNumber(); 122 123 protected abstract LineReaderInputStream getDataStream(); 124 125 private ByteArrayBuffer fillFieldBuffer() throws IOException, MimeException { 126 if (endOfHeader) 127 throw new IllegalStateException(); 128 129 int maxLineLen = config.getMaxLineLen(); 130 LineReaderInputStream instream = getDataStream(); 131 ByteArrayBuffer fieldbuf = new ByteArrayBuffer(64); 132 133 for (;;) { 134 // If there's still data stuck in the line buffer 135 // copy it to the field buffer 136 int len = linebuf.length(); 137 if (maxLineLen > 0 && fieldbuf.length() + len >= maxLineLen) { 138 throw new MaxLineLimitException("Maximum line length limit exceeded"); 139 } 140 if (len > 0) { 141 fieldbuf.append(linebuf.buffer(), 0, len); 142 } 143 linebuf.clear(); 144 if (instream.readLine(linebuf) == -1) { 145 monitor(Event.HEADERS_PREMATURE_END); 146 endOfHeader = true; 147 break; 148 } 149 len = linebuf.length(); 150 if (len > 0 && linebuf.byteAt(len - 1) == '\n') { 151 len--; 152 } 153 if (len > 0 && linebuf.byteAt(len - 1) == '\r') { 154 len--; 155 } 156 if (len == 0) { 157 // empty line detected 158 endOfHeader = true; 159 break; 160 } 161 lineCount++; 162 if (lineCount > 1) { 163 int ch = linebuf.byteAt(0); 164 if (ch != CharsetUtil.SP && ch != CharsetUtil.HT) { 165 // new header detected 166 break; 167 } 168 } 169 } 170 171 return fieldbuf; 172 } 173 174 protected boolean parseField() throws MimeException, IOException { 175 int maxHeaderLimit = config.getMaxHeaderCount(); 176 for (;;) { 177 if (endOfHeader) { 178 return false; 179 } 180 if (headerCount >= maxHeaderLimit) { 181 throw new MaxHeaderLimitException("Maximum header limit exceeded"); 182 } 183 184 ByteArrayBuffer fieldbuf = fillFieldBuffer(); 185 headerCount++; 186 187 // Strip away line delimiter 188 int len = fieldbuf.length(); 189 if (len > 0 && fieldbuf.byteAt(len - 1) == '\n') { 190 len--; 191 } 192 if (len > 0 && fieldbuf.byteAt(len - 1) == '\r') { 193 len--; 194 } 195 fieldbuf.setLength(len); 196 197 boolean valid = true; 198 199 int pos = fieldbuf.indexOf((byte) ':'); 200 if (pos <= 0) { 201 monitor(Event.INALID_HEADER); 202 valid = false; 203 } else { 204 for (int i = 0; i < pos; i++) { 205 if (!fieldChars.get(fieldbuf.byteAt(i) & 0xff)) { 206 monitor(Event.INALID_HEADER); 207 valid = false; 208 break; 209 } 210 } 211 } 212 if (valid) { 213 field = new RawField(fieldbuf, pos); 214 body.addField(field); 215 return true; 216 } 217 } 218 } 219 220 /** 221 * <p>Gets a descriptor for the current entity. 222 * This method is valid if {@link #getState()} returns:</p> 223 * <ul> 224 * <li>{@link EntityStates#T_BODY}</li> 225 * <li>{@link EntityStates#T_START_MULTIPART}</li> 226 * <li>{@link EntityStates#T_EPILOGUE}</li> 227 * <li>{@link EntityStates#T_PREAMBLE}</li> 228 * </ul> 229 * @return <code>BodyDescriptor</code>, not nulls 230 */ 231 public BodyDescriptor getBodyDescriptor() { 232 switch (getState()) { 233 case EntityStates.T_BODY: 234 case EntityStates.T_START_MULTIPART: 235 case EntityStates.T_PREAMBLE: 236 case EntityStates.T_EPILOGUE: 237 case EntityStates.T_END_OF_STREAM: 238 return body; 239 default: 240 throw new IllegalStateException("Invalid state :" + stateToString(state)); 241 } 242 } 243 244 /** 245 * This method is valid, if {@link #getState()} returns {@link EntityStates#T_FIELD}. 246 * @return String with the fields raw contents. 247 * @throws IllegalStateException {@link #getState()} returns another 248 * value than {@link EntityStates#T_FIELD}. 249 */ 250 public Field getField() { 251 switch (getState()) { 252 case EntityStates.T_FIELD: 253 return field; 254 default: 255 throw new IllegalStateException("Invalid state :" + stateToString(state)); 256 } 257 } 258 259 /** 260 * Monitors the given event. 261 * Subclasses may override to perform actions upon events. 262 * Base implementation logs at warn. 263 * @param event <code>Event</code>, not null 264 * @throws MimeException subclasses may elect to throw this exception upon 265 * invalid content 266 * @throws IOException subclasses may elect to throw this exception 267 */ 268 protected void monitor(Event event) throws MimeException, IOException { 269 if (config.isStrictParsing()) { 270 throw new MimeParseEventException(event); 271 } else { 272 warn(event); 273 } 274 } 275 276 /** 277 * Creates an indicative message suitable for display 278 * based on the given event and the current state of the system. 279 * @param event <code>Event</code>, not null 280 * @return message suitable for use as a message in an exception 281 * or for logging 282 */ 283 protected String message(Event event) { 284 final String message; 285 if (event == null) { 286 message = "Event is unexpectedly null."; 287 } else { 288 message = event.toString(); 289 } 290 291 int lineNumber = getLineNumber(); 292 if (lineNumber <= 0) 293 return message; 294 else 295 return "Line " + lineNumber + ": " + message; 296 } 297 298 /** 299 * Logs (at warn) an indicative message based on the given event 300 * and the current state of the system. 301 * @param event <code>Event</code>, not null 302 */ 303 protected void warn(Event event) { 304 if (log.isWarnEnabled()) { 305 log.warn(message(event)); 306 } 307 } 308 309 /** 310 * Logs (at debug) an indicative message based on the given event 311 * and the current state of the system. 312 * @param event <code>Event</code>, not null 313 */ 314 protected void debug(Event event) { 315 if (log.isDebugEnabled()) { 316 log.debug(message(event)); 317 } 318 } 319 320 @Override 321 public String toString() { 322 return getClass().getName() + " [" + stateToString(state) 323 + "][" + body.getMimeType() + "][" + body.getBoundary() + "]"; 324 } 325 326 /** 327 * Renders a state as a string suitable for logging. 328 * @param state 329 * @return rendered as string, not null 330 */ 331 public static final String stateToString(int state) { 332 final String result; 333 switch (state) { 334 case EntityStates.T_END_OF_STREAM: 335 result = "End of stream"; 336 break; 337 case EntityStates.T_START_MESSAGE: 338 result = "Start message"; 339 break; 340 case EntityStates.T_END_MESSAGE: 341 result = "End message"; 342 break; 343 case EntityStates.T_RAW_ENTITY: 344 result = "Raw entity"; 345 break; 346 case EntityStates.T_START_HEADER: 347 result = "Start header"; 348 break; 349 case EntityStates.T_FIELD: 350 result = "Field"; 351 break; 352 case EntityStates.T_END_HEADER: 353 result = "End header"; 354 break; 355 case EntityStates.T_START_MULTIPART: 356 result = "Start multipart"; 357 break; 358 case EntityStates.T_END_MULTIPART: 359 result = "End multipart"; 360 break; 361 case EntityStates.T_PREAMBLE: 362 result = "Preamble"; 363 break; 364 case EntityStates.T_EPILOGUE: 365 result = "Epilogue"; 366 break; 367 case EntityStates.T_START_BODYPART: 368 result = "Start bodypart"; 369 break; 370 case EntityStates.T_END_BODYPART: 371 result = "End bodypart"; 372 break; 373 case EntityStates.T_BODY: 374 result = "Body"; 375 break; 376 case T_IN_BODYPART: 377 result = "Bodypart"; 378 break; 379 case T_IN_MESSAGE: 380 result = "In message"; 381 break; 382 default: 383 result = "Unknown"; 384 break; 385 } 386 return result; 387 } 388 389 }