001    /****************************************************************
002     * Licensed to the Apache Software Foundation (ASF) under one   *
003     * or more contributor license agreements.  See the NOTICE file *
004     * distributed with this work for additional information        *
005     * regarding copyright ownership.  The ASF licenses this file   *
006     * to you under the Apache License, Version 2.0 (the            *
007     * "License"); you may not use this file except in compliance   *
008     * with the License.  You may obtain a copy of the License at   *
009     *                                                              *
010     *   http://www.apache.org/licenses/LICENSE-2.0                 *
011     *                                                              *
012     * Unless required by applicable law or agreed to in writing,   *
013     * software distributed under the License is distributed on an  *
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015     * KIND, either express or implied.  See the License for the    *
016     * specific language governing permissions and limitations      *
017     * under the License.                                           *
018     ****************************************************************/
019    
020    package org.apache.james.mime4j.parser;
021    
022    import java.io.IOException;
023    import java.util.BitSet;
024    
025    import org.apache.commons.logging.Log;
026    import org.apache.commons.logging.LogFactory;
027    import org.apache.james.mime4j.MimeException;
028    import org.apache.james.mime4j.descriptor.BodyDescriptor;
029    import org.apache.james.mime4j.descriptor.DefaultBodyDescriptor;
030    import org.apache.james.mime4j.descriptor.MaximalBodyDescriptor;
031    import org.apache.james.mime4j.descriptor.MutableBodyDescriptor;
032    import org.apache.james.mime4j.io.LineReaderInputStream;
033    import org.apache.james.mime4j.io.MaxHeaderLimitException;
034    import org.apache.james.mime4j.io.MaxLineLimitException;
035    import org.apache.james.mime4j.util.ByteArrayBuffer;
036    import org.apache.james.mime4j.util.CharsetUtil;
037    
038    /**
039     * Abstract MIME entity.
040     */
041    public abstract class AbstractEntity implements EntityStateMachine {
042    
043        protected final Log log;
044        
045        protected final BodyDescriptor parent;
046        protected final int startState;
047        protected final int endState;
048        protected final MimeEntityConfig config;
049        protected final MutableBodyDescriptor body;
050        
051        protected int state;
052    
053        private final ByteArrayBuffer linebuf;
054    
055        private int lineCount;
056        private Field field;
057        private boolean endOfHeader;
058        private int headerCount;
059    
060        private static final BitSet fieldChars = new BitSet();
061    
062        static {
063            for (int i = 0x21; i <= 0x39; i++) {
064                fieldChars.set(i);
065            }
066            for (int i = 0x3b; i <= 0x7e; i++) {
067                fieldChars.set(i);
068            }
069        }
070    
071        /**
072         * Internal state, not exposed.
073         */
074        private static final int T_IN_BODYPART = -2;
075        /**
076         * Internal state, not exposed.
077         */
078        private static final int T_IN_MESSAGE = -3;
079    
080        AbstractEntity(
081                BodyDescriptor parent,
082                int startState, 
083                int endState,
084                MimeEntityConfig config) {
085            this.log = LogFactory.getLog(getClass());        
086            this.parent = parent;
087            this.state = startState;
088            this.startState = startState;
089            this.endState = endState;
090            this.config = config;
091            this.body = newBodyDescriptor(parent);
092            this.linebuf = new ByteArrayBuffer(64);
093            this.lineCount = 0;
094            this.endOfHeader = false;
095            this.headerCount = 0;
096        }
097    
098        public int getState() {
099            return state;
100        }
101        
102        /**
103         * Creates a new instance of {@link BodyDescriptor}. Subclasses may override
104         * this in order to create body descriptors, that provide more specific
105         * information.
106         */
107        protected MutableBodyDescriptor newBodyDescriptor(BodyDescriptor pParent) {
108            final MutableBodyDescriptor result;
109            if (config.isMaximalBodyDescriptor()) {
110                result = new MaximalBodyDescriptor(pParent);
111            } else {
112                result = new DefaultBodyDescriptor(pParent);
113            }
114            return result;
115        }
116    
117        /**
118         * Returns the current line number or <code>-1</code> if line number
119         * information is not available.
120         */
121        protected abstract int getLineNumber();
122        
123        protected abstract LineReaderInputStream getDataStream();
124        
125        private ByteArrayBuffer fillFieldBuffer() throws IOException, MimeException {
126            if (endOfHeader) 
127                throw new IllegalStateException();
128    
129            int maxLineLen = config.getMaxLineLen();
130            LineReaderInputStream instream = getDataStream();
131            ByteArrayBuffer fieldbuf = new ByteArrayBuffer(64);
132    
133            for (;;) {
134                // If there's still data stuck in the line buffer
135                // copy it to the field buffer
136                int len = linebuf.length();
137                if (maxLineLen > 0 && fieldbuf.length() + len >= maxLineLen) {
138                    throw new MaxLineLimitException("Maximum line length limit exceeded");
139                }
140                if (len > 0) {
141                    fieldbuf.append(linebuf.buffer(), 0, len);
142                }
143                linebuf.clear();
144                if (instream.readLine(linebuf) == -1) {
145                    monitor(Event.HEADERS_PREMATURE_END);
146                    endOfHeader = true;
147                    break;
148                }
149                len = linebuf.length();
150                if (len > 0 && linebuf.byteAt(len - 1) == '\n') {
151                    len--;
152                }
153                if (len > 0 && linebuf.byteAt(len - 1) == '\r') {
154                    len--;
155                }
156                if (len == 0) {
157                    // empty line detected 
158                    endOfHeader = true;
159                    break;
160                }
161                lineCount++;
162                if (lineCount > 1) {
163                    int ch = linebuf.byteAt(0);
164                    if (ch != CharsetUtil.SP && ch != CharsetUtil.HT) {
165                        // new header detected
166                        break;
167                    }
168                }
169            }
170    
171            return fieldbuf;
172        }
173    
174        protected boolean parseField() throws MimeException, IOException {
175            int maxHeaderLimit = config.getMaxHeaderCount();
176            for (;;) {
177                if (endOfHeader) {
178                    return false;
179                }
180                if (headerCount >= maxHeaderLimit) {
181                    throw new MaxHeaderLimitException("Maximum header limit exceeded");
182                }
183    
184                ByteArrayBuffer fieldbuf = fillFieldBuffer();
185                headerCount++;
186    
187                // Strip away line delimiter
188                int len = fieldbuf.length();
189                if (len > 0 && fieldbuf.byteAt(len - 1) == '\n') {
190                    len--;
191                }
192                if (len > 0 && fieldbuf.byteAt(len - 1) == '\r') {
193                    len--;
194                }
195                fieldbuf.setLength(len);
196                
197                boolean valid = true;
198                
199                int pos = fieldbuf.indexOf((byte) ':');
200                if (pos <= 0) {
201                    monitor(Event.INALID_HEADER);
202                    valid = false;
203                } else {
204                    for (int i = 0; i < pos; i++) {
205                        if (!fieldChars.get(fieldbuf.byteAt(i) & 0xff)) {
206                            monitor(Event.INALID_HEADER);
207                            valid = false;
208                            break;
209                        }
210                    }
211                }
212                if (valid) {
213                    field = new RawField(fieldbuf, pos);
214                    body.addField(field);            
215                    return true;
216                }
217            }
218        }
219    
220        /**
221         * <p>Gets a descriptor for the current entity.
222         * This method is valid if {@link #getState()} returns:</p>
223         * <ul>
224         * <li>{@link EntityStates#T_BODY}</li>
225         * <li>{@link EntityStates#T_START_MULTIPART}</li>
226         * <li>{@link EntityStates#T_EPILOGUE}</li>
227         * <li>{@link EntityStates#T_PREAMBLE}</li>
228         * </ul>
229         * @return <code>BodyDescriptor</code>, not nulls
230         */
231        public BodyDescriptor getBodyDescriptor() {
232            switch (getState()) {
233            case EntityStates.T_BODY:
234            case EntityStates.T_START_MULTIPART:
235            case EntityStates.T_PREAMBLE:
236            case EntityStates.T_EPILOGUE:
237            case EntityStates.T_END_OF_STREAM:
238                return body;
239            default:
240                throw new IllegalStateException("Invalid state :" + stateToString(state));
241            }
242        }
243    
244        /**
245         * This method is valid, if {@link #getState()} returns {@link EntityStates#T_FIELD}.
246         * @return String with the fields raw contents.
247         * @throws IllegalStateException {@link #getState()} returns another
248         *   value than {@link EntityStates#T_FIELD}.
249         */
250        public Field getField() {
251            switch (getState()) {
252            case EntityStates.T_FIELD:
253                return field;
254            default:
255                throw new IllegalStateException("Invalid state :" + stateToString(state));
256            }
257        }
258    
259        /**
260         * Monitors the given event.
261         * Subclasses may override to perform actions upon events.
262         * Base implementation logs at warn.
263         * @param event <code>Event</code>, not null
264         * @throws MimeException subclasses may elect to throw this exception upon
265         * invalid content
266         * @throws IOException subclasses may elect to throw this exception
267         */
268        protected void monitor(Event event) throws MimeException, IOException {
269            if (config.isStrictParsing()) {
270                throw new MimeParseEventException(event);
271            } else {
272                warn(event);
273            }
274        }
275        
276        /**
277         * Creates an indicative message suitable for display
278         * based on the given event and the current state of the system.
279         * @param event <code>Event</code>, not null
280         * @return message suitable for use as a message in an exception
281         * or for logging
282         */
283        protected String message(Event event) {
284            final String message;
285            if (event == null) {
286                message = "Event is unexpectedly null.";
287            } else {
288                message = event.toString();
289            }
290    
291            int lineNumber = getLineNumber();
292            if (lineNumber <= 0)
293                return message;
294            else
295                return "Line " + lineNumber + ": " + message;
296        }
297        
298        /**
299         * Logs (at warn) an indicative message based on the given event 
300         * and the current state of the system.
301         * @param event <code>Event</code>, not null
302         */
303        protected void warn(Event event) {
304            if (log.isWarnEnabled()) {
305                log.warn(message(event));
306            }
307        }
308        
309        /**
310         * Logs (at debug) an indicative message based on the given event
311         * and the current state of the system.
312         * @param event <code>Event</code>, not null
313         */
314        protected void debug(Event event) {
315            if (log.isDebugEnabled()) {
316                log.debug(message(event));
317            }
318        }
319    
320        @Override
321        public String toString() {
322            return getClass().getName() + " [" + stateToString(state)
323            + "][" + body.getMimeType() + "][" + body.getBoundary() + "]";
324        }
325    
326        /**
327         * Renders a state as a string suitable for logging.
328         * @param state 
329         * @return rendered as string, not null
330         */
331        public static final String stateToString(int state) {
332            final String result;
333            switch (state) {
334                case EntityStates.T_END_OF_STREAM:
335                    result = "End of stream";
336                    break;
337                case EntityStates.T_START_MESSAGE:
338                    result = "Start message";
339                    break;
340                case EntityStates.T_END_MESSAGE:
341                    result = "End message";
342                    break;
343                case EntityStates.T_RAW_ENTITY:
344                    result = "Raw entity";
345                    break;
346                case EntityStates.T_START_HEADER:
347                    result = "Start header";
348                    break;
349                case EntityStates.T_FIELD:
350                    result = "Field";
351                    break;
352                case EntityStates.T_END_HEADER:
353                    result = "End header";
354                    break;
355                case EntityStates.T_START_MULTIPART:
356                    result = "Start multipart";
357                    break;
358                case EntityStates.T_END_MULTIPART:
359                    result = "End multipart";
360                    break;
361                case EntityStates.T_PREAMBLE:
362                    result = "Preamble";
363                    break;
364                case EntityStates.T_EPILOGUE:
365                    result = "Epilogue";
366                    break;
367                case EntityStates.T_START_BODYPART:
368                    result = "Start bodypart";
369                    break;
370                case EntityStates.T_END_BODYPART:
371                    result = "End bodypart";
372                    break;
373                case EntityStates.T_BODY:
374                    result = "Body";
375                    break;
376                case T_IN_BODYPART:
377                    result = "Bodypart";
378                    break;
379                case T_IN_MESSAGE:
380                    result = "In message";
381                    break;
382                default:
383                    result = "Unknown";
384                    break;
385            }
386            return result;
387        }
388        
389    }