001    /****************************************************************
002     * Licensed to the Apache Software Foundation (ASF) under one   *
003     * or more contributor license agreements.  See the NOTICE file *
004     * distributed with this work for additional information        *
005     * regarding copyright ownership.  The ASF licenses this file   *
006     * to you under the Apache License, Version 2.0 (the            *
007     * "License"); you may not use this file except in compliance   *
008     * with the License.  You may obtain a copy of the License at   *
009     *                                                              *
010     *   http://www.apache.org/licenses/LICENSE-2.0                 *
011     *                                                              *
012     * Unless required by applicable law or agreed to in writing,   *
013     * software distributed under the License is distributed on an  *
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015     * KIND, either express or implied.  See the License for the    *
016     * specific language governing permissions and limitations      *
017     * under the License.                                           *
018     ****************************************************************/
019    
020    package org.apache.james.mime4j.parser;
021    
022    import org.apache.james.mime4j.MimeException;
023    import org.apache.james.mime4j.descriptor.BodyDescriptor;
024    
025    import java.io.IOException;
026    import java.io.InputStream;
027    
028    /**
029     * <p>
030     * Parses MIME (or RFC822) message streams of bytes or characters and reports 
031     * parsing events to a <code>ContentHandler</code> instance.
032     * </p>
033     * <p>
034     * Typical usage:<br/>
035     * <pre>
036     *      ContentHandler handler = new MyHandler();
037     *      MimeStreamParser parser = new MimeStreamParser();
038     *      parser.setContentHandler(handler);
039     *      parser.parse(new FileInputStream("mime.msg"));
040     * </pre>
041     */
042    public class MimeStreamParser {
043    
044        private ContentHandler handler = null;
045        private boolean contentDecoding;
046        
047        private final MimeTokenStream mimeTokenStream;
048    
049        public MimeStreamParser(final MimeEntityConfig config) {
050            super();
051            MimeEntityConfig localConfig;
052            if (config != null) {
053                localConfig = config.clone();
054            } else {
055                localConfig = new MimeEntityConfig();
056            }
057            this.mimeTokenStream = new MimeTokenStream(localConfig);
058            this.contentDecoding = false;
059        }
060        
061        public MimeStreamParser() {
062            this(null);
063        }
064        
065        /**
066         * Determines whether this parser automatically decodes body content
067         * based on the on the MIME fields with the standard defaults.
068         */ 
069        public boolean isContentDecoding() {
070            return contentDecoding;
071        }
072    
073        /**
074         * Defines whether parser should automatically decode body content
075         * based on the on the MIME fields with the standard defaults.
076         */ 
077        public void setContentDecoding(boolean b) {
078            this.contentDecoding = b;
079        }
080    
081        /**
082         * Parses a stream of bytes containing a MIME message.
083         * 
084         * @param is the stream to parse.
085         * @throws MimeException if the message can not be processed
086         * @throws IOException on I/O errors.
087         */
088        public void parse(InputStream is) throws MimeException, IOException {
089            mimeTokenStream.parse(is);
090            OUTER: for (;;) {
091                int state = mimeTokenStream.getState();
092                switch (state) {
093                    case MimeTokenStream.T_BODY:
094                        BodyDescriptor desc = mimeTokenStream.getBodyDescriptor();
095                        InputStream bodyContent;
096                        if (contentDecoding) {
097                            bodyContent = mimeTokenStream.getDecodedInputStream(); 
098                        } else {
099                            bodyContent = mimeTokenStream.getInputStream(); 
100                        }
101                        handler.body(desc, bodyContent);
102                        break;
103                    case MimeTokenStream.T_END_BODYPART:
104                        handler.endBodyPart();
105                        break;
106                    case MimeTokenStream.T_END_HEADER:
107                        handler.endHeader();
108                        break;
109                    case MimeTokenStream.T_END_MESSAGE:
110                        handler.endMessage();
111                        break;
112                    case MimeTokenStream.T_END_MULTIPART:
113                        handler.endMultipart();
114                        break;
115                    case MimeTokenStream.T_END_OF_STREAM:
116                        break OUTER;
117                    case MimeTokenStream.T_EPILOGUE:
118                        handler.epilogue(mimeTokenStream.getInputStream());
119                        break;
120                    case MimeTokenStream.T_FIELD:
121                        handler.field(mimeTokenStream.getField());
122                        break;
123                    case MimeTokenStream.T_PREAMBLE:
124                        handler.preamble(mimeTokenStream.getInputStream());
125                        break;
126                    case MimeTokenStream.T_RAW_ENTITY:
127                        handler.raw(mimeTokenStream.getInputStream());
128                        break;
129                    case MimeTokenStream.T_START_BODYPART:
130                        handler.startBodyPart();
131                        break;
132                    case MimeTokenStream.T_START_HEADER:
133                        handler.startHeader();
134                        break;
135                    case MimeTokenStream.T_START_MESSAGE:
136                        handler.startMessage();
137                        break;
138                    case MimeTokenStream.T_START_MULTIPART:
139                        handler.startMultipart(mimeTokenStream.getBodyDescriptor());
140                        break;
141                    default:
142                        throw new IllegalStateException("Invalid state: " + state);
143                }
144                state = mimeTokenStream.next();
145            }
146        }
147        
148        /**
149         * Determines if this parser is currently in raw mode.
150         * 
151         * @return <code>true</code> if in raw mode, <code>false</code>
152         *         otherwise.
153         * @see #setRaw(boolean)
154         */
155        public boolean isRaw() {
156            return mimeTokenStream.isRaw();
157        }
158        
159        /**
160         * Enables or disables raw mode. In raw mode all future entities 
161         * (messages or body parts) in the stream will be reported to the
162         * {@link ContentHandler#raw(InputStream)} handler method only.
163         * The stream will contain the entire unparsed entity contents 
164         * including header fields and whatever is in the body.
165         * 
166         * @param raw <code>true</code> enables raw mode, <code>false</code>
167         *        disables it.
168         */
169        public void setRaw(boolean raw) {
170            mimeTokenStream.setRecursionMode(MimeTokenStream.M_RAW);
171        }
172        
173        /**
174         * Finishes the parsing and stops reading lines.
175         * NOTE: No more lines will be parsed but the parser
176         * will still call 
177         * {@link ContentHandler#endMultipart()},
178         * {@link ContentHandler#endBodyPart()},
179         * {@link ContentHandler#endMessage()}, etc to match previous calls
180         * to 
181         * {@link ContentHandler#startMultipart(BodyDescriptor)},
182         * {@link ContentHandler#startBodyPart()},
183         * {@link ContentHandler#startMessage()}, etc.
184         */
185        public void stop() {
186            mimeTokenStream.stop();
187        }
188        
189        /**
190         * Sets the <code>ContentHandler</code> to use when reporting 
191         * parsing events.
192         * 
193         * @param h the <code>ContentHandler</code>.
194         */
195        public void setContentHandler(ContentHandler h) {
196            this.handler = h;
197        }
198    
199    }