001 /**************************************************************** 002 * Licensed to the Apache Software Foundation (ASF) under one * 003 * or more contributor license agreements. See the NOTICE file * 004 * distributed with this work for additional information * 005 * regarding copyright ownership. The ASF licenses this file * 006 * to you under the Apache License, Version 2.0 (the * 007 * "License"); you may not use this file except in compliance * 008 * with the License. You may obtain a copy of the License at * 009 * * 010 * http://www.apache.org/licenses/LICENSE-2.0 * 011 * * 012 * Unless required by applicable law or agreed to in writing, * 013 * software distributed under the License is distributed on an * 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 015 * KIND, either express or implied. See the License for the * 016 * specific language governing permissions and limitations * 017 * under the License. * 018 ****************************************************************/ 019 020 package org.apache.james.mime4j.parser; 021 022 import org.apache.james.mime4j.MimeException; 023 import org.apache.james.mime4j.descriptor.BodyDescriptor; 024 025 import java.io.IOException; 026 import java.io.InputStream; 027 028 /** 029 * <p> 030 * Parses MIME (or RFC822) message streams of bytes or characters and reports 031 * parsing events to a <code>ContentHandler</code> instance. 032 * </p> 033 * <p> 034 * Typical usage:<br/> 035 * <pre> 036 * ContentHandler handler = new MyHandler(); 037 * MimeStreamParser parser = new MimeStreamParser(); 038 * parser.setContentHandler(handler); 039 * parser.parse(new FileInputStream("mime.msg")); 040 * </pre> 041 */ 042 public class MimeStreamParser { 043 044 private ContentHandler handler = null; 045 private boolean contentDecoding; 046 047 private final MimeTokenStream mimeTokenStream; 048 049 public MimeStreamParser(final MimeEntityConfig config) { 050 super(); 051 MimeEntityConfig localConfig; 052 if (config != null) { 053 localConfig = config.clone(); 054 } else { 055 localConfig = new MimeEntityConfig(); 056 } 057 this.mimeTokenStream = new MimeTokenStream(localConfig); 058 this.contentDecoding = false; 059 } 060 061 public MimeStreamParser() { 062 this(null); 063 } 064 065 /** 066 * Determines whether this parser automatically decodes body content 067 * based on the on the MIME fields with the standard defaults. 068 */ 069 public boolean isContentDecoding() { 070 return contentDecoding; 071 } 072 073 /** 074 * Defines whether parser should automatically decode body content 075 * based on the on the MIME fields with the standard defaults. 076 */ 077 public void setContentDecoding(boolean b) { 078 this.contentDecoding = b; 079 } 080 081 /** 082 * Parses a stream of bytes containing a MIME message. 083 * 084 * @param is the stream to parse. 085 * @throws MimeException if the message can not be processed 086 * @throws IOException on I/O errors. 087 */ 088 public void parse(InputStream is) throws MimeException, IOException { 089 mimeTokenStream.parse(is); 090 OUTER: for (;;) { 091 int state = mimeTokenStream.getState(); 092 switch (state) { 093 case MimeTokenStream.T_BODY: 094 BodyDescriptor desc = mimeTokenStream.getBodyDescriptor(); 095 InputStream bodyContent; 096 if (contentDecoding) { 097 bodyContent = mimeTokenStream.getDecodedInputStream(); 098 } else { 099 bodyContent = mimeTokenStream.getInputStream(); 100 } 101 handler.body(desc, bodyContent); 102 break; 103 case MimeTokenStream.T_END_BODYPART: 104 handler.endBodyPart(); 105 break; 106 case MimeTokenStream.T_END_HEADER: 107 handler.endHeader(); 108 break; 109 case MimeTokenStream.T_END_MESSAGE: 110 handler.endMessage(); 111 break; 112 case MimeTokenStream.T_END_MULTIPART: 113 handler.endMultipart(); 114 break; 115 case MimeTokenStream.T_END_OF_STREAM: 116 break OUTER; 117 case MimeTokenStream.T_EPILOGUE: 118 handler.epilogue(mimeTokenStream.getInputStream()); 119 break; 120 case MimeTokenStream.T_FIELD: 121 handler.field(mimeTokenStream.getField()); 122 break; 123 case MimeTokenStream.T_PREAMBLE: 124 handler.preamble(mimeTokenStream.getInputStream()); 125 break; 126 case MimeTokenStream.T_RAW_ENTITY: 127 handler.raw(mimeTokenStream.getInputStream()); 128 break; 129 case MimeTokenStream.T_START_BODYPART: 130 handler.startBodyPart(); 131 break; 132 case MimeTokenStream.T_START_HEADER: 133 handler.startHeader(); 134 break; 135 case MimeTokenStream.T_START_MESSAGE: 136 handler.startMessage(); 137 break; 138 case MimeTokenStream.T_START_MULTIPART: 139 handler.startMultipart(mimeTokenStream.getBodyDescriptor()); 140 break; 141 default: 142 throw new IllegalStateException("Invalid state: " + state); 143 } 144 state = mimeTokenStream.next(); 145 } 146 } 147 148 /** 149 * Determines if this parser is currently in raw mode. 150 * 151 * @return <code>true</code> if in raw mode, <code>false</code> 152 * otherwise. 153 * @see #setRaw(boolean) 154 */ 155 public boolean isRaw() { 156 return mimeTokenStream.isRaw(); 157 } 158 159 /** 160 * Enables or disables raw mode. In raw mode all future entities 161 * (messages or body parts) in the stream will be reported to the 162 * {@link ContentHandler#raw(InputStream)} handler method only. 163 * The stream will contain the entire unparsed entity contents 164 * including header fields and whatever is in the body. 165 * 166 * @param raw <code>true</code> enables raw mode, <code>false</code> 167 * disables it. 168 */ 169 public void setRaw(boolean raw) { 170 mimeTokenStream.setRecursionMode(MimeTokenStream.M_RAW); 171 } 172 173 /** 174 * Finishes the parsing and stops reading lines. 175 * NOTE: No more lines will be parsed but the parser 176 * will still call 177 * {@link ContentHandler#endMultipart()}, 178 * {@link ContentHandler#endBodyPart()}, 179 * {@link ContentHandler#endMessage()}, etc to match previous calls 180 * to 181 * {@link ContentHandler#startMultipart(BodyDescriptor)}, 182 * {@link ContentHandler#startBodyPart()}, 183 * {@link ContentHandler#startMessage()}, etc. 184 */ 185 public void stop() { 186 mimeTokenStream.stop(); 187 } 188 189 /** 190 * Sets the <code>ContentHandler</code> to use when reporting 191 * parsing events. 192 * 193 * @param h the <code>ContentHandler</code>. 194 */ 195 public void setContentHandler(ContentHandler h) { 196 this.handler = h; 197 } 198 199 }