001/****************************************************************
002 * Licensed to the Apache Software Foundation (ASF) under one   *
003 * or more contributor license agreements.  See the NOTICE file *
004 * distributed with this work for additional information        *
005 * regarding copyright ownership.  The ASF licenses this file   *
006 * to you under the Apache License, Version 2.0 (the            *
007 * "License"); you may not use this file except in compliance   *
008 * with the License.  You may obtain a copy of the License at   *
009 *                                                              *
010 *   http://www.apache.org/licenses/LICENSE-2.0                 *
011 *                                                              *
012 * Unless required by applicable law or agreed to in writing,   *
013 * software distributed under the License is distributed on an  *
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
015 * KIND, either express or implied.  See the License for the    *
016 * specific language governing permissions and limitations      *
017 * under the License.                                           *
018 ****************************************************************/
019
020package org.apache.james.mime4j.parser;
021
022import java.io.IOException;
023import java.io.InputStream;
024
025import org.apache.james.mime4j.MimeException;
026import org.apache.james.mime4j.codec.DecodeMonitor;
027import org.apache.james.mime4j.stream.BodyDescriptor;
028import org.apache.james.mime4j.stream.BodyDescriptorBuilder;
029import org.apache.james.mime4j.stream.EntityState;
030import org.apache.james.mime4j.stream.Field;
031import org.apache.james.mime4j.stream.MimeConfig;
032import org.apache.james.mime4j.stream.MimeTokenStream;
033import org.apache.james.mime4j.stream.RecursionMode;
034
035/**
036 * <p>
037 * Parses MIME (or RFC822) message streams of bytes or characters and reports
038 * parsing events to a {@link ContentHandler} instance.
039 * </p>
040 * <p>
041 * Typical usage:<br/>
042 * <pre>
043 *      ContentHandler handler = new MyHandler();
044 *      MimeConfig config = new MimeConfig();
045 *      MimeStreamParser parser = new MimeStreamParser(config);
046 *      parser.setContentHandler(handler);
047 *      InputStream instream = new FileInputStream("mime.msg");
048 *      try {
049 *          parser.parse(instream);
050 *      } finally {
051 *          instream.close();
052 *      }
053 * </pre>
054 */
055public class MimeStreamParser {
056
057    private ContentHandler handler = null;
058    private boolean contentDecoding;
059
060    private final MimeTokenStream mimeTokenStream;
061
062    public MimeStreamParser(MimeTokenStream tokenStream) {
063        super();
064        this.mimeTokenStream = tokenStream;
065        this.contentDecoding = false;
066    }
067
068    public MimeStreamParser(
069            final MimeConfig config,
070            final DecodeMonitor monitor,
071            final BodyDescriptorBuilder bodyDescBuilder) {
072        this(new MimeTokenStream(config != null ? config.clone() : new MimeConfig(),
073                monitor, bodyDescBuilder));
074    }
075
076    public MimeStreamParser(final MimeConfig config) {
077        this(config, null, null);
078    }
079
080    public MimeStreamParser() {
081        this(new MimeTokenStream(new MimeConfig(), null, null));
082    }
083
084    /**
085     * Determines whether this parser automatically decodes body content
086     * based on the on the MIME fields with the standard defaults.
087     */
088    public boolean isContentDecoding() {
089        return contentDecoding;
090    }
091
092    /**
093     * Defines whether parser should automatically decode body content
094     * based on the on the MIME fields with the standard defaults.
095     */
096    public void setContentDecoding(boolean b) {
097        this.contentDecoding = b;
098    }
099
100    /**
101     * Parses a stream of bytes containing a MIME message. Please note that if the
102     * {@link MimeConfig} associated with the mime stream returns a not null Content-Type
103     * value from its {@link MimeConfig#getHeadlessParsing()} method, the message is
104     * assumed to have no head section and the headless parsing mode will be used.
105     *
106     * @param instream the stream to parse.
107     * @throws MimeException if the message can not be processed
108     * @throws IOException on I/O errors.
109     */
110    public void parse(InputStream instream) throws MimeException, IOException {
111        MimeConfig config = mimeTokenStream.getConfig();
112        if (config.getHeadlessParsing() != null) {
113            Field contentType = mimeTokenStream.parseHeadless(
114                    instream, config.getHeadlessParsing());
115            handler.startMessage();
116            handler.startHeader();
117            handler.field(contentType);
118            handler.endHeader();
119        } else {
120            mimeTokenStream.parse(instream);
121        }
122        OUTER: for (;;) {
123            EntityState state = mimeTokenStream.getState();
124            switch (state) {
125                case T_BODY:
126                    BodyDescriptor desc = mimeTokenStream.getBodyDescriptor();
127                    InputStream bodyContent;
128                    if (contentDecoding) {
129                        bodyContent = mimeTokenStream.getDecodedInputStream();
130                    } else {
131                        bodyContent = mimeTokenStream.getInputStream();
132                    }
133                    handler.body(desc, bodyContent);
134                    break;
135                case T_END_BODYPART:
136                    handler.endBodyPart();
137                    break;
138                case T_END_HEADER:
139                    handler.endHeader();
140                    break;
141                case T_END_MESSAGE:
142                    handler.endMessage();
143                    break;
144                case T_END_MULTIPART:
145                    handler.endMultipart();
146                    break;
147                case T_END_OF_STREAM:
148                    break OUTER;
149                case T_EPILOGUE:
150                    handler.epilogue(mimeTokenStream.getInputStream());
151                    break;
152                case T_FIELD:
153                    handler.field(mimeTokenStream.getField());
154                    break;
155                case T_PREAMBLE:
156                    handler.preamble(mimeTokenStream.getInputStream());
157                    break;
158                case T_RAW_ENTITY:
159                    handler.raw(mimeTokenStream.getInputStream());
160                    break;
161                case T_START_BODYPART:
162                    handler.startBodyPart();
163                    break;
164                case T_START_HEADER:
165                    handler.startHeader();
166                    break;
167                case T_START_MESSAGE:
168                    handler.startMessage();
169                    break;
170                case T_START_MULTIPART:
171                    handler.startMultipart(mimeTokenStream.getBodyDescriptor());
172                    break;
173                default:
174                    throw new IllegalStateException("Invalid state: " + state);
175            }
176            state = mimeTokenStream.next();
177        }
178    }
179
180    /**
181     * Determines if this parser is currently in raw mode.
182     *
183     * @return <code>true</code> if in raw mode, <code>false</code>
184     *         otherwise.
185     * @see #setRaw()
186     */
187    public boolean isRaw() {
188        return mimeTokenStream.isRaw();
189    }
190
191    /**
192     * Enables raw mode. In raw mode all future entities (messages
193     * or body parts) in the stream will be reported to the
194     * {@link ContentHandler#raw(InputStream)} handler method only.
195     * The stream will contain the entire unparsed entity contents
196     * including header fields and whatever is in the body.
197     */
198    public void setRaw() {
199        mimeTokenStream.setRecursionMode(RecursionMode.M_RAW);
200    }
201
202    /**
203     * Enables flat mode. In flat mode rfc822 parts are not recursively
204     * parsed and multipart content is handled as a single "simple" stream.
205     */
206    public void setFlat() {
207        mimeTokenStream.setRecursionMode(RecursionMode.M_FLAT);
208    }
209
210    /**
211     * Enables recursive mode. In this mode rfc822 parts are recursively
212     * parsed.
213     */
214    public void setRecurse() {
215        mimeTokenStream.setRecursionMode(RecursionMode.M_RECURSE);
216    }
217
218    /**
219     * Finishes the parsing and stops reading lines.
220     * NOTE: No more lines will be parsed but the parser
221     * will still call
222     * {@link ContentHandler#endMultipart()},
223     * {@link ContentHandler#endBodyPart()},
224     * {@link ContentHandler#endMessage()}, etc to match previous calls
225     * to
226     * {@link ContentHandler#startMultipart(BodyDescriptor)},
227     * {@link ContentHandler#startBodyPart()},
228     * {@link ContentHandler#startMessage()}, etc.
229     */
230    public void stop() {
231        mimeTokenStream.stop();
232    }
233
234    /**
235     * Sets the <code>ContentHandler</code> to use when reporting
236     * parsing events.
237     *
238     * @param h the <code>ContentHandler</code>.
239     */
240    public void setContentHandler(ContentHandler h) {
241        this.handler = h;
242    }
243
244}