001 /**************************************************************** 002 * Licensed to the Apache Software Foundation (ASF) under one * 003 * or more contributor license agreements. See the NOTICE file * 004 * distributed with this work for additional information * 005 * regarding copyright ownership. The ASF licenses this file * 006 * to you under the Apache License, Version 2.0 (the * 007 * "License"); you may not use this file except in compliance * 008 * with the License. You may obtain a copy of the License at * 009 * * 010 * http://www.apache.org/licenses/LICENSE-2.0 * 011 * * 012 * Unless required by applicable law or agreed to in writing, * 013 * software distributed under the License is distributed on an * 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 015 * KIND, either express or implied. See the License for the * 016 * specific language governing permissions and limitations * 017 * under the License. * 018 ****************************************************************/ 019 020 package org.apache.james.mime4j.util; 021 022 import java.text.DateFormat; 023 import java.text.FieldPosition; 024 import java.text.SimpleDateFormat; 025 import java.util.Date; 026 import java.util.GregorianCalendar; 027 import java.util.HashMap; 028 import java.util.Locale; 029 import java.util.Map; 030 import java.util.Random; 031 import java.util.TimeZone; 032 033 import org.apache.commons.logging.Log; 034 import org.apache.commons.logging.LogFactory; 035 036 /** 037 * A utility class, which provides some MIME related application logic. 038 */ 039 public final class MimeUtil { 040 private static final Log log = LogFactory.getLog(MimeUtil.class); 041 042 /** 043 * The <code>quoted-printable</code> encoding. 044 */ 045 public static final String ENC_QUOTED_PRINTABLE = "quoted-printable"; 046 /** 047 * The <code>binary</code> encoding. 048 */ 049 public static final String ENC_BINARY = "binary"; 050 /** 051 * The <code>base64</code> encoding. 052 */ 053 public static final String ENC_BASE64 = "base64"; 054 /** 055 * The <code>8bit</code> encoding. 056 */ 057 public static final String ENC_8BIT = "8bit"; 058 /** 059 * The <code>7bit</code> encoding. 060 */ 061 public static final String ENC_7BIT = "7bit"; 062 063 /** <code>MIME-Version</code> header name (lowercase) */ 064 public static final String MIME_HEADER_MIME_VERSION = "mime-version"; 065 /** <code>Content-ID</code> header name (lowercase) */ 066 public static final String MIME_HEADER_CONTENT_ID = "content-id"; 067 /** <code>Content-Description</code> header name (lowercase) */ 068 public static final String MIME_HEADER_CONTENT_DESCRIPTION = "content-description"; 069 /** 070 * <code>Content-Disposition</code> header name (lowercase). 071 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 072 */ 073 public static final String MIME_HEADER_CONTENT_DISPOSITION = "content-disposition"; 074 /** 075 * <code>Content-Disposition</code> filename parameter (lowercase). 076 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 077 */ 078 public static final String PARAM_FILENAME = "filename"; 079 /** 080 * <code>Content-Disposition</code> modification-date parameter (lowercase). 081 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 082 */ 083 public static final String PARAM_MODIFICATION_DATE = "modification-date"; 084 /** 085 * <code>Content-Disposition</code> creation-date parameter (lowercase). 086 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 087 */ 088 public static final String PARAM_CREATION_DATE = "creation-date"; 089 /** 090 * <code>Content-Disposition</code> read-date parameter (lowercase). 091 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 092 */ 093 public static final String PARAM_READ_DATE = "read-date"; 094 /** 095 * <code>Content-Disposition</code> size parameter (lowercase). 096 * See <a href='http://www.faqs.org/rfcs/rfc2183.html'>RFC2183</a>. 097 */ 098 public static final String PARAM_SIZE = "size"; 099 /** 100 * <code>Content-Langauge</code> header (lower case). 101 * See <a href='http://www.faqs.org/rfcs/rfc4646.html'>RFC4646</a>. 102 */ 103 public static final String MIME_HEADER_LANGAUGE = "content-language"; 104 /** 105 * <code>Content-Location</code> header (lower case). 106 * See <a href='http://www.faqs.org/rfcs/rfc2557.html'>RFC2557</a>. 107 */ 108 public static final String MIME_HEADER_LOCATION = "content-location"; 109 /** 110 * <code>Content-MD5</code> header (lower case). 111 * See <a href='http://www.faqs.org/rfcs/rfc1864.html'>RFC1864</a>. 112 */ 113 public static final String MIME_HEADER_MD5 = "content-md5"; 114 115 // used to create unique ids 116 private static final Random random = new Random(); 117 118 // used to create unique ids 119 private static int counter = 0; 120 121 private MimeUtil() { 122 // this is an utility class to be used statically. 123 // this constructor protect from instantiation. 124 } 125 126 /** 127 * Returns, whether the given two MIME types are identical. 128 */ 129 public static boolean isSameMimeType(String pType1, String pType2) { 130 return pType1 != null && pType2 != null && pType1.equalsIgnoreCase(pType2); 131 } 132 133 /** 134 * Returns true, if the given MIME type is that of a message. 135 */ 136 public static boolean isMessage(String pMimeType) { 137 return pMimeType != null && pMimeType.equalsIgnoreCase("message/rfc822"); 138 } 139 140 /** 141 * Return true, if the given MIME type indicates a multipart entity. 142 */ 143 public static boolean isMultipart(String pMimeType) { 144 return pMimeType != null && pMimeType.toLowerCase().startsWith("multipart/"); 145 } 146 147 /** 148 * Returns, whether the given transfer-encoding is "base64". 149 */ 150 public static boolean isBase64Encoding(String pTransferEncoding) { 151 return ENC_BASE64.equalsIgnoreCase(pTransferEncoding); 152 } 153 154 /** 155 * Returns, whether the given transfer-encoding is "quoted-printable". 156 */ 157 public static boolean isQuotedPrintableEncoded(String pTransferEncoding) { 158 return ENC_QUOTED_PRINTABLE.equalsIgnoreCase(pTransferEncoding); 159 } 160 161 /** 162 * <p>Parses a complex field value into a map of key/value pairs. You may 163 * use this, for example, to parse a definition like 164 * <pre> 165 * text/plain; charset=UTF-8; boundary=foobar 166 * </pre> 167 * The above example would return a map with the keys "", "charset", 168 * and "boundary", and the values "text/plain", "UTF-8", and "foobar". 169 * </p><p> 170 * Header value will be unfolded and excess white space trimmed. 171 * </p> 172 * @param pValue The field value to parse. 173 * @return The result map; use the key "" to retrieve the first value. 174 */ 175 @SuppressWarnings("fallthrough") 176 public static Map<String, String> getHeaderParams(String pValue) { 177 pValue = pValue.trim(); 178 179 pValue = unfold(pValue); 180 181 Map<String, String> result = new HashMap<String, String>(); 182 183 // split main value and parameters 184 String main; 185 String rest; 186 if (pValue.indexOf(";") == -1) { 187 main = pValue; 188 rest = null; 189 } else { 190 main = pValue.substring(0, pValue.indexOf(";")); 191 rest = pValue.substring(main.length() + 1); 192 } 193 194 result.put("", main); 195 if (rest != null) { 196 char[] chars = rest.toCharArray(); 197 StringBuilder paramName = new StringBuilder(64); 198 StringBuilder paramValue = new StringBuilder(64); 199 200 final byte READY_FOR_NAME = 0; 201 final byte IN_NAME = 1; 202 final byte READY_FOR_VALUE = 2; 203 final byte IN_VALUE = 3; 204 final byte IN_QUOTED_VALUE = 4; 205 final byte VALUE_DONE = 5; 206 final byte ERROR = 99; 207 208 byte state = READY_FOR_NAME; 209 boolean escaped = false; 210 for (char c : chars) { 211 switch (state) { 212 case ERROR: 213 if (c == ';') 214 state = READY_FOR_NAME; 215 break; 216 217 case READY_FOR_NAME: 218 if (c == '=') { 219 log.error("Expected header param name, got '='"); 220 state = ERROR; 221 break; 222 } 223 224 paramName.setLength(0); 225 paramValue.setLength(0); 226 227 state = IN_NAME; 228 // fall-through 229 230 case IN_NAME: 231 if (c == '=') { 232 if (paramName.length() == 0) 233 state = ERROR; 234 else 235 state = READY_FOR_VALUE; 236 break; 237 } 238 239 // not '='... just add to name 240 paramName.append(c); 241 break; 242 243 case READY_FOR_VALUE: 244 boolean fallThrough = false; 245 switch (c) { 246 case ' ': 247 case '\t': 248 break; // ignore spaces, especially before '"' 249 250 case '"': 251 state = IN_QUOTED_VALUE; 252 break; 253 254 default: 255 state = IN_VALUE; 256 fallThrough = true; 257 break; 258 } 259 if (!fallThrough) 260 break; 261 262 // fall-through 263 264 case IN_VALUE: 265 fallThrough = false; 266 switch (c) { 267 case ';': 268 case ' ': 269 case '\t': 270 result.put( 271 paramName.toString().trim().toLowerCase(), 272 paramValue.toString().trim()); 273 state = VALUE_DONE; 274 fallThrough = true; 275 break; 276 default: 277 paramValue.append(c); 278 break; 279 } 280 if (!fallThrough) 281 break; 282 283 case VALUE_DONE: 284 switch (c) { 285 case ';': 286 state = READY_FOR_NAME; 287 break; 288 289 case ' ': 290 case '\t': 291 break; 292 293 default: 294 state = ERROR; 295 break; 296 } 297 break; 298 299 case IN_QUOTED_VALUE: 300 switch (c) { 301 case '"': 302 if (!escaped) { 303 // don't trim quoted strings; the spaces could be intentional. 304 result.put( 305 paramName.toString().trim().toLowerCase(), 306 paramValue.toString()); 307 state = VALUE_DONE; 308 } else { 309 escaped = false; 310 paramValue.append(c); 311 } 312 break; 313 314 case '\\': 315 if (escaped) { 316 paramValue.append('\\'); 317 } 318 escaped = !escaped; 319 break; 320 321 default: 322 if (escaped) { 323 paramValue.append('\\'); 324 } 325 escaped = false; 326 paramValue.append(c); 327 break; 328 } 329 break; 330 331 } 332 } 333 334 // done looping. check if anything is left over. 335 if (state == IN_VALUE) { 336 result.put( 337 paramName.toString().trim().toLowerCase(), 338 paramValue.toString().trim()); 339 } 340 } 341 342 return result; 343 } 344 345 /** 346 * Creates a new unique message boundary string that can be used as boundary 347 * parameter for the Content-Type header field of a message. 348 * 349 * @return a new unique message boundary string. 350 */ 351 public static String createUniqueBoundary() { 352 StringBuilder sb = new StringBuilder(); 353 sb.append("-=Part."); 354 sb.append(Integer.toHexString(nextCounterValue())); 355 sb.append('.'); 356 sb.append(Long.toHexString(random.nextLong())); 357 sb.append('.'); 358 sb.append(Long.toHexString(System.currentTimeMillis())); 359 sb.append('.'); 360 sb.append(Long.toHexString(random.nextLong())); 361 sb.append("=-"); 362 return sb.toString(); 363 } 364 365 /** 366 * Creates a new unique message identifier that can be used in message 367 * header field such as Message-ID or In-Reply-To. If the given host name is 368 * not <code>null</code> it will be used as suffix for the message ID 369 * (following an at sign). 370 * 371 * The resulting string is enclosed in angle brackets (< and >); 372 * 373 * @param hostName host name to be included in the message ID or 374 * <code>null</code> if no host name should be included. 375 * @return a new unique message identifier. 376 */ 377 public static String createUniqueMessageId(String hostName) { 378 StringBuilder sb = new StringBuilder("<Mime4j."); 379 sb.append(Integer.toHexString(nextCounterValue())); 380 sb.append('.'); 381 sb.append(Long.toHexString(random.nextLong())); 382 sb.append('.'); 383 sb.append(Long.toHexString(System.currentTimeMillis())); 384 if (hostName != null) { 385 sb.append('@'); 386 sb.append(hostName); 387 } 388 sb.append('>'); 389 return sb.toString(); 390 } 391 392 /** 393 * Formats the specified date into a RFC 822 date-time string. 394 * 395 * @param date 396 * date to be formatted into a string. 397 * @param zone 398 * the time zone to use or <code>null</code> to use the default 399 * time zone. 400 * @return the formatted time string. 401 */ 402 public static String formatDate(Date date, TimeZone zone) { 403 DateFormat df = RFC822_DATE_FORMAT.get(); 404 405 if (zone == null) { 406 df.setTimeZone(TimeZone.getDefault()); 407 } else { 408 df.setTimeZone(zone); 409 } 410 411 return df.format(date); 412 } 413 414 /** 415 * Splits the specified string into a multiple-line representation with 416 * lines no longer than 76 characters (because the line might contain 417 * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC 418 * 2047</a> section 2). If the string contains non-whitespace sequences 419 * longer than 76 characters a line break is inserted at the whitespace 420 * character following the sequence resulting in a line longer than 76 421 * characters. 422 * 423 * @param s 424 * string to split. 425 * @param usedCharacters 426 * number of characters already used up. Usually the number of 427 * characters for header field name plus colon and one space. 428 * @return a multiple-line representation of the given string. 429 */ 430 public static String fold(String s, int usedCharacters) { 431 final int maxCharacters = 76; 432 433 final int length = s.length(); 434 if (usedCharacters + length <= maxCharacters) 435 return s; 436 437 StringBuilder sb = new StringBuilder(); 438 439 int lastLineBreak = -usedCharacters; 440 int wspIdx = indexOfWsp(s, 0); 441 while (true) { 442 if (wspIdx == length) { 443 sb.append(s.substring(Math.max(0, lastLineBreak))); 444 return sb.toString(); 445 } 446 447 int nextWspIdx = indexOfWsp(s, wspIdx + 1); 448 449 if (nextWspIdx - lastLineBreak > maxCharacters) { 450 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx)); 451 sb.append("\r\n"); 452 lastLineBreak = wspIdx; 453 } 454 455 wspIdx = nextWspIdx; 456 } 457 } 458 459 /** 460 * Unfold a multiple-line representation into a single line. 461 * 462 * @param s 463 * string to unfold. 464 * @return unfolded string. 465 */ 466 public static String unfold(String s) { 467 final int length = s.length(); 468 for (int idx = 0; idx < length; idx++) { 469 char c = s.charAt(idx); 470 if (c == '\r' || c == '\n') { 471 return unfold0(s, idx); 472 } 473 } 474 475 return s; 476 } 477 478 private static String unfold0(String s, int crlfIdx) { 479 final int length = s.length(); 480 StringBuilder sb = new StringBuilder(length); 481 482 if (crlfIdx > 0) { 483 sb.append(s.substring(0, crlfIdx)); 484 } 485 486 for (int idx = crlfIdx + 1; idx < length; idx++) { 487 char c = s.charAt(idx); 488 if (c != '\r' && c != '\n') { 489 sb.append(c); 490 } 491 } 492 493 return sb.toString(); 494 } 495 496 private static int indexOfWsp(String s, int fromIndex) { 497 final int len = s.length(); 498 for (int index = fromIndex; index < len; index++) { 499 char c = s.charAt(index); 500 if (c == ' ' || c == '\t') 501 return index; 502 } 503 return len; 504 } 505 506 private static synchronized int nextCounterValue() { 507 return counter++; 508 } 509 510 private static final ThreadLocal<DateFormat> RFC822_DATE_FORMAT = new ThreadLocal<DateFormat>() { 511 @Override 512 protected DateFormat initialValue() { 513 return new Rfc822DateFormat(); 514 } 515 }; 516 517 private static final class Rfc822DateFormat extends SimpleDateFormat { 518 private static final long serialVersionUID = 1L; 519 520 public Rfc822DateFormat() { 521 super("EEE, d MMM yyyy HH:mm:ss ", Locale.US); 522 } 523 524 @Override 525 public StringBuffer format(Date date, StringBuffer toAppendTo, 526 FieldPosition pos) { 527 StringBuffer sb = super.format(date, toAppendTo, pos); 528 529 int zoneMillis = calendar.get(GregorianCalendar.ZONE_OFFSET); 530 int dstMillis = calendar.get(GregorianCalendar.DST_OFFSET); 531 int minutes = (zoneMillis + dstMillis) / 1000 / 60; 532 533 if (minutes < 0) { 534 sb.append('-'); 535 minutes = -minutes; 536 } else { 537 sb.append('+'); 538 } 539 540 sb.append(String.format("%02d%02d", minutes / 60, minutes % 60)); 541 542 return sb; 543 } 544 } 545 }