001/**************************************************************** 002 * Licensed to the Apache Software Foundation (ASF) under one * 003 * or more contributor license agreements. See the NOTICE file * 004 * distributed with this work for additional information * 005 * regarding copyright ownership. The ASF licenses this file * 006 * to you under the Apache License, Version 2.0 (the * 007 * "License"); you may not use this file except in compliance * 008 * with the License. You may obtain a copy of the License at * 009 * * 010 * http://www.apache.org/licenses/LICENSE-2.0 * 011 * * 012 * Unless required by applicable law or agreed to in writing, * 013 * software distributed under the License is distributed on an * 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 015 * KIND, either express or implied. See the License for the * 016 * specific language governing permissions and limitations * 017 * under the License. * 018 ****************************************************************/ 019 020package org.apache.james.mime4j.codec; 021 022import java.io.ByteArrayInputStream; 023import java.io.ByteArrayOutputStream; 024import java.io.IOException; 025import java.io.UnsupportedEncodingException; 026import java.nio.charset.Charset; 027import java.util.regex.Matcher; 028import java.util.regex.Pattern; 029 030import org.apache.james.mime4j.util.CharsetUtil; 031 032/** 033 * Static methods for decoding strings, byte arrays and encoded words. 034 */ 035public class DecoderUtil { 036 037 private static final Pattern PATTERN_ENCODED_WORD = Pattern.compile( 038 "(.*?)=\\?(.+?)\\?(\\w)\\?(.+?)\\?=", Pattern.DOTALL); 039 040 /** 041 * Decodes a string containing quoted-printable encoded data. 042 * 043 * @param s the string to decode. 044 * @return the decoded bytes. 045 */ 046 private static byte[] decodeQuotedPrintable(String s, DecodeMonitor monitor) { 047 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 048 049 try { 050 byte[] bytes = s.getBytes("US-ASCII"); 051 052 QuotedPrintableInputStream is = new QuotedPrintableInputStream( 053 new ByteArrayInputStream(bytes), monitor); 054 055 int b = 0; 056 while ((b = is.read()) != -1) { 057 baos.write(b); 058 } 059 } catch (IOException e) { 060 // This should never happen! 061 throw new IllegalStateException(e); 062 } 063 064 return baos.toByteArray(); 065 } 066 067 /** 068 * Decodes a string containing base64 encoded data. 069 * 070 * @param s the string to decode. 071 * @param monitor 072 * @return the decoded bytes. 073 */ 074 private static byte[] decodeBase64(String s, DecodeMonitor monitor) { 075 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 076 077 try { 078 byte[] bytes = s.getBytes("US-ASCII"); 079 080 Base64InputStream is = new Base64InputStream( 081 new ByteArrayInputStream(bytes), monitor); 082 083 int b = 0; 084 while ((b = is.read()) != -1) { 085 baos.write(b); 086 } 087 } catch (IOException e) { 088 // This should never happen! 089 throw new IllegalStateException(e); 090 } 091 092 return baos.toByteArray(); 093 } 094 095 /** 096 * Decodes an encoded text encoded with the 'B' encoding (described in 097 * RFC 2047) found in a header field body. 098 * 099 * @param encodedText the encoded text to decode. 100 * @param charset the Java charset to use. 101 * @param monitor 102 * @return the decoded string. 103 * @throws UnsupportedEncodingException if the given Java charset isn't 104 * supported. 105 */ 106 static String decodeB(String encodedText, String charset, DecodeMonitor monitor) 107 throws UnsupportedEncodingException { 108 byte[] decodedBytes = decodeBase64(encodedText, monitor); 109 return new String(decodedBytes, charset); 110 } 111 112 /** 113 * Decodes an encoded text encoded with the 'Q' encoding (described in 114 * RFC 2047) found in a header field body. 115 * 116 * @param encodedText the encoded text to decode. 117 * @param charset the Java charset to use. 118 * @return the decoded string. 119 * @throws UnsupportedEncodingException if the given Java charset isn't 120 * supported. 121 */ 122 static String decodeQ(String encodedText, String charset, DecodeMonitor monitor) 123 throws UnsupportedEncodingException { 124 encodedText = replaceUnderscores(encodedText); 125 126 byte[] decodedBytes = decodeQuotedPrintable(encodedText, monitor); 127 return new String(decodedBytes, charset); 128 } 129 130 static String decodeEncodedWords(String body) { 131 return decodeEncodedWords(body, DecodeMonitor.SILENT); 132 } 133 134 /** 135 * Decodes a string containing encoded words as defined by RFC 2047. Encoded 136 * words have the form =?charset?enc?encoded-text?= where enc is either 'Q' 137 * or 'q' for quoted-printable and 'B' or 'b' for base64. 138 * 139 * @param body the string to decode 140 * @param monitor the DecodeMonitor to be used. 141 * @return the decoded string. 142 * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing) 143 */ 144 public static String decodeEncodedWords(String body, DecodeMonitor monitor) throws IllegalArgumentException { 145 int tailIndex = 0; 146 boolean lastMatchValid = false; 147 148 StringBuilder sb = new StringBuilder(); 149 150 for (Matcher matcher = PATTERN_ENCODED_WORD.matcher(body); matcher.find();) { 151 String separator = matcher.group(1); 152 String mimeCharset = matcher.group(2); 153 String encoding = matcher.group(3); 154 String encodedText = matcher.group(4); 155 156 String decoded = null; 157 decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor); 158 if (decoded == null) { 159 sb.append(matcher.group(0)); 160 } else { 161 if (!lastMatchValid || !CharsetUtil.isWhitespace(separator)) { 162 sb.append(separator); 163 } 164 sb.append(decoded); 165 } 166 167 tailIndex = matcher.end(); 168 lastMatchValid = decoded != null; 169 } 170 171 if (tailIndex == 0) { 172 return body; 173 } else { 174 sb.append(body.substring(tailIndex)); 175 return sb.toString(); 176 } 177 } 178 179 // return null on error 180 private static String tryDecodeEncodedWord(final String mimeCharset, 181 final String encoding, final String encodedText, final DecodeMonitor monitor) { 182 Charset charset = CharsetUtil.lookup(mimeCharset); 183 if (charset == null) { 184 monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded", 185 "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset"); 186 return null; 187 } 188 189 if (encodedText.length() == 0) { 190 monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded", 191 "Missing encoded text in encoded word"); 192 return null; 193 } 194 195 try { 196 if (encoding.equalsIgnoreCase("Q")) { 197 return DecoderUtil.decodeQ(encodedText, charset.name(), monitor); 198 } else if (encoding.equalsIgnoreCase("B")) { 199 return DecoderUtil.decodeB(encodedText, charset.name(), monitor); 200 } else { 201 monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded", 202 "Warning: Unknown encoding in encoded word"); 203 return null; 204 } 205 } catch (UnsupportedEncodingException e) { 206 // should not happen because of isDecodingSupported check above 207 monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded", 208 "Unsupported encoding (", e.getMessage(), ") in encoded word"); 209 return null; 210 } catch (RuntimeException e) { 211 monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded", 212 "Could not decode (", e.getMessage(), ") encoded word"); 213 return null; 214 } 215 } 216 217 private static void monitor(DecodeMonitor monitor, String mimeCharset, String encoding, 218 String encodedText, String dropDesc, String... strings) throws IllegalArgumentException { 219 if (monitor.isListening()) { 220 String encodedWord = recombine(mimeCharset, encoding, encodedText); 221 StringBuilder text = new StringBuilder(); 222 for (String str : strings) { 223 text.append(str); 224 } 225 text.append(" ("); 226 text.append(encodedWord); 227 text.append(")"); 228 String exceptionDesc = text.toString(); 229 if (monitor.warn(exceptionDesc, dropDesc)) 230 throw new IllegalArgumentException(text.toString()); 231 } 232 } 233 234 private static String recombine(final String mimeCharset, 235 final String encoding, final String encodedText) { 236 return "=?" + mimeCharset + "?" + encoding + "?" + encodedText + "?="; 237 } 238 239 // Replace _ with =20 240 private static String replaceUnderscores(String str) { 241 // probably faster than String#replace(CharSequence, CharSequence) 242 243 StringBuilder sb = new StringBuilder(128); 244 245 for (int i = 0; i < str.length(); i++) { 246 char c = str.charAt(i); 247 if (c == '_') { 248 sb.append("=20"); 249 } else { 250 sb.append(c); 251 } 252 } 253 254 return sb.toString(); 255 } 256}