mime: try to decode with given charset or utf-8 while file is read

This commit is contained in:
Vincent Breitmoser
2016-02-12 17:08:09 +01:00
parent 0784a0b430
commit bdae99c084
4 changed files with 226 additions and 11 deletions

View File

@@ -23,6 +23,13 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import android.content.ClipDescription;
@@ -67,10 +74,15 @@ import org.sufficientlysecure.keychain.service.input.CryptoInputParcel;
*/
public class InputDataOperation extends BaseOperation<InputDataParcel> {
final private byte[] buf = new byte[256];
private final byte[] buf = new byte[256];
private final ByteBuffer bufWrap;
private final CharBuffer dummyOutput;
public InputDataOperation(Context context, ProviderHelper providerHelper, Progressable progressable) {
super(context, providerHelper, progressable);
bufWrap = ByteBuffer.wrap(buf);
dummyOutput = CharBuffer.allocate(256);
}
Uri mSignedDataUri;
@@ -326,20 +338,82 @@ public class InputDataOperation extends BaseOperation<InputDataParcel> {
throw new IOException("Error getting file for writing!");
}
boolean isPossibleTextMimeType = ClipDescription.compareMimeTypes(mimeType, "application/octet-stream")
|| ClipDescription.compareMimeTypes(mimeType, "application/x-download")
|| ClipDescription.compareMimeTypes(mimeType, "text/*");
// If this data looks like text, we pipe the incoming data into a charset
// decoder, to see if the data is legal for the assumed charset.
String charset;
boolean charsetIsFaulty;
boolean charsetIsGuessed;
CharsetDecoder charsetDecoder = null;
if (isPossibleTextMimeType) {
charset = bd.getCharset();
// the charset defaults to us-ascii, but we want to default to utf-8
if (charset == null || "us-ascii".equals(charset)) {
charset = "utf-8";
charsetIsGuessed = true;
} else {
charsetIsGuessed = false;
}
try {
charsetDecoder = Charset.forName(charset).newDecoder();
charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
charsetDecoder.reset();
charsetIsFaulty = false;
} catch (UnsupportedCharsetException e) {
charsetIsFaulty = true;
}
} else {
charsetIsFaulty = true;
charsetIsGuessed = false;
charset = null;
}
int totalLength = 0;
do {
totalLength += len;
out.write(buf, 0, len);
if (isPossibleTextMimeType && !charsetIsFaulty) {
bufWrap.rewind();
bufWrap.limit(len);
dummyOutput.rewind();
CoderResult result = charsetDecoder.decode(bufWrap, dummyOutput, false);
if (result.isError()) {
charsetIsFaulty = true;
}
}
} while ((len = is.read(buf)) > 0);
log.add(LogType.MSG_DATA_MIME_LENGTH, 3, Long.toString(totalLength));
String charset = bd.getCharset();
// the charset defaults to us-ascii, but we want to default to utf-8
if ("us-ascii".equals(charset)) {
charset = "utf-8";
if (!charsetIsFaulty) {
bufWrap.rewind();
bufWrap.limit(0);
dummyOutput.rewind();
CoderResult result = charsetDecoder.decode(bufWrap, dummyOutput, true);
if (result.isError()) {
charsetIsFaulty = true;
}
}
if (isPossibleTextMimeType) {
if (charsetIsFaulty && charsetIsGuessed) {
log.add(LogType.MSG_DATA_MIME_CHARSET_UNKNOWN, 3, charset);
charset = null;
} else if (charsetIsFaulty) {
log.add(LogType.MSG_DATA_MIME_CHARSET_FAULTY, 3, charset);
} else if (charsetIsGuessed) {
log.add(LogType.MSG_DATA_MIME_CHARSET_GUESS, 3, charset);
} else {
log.add(LogType.MSG_DATA_MIME_CHARSET, 3, charset);
}
}
log.add(LogType.MSG_DATA_MIME_LENGTH, 3, Long.toString(totalLength));
OpenPgpMetadata metadata = new OpenPgpMetadata(mFilename, mimeType, 0L, totalLength, charset);
out.close();

View File

@@ -860,6 +860,10 @@ public abstract class OperationResult implements Parcelable {
MSG_DATA_MIME_FROM_EXTENSION (LogLevel.DEBUG, R.string.msg_data_mime_from_extension),
MSG_DATA_MIME_FILENAME (LogLevel.DEBUG, R.string.msg_data_mime_filename),
MSG_DATA_MIME_LENGTH (LogLevel.DEBUG, R.string.msg_data_mime_length),
MSG_DATA_MIME_CHARSET (LogLevel.DEBUG, R.string.msg_data_mime_charset),
MSG_DATA_MIME_CHARSET_FAULTY (LogLevel.WARN, R.string.msg_data_mime_charset_faulty),
MSG_DATA_MIME_CHARSET_GUESS (LogLevel.DEBUG, R.string.msg_data_mime_charset_guess),
MSG_DATA_MIME_CHARSET_UNKNOWN (LogLevel.DEBUG, R.string.msg_data_mime_charset_unknown),
MSG_DATA_MIME (LogLevel.DEBUG, R.string.msg_data_mime),
MSG_DATA_MIME_OK (LogLevel.INFO, R.string.msg_data_mime_ok),
MSG_DATA_MIME_NONE (LogLevel.DEBUG, R.string.msg_data_mime_none),

View File

@@ -1401,6 +1401,10 @@
<string name="msg_data_mime_filename">"Filename: '%s'"</string>
<string name="msg_data_mime_from_extension">"Guessing MIME type from extension"</string>
<string name="msg_data_mime_length">"Content-Length: %s"</string>
<string name="msg_data_mime_charset">"Charset is '%s'"</string>
<string name="msg_data_mime_charset_faulty">"Charset is '%s', but decoding failed!"</string>
<string name="msg_data_mime_charset_guess">"Charset appears to be '%s'"</string>
<string name="msg_data_mime_charset_unknown">"Charset is unknown, or data is not text."</string>
<string name="msg_data_mime">"Parsing MIME data structure"</string>
<string name="msg_data_mime_ok">"Finished parsing"</string>
<string name="msg_data_mime_none">"No MIME structure found"</string>

View File

@@ -20,6 +20,7 @@ package org.sufficientlysecure.keychain.pgp;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.security.Security;
import java.util.ArrayList;
@@ -30,18 +31,20 @@ import android.content.ContentValues;
import android.net.Uri;
import junit.framework.Assert;
import org.bouncycastle.jce.provider.BouncyCastleProvider;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.openintents.openpgp.OpenPgpMetadata;
import org.robolectric.RobolectricGradleTestRunner;
import org.robolectric.RuntimeEnvironment;
import org.robolectric.annotation.Config;
import org.robolectric.shadows.ShadowLog;
import org.bouncycastle.jce.provider.BouncyCastleProvider;
import org.sufficientlysecure.keychain.WorkaroundBuildConfig;
import org.sufficientlysecure.keychain.operations.InputDataOperation;
import org.sufficientlysecure.keychain.operations.results.InputDataResult;
import org.sufficientlysecure.keychain.operations.results.OperationResult.LogType;
import org.sufficientlysecure.keychain.provider.ProviderHelper;
import org.sufficientlysecure.keychain.provider.TemporaryFileProvider;
import org.sufficientlysecure.keychain.service.InputDataParcel;
@@ -80,20 +83,20 @@ public class InputDataOperationTest {
}
@Test
public void testMimeDecoding () throws Exception {
public void testMimeDecoding() throws Exception {
String mimeMail =
"Content-Type: multipart/mixed; boundary=\"=-26BafqxfXmhVNMbYdoIi\"\n" +
"\n" +
"--=-26BafqxfXmhVNMbYdoIi\n" +
"Content-Type: text/plain\n" +
"Content-Type: text/plain; charset=utf-8\n" +
"Content-Transfer-Encoding: quoted-printable\n" +
"Content-Disposition: attachment; filename=data.txt\n" +
"\n" +
"message part 1\n" +
"\n" +
"--=-26BafqxfXmhVNMbYdoIi\n" +
"Content-Type: text/testvalue\n" +
"Content-Type: text/testvalue; charset=iso-8859-1\n" +
"Content-Description: Dummy content description\n" +
"\n" +
"message part 2.1\n" +
@@ -156,7 +159,137 @@ public class InputDataOperationTest {
Assert.assertEquals("second part must have expected content",
"message part 2.1\nmessage part 2.2\n", new String(outStream2.toByteArray()));
OpenPgpMetadata metadata = result.mMetadata.get(0);
Assert.assertEquals("text/plain", metadata.getMimeType());
Assert.assertEquals("utf-8", metadata.getCharset());
metadata = result.mMetadata.get(1);
Assert.assertEquals("text/testvalue", metadata.getMimeType());
Assert.assertEquals("iso-8859-1", metadata.getCharset());
}
@Test
public void testMimeDecodingExplicitFaultyCharset() throws Exception {
String mimeContent = "Content-Type: text/plain; charset=utf-8\n" +
"\n" +
"message with binary data in it\n";
byte[] data = mimeContent.getBytes();
data[60] = (byte) 0xc3;
data[61] = (byte) 0x28;
InputDataResult result = runSimpleDataInputOperation(data);
// must be successful, no verification, have two output URIs
Assert.assertTrue(result.success());
Assert.assertNull(result.mDecryptVerifyResult);
OpenPgpMetadata metadata = result.mMetadata.get(0);
Assert.assertEquals("text/plain", metadata.getMimeType());
Assert.assertEquals("charset should be set since it was explicitly specified",
"utf-8", metadata.getCharset());
Assert.assertTrue("faulty charset should have been detected",
result.getLog().containsType(LogType.MSG_DATA_MIME_CHARSET_FAULTY));
}
@Test
public void testMimeDecodingImplicitFaultyCharset() throws Exception {
String mimeContent = "Content-Type: text/plain\n" +
"\n" +
"message with binary data in it\n";
byte[] data = mimeContent.getBytes();
data[45] = (byte) 0xc3;
data[46] = (byte) 0x28;
InputDataResult result = runSimpleDataInputOperation(data);
// must be successful, no verification, have two output URIs
Assert.assertTrue(result.success());
Assert.assertNull(result.mDecryptVerifyResult);
OpenPgpMetadata metadata = result.mMetadata.get(0);
Assert.assertEquals("text/plain", metadata.getMimeType());
Assert.assertNull("charset was bad so it should not be set", metadata.getCharset());
Assert.assertTrue("faulty charset should have been detected",
result.getLog().containsType(LogType.MSG_DATA_MIME_CHARSET_UNKNOWN));
}
@Test
public void testMimeDecodingImplicitGuessedCharset() throws Exception {
String mimeContent = "Content-Type: text/plain\n" +
"\n" +
"proper, utf-8 encoded message ☭\n";
InputDataResult result = runSimpleDataInputOperation(mimeContent.getBytes());
// must be successful, no verification, have two output URIs
Assert.assertTrue(result.success());
Assert.assertNull(result.mDecryptVerifyResult);
OpenPgpMetadata metadata = result.mMetadata.get(0);
Assert.assertEquals("text/plain", metadata.getMimeType());
Assert.assertEquals("charset should be set since it was guessed and not faulty",
"utf-8", metadata.getCharset());
Assert.assertTrue("charset should have been guessed",
result.getLog().containsType(LogType.MSG_DATA_MIME_CHARSET_GUESS));
}
@Test
public void testMimeDecodingOctetStreamGuessedCharset() throws Exception {
String mimeContent = "Content-Type: application/octet-stream\n" +
"\n" +
"proper, utf-8 encoded message ☭\n";
InputDataResult result = runSimpleDataInputOperation(mimeContent.getBytes());
// must be successful, no verification, have two output URIs
Assert.assertTrue(result.success());
Assert.assertNull(result.mDecryptVerifyResult);
OpenPgpMetadata metadata = result.mMetadata.get(0);
Assert.assertEquals("application/octet-stream", metadata.getMimeType());
Assert.assertEquals("charset should be set since it was guessed and not faulty",
"utf-8", metadata.getCharset());
Assert.assertTrue("charset should have been guessed",
result.getLog().containsType(LogType.MSG_DATA_MIME_CHARSET_GUESS));
}
private InputDataResult runSimpleDataInputOperation(byte[] mimeContentBytes) throws FileNotFoundException {
ByteArrayOutputStream outStream1 = new ByteArrayOutputStream();
ByteArrayOutputStream outStream2 = new ByteArrayOutputStream();
ContentResolver mockResolver = mock(ContentResolver.class);
// fake openOutputStream first and second
when(mockResolver.openOutputStream(any(Uri.class), eq("w")))
.thenReturn(outStream1, outStream2);
// fake openInputStream
Uri fakeInputUri = Uri.parse("content://fake/1");
when(mockResolver.openInputStream(fakeInputUri)).thenReturn(
new ByteArrayInputStream(mimeContentBytes));
Uri fakeOutputUri1 = Uri.parse("content://fake/out/1");
when(mockResolver.insert(eq(TemporaryFileProvider.CONTENT_URI), any(ContentValues.class)))
.thenReturn(fakeOutputUri1);
// application which returns mockresolver
Application spyApplication = spy(RuntimeEnvironment.application);
when(spyApplication.getContentResolver()).thenReturn(mockResolver);
InputDataOperation op = new InputDataOperation(spyApplication,
new ProviderHelper(RuntimeEnvironment.application), null);
InputDataParcel input = new InputDataParcel(fakeInputUri, null);
return op.execute(input, new CryptoInputParcel());
}
}