move CharsetVerifier to utils package
This commit is contained in:
@@ -0,0 +1,142 @@
|
||||
package org.sufficientlysecure.keychain.util;
|
||||
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
|
||||
import android.content.ClipDescription;
|
||||
import android.support.annotation.NonNull;
|
||||
import android.support.annotation.Nullable;
|
||||
|
||||
/** This class can be used to guess whether a stream of data is encoded in a given
|
||||
* charset or not.
|
||||
*
|
||||
* An object of this class must be initialized with a byte[] buffer, which should
|
||||
* be filled with data, then processed with {@link #readBytesFromBuffer}. This can
|
||||
* be done any number of times. Once all data has been read, a final status can be
|
||||
* read using the getter methods.
|
||||
*/
|
||||
public class CharsetVerifier {
|
||||
|
||||
private final ByteBuffer bufWrap;
|
||||
private final CharBuffer dummyOutput;
|
||||
|
||||
private final CharsetDecoder charsetDecoder;
|
||||
|
||||
private boolean isFinished;
|
||||
private boolean isFaulty;
|
||||
private boolean isGuessed;
|
||||
private boolean isPossibleTextMimeType;
|
||||
private boolean isTextMimeType;
|
||||
private String charset;
|
||||
|
||||
public CharsetVerifier(@NonNull byte[] buf, String mimeType, @Nullable String charset) {
|
||||
|
||||
isPossibleTextMimeType = ClipDescription.compareMimeTypes(mimeType, "application/octet-stream")
|
||||
|| ClipDescription.compareMimeTypes(mimeType, "application/x-download")
|
||||
|| ClipDescription.compareMimeTypes(mimeType, "text/*");
|
||||
if (!isPossibleTextMimeType) {
|
||||
charsetDecoder = null;
|
||||
bufWrap = null;
|
||||
dummyOutput = null;
|
||||
return;
|
||||
}
|
||||
isTextMimeType = ClipDescription.compareMimeTypes(mimeType, "text/*");
|
||||
|
||||
bufWrap = ByteBuffer.wrap(buf);
|
||||
dummyOutput = CharBuffer.allocate(buf.length);
|
||||
|
||||
// the charset defaults to us-ascii, but we want to default to utf-8
|
||||
if (charset == null || "us-ascii".equals(charset)) {
|
||||
charset = "utf-8";
|
||||
isGuessed = true;
|
||||
} else {
|
||||
isGuessed = false;
|
||||
}
|
||||
this.charset = charset;
|
||||
|
||||
charsetDecoder = Charset.forName(charset).newDecoder();
|
||||
charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
|
||||
charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
charsetDecoder.reset();
|
||||
}
|
||||
|
||||
public void readBytesFromBuffer(int pos, int len) {
|
||||
if (isFinished) {
|
||||
throw new IllegalStateException("cannot write again after reading charset status!");
|
||||
}
|
||||
if (isFaulty || bufWrap == null) {
|
||||
return;
|
||||
}
|
||||
bufWrap.rewind();
|
||||
bufWrap.position(pos);
|
||||
bufWrap.limit(len);
|
||||
dummyOutput.rewind();
|
||||
CoderResult result = charsetDecoder.decode(bufWrap, dummyOutput, false);
|
||||
if (result.isError()) {
|
||||
isFaulty = true;
|
||||
}
|
||||
}
|
||||
|
||||
private void finishIfNecessary() {
|
||||
if (isFinished || isFaulty || bufWrap == null) {
|
||||
return;
|
||||
}
|
||||
isFinished = true;
|
||||
bufWrap.rewind();
|
||||
bufWrap.limit(0);
|
||||
dummyOutput.rewind();
|
||||
CoderResult result = charsetDecoder.decode(bufWrap, dummyOutput, true);
|
||||
if (result.isError()) {
|
||||
isFaulty = true;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isCharsetFaulty() {
|
||||
finishIfNecessary();
|
||||
return isFaulty;
|
||||
}
|
||||
|
||||
public boolean isCharsetGuessed() {
|
||||
finishIfNecessary();
|
||||
return isGuessed;
|
||||
}
|
||||
|
||||
public String getCharset() {
|
||||
finishIfNecessary();
|
||||
if (!isPossibleTextMimeType || (isGuessed && isFaulty)) {
|
||||
return null;
|
||||
}
|
||||
return charset;
|
||||
}
|
||||
|
||||
public String getMaybeFaultyCharset() {
|
||||
return charset;
|
||||
}
|
||||
|
||||
/** Returns true if the data which was read is definitely binary.
|
||||
*
|
||||
* This can happen when either the supplied mimeType indicated a non-ambiguous
|
||||
* binary data type, or if we guessed a charset but got errors while decoding.
|
||||
*/
|
||||
public boolean isDefinitelyBinary() {
|
||||
finishIfNecessary();
|
||||
return !isTextMimeType && (!isPossibleTextMimeType || (isGuessed && isFaulty));
|
||||
}
|
||||
|
||||
/** Returns true iff the data which was read is probably (or
|
||||
* definitely) text.
|
||||
*
|
||||
* The corner case where isDefinitelyBinary returns false but isProbablyText
|
||||
* returns true is where the charset was provided by the data (so is not
|
||||
* guessed) but is still faulty.
|
||||
*/
|
||||
public boolean isProbablyText() {
|
||||
finishIfNecessary();
|
||||
return isTextMimeType || isPossibleTextMimeType && (!isGuessed || !isFaulty);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user