/*
 * Decompiled with CFR 0.152.
 */
package ir.ac.iust.htmlchardet;

import ir.ac.iust.htmlchardet.Charsets;
import ir.ac.iust.icu.CharsetDetector;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.intl.chardet.nsDetector;

/**
 * Static utility for guessing the character encoding of a raw HTML document.
 *
 * <p>Detection combines up to three sources: the charset declared in the
 * document's {@code <meta>} tags (optional), the Mozilla jchardet detector, and
 * the ICU charset detector run over the document's visible text.
 */
public class HTMLCharsetDetector {
    /**
     * Minimum number of extracted text bytes required before the statistical
     * detector is run on the extracted text instead of the whole document.
     */
    private static final int THRESHOLD = 40;

    /**
     * Single-byte charset used to move bytes through {@code String} and back;
     * every byte value decodes to exactly one char in ISO-8859-1.
     */
    private static final String BYTE_PRESERVING_CHARSET = "ISO-8859-1";

    private HTMLCharsetDetector() {
    }

    /**
     * Detects the charset of an HTML document.
     *
     * <p>Note that a call with no {@code lookInMeta} argument at all resolves to
     * the deprecated single-argument overload, because Java prefers a
     * non-varargs match; pass an explicit {@code true}/{@code false} to reach
     * this method.
     *
     * @param rawHtmlByteSequence the undecoded bytes of the HTML document
     * @param lookInMeta optional flag; when the first element is {@code true} the
     *        charset declared in the document's meta tags is returned if
     *        {@code Charsets.isValid} accepts it
     * @return the detected charset name, passed through {@code Charsets.normalize}
     */
    public static String detect(byte[] rawHtmlByteSequence, boolean ... lookInMeta) {
        Document domTree = null;
        boolean metaRequested = lookInMeta != null && lookInMeta.length > 0 && lookInMeta[0];
        if (metaRequested) {
            domTree = HTMLCharsetDetector.createDomTree(rawHtmlByteSequence, BYTE_PRESERVING_CHARSET);
            String declared = HTMLCharsetDetector.lookInMetaTags(domTree);
            if (Charsets.isValid(declared)) {
                return Charsets.normalize(declared);
            }
        }

        // A UTF-8 verdict from jchardet is accepted without consulting ICU.
        String charset = HTMLCharsetDetector.mozillaJCharDet(rawHtmlByteSequence);
        if ("UTF-8".equalsIgnoreCase(charset)) {
            return Charsets.normalize(charset);
        }

        if (domTree == null) {
            domTree = HTMLCharsetDetector.createDomTree(rawHtmlByteSequence, BYTE_PRESERVING_CHARSET);
        }
        // Re-encode the visible text with the same single-byte charset used for
        // decoding. Characters outside ISO-8859-1 (for example those produced
        // from numeric entities) are replaced by the charset's replacement byte.
        byte[] visibleTextBytes = domTree.text().getBytes(Charset.forName(BYTE_PRESERVING_CHARSET));
        if (visibleTextBytes.length < THRESHOLD) {
            // Too little visible text to analyse; use the whole document instead.
            visibleTextBytes = rawHtmlByteSequence;
        }
        return Charsets.normalize(HTMLCharsetDetector.ibmICU4j(visibleTextBytes));
    }

    /**
     * Decodes the raw bytes with the given charset and parses the result with jsoup.
     *
     * @param rawHtmlByteSequence the undecoded bytes of the HTML document
     * @param charset name of the charset used to decode the bytes
     * @return the parsed document
     */
    private static Document createDomTree(byte[] rawHtmlByteSequence, String charset) {
        String html = new String(rawHtmlByteSequence, Charset.forName(charset));
        return Jsoup.parse(html);
    }

    /**
     * Searches the document's {@code <meta>} elements for a declared charset.
     *
     * <p>For each element the {@code charset} attribute is tried first, then the
     * {@code charset=} parameter of the {@code content} attribute.
     *
     * @param domTree the parsed document
     * @return the first declared value accepted by {@code Charsets.isValid}, or
     *         {@code null} when no meta element declares one
     */
    private static String lookInMetaTags(Document domTree) {
        Elements metaElements = domTree.select("meta");
        for (Element meta : metaElements) {
            String charset = meta.attr("charset");
            if (Charsets.isValid(charset)) {
                return charset;
            }
            charset = HTMLCharsetDetector.extractCharsetParameter(meta.attr("content"));
            if (Charsets.isValid(charset)) {
                return charset;
            }
        }
        return null;
    }

    /**
     * Extracts the value of the {@code charset} parameter from a
     * {@code Content-Type}-style string such as {@code text/html; charset=utf-8}.
     *
     * <p>The parameter name is matched case-insensitively, whitespace around
     * {@code =} is tolerated, the value ends at the next {@code ;}, and one pair
     * of matching surrounding quotes is stripped.
     *
     * @param contentAttr the attribute value to inspect; may be {@code null}
     * @return the trimmed parameter value, or {@code null} when there is no
     *         {@code charset=} parameter
     */
    private static String extractCharsetParameter(String contentAttr) {
        if (contentAttr == null) {
            return null;
        }
        final String key = "charset";
        int length = contentAttr.length();
        for (int i = 0; i + key.length() <= length; ++i) {
            if (!contentAttr.regionMatches(true, i, key, 0, key.length())) {
                continue;
            }
            int pos = i + key.length();
            while (pos < length && Character.isWhitespace(contentAttr.charAt(pos))) {
                ++pos;
            }
            if (pos >= length || contentAttr.charAt(pos) != '=') {
                // The word "charset" without an assignment; keep scanning.
                continue;
            }
            ++pos;
            int end = contentAttr.indexOf(';', pos);
            if (end < 0) {
                end = length;
            }
            String value = contentAttr.substring(pos, end).trim();
            if (value.length() >= 2) {
                char first = value.charAt(0);
                boolean quoted = (first == '"' || first == '\'') && value.charAt(value.length() - 1) == first;
                if (quoted) {
                    value = value.substring(1, value.length() - 1).trim();
                }
            }
            return value;
        }
        return null;
    }

    /**
     * Runs the Mozilla jchardet detector over the bytes.
     *
     * @param bytes the bytes to analyse
     * @return the first entry of the detector's probable-charset list
     */
    private static String mozillaJCharDet(byte[] bytes) {
        // NOTE(review): 0 is presumably the "all languages" mode of nsDetector - confirm.
        nsDetector det = new nsDetector(0);
        det.DoIt(bytes, bytes.length, false);
        det.DataEnd();
        return det.getProbableCharsets()[0];
    }

    /**
     * Runs the ICU charset detector over the bytes.
     *
     * @param bytes the bytes to analyse
     * @return the name of the charset the detector reports
     */
    private static String ibmICU4j(byte[] bytes) {
        CharsetDetector charsetDetector = new CharsetDetector();
        charsetDetector.setText(bytes);
        // NOTE(review): upstream ICU4J documents that detect() may return null when
        // nothing matches; verify whether this repackaged detector can do the same.
        return charsetDetector.detect().getName();
    }

    /**
     * Detects the charset of an HTML document without building a DOM tree.
     *
     * <p>If jchardet reports UTF-8 that answer is returned directly. Otherwise
     * the content of the {@code <body>} element is extracted at byte level,
     * {@code <script>} and {@code <style>} elements and all remaining tags are
     * removed, and the ICU detector is run on what is left. When fewer than
     * {@link #THRESHOLD} bytes of text remain, the whole document is analysed.
     *
     * @param rawHtmlByteSequence the undecoded bytes of the HTML document
     * @return the detected charset name
     * @deprecated use {@link #detect(byte[], boolean...)} instead
     */
    @Deprecated
    public static String detect(byte[] rawHtmlByteSequence) {
        String charset = HTMLCharsetDetector.mozillaJCharDet(rawHtmlByteSequence);
        if (charset.equalsIgnoreCase(Charsets.UTF_8.getValue())) {
            return Charsets.UTF_8.getValue();
        }
        byte[] text = HTMLCharsetDetector.extractBody(rawHtmlByteSequence);
        text = HTMLCharsetDetector.removeElements(text, "<script", "/script>");
        text = HTMLCharsetDetector.removeElements(text, "<style", "/style>");
        text = HTMLCharsetDetector.removeTags(text);
        if (text.length < THRESHOLD) {
            text = rawHtmlByteSequence;
        }
        return HTMLCharsetDetector.ibmICU4j(text);
    }

    /**
     * Returns the bytes between the end of the opening {@code <body ...>} tag and
     * the closing {@code </body>} tag.
     *
     * <p>The input itself is returned when no opening body tag is found. When
     * the closing tag is missing, everything after the opening tag is returned.
     */
    private static byte[] extractBody(byte[] html) {
        int open = HTMLCharsetDetector.findPattern(html, "<body", 0);
        if (open == -1) {
            return html;
        }
        // Skip the whole opening tag, including any attributes it carries.
        int contentStart = open;
        while (contentStart < html.length && html[contentStart] != '>') {
            ++contentStart;
        }
        if (contentStart >= html.length) {
            return html;
        }
        ++contentStart;
        int close = HTMLCharsetDetector.findPattern(html, "</body", contentStart);
        int contentEnd = close == -1 ? html.length : close;
        return HTMLCharsetDetector.slice(html, contentStart, contentEnd);
    }

    /**
     * Returns a copy of {@code content} with every span from {@code openPattern}
     * through the end of the next {@code closePattern} removed.
     *
     * <p>An opening pattern with no matching closing pattern removes the rest of
     * the content.
     */
    private static byte[] removeElements(byte[] content, String openPattern, String closePattern) {
        byte[] kept = new byte[content.length];
        int keptLength = 0;
        int cursor = 0;
        int open = HTMLCharsetDetector.findPattern(content, openPattern, 0);
        while (open != -1) {
            int gap = open - cursor;
            System.arraycopy(content, cursor, kept, keptLength, gap);
            keptLength += gap;
            int close = HTMLCharsetDetector.findPattern(content, closePattern, open);
            if (close == -1) {
                cursor = content.length;
                break;
            }
            cursor = close + closePattern.length();
            open = HTMLCharsetDetector.findPattern(content, openPattern, cursor);
        }
        int tail = content.length - cursor;
        System.arraycopy(content, cursor, kept, keptLength, tail);
        keptLength += tail;
        return HTMLCharsetDetector.slice(kept, 0, keptLength);
    }

    /** Copies the bytes in {@code [from, to)} into a new array. */
    private static byte[] slice(byte[] source, int from, int to) {
        byte[] result = new byte[to - from];
        System.arraycopy(source, from, result, 0, result.length);
        return result;
    }

    /**
     * Finds the first occurrence of {@code pattern} in {@code content} at or
     * after {@code index}, ignoring the case of ASCII letters.
     *
     * <p>Case folding is done on ASCII letters only, so the result does not
     * depend on the default locale.
     *
     * @param content the bytes to search
     * @param pattern the pattern to look for
     * @param index the offset to start searching from; negative values are
     *        treated as 0
     * @return the offset of the first match, or {@code -1} when there is none
     */
    private static int findPattern(byte[] content, String pattern, int index) {
        int lastStart = content.length - pattern.length();
        for (int i = Math.max(index, 0); i <= lastStart; ++i) {
            if (HTMLCharsetDetector.matchesAt(content, pattern, i)) {
                return i;
            }
        }
        return -1;
    }

    /** Tests whether {@code pattern} occurs in {@code content} at {@code offset}, ignoring ASCII case. */
    private static boolean matchesAt(byte[] content, String pattern, int offset) {
        for (int j = 0; j < pattern.length(); ++j) {
            char actual = HTMLCharsetDetector.toAsciiLower((char)content[offset + j]);
            char expected = HTMLCharsetDetector.toAsciiLower(pattern.charAt(j));
            if (actual != expected) {
                return false;
            }
        }
        return true;
    }

    /** Lower-cases {@code A}-{@code Z}; every other char is returned unchanged. */
    private static char toAsciiLower(char c) {
        return c >= 'A' && c <= 'Z' ? (char)(c + ('a' - 'A')) : c;
    }

    /**
     * Returns a copy of {@code html} without the bytes that lie between a
     * {@code <} and the next {@code >}, and without the angle brackets themselves.
     *
     * <p>Only an inside/outside flag is tracked, so a stray {@code >} in the
     * text is dropped without affecting what follows.
     */
    private static byte[] removeTags(byte[] html) {
        byte[] text = new byte[html.length];
        int textLength = 0;
        boolean insideTag = false;
        for (int i = 0; i < html.length; ++i) {
            byte current = html[i];
            if (current == '<') {
                insideTag = true;
            } else if (current == '>') {
                insideTag = false;
            } else if (!insideTag) {
                text[textLength++] = current;
            }
        }
        return HTMLCharsetDetector.slice(text, 0, textLength);
    }
}

