package de.l3s.boilerpipe.filters.heuristics;

import de.l3s.boilerpipe.BoilerpipeFilter;
import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: classes.dex */
/**
 * Labels the text block that repeats the document title.
 *
 * <p>From the title passed to the constructor a set of candidate strings is derived: the
 * normalized title itself plus several fragments obtained by splitting it at common separator
 * characters. {@link #process(TextDocument)} then labels the first text block whose normalized
 * text is one of these candidates with {@code "de.l3s.boilerpipe/TITLE"}.
 */
public final class DocumentTitleMatchClassifier implements BoilerpipeFilter {
    private static final Pattern PAT_REMOVE_CHARACTERS = Pattern.compile("[\\?\\!\\.\\-\\:]+");

    /** Separator between "words" when counting them: runs of spaces and/or backspace characters. */
    private static final Pattern PAT_WORD_SEPARATOR = Pattern.compile("[\b ]+");

    /** Label attached to the matching text block. */
    private static final String TITLE_LABEL = "de.l3s.boilerpipe/TITLE";

    /** Minimum number of words a fragment needs to be accepted by {@link #addPotentialTitles}. */
    private static final int MIN_FRAGMENT_WORDS = 4;

    /**
     * Separator regexes, from narrow to broad. For each one the longest fragment of the title
     * (see {@link #getLongestPart}) becomes a title candidate.
     */
    private static final String[] LONGEST_PART_SEPARATORS = {
        "[ ]*[\\|»|-][ ]*",
        "[ ]*[\\|»|:][ ]*",
        "[ ]*[\\|»|:\\(\\)][ ]*",
        "[ ]*[\\|»|:\\(\\)\\-][ ]*",
        "[ ]*[\\|»|,|:\\(\\)\\-][ ]*",
        "[ ]*[\\|»|,|:\\(\\)\\- ][ ]*",
    };

    /** Title candidates, or {@code null} when no usable title was supplied. */
    private final Set<String> potentialTitles;

    /**
     * Builds the candidate set for the given document title.
     *
     * @param str the document title; {@code null} or a title that is empty after normalization
     *     turns {@link #process(TextDocument)} into a no-op
     */
    public DocumentTitleMatchClassifier(String str) {
        String title = (str == null) ? "" : normalize(str);
        if (title.length() == 0) {
            this.potentialTitles = null;
            return;
        }

        Set<String> candidates = new HashSet<String>();
        candidates.add(title);

        for (String separator : LONGEST_PART_SEPARATORS) {
            String longest = getLongestPart(title, separator);
            if (longest != null) {
                candidates.add(longest);
            }
        }

        addPotentialTitles(candidates, title, "[ ]+[\\|][ ]+", MIN_FRAGMENT_WORDS);
        addPotentialTitles(candidates, title, "[ ]+[\\-][ ]+", MIN_FRAGMENT_WORDS);

        // Title with a trailing " - xyz" segment removed, and with a leading "xyz - " removed.
        candidates.add(title.replaceFirst(" - [^\\-]+$", ""));
        candidates.add(title.replaceFirst("^[^\\-]+ - ", ""));

        this.potentialTitles = candidates;
    }

    /**
     * Brings a title or block text into the form used for comparison: non-breaking spaces become
     * plain spaces, apostrophes are dropped, and the result is trimmed and lower-cased.
     *
     * <p>Note: lower-casing uses the JVM's default locale, exactly as the comparison always did.
     */
    private static String normalize(String text) {
        return text.replace('\u00a0', ' ').replace("'", "").trim().toLowerCase();
    }

    /** Number of pieces {@code text} falls into when split at {@link #PAT_WORD_SEPARATOR}. */
    private static int countWords(String text) {
        return PAT_WORD_SEPARATOR.split(text).length;
    }

    /**
     * Splits {@code title} at {@code separatorRegex} and adds every fragment that does not
     * contain {@code ".com"} and has at least {@code minWords} words. Nothing is added when the
     * separator does not occur.
     */
    private static void addPotentialTitles(
            Set<String> target, String title, String separatorRegex, int minWords) {
        String[] parts = title.split(separatorRegex);
        if (parts.length == 1) {
            return;
        }
        for (String part : parts) {
            if (part.contains(".com")) {
                continue;
            }
            if (countWords(part) >= minWords) {
                target.add(part);
            }
        }
    }

    /**
     * Splits {@code title} at {@code separatorRegex} and picks the "longest" fragment. Fragments
     * containing {@code ".com"} are ignored; a fragment replaces the current pick when it has
     * more words or more characters than the current pick.
     *
     * @return the trimmed fragment, or {@code null} when the separator does not occur or no
     *     non-blank fragment qualifies
     */
    private static String getLongestPart(String title, String separatorRegex) {
        String[] parts = title.split(separatorRegex);
        if (parts.length == 1) {
            return null;
        }

        String longest = "";
        int longestNumWords = 0;
        for (String part : parts) {
            if (part.contains(".com")) {
                continue;
            }
            int numWords = countWords(part);
            if (numWords > longestNumWords || part.length() > longest.length()) {
                longest = part;
                longestNumWords = numWords;
            }
        }

        // Trim before the emptiness check: a whitespace-only fragment (e.g. tabs between two
        // separators) must not be returned as "", which would make blank blocks match as title.
        String trimmed = longest.trim();
        return trimmed.length() == 0 ? null : trimmed;
    }

    /**
     * Labels the first text block whose normalized text, either as is or with the characters
     * {@code ? ! . - :} removed, equals one of the title candidates.
     *
     * @param textDocument the document whose text blocks are inspected
     * @return {@code true} if a block was labelled, {@code false} otherwise (including when the
     *     classifier was built without a usable title)
     */
    @Override
    public boolean process(TextDocument textDocument) {
        if (this.potentialTitles == null) {
            return false;
        }
        for (TextBlock block : textDocument.getTextBlocks()) {
            String text = normalize(block.getText());
            boolean matches = this.potentialTitles.contains(text)
                    || this.potentialTitles.contains(
                            PAT_REMOVE_CHARACTERS.matcher(text).replaceAll("").trim());
            if (matches) {
                block.addLabel(TITLE_LABEL);
                return true;
            }
        }
        return false;
    }
}
