/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats.masc;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import opennlp.tools.formats.masc.MascToken;
import opennlp.tools.formats.masc.MascWord;
import opennlp.tools.util.Span;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A sentence of a MASC document.
 *
 * <p>The sentence itself is a {@link Span} whose start and end index into the text passed to
 * the constructor. It is built from the words ("quarks") that lie inside it and can afterwards be
 * enriched with Penn tokens via {@code tokenizePenn} and with named entities via
 * {@code addNamedEntities}.
 *
 * <p>All token-based accessors ({@link #getTokenText()}, {@link #getTokenStrings()},
 * {@link #getTokensSpans()}, {@link #getTags()}) read the token list that only
 * {@code tokenizePenn} assigns; calling them before a tokenization has completed results in a
 * {@link NullPointerException}.
 */
public class MascSentence extends Span {
    private static final long serialVersionUID = 6295507533472650848L;
    private static final Logger logger = LoggerFactory.getLogger(MascSentence.class);

    /** Every word of the document; fallback lookup for quarks that are not in this sentence. */
    private final List<MascWord> allDocumentWords;
    /** The text that the offsets of this sentence and of its tokens index into. */
    private final String text;
    /** The words of this sentence, in the order given to the constructor. */
    private final List<MascWord> words;
    /** The words of this sentence keyed by their id. */
    private final Map<Integer, MascWord> wordsById;
    /** Tokens of this sentence; {@code null} until {@code tokenizePenn} has completed. */
    private List<MascToken> sentenceTokens = null;
    /** Maps a token id to the position of that token in {@link #sentenceTokens}. */
    private final Map<Integer, Integer> tokensById = new HashMap<Integer, Integer>();
    /** Named entities of this sentence, expressed in token indices. */
    private List<Span> namedEntities = new ArrayList<Span>();

    /**
     * Creates a sentence covering {@code [s, e)} of {@code text}.
     *
     * @param s start offset of the sentence
     * @param e end offset of the sentence
     * @param text the text the offsets refer to
     * @param sentenceQuarks the words belonging to this sentence
     * @param allQuarks all words of the document, used to resolve quarks outside this sentence
     */
    public MascSentence(int s, int e, String text, List<MascWord> sentenceQuarks, List<MascWord> allQuarks) {
        super(s, e);
        this.text = text;
        this.words = sentenceQuarks;
        this.allDocumentWords = allQuarks;
        HashMap<Integer, MascWord> idToWordMap = new HashMap<Integer, MascWord>();
        for (MascWord w : sentenceQuarks) {
            idToWordMap.put(w.getId(), w);
        }
        this.wordsById = idToWordMap;
    }

    /**
     * Builds the token list of this sentence from the Penn tokenization maps.
     *
     * <p>Tokens are discovered by walking the words of the sentence in order and following
     * {@code quarkToTokens}; each token id is added at most once. Tokens whose resulting span is
     * empty are skipped. A token that has no quarks registered in {@code tokenToQuarks} cannot be
     * positioned, so it is skipped as well and reported as an issue.
     *
     * <p>The token list and the id-to-index map are replaced only once the whole sentence has
     * been processed, so a failed call leaves a previous tokenization untouched.
     *
     * @param tokenToQuarks token id to the ids of the quarks it consists of
     * @param quarkToTokens quark id to the ids of the tokens it is part of
     * @param tokenToBase token id to the base form passed on to {@link MascToken}
     * @param tokenToTag token id to the tag passed on to {@link MascToken}
     * @return {@code true} if no problem was found; {@code false} if a token referenced a quark
     *         outside this sentence or had no quarks at all
     * @throws IOException if a referenced quark exists neither in this sentence nor in the document
     */
    boolean tokenizePenn(Map<Integer, int[]> tokenToQuarks, Map<Integer, int[]> quarkToTokens, Map<Integer, String> tokenToBase, Map<Integer, String> tokenToTag) throws IOException {
        boolean fileWithoutIssues = true;
        QuarkExtractor extractor = new QuarkExtractor(this.wordsById, this.allDocumentWords);
        List<MascToken> tokens = new ArrayList<MascToken>();
        HashSet<Integer> processedTokenIds = new HashSet<Integer>();
        for (MascWord w : this.words) {
            int[] tokenIds = quarkToTokens.get(w.getId());
            if (tokenIds == null) {
                continue;
            }
            for (int tokenId : tokenIds) {
                if (processedTokenIds.contains(tokenId)) {
                    continue;
                }
                int[] quarkIds = tokenToQuarks.get(tokenId);
                if (quarkIds == null || quarkIds.length == 0) {
                    // Without quarks there is no start/end to derive; skip instead of crashing.
                    logger.warn("Token without quarks found: {}", tokenId);
                    fileWithoutIssues = false;
                    continue;
                }
                MascWord[] quarks = new MascWord[quarkIds.length];
                for (int i = 0; i < quarkIds.length; ++i) {
                    int quarkId = quarkIds[i];
                    if (!this.wordsById.containsKey(quarkId)) {
                        fileWithoutIssues = false;
                        logger.warn("Some tokens cross sentence boundaries.\n\tQuark ID: {}\n\tPenn token ID: {}", quarkId, tokenId);
                    }
                    quarks[i] = extractor.get(quarkId);
                }
                int start = quarks[0].getStart();
                int end = quarks[quarks.length - 1].getEnd();
                if (end - start <= 0) {
                    continue;
                }
                tokens.add(new MascToken(start, end, tokenId, tokenToTag.get(tokenId), tokenToBase.get(tokenId), quarks));
                processedTokenIds.add(tokenId);
            }
        }
        // Drop indices of an earlier tokenization so that no stale token id survives.
        this.tokensById.clear();
        for (int i = 0; i < tokens.size(); ++i) {
            this.tokensById.put(tokens.get(i).getTokenId(), i);
        }
        this.sentenceTokens = Collections.unmodifiableList(tokens);
        return fileWithoutIssues;
    }

    /**
     * Adds the named entities that have at least one token in this sentence.
     *
     * <p>Each entity becomes a {@link Span} in token indices: its start is the smallest index of
     * its tokens found in this sentence and its end is one past the largest such index. The
     * entities are then sorted by start, and whenever two of them overlap only the longer one is
     * kept (the earlier one on equal length). The resulting list is stored as an unmodifiable
     * list.
     *
     * @param entityIDtoEntityType entity id to entity type
     * @param entityIDsToTokens entity id to the ids of the tokens forming the entity
     * @return {@code true} if no overlapping entities were found, {@code false} otherwise
     * @throws IOException if the sentence has not been tokenized yet
     */
    boolean addNamedEntities(Map<Integer, String> entityIDtoEntityType, Map<Integer, List<Integer>> entityIDsToTokens) throws IOException {
        if (this.sentenceTokens == null) {
            throw new IOException("Named entity labels provided for an un-tokenized sentence.");
        }
        // Work on a copy: the stored list may be unmodifiable after an earlier call.
        List<Span> entities = new ArrayList<Span>(this.namedEntities);
        for (Map.Entry<Integer, List<Integer>> namedEntity : entityIDsToTokens.entrySet()) {
            String type = entityIDtoEntityType.get(namedEntity.getKey());
            int start = this.sentenceTokens.size();
            int end = 0;
            boolean entityInThisSentence = false;
            for (int tokenID : namedEntity.getValue()) {
                Integer tokenIndex = this.tokensById.get(tokenID);
                if (tokenIndex == null) {
                    continue;
                }
                entityInThisSentence = true;
                start = Math.min(start, tokenIndex);
                // 'end' is exclusive, so it has to be compared against index + 1.
                end = Math.max(end, tokenIndex + 1);
            }
            if (entityInThisSentence) {
                entities.add(new Span(start, end, type));
            }
        }
        entities.sort(Comparator.comparingInt(Span::getStart));

        // Single pass over the start-sorted entities, always comparing the next candidate with
        // the most recent entity that is still kept.
        HashSet<Integer> overlaps = new HashSet<Integer>();
        int keptIndex = 0;
        for (int candidateIndex = 1; candidateIndex < entities.size(); ++candidateIndex) {
            Span kept = entities.get(keptIndex);
            Span candidate = entities.get(candidateIndex);
            boolean overlapping = kept.contains(candidate) || candidate.contains(kept) || kept.crosses(candidate);
            if (!overlapping) {
                keptIndex = candidateIndex;
                continue;
            }
            logger.warn("Named entities overlap. This is forbidden in OpenNLP.\n\tKeeping the longer of them.");
            if (candidate.length() > kept.length()) {
                overlaps.add(keptIndex);
                keptIndex = candidateIndex;
            } else {
                overlaps.add(candidateIndex);
            }
        }

        List<Span> namedEntitiesNoOverlaps = new ArrayList<Span>();
        for (int i = 0; i < entities.size(); ++i) {
            if (!overlaps.contains(i)) {
                namedEntitiesNoOverlaps.add(entities.get(i));
            }
        }
        this.namedEntities = Collections.unmodifiableList(namedEntitiesNoOverlaps);
        return overlaps.isEmpty();
    }

    /**
     * Returns the named entities collected so far.
     *
     * @return the entity spans in token indices; empty if none were added
     */
    public List<Span> getNamedEntities() {
        return this.namedEntities;
    }

    /**
     * Returns the text covered by this sentence's own start and end offsets.
     *
     * @return the substring of the text between {@code getStart()} and {@code getEnd()}
     */
    public String getSentDetectText() {
        return this.text.substring(this.getStart(), this.getEnd());
    }

    /**
     * Returns the text from the start of the first token to the end of the last token.
     *
     * @return the covered text, or an empty string if the sentence has no tokens
     */
    public String getTokenText() {
        if (this.sentenceTokens.isEmpty()) {
            return "";
        }
        return this.text.substring(this.sentenceTokens.get(0).getStart(), this.sentenceTokens.get(this.sentenceTokens.size() - 1).getEnd());
    }

    /**
     * Returns the text of every token, in token order.
     *
     * @return an unmodifiable list with one string per token
     */
    public List<String> getTokenStrings() {
        ArrayList<String> tokenArray = new ArrayList<String>();
        for (MascToken t : this.sentenceTokens) {
            tokenArray.add(this.text.substring(t.getStart(), t.getEnd()));
        }
        return Collections.unmodifiableList(tokenArray);
    }

    /**
     * Returns the token spans shifted so that the first token starts at offset 0.
     *
     * @return an unmodifiable list with one span per token, relative to the first token's start
     */
    public List<Span> getTokensSpans() {
        ArrayList<Span> tokenSpans = new ArrayList<Span>();
        int offset = this.sentenceTokens.isEmpty() ? 0 : this.sentenceTokens.get(0).getStart();
        for (MascToken i : this.sentenceTokens) {
            tokenSpans.add(new Span(i.getStart() - offset, i.getEnd() - offset));
        }
        return Collections.unmodifiableList(tokenSpans);
    }

    /**
     * Returns the value of {@code getPos()} of every token, in token order.
     *
     * @return a new list with one entry per token
     * @throws IOException declared for callers; not thrown by this implementation
     */
    public List<String> getTags() throws IOException {
        ArrayList<String> tags = new ArrayList<String>();
        for (MascToken t : this.sentenceTokens) {
            tags.add(t.getPos());
        }
        return tags;
    }

    /**
     * Resolves quark ids to words, preferring the words of the sentence and falling back to a
     * linear search through all words of the document.
     */
    private record QuarkExtractor(Map<Integer, MascWord> wordsById, List<MascWord> allDocumentWords) {
        /**
         * Looks up the word with the given id.
         *
         * @param key the id of the word
         * @return the matching word
         * @throws IOException if no word with that id exists in the sentence or the document
         */
        private MascWord get(int key) throws IOException {
            if (this.wordsById.containsKey(key)) {
                return this.wordsById.get(key);
            }
            for (MascWord wordFromWholeDocument : this.allDocumentWords) {
                if (wordFromWholeDocument.getId() != key) continue;
                return wordFromWholeDocument;
            }
            throw new IOException("Word" + key + " not found in the document.");
        }
    }
}

