package net.sf.iqser.plugin.file.parser.pdf;

import com.iqser.core.model.Attribute;
import com.iqser.core.model.Content;
import java.io.IOException;
import java.io.InputStream;
import net.sf.iqser.plugin.file.parser.FileParser;
import net.sf.iqser.plugin.file.parser.FileParserException;
import net.sf.iqser.plugin.file.parser.FileParserUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.util.PDFTextStripper;

/* loaded from: input_file:net/sf/iqser/plugin/file/parser/pdf/PdfFileParser.class */
/**
 * {@link FileParser} implementation that turns a PDF stream into a {@link Content} object.
 *
 * <p>The resulting content carries the type {@code "PDF Document"}, the file name and a title
 * derived from the file name, plus whatever the PDF document information dictionary provides
 * (author, creation date, title, subject, keywords) and the extracted full text. Encrypted
 * documents are not read: they yield a content object holding only the file-name based
 * attributes.
 */
public class PdfFileParser implements FileParser {
    private static final String PDF_FILE_CONTENT_TYPE = "PDF Document";
    private static final Logger LOGGER = Logger.getLogger(PdfFileParser.class);

    /**
     * Parses the given PDF stream and builds the corresponding content object.
     *
     * @param str         name (or path) of the file being parsed; used for the {@code FILENAME}
     *                    and the default {@code TITLE} attribute and in log messages
     * @param inputStream stream delivering the PDF bytes
     * @return the populated content object
     * @throws FileParserException if reading or parsing the stream fails with an
     *                             {@link IOException}
     */
    @Override // net.sf.iqser.plugin.file.parser.FileParser
    public Content getContent(String str, InputStream inputStream) throws FileParserException {
        LOGGER.info("Parsing file " + str);
        Content content = new Content();
        content.setType(PDF_FILE_CONTENT_TYPE);
        // NOTE(review): the third constructor argument is the attribute type code and the
        // fourth the key flag (see addOrUpdateAttribute); the meaning of the numeric codes
        // 0 and 2 is defined by the Attribute class - confirm there.
        content.addAttribute(new Attribute("FILENAME", FilenameUtils.getName(str), 0, false));
        content.addAttribute(new Attribute("TITLE", FileParserUtils.getFileTitle(str), 0, true));
        try {
            parsePdfDocument(inputStream, content);

            // NOTE(review): either attribute may be null at this point; this relies on
            // FileParserUtils.transformIntoMultiValue tolerating null - confirm.
            Attribute keywords = content.getAttributeByName("KEYWORDS");
            FileParserUtils.transformIntoMultiValue(keywords, ", ");
            Attribute pdfKeywords = content.getAttributeByName("PDFKEYWORDS");
            FileParserUtils.transformIntoMultiValue(pdfKeywords, ", ");

            // Without a regular KEYWORDS attribute the PDF keywords take over that name.
            if (keywords == null && pdfKeywords != null) {
                pdfKeywords.setName("KEYWORDS");
            }
            return content;
        } catch (IOException e) {
            LOGGER.error("Failed to read stream for file " + str, e);
            throw new FileParserException("Failed to read stream for file " + str, e);
        }
    }

    /**
     * Parses the stream with PDFBox and copies metadata and full text into {@code content}.
     * Encrypted documents are skipped. The parsed document is closed in every case, including
     * when metadata or text extraction throws.
     *
     * @param inputStream stream delivering the PDF bytes
     * @param content     content object to populate
     * @throws IOException if parsing, reading the document information or closing fails
     */
    private void parsePdfDocument(InputStream inputStream, Content content) throws IOException {
        PDFParser parser = new PDFParser(inputStream);
        parser.parse();
        COSDocument cosDocument = parser.getDocument();
        PDDocument pdDocument = new PDDocument(cosDocument);
        try {
            if (!pdDocument.isEncrypted()) {
                getDocumentInformation(pdDocument, content);
                content.setFulltext(FileParserUtils.cleanUpText(extractText(pdDocument)));
            }
        } finally {
            // Release the document even if extraction failed; both handles are closed in the
            // same order as before, and the second close still runs if the first one throws.
            try {
                cosDocument.close();
            } finally {
                pdDocument.close();
            }
        }
    }

    /**
     * Copies author, creation date, title, subject and keywords from the PDF document
     * information into {@code content}. Empty values are ignored; existing attributes with
     * the same name are overwritten. Does nothing when the document has no information
     * dictionary or is encrypted.
     *
     * @param pDDocument document to read the information from
     * @param content    content object receiving the attributes
     * @throws IOException if PDFBox fails to read a value from the document information
     */
    private void getDocumentInformation(PDDocument pDDocument, Content content) throws IOException {
        PDDocumentInformation info = pDDocument.getDocumentInformation();
        if (info == null || pDDocument.isEncrypted()) {
            return;
        }
        if (!StringUtils.isEmpty(info.getAuthor())) {
            addOrUpdateAttribute(content, new Attribute("AUTHOR", info.getAuthor(), 0, true));
        }
        if (info.getCreationDate() != null) {
            // The creation date is stored as epoch milliseconds rendered as a string.
            String creationMillis = String.valueOf(info.getCreationDate().getTimeInMillis());
            addOrUpdateAttribute(content, new Attribute("CREATIONDATE", creationMillis, 2, true));
        }
        if (!StringUtils.isEmpty(info.getTitle())) {
            // Replaces the file-name based TITLE set in getContent.
            addOrUpdateAttribute(content, new Attribute("TITLE", info.getTitle(), 0, true));
        }
        if (!StringUtils.isEmpty(info.getSubject())) {
            addOrUpdateAttribute(content, new Attribute("SUBJECT", info.getSubject(), 0, true));
        }
        if (!StringUtils.isEmpty(info.getKeywords())) {
            addOrUpdateAttribute(content, new Attribute("PDFKEYWORDS", info.getKeywords(), 0, true));
        }
    }

    /**
     * Adds {@code attribute} to {@code content}, or, if an attribute with the same name is
     * already present, overwrites that attribute's value, type and key flag.
     *
     * @param content   content object to modify
     * @param attribute attribute carrying the new values
     */
    private void addOrUpdateAttribute(Content content, Attribute attribute) {
        Attribute existing = content.getAttributeByName(attribute.getName());
        if (existing == null) {
            content.addAttribute(attribute);
            return;
        }
        existing.setValue(attribute.getValue());
        existing.setType(attribute.getType());
        existing.setKey(attribute.isKey());
    }

    /**
     * Extracts the plain text of the document on a best-effort basis: failures are logged and
     * result in an empty string instead of an exception.
     *
     * @param pdDocument the already opened document; it is not closed here
     * @return the extracted text, or {@code ""} if the document is encrypted or extraction
     *         failed
     */
    private String extractText(PDDocument pdDocument) {
        try {
            PDFTextStripper stripper = new PDFTextStripper();
            if (pdDocument.isEncrypted()) {
                LOGGER.warn("Cannot parse encrypted Document");
                return "";
            }
            try {
                return stripper.getText(pdDocument);
            } catch (IOException e) {
                LOGGER.error("Cannot extract Document", e);
            }
        } catch (Exception e) {
            // Deliberately broad: text extraction must never abort parsing of the file.
            LOGGER.warn("Error parsing PDF Document", e);
        }
        return "";
    }
}
