/*
 * Decompiled with CFR 0.152.
 */
package org.seasar.robot.extractor.impl;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.seasar.robot.RobotSystemException;
import org.seasar.robot.entity.ExtractData;
import org.seasar.robot.extractor.ExtractException;
import org.seasar.robot.extractor.Extractor;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * {@link Extractor} implementation that pulls the text out of a PDF document
 * using PDFBox's {@link PDFTextStripper}.
 *
 * <p>The stripped text is written through a writer using the configured
 * {@link #encoding} and then decoded again with the same encoding before being
 * wrapped in an {@link ExtractData}.</p>
 */
public class PdfExtractor
implements Extractor {
    /** Charset name used to encode and decode the stripped text; defaults to UTF-8. */
    protected String encoding = "UTF-8";

    /**
     * Extracts the text of the PDF document read from the given stream.
     *
     * @param in stream providing the PDF content; must not be {@code null}.
     *           This method does not close the stream itself.
     * @param params extraction parameters; not used by this implementation
     * @return an {@link ExtractData} holding the extracted text
     * @throws RobotSystemException if {@code in} is {@code null}
     * @throws ExtractException if loading the document, stripping its text or
     *                          encoding/decoding the result fails
     */
    @Override
    public ExtractData getText(InputStream in, Map<String, String> params) {
        if (in == null) {
            throw new RobotSystemException("The inputstream is null.");
        }
        PDDocument document = null;
        try {
            document = PDDocument.load(in);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            Writer output = new OutputStreamWriter(baos, this.encoding);
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.writeText(document, output);
            // OutputStreamWriter buffers encoded bytes internally; without this
            // flush the tail of the text (or all of it, for small documents)
            // would never reach the byte buffer read below.
            output.flush();
            return new ExtractData(baos.toString(this.encoding));
        }
        catch (Exception e) {
            throw new ExtractException(e);
        }
        finally {
            if (document != null) {
                try {
                    document.close();
                }
                catch (IOException ignored) {
                    // Best-effort cleanup: a failure to close must not mask the
                    // extraction result or the original exception.
                }
            }
        }
    }

    /**
     * Returns the charset name used when encoding and decoding the extracted text.
     *
     * @return the current encoding name
     */
    public String getEncoding() {
        return this.encoding;
    }

    /**
     * Sets the charset name used when encoding and decoding the extracted text.
     *
     * @param encoding the encoding name to use for subsequent extractions
     */
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }
}

