5
votes

So far I only have a working code for retrieving texts from ppt slide notes

try {
    FileInputStream is = new FileInputStream("C:\\sample\\test.ppt");
    SlideShow ppt = new SlideShow(is);

    Slide[] slide = ppt.getSlides();
    for (int i = 0; i < slide.length; i++) {

        System.out.println(i);
        TextRun[] runs = slide[i].getNotesSheet().getTextRuns();
        if (runs.length < 1) {
            System.out.println("null");
        } else {
            for (TextRun run : runs) {
                System.out.println(" > " + run.getText());
            }
        }
    }

} catch (IOException ioe) {

}

But how do you retrieve text from pptx slide notes?

1
Did you try looking at the code and examples in Apache POI? For example, the XSLF text extractor class XSLFPowerPointExtractor which is able to extract text from slide notes?Gagravarr

1 Answers

8
votes

After constant trial and error, found a solution.

try {

    FileInputStream fis = new FileInputStream("C:\\sample\\sample.pptx");
    XMLSlideShow pptxshow = new XMLSlideShow(fis);

    XSLFSlide[] slide2 = pptxshow.getSlides();
    for (int i = 0; i < slide2.length; i++) {
        System.out.println(i);
        try {
            XSLFNotes mynotes = slide2[i].getNotes();
            for (XSLFShape shape : mynotes) {
                if (shape instanceof XSLFTextShape) {
                    XSLFTextShape txShape = (XSLFTextShape) shape;
                    for (XSLFTextParagraph xslfParagraph : txShape.getTextParagraphs()) {
                        System.out.println(xslfParagraph.getText());
                    }
                }
            }
        } catch (Exception e) {

        }

    }
} catch (IOException e) {

}