Read PDF Files with Selenium in Java

HOME

  <dependency>
      <groupId>org.seleniumhq.selenium</groupId>
      <artifactId>selenium-java</artifactId>
      <version>4.24.0</version>
    </dependency>

    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.16.1</version>
    </dependency>

    <dependency>
      <groupId>org.apache.pdfbox</groupId>
      <artifactId>pdfbox</artifactId>
      <version>3.0.3</version>
    </dependency>

 String downloadFilepath = System.getProperty("user.dir") + File.separator + "downloads";

        ChromeOptions options = new ChromeOptions();
        Map<String, Object> prefs = new HashMap<>();
        prefs.put("plugins.always_open_pdf_externally", true);
        prefs.put("download.default_directory", downloadFilepath);
        options.setExperimentalOption("prefs", prefs);

        WebDriver driver = new ChromeDriver(options);
        driver.manage().window().maximize();
        driver.get("https://freetestdata.com/document-files/pdf/");

        // Locate and click the download link or button if necessary
        WebElement downloadLink = driver.findElement(By.xpath("//*[@class=\"elementor-button-text\"]"));
        downloadLink.click();

        //Wait for download to complete
        File downloadedFile = new File(downloadFilepath + "/Free_Test_Data_100KB_PDF.pdf");
        WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30));
        wait.until((ExpectedCondition<Boolean>) wd -> downloadedFile.exists());

        // Check if the file exists
        if (downloadedFile.exists()) {
            System.out.println("File is downloaded!");
        } else {
            System.out.println("File is not downloaded.");
        }

File file = new File("Path of Document");   
PDDocument doc = Loader.loadPDF(file);   
PDFTextStripper pdfStripper = new PDFTextStripper();  
String text = pdfStripper.getText(doc);  

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;

import java.io.File;
import java.io.IOException;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;

public class ReadPDF_Chrome_Demo {

    public static void main(String[] args) throws InterruptedException {

        String downloadFilepath = System.getProperty("user.dir") + File.separator + "chrome_downloads";

        ChromeOptions options = new ChromeOptions();
        Map<String, Object> prefs = new HashMap<>();
        prefs.put("plugins.always_open_pdf_externally", true);
        prefs.put("download.default_directory", downloadFilepath);
        options.setExperimentalOption("prefs", prefs);

        WebDriver driver = new ChromeDriver(options);
        driver.manage().window().maximize();
        driver.get("https://freetestdata.com/document-files/pdf/");

        // Locate the download link/button and click and wait for the download to complete
        WebElement downloadLink = driver.findElement(By.xpath("//*[@class='elementor-button-text']"));
        downloadLink.click();

        //Wait for download to complete
        File downloadedFile = new File(downloadFilepath + "/Free_Test_Data_100KB_PDF.pdf");
        WebDriverWait wait = new WebDriverWait(driver, Duration.ofSeconds(30));
        wait.until((ExpectedCondition<Boolean>) wd -> downloadedFile.exists());
        
        // Check if the file exists
        if (downloadedFile.exists()) {
            System.out.println("File is downloaded!");
        } else {
            System.out.println("File is not downloaded.");
        }

        driver.quit();

        // Read the downloaded PDF using PDFBox
        PDDocument document = null;
        try {
            document = Loader.loadPDF(downloadedFile);
            PDFTextStripper pdfStripper = new PDFTextStripper();
            String text = pdfStripper.getText(document);
            document.close();

            // Print the PDF text content
            System.out.println("Text in PDF: ");
            System.out.println(text);
        } catch (IOException e) {
            System.err.println("An error occurred while loading or reading the PDF file: " + e.getMessage());
            e.printStackTrace();
        }

    }

}

### Summary