Skip to content

Commit

Permalink
Convert PDF to Docx, powerpoint and others (#90)
Browse files Browse the repository at this point in the history
  • Loading branch information
Frooodle authored Apr 16, 2023
1 parent 0a7517e commit c311f9a
Show file tree
Hide file tree
Showing 23 changed files with 626 additions and 40 deletions.
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@
[![GitHub Repo stars](https://img.shields.io/github/stars/frooodle/stirling-pdf?style=social)](https://github.com/Frooodle/stirling-pdf)
[![Paypal Donate](https://img.shields.io/badge/Paypal%20Donate-yellow?style=flat&logo=paypal)](https://www.paypal.com/paypalme/froodleplex)

This is a locally hosted web application that allows you to perform various operations on PDF files, such as splitting and adding images.
This is a powerful locally hosted web based PDF manipulation tool using docker that allows you to perform various operations on PDF files, such as splitting merging, converting, reorganizing, adding images, rotating, compressing, and more. This locally hosted web application started as a 100% ChatGPT-made application and has evolved to include a wide range of features to handle all your PDF needs.

Started off as a 100% ChatGPT made application, slowly moving away from that as more features are added

I will support and fix/add things to this if there is a demand [Discord](https://discord.gg/Cn8pWhQRxZ)
Feel free to request any features of bug fixes either in github issues or our [Discord](https://discord.gg/Cn8pWhQRxZ)


![stirling-home](images/stirling-home.png)
Expand All @@ -29,6 +27,7 @@ I will support and fix/add things to this if there is a demand [Discord](https:/
- Set PDF Permissions
- Add watermark(s)
- Convert Any common file to PDF (using LibreOffice)
- Convert PDF to Word/Powerpoint/Others (using LibreOffice)
- Extract images from PDF
- OCR on PDF (Using OCRMyPDF)
- Edit metadata
Expand Down
11 changes: 4 additions & 7 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,17 @@ plugins {
}

group = 'stirling.software'
version = '0.4.8'
version = '0.5.0'
sourceCompatibility = '17'

repositories {
mavenCentral()
}

dependencies {
implementation 'org.springframework.boot:spring-boot-starter-web'
implementation 'org.springframework.boot:spring-boot-starter-thymeleaf'
testImplementation 'org.springframework.boot:spring-boot-starter-test'

implementation 'org.apache.logging.log4j:log4j-core:2.20.0'

implementation 'org.springframework.boot:spring-boot-starter-web:3.0.5'
implementation 'org.springframework.boot:spring-boot-starter-thymeleaf:3.0.5'
testImplementation 'org.springframework.boot:spring-boot-starter-test:3.0.5'
// https://mvnrepository.com/artifact/org.apache.pdfbox/jbig2-imageio
implementation group: 'org.apache.pdfbox', name: 'jbig2-imageio', version: '3.0.4'
implementation 'commons-io:commons-io:2.11.0'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package stirling.software.SPDF.controller;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
Expand All @@ -11,8 +9,6 @@
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.InputStreamResource;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
Expand All @@ -28,9 +28,6 @@
import org.springframework.web.servlet.ModelAndView;

import stirling.software.SPDF.utils.ProcessExecutor;
//import com.spire.pdf.*;
import java.util.concurrent.Semaphore;
import java.util.regex.Pattern;
@Controller
public class OCRController {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,14 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
Expand All @@ -31,8 +27,6 @@
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@Controller
public class SplitPDFController {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
package stirling.software.SPDF.controller.converters;

import java.io.IOException;
import java.nio.file.StandardCopyOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.io.FilenameUtils;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.apache.commons.io.FilenameUtils;

import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
Expand Down Expand Up @@ -75,4 +76,5 @@ private boolean isValidFileExtension(String fileExtension) {
String extensionPattern = "^(?i)[a-z0-9]{2,4}$";
return fileExtension.matches(extensionPattern);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package stirling.software.SPDF.controller.converters;

import java.io.IOException;

import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;

import stirling.software.SPDF.utils.PDFToFile;

@Controller
public class ConvertPDFToOffice {



@GetMapping("/pdf-to-word")
public ModelAndView pdfToWord() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word");
modelAndView.addObject("currentPage", "pdf-to-word");
return modelAndView;
}

@GetMapping("/pdf-to-presentation")
public ModelAndView pdfToPresentation() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-presentation");
modelAndView.addObject("currentPage", "pdf-to-presentation");
return modelAndView;
}

@GetMapping("/pdf-to-text")
public ModelAndView pdfToText() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-text");
modelAndView.addObject("currentPage", "pdf-to-text");
return modelAndView;
}

@GetMapping("/pdf-to-html")
public ModelAndView pdfToHTML() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html");
modelAndView.addObject("currentPage", "pdf-to-html");
return modelAndView;
}

@GetMapping("/pdf-to-xml")
public ModelAndView pdfToXML() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-xml");
modelAndView.addObject("currentPage", "pdf-to-xml");
return modelAndView;
}


@PostMapping("/pdf-to-word")
public ResponseEntity<byte[]> processPdfToWord(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}

@PostMapping("/pdf-to-presentation")
public ResponseEntity<byte[]> processPdfToPresentation(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
}

@PostMapping("/pdf-to-text")
public ResponseEntity<byte[]> processPdfToRTForTXT(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}


@PostMapping("/pdf-to-html")
public ResponseEntity<byte[]> processPdfToHTML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, "html", "writer_pdf_import");
}

@PostMapping("/pdf-to-xml")
public ResponseEntity<byte[]> processPdfToXML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import");
}








}
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
package stirling.software.SPDF.controller.converters;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.springframework.http.HttpHeaders;
Expand All @@ -18,9 +16,6 @@
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;

import com.itextpdf.xmp.XMPException;

import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class ConvertPDFToPDFA {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
Expand All @@ -26,7 +27,6 @@

import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WatermarkRemover;
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;

@Controller
public class WatermarkController {
Expand Down
101 changes: 101 additions & 0 deletions src/main/java/stirling/software/SPDF/utils/PDFToFile.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package stirling.software.SPDF.utils;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.multipart.MultipartFile;
public class PDFToFile {
public ResponseEntity<byte[]> processPdfToOfficeFormat(MultipartFile inputFile, String outputFormat, String libreOfficeFilter)
throws IOException, InterruptedException {

if (!"application/pdf".equals(inputFile.getContentType())) {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
}

// Get the original PDF file name without the extension
String originalPdfFileName = inputFile.getOriginalFilename();
String pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));

// Validate output format
List<String> allowedFormats = Arrays.asList("doc", "docx", "odt", "ppt", "pptx", "odp", "rtf", "html","xml","txt:Text");
if (!allowedFormats.contains(outputFormat)) {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
}

Path tempInputFile = null;
Path tempOutputDir = null;
byte[] fileBytes;
// Prepare response
HttpHeaders headers = new HttpHeaders();

try {
// Save the uploaded file to a temporary location
tempInputFile = Files.createTempFile("input_", ".pdf");
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);

// Prepare the output directory
tempOutputDir = Files.createTempDirectory("output_");

// Run the LibreOffice command
List<String> command = new ArrayList<>(Arrays.asList(
"soffice", "--infilter=" + libreOfficeFilter, "--convert-to", outputFormat, "--outdir", tempOutputDir.toString(), tempInputFile.toString()
));
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);

// Get output files
List<File> outputFiles = Arrays.asList(tempOutputDir.toFile().listFiles());

if (outputFiles.size() == 1) {
// Return single output file
File outputFile = outputFiles.get(0);
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
if(outputFormat.equals("txt:Text")) {
outputFormat="txt";
}
headers.setContentDispositionFormData("attachment", pdfBaseName + "." + outputFormat);
fileBytes = FileUtils.readFileToByteArray(outputFile);
} else {
// Return output files in a ZIP archive
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
headers.setContentDispositionFormData("attachment", pdfBaseName + "To" + outputFormat + ".zip");
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
ZipOutputStream zipOutputStream = new ZipOutputStream(byteArrayOutputStream);

for (File outputFile : outputFiles) {
ZipEntry entry = new ZipEntry(outputFile.getName());
zipOutputStream.putNextEntry(entry);
FileInputStream fis = new FileInputStream(outputFile);
IOUtils.copy(fis, zipOutputStream);
fis.close();
zipOutputStream.closeEntry();
}

zipOutputStream.close();
fileBytes = byteArrayOutputStream.toByteArray();
}

} finally {
// Clean up the temporary files
if (tempInputFile != null)
Files.delete(tempInputFile);
if (tempOutputDir != null)
FileUtils.deleteDirectory(tempOutputDir.toFile());
}
return new ResponseEntity<>(fileBytes, headers, HttpStatus.OK);
}
}
Loading

0 comments on commit c311f9a

Please sign in to comment.