Skip to content

Commit 6eb8607

Browse files
[CCAP-629][CCAP-575] Support for converting documents and images to pdfs (#645)
* [CCAP-629] Support for converting images to pdfs * [CCAP-629] Support for converting images to pdfs * [CCAP-629] Support for converting images to pdfs * [CCAP-629] Support for converting images to pdfs * [CCAP-629] Support for converting images to pdfs * [CCAP-629] Support for converting images to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * Bump version to 1.6.8-SNAPSHOT * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-575] Support for converting docs to pdfs * [CCAP-629] Review cleanup * [CCAP-629] Review cleanup * [CCAP-629] Review cleanup --------- Co-authored-by: CfA Platforms Robot <platforms-robot@codeforamerica.org>
1 parent fee84df commit 6eb8607

File tree

12 files changed

+660
-103
lines changed

12 files changed

+660
-103
lines changed

README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1859,6 +1859,40 @@ bucket. This will automatically delete files in your bucket that are older than
18591859
permits.
18601860
[You can read more about configuring a retention policy in S3 here.](https://docs.aws.amazon.com/AmazonS3/latest/userguide/how-to-set-lifecycle-configuration-intro.html)
18611861

1862+
### File Conversion
1863+
1864+
File uploads made through form flow can be converted to PDFs and uploaded in parallel to the original
1865+
files. Images are converted using OpenPDF and no further dependencies are needed. Office documents are
1866+
converted using <a href="https://www.libreoffice.org/" target="_blank">LibreOffice</a>, which requires separate
1867+
installation.
1868+
1869+
To enable PDF conversion, set the `convert-to-pdf` property to `true`:
1870+
1871+
```yaml
1872+
form-flow:
1873+
uploads:
1874+
file-conversion:
1875+
convert-to-pdf: true
1876+
```
1877+
1878+
By default, the converted file will retain the original extension as part of the file name. For example,
1879+
`sample file.doc` will be converted to a new file called `sample file-doc.pdf`.
1880+
1881+
If you wish to add a prefix and/or suffix to the converted file name, set the `prefix` and `suffix` properties:
1882+
1883+
```yaml
1884+
form-flow:
1885+
uploads:
1886+
file-conversion:
1887+
convert-to-pdf: true
1888+
prefix: new-
1889+
suffix: -converted
1890+
```
1891+
1892+
With the above example, `sample file.doc` will be converted to `new-sample file-doc-converted.pdf`.
1893+
1894+
Converted documents do not count toward the maximum number of user-uploaded files (`form-flow.uploads.max-files`).
1895+
18621896
### Virus Scanning
18631897

18641898
#### ClamAV Server

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,9 @@ dependencies {
7373
implementation 'org.springframework.boot:spring-boot-starter-validation'
7474
implementation 'org.springframework.boot:spring-boot-starter-security'
7575
implementation 'org.springframework.boot:spring-boot-starter-webflux'
76+
implementation 'org.springframework.boot:spring-boot-starter-test'
7677
implementation 'org.jetbrains:annotations:26.0.2'
7778
implementation 'io.hypersistence:hypersistence-utils-hibernate-63:3.9.1'
78-
// implementation 'jakarta.persistence:jakarta.persistence-api:3.2.0'
7979
implementation 'org.flywaydb:flyway-core:[9.+,10.+['
8080
implementation 'org.webjars.npm:dropzone:5.9.3'
8181
implementation 'com.amazonaws:aws-java-sdk-s3:1.12.364'

src/main/java/formflow/library/FileController.java

Lines changed: 114 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,26 @@
1212
import formflow.library.data.UserFileRepositoryService;
1313
import formflow.library.file.CloudFile;
1414
import formflow.library.file.CloudFileRepository;
15+
import formflow.library.file.FileConversionService;
1516
import formflow.library.file.FileValidationService;
1617
import formflow.library.file.FileVirusScanner;
1718
import formflow.library.utils.UserFileMap;
1819
import jakarta.servlet.http.HttpServletRequest;
1920
import jakarta.servlet.http.HttpSession;
21+
import java.io.ByteArrayOutputStream;
22+
import java.io.File;
23+
import java.io.IOException;
24+
import java.util.HashSet;
2025
import java.util.List;
2126
import java.util.Locale;
2227
import java.util.Objects;
2328
import java.util.Optional;
29+
import java.util.Set;
2430
import java.util.UUID;
31+
import java.util.concurrent.CompletableFuture;
32+
import java.util.concurrent.TimeoutException;
33+
import java.util.zip.ZipEntry;
34+
import java.util.zip.ZipOutputStream;
2535
import lombok.extern.slf4j.Slf4j;
2636
import org.springframework.beans.factory.annotation.Value;
2737
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
@@ -30,6 +40,7 @@
3040
import org.springframework.http.HttpStatus;
3141
import org.springframework.http.MediaType;
3242
import org.springframework.http.ResponseEntity;
43+
import org.springframework.mock.web.MockMultipartFile;
3344
import org.springframework.stereotype.Controller;
3445
import org.springframework.web.bind.annotation.GetMapping;
3546
import org.springframework.web.bind.annotation.PathVariable;
@@ -42,12 +53,6 @@
4253
import org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody;
4354
import org.springframework.web.servlet.view.RedirectView;
4455

45-
import java.io.ByteArrayOutputStream;
46-
import java.io.IOException;
47-
import java.util.concurrent.TimeoutException;
48-
import java.util.zip.ZipEntry;
49-
import java.util.zip.ZipOutputStream;
50-
5156
@Controller
5257
@EnableAutoConfiguration
5358
@Slf4j
@@ -57,15 +62,19 @@ public class FileController extends FormFlowController {
5762
private final Boolean blockIfClammitUnreachable;
5863
private final FileVirusScanner fileVirusScanner;
5964
private final FileValidationService fileValidationService;
65+
private final FileConversionService fileConversionService;
6066
private final String SESSION_USERFILES_KEY = "userFiles";
6167
private final Integer maxFiles;
6268

63-
@Value("${form-flow.uploads.default-doc-type-label:#{null}}")
69+
@Value("${form-flow.uploads.default-doc-type-label:}")
6470
private String defaultDocType;
6571

6672
@Value("${form-flow.uploads.virus-scanning.enabled:false}")
6773
private boolean isVirusScanningEnabled;
6874

75+
@Value("${form-flow.uploads.file-conversion.convert-to-pdf:false}")
76+
private boolean convertUploadToPDF;
77+
6978
private final ObjectMapper objectMapper = new ObjectMapper();
7079

7180
public FileController(
@@ -77,12 +86,14 @@ public FileController(
7786
FormFlowConfigurationProperties formFlowConfigurationProperties,
7887
MessageSource messageSource,
7988
FileValidationService fileValidationService,
89+
FileConversionService fileConversionService,
8090
@Value("${form-flow.uploads.max-files:20}") Integer maxFiles,
8191
@Value("${form-flow.uploads.virus-scanning.block-if-unreachable:false}") boolean blockIfClammitUnreachable) {
8292
super(submissionRepositoryService, userFileRepositoryService, flowConfigurations, formFlowConfigurationProperties,
8393
messageSource);
8494
this.cloudFileRepository = cloudFileRepository;
8595
this.fileValidationService = fileValidationService;
96+
this.fileConversionService = fileConversionService;
8697
this.maxFiles = maxFiles;
8798
this.fileVirusScanner = fileVirusScanner;
8899
this.blockIfClammitUnreachable = blockIfClammitUnreachable;
@@ -126,7 +137,6 @@ public ResponseEntity<?> upload(
126137
return new ResponseEntity<>(message, HttpStatus.BAD_REQUEST);
127138
}
128139

129-
UUID userFileId = UUID.randomUUID();
130140
if (submission.getId() == null) {
131141
submission.setFlow(flow);
132142
submission = saveToRepository(submission);
@@ -175,16 +185,19 @@ public ResponseEntity<?> upload(
175185
}
176186
}
177187

178-
if (userFileRepositoryService.countBySubmission(submission) >= maxFiles) {
188+
if (userFileRepositoryService.countOfUploadedFilesBySubmission(submission) >= maxFiles) {
179189
String message = messageSource.getMessage("upload-documents.error-maximum-number-of-files", null, locale);
180190
return new ResponseEntity<>(message, HttpStatus.BAD_REQUEST);
181191
}
192+
193+
UUID userFileId = UUID.randomUUID();
182194
String uploadLocation = String.format("%s/%s_%s_%s.%s", submission.getId(), flow, inputName, userFileId,
183195
fileExtension);
184196

185197
cloudFileRepository.upload(uploadLocation, file);
186198

187199
UserFile uploadedFile = UserFile.builder()
200+
.fileId(userFileId)
188201
.submission(submission)
189202
.originalName(file.getOriginalFilename())
190203
.repositoryPath(uploadLocation)
@@ -208,6 +221,10 @@ public ResponseEntity<?> upload(
208221
userFileMap.addUserFileToMap(flow, inputName, uploadedFile, thumbDataUrl);
209222
httpSession.setAttribute(SESSION_USERFILES_KEY, objectMapper.writeValueAsString(userFileMap));
210223

224+
if (convertUploadToPDF) {
225+
convertUploadedFileToPDF(file, flow, inputName, userFileId, submission);
226+
}
227+
211228
return ResponseEntity.status(HttpStatus.OK).contentType(MediaType.TEXT_PLAIN).body(uploadedFile.getFileId().toString());
212229
} catch (Exception e) {
213230
if (e instanceof ResponseStatusException) {
@@ -219,6 +236,85 @@ public ResponseEntity<?> upload(
219236
}
220237
}
221238

239+
private void convertUploadedFileToPDF(MultipartFile file, String flow, String inputName, UUID userFileId, Submission submission)
240+
throws IOException {
241+
log.info("Converting upload {} to PDF", userFileId);
242+
// To be able to do safely across threads and asynchronously with potentially large files
243+
// we need to save the uploaded multipart file to a temp file on disk.
244+
File tempFile = File.createTempFile("upload_", ".tmp");
245+
file.transferTo(tempFile);
246+
247+
CompletableFuture<Set<MultipartFile>> fileConversion = CompletableFuture.supplyAsync(() -> {
248+
try {
249+
// Now we can read the temp file version of the uploaded file off the disk
250+
// and recreate the Multipart File for conversion.
251+
byte[] fileContent = java.nio.file.Files.readAllBytes(tempFile.toPath());
252+
MultipartFile multipartFile = new MockMultipartFile(
253+
file.getName(),
254+
file.getOriginalFilename(),
255+
file.getContentType(),
256+
fileContent
257+
);
258+
return fileConversionService.convertFileToPDF(multipartFile);
259+
} catch (Exception e) {
260+
log.error("Error converting file {} to PDF", userFileId, e);
261+
return new HashSet<MultipartFile>();
262+
} finally {
263+
// Always delete the tmp file from disk, on success or error.
264+
tempFile.delete();
265+
}
266+
});
267+
268+
// Need this to be final, for the lambda below
269+
final Submission finalSubmission = submission;
270+
271+
fileConversion.thenAccept(convertedMultipartFiles -> {
272+
// We've waited around for the original conversion call to complete and return from its thread,
273+
// and now we can save and upload the file(s), if the original was converted.
274+
if (convertedMultipartFiles != null && !convertedMultipartFiles.isEmpty()) {
275+
log.info("File {} was converted into {} new PDF files.", userFileId, convertedMultipartFiles.size());
276+
for (MultipartFile convertedMultipartFile : convertedMultipartFiles) {
277+
uploadConvertedPdf(convertedMultipartFile, userFileId, finalSubmission, flow, inputName);
278+
}
279+
} else {
280+
log.info("No conversion of upload {} to PDF", userFileId);
281+
}
282+
});
283+
284+
}
285+
286+
private void uploadConvertedPdf(MultipartFile convertedMultipartFile, UUID originalUserFileId, Submission submission, String flow, String inputName) {
287+
log.info("Successfully converted upload {} to PDF, saving to repository", originalUserFileId);
288+
String convertedFileExtension = Files.getFileExtension(
289+
Objects.requireNonNull(convertedMultipartFile.getOriginalFilename()));
290+
UUID convertedUserFileId = UUID.randomUUID();
291+
String convertedFileUploadLocation = String.format("%s/%s_%s_%s.%s", submission.getId(), flow, inputName,
292+
convertedUserFileId,
293+
convertedFileExtension);
294+
295+
try {
296+
cloudFileRepository.upload(convertedFileUploadLocation, convertedMultipartFile);
297+
298+
UserFile uploadedConvertedFile = UserFile.builder()
299+
.fileId(convertedUserFileId)
300+
.submission(submission)
301+
.originalName(convertedMultipartFile.getOriginalFilename())
302+
.repositoryPath(convertedFileUploadLocation)
303+
.filesize((float) convertedMultipartFile.getSize())
304+
.mimeType(convertedMultipartFile.getContentType())
305+
.virusScanned(true)
306+
.docTypeLabel(defaultDocType)
307+
.conversionSourceFileId(originalUserFileId)
308+
.build();
309+
310+
uploadedConvertedFile = userFileRepositoryService.save(uploadedConvertedFile);
311+
log.info("Created new converted file with id {} from original {}", uploadedConvertedFile.getFileId(), originalUserFileId);
312+
} catch (IOException | InterruptedException e) {
313+
log.error("Unable to create and upload converted file with id {} from original {}", convertedUserFileId,
314+
originalUserFileId);
315+
}
316+
}
317+
222318
/**
223319
* @param fileId The id of an uploaded file that should be deleted
224320
* @param returnPath The path to the page that they came from
@@ -264,6 +360,15 @@ public RedirectView delete(
264360
return new RedirectView("/error");
265361
}
266362

363+
List<UserFile> convertedFiles = userFileRepositoryService.findAll(submission, file.getFileId());
364+
if (convertedFiles != null) {
365+
for (UserFile convertedFile : convertedFiles) {
366+
log.info("Delete convertedfile {} from cloud storage", convertedFile.getFileId());
367+
cloudFileRepository.delete(convertedFile.getRepositoryPath());
368+
userFileRepositoryService.deleteById(convertedFile.getFileId());
369+
}
370+
}
371+
267372
log.info("Delete file {} from cloud storage", fileId);
268373
cloudFileRepository.delete(file.getRepositoryPath());
269374
userFileRepositoryService.deleteById(file.getFileId());

src/main/java/formflow/library/data/UserFile.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
public class UserFile {
4141

4242
@Id
43-
@GeneratedValue
4443
private UUID fileId;
4544

4645
@ManyToOne
@@ -69,6 +68,9 @@ public class UserFile {
6968
@Column(name = "doc_type_label")
7069
private String docTypeLabel;
7170

71+
@Column(name = "conversion_source_file_id")
72+
private UUID conversionSourceFileId;
73+
7274
@Override
7375
public boolean equals(Object o) {
7476
if (this == o) {

src/main/java/formflow/library/data/UserFileRepository.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,13 @@ public interface UserFileRepository extends JpaRepository<UserFile, UUID> {
2121
List<UserFile> findAllBySubmission(Submission submission);
2222

2323
/**
24-
* Gets a count of all the {@link UserFile}s associated with a {@link Submission}}
24+
* Gets a count of all the {@link UserFile}s associated with a {@link Submission} where the
25+
* file is not itself the result of a conversion (i.e. it is an original upload)
2526
*
2627
* @param submission the {@link Submission} for which the count of associated {@link UserFile}s are sought
2728
* @return count of {@link UserFile}s
2829
*/
29-
long countBySubmission(Submission submission);
30+
long countBySubmissionAndConversionSourceFileIdIsNull(Submission submission);
31+
32+
List<UserFile> findAllBySubmissionAndConversionSourceFileId(Submission submission, UUID conversionSourceFileId);
3033
}

0 commit comments

Comments
 (0)