Java - 解壓縮 (unzip)，並寫一個單元測試進行測試－讀處

	import lombok.extern.slf4j.Slf4j;

	import java.io.File;
	import java.io.FileInputStream;
	import java.io.FileOutputStream;
	import java.io.IOException;
	import java.nio.charset.Charset;
	import java.nio.charset.StandardCharsets;
	import java.nio.file.Path;
	import java.util.zip.ZipEntry;
	import java.util.zip.ZipInputStream;

	@Slf4j
	public final class ZipUtils {
	private ZipUtils() {}

	public static void unzip(Path inputZipFile, Path outputDirectory) throws IOException {

	File destDir = new File(outputDirectory.toString());

	try {
	// 先試試 utf8
	unzip(inputZipFile, destDir, StandardCharsets.UTF_8);
	} catch (RuntimeException e) {
	log.error("UTF_8解壓縮失敗");
	// 再試試看 big5
	unzip(inputZipFile, destDir, Charset.forName("Big5"));
	}
	}

	private static void unzip(Path inputZipFile, File destDir, Charset charset) throws IOException {
	log.info("開始使用 Charset: {} 進行解壓縮", charset);
	byte[] buffer = new byte[1024];

	try (ZipInputStream zis = new ZipInputStream(new FileInputStream(inputZipFile.toFile()), charset)) {
	ZipEntry zipEntry = zis.getNextEntry();
	while (zipEntry != null) {
	File newFile = newFile(destDir, zipEntry);
	if (zipEntry.isDirectory()) {
	if (!newFile.isDirectory() && !newFile.mkdirs()) {
	throw new IOException("Failed to create directory " + newFile);
	}
	} else {
	File parent = newFile.getParentFile();
	if (!parent.isDirectory() && !parent.mkdirs()) {
	throw new IOException("Failed to create directory " + parent);
	}

	writeFileContent(zis, newFile, buffer);
	}
	zipEntry = zis.getNextEntry();
	}
	}

	}

	private static File newFile(File destinationDir, ZipEntry zipEntry) throws IOException {
	File destFile = new File(destinationDir, zipEntry.getName());

	String destDirPath = destinationDir.getCanonicalPath();
	String destFilePath = destFile.getCanonicalPath();

	if (!destFilePath.startsWith(destDirPath + File.separator)) {
	throw new IOException("Entry is outside of the target dir: " + zipEntry.getName());
	}

	return destFile;
	}

	private static void writeFileContent(ZipInputStream zis, File newFile, byte[] buffer) throws IOException {
	// write file content
	try (FileOutputStream fos = new FileOutputStream(newFile)) {
	int len;
	while ((len = zis.read(buffer)) > 0) {
	fos.write(buffer, 0, len);
	}
	}
	}
	}

view raw ZipUtils.java hosted with ❤ by GitHub

接著我們寫一個單元測試來測試一下功能是否正確

先準備一個 test.zip，其中 test.zip 包含以下檔案:

1. 一個文字檔名稱叫做 testfile.txt 文字檔 (內容不重要)

2. 一個資料夾名稱叫做 folder，這個 folder 放置一個檔案 infolderfile.txt 文字檔 (內容不重要)

Java - 解壓縮 (unzip)，並寫一個單元測試進行測

並將這個準備好的測試檔案放到Spring Boot 專案 src/test/resources 的路徑下

如果沒有 resources 目錄，就自己建立一個 Directory，接著在 resources 底下再建立一個 test_unZip 的目錄

把 test.zip 放到 test_unZip 目錄下

Java - 解壓縮 (unzip)，並寫一個單元測試進行測

接著就可以進行下面的測試程式撰寫

	import org.junit.jupiter.api.AfterEach;
	import org.junit.jupiter.api.Test;
	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;

	import java.io.IOException;
	import java.nio.file.Files;
	import java.nio.file.Path;
	import java.nio.file.Paths;

	import static org.junit.jupiter.api.Assertions.assertTrue;
	import static org.junit.jupiter.api.Assertions.fail;

	/**
	 * Tests {@link ZipUtils#unzip(Path, Path)} against the {@code test.zip}
	 * fixture stored under {@code src/test/resources/test_unZip}.
	 *
	 * <p>The archive is extracted into {@code unZip_result} next to the fixture;
	 * that directory is removed again after every test.
	 */
	class ZipUtilsTest {
	    private static final Logger log = LoggerFactory.getLogger(ZipUtilsTest.class);
	    private static final Path TEST_RESOURCES_DIRECTORY = Paths.get("src", "test", "resources", "test_unZip");
	    private static final String ZIP_FILE_NAME = "test.zip";
	    private static final String UNZIP_FOLDER_NAME = "unZip_result";

	    @Test
	    void testUnzip() {
	        // Arrange: locate the fixture archive and the extraction target.
	        Path inputZipFile = TEST_RESOURCES_DIRECTORY.resolve(ZIP_FILE_NAME);
	        Path outputDirectory = TEST_RESOURCES_DIRECTORY.resolve(UNZIP_FOLDER_NAME);

	        try {
	            // Act: extract the archive.
	            ZipUtils.unzip(inputZipFile, outputDirectory);

	            // Assert: both the top-level file and the nested file were extracted.
	            Path testFile = outputDirectory.resolve("testfile.txt");
	            Path inFolderFile = outputDirectory.resolve("folder").resolve("infolderfile.txt");
	            assertTrue(Files.exists(testFile), "解壓縮後 testfile.txt 不存在");
	            assertTrue(Files.exists(inFolderFile), "解壓縮後 folder 下的 infolderfile.txt 不存在");
	        } catch (Exception e) {
	            // Pass the exception as the cause so its stack trace shows up in the test report.
	            fail("解壓縮時出現異常：" + e.getMessage(), e);
	        }
	    }

	    @AfterEach
	    public void teardown() throws IOException {
	        // Remove the extraction directory so each run starts from a clean state.
	        Path outputDirectory = TEST_RESOURCES_DIRECTORY.resolve(UNZIP_FOLDER_NAME);
	        deleteDirectory(outputDirectory);
	        assertTrue(Files.notExists(outputDirectory), "資料夾清空失敗");
	    }

	    /**
	     * Deletes {@code directory} and everything below it. Does nothing when the
	     * path does not exist or is not a directory. A failed deletion is logged
	     * and the remaining paths are still attempted.
	     */
	    private void deleteDirectory(Path directory) throws IOException {
	        if (!Files.isDirectory(directory)) {
	            return;
	        }
	        try (var stream = Files.walk(directory)) {
	            // Reverse order visits children before their parent directory.
	            // Swapping the operands avoids negating the result of compareTo.
	            stream.sorted((first, second) -> second.compareTo(first))
	                    .forEach(this::deleteQuietly);
	        }
	    }

	    /** Deletes a single path, logging (not rethrowing) an I/O failure. */
	    private void deleteQuietly(Path path) {
	        try {
	            Files.delete(path);
	        } catch (IOException e) {
	            log.error("刪除檔案失敗", e);
	        }
	    }
	}

view raw ZipUtilsTest.java hosted with ❤ by GitHub

因為測試過程中會將 test.zip 進行解壓縮，並將解壓縮出來的檔案放置 unZip_result 目錄下

所以我們在測試完的最後，我們要把這個資料夾的內容刪除，這部分做在 teardown

另外補充一下，這邊可以看到我們並沒有預先建立 unZip_result 目錄，但程式還是能正常跑

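這並不是 Path.resolve() 的功勞：Path.resolve() 只是把兩段路徑組合成一個新的 Path 物件，不會存取檔案系統，也不會建立任何目錄。unZip_result 目錄之所以會出現，是因為 ZipUtils 在解壓縮每個項目時，會對目標資料夾（或檔案的上層資料夾）呼叫 mkdirs()，把不存在的目錄一併建立起來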

ref: https://www.baeldung.com/java-compress-and-uncompress#unzip

------後續補充------

上述的程式寫法雖然程式可以work，但在做sonarqube的弱掃掃描的時候會出現安全性問題

Java - 解壓縮 (unzip)，並寫一個單元測試進行測

在 zis.getNextEntry() 的時候，會有一個警告提醒「Make sure that expanding this archive file is safe here」

這是因為在解壓縮的時候並沒針對解出來的檔案做一些安全性檢查，隱藏著一些安全性問題

根據sonarqube的建議如下:

1. Define and control the ratio between compressed and uncompressed data, in general the data compression ratio for most of the legit archives is 1 to 3.

(定義並控制壓縮和解壓縮數據之間的比例，在一般情況下，大多數合法存檔的數據壓縮比為 1:3)

2. Define and control the threshold for maximum total size of the uncompressed data.

(定義並控制解壓縮後的數據的最大總大小閾值)

3. Count the number of file entries extracted from the archive and abort the extraction if their number is greater than a predefined threshold

(計算從存檔中提取的文件條目的數量，如果它們的數量超過了預定的閾值，則中止提取)

最後，另外sonarqube有另外一個提醒:

Do not rely on getsize to retrieve the size of an uncompressed entry because this method returns what is defined in the archive headers which can be forged by attackers, instead calculate the actual entry size when unzipping it

(不要依賴 getsize 方法來檢索未壓縮條目的大小，因為該方法返回存檔標頭中定義的大小，這可能會被攻擊者偽造。相反，應在解壓縮時計算實際條目的大小)

因此根據以上建議，調整程式如下:

	import lombok.extern.slf4j.Slf4j;

	import java.io.*;
	import java.nio.charset.Charset;
	import java.nio.charset.StandardCharsets;
	import java.nio.file.Path;
	import java.util.zip.ZipEntry;
	import java.util.zip.ZipInputStream;

	@Slf4j
	public final class ZipUtils {

	static final int MAX_FILES_NUM = 1024; // Max number of files
	private static final long THRESHOLD_SIZE = 100L * 1024L * 1024L; // 100MB
	private static final double THRESHOLD_RATIO = 10.0;

	private ZipUtils() {
	}

	public static void unzip(Path inputZipFile, Path outputDirectory) throws IOException {

	File destDir = new File(outputDirectory.toString());

	try {
	// 先試試 utf8
	unzip(inputZipFile, destDir, StandardCharsets.UTF_8);
	} catch (RuntimeException e) {
	log.error("UTF_8解壓縮失敗");
	// 再試試看 big5
	unzip(inputZipFile, destDir, Charset.forName("Big5"));
	}
	}

	private static void unzip(Path inputZipFile, File destDir, Charset charset) throws IOException {
	log.info("開始使用 Charset: {} 進行解壓縮", charset);

	long totalUncompressedSize = 0;
	int totalEntries = 0;

	try (ZipInputStream zis = new ZipInputStream(new FileInputStream(inputZipFile.toFile()), charset)) {
	ZipEntry zipEntry;
	while ((zipEntry = zis.getNextEntry()) != null) {
	byte[] buffer = new byte[1024];

	File newFile = newFile(destDir, zipEntry);

	long uncompressedEntrySize = 0;

	if (zipEntry.isDirectory()) {
	if (!newFile.isDirectory() && !newFile.mkdirs()) {
	throw new IOException("Failed to create directory " + newFile);
	}
	} else {
	File parent = newFile.getParentFile();
	if (!parent.isDirectory() && !parent.mkdirs()) {
	throw new IOException("Failed to create directory " + parent);
	}

	try (FileOutputStream fos = new FileOutputStream(newFile);
	BufferedOutputStream bos = new BufferedOutputStream(fos)) {
	int len;
	while ((len = zis.read(buffer)) > 0) {
	bos.write(buffer, 0, len);
	uncompressedEntrySize += len;
	totalUncompressedSize += len;

	checkSizeAndRatio(totalUncompressedSize, uncompressedEntrySize, zipEntry);
	}
	}
	}

	totalEntries++;
	checkNumberOfFiles(totalEntries);
	}
	}
	}

	private static File newFile(File destinationDir, ZipEntry zipEntry) throws IOException {
	File destFile = new File(destinationDir, zipEntry.getName());

	String destDirPath = destinationDir.getCanonicalPath();
	String destFilePath = destFile.getCanonicalPath();

	if (!destFilePath.startsWith(destDirPath + File.separator)) {
	throw new IOException("Entry is outside of the target dir: " + zipEntry.getName());
	}

	return destFile;
	}

	private static void checkSizeAndRatio(long totalUncompressedSize, long uncompressedEntrySize, ZipEntry zipEntry) {
	if (totalUncompressedSize > THRESHOLD_SIZE) {
	throw new IllegalStateException("Uncompressed size exceeds threshold.");
	}

	double compressionRatio = uncompressedEntrySize / (double) zipEntry.getCompressedSize();
	if (compressionRatio > THRESHOLD_RATIO) {
	throw new IllegalStateException("Compression ratio exceeds threshold, possible zip bomb.");
	}
	}

	private static void checkNumberOfFiles(int totalEntries) {
	if (totalEntries > MAX_FILES_NUM) {
	throw new IllegalStateException("Too many files to unzip.");
	}
	}
	}