Introduction
1. Instant upload (秒传)
Put simply, when you try to upload a file, the server first runs an MD5 check. If a file with the same MD5 already exists on the server, the server simply hands back an address pointing at the existing copy, so whatever you later download or access is that already-stored file. This is what is called "秒传" (instant upload). To avoid instant upload, the key is to change the file's MD5, since the MD5 digest is derived from the file's content and acts as its fingerprint. Renaming the file is not enough; you have to modify the content itself, for example by adding a few characters to a text file. The MD5 then changes, the upload is no longer recognized as a duplicate, and the file is uploaded normally instead of instantly.
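To make the MD5 point concrete, here is a small illustrative Java snippet (the class name Md5ChangeDemo and the file demo.txt are made up for the example): appending a single character to a file yields a different digest, which is exactly why a modified file is no longer recognized as a duplicate and skips the instant-upload path.
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.security.MessageDigest;

public class Md5ChangeDemo {
    // Compute the MD5 of a byte array as a 32-character hex string
    static String md5Hex(byte[] data) throws Exception {
        byte[] hash = MessageDigest.getInstance("MD5").digest(data);
        StringBuilder sb = new StringBuilder();
        for (byte b : hash) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    }

    public static void main(String[] args) throws Exception {
        Path file = Path.of("demo.txt");
        Files.writeString(file, "hello upload");
        String original = md5Hex(Files.readAllBytes(file));

        // change the content by a single character
        Files.writeString(file, "x", StandardOpenOption.APPEND);
        String modified = md5Hex(Files.readAllBytes(file));

        // the two digests differ, so the server no longer sees a duplicate
        System.out.println(original + " vs " + modified);
    }
}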
Code implementation
Frontend code:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Instant file upload</title>
</head>
<body>
<input type="file" id="fileInput" />
<button onclick="uploadFile()">Upload file</button>
<script>
async function uploadFile() {
const fileInput = document.getElementById('fileInput');
const file = fileInput.files[0];
if (!file) {
alert('Please select a file');
return;
}
const formData = new FormData();
formData.append('file', file);
try {
const response = await fetch('/upload', {
method: 'POST',
body: formData
});
const result = await response.text();
if (response.ok) {
alert('Upload result: ' + result);
} else {
alert('Upload failed');
}
} catch (error) {
console.error('Upload failed:', error);
}
}
</script>
</body>
</html>
Backend code:
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
@SpringBootApplication
public class FileUploadApplication {
public static void main(String[] args) {
SpringApplication.run(FileUploadApplication.class, args);
}
}
@RestController
public class FileUploadController {
@Autowired
private RedisTemplate<String, String> redisTemplate;
@PostMapping("/upload")
public String uploadFile(@RequestParam("file") MultipartFile file) {
try {
//compute the MD5 of the uploaded content first
byte[] content = file.getBytes();
String fileMd5 = getFileMd5(new ByteArrayInputStream(content));
String key = "file_" + fileMd5;
String existingPath = redisTemplate.opsForValue().get(key);
if (existingPath != null) {
//a file with the same content is already on the server, nothing needs to be transferred again
return "Instant upload succeeded";
}
//first upload of this content: persist it and record its path under the MD5 key
Path path = Paths.get("uploads/" + file.getOriginalFilename());
if (!Files.exists(path.getParent())) {
Files.createDirectories(path.getParent());
}
Files.write(path, content);
redisTemplate.opsForValue().set(key, path.toString());
return "Upload succeeded";
} catch (IOException | NoSuchAlgorithmException e) {
return "Upload failed";
}
}
private String getFileMd5(InputStream inputStream) throws NoSuchAlgorithmException, IOException {
MessageDigest digest = MessageDigest.getInstance("MD5");
byte[] hash = digest.digest(inputStream.readAllBytes());
return String.format("%032x", new BigInteger(1, hash));
}
}
Configuration file:
spring.servlet.multipart.max-file-size=2MB
spring.servlet.multipart.max-request-size=10MB
# Redis configuration
spring.redis.host=localhost
spring.redis.port=6379
2. Chunked upload (分片上传)
Chunked upload is an efficient way to handle large file uploads. The core idea is to split the large file into smaller pieces of a preset size (usually called chunks or parts) and upload them to the server one by one. During the upload the front end keeps track of the total number of chunks and the number of the chunk currently being sent, and passes that information to the back end with each request. Once every chunk has arrived, the back end reassembles the pieces into the original, complete file. In this way even very large files can be uploaded smoothly and reliably.
Note: the large file is split into chunks on the front end, not on the back end!
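Although the slicing happens in the browser, the back end relies on the same arithmetic to locate each chunk: with a fixed chunkSize, chunk i always starts at offset i * chunkSize, and the last chunk may be shorter. A minimal sketch of that arithmetic (the file size here is only an example):
public class ChunkMathDemo {
    public static void main(String[] args) {
        long fileSize = 5_500_000L;        // example: a 5.5 MB file
        long chunkSize = 1024 * 1024;      // 1 MB per chunk, same as the front-end code below

        // number of chunks, rounding up so the final partial chunk is counted
        long totalNumber = (fileSize + chunkSize - 1) / chunkSize;

        for (long chunkNumber = 0; chunkNumber < totalNumber; chunkNumber++) {
            long offset = chunkNumber * chunkSize;                 // where this chunk starts
            long length = Math.min(chunkSize, fileSize - offset);  // the last chunk may be shorter
            System.out.printf("chunk %d: offset=%d, length=%d%n", chunkNumber, offset, length);
        }
    }
}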
Code implementation:
The front-end code computes the file's MD5 with the spark-md5 library, which is straightforward to use. Why compute the MD5 at all? Because errors can occur while the chunks are transferred and written, so the file assembled on the server might differ from the original. Comparing the MD5 computed on the front end with the MD5 computed on the back end verifies that the uploaded data is consistent.
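On the server side, the examples below compute the MD5 by reading the whole file into memory, which is fine for a demo but heavy for genuinely large files. A streaming variant is sketched here for reference (the class name and the 8 KB buffer size are arbitrary choices, not part of the original code):
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;

public class StreamingMd5 {
    // Feed the file to the digest in 8 KB blocks instead of loading it all at once
    static String md5Hex(Path file) throws Exception {
        MessageDigest digest = MessageDigest.getInstance("MD5");
        try (InputStream in = Files.newInputStream(file)) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                digest.update(buffer, 0, read);
            }
        }
        StringBuilder sb = new StringBuilder();
        for (byte b : digest.digest()) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    }
}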
Frontend code:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Chunked upload</title>
<script src="https://cdn.bootcdn.net/ajax/libs/spark-md5/3.0.2/spark-md5.min.js"></script>
</head>
<body>
Chunked upload
<form enctype="multipart/form-data">
<input type="file" name="fileInput" id="fileInput">
<input type="button" value="Compute file MD5" onclick="calculateFileMD5()">
<input type="button" value="Upload" onclick="uploadFile()">
<input type="button" value="Check file integrity" onclick="checkFile()">
</form>
<p>
File MD5:
<span id="fileMd5"></span>
</p>
<p>
Upload result:
<span id="uploadResult"></span>
</p>
<p>
Integrity check:
<span id="checkFileRes"></span>
</p>
<script>
//size of each chunk
var chunkSize = 1 * 1024 * 1024;
var uploadResult = document.getElementById("uploadResult")
var fileMd5Span = document.getElementById("fileMd5")
var checkFileRes = document.getElementById("checkFileRes")
var fileMd5;
function calculateFileMD5(){
var fileInput = document.getElementById('fileInput');
var file = fileInput.files[0];
getFileMd5(file).then((md5) => {
console.info(md5)
fileMd5=md5;
fileMd5Span.innerHTML=md5;
})
}
function uploadFile() {
var fileInput = document.getElementById('fileInput');
var file = fileInput.files[0];
if (!file) return;
if (!fileMd5) return;
//slice the file into chunks
let fileArr = sliceFile(file);
//keep the original file name
let fileName = file.name;
fileArr.forEach((e, i) => {
//create a FormData object for this chunk
let data = new FormData();
data.append("totalNumber", fileArr.length)
data.append("chunkSize", chunkSize)
data.append("chunkNumber", i)
data.append("md5", fileMd5)
data.append("file", new File([e],fileName));
upload(data);
})
}
/**
* Compute the file's MD5
*/
function getFileMd5(file) {
return new Promise((resolve, reject) => {
let fileReader = new FileReader()
fileReader.onload = function (event) {
let fileMd5 = SparkMD5.ArrayBuffer.hash(event.target.result)
resolve(fileMd5)
}
fileReader.onerror = reject
fileReader.readAsArrayBuffer(file)
})
}
function upload(data) {
var xhr = new XMLHttpRequest();
// called when the request finishes
xhr.onload = function () {
if (xhr.status === 200) {
uploadResult.append('Uploaded chunk: ' + data.get("chunkNumber") + '\t');
}
}
xhr.onerror = function () {
uploadResult.innerHTML = 'Upload failed';
}
// send the request
xhr.open('POST', '/uploadBig', true);
xhr.send(data);
}
function checkFile() {
var xhr = new XMLHttpRequest();
// called when the request finishes
xhr.onload = function () {
if (xhr.status === 200) {
checkFileRes.innerHTML = 'Integrity check result: ' + xhr.responseText;
}
}
xhr.onerror = function () {
checkFileRes.innerHTML = 'Integrity check failed';
}
// send the request
xhr.open('POST', '/checkFile', true);
let data = new FormData();
data.append("md5", fileMd5)
xhr.send(data);
}
function sliceFile(file) {
const chunks = [];
let start = 0;
let end;
while (start < file.size) {
end = Math.min(start + chunkSize, file.size);
chunks.push(file.slice(start, end));
start = end;
}
return chunks;
}
</script>
</body>
</html>
Backend code implementation
Whenever /uploadBig finds that the MD5.conf status file does not yet exist, it creates an empty one with byte[] bytes = new byte[totalNumber], so every position starts at 0. Counting from position 0, the Nth byte records the upload state of the Nth chunk: 0 means not uploaded, 1 means uploaded. After a chunk uploads successfully, randomAccessConfFile.seek(chunkNumber) is used to set the corresponding byte to 1. randomAccessFile.seek(chunkNumber * chunkSize) moves the file pointer to the chunk's offset before its data is written. Every chunk of a given file has a different chunkNumber, so each request writes only its own region of the file, and multiple threads writing the same file this way do not run into thread-safety problems. Writing a large file through RandomAccessFile can be slow; a MappedByteBuffer memory mapping can speed the writes up (a sketch follows the controller code below). Be aware, though, that with MappedByteBuffer the file may be impossible to delete afterwards, because the mapping keeps the contents alive in memory even after the file is removed from disk.
import org.springframework.http.ResponseEntity;
import org.springframework.util.DigestUtils;
import org.springframework.util.StringUtils;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;
@RestController
public class UploadController {
public static final String UPLOAD_PATH = "D:\\upload\\";
/**
* @param chunkSize size of each chunk
* @param totalNumber total number of chunks
* @param chunkNumber current chunk number
* @param md5 the file's overall MD5
* @param file the current chunk's data
* @return
* @throws IOException
*/
@RequestMapping("/uploadBig")
public ResponseEntity<Map<String, String>> uploadBig(@RequestParam Long chunkSize, @RequestParam Integer totalNumber, @RequestParam Long chunkNumber, @RequestParam String md5, @RequestParam MultipartFile file) throws IOException {
//where the assembled file is stored
String dstFile = String.format("%s\\%s\\%s.%s", UPLOAD_PATH, md5, md5, StringUtils.getFilenameExtension(file.getOriginalFilename()));
//where the chunk status record is stored
String confFile = String.format("%s\\%s\\%s.conf", UPLOAD_PATH, md5, md5);
//on the first chunk, create the directory and the status record file
File dir = new File(dstFile).getParentFile();
if (!dir.exists()) {
dir.mkdirs();
//every chunk status starts at 0 (not uploaded)
byte[] bytes = new byte[totalNumber];
Files.write(Path.of(confFile), bytes);
}
//write this chunk at its own offset in the target file
try (RandomAccessFile randomAccessFile = new RandomAccessFile(dstFile, "rw");
RandomAccessFile randomAccessConfFile = new RandomAccessFile(confFile, "rw");
InputStream inputStream = file.getInputStream()) {
//seek to this chunk's offset
randomAccessFile.seek(chunkNumber * chunkSize);
//write the chunk data
randomAccessFile.write(inputStream.readAllBytes());
//seek to this chunk's status byte
randomAccessConfFile.seek(chunkNumber);
//mark the chunk as uploaded (1)
randomAccessConfFile.write(1);
}
return ResponseEntity.ok(Map.of("path", dstFile));
}
/**
* Check the status of each chunk and verify the file MD5
*
* @param md5 the file's MD5
* @return
* @throws Exception
*/
@RequestMapping("/checkFile")
public ResponseEntity<Map<String, String>> checkFile(@RequestParam String md5) throws Exception {
String uploadPath = String.format("%s\\%s\\%s.conf", UPLOAD_PATH, md5, md5);
Path path = Path.of(uploadPath);
//no status file for this MD5 means the file has never been uploaded
if (!Files.exists(path)) {
return ResponseEntity.ok(Map.of("msg", "File not uploaded"));
}
//build a 0/1 string describing each chunk's upload status
StringBuilder stringBuilder = new StringBuilder();
byte[] bytes = Files.readAllBytes(path);
for (byte b : bytes) {
stringBuilder.append(String.valueOf(b));
}
//once every chunk is present, verify the file's MD5
if (!stringBuilder.toString().contains("0")) {
File file = new File(String.format("%s\\%s\\", UPLOAD_PATH, md5));
File[] files = file.listFiles();
String filePath = "";
for (File f : files) {
//skip the .conf file and compare the data file's MD5 with the client's value
if (!f.getName().contains("conf")) {
filePath = f.getAbsolutePath();
try (InputStream inputStream = new FileInputStream(f)) {
String md5pwd = DigestUtils.md5DigestAsHex(inputStream);
if (!md5pwd.equalsIgnoreCase(md5)) {
return ResponseEntity.ok(Map.of("msg", "File upload failed"));
}
}
}
}
return ResponseEntity.ok(Map.of("path", filePath));
} else {
//upload not finished yet: return each chunk's status so the front end can resend the missing ones
return ResponseEntity.ok(Map.of("chunks", stringBuilder.toString()));
}
}
}
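The controller above writes each chunk through RandomAccessFile. As noted before the code, a MappedByteBuffer memory mapping is one way to speed up large writes. The snippet below is only a sketch, not part of the controller: it assumes the same offset scheme (chunkNumber * chunkSize) and uses a hypothetical helper name writeChunk.
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;

public class MappedChunkWriter {
    // Write one chunk's bytes at its offset through a memory-mapped region
    static void writeChunk(String dstFile, long chunkNumber, long chunkSize, byte[] data) throws Exception {
        try (RandomAccessFile raf = new RandomAccessFile(dstFile, "rw");
             FileChannel channel = raf.getChannel()) {
            long offset = chunkNumber * chunkSize;
            // map only this chunk's region of the target file into memory
            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_WRITE, offset, data.length);
            buffer.put(data);
            buffer.force(); // flush the mapped region to disk
        }
    }
}
Keep the earlier caveat in mind: the mapping can keep the file occupied until the buffer is garbage collected, so deleting the file right after writing may fail.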
3. Resumable upload (断点续传)
The key to resuming is the /checkFile endpoint. It is called before an upload starts (or while one is in progress) to ask whether the server already holds a partial upload record for the file. If some chunks were never completed (data left behind by an interrupted upload), /checkFile responds with a chunks field in which every position that has not been uploaded yet is marked 0. From this response the front end works out which chunks are already on the server and which still have to be sent, and issues upload requests only for the unfinished ones. Once those remaining chunks have been uploaded, the front end calls /checkFile again for a final check to confirm that every chunk is present, which completes the resumable upload. (A small server-side helper for reading this chunk bitmap is sketched after the backend code at the end of this section.)
Frontend code implementation
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Chunked upload</title>
<script src="https://cdn.bootcdn.net/ajax/libs/spark-md5/3.0.2/spark-md5.min.js"></script>
</head>
<body>
Chunked upload
<form enctype="multipart/form-data">
<input type="file" name="fileInput" id="fileInput">
<input type="button" value="Compute file MD5" onclick="calculateFileMD5()">
<input type="button" value="Upload" onclick="uploadFile()">
<input type="button" value="Check file integrity" onclick="checkFile()">
</form>
<p>
File MD5:
<span id="fileMd5"></span>
</p>
<p>
Upload result:
<span id="uploadResult"></span>
</p>
<p>
Integrity check:
<span id="checkFileRes"></span>
</p>
<script>
//size of each chunk
var chunkSize = 1 * 1024 * 1024;
var uploadResult = document.getElementById("uploadResult")
var fileMd5Span = document.getElementById("fileMd5")
var checkFileRes = document.getElementById("checkFileRes")
var fileMd5;
function calculateFileMD5(){
var fileInput = document.getElementById('fileInput');
var file = fileInput.files[0];
getFileMd5(file).then((md5) => {
console.info(md5)
fileMd5=md5;
fileMd5Span.innerHTML=md5;
})
}
function uploadFile() {
var fileInput = document.getElementById('fileInput');
var file = fileInput.files[0];
if (!file) return;
if (!fileMd5) return;
//slice the file into chunks
let fileArr = sliceFile(file);
//keep the original file name
let fileName = file.name;
fileArr.forEach((e, i) => {
//create a FormData object for this chunk
let data = new FormData();
data.append("totalNumber", fileArr.length)
data.append("chunkSize", chunkSize)
data.append("chunkNumber", i)
data.append("md5", fileMd5)
data.append("file", new File([e],fileName));
upload(data);
})
}
/**
* Compute the file's MD5
*/
function getFileMd5(file) {
return new Promise((resolve, reject) => {
let fileReader = new FileReader()
fileReader.onload = function (event) {
let fileMd5 = SparkMD5.ArrayBuffer.hash(event.target.result)
resolve(fileMd5)
}
fileReader.onerror = reject
fileReader.readAsArrayBuffer(file)
})
}
function upload(data) {
var xhr = new XMLHttpRequest();
// called when the request finishes
xhr.onload = function () {
if (xhr.status === 200) {
uploadResult.append('Uploaded chunk: ' + data.get("chunkNumber") + '\t');
}
}
xhr.onerror = function () {
uploadResult.innerHTML = 'Upload failed';
}
// send the request
xhr.open('POST', '/uploadBig', true);
xhr.send(data);
}
function checkFile() {
var xhr = new XMLHttpRequest();
// called when the request finishes
xhr.onload = function () {
if (xhr.status === 200) {
checkFileRes.innerHTML = 'Integrity check result: ' + xhr.responseText;
}
}
xhr.onerror = function () {
checkFileRes.innerHTML = 'Integrity check failed';
}
// send the request
xhr.open('POST', '/checkFile', true);
let data = new FormData();
data.append("md5", fileMd5)
xhr.send(data);
}
function sliceFile(file) {
const chunks = [];
let start = 0;
let end;
while (start < file.size) {
end = Math.min(start + chunkSize, file.size);
chunks.push(file.slice(start, end));
start = end;
}
return chunks;
}
// The rest of the code above stays the same...
function checkFileAndContinue() {
var xhr = new XMLHttpRequest();
xhr.open('POST', '/checkFile', true);
let data = new FormData();
data.append("md5", fileMd5);
xhr.onreadystatechange = function () {
if (xhr.readyState === 4) {
if (xhr.status === 200) {
const response = JSON.parse(xhr.responseText);
if (response.msg === 'File not uploaded') {
// nothing on the server yet, upload every chunk
uploadFile();
} else if (response.chunks) {
// some chunks are missing, re-upload only those
const chunksStatus = response.chunks;
let chunksToUpload = [];
for (let i = 0; i < chunksStatus.length; i++) {
if (chunksStatus.charAt(i) === '0') {
// keep the original chunk number so it lands at the right offset
chunksToUpload.push(i);
}
}
if (chunksToUpload.length > 0) {
uploadChunks(chunksToUpload);
} else {
uploadResult.innerHTML = 'All chunks already uploaded';
}
} else if (response.msg === 'File merge failed') {
uploadResult.innerHTML = 'File MD5 check failed';
} else {
// every chunk is present and the server-side MD5 matched
uploadResult.innerHTML = 'File already uploaded';
}
} else {
uploadResult.innerHTML = 'Failed to check file status';
}
}
};
xhr.send(data);
}
function uploadChunks(chunkNumbers) {
var file = document.getElementById('fileInput').files[0];
let fileArr = sliceFile(file);
chunkNumbers.forEach((chunkNumber) => {
let data = new FormData();
data.append("totalNumber", fileArr.length);
data.append("chunkSize", chunkSize);
data.append("chunkNumber", chunkNumber);
data.append("md5", fileMd5);
data.append("file", new File([fileArr[chunkNumber]], file.name));
upload(data);
});
}
// After the user picks a file, compute its MD5 and then check what the server already has
document.getElementById('fileInput').addEventListener('change', function() {
var file = document.getElementById('fileInput').files[0];
if (file) {
getFileMd5(file).then(md5 => {
fileMd5 = md5;
fileMd5Span.innerHTML = md5;
checkFileAndContinue(); // check the upload status of each chunk
});
}
});
</script>
</body>
</html>
Backend code
import org.springframework.http.ResponseEntity;
import org.springframework.util.DigestUtils;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.io.*;
import java.nio.file.*;
import java.util.HashMap;
import java.util.Map;
@RestController
public class UploadController {
public static final String UPLOAD_PATH = "D:\\upload\\";
/**
* Upload one chunk of the file
*
* @param chunkSize size of each chunk
* @param totalNumber total number of chunks
* @param chunkNumber current chunk number
* @param md5 the file's MD5
* @param file the current chunk's data
* @return
* @throws IOException
*/
@PostMapping("/uploadBig")
public ResponseEntity<Map<String, String>> uploadBig(
@RequestParam("chunkSize") Long chunkSize,
@RequestParam("totalNumber") Integer totalNumber,
@RequestParam("chunkNumber") Integer chunkNumber,
@RequestParam("md5") String md5,
@RequestParam("file") MultipartFile file) throws IOException {
// make sure the upload directory exists
String dirPath = String.format("%s%s\\", UPLOAD_PATH, md5);
File dir = new File(dirPath);
if (!dir.exists()) {
dir.mkdirs();
}
// where the assembled file is stored
String fileName = String.format("%s.part", md5);
String dstFilePath = String.format("%s%s\\%s", UPLOAD_PATH, md5, fileName);
// where the chunk status record is stored
String confFilePath = String.format("%s%s\\%s.conf", UPLOAD_PATH, md5, md5);
// on the first chunk, create the status file with every chunk marked 0 (not uploaded)
if (!Files.exists(Paths.get(confFilePath))) {
Files.write(Paths.get(confFilePath), new byte[totalNumber]);
}
// write this chunk's data at its own offset
try (RandomAccessFile randomAccessFile = new RandomAccessFile(dstFilePath, "rw");
InputStream is = file.getInputStream()) {
randomAccessFile.seek((long) chunkNumber * chunkSize);
randomAccessFile.write(is.readAllBytes());
}
// mark this chunk as uploaded (1) in the status file
try (RandomAccessFile confFile = new RandomAccessFile(confFilePath, "rw")) {
confFile.seek(chunkNumber);
confFile.write(1);
}
Map<String, String> response = new HashMap<>();
response.put("path", dstFilePath);
return ResponseEntity.ok(response);
}
/**
* Check the upload status of each chunk and verify the file MD5
*
* @param md5 the file's MD5
* @return
* @throws IOException
*/
@PostMapping("/checkFile")
public ResponseEntity<Map<String, Object>> checkFile(@RequestParam("md5") String md5) throws IOException {
String confFilePath = String.format("%s%s\\%s.conf", UPLOAD_PATH, md5, md5);
Path confPath = Paths.get(confFilePath);
// no status file for this MD5 means the file has never been uploaded
if (!Files.exists(confPath)) {
return ResponseEntity.ok(Map.of("msg", "File not uploaded"));
}
// a chunk whose status byte is still 0 has not been uploaded yet
boolean isComplete = true;
try (RandomAccessFile accessFile = new RandomAccessFile(confFilePath, "r")) {
for (int i = 0; i < accessFile.length(); i++) {
if (accessFile.readByte() == 0) {
isComplete = false;
break;
}
}
}
if (isComplete) {
// every chunk is present: compute the assembled file's MD5 and compare it with the client's value
String finalFilePath = String.format("%s%s\\%s.part", UPLOAD_PATH, md5, md5);
try (InputStream is = new FileInputStream(finalFilePath)) {
String calculatedMd5 = DigestUtils.md5DigestAsHex(is);
if (!calculatedMd5.equalsIgnoreCase(md5)) {
return ResponseEntity.ok(Map.of("msg", "File merge failed"));
}
return ResponseEntity.ok(Map.of("msg", "Upload complete", "md5", calculatedMd5));
} catch (IOException e) {
return ResponseEntity.ok(Map.of("msg", "File merge failed"));
}
} else {
// upload not finished: return each chunk's status so the front end can resend the missing ones
try (RandomAccessFile accessFile = new RandomAccessFile(confFilePath, "r")) {
StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < accessFile.length(); i++) {
stringBuilder.append(accessFile.readByte() == 0 ? "0" : "1");
}
return ResponseEntity.ok(Map.of("msg", "Upload incomplete", "chunks", stringBuilder.toString()));
}
}
}
}
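As a closing note on the resume protocol: the front end decides what to resend by scanning the '0'/'1' string returned by /checkFile, and the same information can be read directly from the .conf bytes on the server. The helper below is a hypothetical ChunkStatus class, not referenced by the controllers above, that lists the chunk numbers still missing:
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

public class ChunkStatus {
    // Each byte in the .conf file is 0 (missing) or 1 (uploaded); collect the missing chunk numbers
    static List<Integer> missingChunks(Path confFile) throws Exception {
        byte[] status = Files.readAllBytes(confFile);
        List<Integer> missing = new ArrayList<>();
        for (int i = 0; i < status.length; i++) {
            if (status[i] == 0) {
                missing.add(i);
            }
        }
        return missing;
    }
}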