分割千万级数据的 CSV:将大文件分割为多个小 CSV 文件
依赖
<dependency><groupId>commons-io</groupId><artifactId>commons-io</artifactId><version>2.9.0</version></dependency>
package com.topnet.controller;import com.topnet.utils.R;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;import java.io.*;
import java.util.ArrayList;
import java.util.List;@RestController
@RequestMapping("/csvuitl")
@Slf4j
public class TestController {public static List<String> index;@RequestMapping("/getExcel")public R getZdysx(){String path = "D:\\WeChat\\WeChat Files\\wxid_cc8j9osnuqh822\\FileStorage\\File\\2024-09\\附件2.csv";try {splitBigFile(path, "utf-8", ",");} catch (Exception e) {e.printStackTrace();}return R.ok();}/*** 按数据条数分割文件,* @param path* @param ENCODE* @param splitStr* @return* @throws Exception*/public static void splitBigFile(String path, String ENCODE, String splitStr) throws Exception {List<List<String>> outArr = new ArrayList<>();File filePath = new File(path);String destName = filePath.getName().replace(".csv", "");// 重写文件名String destpath = filePath.getParent();// 重写文件路径int splitLen = 1000000;// 分割子文件的条数,一个文件100w条int i = 0;// 数据文件不为空if (filePath.exists() && filePath.length() > 0) {LineIterator it = FileUtils.lineIterator(filePath, ENCODE);while (it.hasNext()) {String dataLine = it.nextLine();if (dataLine.length() != 0) {String[] arr = dataLine.split(splitStr, -1);List<String> out = new ArrayList<>();for (String str : arr) {out.add(str.replace("^", "")); // 清除特殊字符}if (i == 0 && outArr.size() == 1){index = outArr.get(0);}if (i > 0 && outArr.size() == 0){outArr.add(index);}String newName =new File(destpath + File.separator + destName + "_" + i + ".csv").getAbsolutePath();reWriteFile(newName, replaceContent(out).toString(), ENCODE);// 重写文件outArr.add(out);if (outArr.size() == splitLen) {outArr.clear();// 重写完清空文件i++;}}}LineIterator.closeQuietly(it);}}private static StringBuffer replaceContent(List<String> outArr) {StringBuffer strbuf = new StringBuffer();for (String txt : outArr) {// 如果是属于这类数据 无效 置空 华为新数据会有这种情况if ("--".equals(txt)) {txt = "";}strbuf.append(txt).append(",");//注意:行分割字符}return strbuf.append("\n");}/*** 重新写入文件* @param fileName* @param content* @param ENCODE*/public static void reWriteFile(String fileName, String content, String ENCODE) {try {File ff = new File(fileName);if (!ff.exists()) {ff.createNewFile();}// 打开一个随机访问文件流,按读写方式RandomAccessFile randomFile 
= new RandomAccessFile(fileName, "rw");// 文件长度,字节数long fileLength = randomFile.length();// 将写文件指针移到文件尾。randomFile.seek(fileLength);String toCn = null;// 处理中文问题toCn = new String(content.getBytes(ENCODE), "ISO-8859-1");randomFile.writeBytes(toCn);randomFile.close();} catch (IOException e) {e.printStackTrace();}}}