【工具】雀语queyu文件批量下载 文档内容复刻导出
【工具】雀语queyu文件批量下载 文档内容复刻导出
文档下载
https://github.com/gxr404/yuque-dl
npm i -g yuque-dl
# 将下面的 url 替换为需要导出的知识库地址
yuque-dl "https://www.yuque.com/yuque/thyzgp"
文件批量下载
文件会按照知识库中对应的目录结构批量下载到本地。建议并发线程数控制在 3 个以内,线程过多时请求会报错。
package com.gw.coze.语雀;import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpResponse;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;public class YuQue {private static final List<String> h后缀白名单 = List.of("doc", "docx", "pdf", "xlsx", "xls", "ppt", "pptx", "lake");public static void main(String[] args) throws IOException {j解析目录("D:\\Data\\1850007", "1850007", "");// HttpRequest get = HttpUtil.createGet("https://yuque.com/r/resources/download/29853662");
// get.header("cookie", "lang=zh-cn; yuque_ctoken=f8YKyADhODS24tmUjucwqQ_Q; _yuque_session=nkulcKtzZ1RBX6UkAEvyxrH7-y8rdQCky8P-1rxmLUPPPtGLeDG17oGBSrfLpX3A_KI7--PdqZVJ13m5ZA9m9g==; receive-cookie-deprecation=1; aliyungf_tc=dd452bcc191f007aa0351febda59f15d1d6e34fc3447c962b6e81fd6b5135501; current_theme=default; acw_tc=ac11000117550636414504842e66cbbf913d99beb9db64e70dad4e4a8ee1e1");
//
// HttpResponse execute = get.execute();
// String location = execute.sync().header("Location");
// HttpUtil.downloadFile(location, "D:\\Data\\222222\\1.jpeg");}public static void j解析目录(String b保存目录地址, String book_id, String m目录Id) {String url = "";if(StrUtil.isBlank(m目录Id)){//1. 递归目录url = "https://yuque.com/api/resources?book_id={}&offset=0";}else{url = "https://yuque.com/api/resources?book_id={}&parent_id={}&offset=0";}url = StrUtil.format(url, book_id, m目录Id);HttpRequest cookie = HttpUtil.createGet(url).header("cookie", "TODO 这里放cookie");String body = cookie.execute().body();
// System.out.println(body);JSONObject entries = JSONUtil.parseObj(body);JSONArray data = entries.getJSONArray("data");// 线程数量:停车位! 限流!Semaphore semaphore = new Semaphore(3);List<Thread> threads = new ArrayList<Thread>();int size = data.size();for (int i = 0; i < size; i++) {JSONObject jsonObject = data.getJSONObject(i);Thread thread = new Thread(() -> {try {semaphore.acquire(); // acquire() 得到j解析单个(b保存目录地址, jsonObject);} catch (InterruptedException e) {e.printStackTrace();} finally {semaphore.release(); // release() 释放}}, String.valueOf(i));thread.start();threads.add(thread);//开线程}//阻塞 防止最后几个没执行结束的for (Thread thread : threads) {try {thread.join();} catch (Exception e) {e.printStackTrace();}}}public static void j解析单个(String b保存目录地址, JSONObject jsonObject) {b保存目录地址 = StrUtil.trim(b保存目录地址);String type = jsonObject.getStr("type");String id = jsonObject.getStr("id");String book_id = jsonObject.getStr("book_id");String filename = jsonObject.getStr("filename");filename = StrUtil.trim(filename);String ext后缀 = jsonObject.getStr("ext");if(ext后缀 != null){ext后缀 = StrUtil.trim(ext后缀);}String j绝对路径 = StrUtil.format("{}//{}", b保存目录地址, filename);if("folder".equals(type)) {//创建目录FileUtil.mkdir(j绝对路径);//目录j解析目录(j绝对路径, book_id, id);//继续递归}else{if(!h后缀白名单.contains(ext后缀)){return;}String s = j绝对路径 + "." + ext后缀;if(FileUtil.exist(s)){return;}System.out.println(s + "-" + jsonObject);//文件//保存起来HttpRequest get = HttpUtil.createGet("https://yuque.com/r/resources/download/" + id);get.header("cookie", "TODO 这里放cookie");HttpResponse execute = get.execute();String location = execute.sync().header("Location");if(StrUtil.isBlank(location)){get = HttpUtil.createGet("https://yuque.com/r/resources/download/" + id);get.header("cookie", "TODO 这里放cookie");execute = get.execute();location = execute.sync().header("Location");}if(StrUtil.isBlank(location)){System.out.println("下载失败" + id);return;}try {HttpUtil.downloadFile(location, j绝对路径 + "." + ext后缀);}catch (Exception e){System.out.println(j绝对路径 + "." 
+ ext后缀);e.printStackTrace();}}}}