当前位置: 首页 > news >正文

Java实现简单爬虫——爬取疫情数据

1.项目准备

        在项目中使用到了jsoup和fastjson:jsoup用于创建一个连接(会话),发送请求并获取(或解析)返回的页面内容;

而fastjson用于把返回的JSON字符串解析成对象,方便提取其中的数据。

        在pom.xml导入坐标

    <dependencies>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>2.0.15.graal</version>
        </dependency>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.15.3</version>
        </dependency>
    </dependencies>

        在爬取数据之前需要先找到对应的数据接口:https://api.inews.qq.com/newsqa/v1/query/inner/publish/modules/list?modules=localCityNCOVDataList,diseaseh5Shelf

        接口返回的是JSON数据,建议先用JSON格式化工具查看其结构,方便后面提取数据。

JSON在线解析,JSON格式化,JSON解析,JSON 校验(SO JSON)

2.实现

package com.czxy;import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.jsoup.Jsoup;import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;/*** @author 陶然同学* @version 1.0* @date 2022/10/15 10:41*/
public class QQData {public static void main(String[] args) throws IOException {getAllData();}public static Map<String,Object> getAllData() throws IOException {//1 发送请求 连接 获得疫情数据String resultBody = Jsoup.connect("https://api.inews.qq.com/newsqa/v1/query/inner/publish/modules/list?modules=localCityNCOVDataList,diseaseh5Shelf").ignoreContentType(true).execute().body();//2 转换成Object类型JSONObject jsonObject = JSON.parseObject(resultBody);//3 获取data部分JSONObject data = jsonObject.getJSONObject("data");//4 获取高风险地区数据JSONArray localCityNCOVDataList = data.getJSONArray("localCityNCOVDataList");Map<String,Object> highCitysMap = new HashMap<>();System.out.println("高风险疫情地区数据");for (int i = 0; i < localCityNCOVDataList.size(); i++) {JSONObject highCity = localCityNCOVDataList.getJSONObject(i);//高风险地区疫情数据
//            System.out.println(highCity);//数据部分String city = highCity.getString("city");String province = highCity.getString("province");//本土确诊int local_confirm_add = highCity.getIntValue("local_confirm_add");//新增无症状int local_wzz_add = highCity.getIntValue("local_wzz_add");//高风险地区int highRiskAreaNum = highCity.getIntValue("highRiskAreaNum");//中风险地区int mediumRiskAreaNum = highCity.getIntValue("mediumRiskAreaNum");Map<String,Object> highCityMap = new HashMap<>();highCityMap.put("city",city);highCityMap.put("province",province);highCityMap.put("local_confirm_add",local_confirm_add);highCityMap.put("highRiskAreaNum",highRiskAreaNum);highCityMap.put("mediumRiskAreaNum",mediumRiskAreaNum);highCityMap.put("name",highCityMap);}JSONObject diseaseh5Shelf = data.getJSONObject("diseaseh5Shelf");//获取国内34个省市的疫情数据JSONArray areaTree = diseaseh5Shelf.getJSONArray("areaTree");JSONObject allProvinces = areaTree.getJSONObject(0);JSONArray provinces = allProvinces.getJSONArray("children");Map<String,Object> provincesMap = new HashMap<>();System.out.println("各省份疫情数据");for (int i = 0; i < provinces.size(); i++) {JSONObject province = provinces.getJSONObject(i);//获取省份数据Map<String, Object> provinceMap = getCityValues(province);//打印省份数据System.out.println((String) provinceMap.get("name") + province + "\n\t城市数据");provincesMap.put((String)provinceMap.get("name"),provinceMap);//获得省下面的市JSONArray citys = province.getJSONArray("children");Map<String,Object> citysMap = new HashMap<>();for (int j = 0; j < citys.size(); j++) {JSONObject city = citys.getJSONObject(j);//获取城市数据Map<String, Object> cityMap = getCityValues(city);//打印城市数据System.out.println("\t" +(String)cityMap.get("name") + city);citysMap.put((String)cityMap.get("name"),cityMap);}//获取国内全国疫情数据Map<String,Object> chinaMap = new HashMap<>();JSONObject chinaTotal = diseaseh5Shelf.getJSONObject("chinaTotal");//已治愈人数int heal = chinaTotal.getIntValue("heal");//累计死亡int dead = chinaTotal.getIntValue("dead");//新增无症状int localWzzAdd = 
chinaTotal.getIntValue("localWzzAdd");//累计确诊 (所有病例)int confirm = chinaTotal.getIntValue("confirm");//新增病例int confirmAdd = chinaTotal.getIntValue("confirmAdd");//当前病例int nowConfirm = chinaTotal.getIntValue("nowConfirm");//本土病例int localConfirm = chinaTotal.getIntValue("localConfirm");//新增死亡int deadAdd = chinaTotal.getIntValue("deadAdd");//本土新增病例int localConfirmAdd = chinaTotal.getIntValue("localConfirmAdd");//中风险地区int mediumRiskAreaNum = chinaTotal.getIntValue("mediumRiskAreaNum");//高风险地区int highRiskAreaNum = chinaTotal.getIntValue("highRiskAreaNum");chinaMap.put("head",heal);chinaMap.put("dead",dead);chinaMap.put("localWzzAdd",localWzzAdd);chinaMap.put("confirm",confirm);chinaMap.put("confirmAdd",confirmAdd);chinaMap.put("nowConfirm",nowConfirm);chinaMap.put("deadAdd",deadAdd);chinaMap.put("localConfirmAdd",localConfirmAdd);chinaMap.put("mediumRiskAreaNum",mediumRiskAreaNum);chinaMap.put("highRiskAreaNum",highRiskAreaNum);//数据截止时间Date lastUpdateTime = diseaseh5Shelf.getDate("lastUpdateTime");Map<String,Object> resultMap = new HashMap<>();resultMap.put("provincesMap",provincesMap);resultMap.put("chinaMap",chinaMap);resultMap.put("highCitysMap",highCitysMap);resultMap.put("lastUpdateTime",lastUpdateTime);return resultMap;}return null;}/**** @param province 省数据* @return*/public static Map<String,Object> getCityValues(JSONObject province){//省名String name = province.getString("name");//省的今天数据JSONObject today = province.getJSONObject("today");//新增确证int todayConfirm = today.getIntValue("confirm");//新增本土无症状int wzz_add = today.getIntValue("wzz_add");//新增本土int local_confirm_add = today.getIntValue("local_confirm_add");//省的历史数据JSONObject total = province.getJSONObject("total");//累计确诊int confirm = total.getIntValue("confirm");int nowConfirm = total.getIntValue("nowConfirm");int wzz = total.getIntValue("wzz");//中风险地区数量int mediumRiskAreaNum = total.getIntValue("mediumRiskAreaNum");//高风险地区数量int highRiskAreaNum = total.getIntValue("highRiskAreaNum");//累计死亡int heal = 
total.getIntValue("heal");//累计确诊int dead = total.getIntValue("confirm");Map<String,Object> provinceMap = new HashMap<>();provinceMap.put("name",name);provinceMap.put("todayConfirm",todayConfirm);provinceMap.put("confirm",confirm);provinceMap.put("newConfirm",nowConfirm);provinceMap.put("wzz",wzz);provinceMap.put("mediumRiskAreaNum",mediumRiskAreaNum);provinceMap.put("highRiskAreaNum",highRiskAreaNum);provinceMap.put("heal",heal);provinceMap.put("dead",dead);return provinceMap;}
}

http://www.lryc.cn/news/511717.html

相关文章:

  • 大数据技术-Hadoop(一)Hadoop集群的安装与配置
  • 04.HTTPS的实现原理-HTTPS的混合加密流程
  • flutter插件开发-ios
  • 【AI日记】24.12.29 kaggle 比赛 2-17
  • 设计模式-创建型-工厂方法模式
  • 解决opencv在windows环境下读取中文图片名问题
  • Apache Commons Pool :介绍与使用
  • sentinel-请求限流、线程隔离、本地回调、熔断
  • 微信小程序 app.json 配置文件解析与应用
  • C语言-共用体(联合体)
  • C++算法知识点
  • 芝法酱学习笔记(2.3)——shardingsphere分库分表
  • vue3+vite+nginx打包
  • 爬虫与反爬虫实现全流程
  • Kimi进行学术方向选择精讲!
  • 湖北产教融合教育研究院重庆分院揭牌成立
  • 探索CSS Houdini:下一代样式与动画技术
  • winserver搭建域环境
  • 鸿蒙开发工程师成长的五个阶段
  • Redis集成到SpingBoot 的数据结构常见操作
  • 输入输出(I/O):熟悉 Java 的 I/O 类库,尤其是 NIO 和文件操作
  • mysql建立主从集群
  • Python AI 教程之四:无监督学习
  • ReactiveStreams、Reactor、SpringWebFlux
  • Qt 的信号槽机制详解:之信号槽引发的 Segmentation Fault 问题拆析(下)
  • opencv(cpp) Mat使用总结
  • 【Hackthebox 中英 Write-Up】Web Request | 分析 HTTP 请求和响应
  • c#多线程之生产者-消费者模型
  • Spring Boot中幂等性的应用
  • 【机器学习】分类