文本数据保存
文本数据保存
- 工具
- 目的
- 代码
- 运行结果
工具
pycharm
目的
'''
网址:https://ljgk.envsc.cn/
需求:获取每家企业的地址(address)、公司名称(ps_name)和创建时间(create_time),并将数据分别保存为 JSON 文件和 CSV 表格。
'''
代码
# 该页面的数据是动态加载的:刷新网页后,在开发者工具的 XHR 请求中可以找到一个返回全部数据的接口(GetPSList.ashx)
import requests
import json
import csv


class Spider:
    """Download a list of records from one URL and save selected fields.

    The workflow is ``get_data`` -> ``parse_data`` -> ``save_data``; ``run``
    chains the three steps. From every fetched record only the keys listed in
    ``FIELDNAMES`` are kept, and the result is written to ``<file_name>.json``
    and ``<file_name>.csv``.
    """

    # Keys copied from each fetched record; also the CSV column order.
    FIELDNAMES = ('ps_name', 'address', 'create_time')

    def __init__(self, url, headers, file_name, data=None, json=None,
                 text_type=0, timeout=10):
        """Store the request settings.

        Args:
            url: Address to request.
            headers: HTTP headers sent with the request.
            file_name: Output file name without extension.
            data: Optional body forwarded to ``requests.get(data=...)``.
            json: Optional JSON body forwarded to ``requests.get(json=...)``.
            text_type: Form in which ``get_data`` returns the response:
                0 -> ``response.text``, 1 -> ``response.content``,
                2 -> ``response.json()``.
            timeout: Seconds to wait for the server before giving up.
        """
        # Parsed records. The attribute name shadows the builtin but is kept
        # because external code may already read ``spider.list``.
        self.list = []
        self.url = url
        self.headers = headers
        self.data = data
        self.json = json
        self.text_type = text_type
        self.file_name = file_name
        self.timeout = timeout

    def get_data(self):
        """Request ``self.url`` and return the body in the configured form.

        Raises:
            ValueError: If ``text_type`` is not 0, 1 or 2.
            requests.HTTPError: If the server answers with an error status.
        """
        # Reject a bad configuration before touching the network instead of
        # silently returning None afterwards.
        if self.text_type not in (0, 1, 2):
            raise ValueError(
                'text_type must be 0 (text), 1 (content) or 2 (json), got {!r}'
                .format(self.text_type)
            )
        response = requests.get(
            self.url,
            headers=self.headers,
            data=self.data,
            json=self.json,
            timeout=self.timeout,  # without a timeout the call can block forever
        )
        # Fail loudly on 4xx/5xx rather than parsing an error page.
        response.raise_for_status()
        if self.text_type == 0:
            return response.text
        if self.text_type == 1:
            return response.content
        return response.json()

    def parse_data(self, text):
        """Extract the ``FIELDNAMES`` keys from every record into ``self.list``.

        Args:
            text: An iterable of dict records, or the same data still encoded
                as a JSON string / bytes (what ``get_data`` returns for
                ``text_type`` 0 or 1).
        """
        # Raw text or bytes are decoded first so all three text_type settings
        # work with this parser.
        if isinstance(text, (str, bytes, bytearray)):
            text = json.loads(text)
        for record in text:
            # A key missing from one record is stored as None instead of
            # aborting the whole run with KeyError.
            self.list.append(
                {name: record.get(name) for name in self.FIELDNAMES}
            )

    def save_data(self):
        """Write ``self.list`` to ``<file_name>.json`` and ``<file_name>.csv``."""
        with open('{}.json'.format(self.file_name), 'w', encoding='utf8') as f:
            json.dump(self.list, f, ensure_ascii=False, indent=2)
        # Mode 'w' (not 'a'): the header is written on every save, so appending
        # would duplicate the header and rows and let the CSV drift away from
        # the JSON file, which is always overwritten.
        with open('{}.csv'.format(self.file_name), 'w', encoding='utf8',
                  newline='') as f:
            csv_writer = csv.DictWriter(f, fieldnames=self.FIELDNAMES)
            csv_writer.writeheader()
            csv_writer.writerows(self.list)

    def run(self):
        """Entry point: fetch, parse, then save."""
        text = self.get_data()
        self.parse_data(text)
        self.save_data()


if __name__ == '__main__':
    # NOTE(review): the sgn/ts/tc query values look like a captured
    # signature/timestamp and may stop working over time -- confirm.
    url = (
        'https://ljgk.envsc.cn/OutInterface/GetPSList.ashx?regionCode=0&psname='
        '&SystemType=C16A882D480E678F&sgn=2c887fad3076724ffd70d22320308a5d7b501610'
        '&ts=1691844481490&tc=11515962'
    )
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
    }
    file_name = 'companies'
    spider = Spider(url, headers, file_name, text_type=2)
    spider.run()
运行结果
见资源