Data processing flow
1. 找出第一年的address,有lat和long,自动生成
csv_log_lat_county.ipynb
import csv
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

# Reverse-geocode every row of the year-1 table that has coordinates and
# write it to a new CSV with the resolved address appended as a last column.
#
# NOTE(review): the public Nominatim service has a usage policy (request
# rate, descriptive user agent) -- confirm this loop complies, e.g. by
# wrapping the call with geopy's RateLimiter.
geolocator = Nominatim(user_agent="my-app")
data_csv = r"D:/year1.csv"
output_csv = r"D:\year1_address.csv"

# newline='' on both files is what the csv module expects, so quoted fields
# with embedded line breaks round-trip correctly. The output is written as
# UTF-8 explicitly: addresses can contain characters the platform default
# encoding cannot represent, and pandas reads CSV as UTF-8 by default.
with open(data_csv, 'r', newline='') as f_in, \
        open(output_csv, 'w', newline='', encoding='utf-8') as f_out:
    reader = csv.reader(f_in)
    writer = csv.writer(f_out)

    header = next(reader)
    lat_idx = header.index('Lat')
    long_idx = header.index('Long')
    writer.writerow(header + ['address'])

    for row in reader:
        # Rows missing either coordinate cannot be geocoded; leave them out.
        if not row[lat_idx] or not row[long_idx]:
            continue
        lat = float(row[lat_idx])
        lon = float(row[long_idx])

        # Keep the try body down to the one call that can time out.
        try:
            location = geolocator.reverse(f"{lat}, {lon}")
        except GeocoderTimedOut:
            print(f"Error geocoding {lat}, {lon}. Skipping...")
            continue

        # reverse() returns None when the service has no result for the
        # point; treat it like a timeout instead of crashing on `.address`.
        if location is None:
            print(f"No address found for {lat}, {lon}. Skipping...")
            continue

        address = location.address
        row.append(address)
        writer.writerow(row)
        print(address)
2. 两个年份的表格concat,保证所有列数的标题相同,并且数量相同
import pandas as pd

# Stack the address-augmented tables of the two years into one CSV.
df1 = pd.read_csv(r"year1_address.csv")
df2 = pd.read_csv(r"year2_address.csv")

# concat aligns on column names and fills columns missing from one table
# with NaN, so surface any header mismatch instead of letting it pass silently.
mismatched = df1.columns.symmetric_difference(df2.columns)
if len(mismatched):
    print(f"Warning: columns not present in both tables: {list(mismatched)}")

# Build a list of the tables first, then pass it to concat as its input.
frames = [df1, df2]
# ignore_index=True renumbers the rows 0..n-1; otherwise the index column
# written below would repeat each file's own row numbers.
result = pd.concat(frames, ignore_index=True)
print(result)
result.to_csv(r"D:\all_year1_year2.csv", index=True, header=True)
3. 对两个表格做出统计,分别分析各项数据
table_stastistics
最终表格发在邮箱