当前位置: 首页 > news >正文

删除无点击数据offer数据分析使用

梳理思路:

1、  获取 7month  和 8month  fullreport 报表中 所有offer;输出结果:offerid, totalClickCount;

2、 分析数据7month  totalClickCount=0 and 8month  totalClickCount=0 的offer去除;

      result.loc[(result['totalClickCount7'] > 0) | (result['totalClickCount8'] > 0)]    获取有效的offer数据data2(列名与下方代码保持一致:totalClickCount7、totalClickCount8);

3、 获取点击系统自动跑量列表中offer数据data3,输出结果:offerid,dataSourceid;

4、 分析数据  根据data2有效offer,获取到有效的offer对应的每个dataSourceid;

       result = pd.merge(data1, data2, on='offerId', how='left')

5、 计算出删除后,dataSourceid应该保留的个数及id;

6、 计算出删除offer的个数及offer_id;

操作方法

一、  获取 7month  和 8month  fullreport 报表中 所有offer

         class getFullreportOffer_8month

         class getFullreportOffer_7month

class getFullreportOffer_8month:
    """Download the per-offer click totals of the 2023-08 full report.

    Relies on names defined elsewhere in the module: ``requests``,
    ``header``, ``thread_pool_executor``, ``np``, ``pd`` and ``filename8``.
    """

    def _process(self, page):
        """Fetch one report page.

        Returns a tuple ``(offer_ids, click_counts)`` holding the ``offerId``
        and ``totalClickCount`` of every record on the page, in the order the
        response lists them.
        """
        from_date = "2023-08-01"
        to_date = "2023-08-31"
        url = (
            "···?page=" + str(page)
            + "&pageSize=100&fromDate=" + from_date
            + "&toDate=" + to_date
            + "&fromHour=00&toHour=23&timezone=%2B00:00&affiliateIds=&sourceIds=&sorting=rejectionTotal&sortType=desc&ruleIds=&businessTypes=&accountTypes=&columns=offer_id"
        )
        # NOTE: certificate verification is switched off for this request.
        payload = requests.get(url=url, headers=header, verify=False).json()
        records = payload['result']['dataList']
        offer_ids = [record['offerId'] for record in records]
        click_counts = [record['totalClickCount'] for record in records]
        return offer_ids, click_counts

    def _process_multithread(self, list_):
        """Fetch every page in ``list_`` through the shared thread pool.

        Returns the per-page results in the same order as ``list_``.
        """
        # Multithreaded download: queue every page first, then wait for each
        # result in turn so the requests can overlap.
        futures = []
        for page in list_:
            futures.append(thread_pool_executor.submit(self._process, page))
        return [future.result() for future in futures]

    def run(self):
        """Download pages 1 to 32 and write the combined table to ``filename8``.

        The CSV has the columns ``offerId`` and ``totalClickCount8``.
        """
        pages = np.arange(1, 33)
        offer_ids = []
        click_counts = []
        for page_offers, page_clicks in self._process_multithread(pages):
            offer_ids += page_offers
            click_counts += page_clicks
        frame = pd.DataFrame({
            'offerId': offer_ids,
            'totalClickCount8': click_counts,
        })
        frame.to_csv(filename8, index=False)
class getFullreportOffer_7month:
    """Download the per-offer click totals of the 2023-07 full report.

    Relies on names defined elsewhere in the module: ``requests``,
    ``header``, ``thread_pool_executor``, ``np``, ``pd`` and ``filename7``.
    """

    def _process(self, page):
        """Fetch one report page.

        Returns a tuple ``(offer_ids, click_counts)`` holding the ``offerId``
        and ``totalClickCount`` of every record on the page, in the order the
        response lists them.
        """
        from_date = "2023-07-01"
        to_date = "2023-07-31"
        url = (
            "···?page=" + str(page)
            + "&pageSize=100&fromDate=" + from_date
            + "&toDate=" + to_date
            + "&fromHour=00&toHour=23&timezone=%2B00:00&affiliateIds=&sourceIds=&sorting=rejectionTotal&sortType=desc&ruleIds=&businessTypes=&accountTypes=&columns=offer_id"
        )
        # NOTE: certificate verification is switched off for this request.
        payload = requests.get(url=url, headers=header, verify=False).json()
        records = payload['result']['dataList']
        offer_ids = [record['offerId'] for record in records]
        click_counts = [record['totalClickCount'] for record in records]
        return offer_ids, click_counts

    def _process_multithread(self, list_):
        """Fetch every page in ``list_`` through the shared thread pool.

        Returns the per-page results in the same order as ``list_``.
        """
        # Multithreaded download: queue every page first, then wait for each
        # result in turn so the requests can overlap.
        futures = []
        for page in list_:
            futures.append(thread_pool_executor.submit(self._process, page))
        return [future.result() for future in futures]

    def run(self):
        """Download pages 1 to 33 and write the combined table to ``filename7``.

        The CSV has the columns ``offerId`` and ``totalClickCount7``.
        """
        pages = np.arange(1, 34)
        offer_ids = []
        click_counts = []
        for page_offers, page_clicks in self._process_multithread(pages):
            offer_ids += page_offers
            click_counts += page_clicks
        frame = pd.DataFrame({
            'offerId': offer_ids,
            'totalClickCount7': click_counts,
        })
        frame.to_csv(filename7, index=False)

二、分析数据7month  totalClickCount=0 and 8month  totalClickCount=0 的offer去除

      result.loc[(result['totalClickCount7'] > 0) | (result['totalClickCount8'] > 0)]    获取有效的offer数据data2(列名与下方代码保持一致:totalClickCount7、totalClickCount8);

class dataAnalysis:
    """Keep only the offers that received clicks in July or August."""

    def get_datas(self):
        """Merge the two monthly CSVs and write the offers that have clicks.

        Reads ``filename7`` and ``filename8``, outer-joins them on
        ``offerId``, keeps the rows whose ``totalClickCount7`` or
        ``totalClickCount8`` is greater than zero and writes them to
        ``filename`` without the index column.
        """
        july = pd.read_csv(filename7)
        august = pd.read_csv(filename8)
        merged = july.merge(august, on='offerId', how='outer')
        # An offer missing from one month has NaN in that month's column;
        # NaN > 0 is False, so only the other month decides for that row.
        clicked_in_july = merged['totalClickCount7'] > 0
        clicked_in_august = merged['totalClickCount8'] > 0
        active = merged.loc[clicked_in_july | clicked_in_august]
        active.to_csv(filename, index=False)

三、获取点击系统自动跑量列表中offer数据data3,输出结果:offerid,dataSourceid;

       

class get_click_offer_datasourceid:
    """Export every offer of the click system with its data source ids.

    Relies on names defined elsewhere in the module: ``requests``,
    ``header``, ``thread_pool_executor``, ``np``, ``pd`` and
    ``filenameclick``.
    """

    @staticmethod
    def _collect_source_ids(record):
        """Return the data source ids referenced by one list record.

        ``dataSourceId`` contributes one id and ``dataSourceIds`` contributes
        one or more comma-separated ids; the value ``"-1"`` in either field
        contributes nothing.  When a field is missing or has an unexpected
        type, the placeholder ``"NaN"`` is appended to whatever was collected
        so far, so a malformed record never aborts the export.
        """
        ids = []
        try:
            single = record['dataSourceId']
            if single != "-1":
                # Single id (noted as the "real-time data package" by the author).
                ids.append(single)
            multiple = record['dataSourceIds']
            if multiple != "-1":
                if "," in multiple:
                    # Several packages are listed in one comma-separated field.
                    ids.extend(multiple.split(','))
                else:
                    ids.append(multiple)
        except (KeyError, TypeError, AttributeError):
            # Only malformed-record errors are tolerated here; anything else
            # (including interpreter shutdown signals) propagates instead of
            # being hidden behind the placeholder.
            ids.append("NaN")
        return ids

    def process(self, page):
        """Fetch one list page.

        Returns a tuple ``(offer_ids, source_id_lists)``: the ``offerId`` of
        every record on the page and, at the same position, the list of data
        source ids collected for that record.

        Raises:
            KeyError: if a record has no ``offerId`` or the response lacks
                ``result`` / ``records``.
        """
        url = "····?pageNum=" + str(page) + "&pageSize=10"
        # NOTE: certificate verification is switched off for this request.
        payload = requests.get(url=url, headers=header, verify=False).json()
        offer_ids = []
        source_id_lists = []
        for record in payload['result']['records']:
            source_ids = self._collect_source_ids(record)
            offer_ids.append(record['offerId'])
            source_id_lists.append(source_ids)
        return offer_ids, source_id_lists

    def process_multithread(self, list_):
        """Fetch every page in ``list_`` through the shared thread pool.

        Returns the per-page results in the same order as ``list_``.
        """
        # Multithreaded download: queue every page first, then wait for each
        # result in turn so the requests can overlap.
        futures = [thread_pool_executor.submit(self.process, page) for page in list_]
        return [future.result() for future in futures]

    def run(self):
        """Download pages 1 to 456 and write the table to ``filenameclick``.

        The CSV has the columns ``offerId`` and ``dataSources``; each
        ``dataSources`` cell is the text form of a Python list.
        """
        offer_ids = []
        source_id_lists = []
        for page_offers, page_sources in self.process_multithread(np.arange(1, 457)):
            offer_ids.extend(page_offers)
            source_id_lists.extend(page_sources)
        frame = pd.DataFrame({
            'offerId': offer_ids,
            'dataSources': source_id_lists,
        })
        frame.to_csv(filenameclick, index=False)

四、分析数据  根据data2有效offer,获取到有效的offer对应的每个dataSourceid

       result = pd.merge(data1, data2, on='offerId', how='left')

class effectiveOffer:
    """Attach the click-system data source lists to the offers that are kept."""

    def get_datas(self):
        """Left-join the kept offers with the click-system export.

        Reads ``filename`` and ``filenameclick``, joins them on ``offerId``
        keeping every row of ``filename``, and writes the result to
        ``filenameoffer`` without the index column.
        """
        kept_offers = pd.read_csv(filename)
        click_offers = pd.read_csv(filenameclick)
        joined = kept_offers.merge(click_offers, on='offerId', how='left')
        joined.to_csv(filenameoffer, index=False)

五、计算出删除后,dataSourceid应该保留的个数及id

class offerdatasource:
    """Work out which data source ids are still referenced by kept offers."""

    def get_datasource(self, path=None):
        """Print and return the distinct data source ids that must be kept.

        Args:
            path: CSV file with a ``dataSources`` column whose cells are the
                text form of Python lists.  Defaults to the module-level
                ``filenameoffer``.

        Returns:
            A list of the distinct ids, in no particular order.  The list and
            its length are also printed.

        Raises:
            ValueError, SyntaxError: if a cell is not a valid Python literal.
        """
        if path is None:
            path = filenameoffer
        # Offers without a click-system record have an empty cell; skip them.
        cells = pd.read_csv(path)['dataSources'].dropna(axis=0)
        unique_ids = set()
        for cell in cells.values:
            # literal_eval only accepts literals, so a cell cannot run code.
            unique_ids.update(ast.literal_eval(cell))
        # "NaN" is the placeholder the click-list export writes for records it
        # could not read; it is not a data source id and must not be counted.
        unique_ids.discard("NaN")
        ll = list(unique_ids)
        print(ll)
        print(len(ll))
        return ll

六、计算出删除offer的个数及offer_id;

class deleteOfferid:
    """Work out which click-system offers are to be deleted."""

    def get_delete_offerid(self, valid_path=None, click_path=None):
        """Print and return the offers present in the click list but not kept.

        Args:
            valid_path: CSV of the offers to keep, with an ``offerId``
                column.  Defaults to the module-level ``filename``
                (3547 rows when the author ran it).
            click_path: CSV of the click-system offers, with an ``offerId``
                column.  Defaults to the module-level ``filenameclick``
                (4544 rows when the author ran it).

        Returns:
            The ``offerId`` values of ``click_path`` that do not occur in
            ``valid_path``, in file order and with repeats preserved.  The
            count and the list are also printed.
        """
        if valid_path is None:
            valid_path = filename
        if click_path is None:
            click_path = filenameclick
        kept = pd.read_csv(valid_path)['offerId']
        candidates = pd.read_csv(click_path)['offerId']
        # Present in the click list, absent from the kept offers.  ``isin``
        # uses a hash lookup, unlike testing each id against an array, which
        # scans the whole array once per id.
        d = candidates[~candidates.isin(kept)].tolist()
        print(len(d))
        print(d)
        return d

http://www.lryc.cn/news/147993.html

相关文章:

  • 【Apollo学习笔记】——规划模块TASK之SPEED_BOUNDS_PRIORI_DECIDER
  • 物理机ping不通windows server 2012
  • 誉天HCIE-Datacom丨为什么选择誉天数通HCIE课程学习
  • Python文本终端GUI框架详解
  • 01_lwip_raw_udp_test
  • 学习ts(十一)本地存储与发布订阅模式
  • MySQL对NULL值处理
  • Vector 动态数组(迭代器)
  • 多组背包恰好装满方案数
  • Oracle查询语句中做日期加减运算
  • Unity贝塞尔曲线的落地应用-驱动飞行特效
  • VTK——设置交互样式上的鼠标回调函数
  • Flutter实现动画列表AnimateListView
  • 【LeetCode-中等题】236. 二叉树的最近公共祖先
  • 如何拼接两个视频在一起?
  • Programming abstractions in C阅读笔记:p130-p131
  • 如何在Windows本地快速搭建SFTP文件服务器,并通过端口映射实现公网远程访问
  • C#---第二十:不同类型方法的执行顺序(new / virtual / common / override)
  • lnmp架构-PHP
  • 【javascript实操记录】
  • Mysql--技术文档--悲观锁、乐观锁-《控制并发机制简单认知、深度理解》
  • 【GO】LGTM_Grafana_Tempo(2)_官方用例改后实操
  • git 口令
  • 【回眸】剑指offer(二)解题思路
  • Python 基本文件操作及os库
  • YOLOv5算法改进(9)— 替换主干网络之ShuffleNetV2
  • 三、mycat分库分表
  • gitlab提交项目Log in with Access Token错误
  • openGauss学习笔记-56 openGauss 高级特性-DCF
  • Xcode 14 pod init报错