
Hadoop learning: MapReduce intro example 2: aggregating student scores

Compared with WordCount, the new material here is writing and using a Student entity class (a custom WritableComparable).

Input data:
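The sample data itself is not reproduced above. From the mapper's split(",") logic, each line of the input file carries a student id, a name, and one score, and a student id can appear on several lines (the reducer sums them). A few purely illustrative records (hypothetical values, not the original data):

1001,zhangsan,90
1001,zhangsan,85
1002,lisi,78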

1. Student 实体类

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Student implements WritableComparable<Student> {

    private long stuid;
    private String stuName;
    private int score;

    public Student(long stuid, String stuName, int score) {
        this.stuid = stuid;
        this.stuName = stuName;
        this.score = score;
    }

    public Student() {
    }

    @Override
    public String toString() {
        return "Student{" +
                "stuid=" + stuid +
                ", stuName='" + stuName + '\'' +
                ", score=" + score +
                '}';
    }

    public long getStuid() {
        return stuid;
    }

    public void setStuid(long stuid) {
        this.stuid = stuid;
    }

    public String getStuName() {
        return stuName;
    }

    public void setStuName(String stuName) {
        this.stuName = stuName;
    }

    public int getScore() {
        return score;
    }

    public void setScore(int score) {
        this.score = score;
    }

    // Auto-format the file: Ctrl + Shift + F (with the input method in English mode)

    // Order students by score; compareTo must return a negative, zero,
    // or positive value to satisfy the Comparable contract
    @Override
    public int compareTo(Student o) {
        return Integer.compare(this.score, o.score);
    }

    // Serialize the fields; the order must match readFields exactly
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(stuid);
        dataOutput.writeUTF(stuName);
        dataOutput.writeInt(score);
    }

    // Deserialize the fields in the same order they were written
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.stuid = dataInput.readLong();
        this.stuName = dataInput.readUTF();
        this.score = dataInput.readInt();
    }
}
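Since write and readFields must mirror each other exactly, it is worth checking the round trip locally before running a job. This is a minimal sketch (not from the original post) using plain java.io streams, which is essentially what Hadoop does during the shuffle:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class StudentRoundTrip {
    public static void main(String[] args) throws IOException {
        Student in = new Student(1001L, "zhangsan", 90);

        // Serialize with write(), as Hadoop would when spilling map output
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        in.write(new DataOutputStream(bos));

        // Deserialize into a fresh object with readFields()
        Student out = new Student();
        out.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));

        System.out.println(out);  // Student{stuid=1001, stuName='zhangsan', score=90}
    }
}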

2. Map phase: the StudentMapper class

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Output key: the student id; output value: the Student object
 */
public class StudentMapper extends Mapper<LongWritable, Text, LongWritable, Student> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input line looks like: stuid,stuName,score
        String[] split = value.toString().split(",");
        LongWritable stuidKey = new LongWritable(Long.parseLong(split[0]));
        Student stuValue = new Student(Long.parseLong(split[0]), split[1], Integer.parseInt(split[2]));
        context.write(stuidKey, stuValue);
    }
}
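With the illustrative records above, the map phase emits one (id, Student) pair per input line, and the shuffle then groups the pairs by key, so each reduce call sees every score recorded for one student. A sketch of the flow (hypothetical values, not program output):

map output:         (1001, Student{1001, zhangsan, 90})
                    (1001, Student{1001, zhangsan, 85})
                    (1002, Student{1002, lisi, 78})
after the shuffle:  1001 -> [Student{..., 90}, Student{..., 85}]
                    1002 -> [Student{..., 78}]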

3. Reduce phase: the StudentReducer class

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class StudentReducer extends Reducer<LongWritable, Student, Student, NullWritable> {
    @Override
    protected void reduce(LongWritable key, Iterable<Student> values, Context context) throws IOException, InterruptedException {
        Student stuOut = new Student();
        int sumScore = 0;
        String stuName = "";
        // Sum the scores of all records sharing this student id
        for (Student stu : values) {
            sumScore += stu.getScore();
            stuName = stu.getStuName();
        }
        stuOut.setScore(sumScore);
        stuOut.setStuid(key.get());
        stuOut.setStuName(stuName);
        System.out.println(stuOut.toString());
        context.write(stuOut, NullWritable.get());
    }
}
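One subtlety worth knowing: while iterating over values, Hadoop typically reuses a single Student instance, refilling its fields via readFields() on every step. Reading primitive fields inside the loop, as the code above does, is safe; storing the iterated objects themselves is not. A minimal sketch of the trap, assuming it sits inside reduce() (note that values can only be consumed once):

List<Student> copies = new ArrayList<>();
for (Student stu : values) {
    // copies.add(stu);  // BUG: every entry would point at the same reused instance
    copies.add(new Student(stu.getStuid(), stu.getStuName(), stu.getScore()));  // safe: copy the fields
}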

4. Driver: the StudentDriver class

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class StudentDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(StudentDriver.class);

        // Configure the job's map-phase class and the map output key/value types
        job.setMapperClass(StudentMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Student.class);

        // Configure the job's reduce-phase class and the final output key/value types
        job.setReducerClass(StudentReducer.class);
        job.setOutputKeyClass(Student.class);
        job.setOutputValueClass(NullWritable.class);

        // Input path, supplied on the command line,
        // e.g. "hdfs://kb131:9000/kb23/hadoopstu/stuscore.csv"
        Path inpath = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inpath);

        // Output path, e.g. "hdfs://kb131:9000/kb23/hadoopstu/out2";
        // delete it first if it already exists, otherwise the job fails
        Path path = new Path(args[1]);
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        if (fs.exists(path))
            fs.delete(path, true);
        FileOutputFormat.setOutputPath(job, path);

        job.waitForCompletion(true);
    }
}
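Both paths are read from the command line, so a run against the HDFS locations mentioned in the comments would look roughly like this (the jar name is hypothetical, and StudentDriver may need its package prefix):

hadoop jar hadoopstu.jar StudentDriver \
    hdfs://kb131:9000/kb23/hadoopstu/stuscore.csv \
    hdfs://kb131:9000/kb23/hadoopstu/out2

Because the reducer's output key is a Student and the value is NullWritable, each line of the result file is simply the Student's toString(), e.g. Student{stuid=1001, stuName='zhangsan', score=175}. Deleting an existing output directory up front, as the driver does, avoids the FileAlreadyExistsException that Hadoop otherwise throws.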
