南海营销网站建设,福建富通建设有限公司网站,如果评价网站做的好不好,网站做多语言
csv文件求某个平均数据
查询每个部门的平均工资，最后输出
数据处理过程
employee_noheader.csv（没做关于首行的处理，运行时请自行删除）
EmployeeID,EmployeeName,DepartmentID,Salary
1,ZhangSan,101,5000
2,LiSi,102,6000…
csv文件求某个平均数据
查询每个部门的平均工资，最后输出
数据处理过程
employee_noheader.csv（没做关于首行的处理，运行时请自行删除）
EmployeeID,EmployeeName,DepartmentID,Salary
1,ZhangSan,101,5000
2,LiSi,102,6000
3,WangWu,101,5500
4,ZhaoLiu,103,7000
5,SunQi,102,6500
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.hadoop</groupId>
    <artifactId>Mapreduce_csv_average</artifactId>
    <version>1.0-SNAPSHOT</version>
    <name>Mapreduce_csv_average</name>
    <description>wunaiieq</description>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!--版本控制-->
        <hadoop.version>2.7.3</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-api</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-streaming</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>

    <!--构建配置-->
    <build>
        <plugins>
            <plugin>
                <!--声明-->
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.3.0</version>
                <!--具体配置-->
                <configuration>
                    <archive>
                        <manifest>
                            <!--jar包的执行入口-->
                            <mainClass>com.hadoop.Main</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <!--描述符，此处为预定义的，表示创建一个包含项目所有依赖的可执行 JAR 文件；允许自定义生成jar文件内容-->
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <!--执行配置-->
                <executions>
                    <execution>
                        <!--执行配置ID，可修改-->
                        <id>make-assembly</id>
                        <!--执行的生命周期-->
                        <phase>package</phase>
                        <goals>
                            <!--执行的目标，single表示创建一个分发包-->
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
Map_1
package com.hadoop;import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;public class Map_1 extends MapperLongWritable, Text,IntWritable,IntWritable {Overrideprotected void map(LongWritable k1, Text v1, Context context)throws IOException, InterruptedException {//处理输入数据类型转换//以 1,ZhangSan,101,5000 为例String data v1.toString();//分词操作,csv用,进行分割//一般而言分词操作大多使用String进行获取后面可以附跟类型转换String[] words data.split(,);//下文输出context.write(//K2:部门号输出new IntWritable(Integer.parseInt(words[2])),//K3:工资输出new IntWritable(Integer.parseInt(words[3])));}
}
Reduce_1
package com.hadoop;import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.io.IntWritable;
import java.io.IOException;
public class Reduce_1 extends ReducerIntWritable,IntWritable,IntWritable,IntWritable{Overrideprotected void reduce(IntWritable k3, IterableIntWritable v3, Context context)throws IOException, InterruptedException {//对v3进行求和计算总额int total0;int i0;for (IntWritable v:v3){total v.get();i;}int averagetotal/i;context.write(k3,new IntWritable(average));}
}
Main
package com.hadoop;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class Main {public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {Job job Job.getInstance(new Configuration());job.setJarByClass(Main.class);//mapjob.setMapperClass(Map_1.class);job.setMapOutputKeyClass(IntWritable.class);//k2job.setMapOutputValueClass(IntWritable.class);//v2//reducejob.setReducerClass(Reduce_1.class);job.setOutputKeyClass(IntWritable.class);job.setOutputValueClass(IntWritable.class);//输入和输出FileInputFormat.setInputPaths(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));//执行job.waitForCompletion(true);}
}
运行（请自行上传至 HDFS 中）
hadoop jar Mapreduce_average.jar /input/employee_noheader.csv /output/csv_average
效果
hdfs dfs -cat /output/csv_average/part-r-00000