hadoop map-reduce 入门示例代码

无任何干货,仅供复制

程序说明:

  1. 分析一个应用的访问日志文件,找出每个用户ID的访问次数。日志格式基本上是:"2012-10-26 14:41:30,748  userNameId-777 from IP-10.232.25.144 invoked URL-http://xxx/hello.jsonp"

  2. Standalone模式,但直接用maven项目所依赖的hadoop库,你不必再另装hadoop

<!-- pom.xml: the Hadoop library the example is built and run against in standalone mode,
     so no separate Hadoop installation is needed -->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-core</artifactId>
  <version>1.0.4</version>
</dependency>

//Mapper
/**
 * Mapper that pulls the numeric user id out of each access-log line and emits
 * {@code (userId, 1)} so that visits can be summed per user downstream.
 *
 * <p>Lines that contain no {@code userNameId-<digits>} token are silently skipped.
 */
public class Coupon11LogMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

	/** Compiled once for the class instead of once per input line. */
	private static final Pattern USER_ID_PATTERN = Pattern.compile(".*userNameId\\-(\\d+).*");

	/** Constant count written for every matching line; never mutated. */
	private static final LongWritable ONE = new LongWritable(1L);

	/**
	 * Output key reused across calls to avoid allocating a new Text per record
	 * (same object-reuse idiom as the Hadoop WordCount example).
	 */
	private final Text userNameId = new Text();

	/**
	 * Emits {@code (userId, 1)} for a log line that carries a user id.
	 *
	 * @param key     input key supplied by the input format; not used here
	 * @param value   one line of the access log
	 * @param context sink for the emitted key/value pair
	 * @throws java.io.IOException  if writing to the context fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
		Matcher matcher = USER_ID_PATTERN.matcher(value.toString());
		if (!matcher.find()) {
			// Not an access line with a user id: nothing to count.
			return;
		}
		userNameId.set(matcher.group(1));
		context.write(userNameId, ONE);
	}

}

//Reducer
/**
 * Reducer that adds up all counts received for one user id and emits
 * {@code (userId, total)}.
 */
public class Coupon11LogReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

	/**
	 * Sums the values for a single key and writes the total.
	 *
	 * @param key     the user id
	 * @param values  the counts emitted for this user id
	 * @param context sink for the {@code (userId, total)} pair
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
		// Primitive accumulator: a boxed Long would be unboxed and re-boxed on every iteration.
		long sum = 0L;
		for (LongWritable value : values) {
			sum += value.get();
		}
		context.write(key, new LongWritable(sum));
	}

}

//Job Runner


/**
 * Entry point that configures and runs the per-user access-count job.
 *
 * <p>Usage: {@code Coupon11LogJobMain [inputPaths [outputDir]]}. Both arguments are
 * optional; when omitted, the original hard-coded locations are used.
 */
public class Coupon11LogJobMain {

	/** Input used when no first argument is given. */
	private static final String DEFAULT_INPUT_FILE = "/home/kent/dev/hadoop/bigdata/coupon11/coupon11.log";

	/** Prefix of the output directory used when no second argument is given. */
	private static final String DEFAULT_OUTPUT_PREFIX = "/home/kent/dev/hadoop/bigdata/coupon11/output";

	/**
	 * Builds the job, waits for it to finish and exits with 0 on success, 1 on failure.
	 *
	 * @param args optional {@code [inputPaths, outputDir]} overriding the defaults
	 * @throws Exception if job setup or execution fails
	 */
	public static void main(String[] args) throws Exception {

		String inputFile = args.length > 0 ? args[0] : DEFAULT_INPUT_FILE;
		// The default output directory gets a timestamp suffix so each run writes to a fresh path.
		String outDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PREFIX + System.currentTimeMillis();

		Job job = new Job();
		job.setJarByClass(Coupon11LogJobMain.class);

		FileInputFormat.addInputPaths(job, inputFile);
		FileOutputFormat.setOutputPath(job, new Path(outDir));

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		job.setMapperClass(Coupon11LogMapper.class);
		// Summation is associative and the reducer's input and output types match,
		// so it can also pre-aggregate map output as a combiner without changing the result.
		job.setCombinerClass(Coupon11LogReducer.class);
		job.setReducerClass(Coupon11LogReducer.class);

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

Leave a Comment

Your email address will not be published.

This site uses Akismet to reduce spam. Learn how your comment data is processed.