-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWordCountSpark.java
More file actions
39 lines (28 loc) · 1.21 KB
/
WordCountSpark.java
File metadata and controls
39 lines (28 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
package edu.easternct.bigdata;
import java.util.Arrays;
import java.util.List;
import java.lang.Iterable;
import scala.Tuple2;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
/**
 * Spark driver that counts occurrences of each space-separated token in a text input
 * and prints how many distinct tokens were found.
 *
 * <p>Usage: {@code WordCount <master> <input-path>}
 *
 * <p>NOTE(review): the hosting page lists this file as {@code WordCountSpark.java}; a
 * public top-level class must live in a file named after it, so confirm that the file
 * name and class name agree in the actual repository.
 */
public class WordCount {

    /**
     * Builds the per-token counts for the given input and prints the number of distinct
     * tokens to standard output.
     *
     * @param args {@code args[0]} is the Spark master URL handed to
     *     {@code SparkConf.setMaster}; {@code args[1]} is the path handed to
     *     {@code JavaSparkContext.textFile}
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws Exception kept in the signature for backward compatibility; any failure
     *     raised by the Spark job propagates to the caller
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException("Usage: WordCount <master> <input-path>");
        }

        SparkConf conf = new SparkConf().setAppName("wordCount");
        conf.setMaster(args[0]);

        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> rdd = sc.textFile(args[1]);

            // NOTE(review): returning a List from the flatMap lambda matches the
            // Iterable-returning FlatMapFunction of Spark 1.x; Spark 2.x and later expect
            // an Iterator (append .iterator()) — confirm the Spark version in the build.
            // NOTE(review): split(" ") yields empty tokens for blank lines and repeated
            // spaces, and those are counted as a key too — confirm that is intended.
            JavaPairRDD<String, Integer> wcounts = rdd.flatMap(x -> Arrays.asList(x.split(" ")))
                    .mapToPair(w -> new Tuple2<String, Integer>(w, 1))
                    .reduceByKey((x, y) -> x + y);

            // count() on the reduced pair RDD is the number of distinct tokens.
            System.out.println("Input found : " + wcounts.count());
        } finally {
            // Always release the context, even when the job above throws.
            sc.stop();
        }
    }
}