package com.sample.wc
import org.apache.spark.sql.SparkSession
import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.WildcardFileFilter
import java.io.File
object WordCount {
  /** Word-count driver.
    *
    * Reads the text file named by args(0), counts whitespace-separated words,
    * and writes the (word, count) pairs as text files under the directory
    * named by args(1), deleting that directory first if it already exists.
    */
  def main(args: Array[String]): Unit = {
    // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException
    require(args.length >= 2, "Usage: WordCount <input-file> <output-dir>")
    // Creating the spark object (single-threaded local mode)
    val spark = SparkSession.builder().master("local").appName("Word Count").getOrCreate()
    try {
      // Reading the text file and create the RDD of lines
      val data = spark.read.textFile(args(0)).rdd
      // Split each line on runs of whitespace; drop the empty tokens that
      // leading/consecutive spaces would otherwise produce (split(" ") would
      // have counted "" as a word)
      val wordsSplits = data.flatMap(line => line.split("\\s+")).filter(_.nonEmpty)
      // Map each word to (word, 1) to ease the counting
      val wordMaptoOne = wordsSplits.map(value => (value, 1))
      // Sum the 1s per word to get each word's count
      val count = wordMaptoOne.reduceByKey(_ + _)
      // Best-effort delete of the output directory if it already exists;
      // saveAsTextFile fails when the target directory is present
      FileUtils.deleteQuietly(new File(args(1)))
      // Save the output as text files under args(1)
      count.saveAsTextFile(args(1))
    } finally {
      // Always release Spark resources, even if the job above fails
      spark.stop()
    }
  }
}
// Command to execute the JAR file:
// bin/spark-submit --class com.sample.wc.WordCount WordCounts.jar text.txt output
// Saturday, April 21, 2018
// My first Spark program for word count using Scala