package com.sample.wc

import org.apache.spark.sql.SparkSession
import org.apache.commons.io.FileUtils
import java.io.File

/**
 * Classic Spark word-count job.
 *
 * Reads a text file, counts occurrences of each whitespace-separated word,
 * and writes the (word, count) pairs as a text directory.
 *
 * Expected arguments:
 *   args(0) — input text file path
 *   args(1) — output directory path (deleted first if it already exists)
 */
object WordCount {
  def main(args: Array[String]): Unit = {
    // Fail fast with a usage message instead of an opaque
    // ArrayIndexOutOfBoundsException when arguments are missing.
    require(args.length >= 2, "Usage: WordCount <input-path> <output-path>")

    // Create the Spark session (local master, as in the original tutorial setup).
    val spark = SparkSession.builder().master("local").appName("Word Count").getOrCreate()

    // Read the text file and obtain an RDD of lines.
    val data = spark.read.textFile(args(0)).rdd

    // Split each line on runs of whitespace (handles tabs and multiple spaces,
    // which a single-space split would not) and drop empty tokens so that
    // leading/consecutive separators are not counted as words.
    val wordsSplits = data.flatMap(line => line.split("\\s+")).filter(_.nonEmpty)

    // Map each word to (word, 1) to ease the counting, then sum per key.
    val count = wordsSplits.map(word => (word, 1)).reduceByKey(_ + _)

    // Delete the output directory if it already exists; saveAsTextFile
    // fails when the target path is present.
    FileUtils.deleteDirectory(new File(args(1)))

    // Save the counts as text output.
    count.saveAsTextFile(args(1))

    // Release Spark resources.
    spark.stop()
  }
}

// Command to execute the jar:
// bin/spark-submit --class com.sample.wc.WordCount WordCounts.jar text.txt output
Saturday, April 21, 2018
My first Spark program for word count, written in Scala
Subscribe to:
Posts (Atom)