package cn.ac.iie.spark

import cn.ac.iie.spark.rdd.ReadOptions
import cn.ac.iie.utils.SparkSessionUtil
import com.arangodb.entity.BaseDocument
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
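
/** Smoke test: loads an ArangoDB collection into an RDD of BaseDocument,
  * filters and tags the documents, and prints basic diagnostics. */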
object RDDTest {

  def main(args: Array[String]): Unit = {
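    // SparkSessionUtil is a project-local helper that is not part of this file.
    // A minimal sketch of what it presumably provides, with a hypothetical
    // endpoint (the real one comes from the deployment config):
    //   object SparkSessionUtil {
    //     lazy val spark: SparkSession = SparkSession.builder()
    //       .appName("RDDTest")
    //       .config("arangodb.hosts", "127.0.0.1:8529") // hypothetical endpoint
    //       .getOrCreate()
    //   }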
    val sparkContext = SparkSessionUtil.spark.sparkContext
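
    // Sanity check: the ArangoDB endpoints must be present in the Spark conf,
    // otherwise this get() throws NoSuchElementException.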
    println(sparkContext.getConf.get("arangodb.hosts"))
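
    // ReadOptions (project-local) appears to take the target database name;
    // the collection is selected separately via copy(collection = ...).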
    // val options = ReadOptions("iplearn_media_domain").copy(collection = "R_LOCATE_FQDN2IP")
    val options = ReadOptions("ip-learning-test-0")
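
    // Point the read at the "IP" collection of that database.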
    val ipOptions = options.copy(collection = "IP")
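
    // Load the collection as an RDD of raw ArangoDB documents.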
    val rdd = ArangoSpark.load[BaseDocument](sparkContext, "IP", ipOptions)
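
    // Basic diagnostics: total document count and how the load was partitioned.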
    println(rdd.count())
    println(rdd.getNumPartitions)
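
    // Keep documents with more than 100 client sessions and tag each survivor.
    // CLIENT_SESSION_COUNT is assumed to deserialize as a java.lang.Long.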
    val value: RDD[BaseDocument] = rdd
      .filter(doc => doc.getAttribute("CLIENT_SESSION_COUNT").asInstanceOf[Long] > 100)
      .map(doc => {
        doc.addAttribute("abc", 1)
        doc
      })
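
    // Cache the filtered RDD so the two actions below read from ArangoDB only once.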
    value.persist(StorageLevel.MEMORY_AND_DISK)
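
    // Actions: print every document, then the filtered count. With a non-local
    // master, the foreach output goes to executor stdout, not the driver's.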
    value.foreach(row => println(row.toString))
    println(value.count())
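
    // Tear down the session and force the JVM to exit.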
    SparkSessionUtil.spark.close()
    System.exit(0)
  }
}