Custom ArangoRDD
@@ -0,0 +1,40 @@
package cn.ac.iie.spark

import cn.ac.iie.spark.rdd.ReadOptions
import cn.ac.iie.utils.SparkSessionUtil
import com.arangodb.entity.BaseDocument
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

object RDDTest {

  def main(args: Array[String]): Unit = {

    val sparkContext = SparkSessionUtil.spark.sparkContext

    // Confirm the ArangoDB endpoints are present on the Spark conf.
    println(sparkContext.getConf.get("arangodb.hosts"))

    // val options = ReadOptions("iplearn_media_domain").copy(collection = "R_LOCATE_FQDN2IP")
    val options = ReadOptions("ip-learning-test-0")

    // Note: ipOptions is prepared here but never used below; load() is
    // called with the collection name passed separately.
    val ipOptions = options.copy(collection = "IP")

    // Load the "IP" collection as an RDD of BaseDocument.
    val rdd = ArangoSpark.load[BaseDocument](sparkContext, "IP", options)

    println(rdd.count())
    println(rdd.getNumPartitions)

    // Keep documents with more than 100 client sessions and tag each one.
    val value: RDD[BaseDocument] = rdd
      .filter(doc => doc.getAttribute("CLIENT_SESSION_COUNT").asInstanceOf[Long] > 100)
      .map(doc => {
        doc.addAttribute("abc", 1)
        doc
      })
    // Cache so the foreach and count below do not re-read from ArangoDB.
    value.persist(StorageLevel.MEMORY_AND_DISK)

    value.foreach(row => println(row.toString))
    println(value.count())

    SparkSessionUtil.spark.close()
    System.exit(0)
  }
}
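For context, this diff only adds the test; the `ReadOptions` and `ArangoSpark.load` entry points it exercises belong to the custom connector and are not shown here. The following is a minimal sketch, inferred purely from the call sites above, of what those declarations would need to look like for the test to compile; every name, field, and signature in it is an assumption, not the committed implementation.

// ---- sketch: cn/ac/iie/spark/rdd/ReadOptions.scala (assumed) ----
package cn.ac.iie.spark.rdd

// ReadOptions("db") and options.copy(collection = "IP") in the test
// imply a case class with a database name and an optional collection.
case class ReadOptions(database: String, collection: String = null)

// ---- sketch: cn/ac/iie/spark/ArangoSpark.scala (assumed) ----
package cn.ac.iie.spark

import scala.reflect.ClassTag
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import cn.ac.iie.spark.rdd.ReadOptions

object ArangoSpark {
  // Assumed signature, matching ArangoSpark.load[BaseDocument](sc, "IP", options):
  // read the named collection from options.database and deserialize each
  // document as T. The actual ArangoRDD is not part of this diff, so the
  // body is left abstract here.
  def load[T: ClassTag](sc: SparkContext, collection: String, options: ReadOptions): RDD[T] =
    ??? // e.g. wrap a custom ArangoRDD[T] with one partition per ArangoDB shard
}

Under these assumptions, the `arangodb.hosts` value printed at the top of `main` would be set on the SparkConf somewhere inside `SparkSessionUtil`, and the connector would use it to locate the ArangoDB coordinators.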