
Commit 425176c

add more example (#65)
1 parent 0e0668e commit 425176c

File tree

3 files changed: +101 -0 lines changed

README-CN.md

Lines changed: 2 additions & 0 deletions
```diff
@@ -21,6 +21,7 @@ nebula-algorithm is a Spark application based on [GraphX](https://spark.apache.org/graphx/)
 | ClusteringCoefficient | clustering coefficient | recommendation, telecom fraud analysis |
 | Jaccard | Jaccard similarity | similarity computation, recommendation |
 | BFS | breadth-first search | level-order traversal, shortest path planning |
+| DFS | depth-first search | level-order traversal, shortest path planning |
 | Node2Vec | - | graph classification |

 With `nebula-algorithm`, you can run graph computation on data in the `Nebula Graph` database either by submitting a `Spark` job that uses the complete algorithm tool, or by programmatically calling the algorithms under the `lib` package against a DataFrame.
```
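The programmatic mode described in the paragraph above is what the new example file in this commit exercises. Below is a minimal standalone sketch reusing the `PageRankAlgo`/`PRConfig` entry points that appear in that file; the CSV path, header option, and object name are illustrative assumptions, and the edge DataFrame is assumed to carry numeric source/target IDs in its first two columns:

```scala
import com.vesoft.nebula.algorithm.config.PRConfig
import com.vesoft.nebula.algorithm.lib.PageRankAlgo
import org.apache.spark.sql.SparkSession

object PageRankOnDataFrame {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("pagerank-on-dataframe").getOrCreate()
    // Illustrative input: an edge list whose first two columns are numeric src/dst IDs.
    val edges = spark.read
      .option("header", "true")
      .csv("hdfs://127.0.0.1:9000/tmp/edges.csv")
    // 10 iterations, damping factor 0.85; the final `false` means the edges are unweighted,
    // matching the calling convention used in the example file added by this commit.
    val ranks = PageRankAlgo.apply(spark, edges, PRConfig(10, 0.85), false)
    ranks.show(10)
  }
}
```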
```diff
@@ -101,6 +102,7 @@ nebula-algorithm is a Spark application based on [GraphX](https://spark.apache.org/graphx/)
 | closeness | closeness | double/string |
 | hanp | hanp | int/string |
 | bfs | bfs | string |
+| dfs | dfs | string |
 | jaccard | jaccard | string |
 | node2vec | node2vec | string |
```

README.md

Lines changed: 2 additions & 0 deletions
```diff
@@ -25,6 +25,7 @@ nebula-algorithm is a Spark Application based on [GraphX](https://spark.apache.org/graphx/)
 | ClusteringCoefficient | recommended, telecom fraud analysis |
 | Jaccard | similarity calculation, recommendation |
 | BFS | sequence traversal, shortest path planning |
+| DFS | sequence traversal, shortest path planning |
 | Node2Vec | graph machine learning, recommendation |
```
```diff
@@ -111,6 +112,7 @@ If you want to write the algorithm execution result into NebulaGraph (`sink: nebula`)
 | closeness | closeness | double/string |
 | hanp | hanp | int/string |
 | bfs | bfs | string |
+| dfs | dfs | string |
 | jaccard | jaccard | string |
 | node2vec | node2vec | string |
```
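The rows added above document the DFS output column alongside BFS, but the lib-level entry point for DFS is not shown in this diff. A hypothetical call sketch follows, assuming `DfsAlgo`/`DfsConfig` mirror the `(spark, df, config, hasWeight)` shape of the algorithms in the example file below; these names and parameters are unverified assumptions, not the library's confirmed API:

```scala
import org.apache.spark.sql.{DataFrame, SparkSession}
// Assumed, unverified imports: the DFS entry points do not appear in this diff.
import com.vesoft.nebula.algorithm.config.DfsConfig
import com.vesoft.nebula.algorithm.lib.DfsAlgo

object DfsSketch {
  // Hypothetical: run DFS over an edge DataFrame, mirroring the calling
  // convention of the other algorithms in this repository.
  def runDfs(spark: SparkSession, edges: DataFrame): DataFrame =
    DfsAlgo.apply(spark, edges, DfsConfig(5, 1L), false) // assumed params: max depth, root vertex id
}
```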
AlgoPerformanceTest.scala (new file)

Lines changed: 97 additions & 0 deletions
```scala
/* Copyright (c) 2022 vesoft inc. All rights reserved.
 *
 * This source code is licensed under Apache 2.0 License.
 */

package com.vesoft.nebula.algorithm

import com.vesoft.nebula.connector.connector.NebulaDataFrameReader
import com.facebook.thrift.protocol.TCompactProtocol
import com.vesoft.nebula.algorithm.config.{CcConfig, LPAConfig, LouvainConfig, PRConfig}
import com.vesoft.nebula.algorithm.lib.{
  ConnectedComponentsAlgo,
  LabelPropagationAlgo,
  LouvainAlgo,
  PageRankAlgo
}
import com.vesoft.nebula.connector.{NebulaConnectionConfig, ReadNebulaConfig}
import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}

object AlgoPerformanceTest {

  def main(args: Array[String]): Unit = {
    // Kryo serialization is required for the Thrift types used by the Nebula connector.
    val sparkConf = new SparkConf()
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses(Array[Class[_]](classOf[TCompactProtocol]))
    val spark = SparkSession
      .builder()
      .config(sparkConf)
      .getOrCreate()

    // Read the edge data once, then time each algorithm against the cached DataFrame.
    val df = readNebulaData(spark)
    lpa(spark, df)
    louvain(spark, df)
    pagerank(spark, df)
    wcc(spark, df)
  }

  def readNebulaData(spark: SparkSession): DataFrame = {
    val start = System.currentTimeMillis()
    val config =
      NebulaConnectionConfig
        .builder()
        .withMetaAddress("127.0.0.1:9559")
        .withTimeout(6000)
        .withConenctionRetry(2) // method name as spelled in the connector API
        .build()
    val nebulaReadEdgeConfig: ReadNebulaConfig = ReadNebulaConfig
      .builder()
      .withSpace("twitter")
      .withLabel("FOLLOW")
      .withNoColumn(true)
      .withLimit(20000)
      .withPartitionNum(120)
      .build()
    val df: DataFrame =
      spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToDF()
    df.cache()
    df.count() // materialize the cache so the read cost is measured here
    println(s"read data cost time ${System.currentTimeMillis() - start}")
    df
  }

  def lpa(spark: SparkSession, df: DataFrame): Unit = {
    val start = System.currentTimeMillis()
    val lpaConfig = LPAConfig(10) // 10 iterations
    val lpa = LabelPropagationAlgo.apply(spark, df, lpaConfig, false)
    lpa.write.csv("hdfs://127.0.0.1:9000/tmp/lpa")
    println(s"lpa compute and save cost ${System.currentTimeMillis() - start}")
  }

  def pagerank(spark: SparkSession, df: DataFrame): Unit = {
    val start = System.currentTimeMillis()
    val pageRankConfig = PRConfig(10, 0.85) // 10 iterations, damping factor 0.85
    val pr = PageRankAlgo.apply(spark, df, pageRankConfig, false)
    pr.write.csv("hdfs://127.0.0.1:9000/tmp/pagerank")
    println(s"pagerank compute and save cost ${System.currentTimeMillis() - start}")
  }

  def wcc(spark: SparkSession, df: DataFrame): Unit = {
    val start = System.currentTimeMillis()
    val ccConfig = CcConfig(20) // up to 20 iterations
    val cc = ConnectedComponentsAlgo.apply(spark, df, ccConfig, false)
    cc.write.csv("hdfs://127.0.0.1:9000/tmp/wcc")
    println(s"wcc compute and save cost ${System.currentTimeMillis() - start}")
  }

  def louvain(spark: SparkSession, df: DataFrame): Unit = {
    val start = System.currentTimeMillis()
    val louvainConfig = LouvainConfig(10, 5, 0.5) // outer iterations, inner iterations, tolerance
    val louvain = LouvainAlgo.apply(spark, df, louvainConfig, false)
    louvain.write.csv("hdfs://127.0.0.1:9000/tmp/louvain")
    println(s"louvain compute and save cost ${System.currentTimeMillis() - start}")
  }
}
```
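The four benchmark methods above repeat the same measure-compute-write-print pattern. A small refactoring sketch follows; the `TimedRun.timed` helper is hypothetical, not part of this commit:

```scala
import org.apache.spark.sql.DataFrame

object TimedRun {
  // Hypothetical helper, not part of the commit: wraps the repeated
  // "start timer, compute, write CSV, print elapsed" pattern used above.
  def timed(name: String, outDir: String)(compute: => DataFrame): Unit = {
    val start = System.currentTimeMillis()
    compute.write.csv(s"$outDir/$name")
    println(s"$name compute and save cost ${System.currentTimeMillis() - start}")
  }
}
```

With it, each benchmark collapses to one call site, e.g. `TimedRun.timed("lpa", "hdfs://127.0.0.1:9000/tmp") { LabelPropagationAlgo.apply(spark, df, LPAConfig(10), false) }`.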
