[네트워크이론] Louvain algorithm - Python으로 구현하기

Louvain algorithm은 사용하기 엄청 쉽다. python 버전으로 package를 만들어 놓은 사람이 있기 때문이다. package는 networkx와 연동되며, 소스코드 한줄만에 community 추출이 완료된다.

https://github.com/taynaud/python-louvain

여기 url로 가서 다운로드 & package 설치를 한다.

먼저, sample을 만들기 위해 다음과 같이 networkx로 community3개로 구성된 인공 network를 만든다.


import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import copy
from networkx.algorithms import community
import matplotlib.animation as animation


def Add_Inner_edges(range, num):
    inner_edges = []
    while len(inner_edges) < num:
        tmp = np.sort(np.random.choice(range, size=2, replace=None)).tolist()
        tmp += [np.random.uniform(0, 1)]  # random weight
        new_edge = tuple(tmp)
        if new_edge not in inner_edges:
            inner_edges = inner_edges + [new_edge]
    return inner_edges


def Add_Outer_edges(Community_all, num):
    # 두 커뮤니티 선택
    outter_edges = []
    while len(outter_edges) < num:
        #group_choiced = np.random.choice(range(len(Community_all)), size=2, replace=None) # 범용적으로 커뮤니티 선택할 시
        if len(outter_edges) < 3:
            group_choiced = np.random.choice([0, 1], size=2, replace=None)
        elif len(outter_edges) < 6:
            group_choiced = np.random.choice([0, 2], size=2, replace=None)
        elif len(outter_edges) < 10:
            group_choiced = np.random.choice([1, 2], size=2, replace=None)
        tmp = np.sort([np.random.choice(Community_all[group_choiced[0]], replace=None),
                 np.random.choice(Community_all[group_choiced[1]], replace=None)]).tolist()
        tmp += [np.random.uniform(0, 1)]  # random weight
        new_edge = tuple(tmp)
        if new_edge not in outter_edges:
            outter_edges = outter_edges + [new_edge]
    return outter_edges


""" Network 생성 """
# 총 150명
G = nx.Graph()
G.add_nodes_from(range(150))
# Community 설정 3개의 커뮤니티
Community1 = range(0, 40)
Community2 = range(40, 90)
Community3 = range(90, 150)
Community_all = Community1, Community2, Community3

# 내부 연결 추가
Inner_edges_1 = Add_Inner_edges(range=Community1, num=100)  # Community 1 100개
Inner_edges_2 = Add_Inner_edges(range=Community2, num=150)  # Community 1 150개
Inner_edges_3 = Add_Inner_edges(range=Community3, num=200)  # Community 1 200개
G.add_weighted_edges_from(Inner_edges_1)
G.add_weighted_edges_from(Inner_edges_2)
G.add_weighted_edges_from(Inner_edges_3)

# 외부 연결 추가
Outter_edges = Add_Outer_edges(Community_all, 50)  # Community 1-2-3 간에 50개
G.add_weighted_edges_from(Outter_edges)
pos = nx.spring_layout(G)

그리고는 community package를 불러와서 best_partition 명령을 내려주면 끝!

import community as lvcm


""" Louvain method """
partition = lvcm.best_partition(graph=G, partition=None, weight='weight', resolution=1., randomize=True)
max_k_w = []
for com in set(partition.values()):
    list_nodes = [nodes for nodes in partition.keys()
                  if partition[nodes] == com]
    max_k_w = max_k_w + [list_nodes]

""" Make Community Color list """
community_num_group = len(max_k_w)
color_list_community = [[] for i in range(len(G.nodes()))]
for i in range(len(G.nodes())):
    for j in range(community_num_group):
        if i in max_k_w[j]:
            color_list_community[i] = j

""" Plot Community """
fig = plt.figure()
edges = G.edges()
weights = [G[u][v]['weight'] for u, v in edges]
Feature_color_sub = color_list_community
node_size = 50
im = nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color=Feature_color_sub, cmap='jet', vmin=0, vmax=community_num_group)
nx.draw_networkx_edges(G, pos, width=weights)
nx.draw_networkx_labels(G, pos, font_size=5, font_color="black")
plt.xticks([])
plt.yticks([])
plt.colorbar(im)
plt.show(block=False)

사실상 알고리즘은 best_partition에서 끝난다. 밑의 코드는 아래처럼 figure를 뽑기 위해 작성

직접 실행해보면 느끼지만 엄청 빠르다. 그리고 community가 의도대로 잘 뽑혀있는 것을 확인할 수 있다.

조금 더 놀아보고 싶어서 수동 코드를 작성해 보았다. 놀기용이라 대충 만들어서 계산 시간은 훨씬 더 걸린다..

def CAL_dQ(communities, edges, node_s, community_s):
    sigma_in = sum([edge[2] for edge in edges if (edge[0] in communities[community_s]) & (edge[1] in communities[community_s])])
    sigma_total = sum([edge[2] for edge in edges if (edge[0] in communities[community_s]) | (edge[1] in communities[community_s])])
    k_i = sum([edge[2] for edge in edges if (edge[0] == node_s) | (edge[1] == node_s)])
    k_i_in = sum([edge[2] for edge in edges
                  if ((edge[0] == node_s) & (edge[1] in communities[community_s])) |
                  ((edge[1] == node_s) & (edge[0] in communities[community_s]))])
    m = sum([edge[2] for edge in edges])
    dQ = ((sigma_in + k_i_in)/2/m - ((sigma_total+k_i)/2/m)**2) - ((sigma_in/2/m) - ((sigma_total/2/m)**2)-((k_i/2/m)**2))
    return dQ


def Phase_First(communities_p1, nodes_p1, edges_p1):
    """ Phase 1 """
    flag_changed = 1
    cnt = 0
    log_community = [copy.deepcopy(communities_p1)]
    while flag_changed == 1:
        cnt += 1
        flag_changed = 0
        for node_s in nodes_p1:
            """ node_s를 community로부터 분리 """
            communities_ = copy.deepcopy(communities_p1)
            tmp_x = [comm for comm in communities_ if node_s not in comm]
            tmp_o = [comm for comm in communities_ if node_s in comm]
            if len(tmp_o[0]) != 1:
                tmp_o[0].remove(node_s)
                communities_cal = tmp_x + tmp_o + [[node_s]]
            else:
                communities_cal = communities_

            """ dQ 계산 """
            max_dQ = 0
            for community_s in range(len(communities_cal)):
                if communities_cal[community_s] != [node_s]:
                    dQ = CAL_dQ(communities_cal, edges_p1, node_s, community_s)
                    if dQ > max_dQ:
                        community_d = community_s
                        max_dQ = dQ

            """ max_dQ에 node_s를 편입 """
            if max_dQ > 0:
                communities_cal[community_d] += [node_s]
                communities_cal[community_d].sort()
                communities_cal.remove([node_s])
                communities_cal.sort()
                if communities_p1 != communities_cal:
                    communities_p1 = copy.deepcopy(communities_cal)
                    flag_changed = 1
                    log_community += [communities_p1]

    return communities_p1, log_community, cnt


def Convert_p2_to_p1(communities_p1, communities_p2_result):
    communities_result = []
    for community_p2 in communities_p2_result:
        tmp = []
        for comm in community_p2:
            tmp += communities_p1[comm]
        communities_result += [tmp]
    return communities_result


""" initial """
communities_init = [[i] for i in list(G.nodes())]
nodes_init = list(G.nodes())
edges_init = list(G.edges.data('weight', default=True))
Q_init = community.modularity(G, communities=communities_init, weight='weight')

communities_p1 = copy.deepcopy(communities_init)
nodes_p1 = copy.deepcopy(nodes_init)
edges_p1 = copy.deepcopy(edges_init)

""" Louvain algorithm """
end_cnt = 0
log_community_all = []
while end_cnt != 1:
    communities_p2 = [[i] for i in range(len(communities_p1))]
    nodes_p2 = list(range(len(communities_p2)))
    edges_p2 = []
    for c_1 in range(len(communities_p1)):
        for c_2 in range(len(communities_p1)):
            tmp = sum([edge[2] for edge in edges_p1
                     if ((edge[0] in communities_p1[c_1]) & (edge[1] in communities_p1[c_2])) |
                     ((edge[0] in communities_p1[c_1]) & (edge[1] in communities_p1[c_2]))])
            if tmp != 0:
                edges_p2 += [(c_1, c_2, tmp)]
    communities_p2_result, log_community, end_cnt = Phase_First(communities_p2, nodes_p2, edges_p2)

    for comm in log_community:
        log_community_all += [Convert_p2_to_p1(communities_p1, comm)]

    communities_result = Convert_p2_to_p1(communities_p1, communities_p2_result)
    communities_p1 = copy.deepcopy(communities_result)

community_num_group = len(communities_result)
color_list_community = [[] for i in range(len(G.nodes()))]
for i in range(len(G.nodes())):
    for j in range(community_num_group):
        if i in communities_result[j]:
            color_list_community[i] = j

""" Plot Community """
fig = plt.figure()
Gedges = G.edges()
weights = [G[u][v]['weight'] for u, v in Gedges]
Feature_color_sub = color_list_community
node_size = 50
im = nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color=Feature_color_sub, cmap='jet', vmin=0, vmax=community_num_group)
nx.draw_networkx_edges(G, pos, width=weights)
nx.draw_networkx_labels(G, pos, font_size=5, font_color="black")
plt.xticks([])
plt.yticks([])
plt.colorbar(im)
plt.show(block=False)

그래도 여러가지 logging을 해두어서 이것저것 볼 수 있는 장점이 있다.

아래 처럼 코드를 쓰면 Dynamic 한 변화를 볼 수 있다.

def ANI_Undirected_plot(time, G, q, color_list, pos, weight, fig):
    global step

    all_step = len(color_list)
    step_view = step % all_step

    subplot = fig.add_subplot(1, 2, 1)
    plt.cla()

    Feature_color_sub = color_list[step_view]
    Gedges = G.edges()
    node_size = 50
    if weight==True:
        weights = [G[u][v]['weight'] for u, v in Gedges]
        nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color=Feature_color_sub, cmap='jet', vmin=0, vmax=len(set(Feature_color_sub)))
        nx.draw_networkx_edges(G, pos, width=weights)
    else:
        nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color=Feature_color_sub, cmap='jet', vmin=0, vmax=len(set(Feature_color_sub)))
        nx.draw_networkx_edges(G, pos)
    nx.draw_networkx_labels(G, pos, font_size=5, font_color="black")
    plt.xticks([])
    plt.yticks([])
    plt.title('Community')

    subplot = fig.add_subplot(1, 2, 2)
    plt.scatter(step_view, q[step_view], c='blue')
    plt.title('Modularity')

    plt.suptitle('step: ' + str(step_view))

    if step < all_step-2:
        step = step + 1


""" Make Community Color list """
color_list_community_all = []
for communities_result in log_community_all:
    community_num_group = len(communities_result)
    color_list_community = [[] for i in range(len(G.nodes()))]
    for i in range(len(G.nodes())):
        for j in range(community_num_group):
            if i in communities_result[j]:
                color_list_community[i] = j
    color_list_community_all += [color_list_community]

""" Modularity """
t = []
q = []
for i in range(len(log_community_all)):
    Q = community.modularity(G, communities=log_community_all[i], weight='weight')
    t += [i]
    q += [Q]

""" Plot Community (Dynamic) """
fig_ani = plt.figure(figsize=(14, 6))
step = 0
weight = False
ani = animation.FuncAnimation(fig_ani, ANI_Undirected_plot, fargs=(G, q, color_list_community_all, pos, weight, fig_ani),
                              interval=25, save_count=len(q))
plt.show(block=False)

출력화면)

다른 색은 다른 community를 나타낸다. 한개씩 깜빡이면서 바뀌는건 Phase1 이 동작하는 과정이다. Louvain Phase1에서는 node 한개 한개가 근접 community에 재배치 되면서 modularity가 변화한다.

이게 다 끝나면 Phase2가 동작한다 Phase2에서는 Phase1결과 생성된 community가 한개한개의 node에 해당하는 network를 생성, 그 네트워크를 이용해서 Phase1의 작업을 반복해 나간다. 위 영상의 100대 후반부터 여러 개의 node들이 한꺼번에 깜빡이면서 합쳐지는 것을 볼 수 있다. 이 때 modularity가 확확 치고 올라가면서 3개의 community를 구분해낸다.

(Label이 없는 상황에서는 modularity가 높을수록 현재의 community조합이 최적화 되었다고 볼 수 있다.)

저작자표시 비영리 동일조건 (새창열림)

'공부 > 네트워크과학' 카테고리의 다른 글

[네트워크과학] 척도 없는 연결망 논란 (0)	2019.06.05
[네트워크이론] degree에 따른 attachment rate 구하기 (0)	2018.12.07
[네트워크이론] Louvain algorithm for community detection (6)	2018.08.10
[논문소개] Monophily in social networks introduces similarity among friends-of-friends (0)	2018.07.31
[네트워크과학] '네트워크이론'이란 무엇인가 (0)	2018.07.05

종혁의 저장소

[네트워크이론] Louvain algorithm - Python으로 구현하기

'공부 > 네트워크과학' 카테고리의 다른 글

댓글

티스토리툴바

[네트워크이론] Louvain algorithm - Python으로 구현하기

'공부 > 네트워크과학' 카테고리의 다른 글

관련글

댓글

티스토리툴바