社交网络影响力最大化——线性阈值模型(LT模型)算法实现(Python实现)
环境配置:Win7 Pycharm Anaconda2
该算法每个节点的阈值设为 0.5
linear_threshold.py (LT传播模型算法)
# -*- coding: utf-8 -*-
"""
Implement linear threshold models
社交网络影响力最大化 传播模型——线性阈值(LT)模型算法实现
"""
import copy
import itertools
import random
import math
import networkx as nx
__all__ = ['linear_threshold']
#-------------------------------------------------------------------------
# Some Famous Diffusion Models
#-------------------------------------------------------------------------
def linear_threshold(G, seeds, steps=0): #LT线性阈值算法
"""
Parameters
----------
G : networkx graph #所有节点构成的图
The number of nodes.
seeds: list of nodes #子节点集
The seed nodes of the graph
steps: int #激活节点的层数(深度),当steps<=0时,返回子节点集能激活的所有节点
The number of steps to diffuse
When steps <= 0, the model diffuses until no more nodes
can be activated
Return
------
layer_i_nodes : list of list of activated nodes
layer_i_nodes[0]: the seeds #子节点集
layer_i_nodes[k]: the nodes activated at the kth diffusion step #该子节点集激活的节点集
Notes
-----
1. Each node is supposed to have an attribute "threshold". If not, the
default value is given (0.5). #每个节点有一个阈值,这里默认阈值为:0.5
2. Each edge is supposed to have an attribute "influence". If not, the
default value is given (1/in_degree) #每个边有一个权重值,这里默认为:1/入度
References
----------
[1] GranovetterMark. Threshold models of collective behavior.
The American journal of sociology, 1978.
"""
if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
raise Exception( \
"linear_threshold() is not defined for graphs with multiedges.")
# make sure the seeds are in the graph
for s in seeds:
if s not in G.nodes():
raise Exception("seed", s, "is not in graph")
# change to directed graph
if not G.is_directed():
DG = G.to_directed()
else:
DG = copy.deepcopy(G) # copy.deepcopy 深拷贝 拷贝对象及其子对象
# init thresholds
for n in DG.nodes():
if 'threshold' not in DG.node[n]:
DG.node[n]['threshold'] = 0.5
elif DG.node[n]['threshold'] > 1:
raise Exception("node threshold:", DG.node[n]['threshold'], \
"cannot be larger than 1")
# init influences
in_deg = DG.in_degree() #获取所有节点的入度
for e in DG.edges():
if 'influence' not in DG[e[0]][e[1]]:
DG[e[0]][e[1]]['influence'] = 1.0 / in_deg[e[1]] #计算边的权重
elif DG[e[0]][e[1]]['influence'] > 1:
raise Exception("edge influence:", DG[e[0]][e[1]]['influence'], \
"cannot be larger than 1")
# perform diffusion
A = copy.deepcopy(seeds)
if steps <= 0:
# perform diffusion until no more nodes can be activated
return _diffuse_all(DG, A)
# perform diffusion for at most "steps" rounds only
return _diffuse_k_rounds(DG, A, steps)
def _diffuse_all(G, A):
layer_i_nodes = [ ]
layer_i_nodes.append([i for i in A])
while True:
len_old = len(A)
A, activated_nodes_of_this_round = _diffuse_one_round(G, A)
layer_i_nodes.append(activated_nodes_of_this_round)
if len(A) == len_old:
break
return layer_i_nodes
def _diffuse_k_rounds(G, A, steps):
layer_i_nodes = [ ]
layer_i_nodes.append([i for i in A])
while steps > 0 and len(A) < len(G):
len_old = len(A)
A, activated_nodes_of_this_round = _diffuse_one_round(G, A)
layer_i_nodes.append(activated_nodes_of_this_round)
if len(A) == len_old:
break
steps -= 1
return layer_i_nodes
def _diffuse_one_round(G, A):
activated_nodes_of_this_round = set()
for s in A:
nbs = G.successors(s)
for nb in nbs:
if nb in A:
continue
active_nb = list(set(G.predecessors(nb)).intersection(set(A)))
if _influence_sum(G, active_nb, nb) >= G.node[nb]['threshold']:
activated_nodes_of_this_round.add(nb)
A.extend(list(activated_nodes_of_this_round))
return A, list(activated_nodes_of_this_round)
def _influence_sum(G, froms, to):
influence_sum = 0.0
for f in froms:
influence_sum += G[f][to]['influence']
return influence_sum
test_linear_threshold.py(LT模型算法测试)
#!/usr/bin/env python
# coding=UTF-8 #支持中文字符需要添加 coding=UTF-8
from nose.tools import *
from networkx import *
from linear_threshold import *
import time
"""Test Diffusion Models
----------------------------
"""
if __name__=='__main__':
start=time.clock()
datasets=[]
f=open("Wiki-Vote.txt","r") #读取文件数据(边的数据)
data=f.read()
rows=data.split('\n')
for row in rows:
split_row=row.split('\t')
name=(int(split_row[0]),int(split_row[1]))
datasets.append(name) #将边的数据以元组的形式存放到列表中
G=networkx.DiGraph() #建立一个空的有向图G
G.add_edges_from(datasets) #向有向图G中添加边的数据列表
layers=linear_threshold(G,[6],2) #调用LT线性阈值算法,返回子节点集和该子节点集的最大激活节点集
del layers[-1]
length=0
for i in range(len(layers)):
length =length+len(layers[i])
lengths=length-len(layers[0]) #获得子节点的激活节点的个数(长度)
end=time.clock()
#测试数据输出结果
print(layers) #[[25], [33, 3, 6, 8, 55, 80, 50, 19, 54, 23, 75, 28, 29, 30, 35]]
print(lengths) #15
print('Running time: %s Seconds'%(end-start)) #输出代码运行时间
注释:测试文件Wiki-Vote.txt数据如下(每组数据代表图的有向边)
# FromNodeId ToNodeId
30 1412
30 3352
30 5254
30 5543
30 7478
3 28
3 30
3 39
3 54
3 108
3 152
3 178
3 182
3 214
3 271
3 286
3 300
3 348
3 349
3 371
3 567
3 581
3 584
3 586
3 590
3 604
3 611
3 8283
25 3
25 6
25 8
25 19
25 23
25 28
25 29
25 30
25 33
25 35
25 50
25 54
25 55
25 75
25 80
具体代码和数据集可以在我的GitHub上下载:https://github.com/Asia-Lee/Linear_Threshold