链路预测算法MATLAB实现-阿里云开发者社区

链路预测是复杂网络分析中的重要任务，旨在预测网络中尚未连接的两个节点之间未来产生连接的可能性。

程序概述

MATLAB程序实现了以下链路预测算法：

基于局部信息的相似性指标（Common Neighbors, Jaccard, Adamic-Adar等）
基于路径的相似性指标（Katz指数）
基于随机游走的相似性指标（Rooted PageRank, SimRank）
矩阵分解方法

代码

classdef LinkPrediction
    %LINKPREDICTION 链路预测算法实现
    %   包含多种链路预测算法

    properties
        A;          % 邻接矩阵
        train_mask; % 训练掩码矩阵
        test_mask;  % 测试掩码矩阵
        methods;    % 可用的预测方法
    end

    methods
        function obj = LinkPrediction(adj_matrix, train_ratio)
            %LINKPREDICTION 构造函数
            %   adj_matrix - 网络邻接矩阵
            %   train_ratio - 训练集比例(0-1)

            obj.A = adj_matrix;
            obj.methods = {
   'CommonNeighbors', 'Jaccard', 'AdamicAdar', ...
                          'PreferentialAttachment', 'Katz', 'RootedPageRank', ...
                          'SimRank', 'MatrixFactorization'};

            % 划分训练集和测试集
            if nargin > 1
                obj = obj.split_dataset(train_ratio);
            else
                obj.train_mask = ones(size(obj.A));
                obj.test_mask = zeros(size(obj.A));
            end
        end

        function obj = split_dataset(obj, train_ratio)
            %SPLIT_DATASET 划分训练集和测试集
            %   随机隐藏一部分边作为测试集

            [n, ~] = size(obj.A);
            obj.train_mask = ones(n);
            obj.test_mask = zeros(n);

            % 获取所有边的索引
            [rows, cols] = find(triu(obj.A, 1)); % 只取上三角避免重复
            num_edges = length(rows);
            num_train = round(num_edges * train_ratio);

            % 随机选择训练边
            idx = randperm(num_edges);
            train_idx = idx(1:num_train);
            test_idx = idx(num_train+1:end);

            % 创建掩码矩阵
            for i = 1:length(test_idx)
                r = rows(test_idx(i));
                c = cols(test_idx(i));
                obj.train_mask(r, c) = 0;
                obj.train_mask(c, r) = 0;
                obj.test_mask(r, c) = 1;
                obj.test_mask(c, r) = 1;
            end

            % 确保对角线为0
            obj.train_mask(1:n+1:end) = 0;
            obj.test_mask(1:n+1:end) = 0;
        end

        function scores = common_neighbors(obj)
            %COMMON_NEIGHBORS 共同邻居算法
            scores = (obj.A * obj.A) .* obj.train_mask;
        end

        function scores = jaccard(obj)
            %JACCARD Jaccard相似系数
            [n, ~] = size(obj.A);
            scores = zeros(n);

            for i = 1:n
                for j = i+1:n
                    if obj.train_mask(i, j) == 0
                        continue;
                    end

                    neighbors_i = find(obj.A(i, :));
                    neighbors_j = find(obj.A(j, :));

                    intersection = length(intersect(neighbors_i, neighbors_j));
                    union = length(union(neighbors_i, neighbors_j));

                    if union > 0
                        scores(i, j) = intersection / union;
                    else
                        scores(i, j) = 0;
                    end

                    scores(j, i) = scores(i, j);
                end
            end
        end

        function scores = adamic_adar(obj)
            %ADAMIC_ADAR Adamic-Adar指数
            [n, ~] = size(obj.A);
            scores = zeros(n);

            % 计算每个节点的度
            degrees = sum(obj.A, 2);

            for i = 1:n
                for j = i+1:n
                    if obj.train_mask(i, j) == 0
                        continue;
                    end

                    common_neighbors = find(obj.A(i, :) & obj.A(j, :));
                    score = 0;

                    for k = common_neighbors
                        if degrees(k) > 1 % 避免除以0
                            score = score + 1 / log(degrees(k));
                        end
                    end

                    scores(i, j) = score;
                    scores(j, i) = score;
                end
            end
        end

        function scores = preferential_attachment(obj)
            %PREFERENTIAL_ATTACHMENT 优先连接算法
            degrees = sum(obj.A, 2);
            scores = (degrees * degrees') .* obj.train_mask;
        end

        function scores = katz(obj, beta)
            %KATZ Katz指数
            %   beta - 衰减因子，默认0.01

            if nargin < 2
                beta = 0.01;
            end

            [n, ~] = size(obj.A);
            I = eye(n);
            scores = beta * obj.A; % 长度为1的路径

            % 计算Katz指数：S = βA + β²A² + β³A³ + ...
            % 使用矩阵求逆近似：S = (I - βA)^-1 - I
            scores = inv(I - beta * obj.A) - I;
            scores = scores .* obj.train_mask;
        end

        function scores = rooted_pagerank(obj, alpha, max_iter, tol)
            %ROOTED_PAGERANK Rooted PageRank算法
            %   alpha - 随机游走概率，默认0.85
            %   max_iter - 最大迭代次数，默认100
            %   tol - 收敛容差，默认1e-6

            if nargin < 2
                alpha = 0.85;
            end
            if nargin < 3
                max_iter = 100;
            end
            if nargin < 4
                tol = 1e-6;
            end

            [n, ~] = size(obj.A);
            scores = zeros(n);

            % 创建列随机矩阵（转移概率矩阵）
            P = obj.A ./ sum(obj.A, 1);
            P(isnan(P)) = 0; % 处理度为0的节点

            % 对每个节点计算Rooted PageRank
            for i = 1:n
                r = zeros(n, 1);
                r(i) = 1;

                for iter = 1:max_iter
                    r_new = alpha * P * r + (1 - alpha) * r;

                    if norm(r_new - r, 1) < tol
                        break;
                    end

                    r = r_new;
                end

                scores(:, i) = r;
            end

            scores = scores .* obj.train_mask;
        end

        function scores = simrank(obj, C, max_iter, tol)
            %SIMRANK SimRank算法
            %   C - 衰减因子，默认0.8
            %   max_iter - 最大迭代次数，默认10
            %   tol - 收敛容差，默认1e-4

            if nargin < 2
                C = 0.8;
            end
            if nargin < 3
                max_iter = 10;
            end
            if nargin < 4
                tol = 1e-4;
            end

            [n, ~] = size(obj.A);
            S = eye(n); % 初始化SimRank矩阵

            % 计算入邻居
            in_neighbors = cell(n, 1);
            for i = 1:n
                in_neighbors{
   i} = find(obj.A(:, i));
            end

            % 迭代计算SimRank
            for iter = 1:max_iter
                S_old = S;

                for i = 1:n
                    for j = 1:n
                        if i == j
                            S(i, j) = 1;
                            continue;
                        end

                        in_i = in_neighbors{
   i};
                        in_j = in_neighbors{
   j};

                        if isempty(in_i) || isempty(in_j)
                            S(i, j) = 0;
                            continue;
                        end

                        sum_sim = 0;
                        for a = 1:length(in_i)
                            for b = 1:length(in_j)
                                sum_sim = sum_sim + S_old(in_i(a), in_j(b));
                            end
                        end

                        S(i, j) = C * sum_sim / (length(in_i) * length(in_j));
                    end
                end

                if norm(S - S_old, 'fro') < tol
                    break;
                end
            end

            scores = S .* obj.train_mask;
        end

        function scores = matrix_factorization(obj, dim, lambda, max_iter, learning_rate)
            %MATRIX_FACTORIZATION 矩阵分解方法
            %   dim - 潜在特征维度，默认10
            %   lambda - 正则化参数，默认0.01
            %   max_iter - 最大迭代次数，默认100
            %   learning_rate - 学习率，默认0.01

            if nargin < 2
                dim = 10;
            end
            if nargin < 3
                lambda = 0.01;
            end
            if nargin < 4
                max_iter = 100;
            end
            if nargin < 5
                learning_rate = 0.01;
            end

            [n, ~] = size(obj.A);

            % 初始化用户和物品特征矩阵
            U = randn(n, dim) * 0.01;
            V = randn(n, dim) * 0.01;

            % 获取训练集中的非零元素（即存在的边）
            [rows, cols] = find(obj.train_mask);
            values = ones(length(rows), 1);

            % 随机梯度下降
            for iter = 1:max_iter
                total_error = 0;

                for idx = 1:length(rows)
                    i = rows(idx);
                    j = cols(idx);

                    % 计算预测值和误差
                    prediction = U(i, :) * V(j, :)';
                    error = values(idx) - prediction;
                    total_error = total_error + error^2;

                    % 更新特征向量
                    U_i_old = U(i, :);
                    U(i, :) = U(i, :) + learning_rate * (error * V(j, :) - lambda * U(i, :));
                    V(j, :) = V(j, :) + learning_rate * (error * U_i_old - lambda * V(j, :));
                end

                % 添加正则化项
                total_error = total_error + lambda * (norm(U, 'fro')^2 + norm(V, 'fro')^2);

                if mod(iter, 10) == 0
                    fprintf('迭代 %d, 误差: %.4f\n', iter, total_error);
                end
            end

            % 计算得分矩阵
            scores = U * V';
            scores = scores .* obj.train_mask;
        end

        function [precision, recall, auc] = evaluate(obj, scores, top_k)
            %EVALUATE 评估预测结果
            %   scores - 预测得分矩阵
            %   top_k - 计算precision@k和recall@k的k值

            if nargin < 3
                top_k = 100;
            end

            % 获取测试集中的正样本
            [test_rows, test_cols] = find(obj.test_mask);
            positive_pairs = [test_rows, test_cols];
            num_positives = size(positive_pairs, 1);

            % 获取所有未连接的节点对（负样本+测试正样本）
            negative_mask = (obj.train_mask == 0) & (obj.A == 0) & (eye(size(obj.A)) == 0);
            [negative_rows, negative_cols] = find(negative_mask);
            negative_pairs = [negative_rows, negative_cols];

            % 随机选择与正样本数量相同的负样本
            idx = randperm(size(negative_pairs, 1), num_positives);
            negative_pairs = negative_pairs(idx, :);

            % 合并正负样本
            all_pairs = [positive_pairs; negative_pairs];
            labels = [ones(num_positives, 1); zeros(num_positives, 1)];

            % 获取预测得分
            pred_scores = zeros(size(all_pairs, 1), 1);
            for i = 1:size(all_pairs, 1)
                pred_scores(i) = scores(all_pairs(i, 1), all_pairs(i, 2));
            end

            % 计算AUC
            [~, ~, ~, auc] = perfcurve(labels, pred_scores, 1);

            % 计算Precision@K和Recall@K
            % 获取得分最高的top_k个节点对
            [~, sorted_idx] = sort(pred_scores(1:num_positives), 'descend');
            top_predictions = sorted_idx(1:min(top_k, length(sorted_idx)));

            true_positives = sum(ismember(top_predictions, 1:num_positives));
            precision = true_positives / top_k;
            recall = true_positives / num_positives;
        end

        function results = compare_methods(obj, methods, top_k)
            %COMPARE_METHODS 比较不同算法的性能
            %   methods - 要比较的方法列表
            %   top_k - 计算precision@k和recall@k的k值

            if nargin < 2
                methods = obj.methods;
            end
            if nargin < 3
                top_k = 100;
            end

            results = struct();

            for i = 1:length(methods)
                method = methods{
   i};
                fprintf('正在计算 %s...\n', method);

                try
                    % 调用相应的方法
                    tic;
                    scores = obj.(lower(method))();
                    time = toc;

                    % 评估性能
                    [precision, recall, auc] = obj.evaluate(scores, top_k);

                    % 保存结果
                    results.(method).scores = scores;
                    results.(method).precision = precision;
                    results.(method).recall = recall;
                    results.(method).auc = auc;
                    results.(method).time = time;

                    fprintf('%s: Precision@%d=%.4f, Recall@%d=%.4f, AUC=%.4f, 时间=%.2fs\n', ...
                            method, top_k, precision, top_k, recall, auc, time);

                catch ME
                    fprintf('计算 %s 时出错: %s\n', method, ME.message);
                    results.(method).error = ME.message;
                end
            end
        end

        function plot_results(obj, results)
            %PLOT_RESULTS 可视化比较结果
            methods = fieldnames(results);
            num_methods = length(methods);

            precisions = zeros(num_methods, 1);
            recalls = zeros(num_methods, 1);
            aucs = zeros(num_methods, 1);
            times = zeros(num_methods, 1);

            for i = 1:num_methods
                if isfield(results.(methods{
   i}), 'error')
                    continue;
                end
                precisions(i) = results.(methods{
   i}).precision;
                recalls(i) = results.(methods{
   i}).recall;
                aucs(i) = results.(methods{
   i}).auc;
                times(i) = results.(methods{
   i}).time;
            end

            % 创建图形
            figure('Position', [100, 100, 1200, 800]);

            % 绘制精确度
            subplot(2, 2, 1);
            bar(precisions);
            set(gca, 'XTickLabel', methods, 'XTickLabelRotation', 45);
            title('Precision@K');
            ylabel('Precision');
            grid on;

            % 绘制召回率
            subplot(2, 2, 2);
            bar(recalls);
            set(gca, 'XTickLabel', methods, 'XTickLabelRotation', 45);
            title('Recall@K');
            ylabel('Recall');
            grid on;

            % 绘制AUC
            subplot(2, 2, 3);
            bar(aucs);
            set(gca, 'XTickLabel', methods, 'XTickLabelRotation', 45);
            title('AUC');
            ylabel('AUC');
            grid on;

            % 绘制运行时间
            subplot(2, 2, 4);
            bar(times);
            set(gca, 'XTickLabel', methods, 'XTickLabelRotation', 45);
            title('运行时间');
            ylabel('时间 (秒)');
            grid on;

            % 调整布局
            set(gcf, 'Color', 'w');
        end
    end
end

% 示例使用代码
function example_usage()
    % 生成示例网络（无标度网络）
    n = 100; % 节点数量
    A = create_scale_free_network(n);

    % 创建链路预测对象，使用80%的边作为训练集
    lp = LinkPrediction(A, 0.8);

    % 比较所有方法的性能
    results = lp.compare_methods();

    % 可视化结果
    lp.plot_results(results);

    % 单独使用某个方法
    scores = lp.common_neighbors();
    [precision, recall, auc] = lp.evaluate(scores);
    fprintf('\nCommon Neighbors: Precision=%.4f, Recall=%.4f, AUC=%.4f\n', precision, recall, auc);
end

function A = create_scale_free_network(n)
    %CREATE_SCALE_FREE_NETWORK 生成无标度网络（Barabási-Albert模型）
    %   n - 网络节点数

    % 初始完全图
    m0 = 5; % 初始节点数
    A = zeros(n);
    A(1:m0, 1:m0) = ones(m0) - eye(m0);

    % 添加新节点
    for i = m0+1:n
        % 计算现有节点的度
        degrees = sum(A(1:i-1, 1:i-1), 2);
        total_degree = sum(degrees);

        % 根据度分布选择连接节点
        if total_degree > 0
            prob = degrees / total_degree;
            targets = randsample(1:i-1, m0, true, prob);
        else
            targets = randperm(i-1, min(m0, i-1));
        end

        % 添加连接
        for j = targets
            A(i, j) = 1;
            A(j, i) = 1;
        end
    end
end

% 运行示例
example_usage();

说明

这个MATLAB链路预测程序提供了以下功能：

1. 核心类 `LinkPrediction`

包含多种链路预测算法的实现，以及评估和比较功能。

2. 实现的算法

Common Neighbors (共同邻居)：基于两个节点共同邻居的数量
Jaccard Coefficient：共同邻居数除以总邻居数
Adamic-Adar：考虑共同邻居的度，度越小权重越大
Preferential Attachment：基于两个节点的度乘积
Katz Index：考虑所有路径，路径越短权重越大
Rooted PageRank：基于随机游走的相似性度量
SimRank：基于结构上下文的相似性度量
Matrix Factorization：基于矩阵分解的潜在特征方法

3. 评估指标

Precision@K：前K个预测中正确预测的比例
Recall@K：正确预测的正样本占所有正样本的比例
AUC：ROC曲线下面积，衡量分类器整体性能

4. 可视化功能

提供四种评估指标的可视化比较，便于分析不同算法的性能。

推荐代码链路预测程序,主程序，包含31个链路预测的函数代码 www.youwenfan.com/contentalj/52463.html

使用

程序最后提供了一个示例使用代码：

生成一个无标度网络（Barabási-Albert模型）
创建链路预测对象，划分训练集和测试集
比较所有算法的性能
可视化比较结果
单独使用Common Neighbors算法并进行评估

链路预测算法MATLAB实现

程序概述

代码

说明

1. 核心类 `LinkPrediction`

2. 实现的算法

3. 评估指标

4. 可视化功能

使用

热门文章

最新文章

相关电子书

探索云世界

热门

云计算

大数据

云原生

人工智能

数据库

开发与运维

活动广场

任务中心

训练营

直播

乘风者计划

下载

镜像站

技术资料

链路预测算法MATLAB实现

程序概述

代码

说明

1. 核心类 LinkPrediction

2. 实现的算法

3. 评估指标

4. 可视化功能

使用

热门文章

最新文章

相关电子书

1. 核心类 `LinkPrediction`