前置数据见我的另一篇博客:
PyG遍历生成20节点到500节点的大规模无线通信网络拓扑推理数据
时间复杂度测试注意事项:
1. 测试时间复杂度之前一定要先训练模型:未训练的模型和训练好的模型,测试结果肯定是不一样的。预测时长和模型参数是有关系的,因为不同数值的参数对应的二进制位数长度不一样。
2.步进不要设置太密集。测试复杂度肯定要训练大规模图数据,一个1000节点的无线通信网络拓扑图数据.mat文件就307M:
步进太密集,一是占用硬盘空间太大,二是没必要花那么长时间测试。步进为10时,20到1000个节点就有100个左右的数据点,足够拟合复杂度曲线了。
程序:
#作者:zhouzhichao
#创建时间:25年6月9日
#内容:进行时间复杂度测试
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import sys
import torch
import time
torch.set_printoptions(linewidth=200)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch_geometric.nn import GCNConv
sys.path.append('D:\无线通信网络认知\论文1\大修意见\Reviewer1-1 阈值相似性图对比实验')
from gcn_dataset import graph_data
print(torch.__version__)
print(torch.cuda.is_available())
from sklearn.metrics import roc_auc_score, precision_score, recall_score, accuracy_score
class Net(torch.nn.Module):
    """Two-layer GCN encoder with a squared-distance edge decoder.

    Every inference helper operates on ``self``; none of them depends on a
    module-level ``model`` variable.
    """

    def __init__(self, in_channels=None):
        """Create the two graph-convolution layers.

        Args:
            in_channels: Input width of the first ``GCNConv`` layer. When
                omitted, the module-level ``Input_L`` is used so that the
                zero-argument call ``Net()`` keeps working.
        """
        super().__init__()
        if in_channels is None:
            in_channels = Input_L
        self.conv1 = GCNConv(in_channels, 100)
        self.conv2 = GCNConv(100, 50)

    def encode(self, x, edge_index):
        """Return node embeddings produced by the two ReLU-activated layers.

        ``x`` is transposed before it reaches the first layer, so its first
        dimension must match that layer's input width.
        """
        hidden = self.conv1(x.T, edge_index).relu()
        return self.conv2(hidden, edge_index).relu()

    def decode(self, z, edge_label_index):
        """Return the squared Euclidean distance between each edge's endpoints.

        Args:
            z: Node embeddings, indexed by node id along the first dimension.
            edge_label_index: Two rows of node ids; column ``k`` is the
                candidate edge ``(edge_label_index[0, k], edge_label_index[1, k])``.

        Returns:
            One squared distance per candidate edge.
        """
        difference = z[edge_label_index[0]] - z[edge_label_index[1]]
        return torch.sum(difference ** 2, dim=-1)

    def decode_all(self, z):
        """Return, in edge_index layout, every node pair whose inner product is positive."""
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()

    @torch.no_grad()
    def get_val(self, gcn_data):
        """Select candidate edges that do not occur in ``gcn_data.edge_index``.

        At most the first 100 such edges are kept. The first 50 form the
        ``*_1`` split and the remainder the ``*_0`` split.
        NOTE(review): the split names suggest positives come first in
        ``edge_label_index`` -- confirm against the dataset builder.

        Returns:
            ``(val_col_1, val_label_1, val_col_0, val_label_0)``.
        """
        edge_label_index = gcn_data.edge_label_index
        edge_label = gcn_data.edge_label

        # A set of (src, dst) tuples gives O(1) membership tests.
        known_edges = {tuple(edge) for edge in gcn_data.edge_index.t().tolist()}
        # An explicit bool dtype keeps `~` valid even when there are no
        # candidate edges (an empty list would otherwise become a float tensor).
        is_known = torch.tensor(
            [tuple(edge) in known_edges for edge in edge_label_index.t().tolist()],
            dtype=torch.bool,
        )
        held_out = ~is_known

        val_col = edge_label_index[:, held_out][:, :100]
        val_label = edge_label[held_out][:100]

        divide_index = 50
        return (
            val_col[:, :divide_index],
            val_label[:divide_index],
            val_col[:, divide_index:],
            val_label[divide_index:],
        )

    def _edge_scores(self, z, edge_label_index):
        """Return ``1 - squared distance`` per candidate edge as a NumPy array."""
        scores = 1 - self.decode(z, edge_label_index).view(-1)
        return scores.detach().cpu().numpy()

    @staticmethod
    def _binary_metrics(labels, scores, threshhold):
        """Return (accuracy, precision, recall) of ``scores > threshhold`` against ``labels``."""
        labels_np = labels.cpu().numpy()
        pred = (scores > threshhold).astype(int)
        return (
            accuracy_score(labels_np, pred),
            precision_score(labels_np, pred, zero_division=1),
            recall_score(labels_np, pred, zero_division=1),
        )

    @torch.no_grad()
    def test_val(self, gcn_data, threshhold):
        """Evaluate on the two held-out splits and average their metrics.

        Returns:
            ``(accuracy, precision, recall)``, each the mean of the value on
            the ``*_1`` split and the value on the ``*_0`` split.
        """
        self.eval()
        val_col_1, val_label_1, val_col_0, val_label_0 = self.get_val(gcn_data)

        # Both splits share the same graph, so one encoding pass is enough.
        z = self.encode(gcn_data.x, gcn_data.edge_index)
        accuracy_1, precision_1, recall_1 = self._binary_metrics(
            val_label_1, self._edge_scores(z, val_col_1), threshhold
        )
        accuracy_0, precision_0, recall_0 = self._binary_metrics(
            val_label_0, self._edge_scores(z, val_col_0), threshhold
        )

        accuracy = (accuracy_1 + accuracy_0) / 2
        precision = (precision_1 + precision_0) / 2
        recall = (recall_1 + recall_0) / 2
        return accuracy, precision, recall

    @torch.no_grad()
    def predict(self, gcn_data, threshhold):
        """Return 0/1 predictions for the edges listed in ``gcn_data.complex_test``.

        An edge is predicted as 1 when ``1 - squared distance`` exceeds
        ``threshhold``.
        """
        self.eval()
        z = self.encode(gcn_data.x, gcn_data.edge_index)
        scores = self._edge_scores(z, gcn_data.complex_test)
        return (scores > threshhold).astype(int)

    @torch.no_grad()
    def calculate_threshhold(self, gcn_data):
        """Grid-search the decision threshold that maximises accuracy.

        Candidates are ``np.arange(-2, 1.1, 0.1)``, scored on
        ``gcn_data.edge_label_index`` / ``gcn_data.edge_label``. On ties the
        smallest candidate wins because only strict improvements are accepted.
        """
        self.eval()
        z = self.encode(gcn_data.x, gcn_data.edge_index)
        scores = self._edge_scores(z, gcn_data.edge_label_index)
        labels_np = gcn_data.edge_label.cpu().numpy()

        threshhold = 0
        accuracy_max = 0
        for th in np.arange(-2, 1.1, 0.1):
            accuracy = accuracy_score(labels_np, (scores > th).astype(int))
            if accuracy > accuracy_max:
                accuracy_max = accuracy
                threshhold = th
        return threshhold
def graph_normalize(gcn_data):
    """Scale every column of ``gcn_data.x`` in place to a peak magnitude of 1.

    Each column is divided by the largest absolute value it contains. A column
    that is entirely zero is left unchanged instead of being divided by zero
    (which would fill it with NaN).

    Args:
        gcn_data: Object exposing a 2-D floating-point tensor ``x``; the
            tensor is modified in place.
    """
    features = gcn_data.x
    if features.numel() == 0:
        return
    peak = features.abs().amax(dim=0, keepdim=True)
    # Substitute 1 for zero peaks so all-zero columns stay zero.
    peak = torch.where(peak == 0, torch.ones_like(peak), peak)
    features.div_(peak)
# ---- Benchmark configuration ----
# Raw strings keep the Windows backslashes literal; the resulting paths are
# identical to the previous escaped literals but no longer rely on invalid
# escape sequences such as "\无".
# For the 20-500 node sweep use range(20, 510, 10) and the file name
# "20-500 nodes time cost.xlsx" instead.
DATA_DIR = r"D:\无线通信网络认知\论文1\大修意见\Reviewer1-4 大规模图实验\20-500节点网络(PyG)"
file_path = r"D:\无线通信网络认知\论文1\大修意见\Reviewer1-4 大规模图实验\510-1000 nodes time cost.xlsx"
NODE_COUNTS = range(510, 1010, 10)
MAX_EPOCHS = 100000
PATIENCE = 100        # stop once the loss has not improved for this many epochs
TIMING_REPEATS = 100  # inference passes averaged per graph size


def train_step(model, optimizer, criterion, gcn_data):
    """Run one full-graph optimisation step and return the loss as a float."""
    optimizer.zero_grad()
    z = model.encode(gcn_data.x, gcn_data.edge_index)
    out = 1 - model.decode(z, gcn_data.edge_label_index).view(-1)
    loss = criterion(out, gcn_data.edge_label)
    loss.backward()
    optimizer.step()
    # A plain float avoids keeping the autograd graph alive between epochs.
    return loss.item()


cost_time = []
N_list = []
for N in NODE_COUNTS:
    root = DATA_DIR + "\\" + str(N) + "_nodes_data"
    gcn_data = graph_data(root)
    graph_normalize(gcn_data)

    # Net() reads the module-level Input_L, so it must be set before building
    # the model; `model` itself stays a module-level name as well.
    Input_L = gcn_data.x.shape[0]
    model = Net()
    optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
    criterion = torch.nn.BCEWithLogitsLoss()
    model.train()

    # Train with early stopping so that timing is measured on a trained model.
    min_loss = float("inf")
    count = 0
    for epoch in range(MAX_EPOCHS):
        loss = train_step(model, optimizer, criterion, gcn_data)
        if loss < min_loss:
            min_loss = loss
            count = 0
        print("N: ", N, " epoch: ", epoch, " loss: ",
              round(loss, 4), " min_loss: ", round(min_loss, 4))
        count = count + 1
        if count > PATIENCE:
            break

    # Average the wall-clock cost of threshold search plus prediction.
    # perf_counter is monotonic and has finer resolution than time.time().
    t1 = time.perf_counter()
    for _ in range(TIMING_REPEATS):
        threshhold = model.calculate_threshhold(gcn_data)
        model.predict(gcn_data, threshhold)
    t2 = time.perf_counter()

    N_list.append(N)
    cost_time.append((t2 - t1) / TIMING_REPEATS)

# Save one row per graph size to an Excel sheet.
df = pd.DataFrame({
    'N_list': N_list,
    'cost_time': cost_time,
})
df.to_excel(file_path, index=False)
复杂度测试结果: