1. Transpose
- Concept: swap the rows and columns of a matrix.
- Application scenarios:
  - Rearranging feature-map dimensions in convolutional neural networks (see the layout-conversion sketch after the code below)
  - Adjusting dimensions before a matrix multiplication
  - Reordering features during data preprocessing

Original matrix A = [[1, 2, 3],    Transposed A^T = [[1, 4],
                     [4, 5, 6]]                      [2, 5],
                                                     [3, 6]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

matrix = np.array([[1, 2, 3], [4, 5, 6]])
print("Original matrix:")
print(matrix)
print("Shape:", matrix.shape)

print("\n1. NumPy transpose:")
np_transpose = matrix.T
print(np_transpose)
print("Shape:", np_transpose.shape)
np_transpose2 = np.transpose(matrix)  # equivalent to .T
print(np_transpose2)

print("\n2. TensorFlow transpose:")
tf_matrix = tf.constant(matrix)
tf_transpose = tf.transpose(tf_matrix)
print(tf_transpose.numpy())

print("\n3. PyTorch transpose:")
torch_matrix = torch.tensor(matrix)
torch_transpose = torch.transpose(torch_matrix, 0, 1)  # swap dims 0 and 1
print(torch_transpose)
torch_transpose2 = torch_matrix.T
print(torch_transpose2)

print("\n4. pandas transpose:")
df = pd.DataFrame(matrix)
pd_transpose = df.T
print(pd_transpose)

print("\n5. SciPy transpose (sparse matrix):")
sparse_matrix = sp.csr_matrix(matrix)
scipy_transpose = sparse_matrix.transpose()
print(scipy_transpose.toarray())
```
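As a small illustration of the feature-map use case listed above, the following sketch converts a hypothetical batch of images from NHWC to NCHW layout with a multi-axis transpose; the array name and shape are made up for the example.

```python
import numpy as np

# Hypothetical batch of feature maps in NHWC layout: (batch, height, width, channels)
feature_maps_nhwc = np.random.rand(8, 32, 32, 3)

# Frameworks that expect NCHW need the axes permuted:
feature_maps_nchw = np.transpose(feature_maps_nhwc, (0, 3, 1, 2))
print(feature_maps_nchw.shape)  # (8, 3, 32, 32)
```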
2. Matrix Multiplication
- Concept: multiply two matrices; the number of columns of the first matrix must equal the number of rows of the second.
- Application scenarios:
  - Attention computation and fully connected layers in the Transformer architecture (a small attention sketch follows the code below)
  - Multiplying a linear layer's weights with its input
  - Similarity between embedding vectors and query vectors

Matrix A = [[1, 2],    Matrix B = [[5, 6],    A×B = [[19, 22],
            [3, 4]]                [7, 8]]           [43, 50]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
print("Matrix A:")
print(A)
print("Matrix B:")
print(B)

print("\n1. NumPy matrix multiplication:")
np_matmul = np.matmul(A, B)
print(np_matmul)
np_matmul2 = A @ B  # the @ operator is equivalent to np.matmul
print(np_matmul2)
np_dot = np.dot(A, B)  # np.dot also performs matrix multiplication for 2-D arrays
print(np_dot)

print("\n2. TensorFlow matrix multiplication:")
tf_A = tf.constant(A)
tf_B = tf.constant(B)
tf_matmul = tf.matmul(tf_A, tf_B)
print(tf_matmul.numpy())

print("\n3. PyTorch matrix multiplication:")
torch_A = torch.tensor(A)
torch_B = torch.tensor(B)
torch_matmul = torch.matmul(torch_A, torch_B)
print(torch_matmul)
torch_matmul2 = torch_A @ torch_B
print(torch_matmul2)
torch_mm = torch.mm(torch_A, torch_B)  # 2-D matrices only
print(torch_mm)

print("\n4. pandas matrix multiplication:")
df_A = pd.DataFrame(A)
df_B = pd.DataFrame(B)
pd_matmul = df_A.values @ df_B.values
print(pd_matmul)

print("\n5. SciPy matrix multiplication (sparse matrices):")
sparse_A = sp.csr_matrix(A)
sparse_B = sp.csr_matrix(B)
scipy_matmul = sparse_A @ sparse_B
print(scipy_matmul.toarray())
```
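To connect this to the Transformer use case above, here is a minimal sketch of scaled dot-product attention scores; Q, K and their shapes are random placeholders rather than an actual model.

```python
import numpy as np

# Toy scaled dot-product attention: scores = Q K^T / sqrt(d_k), then a row-wise softmax.
Q = np.random.rand(4, 8)   # (seq_len, d_k), made-up values
K = np.random.rand(4, 8)

scores = Q @ K.T / np.sqrt(K.shape[1])                  # (4, 4) similarity matrix
scores -= scores.max(axis=1, keepdims=True)             # numerical stability
weights = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
print(weights.sum(axis=1))                              # each row sums to 1
```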
3. Element-wise Multiplication (Hadamard Product)
- Concept: multiply corresponding elements of two matrices of the same shape.
- Application scenarios:
  - Gating mechanisms (e.g., the gates in LSTM and GRU cells; a small gating sketch follows the code below)
  - Feature selection and weighting
  - Mask operations (e.g., attention masks)

Matrix A = [[1, 2],    Matrix B = [[5, 6],    A⊙B = [[5, 12],
            [3, 4]]                [7, 8]]           [21, 32]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
print("Matrix A:")
print(A)
print("Matrix B:")
print(B)

print("\n1. NumPy element-wise multiplication:")
np_element_wise = A * B
print(np_element_wise)
np_multiply = np.multiply(A, B)  # equivalent to the * operator
print(np_multiply)

print("\n2. TensorFlow element-wise multiplication:")
tf_A = tf.constant(A)
tf_B = tf.constant(B)
tf_element_wise = tf_A * tf_B
print(tf_element_wise.numpy())
tf_multiply = tf.multiply(tf_A, tf_B)
print(tf_multiply.numpy())

print("\n3. PyTorch element-wise multiplication:")
torch_A = torch.tensor(A)
torch_B = torch.tensor(B)
torch_element_wise = torch_A * torch_B
print(torch_element_wise)
torch_mul = torch.mul(torch_A, torch_B)
print(torch_mul)

print("\n4. pandas element-wise multiplication:")
df_A = pd.DataFrame(A)
df_B = pd.DataFrame(B)
pd_element_wise = df_A * df_B
print(pd_element_wise)

print("\n5. SciPy element-wise multiplication (sparse matrices):")
sparse_A = sp.csr_matrix(A)
sparse_B = sp.csr_matrix(B)
scipy_element_wise = sparse_A.multiply(sparse_B)
print(scipy_element_wise.toarray())
```
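A minimal sketch of the gating use case, assuming made-up activations rather than a real recurrent cell: a sigmoid gate in (0, 1) scales a candidate state element by element.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

candidate = np.random.randn(4, 8)       # hypothetical candidate hidden state
gate = sigmoid(np.random.randn(4, 8))   # gate values in (0, 1)

gated = gate * candidate                # Hadamard product; shape is unchanged
print(gated.shape)
```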
4. Matrix Inversion
- Concept: find the inverse of a square matrix, satisfying A × A⁻¹ = A⁻¹ × A = I (the identity matrix).
- Application scenarios:
  - Solving systems of linear equations
  - Closed-form solutions such as ordinary least squares (a small sketch follows the code below)

Matrix A = [[4, 7],    A⁻¹ = [[ 0.6, -0.7],
            [2, 6]]           [-0.2,  0.4]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.linalg as spla

A = np.array([[4, 7], [2, 6]])
print("Original matrix:")
print(A)

print("\n1. NumPy matrix inversion:")
np_inv = np.linalg.inv(A)
print(np_inv)
print("Verify A × A⁻¹ ≈ I:")
print(A @ np_inv)

print("\n2. TensorFlow matrix inversion:")
tf_A = tf.constant(A, dtype=tf.float32)
tf_inv = tf.linalg.inv(tf_A)
print(tf_inv.numpy())

print("\n3. PyTorch matrix inversion:")
torch_A = torch.tensor(A, dtype=torch.float32)
torch_inv = torch.inverse(torch_A)
print(torch_inv)

print("\n4. pandas matrix inversion:")
df_A = pd.DataFrame(A)
pd_inv = pd.DataFrame(np.linalg.inv(df_A.values))  # pandas has no inverse of its own; fall back to NumPy
print(pd_inv)

print("\n5. SciPy matrix inversion:")
scipy_inv = spla.inv(A)
print(scipy_inv)
```
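A small least-squares sketch for the use case above, with random stand-in data: the normal equations w = (XᵀX)⁻¹Xᵀy use an explicit inverse, while np.linalg.solve solves the same system without forming the inverse and is usually preferred numerically.

```python
import numpy as np

X = np.random.rand(100, 3)                    # made-up design matrix
true_w = np.array([1.0, -2.0, 0.5])
y = X @ true_w + 0.01 * np.random.randn(100)  # noisy targets

w_inv = np.linalg.inv(X.T @ X) @ X.T @ y      # explicit inverse
w_solve = np.linalg.solve(X.T @ X, X.T @ y)   # same result, no explicit inverse
print(w_inv)
print(w_solve)
```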
5. Matrix Decomposition - SVD (Singular Value Decomposition)
- Concept: decompose a matrix as A = U × Σ × V^T, where U and V are orthogonal matrices and Σ is a (rectangular) diagonal matrix.
- Application scenarios:
  - Dimensionality reduction (e.g., implementing PCA)
  - Matrix factorization in recommender systems
  - Image compression and noise filtering (a rank-k approximation sketch follows the code below)
  - Latent Semantic Analysis (LSA)

Matrix A = [[1, 2], [3, 4], [5, 6]]
decomposes into:
U = [[-0.2298,  0.8835,  0.4082],
     [-0.5247,  0.2408, -0.8165],
     [-0.8196, -0.4019,  0.4082]]
Σ = [[9.5255, 0],
     [0, 0.5143],
     [0, 0]]
V^T = [[-0.6196, -0.7849],
       [-0.7849,  0.6196]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.linalg as spla

A = np.array([[1, 2], [3, 4], [5, 6]])
print("Original matrix:")
print(A)

print("\n1. NumPy SVD:")
U, s, Vh = np.linalg.svd(A, full_matrices=True)
print("U matrix:")
print(U)
print("Singular values:")
print(s)
print("V^T matrix:")
print(Vh)
# Rebuild the rectangular Σ and reconstruct A = U Σ V^T
S = np.zeros((A.shape[0], A.shape[1]))
S[:len(s), :len(s)] = np.diag(s)
A_reconstructed = U @ S @ Vh
print("Reconstructed matrix:")
print(A_reconstructed)

print("\n2. TensorFlow SVD:")
tf_A = tf.constant(A, dtype=tf.float32)
s_tf, u_tf, v_tf = tf.linalg.svd(tf_A)  # note the return order: s, u, v
print("Singular values:")
print(s_tf.numpy())
print("U matrix:")
print(u_tf.numpy())
print("V matrix (note: this is V, not V^T):")
print(v_tf.numpy())

print("\n3. PyTorch SVD:")
torch_A = torch.tensor(A, dtype=torch.float32)
U_torch, s_torch, V_torch = torch.svd(torch_A)  # deprecated in favour of torch.linalg.svd, which returns V^T
print("U matrix:")
print(U_torch)
print("Singular values:")
print(s_torch)
print("V matrix (note: this is V, not V^T):")
print(V_torch)

print("\n4. pandas SVD:")
df_A = pd.DataFrame(A)
U_pd, s_pd, Vh_pd = np.linalg.svd(df_A.values)
print("pandas has no SVD of its own; NumPy's SVD is applied to the underlying values")

print("\n5. SciPy SVD:")
U_scipy, s_scipy, Vh_scipy = spla.svd(A)
print("U matrix:")
print(U_scipy)
print("Singular values:")
print(s_scipy)
print("V^T matrix:")
print(Vh_scipy)
```
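As a sketch of the compression/denoising use case, the following keeps only the k largest singular values to form a rank-k approximation; the matrix here is random and the rank is arbitrary.

```python
import numpy as np

M = np.random.rand(20, 10)                       # stand-in data matrix
U, s, Vh = np.linalg.svd(M, full_matrices=False)

k = 3
M_k = (U[:, :k] * s[:k]) @ Vh[:k, :]             # best rank-3 approximation (Frobenius norm)
print("Approximation error:", np.linalg.norm(M - M_k))
```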
6. Matrix Reshape
- Concept: change the shape of a matrix while keeping the total number of elements the same.
- Application scenarios:
  - Dimension changes between neural-network layers (e.g., flattening; a small sketch follows the code below)
  - Adjusting the dimensions of batched data
  - Feature-map conversion (e.g., between CNNs and RNNs)

Matrix A = [[1, 2, 3],    after reshape = [[1, 2],
            [4, 5, 6]]                     [3, 4],
                                           [5, 6]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

A = np.array([[1, 2, 3], [4, 5, 6]])
print("Original matrix:")
print(A)
print("Shape:", A.shape)

print("\n1. NumPy reshape:")
np_reshaped = A.reshape(3, 2)
print(np_reshaped)
print("New shape:", np_reshaped.shape)
np_reshaped_auto = A.reshape(-1, 2)  # -1 lets NumPy infer that dimension
print("Rows inferred automatically:")
print(np_reshaped_auto)

print("\n2. TensorFlow reshape:")
tf_A = tf.constant(A)
tf_reshaped = tf.reshape(tf_A, [3, 2])
print(tf_reshaped.numpy())

print("\n3. PyTorch reshape:")
torch_A = torch.tensor(A)
torch_reshaped = torch_A.reshape(3, 2)
print(torch_reshaped)
torch_viewed = torch_A.view(3, 2)  # view requires contiguous memory
print("Using view:")
print(torch_viewed)

print("\n4. pandas reshape:")
df_A = pd.DataFrame(A)
pd_reshaped = df_A.values.reshape(3, 2)
print(pd_reshaped)
pd_reshaped_df = pd.DataFrame(pd_reshaped)
print(pd_reshaped_df)

print("\n5. SciPy reshape (sparse matrix):")
sparse_A = sp.csr_matrix(A)
scipy_reshaped = sparse_A.reshape(3, 2)
print(scipy_reshaped.toarray())
```
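A minimal sketch of the flattening use case: per-example feature maps are collapsed into vectors before a fully connected layer. The shapes are made up.

```python
import numpy as np

batch = np.random.rand(16, 8, 4, 4)            # (N, C, H, W) feature maps
flattened = batch.reshape(batch.shape[0], -1)  # (N, C*H*W) = (16, 128)
print(flattened.shape)
```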
7. Matrix Normalization
- Concept: scale a matrix so that it satisfies a particular norm constraint.
- Application scenarios:
  - Batch Normalization
  - Weight regularization to reduce overfitting
  - Feature scaling to improve training efficiency (a cosine-similarity sketch built on row normalization follows the code below)
  - Gradient clipping to prevent exploding gradients

Matrix A = [[1, 2], [3, 4]]
After L2 row normalization = [[0.4472, 0.8944],
                              [0.6,    0.8   ]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp
import scipy.sparse.linalg as splinalg
from sklearn.preprocessing import normalize

A = np.array([[1, 2], [3, 4]])
print("Original matrix:")
print(A)

print("\n1. NumPy row normalization:")
row_norms = np.sqrt(np.sum(A**2, axis=1, keepdims=True))
np_normalized = A / row_norms
print(np_normalized)
np_normalized_sklearn = normalize(A, norm='l2', axis=1)
print("Using sklearn:")
print(np_normalized_sklearn)

print("\n2. TensorFlow normalization:")
tf_A = tf.constant(A, dtype=tf.float32)
tf_normalized = tf.nn.l2_normalize(tf_A, axis=1)
print(tf_normalized.numpy())
tf_batch_norm = tf.keras.layers.BatchNormalization()(tf.reshape(tf_A, [1, 2, 2, 1]))
print("Batch Normalization (applied here to a 4D-reshaped tensor):")
print(tf.reshape(tf_batch_norm, [2, 2]).numpy())

print("\n3. PyTorch normalization:")
torch_A = torch.tensor(A, dtype=torch.float32)
torch_normalized = torch.nn.functional.normalize(torch_A, p=2, dim=1)
print(torch_normalized)
batch_norm = torch.nn.BatchNorm1d(2)
torch_batch_norm = batch_norm(torch_A)  # training mode: uses batch statistics
print("Batch Normalization:")
print(torch_batch_norm)

print("\n4. pandas row normalization:")
df_A = pd.DataFrame(A)
pd_normalized = df_A.apply(lambda x: x / np.sqrt(np.sum(x**2)), axis=1)
print(pd_normalized)

print("\n5. SciPy row normalization (sparse matrix):")
sparse_A = sp.csr_matrix(A)
row_norms = splinalg.norm(sparse_A, axis=1)  # scipy.sparse.linalg must be imported explicitly
row_norms_inv = 1.0 / row_norms
diag_inv = sp.spdiags(row_norms_inv, 0, sparse_A.shape[0], sparse_A.shape[0])
scipy_normalized = diag_inv @ sparse_A
print(scipy_normalized.toarray())
```
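A small sketch of why row normalization is useful in practice: once rows have unit L2 norm, plain dot products are cosine similarities, a common trick for comparing embeddings. The embedding matrix here is random.

```python
import numpy as np

emb = np.random.rand(5, 16)                               # made-up embeddings
emb_n = emb / np.linalg.norm(emb, axis=1, keepdims=True)  # unit-norm rows

cos_sim = emb_n @ emb_n.T                                 # (5, 5); diagonal is 1.0
print(np.round(cos_sim, 3))
```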
8. Matrix Concatenation
- Concept: join several matrices together along a given axis.
- Application scenarios:
  - Feature fusion (multimodal learning; a small fusion sketch follows the code below)
  - Merging batched data
  - Concatenating sequence data (e.g., in RNNs)
  - Combining hierarchical features (e.g., in skip connections)

Matrix A = [[1, 2],    Matrix B = [[5, 6],
            [3, 4]]                [7, 8]]
Horizontal concatenation = [[1, 2, 5, 6],
                            [3, 4, 7, 8]]
Vertical concatenation = [[1, 2],
                          [3, 4],
                          [5, 6],
                          [7, 8]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
print("Matrix A:")
print(A)
print("Matrix B:")
print(B)

print("\n1. NumPy concatenation:")
np_hconcat = np.concatenate([A, B], axis=1)
print("Horizontal concatenation (axis=1):")
print(np_hconcat)
np_vconcat = np.concatenate([A, B], axis=0)
print("Vertical concatenation (axis=0):")
print(np_vconcat)
np_hstack = np.hstack([A, B])
np_vstack = np.vstack([A, B])
print("Using hstack:")
print(np_hstack)
print("Using vstack:")
print(np_vstack)

print("\n2. TensorFlow concatenation:")
tf_A = tf.constant(A)
tf_B = tf.constant(B)
tf_hconcat = tf.concat([tf_A, tf_B], axis=1)
print("Horizontal concatenation (axis=1):")
print(tf_hconcat.numpy())
tf_vconcat = tf.concat([tf_A, tf_B], axis=0)
print("Vertical concatenation (axis=0):")
print(tf_vconcat.numpy())

print("\n3. PyTorch concatenation:")
torch_A = torch.tensor(A)
torch_B = torch.tensor(B)
torch_hconcat = torch.cat([torch_A, torch_B], dim=1)
print("Horizontal concatenation (dim=1):")
print(torch_hconcat)
torch_vconcat = torch.cat([torch_A, torch_B], dim=0)
print("Vertical concatenation (dim=0):")
print(torch_vconcat)
torch_hstack = torch.hstack([torch_A, torch_B])
torch_vstack = torch.vstack([torch_A, torch_B])
print("Using hstack:")
print(torch_hstack)
print("Using vstack:")
print(torch_vstack)

print("\n4. pandas concatenation:")
df_A = pd.DataFrame(A)
df_B = pd.DataFrame(B)
pd_hconcat = pd.concat([df_A, df_B], axis=1)
print("Horizontal concatenation (axis=1):")
print(pd_hconcat)
pd_vconcat = pd.concat([df_A, df_B], axis=0)
print("Vertical concatenation (axis=0):")
print(pd_vconcat)

print("\n5. SciPy concatenation (sparse matrices):")
sparse_A = sp.csr_matrix(A)
sparse_B = sp.csr_matrix(B)
scipy_hconcat = sp.hstack([sparse_A, sparse_B])
print("Horizontal concatenation:")
print(scipy_hconcat.toarray())
scipy_vconcat = sp.vstack([sparse_A, sparse_B])
print("Vertical concatenation:")
print(scipy_vconcat.toarray())
```
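A minimal sketch of the feature-fusion use case: two hypothetical feature blocks for the same batch of examples are joined along the feature axis.

```python
import numpy as np

text_feats = np.random.rand(32, 128)    # made-up text features
image_feats = np.random.rand(32, 256)   # made-up image features

fused = np.concatenate([text_feats, image_feats], axis=1)  # (32, 384)
print(fused.shape)
```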
9. Matrix Summation and Reduction
- Concept: sum or otherwise reduce a matrix along a given axis.
- Application scenarios:
  - Pooling operations (average pooling, max pooling; a mean-pooling sketch follows the code below)
  - Normalizing attention weights
  - Computing batch statistics
  - Reduction inside loss functions

Matrix A = [[1, 2, 3],
            [4, 5, 6]]
Row sums = [6, 15]
Column sums = [5, 7, 9]
Total sum = 21
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

A = np.array([[1, 2, 3], [4, 5, 6]])
print("Original matrix:")
print(A)

print("\n1. NumPy reductions:")
np_row_sum = np.sum(A, axis=1)
print("Row sums (axis=1):")
print(np_row_sum)
np_col_sum = np.sum(A, axis=0)
print("Column sums (axis=0):")
print(np_col_sum)
np_total_sum = np.sum(A)
print("Total sum:")
print(np_total_sum)
np_max = np.max(A, axis=1)
np_min = np.min(A, axis=0)
np_mean = np.mean(A)
print("Row maxima:", np_max)
print("Column minima:", np_min)
print("Global mean:", np_mean)

print("\n2. TensorFlow reductions:")
tf_A = tf.constant(A)
tf_row_sum = tf.reduce_sum(tf_A, axis=1)
print("Row sums (axis=1):")
print(tf_row_sum.numpy())
tf_col_sum = tf.reduce_sum(tf_A, axis=0)
print("Column sums (axis=0):")
print(tf_col_sum.numpy())
tf_total_sum = tf.reduce_sum(tf_A)
print("Total sum:")
print(tf_total_sum.numpy())
tf_max = tf.reduce_max(tf_A, axis=1)
tf_min = tf.reduce_min(tf_A, axis=0)
tf_mean = tf.reduce_mean(tf_A)
print("Row maxima:", tf_max.numpy())
print("Column minima:", tf_min.numpy())
print("Global mean:", tf_mean.numpy())

print("\n3. PyTorch reductions:")
torch_A = torch.tensor(A)
torch_row_sum = torch.sum(torch_A, dim=1)
print("Row sums (dim=1):")
print(torch_row_sum)
torch_col_sum = torch.sum(torch_A, dim=0)
print("Column sums (dim=0):")
print(torch_col_sum)
torch_total_sum = torch.sum(torch_A)
print("Total sum:")
print(torch_total_sum)
torch_max = torch.max(torch_A, dim=1).values
torch_min = torch.min(torch_A, dim=0).values
torch_mean = torch.mean(torch_A.float())  # torch.mean requires a floating-point dtype
print("Row maxima:", torch_max)
print("Column minima:", torch_min)
print("Global mean:", torch_mean)

print("\n4. pandas reductions:")
df_A = pd.DataFrame(A)
pd_row_sum = df_A.sum(axis=1)
print("Row sums (axis=1):")
print(pd_row_sum)
pd_col_sum = df_A.sum(axis=0)
print("Column sums (axis=0):")
print(pd_col_sum)
pd_total_sum = df_A.values.sum()
print("Total sum:")
print(pd_total_sum)
pd_max = df_A.max(axis=1)
pd_min = df_A.min(axis=0)
pd_mean = df_A.mean().mean()
print("Row maxima:", pd_max.values)
print("Column minima:", pd_min.values)
print("Global mean:", pd_mean)

print("\n5. SciPy reductions (sparse matrix):")
sparse_A = sp.csr_matrix(A)
scipy_row_sum = sparse_A.sum(axis=1)
print("Row sums (axis=1):")
print(scipy_row_sum)
scipy_col_sum = sparse_A.sum(axis=0)
print("Column sums (axis=0):")
print(scipy_col_sum)
scipy_total_sum = sparse_A.sum()
print("Total sum:")
print(scipy_total_sum)
```
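A small sketch of the pooling use case: token embeddings for one sequence are mean-pooled along the sequence axis into a single vector. The shapes are illustrative.

```python
import numpy as np

token_embeddings = np.random.rand(10, 64)        # (seq_len, hidden), made-up values
sentence_vector = token_embeddings.mean(axis=0)  # (64,) pooled representation
print(sentence_vector.shape)
```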
10. Broadcasting
- Concept: automatically expand the smaller operand to match the shape of the larger one, so that arrays of different shapes can be combined in a single operation.
- Application scenarios:
  - Weight sharing across a batch
  - Adding a bias term to a feature matrix (a small bias sketch follows the code below)
  - Scaling batched data
  - Applying masks in attention mechanisms

Matrix A = [[1, 2, 3],    Vector b = [10, 20, 30]
            [4, 5, 6]]
A + b = [[11, 22, 33],
         [14, 25, 36]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

A = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([10, 20, 30])
print("Matrix A:")
print(A)
print("Vector b:")
print(b)

print("\n1. NumPy broadcasting:")
np_broadcast = A + b
print("A + b (broadcast):")
print(np_broadcast)
c = np.array([[100], [200]])
np_broadcast2 = A + c
print("A + c (column-vector broadcast):")
print(np_broadcast2)

print("\n2. TensorFlow broadcasting:")
tf_A = tf.constant(A)
tf_b = tf.constant(b)
tf_broadcast = tf_A + tf_b
print("A + b (broadcast):")
print(tf_broadcast.numpy())
tf_c = tf.constant([[100], [200]], dtype=tf_A.dtype)  # match dtypes; TF does not promote automatically
tf_broadcast2 = tf_A + tf_c
print("A + c (column-vector broadcast):")
print(tf_broadcast2.numpy())

print("\n3. PyTorch broadcasting:")
torch_A = torch.tensor(A)
torch_b = torch.tensor(b)
torch_broadcast = torch_A + torch_b
print("A + b (broadcast):")
print(torch_broadcast)
torch_c = torch.tensor([[100], [200]])
torch_broadcast2 = torch_A + torch_c
print("A + c (column-vector broadcast):")
print(torch_broadcast2)
torch_b_expanded = torch.broadcast_to(torch_b, (2, 3))
print("Explicitly broadcasting b:")
print(torch_b_expanded)

print("\n4. pandas broadcasting:")
df_A = pd.DataFrame(A)
s_b = pd.Series(b)
pd_broadcast = df_A + s_b.values
print("Broadcasting with NumPy values:")
print(pd_broadcast)
pd_broadcast2 = df_A.values + b
print("Operating on the underlying NumPy array:")
print(pd_broadcast2)

print("\n5. SciPy broadcasting (similar to NumPy):")
sparse_A = sp.csr_matrix(A)
scipy_broadcast = sp.csr_matrix(sparse_A.toarray() + b)
print("Sparse-matrix broadcasting (via a dense conversion):")
print(scipy_broadcast.toarray())
```
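A minimal sketch of the bias use case: the (out_features,) bias vector broadcasts across the batch dimension of a linear layer's output. All values are random placeholders.

```python
import numpy as np

x = np.random.rand(32, 20)   # (batch, in_features)
W = np.random.rand(20, 5)    # (in_features, out_features)
b = np.random.rand(5)        # (out_features,)

y = x @ W + b                # b broadcasts over the 32 rows -> (32, 5)
print(y.shape)
```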
11. Matrix Masking
- Concept: use a boolean matrix to selectively keep or modify specific elements of a matrix.
- Application scenarios:
  - Attention masks (the masking mechanism in Transformers; a causal-mask sketch follows the code below)
  - Handling padded, variable-length sequences
  - Conditional computation (selective activation)
  - Gradient masking (controlling backpropagation)

Matrix A = [[1, 2, 3],    Mask M = [[True, False, True],
            [4, 5, 6]]              [False, True, False]]
After applying the mask = [[1, 0, 3],
                           [0, 5, 0]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

A = np.array([[1, 2, 3], [4, 5, 6]])
mask = np.array([[True, False, True], [False, True, False]])
print("Original matrix A:")
print(A)
print("Mask matrix:")
print(mask)

print("\n1. NumPy masking:")
np_masked = A * mask
print("Apply mask (A * mask):")
print(np_masked)
np_masked_specific = np.where(mask, A, -1)
print("Apply mask (replace with a specific value):")
print(np_masked_specific)
condition_mask = A > 3
np_conditional = np.where(condition_mask, A, 0)
print("Conditional mask (A > 3):")
print(np_conditional)

print("\n2. TensorFlow masking:")
tf_A = tf.constant(A)
tf_mask = tf.constant(mask)
tf_masked = tf_A * tf.cast(tf_mask, tf_A.dtype)  # cast to A's dtype; TF does not promote automatically
print("Apply mask (A * mask):")
print(tf_masked.numpy())
tf_masked_specific = tf.where(tf_mask, tf_A, -1)
print("Apply mask (replace with a specific value):")
print(tf_masked_specific.numpy())
tf_condition_mask = tf_A > 3
tf_conditional = tf.where(tf_condition_mask, tf_A, 0)
print("Conditional mask (A > 3):")
print(tf_conditional.numpy())

print("\n3. PyTorch masking:")
torch_A = torch.tensor(A)
torch_mask = torch.tensor(mask)
torch_masked = torch_A * torch_mask
print("Apply mask (A * mask):")
print(torch_masked)
torch_masked_specific = torch.where(torch_mask, torch_A, torch.tensor(-1))
print("Apply mask (replace with a specific value):")
print(torch_masked_specific)
torch_condition_mask = torch_A > 3
torch_conditional = torch.where(torch_condition_mask, torch_A, torch.tensor(0))
print("Conditional mask (A > 3):")
print(torch_conditional)
seq_length = 4
causal_mask = torch.tril(torch.ones(seq_length, seq_length))
print("Causal mask (lower-triangular matrix):")
print(causal_mask)

print("\n4. pandas masking:")
df_A = pd.DataFrame(A)
pd_mask = pd.DataFrame(mask)
pd_masked = df_A.mask(~pd_mask, 0)  # mask() replaces entries where the condition is True
print("Apply mask (A.mask(~mask, 0)):")
print(pd_masked)
pd_masked_specific = df_A.where(pd_mask, -1)
print("Apply mask (replace with a specific value):")
print(pd_masked_specific)
pd_condition_mask = df_A > 3
pd_conditional = df_A.where(pd_condition_mask, 0)
print("Conditional mask (A > 3):")
print(pd_conditional)

print("\n5. SciPy masking (sparse matrices):")
sparse_A = sp.csr_matrix(A)
sparse_mask = sp.csr_matrix(mask)
scipy_masked = sparse_A.multiply(sparse_mask)
print("Apply mask (sparse matrices):")
print(scipy_masked.toarray())
```
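A small sketch of the attention-mask use case: disallowed (future) positions in a score matrix are filled with -inf before the softmax, so their weights become exactly zero. The scores here are random.

```python
import numpy as np

scores = np.random.rand(4, 4)                      # made-up attention scores
allowed = np.tril(np.ones((4, 4), dtype=bool))     # lower triangle = positions a token may attend to

masked = np.where(allowed, scores, -np.inf)
weights = np.exp(masked - masked.max(axis=1, keepdims=True))
weights /= weights.sum(axis=1, keepdims=True)
print(np.round(weights, 3))                        # strictly-upper entries are 0
```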
12. Matrix Slicing and Indexing
- Concept: extract a subset of a matrix or specific elements from it.
- Application scenarios:
  - Feature selection
  - Extracting batched data
  - Splitting heads in attention mechanisms (a head-splitting sketch follows the code below)
  - Selective access to hidden states

Matrix A = [[1, 2,  3,  4],
            [5, 6,  7,  8],
            [9, 10, 11, 12]]
Submatrix (rows 1-2, columns 1-2, zero-based) = [[6,  7],
                                                 [10, 11]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

A = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print("Original matrix:")
print(A)

print("\n1. NumPy slicing and indexing:")
np_slice = A[1:3, 1:3]
print("Submatrix (rows 1-2, columns 1-2):")
print(np_slice)
np_row = A[1, :]
np_col = A[:, 2]
print("Second row:")
print(np_row)
print("Third column:")
print(np_col)
np_specific = A[[0, 2], [1, 3]]
print("Specific elements (A[0,1] and A[2,3]):")
print(np_specific)
np_bool_idx = A[A > 5]
print("All elements greater than 5:")
print(np_bool_idx)

print("\n2. TensorFlow slicing and indexing:")
tf_A = tf.constant(A)
tf_slice = tf_A[1:3, 1:3]
print("Submatrix (rows 1-2, columns 1-2):")
print(tf_slice.numpy())
tf_row = tf_A[1, :]
tf_col = tf_A[:, 2]
print("Second row:")
print(tf_row.numpy())
print("Third column:")
print(tf_col.numpy())
tf_specific = tf.gather_nd(tf_A, [[0, 1], [2, 3]])
print("Specific elements (A[0,1] and A[2,3]):")
print(tf_specific.numpy())
tf_bool_mask = tf_A > 5
tf_bool_idx = tf.boolean_mask(tf_A, tf_bool_mask)
print("All elements greater than 5:")
print(tf_bool_idx.numpy())

print("\n3. PyTorch slicing and indexing:")
torch_A = torch.tensor(A)
torch_slice = torch_A[1:3, 1:3]
print("Submatrix (rows 1-2, columns 1-2):")
print(torch_slice)
torch_row = torch_A[1, :]
torch_col = torch_A[:, 2]
print("Second row:")
print(torch_row)
print("Third column:")
print(torch_col)
torch_specific = torch_A[[0, 2], [1, 3]]
print("Specific elements (A[0,1] and A[2,3]):")
print(torch_specific)
torch_bool_mask = torch_A > 5
torch_bool_idx = torch_A[torch_bool_mask]
print("All elements greater than 5:")
print(torch_bool_idx)

print("\n4. pandas slicing and indexing:")
df_A = pd.DataFrame(A)
pd_slice = df_A.iloc[1:3, 1:3]
print("Submatrix (rows 1-2, columns 1-2):")
print(pd_slice)
pd_row = df_A.iloc[1, :]
pd_col = df_A.iloc[:, 2]
print("Second row:")
print(pd_row)
print("Third column:")
print(pd_col)
pd_bool_idx = df_A[df_A > 5]
print("All elements greater than 5 (others become NaN):")
print(pd_bool_idx)

print("\n5. SciPy slicing and indexing (sparse matrix):")
sparse_A = sp.csr_matrix(A)
scipy_slice = sparse_A[1:3, 1:3]
print("Submatrix (rows 1-2, columns 1-2):")
print(scipy_slice.toarray())
scipy_row = sparse_A[1, :].toarray()
scipy_col = sparse_A[:, 2].toarray()
print("Second row:")
print(scipy_row)
print("Third column:")
print(scipy_col)
```
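A minimal sketch of the head-splitting use case: a projected tensor is sliced into equal chunks along the feature axis, one per attention head. The sizes are illustrative.

```python
import numpy as np

hidden = np.random.rand(10, 64)        # (seq_len, d_model), made-up values
num_heads, head_dim = 4, 16

heads = [hidden[:, h * head_dim:(h + 1) * head_dim] for h in range(num_heads)]
print(len(heads), heads[0].shape)      # 4 heads of shape (10, 16)
```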
13. Dimension Squeezing and Expanding (Squeeze/Unsqueeze)
- Concept:
  - squeeze: remove dimensions of size 1 from a tensor (all of them, or a specified one)
  - unsqueeze: insert a dimension of size 1 at a given position
- Application scenarios:
  - Adding a batch dimension for batched operations (a small sketch follows the code below)
  - Adjusting channel dimensions in CNNs
  - Matching input/output dimensions between network layers
  - Preparing dimensions before relying on broadcasting

Original tensor: shape=[2, 1, 3]        After squeeze: shape=[2, 3]
[[[1, 2, 3]],                           [[1, 2, 3],
 [[4, 5, 6]]]                            [4, 5, 6]]

Original tensor: shape=[2, 3]           After unsqueeze(1): shape=[2, 1, 3]
[[1, 2, 3],                             [[[1, 2, 3]],
 [4, 5, 6]]                              [[4, 5, 6]]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd

x = np.array([[[1, 2, 3]], [[4, 5, 6]]])
print("Original array:")
print(x)
print("Shape:", x.shape)

print("\n1. NumPy squeeze/expand_dims:")
np_squeezed = np.squeeze(x, axis=1)
print("After squeeze:")
print(np_squeezed)
print("Shape:", np_squeezed.shape)
y = np.array([[1, 2, 3], [4, 5, 6]])
np_unsqueezed = np.expand_dims(y, axis=1)
print("\nAfter expand_dims:")
print(np_unsqueezed)
print("Shape:", np_unsqueezed.shape)

print("\n2. TensorFlow squeeze/expand_dims:")
tf_x = tf.constant(x)
tf_squeezed = tf.squeeze(tf_x, axis=1)
print("After squeeze:")
print(tf_squeezed.numpy())
print("Shape:", tf_squeezed.shape)
tf_y = tf.constant(y)
tf_unsqueezed = tf.expand_dims(tf_y, axis=1)
print("\nAfter expand_dims:")
print(tf_unsqueezed.numpy())
print("Shape:", tf_unsqueezed.shape)

print("\n3. PyTorch squeeze/unsqueeze:")
torch_x = torch.tensor(x)
torch_squeezed = torch_x.squeeze(1)
print("After squeeze:")
print(torch_squeezed)
print("Shape:", torch_squeezed.size())
torch_y = torch.tensor(y)
torch_unsqueezed = torch_y.unsqueeze(1)
print("\nAfter unsqueeze:")
print(torch_unsqueezed)
print("Shape:", torch_unsqueezed.size())

print("\n4. pandas has no direct squeeze/unsqueeze equivalent; convert to NumPy first")
print("\n5. SciPy sparse matrices do not support multi-dimensional squeeze/unsqueeze; convert to dense arrays first")
```
14. Memory Contiguity (Contiguous)
- Concept: ensure that a tensor is stored contiguously in memory; some operations (such as transposing or slicing) produce non-contiguous views.
- Application scenarios:
  - Before operations that require contiguous memory (a view-after-transpose sketch follows the code below)
  - Improving the efficiency of later computations after view-style operations
  - CUDA performance optimization
  - Interfacing with external libraries that require contiguous memory

# Simplified picture for a 2×3 matrix:
Buffer of the original matrix:    [1, 2, 3, 4, 5, 6]
Transposed view (non-contiguous): visits the same buffer in the order 1, 4, 2, 5, 3, 6
After contiguous():               the data is copied into a new buffer stored as [1, 4, 2, 5, 3, 6]
```python
import numpy as np
import tensorflow as tf
import torch

matrix = np.array([[1, 2, 3], [4, 5, 6]])

print("1. PyTorch contiguous:")
torch_matrix = torch.tensor(matrix)
print("Original tensor contiguous:", torch_matrix.is_contiguous())
torch_transposed = torch_matrix.T
print("\nContiguous after transpose:", torch_transposed.is_contiguous())
torch_cont = torch_transposed.contiguous()
print("Contiguous after calling contiguous():", torch_cont.is_contiguous())
print("Same data contents:", torch.all(torch_transposed == torch_cont).item())

print("\n2. NumPy ascontiguousarray:")
np_transposed = matrix.T
print("C-contiguous after transpose:", np_transposed.flags.c_contiguous)
np_cont = np.ascontiguousarray(np_transposed)
print("C-contiguous after ascontiguousarray:", np_cont.flags.c_contiguous)

print("\n3. TensorFlow has no direct contiguous() equivalent, but tf.identity creates a copy")
tf_matrix = tf.constant(matrix)
tf_transposed = tf.transpose(tf_matrix)
tf_copy = tf.identity(tf_transposed)

print("\n4. pandas has no direct contiguous() equivalent")
print("\n5. SciPy sparse matrices use their own storage formats; converting between formats re-lays-out the data")
```
15. Getting Tensor Dimensions (Size)
- Concept: obtain the dimension information (shape) of a tensor.
- Application scenarios:
  - Debugging and verifying tensor dimensions
  - Dynamically building neural-network layers (a small sketch follows the code below)
  - Dimension checks before data processing
  - Dynamically adjusting the batch size

A 3-D tensor with shape=[2, 3, 4]:
[[[v, v, v, v],
  [v, v, v, v],
  [v, v, v, v]],
 [[v, v, v, v],
  [v, v, v, v],
  [v, v, v, v]]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

array = np.random.rand(2, 3, 4)

print("1. NumPy shape:")
print("shape attribute:", array.shape)
print("Number of dimensions:", array.ndim)
print("Total number of elements:", array.size)

print("\n2. TensorFlow shape:")
tf_tensor = tf.constant(array)
print("shape attribute:", tf_tensor.shape)
print("Number of dimensions:", tf.rank(tf_tensor).numpy())
print("Total number of elements:", tf.size(tf_tensor).numpy())

print("\n3. PyTorch size:")
torch_tensor = torch.tensor(array)
print("size():", torch_tensor.size())
print("shape attribute:", torch_tensor.shape)
print("Size of a given dimension, size(0):", torch_tensor.size(0))
print("Number of dimensions:", torch_tensor.dim())

print("\n4. pandas shape:")
df = pd.DataFrame(np.random.rand(3, 4))
print("shape attribute:", df.shape)
print("Number of rows:", df.shape[0])
print("Number of columns:", df.shape[1])
print("Total number of elements:", df.size)

print("\n5. SciPy shape:")
sparse_matrix = sp.csr_matrix(np.random.rand(3, 4))
print("shape attribute:", sparse_matrix.shape)
print("Number of dimensions:", sparse_matrix.ndim)
print("Number of non-zero elements:", sparse_matrix.nnz)
```
16. Tensor Repeat
- Concept: repeat a tensor's elements along specified dimensions.
- Application scenarios:
  - Generating batched data
  - Building attention masks (a mask-tiling sketch follows the code below)
  - Expanding feature vectors to match other tensors
  - Upsampling in image processing

Original matrix: [[1, 2],    repeat(1, 2) → [[1, 2, 1, 2],
                  [3, 4]]                    [3, 4, 3, 4]]

Original matrix: [[1, 2],    repeat(2, 2) → [[1, 2, 1, 2],
                  [3, 4]]                    [3, 4, 3, 4],
                                             [1, 2, 1, 2],
                                             [3, 4, 3, 4]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

matrix = np.array([[1, 2], [3, 4]])
print("Original matrix:")
print(matrix)
print("Shape:", matrix.shape)

print("\n1. NumPy tile/repeat:")
np_tiled = np.tile(matrix, (2, 2))
print("After np.tile((2, 2)):")
print(np_tiled)
print("Shape:", np_tiled.shape)
np_repeat_0 = np.repeat(matrix, 2, axis=0)
print("\nAfter np.repeat(2, axis=0):")
print(np_repeat_0)
print("Shape:", np_repeat_0.shape)
np_repeat_1 = np.repeat(matrix, 2, axis=1)
print("\nAfter np.repeat(2, axis=1):")
print(np_repeat_1)
print("Shape:", np_repeat_1.shape)

print("\n2. TensorFlow tile/repeat:")
tf_matrix = tf.constant(matrix)
tf_tiled = tf.tile(tf_matrix, [2, 2])
print("After tf.tile([2, 2]):")
print(tf_tiled.numpy())
print("Shape:", tf_tiled.shape)
tf_repeat_0 = tf.repeat(tf_matrix, 2, axis=0)
print("\nAfter tf.repeat(2, axis=0):")
print(tf_repeat_0.numpy())
print("Shape:", tf_repeat_0.shape)

print("\n3. PyTorch repeat:")
torch_matrix = torch.tensor(matrix)
torch_repeat = torch_matrix.repeat(2, 2)
print("After torch.repeat(2, 2):")
print(torch_repeat)
print("Shape:", torch_repeat.size())
vector = torch.tensor([1, 2, 3])
vector_expanded = vector.unsqueeze(0)
vector_repeated = vector_expanded.repeat(3, 1)
print("\nVector after unsqueeze(0) and repeat(3, 1):")
print(vector_repeated)
print("Shape:", vector_repeated.size())

print("\n4. pandas repeat:")
df = pd.DataFrame({'A': [1, 3], 'B': [2, 4]})
df_repeat_index = pd.DataFrame(np.repeat(df.values, 2, axis=0), columns=df.columns)
print("Each row repeated twice:")
print(df_repeat_index)

print("\n5. SciPy sparse matrices have no direct repeat function; convert to a dense array first")
```
17. Upper-Triangular Matrix (Triu)
- Concept: extract or create an upper-triangular matrix, keeping only the elements on and above the main diagonal.
- Application scenarios:
  - Building attention masks (autoregressive models, Transformer decoders; a causal-mask sketch follows the code below)
  - Matrix decompositions
  - Solving systems of linear equations
  - Avoiding duplicate computation (e.g., in distance matrices)

Original matrix:    After triu:       After triu(k=1):
[[1, 2, 3],         [[1, 2, 3],       [[0, 2, 3],
 [4, 5, 6],     →    [0, 5, 6],   →    [0, 0, 6],
 [7, 8, 9]]          [0, 0, 9]]        [0, 0, 0]]
```python
import numpy as np
import tensorflow as tf
import torch
import pandas as pd
import scipy.sparse as sp

matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print("Original matrix:")
print(matrix)

print("\n1. NumPy triu:")
np_triu = np.triu(matrix)
print("np.triu(matrix):")
print(np_triu)
np_triu_k1 = np.triu(matrix, k=1)
print("\nnp.triu(matrix, k=1):")
print(np_triu_k1)

print("\n2. TensorFlow upper triangle:")
tf_matrix = tf.constant(matrix)
tf_triu = tf.linalg.band_part(tf_matrix, 0, -1)  # keep the diagonal and everything above it
print("TF upper-triangular matrix:")
print(tf_triu.numpy())
# Strictly above the diagonal: the upper triangle minus the diagonal itself
tf_triu_k1 = tf.linalg.band_part(tf_matrix, 0, -1) - tf.linalg.band_part(tf_matrix, 0, 0)
print("\nTF strictly above the main diagonal:")
print(tf_triu_k1.numpy())

print("\n3. PyTorch triu:")
torch_matrix = torch.tensor(matrix)
torch_triu = torch.triu(torch_matrix)
print("torch.triu():")
print(torch_triu)
torch_triu_k1 = torch.triu(torch_matrix, diagonal=1)
print("\ntorch.triu(diagonal=1):")
print(torch_triu_k1)

print("\n4. pandas has no triu function of its own; use NumPy on the underlying values:")
df = pd.DataFrame(matrix)
df_triu = pd.DataFrame(np.triu(df.values))
print(df_triu)

print("\n5. SciPy triu:")
sparse_matrix = sp.csr_matrix(matrix)
scipy_triu = sp.triu(sparse_matrix)
print("sp.triu():")
print(scipy_triu.toarray())
scipy_triu_k1 = sp.triu(sparse_matrix, k=1)
print("\nsp.triu(k=1):")
print(scipy_triu_k1.toarray())
```
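A small sketch of the decoder-mask use case: triu with diagonal=1 marks the strictly-future positions, which are filled with -inf before the softmax so each token can only attend to itself and earlier positions. The scores are random.

```python
import torch

seq_len = 5
future = torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()  # True = disallowed
scores = torch.randn(seq_len, seq_len)

masked = scores.masked_fill(future, float("-inf"))
print(torch.softmax(masked, dim=-1))   # strictly-upper weights are 0
```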