基于 Python 的实现:居民用电量数据分析与可视化
本文将介绍如何利用 Python 技术栈(包括 pymysql、pandas、matplotlib 等库)对居民用电量数据进行分析和可视化,以帮助我们更好地理解用电行为模式。
数据准备
在MySQL数据库中创建数据,,数据库表结构如下:
date
:记录日期resident
:居民标识consumption
:当日用电量(千瓦时)
-- 创建表语句..
CREATE TABLE `daily_electricity` (
`id` int NOT NULL AUTO_INCREMENT,
`date` date NOT NULL,
`resident` varchar(50) NOT NULL,
`consumption` int NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `unique_record` (`date`,`resident`)
) ENGINE=InnoDB AUTO_INCREMENT=601 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
-- 插入数据语句..
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (1, '2025-02-27', '居民1', 26);
-- 更多数据记录...
读取数据
通过 pymysql 库连接数据库并获取数据:
import pymysql
def fetch_data_from_db():
"""从数据库中读取用电量数据"""
connection = pymysql.connect(**DB_CONFIG)
data = []
try:
with connection.cursor() as cursor:
cursor.execute("SELECT date, resident, consumption FROM daily_electricity ORDER BY date, resident")
data = cursor.fetchall()
finally:
connection.close()
return data
数据处理
将获取的数据转换为 pandas DataFrame 格式,便于后续分析:
import pandas as pd
def convert_to_dataframe(data):
"""将数据转换为 DataFrame 格式"""
df = pd.DataFrame(data, columns=['date', 'resident', 'consumption'])
df['date'] = pd.to_datetime(df['date'])
return df
数据可视化
1. 折线图:居民用电量趋势
通过折线图可以直观观察每个居民在不同日期的用电量变化趋势:
def plot_trends(df):
plt.figure(figsize=(12, 6))
for resident, group in df.groupby('resident'):
plt.plot(group['date'], group['consumption'], marker='o', linestyle='-', label=resident)
plt.title('居民每日用电量趋势')
plt.xlabel('日期')
plt.ylabel('用电量 (千瓦时)')
plt.legend()
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()
2. 柱状图:最近 7 天平均用电量对比
展示最近一周内各居民的平均用电量对比:
def plot_bar_chart(df):
recent_df = df[df['date'] >= df['date'].max() - pd.Timedelta(days=7)]
avg_consumption = recent_df.groupby('resident')['consumption'].mean().reset_index()
plt.figure(figsize=(10, 5))
plt.bar(avg_consumption['resident'], avg_consumption['consumption'], color='orange')
plt.title('最近 7 天各居民平均用电量对比')
plt.xlabel('居民')
plt.ylabel('平均用电量 (千瓦时)')
plt.xticks(rotation=0)
plt.grid(True, axis='y')
plt.tight_layout()
plt.show()
3. 饼图:总用电量分布
展示各居民在统计周期内的总用电量占比:
def plot_pie_chart(df):
total_consumption = df.groupby('resident')['consumption'].sum()
plt.figure(figsize=(8, 8))
plt.pie(total_consumption, labels=total_consumption.index, autopct='%1.1f%%',
startangle=90, shadow=True, explode=[0.05] * len(total_consumption))
plt.title('各居民总用电量占比')
plt.axis('equal')
plt.tight_layout()
plt.show()
4. 热力图:每周用电量分布
通过热力图观察不同星期几和不同周的用电量分布模式:
def plot_heatmap(df):
df['weekday'] = df['date'].dt.day_name()
df['week_number'] = df['date'].dt.isocalendar().week
weekly_data = df.groupby(['week_number', 'weekday'])['consumption'].mean().unstack()
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekly_data = weekly_data.reindex(columns=weekday_order)
plt.figure(figsize=(12, 8))
plt.imshow(weekly_data, cmap='YlOrRd')
plt.colorbar(label='平均用电量 (千瓦时)')
plt.title('每周用电量热力图')
plt.xticks(np.arange(len(weekday_order)), weekday_order, rotation=45)
plt.yticks(np.arange(len(weekly_data.index)), weekly_data.index)
plt.xlabel('星期')
plt.ylabel('周数')
for i in range(len(weekly_data.index)):
for j in range(len(weekday_order)):
if not pd.isna(weekly_data.iloc[i, j]):
plt.text(j, i, f"{
weekly_data.iloc[i, j]:.1f}",
ha="center", va="center", color="black" if weekly_data.iloc[i, j] < 30 else "white")
plt.tight_layout()
plt.show()
5. 箱线图:用电量分布情况
展示各居民用电量的分布特征(中位数、四分位数、异常值等):
def plot_boxplot(df):
plt.figure(figsize=(10, 6))
plt.boxplot([df[df['resident'] == resident]['consumption'] for resident in df['resident'].unique()],
labels=df['resident'].unique())
plt.title('各居民用电量分布箱线图')
plt.xlabel('居民')
plt.ylabel('用电量 (千瓦时)')
plt.grid(True, linestyle='--')
plt.tight_layout()
plt.show()
主程序与执行流程
完整的代码执行流程如下:
if __name__ == "__main__":
# 从数据库读取数据
fetched_data = fetch_data_from_db()
df = convert_to_dataframe(fetched_data)
# 绘制各种图表
plot_trends(df)
plot_bar_chart(df)
plot_pie_chart(df)
plot_heatmap(df)
plot_boxplot(df)
通过多种图表形式直观展示用电模式和趋势。这种分析方法有助于:
- 识别用电高峰和低谷时段
- 发现异常用电行为
- 制定个性化的节能策略
- 优化社区能源分配
共享源代码:
1、数据库数据:
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (1, '2025-02-27', '居民1', 26);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (2, '2025-02-27', '居民2', 35);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (3, '2025-02-27', '居民3', 28);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (4, '2025-02-27', '居民4', 33);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (5, '2025-02-27', '居民5', 25);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (6, '2025-02-28', '居民1', 29);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (7, '2025-02-28', '居民2', 35);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (8, '2025-02-28', '居民3', 33);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (9, '2025-02-28', '居民4', 37);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (10, '2025-02-28', '居民5', 32);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (11, '2025-03-01', '居民1', 23);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (12, '2025-03-01', '居民2', 24);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (13, '2025-03-01', '居民3', 17);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (14, '2025-03-01', '居民4', 20);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (15, '2025-03-01', '居民5', 19);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (16, '2025-03-02', '居民1', 37);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (17, '2025-03-02', '居民2', 32);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (18, '2025-03-02', '居民3', 31);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (19, '2025-03-02', '居民4', 40);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (20, '2025-03-02', '居民5', 33);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (21, '2025-03-03', '居民1', 30);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (22, '2025-03-03', '居民2', 31);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (23, '2025-03-03', '居民3', 29);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (24, '2025-03-03', '居民4', 25);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (25, '2025-03-03', '居民5', 21);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (26, '2025-03-04', '居民1', 20);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (27, '2025-03-04', '居民2', 25);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (28, '2025-03-04', '居民3', 18);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (29, '2025-03-04', '居民4', 20);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (30, '2025-03-04', '居民5', 25);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (31, '2025-03-05', '居民1', 17);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (32, '2025-03-05', '居民2', 20);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (33, '2025-03-05', '居民3', 17);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (34, '2025-03-05', '居民4', 20);
INSERT INTO `jm_ysl`.`daily_electricity` (`id`, `date`, `resident`, `consumption`) VALUES (35, '2025-03-05', '居民5', 16);
INSERT INTO `jm_ysl`.`daily_electricity` (