10.36 根据数据文件猜测表结构
C:\Users\lenovo>cvt_file.pl --iformat=csv stockdat.csv > tmp1.txt
C:\Users\lenovo>cvt_date.pl --iformat=us tmp1.txt > tmp2.txt
C:\Users\lenovo>guess_table.pl --labels --table=stocks tmp2.txt > stocks.sql
# Number of lines = 25, columns = 5
C:\Users\lenovo>mysql cookbook < stocks.sql
ERROR 1045 (28000): Access denied for user 'ODBC'@'localhost' (using password: NO)
C:\Users\lenovo>mysql -u cbuser -p cookbook < stocks.sql
Enter password: ******
import pandas as pd
import mysql.connector
from datetime import datetime
# 读取TSV文件
df = pd.read_csv("C:/Users/lenovo/tmp2.txt", sep='\t')
# 数据预处理:确保日期格式正确
df['trade_date'] = pd.to_datetime(df['trade_date'], errors='coerce')
# 数据库配置
config = {
'user': 'cbuser',
'password': 'cbpass',
'host': 'localhost',
'database': 'cookbook',
'charset': 'utf8mb4'
}
# 生成转义关键字的插入语句
insert_query = """
INSERT INTO stocks (commodity, trade_date, shares, price, `change`)
VALUES (%s, %s, %s, %s, %s)
"""
try:
# 连接数据库
with mysql.connector.connect(**config) as cnx:
cursor = cnx.cursor()
# 转换数据并处理类型
data_to_insert = []
for _, row in df.iterrows():
# 处理日期为字符串(YYYY-MM-DD)
trade_date = row['trade_date'].strftime('%Y-%m-%d') if not pd.isna(row['trade_date']) else None
# 确保数值类型正确
values = (
row['commodity'],
trade_date,
int(row['shares']),
float(row['price']),
float(row['change'])
)
data_to_insert.append(values)
# 批量插入
cursor.executemany(insert_query, data_to_insert)
cnx.commit()
print(f"成功导入 {len(data_to_insert)} 行数据")
except mysql.connector.Error as err:
print(f"数据库错误: {err}")
if 'data_to_insert' in locals() and data_to_insert:
print(f"第一行数据: {data_to_insert[0]}")
except Exception as e:
print(f"导入失败: {e}")
if 'df' in locals() and not df.empty:
print(f"数据样例: {df.head(1).to_dict('records')[0]}")
mysql> describe stocks;
+------------+-----------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+------------+-----------------+------+-----+---------+-------+
| commodity | varchar(6) | NO | | NULL | |
| trade_date | date | NO | | NULL | |
| shares | int unsigned | NO | | NULL | |
| price | double unsigned | NO | | NULL | |
| change | double | NO | | NULL | |
+------------+-----------------+------+-----+---------+-------+
5 rows in set (0.00 sec)
mysql> load data local infile 'C:\\Users\\lenovo\\tmp2.txt' into table stocks
-> ignore 1 lines;
ERROR 2068 (HY000): LOAD DATA LOCAL INFILE file request rejected due to restrictions on access.
mysql> load data local infile "C:\\Users\\lenovo\\tmp2.txt" into table stocks ignore 1 lines;
ERROR 2068 (HY000): LOAD DATA LOCAL INFILE file request rejected due to restrictions on access.
mysql> LOAD DATA LOCAL INFILE 'C:/Users/lenovo/tmp2.txt'
-> INTO TABLE stocks
-> IGNORE 1 LINES;
ERROR 2068 (HY000): LOAD DATA LOCAL INFILE file request rejected due to restrictions on access.
方案A:使用绝对路径且符合secure_file_priv限制
sql
-- 查找允许的目录
SHOW VARIABLES LIKE 'secure_file_priv';
-- 将文件移动到该目录后执行(例如:C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/)
LOAD DATA INFILE 'C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/tmp2.txt'
INTO TABLE stocks
IGNORE 1 LINES;
方案B:通过命令行导入
bash
mysqlimport --local --ignore-lines=1 --fields-terminated-by=, --columns='col1,col2,col3' -u username -p db_name C:/Users/lenovo/tmp2.txt
方案C:使用客户端工具
bash
# 使用mysqlsh (MySQL Shell)
mysqlsh -u username -p --sql --import C:/Users/lenovo/tmp2.txt db_name.stocks
mysql> -- 查看表行数
mysql> SELECT COUNT(*) FROM stocks;
+----------+
| COUNT(*) |
+----------+
| 25 |
+----------+
1 row in set (0.02 sec)
mysql>
mysql> -- 查看数据样例
mysql> SELECT * FROM stocks LIMIT 3;
+-----------+------------+---------+-------+--------+
| commodity | trade_date | shares | price | change |
+-----------+------------+---------+-------+--------+
| sugar | 2006-12-14 | 1000000 | 10.5 | -0.125 |
| oil | 2006-12-14 | 96000 | 60.25 | 0.25 |
| wheat | 2006-12-14 | 500000 | 4.75 | 0.1 |
+-----------+------------+---------+-------+--------+
3 rows in set (0.00 sec)
mysql>
mysql> -- 检查数值类型
mysql> SELECT shares, price, `change` FROM stocks LIMIT 1;
+---------+-------+--------+
| shares | price | change |
+---------+-------+--------+
| 1000000 | 10.5 | -0.125 |
+---------+-------+--------+
1 row in set (0.00 sec)
mysql>
mysql> -- 使用保留关键字查询
mysql> SELECT commodity, `change` FROM stocks WHERE `change` > 0;
+-----------+--------+
| commodity | change |
+-----------+--------+
| oil | 0.25 |
| wheat | 0.1 |
| gold | 5.25 |
| sugar | 0.12 |
| wheat | 0.07 |
| gold | 5.25 |
| oil | 1.4 |
| wheat | 0.08 |
| copper | 0.05 |
| sugar | 0.25 |
| gold | 6.75 |
| copper | 0.07 |
| oil | 0.75 |
| wheat | 0.03 |
| gold | 4.75 |
+-----------+--------+
15 rows in set (0.00 sec)
10.37 在mysql和access之间交换数据
将csv 表格导入 Access中
执行步骤
在 Access 中按 Alt+F11 打开 VBA 编辑器,插入新模块并粘贴代码
修改 C:\Users\lenovo\Documents\Population.accdb 为实际路径,确认 strAccessTable 表名
按 Alt+F8 运行宏 ImportAllCSVFiles
Sub ImportAllCSVFiles()
Dim strFolder As String
Dim strFile As String
Dim strAccessTable As String
Dim blnHasHeaders As Boolean
' 设置参数
strFolder = "D:\machine_learning\database\CSV-datasets\" ' 改为你的实际路径(必须以 \ 结尾)
strAccessTable = "C:\Users\lenovo\Documents\Population.accdb" ' Access目标表名
blnHasHeaders = True ' CSV是否有标题行(True表示有,False表示无)
' 遍历文件夹中的CSV文件
strFile = Dir(strFolder & "*.csv")
Do While Len(strFile) > 0
' 使用TransferText方法导入CSV
DoCmd.TransferText _
TransferType:=acImportDelim, _
TableName:=strAccessTable, _
FileName:=strFolder & strFile, _
HasFieldNames:=blnHasHeaders
strFile = Dir()
Loop
MsgBox "所有CSV文件导入完成!", vbInformation
End Sub
#!/usr/bin/perl
use strict;
use warnings;
use File::Find;
use Text::CSV_XS qw(CSV);
# 文件夹路径
my $folder = "D:/machine_learning/database/CSV-datasets";
# 遍历文件夹
find( {
wanted => sub {
return unless -f and /\.csv$/i;
process_csv($folder . '/' . $_);
},
no_chdir => 1
}, $folder );
sub process_csv {
my $csv_file = shift;
my $excel_file = $csv_file;
$excel_file =~ s/\.csv$/.xls/i;
# 读取CSV
my $csv = Text::CSV_XS->new({
sep_char => ',', # 分隔符(可改为\t制表符)
auto_diag => 1,
binary => 1
}) or die "无法创建CSV对象: " . Text::CSV_XS->error_diag();
open(my $fh, '<:encoding(utf8)', $csv_file) or die "无法打开 $csv_file: $!";
# 转换为Excel(需安装 Spreadsheet::WriteExcel 模块)
eval {
use Spreadsheet::WriteExcel;
my $workbook = Spreadsheet::WriteExcel->new($excel_file);
my $worksheet = $workbook->add_worksheet();
my $row = 0;
while (my $row_data = $csv->getline($fh)) {
for my $col (0..$#$row_data) {
$worksheet->write($row, $col, $row_data->[$col]);
}
$row++;
}
$workbook->close();
print "已转换: $csv_file -> $excel_file\n";
};
if ($@) {
print "转换失败 ($csv_file): $@\n";
}
close $fh;
}
Access 导出表格population_5.txt
Sub ExportForMySQL()
Dim tableName As String
Dim exportPath As String
tableName = "C:\Users\lenovo\Documents\Population_accdb5" ' 目标表名
exportPath = "C:\Users\lenovo\population_5.txt" ' 导出路径
' 导出为制表符分隔的文本
DoCmd.TransferText acExportDelim, , tableName, exportPath, True
MsgBox "导出完成!", vbInformation
End Sub
2. 批处理脚本(Windows)population_5.bat
@echo off
chcp 65001 > nul
setlocal enabledelayedexpansion
:: 配置参数
set "MYSQL_USER=cbuser"
set "MYSQL_PASS=cbpass"
set "MYSQL_DB=cookbook"
set "TABLE_NAME=population_5"
:: 使用当前用户目录下的临时目录
set "TEMP_DIR=C:\Users\%USERNAME%\temp"
set "DATA_FILE=C:\Users\lenovo\population_5.txt"
set "SQL_FILE=%TEMP_DIR%\mysql_import_%RANDOM%.sql"
:: 创建临时目录(如果不存在)
if not exist "%TEMP_DIR%" (
mkdir "%TEMP_DIR%"
if errorlevel 1 (
echo ERROR: 无法创建临时目录,请检查权限
goto END
)
)
:: 检查文件是否存在
if not exist "%DATA_FILE%" (
echo Error: 数据文件不存在: %DATA_FILE%
goto END
)
echo ---------------------
echo Processing: population_5.txt
echo ---------------------
:: 1. 创建SQL命令文件
echo 创建SQL命令文件...
(
echo DROP TABLE IF EXISTS %TABLE_NAME%;
echo CREATE TABLE %TABLE_NAME% (
echo country VARCHAR(100) NOT NULL,
echo region VARCHAR(100),
echo score DECIMAL(10,4),
echo upperwhisker DECIMAL(10,4),
echo lowerwhisker DECIMAL(10,4),
echo log_gdp DECIMAL(10,4),
echo social_support DECIMAL(10,4),
echo life_expectancy DECIMAL(10,4),
echo freedom DECIMAL(10,4),
echo generosity DECIMAL(10,4),
echo corruption DECIMAL(10,4),
echo dystopia DECIMAL(10,4)
echo );
echo.
echo LOAD DATA LOCAL INFILE '%DATA_FILE:\=\\%'
echo INTO TABLE %TABLE_NAME%
echo FIELDS TERMINATED BY ','
echo OPTIONALLY ENCLOSED BY '"'
echo LINES TERMINATED BY '\r\n'
echo IGNORE 1 LINES;
) > "%SQL_FILE%"
:: 检查文件是否创建成功
if not exist "%SQL_FILE%" (
echo ERROR: 无法创建SQL文件,请尝试:
echo 1. 右键脚本选择"以管理员身份运行"
echo 2. 暂时禁用防病毒软件
echo 3. 检查磁盘空间
goto END
)
:: 2. 执行SQL导入
echo 正在导入数据到MySQL...
mysql -u %MYSQL_USER% -p%MYSQL_PASS% -D %MYSQL_DB% < "%SQL_FILE%"
if errorlevel 1 (
echo ERROR: MySQL导入失败,请检查:
echo 1. MySQL服务是否运行
echo 2. 用户名/密码是否正确
echo 3. 文件路径权限是否足够
) else (
echo 数据导入成功!
)
:: 3. 清理临时文件
if exist "%SQL_FILE%" (
del "%SQL_FILE%"
)
echo ---------------------
echo 导入过程完成
echo ---------------------
:END
Pause
导入数据
import pandas as pd
import mysql.connector
# 读取CSV文件
df = pd.read_csv("C:/Users/lenovo/population_5.txt")
# 数据库配置
config = {
'user': 'cbuser',
'password': 'cbpass',
'host': 'localhost',
'database': 'cookbook',
'charset': 'utf8mb4'
}
try:
# 连接数据库
cnx = mysql.connector.connect(**config)
cursor = cnx.cursor()
# 准备插入语句
insert_query = """
INSERT INTO population_5 (
country, region, score, upperwhisker, lowerwhisker,
log_gdp, social_support, life_expectancy, freedom,
generosity, corruption, dystopia
) VALUES (
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
)
"""
# 转换数据并处理缺失值
data_to_insert = []
for _, row in df.iterrows():
values = (
row['Country name'],
row['Regional indicator'],
float(row['Ladder score']) if pd.notna(row['Ladder score']) else None,
float(row['upperwhisker']) if pd.notna(row['upperwhisker']) else None,
float(row['lowerwhisker']) if pd.notna(row['lowerwhisker']) else None,
float(row['Log GDP per capita']) if pd.notna(row['Log GDP per capita']) else None,
float(row['Social support']) if pd.notna(row['Social support']) else None,
float(row['Healthy life expectancy']) if pd.notna(row['Healthy life expectancy']) else None,
float(row['Freedom to make life choices']) if pd.notna(row['Freedom to make life choices']) else None,
float(row['Generosity']) if pd.notna(row['Generosity']) else None,
float(row['Perceptions of corruption']) if pd.notna(row['Perceptions of corruption']) else None,
float(row['Dystopia + residual']) if pd.notna(row['Dystopia + residual']) else None
)
data_to_insert.append(values)
# 批量插入
cursor.executemany(insert_query, data_to_insert)
cnx.commit()
# 验证
cursor.execute("SELECT COUNT(*) FROM population_5")
print(f"成功导入 {cursor.fetchone()[0]} 行数据")
except mysql.connector.Error as e:
print(f"数据库错误: {e}")
except Exception as e:
print(f"导入失败: {e}")
finally:
if cnx.is_connected():
cursor.close()
cnx.close()
print("数据库连接已关闭")
mysql> -- 查看表结构
mysql> DESCRIBE population_5;
+-----------------+---------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+-----------------+---------------+------+-----+---------+-------+
| country | varchar(100) | NO | | NULL | |
| region | varchar(100) | YES | | NULL | |
| score | decimal(10,4) | YES | | NULL | |
| upperwhisker | decimal(10,4) | YES | | NULL | |
| lowerwhisker | decimal(10,4) | YES | | NULL | |
| log_gdp | decimal(10,4) | YES | | NULL | |
| social_support | decimal(10,4) | YES | | NULL | |
| life_expectancy | decimal(10,4) | YES | | NULL | |
| freedom | decimal(10,4) | YES | | NULL | |
| generosity | decimal(10,4) | YES | | NULL | |
| corruption | decimal(10,4) | YES | | NULL | |
| dystopia | decimal(10,4) | YES | | NULL | |
+-----------------+---------------+------+-----+---------+-------+
12 rows in set (0.02 sec)
mysql>
mysql> -- 查看数据行数
mysql> SELECT COUNT(*) FROM population_5;
+----------+
| COUNT(*) |
+----------+
| 143 |
+----------+
1 row in set (0.00 sec)
mysql>
mysql> -- 查看前5行数据
mysql> SELECT country, region, score FROM population_5 LIMIT 5;
+---------+------------------------------+--------+
| country | region | score |
+---------+------------------------------+--------+
| Finland | Western Europe | 7.7400 |
| Denmark | Western Europe | 7.5800 |
| Iceland | Western Europe | 7.5200 |
| Sweden | Western Europe | 7.3400 |
| Israel | Middle East and North Africa | 7.3400 |
+---------+------------------------------+--------+
5 rows in set (0.00 sec)
mysql>
mysql> -- 检查缺失值处理(如Bahrain行)
mysql> SELECT country, generosity FROM population_5 WHERE country = 'Bahrain';
+---------+------------+
| country | generosity |
+---------+------------+
| Bahrain | NULL |
+---------+------------+
1 row in set (0.00 sec)
mysql>
mysql> -- 按地区分组统计平均分数
mysql> SELECT region, AVG(score) AS avg_score
-> FROM population_5
-> GROUP BY region
-> ORDER BY avg_score DESC;
+------------------------------------+------------+
| region | avg_score |
+------------------------------------+------------+
| North America and ANZ | 6.92250000 |
| Western Europe | 6.83700000 |
| Central and Eastern Europe | 6.16647059 |
| Latin America and Caribbean | 6.13842105 |
| East Asia | 5.93000000 |
| Southeast Asia | 5.54666667 |
| Commonwealth of Independent States | 5.53400000 |
| Middle East and North Africa | 5.19529412 |
| Sub-Saharan Africa | 4.32571429 |
| South Asia | 3.89000000 |
+------------------------------------+------------+
10 rows in set (0.00 sec)
数据分析建议
1. 区域幸福感对比
sql
-- 按区域分组,计算平均幸福感分数和样本数
SELECT
region,
COUNT(*) AS country_count,
ROUND(AVG(score), 2) AS avg_score,
ROUND(STD(score), 2) AS score_std
FROM population_5
GROUP BY region
ORDER BY avg_score DESC;
2. GDP 与幸福感相关性
sql
-- 计算Log GDP与幸福感的皮尔逊相关系数
SET @sql = CONCAT('
SELECT
ROUND(CORR(`Log GDP per capita`, score), 4) AS gdp_correlation
FROM population_5
WHERE `Log GDP per capita` IS NOT NULL AND score IS NOT NULL
');
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
3. 幸福感排名分析
sql
-- 按分数排名,添加排名列
WITH Ranked AS (
SELECT
country, region, score,
DENSE_RANK() OVER (ORDER BY score DESC) AS rank
FROM population_5
)
SELECT * FROM Ranked WHERE rank <= 10;
四、数据可视化建议
使用 Python 的matplotlib或seaborn绘制:
1. 区域幸福感分布直方图:
python
运行
import seaborn as sns
import matplotlib.pyplot as plt
# 从MySQL读取数据
df = pd.read_sql("SELECT region, score FROM population_5", cnx)
# 绘制直方图
plt.figure(figsize=(12, 6))
sns.histplot(df, x='score', hue='region', multiple='stack', alpha=0.7)
plt.title('Happiness Score Distribution by Region')
plt.xlabel('Happiness Score')
plt.ylabel('Count')
plt.tight_layout()
plt.show()
2. GDP 与幸福感散点图:
python
运行
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Log GDP per capita', y='score', data=df)
plt.title('Relationship Between GDP and Happiness Score')
plt.xlabel('Log GDP per Capita')
plt.ylabel('Happiness Score')
plt.grid(True, alpha=0.3)
plt.show()
import pandas as pd
import mysql.connector
import matplotlib.pyplot as plt
import seaborn as sns
# 数据库配置
config = {
'user': 'cbuser',
'password': 'cbpass',
'host': 'localhost',
'database': 'cookbook',
'charset': 'utf8mb4'
}
# 方案一:在with语句中使用连接(推荐)
with mysql.connector.connect(**config) as cnx:
# 从MySQL读取数据
df = pd.read_sql("SELECT region, score FROM population_5", cnx)
# 绘制直方图
plt.figure(figsize=(12, 6))
sns.histplot(df, x='score', hue='region', multiple='stack', alpha=0.7)
plt.title('Happiness Score Distribution by Region')
plt.xlabel('Happiness Score')
plt.ylabel('Count')
plt.tight_layout()
plt.show()
# 绘制箱线图
plt.figure(figsize=(14, 6))
sns.boxplot(x='region', y='score', data=df)
plt.title('Happiness Score by Region')
plt.xlabel('Region')
plt.ylabel('Happiness Score')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
# 方案二:手动管理连接(需确保不提前关闭)
# cnx = mysql.connector.connect(**config)
# try:
# df = pd.read_sql("SELECT region, score FROM population_5", cnx)
# # 绘图代码...
# finally:
# cnx.close()
10.38 在mysql和microsoft excel之间交换数据
1. 安装完整依赖链
使用 CPAN 依次安装所需的模块:
bash
cpan
install Spreadsheet::ParseExcel::Simple
install Spreadsheet::ParseExcel
install OLE::Storage_Lite
install Compress::Zlib
如果安装过程中提示其他依赖缺失,请按照提示继续安装。
2. 验证模块安装
安装完成后,验证模块是否能被 Perl 正确加载:
bash
perl -MSpreadsheet::ParseExcel::Simple -e "print 'Module installed successfully!\n';"
替代方案
如果不想安装 Perl 模块,可以使用其他工具实现相同功能:
1. Python 脚本
使用 Python 的 openpyxl 库(处理 .xlsx 文件)或 xlrd 库(处理 .xls 文件):
python
运行
import sys
import xlrd
# 检查命令行参数
if len(sys.argv) != 2:
print("用法: python from_excel.py data.xls")
sys.exit(1)
excel_file = sys.argv[1]
# 打开 Excel 文件
workbook = xlrd.open_workbook(excel_file)
sheet = workbook.sheet_by_index(0) # 获取第一个工作表
# 遍历每一行并输出为制表符分隔的文本
for row in range(sheet.nrows):
values = []
for col in range(sheet.ncols):
cell_value = sheet.cell_value(row, col)
# 将数值类型转换为字符串
if isinstance(cell_value, float) and cell_value.is_integer():
values.append(str(int(cell_value)))
else:
values.append(str(cell_value))
print('\t'.join(values))
C:\Users\lenovo>mysql_to_excel.pl --user=cbuser --password=cbpass cookbook profile > profile.xls
C:\Users\lenovo>mysql_to_excel.pl --user=cbuser --password=cbpass cookbook profile > profile.csv
C:\Users\lenovo>from_excel.pl profile.xls > profile.txt
C:\Users\lenovo>to_excel.pl profile.txt > profile02.csv
Ok run!!
保存为 from_excel.py 并运行:
bash
python from_excel.py data.xls > dat
1. 安装完整依赖链
使用 CPAN 依次安装所需的模块:
bash
cpan
install Excel::Writer::XLSX
install Compress::Zlib
install IO::String
如果安装过程中提示其他依赖缺失,请按照提示继续安装。
2. 验证模块安装
安装完成后,验证模块是否能被 Perl 正确加载:
bash
perl -MExcel::Writer::XLSX -e "print 'Module installed successfully!\n';"
替代方案
如果不想安装 Perl 模块,可以使用其他工具实现相同功能:
1. Python 脚本
使用 Python 的 openpyxl 库创建 Excel 文件:
python
运行
import sys
import csv
from openpyxl import Workbook
# 检查命令行参数
if len(sys.argv) != 2:
print("用法: python to_excel.py data.txt")
sys.exit(1)
txt_file = sys.argv[1]
excel_file = txt_file.rsplit('.', 1)[0] + '.xlsx' # 输出为 .xlsx 格式
# 创建工作簿和工作表
wb = Workbook()
ws = wb.active
# 读取文本文件并写入 Excel
with open(txt_file, 'r', encoding='utf-8') as f:
reader = csv.reader(f, delimiter='\t') # 假设文本文件使用制表符分隔
for row in reader:
ws.append(row)
# 保存 Excel 文件
wb.save(excel_file)
print(f"已将 {txt_file} 转换为 {excel_file}")
保存为 to_excel.py 并运行:
bash
python to_excel.py data.txt
1. 安装完整依赖链
使用 CPAN 依次安装所需的模块:
bash
cpan
install Spreadsheet::WriteExcel::FromDB
install Spreadsheet::WriteExcel
install DBI
install DBD::mysql
如果安装过程中提示其他依赖缺失,请按照提示继续安装。
2. 验证模块安装
安装完成后,验证模块是否能被 Perl 正确加载:
bash
perl -MSpreadsheet::WriteExcel::FromDB -e "print 'Module installed successfully!\n';"
替代方案
如果不想安装 Perl 模块,可以使用其他工具实现相同功能:
1. Python 脚本
使用 Python 的 mysql-connector 和 openpyxl 库从 MySQL 导出数据到 Excel:
python
运行
import sys
import mysql.connector
from openpyxl import Workbook
# 检查命令行参数
if len(sys.argv) != 3:
print("用法: python mysql_to_excel.py 数据库名 表名")
sys.exit(1)
db_name = sys.argv[1]
table_name = sys.argv[2]
excel_file = f"{table_name}.xlsx"
# 数据库配置(根据实际情况修改)
config = {
'user': 'your_username',
'password': 'your_password',
'host': 'localhost',
'database': db_name,
'raise_on_warnings': True
}
# 连接数据库
try:
cnx = mysql.connector.connect(**config)
cursor = cnx.cursor()
# 查询表数据
query = f"SELECT * FROM {table_name}"
cursor.execute(query)
# 获取列名
columns = [desc[0] for desc in cursor.description]
# 创建工作簿和工作表
wb = Workbook()
ws = wb.active
# 写入列名
ws.append(columns)
# 写入数据
for row in cursor:
ws.append(row)
# 保存 Excel 文件
wb.save(excel_file)
print(f"已将 {db_name}.{table_name} 导出到 {excel_file}")
except mysql.connector.Error as err:
print(f"数据库错误: {err}")
finally:
if 'cnx' in locals() and cnx.is_connected():
cursor.close()
cnx.close()
保存为 mysql_to_excel.py 并运行:
bash
python mysql_to_excel.py cookbook profile
10.39 将输出结果导出为xml
1. 安装缺失的模块
你可以使用 CPAN(Comprehensive Perl Archive Network)来安装XML::Generator::DBI模块,具体步骤如下:
步骤 1:打开命令提示符,以管理员身份运行以下命令启动 CPAN shell:
bash
cpan
步骤 2:在 CPAN shell 中,输入以下命令安装模块:
bash
install XML::Generator::DBI
步骤 3:安装完成后,输入quit退出 CPAN shell。
2. 验证模块是否安装成功
你可以通过运行以下命令来检查模块是否安装成功:
bash
perl -MXML::Generator::DBI -e "print 'Module installed successfully!\n';"
如果显示Module installed successfully!,说明模块已成功安装;若仍提示错误,则需要重新安装。
3. 检查模块路径
要是安装后仍然找不到模块,可能是模块安装路径没有被添加到 Perl 的搜索路径中。你可以通过以下命令查看 Perl 的搜索路径:
bash
perl -V
检查输出中的@INC部分,确认模块是否安装在这些路径下。
4. 替代方案
如果你不想安装额外的模块,也可以考虑使用其他工具来实现从 MySQL 导出数据到 XML 的功能,例如:
使用 MySQL 命令行:
bash
C:\Users\lenovo>mysql -u cbuser -p -D cookbook -e "SELECT * FROM expt" --xml > expt.xml
Enter password: ******
使用 Python 脚本:
python
运行
import mysql.connector
from xml.etree.ElementTree import Element, tostring
# 数据库连接配置
config = {
'user': 'username',
'password': 'password',
'host': 'localhost',
'database': 'cookbook',
'raise_on_warnings': True
}
# 连接数据库
cnx = mysql.connector.connect(**config)
cursor = cnx.cursor()
# 执行查询
query = "SELECT * FROM expt"
cursor.execute(query)
# 创建 XML 根元素
root = Element('data')
# 添加查询结果到 XML
for row in cursor:
record = Element('record')
for i, col in enumerate(cursor.column_names):
field = Element(col)
field.text = str(row[i]) if row[i] is not None else ''
record.append(field)
root.append(record)
# 写入 XML 文件
with open('expt.xml', 'wb') as f:
f.write(tostring(root, encoding='utf-8', method='xml'))
# 关闭连接
cursor.close()
cnx.close()
将上述代码保存为mysql_to_xml.py,然后运行:
bash
python mysql_to_xml.py
总结
首选方案是安装缺失的 Perl 模块,这样可以让原脚本正常运行。
三、完整依赖链安装
尝试安装整个 XML::Generator 系列:
bash
cpan
install XML::Generator
install XML::Generator::DBI
install XML::Handler::YAWriter
验证模块安装
安装完成后,验证模块是否能被 Perl 正确加载:
bash
perl -MXML::Handler::YAWriter -e "print 'Module installed successfully!\n';"
C:\Users\lenovo>mysql_to_xml.pl --execute="select * from expt" --user=cbuser --password=cbpass cookbook > expt.xml
文件存入 C:\Users\lenovo中
10.40 将xml导入mysql
3. 完整依赖链安装
如果单独安装模块仍有问题,可以尝试安装相关的 XML 处理模块:
bash
cpan
install XML::XPath
install XML::Parser
install XML::SAX
安装完成后,验证模块是否能被 Perl 正确加载:
bash
perl -MXML::XPath -e "print 'Module installed successfully!\n';"
如果不想安装 Perl 模块,可以使用其他工具实现相同功能:
1. Python 脚本
使用 Python 的 xml.etree 和 mysql.connector 解析 XML 并导入 MySQL:
python
运行
import mysql.connector
import xml.etree.ElementTree as ET
# 数据库配置
config = {
'user': 'cbuser',
'password': 'cbpass',
'host': 'localhost',
'database': 'cookbook',
}
# 连接数据库
cnx = mysql.connector.connect(**config)
cursor = cnx.cursor()
# 解析 XML 文件
tree = ET.parse('expt.xml')
root = tree.getroot()
# 清空目标表(可选,根据需要决定是否保留)
# cursor.execute("TRUNCATE TABLE expt")
# 插入数据
for row in root.findall('row'):
# 提取列名和值
columns = []
values = []
for field in row:
columns.append(field.tag)
values.append(field.text or '') # 处理空值
# 构建 SQL 语句
placeholders = ', '.join(['%s'] * len(columns))
sql = f"INSERT INTO expt ({', '.join(columns)}) VALUES ({placeholders})"
# 执行插入
cursor.execute(sql, values)
# 提交事务并关闭连接
cnx.commit()
cursor.close()
cnx.close()
print("数据导入完成!")
保存为 xml_to_mysql.py 并运行:
bash
python xml_to_mysql.py
C:\Users\lenovo>xml_to_mysql.pl --user=cbuser --password=cbpass cookbook expt expt.xml
Number of records: 8