基于python和SQLite的NBA历年MVP变化趋势可视化分析

发布于:2022-12-04 ⋅ 阅读:(189) ⋅ 点赞:(0)

目 录
9 成员感想 14
1 项目背景以及意义 1
2 项目创新点 2
3 项目的设计 3
一:数据爬取部分 3

  1. 利用urllib获取网页数据 3
  2. 利用bs4的beautifulSoup进行数据的解析和获取想要的数据 4
  3. 利用re正则表达式结合bs4得出想要的数据 4
    二:数据可视化部分 5
    一:数据爬取部分 6
    二:数据可视化部分 6
    9 成员感想 14
    二:数据可视化部分
    把所有数据从数据库中取出来
    利用了matplotlib.pyplot
    matplotlib.pyplot是一个有命令风格的函数集合,它看起来和MATLAB很相似。每一个pyplot函数都使一幅图像做出些许改变,例如创建一幅图,在图中创建一个绘图区域,在绘图区域中添加一条线等等。在matplotlib.pyplot中,各种状态通过函数调用保存起来,以便于可以随时跟踪像当前图像和绘图区域这样的东西。绘图函数是直接作用于当前axes(matplotlib中的专有名词,指图形中的组成部分,不是数学中的坐标系)。
    随后输出下面这些图片:
    NBA历年MVP得分变化趋势
    NBA历年MVP得分条形图
    NBA历年MVP助攻变化趋势
    NBA历年MVP助攻条形图
    NBA历年MVP篮板变化趋势
    NBA历年MVP篮板条形图

4 如何实现
一:数据爬取部分
数据的爬取分为如下几个步骤
利用urllib获取网页数据
利用bs4的beautifulSoup进行数据的解析和获取想要的数据
利用re正则表达式结合bs4得出想要的数据
利用sqlite3存储数据

二:数据可视化部分
把所有数据从数据库中取出来
利用了matplotlib.pyplot进行可视化

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据爬取部分\n",
    "## 数据的爬取分为如下几个步骤\n",
    "* 利用urllib获取网页数据\n",
    "* 利用bs4的beautifulSoup进行数据的解析和获取想要的数据\n",
    "* 利用re正则表达式结合bs4得出想要的数据\n",
    "* 利用sqlite3存储数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-# \n",
    "#-------------------------------------------------------------------------------\n",
    "# Name:         NBA\n",
    "# Description:  \n",
    "# Author:       zhouzikang\n",
    "# Date:         2020-01-09\n",
    "#-------------------------------------------------------------------------------\n",
    "import sqlite3 #进行SQLit数据库的操作\n",
    "from bs4 import BeautifulSoup  #网页解析,获取数据\n",
    "import urllib.request, urllib.error #指定URL 获取网页数据\n",
    "import re   #正则表达式-进行文字匹配\n",
    "\n",
    "def main():\n",
    "    baseurl = \"http://www.stat-nba.com/award/item0.html\"\n",
    "    #1.爬取网页,解析数据\n",
    "    datalist = getData(baseurl)\n",
    "    #2.保存数据\n",
    "    dbpath = \"mvp.db\"\n",
    "    saveData2DB(datalist,dbpath)\n",
    "\n",
# Regular expressions used to locate the relevant table cells on the page.
# Matches <a> tags whose href points at a player page (the MVP's name).
findPlayer = re.compile(r'[\s\S]+/player/[\s\S]+')

# CSS-class patterns for the season / points / assists / rebounds columns
# of the stat-nba.com award table (col0, col23, col18, col15 respectively).
findYear = re.compile(r'current season change_color col0 row[\s\S]+')
findScore = re.compile(r'normal pts change_color col23 row[\s\S]+')
findAssist = re.compile(r'normal ast change_color col18 row[\s\S]+')
findRebound = re.compile(r'normal trb change_color col15 row[\s\S]+')
    "\n",
    "# 获取路径\n",
    "def askURL(url):\n",
    "    head = {  # 模拟浏览器头部信息,向豆瓣服务器发送消息\n",
    "        \"User-Agent\": \"Mozilla / 5.0(WindowsNT 10.0;WOW64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 70.0.3538 .67Safari / 537.36\"\n",
    "    }\n",
    "    # 用户代理表示告诉豆瓣服务器,我们是什么类型的机器\n",
    "    request = urllib.request.Request(url,headers=head)\n",
    "    html = \"\"\n",
    "    try:\n",
    "        response = urllib.request.urlopen(request)\n",
    "        html = response.read().decode(\"utf-8\")\n",
    "        #print(html)\n",
    "    except urllib.error.URLError as e:\n",
    "        if hasattr(e,\"code\"):\n",
    "            print(e.code)\n",
    "        if hasattr(e,\"reason\"):\n",
    "            print(e.reason)\n",
    "    return html\n",
    "\n",
    "\n",
    "#获取数据\n",
    "def getData(baseurl):\n",
    "    datalist = []\n",
    "    players = []\n",
    "    years = []\n",
    "    scores = []\n",
    "    assists = []\n",
    "    rebounds = []\n",
    "    html = askURL(baseurl)\n",
    "    #逐一解析\n",
    "    soup = BeautifulSoup(html, \"html.parser\")\n",
    "\n",
    "    # 爬取MVP姓名\n",
    "    for item in soup.find_all('a',href=findPlayer):\n",
    "        item = str(item)\n",
    "        item = re.sub('</a>',\" \", item)\n",
    "        item = re.sub('<a[\\s\\S]+>', \" \", item)\n",
    "        players.append(item.strip())\n",
    "\n",
    "    # 爬取MVP年份\n",
    "    for item in soup.find_all('td', class_=findYear):\n",
    "        item = str(item)\n",
    "        item = re.sub('</td>', \" \", item)\n",
    "        item = re.sub('<td[\\s\\S]+>', \" \", item)\n",
    "        years.append(item.strip())\n",
    "\n",
    "    # 爬取MVP获得年份的得分\n",
    "    for item in soup.find_all('td', class_=findScore):\n",
    "        item = str(item)\n",
    "        item = re.sub('</td>', \" \", item)\n",
    "        item = re.sub('<td[\\s\\S]+>', \" \", item)\n",
    "        scores.append(item.strip())\n",
    "\n",
    "    # 爬取MVP获得年份的助攻\n",
    "    for item in soup.find_all('td', class_=findAssist):\n",
    "        item = str(item)\n",
    "        item = re.sub('</td>', \" \", item)\n",
    "        item = re.sub('<td[\\s\\S]+>', \" \", item)\n",
    "        assists.append(item.strip())\n",
    "\n",
    "    # 爬取MVP获得年份的篮板\n",
    "    for item in soup.find_all('td', class_=findRebound):\n",
    "        item = str(item)\n",
    "        item = re.sub('</td>', \" \", item)\n",
    "        item = re.sub('<td[\\s\\S]+>', \" \", item)\n",
    "        rebounds.append(item.strip())\n",
    "    #print(datalist)\n",
    "    #把爬取的数据保存到列表中\n",
    "    for i in range(len(players)):\n",
    "        data = []\n",
    "        data.append(years[i])\n",
    "        data.append(players[i])\n",
    "        data.append(scores[i])\n",
    "        data.append(assists[i])\n",
    "        data.append(rebounds[i])\n",
    "        datalist.append(data)\n",
    "    return datalist\n",
    "\n",
    "def saveData2DB(datalist,dbpath):\n",
    "    init_db(dbpath)\n",
    "    conn = sqlite3.connect(dbpath)\n",
    "    cur = conn.cursor()\n",
    "    for data in datalist:\n",
    "        for index in range(len(data)):\n",
    "            if index >=2:\n",
    "                continue\n",
    "            data[index] = '\"' + str(data[index]) + '\"'\n",
    "        sql = '''\n",
    "                insert into nba (\n",
    "                    year,name,score,assist,rebound)\n",
    "                    values(%s)'''%\",\".join(data)\n",
    "\n",
    "        cur.execute(sql)\n",
    "        conn.commit()\n",
    "    cur.close()\n",
    "    conn.close()\n",
    "\n",
    "\n",
    "def init_db(dbpath):\n",
    "    sql = '''\n",
    "        create table nba(\n",
    "            year varchar ,\n",
    "            name varchar ,\n",
    "            score numeric ,\n",
    "            assist numeric ,\n",
    "            rebound numeric \n",
    "        )\n",
    "    '''\n",
    "    conn = sqlite3.connect(dbpath)\n",
    "    cursor = conn.cursor()\n",
    "    cursor.execute(sql)\n",
    "    conn.commit()\n",
    "    conn.close()\n",
    "\n",
    "\n",
if __name__ == "__main__":
    # Entry point: run the scrape-and-store pipeline when executed as a script.
    main()
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据可视化部分"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 把所有数据从数据库中取出来"
   ]

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述


网站公告

今日签到

点亮在社区的每一天
去签到