基于Django的电商购物项目通过网络爬虫技术以及ORM框架获取项目商品数据

发布于:2024-05-20 ⋅ 阅读:(131) ⋅ 点赞:(0)

基于Django的电商购物项目通过网络爬虫技术以及ORM框架获取项目商品数据

这里先提供源码,具体思路将在下一期进行解释。



if __name__ == '__main__':
    # Standalone-script bootstrap: everything below runs only when this file
    # is executed directly, not when it is imported.
    import os
    # Name the project's settings module unless the environment already
    # provides DJANGO_SETTINGS_MODULE (setdefault never overwrites).
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'JiXuShopSystem.settings')
    import django
    # Initialise Django before the model import on the following line.
    django.setup()
    # Presumably supplies the model classes used by Spider.spider_data
    # (Brand, GoodsCategory, SPU, SKU, ...) - verify against goods/models.py.
    from goods.models import *
    import json
    import random
    import requests
    import re
    from pypinyin import pinyin, Style

    class Spider():
        def __init__(self):

            self.headers = {
                "accept": "application/json, text/javascript, */*; q=0.01",
                "accept-language": "zh-CN,zh;q=0.9",
                "authorization": "OAuth api_sign=d463028caef4688f248d1d05cac958d0153afe12",
                "cache-control": "no-cache",
                "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
                "origin": "https://detail.vip.com",
                "pragma": "no-cache",
                "priority": "u=1, i",
                "referer": "https://detail.vip.com/",
                "sec-ch-ua": "\"Chromium\";v=\"124\", \"Google Chrome\";v=\"124\", \"Not-A.Brand\";v=\"99\"",
                "sec-ch-ua-mobile": "?0",
                "sec-ch-ua-platform": "\"Windows\"",
                "sec-fetch-dest": "empty",
                "sec-fetch-mode": "cors",
                "sec-fetch-site": "same-site",
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
                "x-requested-with": "XMLHttpRequest"
            }
            self.cookies = {
                "vip_cps_cid": "1703946155095_f8aebf721aa4d69f55487762e3ca4c21",
                "PAPVisitorId": "58460c7a22e31f6b4acb2a1ed741f921",
                "vip_new_old_user": "1",
                "mars_cid": "1703946163504_4eebec221de3364e0da3bbe4a2182454",
                "mars_pid": "0",
                "vip_cps_cuid": "CU1715695090775e67fbb574e7ca3b54",
                "vip_city_name": "%E5%B9%BF%E5%B7%9E%E5%B8%82",
                "VipUINFO": "luc%3Aa%7Csuc%3Aa%7Cbct%3Ac_new%7Chct%3Ac_new%7Cbdts%3A0%7Cbcts%3A0%7Ckfts%3A0%7Cc10%3A0%7Crcabt%3A0%7Cp2%3A0%7Cp3%3A1%7Cp4%3A0%7Cp5%3A0%7Cul%3A3105",
                "vip_address": "%257B%2522pname%2522%253A%2522%255Cu5b89%255Cu5fbd%255Cu7701%2522%252C%2522pid%2522%253A%2522103104%2522%252C%2522cname%2522%253A%2522%255Cu5e7f%255Cu5dde%255Cu5e02%2522%252C%2522cid%2522%253A%2522103104105%2522%257D",
                "vip_province": "103104",
                "vip_province_name": "%E5%AE%89%E5%BE%BD%E7%9C%81",
                "vip_city_code": "103104105",
                "vip_wh": "VIP_HZ",
                "vip_ipver": "31",
                "user_class": "a",
                "visit_id": "DBEBA17A04F9893A4CBF22BF2C37ADB4",
                "mars_sid": "f9ea9bcbdad7fcd940397690e20183a7",
                "VIP_QR_FIRST": "1",
                "vip_tracker_source_from": "",
                "vipshop_passport_src": "https%3A%2F%2Fdetail.vip.com%2Fdetail-1711573035-6920320379434192651.html",
                "pg_session_no": "11",
                "VipDFT": "-1",
                "vip_access_times": "%7B%22list%22%3A3%2C%22detail%22%3A0%7D"
            }
            self.url = "https://mapi.vip.com/vips-mobile/rest/shopping/pc/detail/main/v6"



        def brand(self, response):
            """Extract the brand fields from a product-detail response.

            Args:
                response: Parsed detail payload; ``data.brandStoreInfo`` must
                    provide ``brandStoreName`` and ``brandStoreLogo``.

            Returns:
                dict with keys ``brandStoreInfo`` (the brand name),
                ``brandStoreLogo`` and ``first_letter``. ``first_letter`` is the
                upper-cased pinyin initial of the name's first character, or
                ``None`` when the name is empty.
            """
            store = response['data']['brandStoreInfo']
            store_name = store['brandStoreName']
            store_logo = store['brandStoreLogo']

            initial = None
            if store_name:
                # Only the first character of the name is transliterated; the
                # first entry of the first result is used as the initial.
                readings = pinyin(store_name[0], style=Style.FIRST_LETTER, strict=False)
                initial = readings[0][0].upper()

            return {
                'brandStoreInfo': store_name,
                'brandStoreLogo': store_logo,
                'first_letter': initial,
            }



        def spu(self, response):
            """Assemble the SPU field values for one product.

            Reads the title, detail images and after-sales list from ``response``
            and performs one extra HTTP GET against the vendor Q&A endpoint,
            whose entries are rendered into ``desc_pack``.

            Args:
                response: Parsed product-detail payload. Must contain
                    ``data.base.title``, ``data.images.groups`` and
                    ``data.saleServiceList``.

            Returns:
                dict with keys ``name``, ``sales``, ``comments``, ``desc_detail``,
                ``desc_pack`` and ``desc_service``. The three ``desc_*`` values
                are HTML fragments; ``sales`` and ``comments`` are always 0.

            Raises:
                KeyError: if an expected key is missing from either payload.
                ValueError: if the Q&A response is not valid JSON once the JSONP
                    wrapper has been removed (``json.JSONDecodeError``).
            """
            data = response['data']

            # Detail images: only the first image group is used and its first
            # image is skipped. The URL goes into the src attribute so that the
            # stored HTML actually renders the picture.
            first_group = next(iter(data['images']['groups'].values()), None)
            detail_images = first_group['detailImages'][1:] if first_group is not None else []
            detail = ''.join(
                '<p><img src="{}"></p>'.format(image['imageUrl'])
                for image in detail_images
            )

            # Ask for the Q&A of the product currently being scraped.
            # spider_data() stores its request form (including productId) on
            # self.data before calling this method; the previous fixed id is
            # kept as a fallback for standalone calls.
            request_form = getattr(self, 'data', None) or {}
            product_id = request_form.get('productId') or "6920549355391848472"

            callback = "getVendorQaCb"
            url = "https://mapi.vip.com/vips-mobile/rest/shopping/pc/detail/vendorqa/v1"
            params = {
                "callback": callback,
                "app_name": "shop_pc",
                "app_version": "4.0",
                "warehouse": "VIP_HZ",
                "fdc_area_id": "103104105",
                "client": "pc",
                "mobile_platform": "1",
                "province_id": "103104",
                "api_key": "70f71280d5d547b2a7bb370a529aeea1",
                "user_id": "",
                "mars_cid": "1703946163504_4eebec221de3364e0da3bbe4a2182454",
                "wap_consumer": "a",
                "productId": product_id
            }
            raw_text = requests.get(url, headers=self.headers, cookies=self.cookies, params=params).text
            # Remove only the JSONP wrapper; parentheses inside the JSON text
            # (e.g. in an answer) must survive.
            qa_payload = json.loads(self._strip_jsonp(raw_text, callback))
            qa_entries = qa_payload['data']['list'] or []

            # NOTE(review): question/answer and service texts are interpolated
            # into HTML without escaping - confirm the upstream data is trusted
            # or already HTML.

            # Vendor Q&A, stored in the "pack" description field.
            pack_parts = []
            for entry in qa_entries:
                pack_parts.append(f"""
                <dl class="q-tit-item">
                        <dt class="qti-title">
                            <i class="qti-question"></i>
                            <p class="qti-txt" style="color: red">{entry['question']}</p>
                        </dt>
                        <dd class="qti-content">
                            <i class="qti-answer"></i>
                            <p class="qti-txt">
                                {entry['answer']}
                            </p>
                        </dd>
                    </dl>
                """)
            decs_data = ''.join(pack_parts)

            # After-sales service entries.
            service_parts = []
            for service in data['saleServiceList']:
                service_parts.append(f"""
                    <dl class="q-tit-item">
                            <dt class="qti-title">
                                <i class="qti-question"></i>
                                <p class="qti-txt" style="color: red">{service['name']}</p>
                            </dt>
                            <dd class="qti-content">
                                <i class="qti-answer"></i>
                                <p class="qti-txt">
                                    {service['value']}
                                </p>
                            </dd>
                        </dl>
                    """)
            tsaleService_data = ''.join(service_parts)

            return {
                'name': data['base']['title'],
                'sales': 0,
                'comments': 0,
                'desc_detail': detail,
                'desc_pack': decs_data,
                'desc_service': tsaleService_data,
            }

        @staticmethod
        def _strip_jsonp(text, callback):
            """Return the JSON document wrapped in ``callback(...)``.

            Text that does not start with ``callback(`` is returned unchanged
            apart from surrounding whitespace.
            """
            payload = text.strip()
            opener = callback + '('
            if not payload.startswith(opener):
                return payload
            # Drop the opener, an optional trailing ';' and the single closing ')'.
            payload = payload[len(opener):].rstrip().rstrip(';').rstrip()
            if payload.endswith(')'):
                payload = payload[:-1]
            return payload


        def sku(self, response):
            """Build the SKU field values for every product variant.

            Args:
                response: Parsed product-detail payload. Must contain
                    ``data.base.title`` and ``data.products``, a mapping whose
                    values carry ``merchandiseSn``, ``smallImage`` and
                    ``priceView.salePrice.{saleMarketPrice,salePrice}``.

            Returns:
                list of dicts, one per product, with keys ``name``, ``price``,
                ``caption``, ``cost_price``, ``market_price``, ``stock``,
                ``sales``, ``comments``, ``is_launched`` and ``default_image``.
                ``cost_price`` is the market price minus the sale price: an
                ``int`` for whole-number prices, a ``Decimal`` when either price
                has a fractional part.

            Raises:
                KeyError: if an expected key is missing.
                ValueError: if a price cannot be parsed as a number.
            """
            # Local import keeps this method self-contained; the file's import
            # block does not provide decimal.
            from decimal import Decimal, InvalidOperation

            def as_number(raw):
                # int() preserves the earlier result for whole-number prices;
                # Decimal covers strings such as "99.90" that int() rejects.
                try:
                    return int(raw)
                except ValueError:
                    try:
                        return Decimal(raw)
                    except InvalidOperation:
                        raise ValueError(f"unparseable price: {raw!r}") from None

            image_prefix = 'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/'
            title = response['data']['base']['title']

            sku_rows = []
            for product in response['data']['products'].values():
                price_info = product['priceView']['salePrice']
                market_price = price_info['saleMarketPrice']
                sale_price = price_info['salePrice']

                sku_rows.append({
                    'name': product['merchandiseSn'],
                    'price': sale_price,
                    'caption': title,
                    # Difference between market price and sale price.
                    'cost_price': as_number(market_price) - as_number(sale_price),
                    'market_price': market_price,
                    # Stock and comments are fixed placeholders; sales is a
                    # random figure between 1 and 800.
                    'stock': 1000,
                    'sales': random.randint(1, 800),
                    'comments': 0,
                    'is_launched': True,
                    # Keep only the path part of the image URL, without the
                    # host prefix and the ".jpg" suffix.
                    'default_image': product['smallImage'].replace(image_prefix, '').replace('.jpg', ''),
                })

            return sku_rows



        def sku_image(self, response):
            """List the preview-image keys of the first image group.

            Args:
                response: Parsed product-detail payload containing
                    ``data.images.groups``, a mapping of image groups.

            Returns:
                list of str: one entry per preview image of the first group,
                each with the image-host prefix and every ``.jpg`` removed.
                Empty when there are no groups.
            """
            host_prefix = 'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/'

            groups = response['data']['images']['groups'].values()
            # Only the first group is consulted; later groups are ignored.
            first_group = next(iter(groups), None)
            if first_group is None:
                return []

            return [
                entry['imageUrl'].replace(host_prefix, '').replace('.jpg', '')
                for entry in first_group['previewImages']
            ]


        def spu_pecification(self, response):
            """Return the name of every sale property (specification).

            Args:
                response: Parsed product-detail payload containing
                    ``data.saleProps``, a sequence of dicts with a ``name`` key.

            Returns:
                list: the ``name`` values in payload order.
            """
            return [prop['name'] for prop in response['data']['saleProps']]


        def spu_optiuons(self, response):
            """Return every option name of every sale property as one flat list.

            Args:
                response: Parsed product-detail payload containing
                    ``data.saleProps``; each entry has a ``values`` sequence of
                    dicts with a ``name`` key.

            Returns:
                list: option names in payload order. Options of different
                properties are concatenated, so the result no longer records
                which property an option came from.
            """
            return [
                option['name']
                for prop in response['data']['saleProps']
                for option in prop['values']
            ]



        def main(self):
            """Walk the search result pages and scrape every product found.

            Requests the search-rank endpoint for the keyword in the query at
            offsets 0, 120, ..., 2280 (batch size 120), pulls every product id
            out of the response text and hands each one to ``spider_data``.
            """
            search_url = "https://mapi.vip.com/vips-mobile/rest/shopping/pc/search/product/rank"
            # Everything except pageOffset is identical for all pages. The
            # placeholder keeps pageOffset at its original position in the
            # query string.
            query_template = {
                "callback": "getMerchandiseIds",
                "app_name": "shop_pc",
                "app_version": "4.0",
                "warehouse": "VIP_HZ",
                "fdc_area_id": "103104105",
                "client": "pc",
                "mobile_platform": "1",
                "province_id": "103104",
                "api_key": "70f71280d5d547b2a7bb370a529aeea1",
                "user_id": "",
                "mars_cid": "1703946163504_4eebec221de3364e0da3bbe4a2182454",
                "wap_consumer": "a",
                "standby_id": "nature",
                "keyword": "手机",
                "lv3CatIds": "",
                "lv2CatIds": "",
                "lv1CatIds": "",
                "brandStoreSns": "",
                "props": "",
                "priceMin": "",
                "priceMax": "",
                "vipService": "",
                "sort": "0",
                "pageOffset": "",
                "channelId": "1",
                "gPlatform": "PC",
                "batchSize": "120",
                "_": "1715836699310"
            }
            # Product ids appear in the response text as {"pid":"..."} objects.
            pid_pattern = re.compile('{"pid":"(.*?)"}')

            for offset in range(0, 2400, 120):
                query = dict(query_template, pageOffset=f"{offset}")
                listing = requests.get(
                    search_url,
                    headers=self.headers,
                    cookies=self.cookies,
                    params=query,
                )
                for pid in pid_pattern.findall(listing.text):
                    self.spider_data(pid)



        def spider_data(self, pid):
            """Fetch one product's detail payload and store it through the ORM.

            Posts the detail request for ``pid``, then creates, in this order:
            a ``Brand``, an ``SPU`` (linked to the brand and to the categories
            with ids 1, 38 and 115), and for every SKU entry a ``SKU`` with its
            ``SKUImage`` rows, ``SPUSpecification`` rows, ``SpecificationOption``
            rows and ``SKUSpecification`` rows.

            Args:
                pid: Product id as extracted from the search results.

            Side effects:
                Stores the request form on ``self.data``, prints the extracted
                brand and SPU dicts, and writes database rows.

            NOTE(review): a new Brand row is created on every call, and the
            specification rows are created again for every SKU, each one
            receiving every option of every sale property. This may produce
            duplicate or mismatched rows - confirm whether that is intended.
            """
            self.data = {
                "app_name": "shop_pc",
                "app_version": "4.0",
                "warehouse": "VIP_HZ",
                "fdc_area_id": "103104105",
                "client": "pc",
                "mobile_platform": "1",
                "province_id": "103104",
                "api_key": "70f71280d5d547b2a7bb370a529aeea1",
                "user_id": "",
                "mars_cid": "1703946163504_4eebec221de3364e0da3bbe4a2182454",
                "wap_consumer": "a",
                "scene": "detail",
                "productId": f"{pid}",
                "opts": "priceView:13;quotaInfo:1;restrictTips:1;panelView:3;foreShowActive:1;invisible:1;floatingView:1;announcement:1;svipView:2;showSingleColor:1;svipPriceMode:1;promotionTips:6;foldTips:3;formula:2;extraDetailImages:1;shortVideo:1;countryFlagStyle:1;saleServiceList:1;storeInfo:2;brandCountry:1;freightTips:3;priceBannerView:1;bannerTagsView:1;buyMoreFormula:1;mergeGiftTips:0;kf:1;priceIcon:1;tuv:3;promotionTags:7;mergeGiftTips:3;topDetailImage:2;deliveryInfo:1;relatedProdSpu:1"
            }
            reply = requests.post(
                self.url,
                headers=self.headers,
                cookies=self.cookies,
                data=self.data,
            )
            response = reply.json()

            # Brand
            brand_fields = self.brand(response)
            print(brand_fields)
            brand_row = Brand.objects.create(
                name=brand_fields['brandStoreInfo'],
                logo=brand_fields['brandStoreLogo'],
                first_letter=brand_fields['first_letter'],
            )

            # Every scraped product is filed under the same three categories.
            category1 = GoodsCategory.objects.get(id=1)
            category2 = GoodsCategory.objects.get(id=38)
            category3 = GoodsCategory.objects.get(id=115)

            # SPU: the dict keys returned by spu() match the field names used here.
            spu_fields = self.spu(response)
            print(spu_fields)
            spu_row = SPU.objects.create(
                brand=brand_row,
                category1=category1,
                category2=category2,
                category3=category3,
                **spu_fields,
            )

            # SKUs and their dependent rows.
            for sku_fields in self.sku(response):
                sku_row = SKU.objects.create(
                    category=category3,
                    spu=spu_row,
                    **sku_fields,
                )

                for image_key in self.sku_image(response):
                    SKUImage.objects.create(image=image_key, sku=sku_row)

                for spec_name in self.spu_pecification(response):
                    spec_row = SPUSpecification.objects.create(
                        name=spec_name,
                        spu=spu_row,
                    )

                    for option_value in self.spu_optiuons(response):
                        option_row = SpecificationOption.objects.create(
                            value=option_value,
                            spec=spec_row,
                        )
                        SKUSpecification.objects.create(
                            option=option_row,
                            spec=spec_row,
                            sku=sku_row,
                        )








    # Script entry: build the spider and crawl all search pages (see Spider.main).
    spider = Spider()
    spider.main()

网站公告

今日签到

点亮在社区的每一天
去签到