Python+Pyppeteer+SpringBoot验证码自动识别登录(文末附源码)

发布于:2024-05-03 ⋅ 阅读:(29) ⋅ 点赞:(0)

效果如下:

 实现流程:

一、Pyppeteer打开网址

import asyncio
from pyppeteer import launch
import pdb
import random

# 启动 Pyppeteer
browser = await launch({'headless': False})
page = await browser.newPage()

# 打开登录页面
await page.goto('http://localhost:8080/login.html')

二、调用后台 Spring Boot 接口,由 Spring Boot 调用验证码 OCR 接口完成识别,并返回识别结果

 核心代码如下:

/**
 * Sends {@code body} to the image-URL OCR endpoint and returns the recognized text.
 *
 * <p>The request is an HTTP POST carrying an {@code Authorization: APPCODE ...} header.
 * On HTTP 200 the text following the first {@code words":"} marker of the JSON reply is
 * returned. On any other status the {@code X-Ca-Error-Message} response header is
 * returned (or {@code "HTTP <code>"} when that header is absent), and on an exception
 * the exception message is returned. Callers therefore receive a plain string in every
 * case and cannot tell a recognized word from an error description by type alone.
 *
 * @param body raw request body; it is written to the connection as UTF-8 bytes
 * @return the recognized text, or a non-null error description on failure
 */
public static String getImgWord(String body) {
        // [1] Request host; the service supports http, https and WebSocket
        String host = "https://imgurlocr.market.alicloudapi.com";
        // [2] Path suffix
        String path = "/urlimages";
        // [3] AppCode shown in the buyer center once the service is activated
        //     (100 free calls); "xxxxx" is a placeholder
        String appcode = "xxxxx";
        // [4] Request parameters are described in the service documentation
        // [5] Full request URL
        String urlSend = host + path;
        try {
            URL url = new URL(urlSend);
            HttpURLConnection httpURLCon = (HttpURLConnection) url.openConnection();
            httpURLCon.setRequestMethod("POST");
            // Header format: "APPCODE <appcode>"
            httpURLCon.setRequestProperty("Authorization", "APPCODE " + appcode);
            httpURLCon.setDoOutput(true);

            byte[] postDataBytes = body.getBytes("UTF-8");
            // try-with-resources closes the stream even when write() throws
            try (OutputStream out = httpURLCon.getOutputStream()) {
                out.write(postDataBytes);
            }

            int httpCode = httpURLCon.getResponseCode();
            if (httpCode == 200) {
                String json = read(httpURLCon.getInputStream());
                System.out.println("正常请求计费(其他均不计费)");
                System.out.println("获取返回的json:");
                System.out.print(json);

                String marker = "words\":\"";
                int start = json.indexOf(marker);
                if (start < 0) {
                    // Without this guard substring(-1) would throw and the caller
                    // would receive an internal index-error message instead.
                    return "OCR response has no words field";
                }
                // Drop everything before the marker, then the marker itself and the
                // closing `"}]}` of the JSON document.
                return json.substring(start).replace(marker, "").replace("\"}]}", "");
            }

            // Non-200: the error description is read from a response header, which
            // may be missing, so look it up null-safely.
            Map<String, List<String>> map = httpURLCon.getHeaderFields();
            List<String> errorValues = map.get("X-Ca-Error-Message");
            String error = (errorValues == null || errorValues.isEmpty() || errorValues.get(0) == null)
                    ? "HTTP " + httpCode
                    : errorValues.get(0);

            if (httpCode == 400 && error.equals("Invalid AppCode")) {
                System.out.println("AppCode错误 ");
            } else if (httpCode == 400 && error.equals("Invalid Url")) {
                System.out.println("请求的 Method、Path 或者环境错误");
            } else if (httpCode == 400 && error.equals("Invalid Param Location")) {
                System.out.println("参数错误");
            } else if (httpCode == 403 && error.equals("Unauthorized")) {
                System.out.println("服务未被授权(或URL和Path不正确)");
            } else if (httpCode == 403 && error.equals("Quota Exhausted")) {
                System.out.println("套餐包次数用完 ");
            } else if (httpCode == 403 && error.equals("Api Market Subscription quota exhausted")) {
                System.out.println("套餐包次数用完,请续购套餐");
            } else {
                System.out.println(httpCode);
                System.out.println("参数名错误 或 其他错误");
                System.out.println(error);
            }
            return error;

        } catch (MalformedURLException e) {
            System.out.println("URL格式错误");
            return e.getMessage();
        } catch (UnknownHostException e) {
            System.out.println("URL地址错误");
            return e.getMessage();
        } catch (Exception e) {
            // Uncomment to see the full stack trace while debugging
            // e.printStackTrace();
            // Some exceptions carry no message; fall back to toString() so the
            // method never returns null.
            String message = e.getMessage();
            return message != null ? message : e.toString();
        }
    }

    /**
     * Reads the whole stream as UTF-8 text and returns it as a single string.
     *
     * <p>The content is consumed line by line and the lines are concatenated without
     * their line terminators. The stream is closed before returning, including when
     * reading fails.
     *
     * @param is stream to read; closed by this method
     * @return the decoded content with line terminators removed
     * @throws IOException if reading from the stream fails
     */
    private static String read(InputStream is) throws IOException {
        StringBuilder sb = new StringBuilder();
        // Decode as UTF-8 at the reader. Decoding with the platform default charset
        // and re-encoding each line afterwards corrupts non-ASCII text whenever the
        // default charset is not UTF-8.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }

三、将验证码识别结果自动填充到input组件

# 执行JavaScript函数并传递参数,等待结果
response_text = await page.evaluate(postFunction, data)
print(response_text)

input_verify_code = await page.xpath("//input[@name='verifyCode']")
await input_verify_code[0].type(response_text, {'delay': random.randint(100, 151) - 50})

input_username = await page.xpath("//input[@name='username']")
await input_username[0].type('admin', {'delay': random.randint(100, 151) - 50})

input_password = await page.xpath("//input[@name='password']")
await input_password[0].type('123456', {'delay': random.randint(100, 151) - 50})

四、自动登录

# 自动点击"立即登录"按钮
button = await page.xpath('//button[@type="submit"]')

# 如果找到了button,则执行回车操作
if button:
    await button[0].press('Enter')

Python完整代码:

import asyncio
from pyppeteer import launch
import pdb
import random

async def main():
    """Log in to the local demo site, reading the captcha through the backend.

    Opens the login page in a visible browser, reads the captcha image URL,
    POSTs it from the page to ``/getImgWord``, types the returned text and
    the fixed credentials into the form, and presses Enter on the submit
    button.
    """
    # Launch the browser with a visible window and open a tab
    browser = await launch({'headless': False})
    page = await browser.newPage()

    # Load the login page, then pause five seconds before touching the DOM
    await page.goto('http://localhost:8080/login.html')
    await asyncio.sleep(5)

    # Read the `src` property of the captcha element (id="vCode")
    captcha_nodes = await page.xpath("//*[@id='vCode']")
    src_handle = await captcha_nodes[0].getProperty('src')
    captcha_src = await src_handle.jsonValue()

    # JavaScript run inside the page: POST the payload as JSON and resolve
    # with the response body as text
    post_function = """(data) => {
        // 这里使用fetch API发起POST请求
        return fetch('http://localhost:8080/getImgWord', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(data)
        }).then(response => response.text())
    }"""

    # Evaluate the function with the image URL as its argument and wait for
    # the reply
    payload = {'img': captcha_src}
    response_text = await page.evaluate(post_function, payload)
    print(response_text)

    # Fill the form fields in order: captcha text, user name, password.
    # Each field gets its own `delay` option, an integer drawn from 50..101.
    form_values = (
        ("//input[@name='verifyCode']", response_text),
        ("//input[@name='username']", 'admin'),
        ("//input[@name='password']", '123456'),
    )
    for field_xpath, field_text in form_values:
        matches = await page.xpath(field_xpath)
        typing_options = {'delay': random.randint(100, 151) - 50}
        await matches[0].type(field_text, typing_options)

    # Submit by pressing Enter on the first submit button, if one was found
    submit_buttons = await page.xpath('//button[@type="submit"]')
    if submit_buttons:
        await submit_buttons[0].press('Enter')

    # NOTE: the browser is not closed here; enable the line below to close it
    # await browser.close()
    

# Entry point: run main() to completion on the asyncio event loop
event_loop = asyncio.get_event_loop()
event_loop.run_until_complete(main())

完整资源包:

https://download.csdn.net/download/svygh123/89254844