【agent】简历信息提取智能体-EW帮帮网

本文介绍一个简历信息提取系统：使用Pydantic模型定义数据结构，并通过大语言模型（如GPT）从非结构化的简历文本中提取结构化信息。以下是详细解析：

核心功能

数据建模：用Pydantic的Resume类严格定义简历字段和校验规则。
格式转换：自动统一日期格式（如1990-05-15 → 05-15-1990）。
大模型交互：通过Prompt工程让AI提取信息并返回标准JSON。

核心代码

import json
from datetime import datetime, date
from typing import List, Optional

from pydantic import BaseModel, Field, field_validator, EmailStr, model_validator

# Structured resume record. Defining this pydantic model well is the key to
# the whole approach: it fixes which fields exist and how they are validated.
# NOTE: documented with comments rather than a class docstring on purpose --
# pydantic copies a model's docstring into its generated JSON schema, so a
# docstring here would change the schema text.
class Resume(BaseModel):
    # Every field is optional and defaults to None so a partially filled
    # resume still validates.
    name: Optional[str] = Field(None, description="求职者姓名，如果没找到就置为空字符串")
    city: Optional[str] = Field(None, description="求职者居住地，如果没找到就置为空字符串")
    birthday: Optional[str] = Field(None, description="求职者生日，如果没找到就置为空字符串")
    phone: Optional[str] = Field(None, description="求职者手机号，如果没找到就置为空字符串")
    email: Optional[str] = Field(None, description="求职者邮箱，如果没找到就置为空字符串")
    education: Optional[List[str]] = Field(None, description="求职者教育背景")
    experience: Optional[List[str]] = Field(None, description="求职者工作或实习经历，如果没找到就置为空字符串")
    project: Optional[List[str]] = Field(None, description="求职者项目经历，如果没找到就置为空字符串")
    certificates: Optional[List[str]] = Field(None, description="求职者资格证书，如果没找到就置为空字符串")

    @field_validator("birthday", mode="before")
    @classmethod
    def validate_and_convert_date(cls, raw_date):
        """Normalize ``birthday`` to an ``MM-DD-YYYY`` string.

        Args:
            raw_date: ``None``, a date string, or a ``date``/``datetime`` object.

        Returns:
            The date formatted as ``MM-DD-YYYY``, or ``None`` when the value is
            ``None``, blank, or the placeholder ``n/a`` (i.e. no birthday found).

        Raises:
            ValueError: If a non-blank string matches none of the accepted
                formats, or the value is neither a string nor a date object.
        """
        if raw_date is None:
            return None

        if isinstance(raw_date, str):
            candidate = raw_date.strip()
            # The field description asks for an empty string when the birthday
            # is missing; treat that (and "n/a") as "no value" instead of
            # rejecting it as an unparsable date.
            if not candidate or candidate.lower() == "n/a":
                return None

            # Formats are tried in order, so an ambiguous value such as
            # "03-04-1990" is read as day-month-year first.
            date_formats = ['%d-%m-%Y', '%Y-%m-%d', '%d/%m/%Y', '%m-%d-%Y']
            for fmt in date_formats:
                try:
                    parsed_date = datetime.strptime(candidate, fmt).date()
                except ValueError:
                    continue  # Try the next format
                return parsed_date.strftime('%m-%d-%Y')

            raise ValueError(
                f"Invalid date format for 'birthday'. Expected one of: {', '.join(date_formats)}."
            )

        if isinstance(raw_date, date):
            # Covers datetime as well, since datetime subclasses date.
            return raw_date.strftime('%m-%d-%Y')

        raise ValueError(
            "Invalid type for 'birthday'. Must be a string or a date object."
        )

class ResumeOpenAI:
    """Extract structured resume data from free text via a chat-completion model.

    The JSON schema of ``Resume`` is embedded in a prompt template; ``run``
    sends the filled prompt to the model and returns the raw reply text.
    """

    def __init__(self):
        """Build the ``Resume`` JSON schema and the prompt template."""
        self.resume_profile = Resume()
        self.output_schema = self.resume_profile.model_json_schema()
        # NOTE(review): the prompt asks for `n/a` on missing fields while the
        # schema's field descriptions ask for an empty string -- confirm which
        # convention downstream code expects.
        self.template = """
        You are an expert in analyzing resumes. Use the following JSON schema to extract relevant information:
        ```json
        {output_schema}
        ```
        Extract the information from the following document and provide a structured JSON response strictly adhering to the schema above. 
        Please remove any ```json ``` characters from the output. Do not make up any information. If a field cannot be extracted, mark it as `n/a`.
        Document:
        ----------------
        {resume_content}
        ----------------
        """

    def create_prompt(self, output_schema, resume_content):
        """Fill the prompt template.

        Args:
            output_schema: The schema to show the model. A non-string value
                (e.g. the dict from ``model_json_schema()``) is serialized to
                real JSON; a string is inserted unchanged.
            resume_content: The raw resume text.

        Returns:
            The complete prompt string.
        """
        if isinstance(output_schema, str):
            schema_text = output_schema
        else:
            # str() of a dict is a Python repr (single quotes), not JSON;
            # serialize explicitly and keep non-ASCII descriptions readable.
            schema_text = json.dumps(output_schema, ensure_ascii=False)
        return self.template.format(
            output_schema=schema_text,
            resume_content=resume_content
        )

    def run(self, resume_content):
        """Send the resume to the model and return its reply.

        Uses the names ``client`` and ``chat_model``, which are not defined in
        this section and are expected to exist at module level.

        Args:
            resume_content: The raw resume text.

        Returns:
            The content of the first choice's message, or ``None`` if the
            request failed (the error is printed).
        """
        # Pre-initialize so a failed request returns None instead of raising
        # UnboundLocalError at the return statement.
        result = None
        try:
            response = client.chat.completions.create(
                model=chat_model,
                # Not every model supports response_format; check that the
                # model being called accepts this parameter.
                # Qwen and Zhipu models generally support it.
                response_format={ "type": "json_object" },
                messages=[
                    {"role": "system", "content": "你是一位专业的简历信息提取专家。"},
                    {"role": "user", "content": self.create_prompt(self.output_schema, resume_content)}
                ],
            )

            result = response.choices[0].message.content
        except Exception as e:
            print(f"Error occurred: {e}")

        return result

# Module-level extractor instance; constructing it builds the Resume JSON
# schema and the prompt template.
resume_openai = ResumeOpenAI()

【agent】简历信息提取智能体

核心功能

核心代码

网站公告

今日签到

热门文章

最新发布