hh-bot/hh_bot/models/vacancy.py

248 lines
7.8 KiB
Python
Raw Normal View History

2025-06-27 09:57:34 +02:00
"""
📋 Модели данных для работы с вакансиями HH.ru
"""
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Any
import re
@dataclass
class Employer:
"""Информация о работодателе"""
id: str
name: str
url: Optional[str] = None
alternate_url: Optional[str] = None
logo_urls: Optional[Dict[str, str]] = None
vacancies_url: Optional[str] = None
trusted: bool = False
@dataclass
class Experience:
"""Информация об опыте работы"""
id: str
name: str
@dataclass
class Snippet:
"""Краткая информация о вакансии"""
requirement: Optional[str] = None
responsibility: Optional[str] = None
@dataclass
class Salary:
"""Информация о зарплате"""
from_value: Optional[int] = None
to_value: Optional[int] = None
currency: str = "RUR"
gross: bool = False
@dataclass
class Vacancy:
"""Модель вакансии HH.ru"""
id: str
name: str
alternate_url: str
employer: Employer
experience: Experience
snippet: Snippet
premium: bool = False
has_test: bool = False
response_letter_required: bool = False
archived: bool = False
apply_alternate_url: Optional[str] = None
ai_match_score: Optional[float] = None
ai_match_reasons: List[str] = field(default_factory=list)
salary: Optional[Salary] = None
@classmethod
def from_api_response(cls, data: Dict[str, Any]) -> "Vacancy":
"""Создание экземпляра из ответа API HH.ru"""
try:
employer_data = data.get("employer", {})
employer = Employer(
id=employer_data.get("id", ""),
name=employer_data.get("name", "Неизвестная компания"),
url=employer_data.get("url"),
alternate_url=employer_data.get("alternate_url"),
logo_urls=employer_data.get("logo_urls"),
vacancies_url=employer_data.get("vacancies_url"),
trusted=employer_data.get("trusted", False),
)
experience_data = data.get("experience", {})
experience = Experience(
id=experience_data.get("id", "noExperience"),
name=experience_data.get("name", "Без опыта"),
)
snippet_data = data.get("snippet", {})
snippet = Snippet(
requirement=snippet_data.get("requirement"),
responsibility=snippet_data.get("responsibility"),
)
salary = None
salary_data = data.get("salary")
if salary_data:
salary = Salary(
from_value=salary_data.get("from"),
to_value=salary_data.get("to"),
currency=salary_data.get("currency", "RUR"),
gross=salary_data.get("gross", False),
)
return cls(
id=data.get("id", ""),
name=data.get("name", "Без названия"),
alternate_url=data.get("alternate_url", ""),
employer=employer,
experience=experience,
snippet=snippet,
premium=data.get("premium", False),
has_test=data.get("has_test", False),
response_letter_required=data.get("response_letter_required", False),
archived=data.get("archived", False),
apply_alternate_url=data.get("apply_alternate_url"),
salary=salary,
)
except Exception as e:
print(f"❌ Ошибка парсинга вакансии: {e}")
return cls(
id=data.get("id", "unknown"),
name=data.get("name", "Ошибка загрузки"),
alternate_url=data.get("alternate_url", ""),
employer=Employer(id="", name="Неизвестно"),
experience=Experience(id="noExperience", name="Без опыта"),
snippet=Snippet(),
)
def has_python(self) -> bool:
"""Проверка упоминания Python в вакансии"""
text_to_check = (
f"{self.name} {self.snippet.requirement or ''} " f"{self.snippet.responsibility or ''}"
)
python_patterns = [
r"\bpython\b",
r"\bпайтон\b",
r"\bджанго\b",
r"\bflask\b",
r"\bfastapi\b",
r"\bpandas\b",
r"\bnumpy\b",
]
for pattern in python_patterns:
if re.search(pattern, text_to_check, re.IGNORECASE):
return True
return False
def is_junior_level(self) -> bool:
"""Проверка на junior уровень"""
junior_keywords = [
"junior",
"джуниор",
"стажер",
"стажёр",
"начинающий",
"intern",
"trainee",
"entry",
"младший",
]
text_to_check = f"{self.name} {self.snippet.requirement or ''}"
for keyword in junior_keywords:
if keyword.lower() in text_to_check.lower():
return True
return False
def get_salary_info(self) -> str:
"""Получение информации о зарплате в читаемом виде"""
if not self.salary:
return "Зарплата не указана"
from_val = self.salary.from_value
to_val = self.salary.to_value
currency = self.salary.currency
gross_suffix = " (до вычета налогов)" if self.salary.gross else " (на руки)"
if from_val and to_val:
return f"{from_val:,} - {to_val:,} {currency}{gross_suffix}"
elif from_val:
return f"от {from_val:,} {currency}{gross_suffix}"
elif to_val:
return f"до {to_val:,} {currency}{gross_suffix}"
else:
return "Зарплата не указана"
def get_full_text(self) -> str:
"""Получение полного текста вакансии для анализа"""
text_parts = [
self.name,
self.employer.name,
self.snippet.requirement or "",
self.snippet.responsibility or "",
self.experience.name,
]
return " ".join(filter(None, text_parts))
@dataclass
class ApplicationResult:
"""Результат подачи заявки на вакансию"""
vacancy_id: str
vacancy_name: str
success: bool
already_applied: bool = False
error_message: Optional[str] = None
timestamp: Optional[str] = None
def __post_init__(self):
"""Устанавливаем timestamp если не указан"""
if self.timestamp is None:
from datetime import datetime
self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@dataclass
class SearchStats:
"""Статистика поиска вакансий"""
total_found: int = 0
pages_processed: int = 0
filtered_count: int = 0
python_vacancies: int = 0
junior_vacancies: int = 0
with_salary: int = 0
without_test: int = 0
def __str__(self) -> str:
return f"""
📊 Статистика поиска:
📋 Всего найдено: {self.total_found}
📄 Страниц обработано: {self.pages_processed}
Прошло фильтрацию: {self.filtered_count}
🐍 Python вакансий: {self.python_vacancies}
👶 Junior уровня: {self.junior_vacancies}
💰 С указанной ЗП: {self.with_salary}
📝 Без тестов: {self.without_test}
"""