2025-01-20 12:04:20 +09:00

117 lines
4.8 KiB
Python

import logging
import re
from datetime import date, datetime, timedelta
from typing import List
from urllib.parse import unquote
from bs4 import SoupStrainer
from epg2xml.providers import EPGProgram, EPGProvider, no_endtime
from epg2xml.utils import ParserBeautifulSoup as BeautifulSoup
# Logger named after the last dotted component of this module's name, upper-cased.
log = logging.getLogger(__name__.rpartition(".")[2].upper())

# Channel categories requested from the KT channel list, as (menu id, display name) pairs.
# Menu id "0" stands for "all channels" and is intentionally not listed here.
CH_CATE = [
    {"id": menu_id, "name": label}
    for menu_id, label in (
        ("1", "UHD"),
        ("3", "홍보"),
        ("4", "지상파"),
        ("5", "홈쇼핑"),
        ("6", "종합편성"),
        ("8", "드라마/버라이어티"),
        ("10", "오락/음악"),
        ("12", "영화/시리즈"),
        ("137", "스포츠"),
        ("206", "취미/레저"),
        ("317", "애니/유아"),
        ("442", "교육"),
        ("446", "다큐/교양"),
        ("447", "뉴스/경제"),
        ("448", "공공/공익/정보"),
        ("449", "종교"),
        ("491", "오픈채널"),
        ("507", "유료"),
        ("508", "오디오"),
    )
]

# Captures a leading run of digits/commas; used to pull the age out of a rating label.
PTN_RATING = re.compile(r"([\d,]+)")
class KT(EPGProvider):
    """EPGProvider for KT

    Data: raw HTML
    Requests: #channels * #days
    Notes:
    - updates are occasionally delayed
    - only program start times are provided
    """

    referer = "https://tv.kt.com/"
    # Separates an optional trailing "<N>부" part marker (optionally wrapped in <> or ()) from the title.
    # NOTE(review): declared as a plain string here but used through `.match()` in
    # `__epgs_of_day`, so the base class presumably compiles it — confirm.
    title_regex = r"^(?P<title>.*?)\s?([\<\(]?(?P<part>\d+)부[\>\)]?)?$"

    def get_svc_channels(self) -> List[dict]:
        """Collect the channels KT lists under every category in ``CH_CATE``.

        One POST is issued per category. Each ``li > a`` entry's ``span.ch`` text is
        URL-unquoted and split on whitespace: the first token becomes both ``No`` and
        ``ServiceId``, the remaining tokens (re-joined with single spaces) become ``Name``.

        Returns:
            A list of dicts with the keys ``Name``, ``No``, ``ServiceId`` and ``Category``.
        """
        endpoint = "https://tv.kt.com/tv/channel/pChList.asp"
        form = {"ch_type": "1", "parent_menu_id": "0"}
        found = []
        for category in CH_CATE:
            form["parent_menu_id"] = category["id"]
            soup = BeautifulSoup(self.request(endpoint, method="POST", data=form))
            labels = []
            for anchor in soup.select("li > a"):
                span = anchor.find("span", {"class": "ch"})
                labels.append(unquote(span.text.strip()))
            # Some channels (TV-only, paid channels) are blocked on the web
            # but may actually still have data.
            for label in labels:
                tokens = label.split()
                found.append(
                    {
                        "Name": " ".join(tokens[1:]),
                        "No": str(tokens[0]),
                        "ServiceId": tokens[0],
                        "Category": category["name"],
                    }
                )
        return found

    @no_endtime
    def get_programs(self) -> None:
        """Fetch and parse the daily schedule of every requested channel.

        For each channel in ``self.req_channels`` and each of the next
        ``cfg["FETCH_LIMIT"]`` days (starting today), one POST is issued and the parsed
        programs are appended to the channel's ``programs`` list. A parsing failure is
        logged with its traceback and only that channel/day is skipped.

        NOTE(review): ``no_endtime`` presumably derives end times afterwards, since this
        provider only yields start times — confirm against the decorator.
        """
        endpoint = "https://tv.kt.com/tv/channel/pSchedule.asp"
        form = {
            "ch_type": "1",  # 1: live 2: skylife 3: uhd live 4: uhd skylife
            "view_type": "1",  # 1: daily 2: weekly
            "service_ch_no": "SVCID",  # placeholder, overwritten before every request
            "seldate": "EPGDATE",  # placeholder, overwritten before every request
        }
        for position, channel in enumerate(self.req_channels, start=1):
            log.info("%03d/%03d %s", position, len(self.req_channels), channel)
            for offset in range(int(self.cfg["FETCH_LIMIT"])):
                day = date.today() + timedelta(days=offset)
                form["service_ch_no"] = channel.svcid
                form["seldate"] = day.strftime("%Y%m%d")
                html = self.request(endpoint, method="POST", data=form)
                try:
                    parsed = self.__epgs_of_day(channel.id, html, day)
                except Exception:
                    # Best effort: log the failure and move on to the next day.
                    log.exception("프로그램 파싱 중 예외: %s, %s", channel, day)
                    continue
                channel.programs.extend(parsed)

    def __epgs_of_day(self, channelid: str, data: str, day: datetime) -> List[EPGProgram]:
        """Parse one day's schedule HTML into ``EPGProgram`` objects.

        Each ``tr`` inside ``tbody`` is read as: first ``td`` = hour, the remaining ``td``
        columns hold parallel ``p`` lists that are zipped into (minute, program, category)
        triples.

        Args:
            channelid: Channel id handed to every ``EPGProgram``.
            data: URL-quoted HTML returned by the schedule endpoint.
            day: Day the schedule belongs to. NOTE(review): annotated as ``datetime`` but
                ``get_programs`` passes a ``datetime.date``; the ``%Y-%m-%d`` parse below
                relies on ``str(day)`` being a bare ISO date.

        Returns:
            The programs found, in document order.
        """
        soup = BeautifulSoup(unquote(data), parse_only=SoupStrainer("tbody"))
        collected = []
        for row in soup.find_all("tr"):
            cells = row.find_all("td")
            hour = cells[0].text.strip()
            columns = [col.find_all("p") for col in cells[1:]]
            for minute, program, category in zip(*columns):
                _epg = EPGProgram(channelid)
                _epg.stime = datetime.strptime(f"{day} {hour}:{minute.text.strip()}", "%Y-%m-%d %H:%M")
                # Drop the "방송중 " (now airing) marker that can be part of the title text.
                _epg.title = program.text.replace("방송중 ", "").strip()
                matched = self.title_regex.match(_epg.title)
                if matched:
                    _epg.title = matched.group("title")
                    part_num = matched.group("part")
                    if part_num:
                        # Re-attach the part number in a normalized "(N부)" form.
                        _epg.part_num = part_num
                        _epg.title += f" ({_epg.part_num}부)"
                _epg.categories = [category.text.strip()]
                for image in program.find_all("img", alt=True):
                    alt = image["alt"]
                    # Only icons whose alt text contains "시청 가능" are treated as rating labels.
                    if "시청 가능" in alt:
                        grade = PTN_RATING.match(alt)
                        if grade:
                            _epg.rating = int(grade.group(1))
                        else:
                            # No leading number in the label: rating 0.
                            _epg.rating = 0
                collected.append(_epg)
        return collected