107 lines
4.2 KiB
Python
107 lines
4.2 KiB
Python
import logging
|
|
from datetime import date, datetime, timedelta
|
|
from typing import List
|
|
from xml.sax.saxutils import unescape
|
|
|
|
from epg2xml.providers import EPGProgram, EPGProvider, no_endtime
|
|
from epg2xml.utils import ParserBeautifulSoup as BeautifulSoup
|
|
|
|
# Logger named after the last dotted component of this module's name, upper-cased.
log = logging.getLogger(__name__.rpartition(".")[2].upper())

# Reference date, captured once when the module is imported.
today = date.today()

# Channel categories as dicts with a display name ("name") and the value sent
# as the "u1" request parameter when listing channels of that category.
CH_CATE = [
    {"name": category_name, "u1": category_code}
    for category_name, category_code in (
        ("지상파", "100"),
        ("종합 편성", "500"),
        ("케이블", "200"),
        ("스카이라이프", "300"),
        ("해외위성", "9000"),
        ("라디오", "400"),
    )
]
|
|
|
|
|
|
class NAVER(EPGProvider):
    """EPGProvider for NAVER

    Data: rawhtml
    Number of requests: #channels * #days
    Notes:
      - Only program start times are provided
    """

    referer = "https://m.search.naver.com/search.naver?where=m&query=%ED%8E%B8%EC%84%B1%ED%91%9C"

    @staticmethod
    def _response_status(data) -> str:
        """Return the ``statusCode`` field of a response as a string.

        Returns an empty string when *data* is not a dict or has no
        ``statusCode`` key, so callers can compare it without raising.
        """
        # NOTE(review): self.request is defined outside this file, so what it
        # returns on a failed request is not visible here; guard defensively.
        if not isinstance(data, dict):
            return ""
        return str(data.get("statusCode", ""))

    def get_svc_channels(self) -> List[dict]:
        """Collect the channels listed under every category in ``CH_CATE``.

        One request is issued per category. A category whose response status
        is not "success" is logged and skipped.

        Returns:
            A list of dicts with the keys ``Name``, ``ServiceId`` and
            ``Category`` (the category's display name).
        """
        svc_channels = []
        url = "https://m.search.naver.com/p/csearch/content/nqapirender.nhn"
        params = {
            "key": "ScheduleChannelList",
            "where": "nexearch",
            "pkid": "66",
            "u1": "CATEGORY_CODE",  # placeholder, replaced per category below
        }
        for c in CH_CATE:
            params.update({"u1": c["u1"]})
            data = self.request(url, params=params)
            status = self._response_status(data)
            if status.lower() != "success":
                log.error("유효한 응답이 아닙니다: %s", status)
                continue
            soup = BeautifulSoup(data["dataHtml"])
            for ch in soup.select('li[class="item"]'):
                try:
                    svcid = ch.select("div > div[data-cid]")[0]["data-cid"]
                    name = str(ch.select('div[class="channel_name"] > a')[0].text)
                except (IndexError, KeyError) as e:
                    # Best effort: an item lacking the expected markup is
                    # skipped, but leave a trace instead of hiding it.
                    log.debug("채널 항목 파싱 실패 (%s): %r", c["name"], e)
                    continue
                svc_channels.append(
                    {
                        "Name": name,
                        "ServiceId": svcid,
                        "Category": c["name"],
                    }
                )
        return svc_channels

    @no_endtime
    def get_programs(self) -> None:
        """Fetch daily schedules for every requested channel.

        For each channel in ``self.req_channels`` and each of the
        ``self.cfg["FETCH_LIMIT"]`` days starting today, one request is made
        and the parsed programs are appended to the channel's ``programs``.
        A day whose response is not "success", or whose parsing raises, is
        logged and skipped.
        """
        # NOTE(review): no_endtime is imported from epg2xml.providers;
        # presumably it fills in the end times this source lacks - confirm.
        url = "https://m.search.naver.com/p/csearch/content/nqapirender.nhn"
        params = {"key": "SingleChannelDailySchedule", "where": "m", "pkid": "66", "u1": "SVCID", "u2": "EPGDATE"}

        fetch_days = int(self.cfg["FETCH_LIMIT"])
        total = len(self.req_channels)
        # Take the date once per run rather than at import time, so a
        # long-lived process does not keep requesting a stale range, while
        # all channels in this run still share the same base day.
        first_day = date.today()

        for idx, _ch in enumerate(self.req_channels, start=1):
            log.info("%03d/%03d %s", idx, total, _ch)
            for nd in range(fetch_days):
                day = first_day + timedelta(days=nd)
                params.update({"u1": _ch.svcid, "u2": day.strftime("%Y%m%d")})
                data = self.request(url, params=params)
                status = self._response_status(data)
                if status.lower() != "success":
                    log.error("유효한 응답이 아닙니다: %s %s", _ch, status)
                    continue
                try:
                    _epgs = self.__epgs_of_day(_ch.id, data, day)
                except Exception:
                    log.exception("프로그램 파싱 중 예외: %s, %s", _ch, day)
                else:
                    _ch.programs.extend(_epgs)

    def __epgs_of_day(self, channelid: str, data: dict, day: date) -> List[EPGProgram]:
        """Parse one day's schedule HTML into a list of programs.

        Args:
            channelid: Identifier passed to each ``EPGProgram`` created.
            data: Response dict; its ``dataHtml`` value holds the HTML.
            day: Calendar day the schedule belongs to; combined with each
                row's "HH:MM" text to build the start time.

        Returns:
            One ``EPGProgram`` per ``<li class="list">`` row, in document order.

        Raises:
            IndexError: A row has fewer ``<div>`` cells than the title,
                time and badge positions read here.
            ValueError: A row's time text is not in "%H:%M" form.
        """
        _epgs = []
        # "".join accepts both a single string and a list of fragments.
        soup = BeautifulSoup("".join(data["dataHtml"]))
        for row in soup.find_all("li", {"class": "list"}):
            cell = row.find_all("div")
            _epg = EPGProgram(channelid)
            _epg.title = unescape(cell[4].text.strip())
            # Format the date explicitly so the string always matches the
            # strptime pattern, whether a date or a datetime is passed.
            _epg.stime = datetime.strptime(f"{day:%Y-%m-%d} {cell[1].text.strip()}", "%Y-%m-%d %H:%M")
            for span in cell[3].find_all("span"):
                span_txt = span.text.strip()
                # A span without a class attribute is treated as a plain extra.
                span_classes = span.get("class") or []
                if "ico_age" in span_classes:
                    try:
                        _epg.rating = int(span_txt.rstrip("세"))
                    except ValueError:
                        # A non-numeric rating label must not cost the
                        # whole day's schedule; leave the rating unset.
                        log.debug("시청 등급 파싱 실패: %r", span_txt)
                elif "re" in span_classes:
                    _epg.rebroadcast = True
                else:
                    _epg.extras = (_epg.extras or []) + [span_txt]
            # The subtitle cell is optional.
            if len(cell) > 5:
                _epg.title_sub = cell[5].text.strip()
            _epgs.append(_epg)
        return _epgs
|