""" @header({ searchable: 1, filterable: 1, quickSearch: 1, title: '高清电影天堂', author: '完全修复推送版', lang: 'hipy' }) """ import sys import json import re from urllib.parse import quote_plus, unquote sys.path.append('..') from base.spider import Spider class Spider(Spider): def getName(self): return "高清电影天堂" def init(self, extend=""): self.baseUrl = "https://www.gaoqing888.com" self.siteUrl = self.baseUrl def homeContent(self, filter): return { 'class': [ {'type_name': '每日更新', 'type_id': 'home'}, {'type_name': '选电影', 'type_id': 'movie'} ] } def homeVideoContent(self): result = [] try: html = self.fetch(self.baseUrl, headers=self._get_header()).text if not html: return {'list': result} # 从首页提取视频 video_matches = self._parse_video_items(html) for match in video_matches[:15]: # 只取前15个 try: vod_id = match[0].strip() vod_name = match[1].strip() vod_pic = match[2].strip() if len(match) > 2 else "" p_content = match[3] if len(match) > 3 else "" vod_name = self._clean_text(vod_name) # 提取评分 vod_rating = "" if p_content: rating_match = re.search(r']*title="评分">([^<]+)', p_content, re.S) vod_rating = rating_match.group(1).strip() if rating_match else "" # 检查是否可播 is_playable = bool(re.search(r'playable fa fa-play-circle-o', p_content, re.S)) if p_content else False remarks = [] if vod_rating and vod_rating not in ["0", "0.0"]: remarks.append(f"评分:{vod_rating}") if is_playable: remarks.append("可播") result.append({ "vod_id": vod_id, "vod_name": vod_name, "vod_pic": vod_pic, "vod_remarks": " ".join(remarks) if remarks else "" }) except: continue except Exception as e: print(f"homeVideoContent error: {str(e)}") return {'list': result} def categoryContent(self, tid, pg, filter, extend): result = {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 999999} try: # 构建URL if tid == "home": url = f"{self.baseUrl}/?page={pg}" if pg and int(pg) > 1 else self.baseUrl elif tid == "movie": url = f"{self.baseUrl}/movie?page={pg}" if pg and int(pg) > 1 else f"{self.baseUrl}/movie" else: url = f"{self.baseUrl}/{tid}?page={pg}" if pg and int(pg) > 1 else f"{self.baseUrl}/{tid}" html = self.fetch(url, headers=self._get_header()).text if not html: return result # 提取视频列表 video_matches = self._parse_video_items(html) for match in video_matches: try: vod_id = match[0].strip() vod_name = match[1].strip() vod_pic = match[2].strip() if len(match) > 2 else "" p_content = match[3] if len(match) > 3 else "" if not vod_id or not vod_name: continue vod_name = self._clean_text(vod_name) # 提取评分 vod_rating = "" if p_content: rating_match = re.search(r']*title="评分">([^<]+)', p_content, re.S) vod_rating = rating_match.group(1).strip() if rating_match else "" # 检查是否可播 is_playable = bool(re.search(r'playable fa fa-play-circle-o', p_content, re.S)) if p_content else False remarks = [] if vod_rating and vod_rating not in ["0", "0.0"]: remarks.append(f"评分:{vod_rating}") if is_playable: remarks.append("可播") result['list'].append({ "vod_id": vod_id, "vod_name": vod_name, "vod_pic": vod_pic, "vod_remarks": " ".join(remarks) if remarks else "" }) except: continue # 提取总页数 result['pagecount'] = self._get_page_count(html, pg) except Exception as e: print(f"categoryContent error: {str(e)}") return result def detailContent(self, ids): if not ids: return {'list': []} vod_id = str(ids[0]).strip() url = f'{self.baseUrl}/{vod_id}/detail' try: html = self.fetch(url, headers=self._get_header()).text if not html: return {'list': []} # 提取标题 title_match = re.search(r'
(.*?)
', r']*href="[^"]*/(\d+)/detail"[^>]*>.*?]*>(.*?)
', r']*class="[^"]*desc[^"]*"[^>]*>(.*?)
' ] for pattern in desc_patterns: desc_match = re.search(pattern, html, re.S) if desc_match: desc = desc_match.group(1).strip() return self._clean_text(desc) return '' def _extract_video_info(self, html): """提取视频基本信息""" info = {} meta_pattern = r'