# -*- coding: utf-8 -*- # by @Qist import re import sys import json import time from pyquery import PyQuery as pq from base.spider import Spider import requests from bs4 import BeautifulSoup class Spider(Spider): def getName(self): return "闪雷影视" def init(self, extend=""): pass host = 'http://60.6.229.145:88' ip = '60.6.229.145' header = { 'User-Agent': 'Mozilla/5.0 (Linux; Android 10; SM-G960F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Mobile Safari/537.36', } encoding = 'gb2312' class_names = '电视剧&大陆地区&港台地区&日韩地区&欧美地区&其他地区&动作片&喜剧片&恐怖片&科幻片&战争片&动画片&爱情片&综艺片&剧情片&MTV'.split('&') class_urls = '10&20&21&22&23&24&1&2&3&4&5&6&7&8&9&12'.split('&') def homeContent(self, filter): """ 获取首页内容 """ try: result = {'class': [], 'list': []} for name, cid in zip(self.class_names, self.class_urls): result['class'].append({'type_name': name, 'type_id': cid}) # 获取首页推荐影片 url = f"{self.host}/jdl/List.asp?ClassId=10" resp = self.fetch(url, headers=self.header) data = self.getpq(resp.text) videos = [] # 使用更兼容的选择器 - 先选所有的dl,然后过滤有classid=h4的dd的dl all_dls = data('dl') for dl in all_dls.items(): h4_dd = dl('dd[classid="h4"]') if h4_dd.length > 0: title = h4_dd('a').text() pic = dl('dt img').attr('src') if pic and pic.startswith('../'): pic = self.host + '/' + pic.replace('../', '') elif pic and not pic.startswith('http'): pic = self.host + '/' + pic.lstrip('/') href = h4_dd('a').attr('href') if href: # 从href中提取ClassId vid_match = re.search(r'[Cc]lass[Ii][Dd]=(\d+)', href) vid = vid_match.group(1) if vid_match else href videos.append({ 'vod_id': vid, 'vod_name': title, 'vod_pic': pic, 'vod_remarks': '' }) result['list'] = videos[:10] # 只取前10个 return result except Exception as e: print(f"Error in homeContent: {str(e)}") import traceback traceback.print_exc() return {"class": [], "list": []} def categoryContent(self, tid, pg, filter, extend): """ 获取分类内容 """ try: url = f"{self.host}/jdl/List.asp?ClassId={tid}&searchword=&page={pg}" resp = self.fetch(url, headers=self.header) data = self.getpq(resp.text) result = {} videos = [] # 使用相同的选择器逻辑 all_dls = data('dl') for dl in all_dls.items(): h4_dd = dl('dd[classid="h4"]') if h4_dd.length > 0: title = h4_dd('a').text() pic = dl('dt img').attr('src') if pic and pic.startswith('../'): pic = self.host + '/' + pic.replace('../', '') elif pic and not pic.startswith('http'): pic = self.host + '/' + pic.lstrip('/') href = h4_dd('a').attr('href') if href: # 从href中提取ClassId vid_match = re.search(r'[Cc]lass[Ii][Dd]=(\d+)', href) vid = vid_match.group(1) if vid_match else href videos.append({ 'vod_id': vid, 'vod_name': title, 'vod_pic': pic, 'vod_remarks': '' }) result['list'] = videos result['page'] = int(pg) result['pagecount'] = 999 # 假设有很多页 result['limit'] = len(videos) result['total'] = 999999 return result except Exception as e: print(f"Error in categoryContent: {str(e)}") import traceback traceback.print_exc() return {"list": [], "page": 1, "pagecount": 1, "limit": 0, "total": 0} def detailContent(self, ids): """ 获取影片详情 """ try: url = f"{self.host}/jdl/movie.asp?ClassId={ids[0]}" resp = self.fetch(url, headers=self.header) data = self.getpq(resp.text) vod = { 'vod_id': ids[0], 'vod_name': data('li[classid="h4"]').text(), 'vod_pic': '', 'vod_remarks': '', 'vod_year': '', 'vod_area': '', 'vod_director': '', 'vod_actor': '', 'vod_content': '' } # 获取封面 cover_img = data('.intro .img img').attr('src') if cover_img and cover_img.startswith('../'): vod['vod_pic'] = f"{self.host}/{cover_img[3:]}" elif cover_img and not cover_img.startswith('http'): vod['vod_pic'] = f"{self.host}/{cover_img}" else: vod['vod_pic'] = cover_img # 获取演员信息 actor_info = data('li:contains("主  演")').text() if actor_info: vod['vod_actor'] = actor_info.replace('主  演:', '').strip() # 获取内容信息 content_parts = [] selectors = ['li:contains("状  态")', 'li:contains("类  型")', 'li:contains("拍摄地区")', 'li:contains("更新时间")', 'li:contains("单集时长")'] for sel in selectors: part = data(sel).text() if part: content_parts.append(part) vod['vod_content'] = '\n'.join(content_parts) # 获取播放列表 play_urls = [] # 根据j.js中的规则: div.listt a for a in data('div.listt a').items(): title = a.text() href = a.attr('href') if title and href: # 从href构造播放地址 play_link = f"{self.host}{href}" play_urls.append(f"{title}${play_link}") vod['vod_play_from'] = '闪雷影视' vod['vod_play_url'] = '#'.join(play_urls) if play_urls else '无播放源' result = {"list": [vod]} return result except Exception as e: print(f"Error in detailContent: {str(e)}") import traceback traceback.print_exc() return {"list": []} def searchContent(self, key, quick, pg="1"): """ 搜索内容 """ try: url = f"{self.host}/jdl/List.asp?ClassId=30&type=&searchword={key}&page={pg}" resp = self.fetch(url, headers=self.header) data = self.getpq(resp.text) videos = [] # 使用相同的选择器逻辑 all_dls = data('dl') for dl in all_dls.items(): h4_dd = dl('dd[classid="h4"]') if h4_dd.length > 0: title = h4_dd('a').text() pic = dl('dt img').attr('src') if pic and pic.startswith('../'): pic = self.host + '/' + pic.replace('../', '') elif pic and not pic.startswith('http'): pic = self.host + '/' + pic.lstrip('/') href = h4_dd('a').attr('href') if href: # 从href中提取ClassId vid_match = re.search(r'[Cc]lass[Ii][Dd]=(\d+)', href) vid = vid_match.group(1) if vid_match else href videos.append({ 'vod_id': vid, 'vod_name': title, 'vod_pic': pic, 'vod_remarks': '' }) return {'list': videos, 'page': pg} except Exception as e: print(f"Error in searchContent: {str(e)}") import traceback traceback.print_exc() return {'list': [], 'page': pg} def playerContent(self, flag, id, vipFlags): """ 播放内容 """ try: # id 现在是播放页面的完整URL,格式类似: ClassId,xx,xx,movNo if ',' in id: parts = id.split(',') classid = parts[2] movno = parts[3] play_url = f"{self.host}/PlayMov.asp?ClassId={classid}&video=2&exe=0&down=0&movNo={movno}&vgver=undefined&ClientIP={self.ip}" else: play_url = id # 已经是完整的播放页面URL # 获取播放页面内容 resp = self.fetch(play_url, headers=self.header) html = resp.text # 根据j.js中的lazy规则进行解析 # var url = request(html).match(/videoarr\.push\('(.*?)'/)[1] video_match = re.search(r"videoarr\.push\(['\"](.*?)['\"]\)", html) if video_match: video_url = video_match.group(1) # 处理URL,替换域名部分 # url = url.replace(/https?:\/\/(?:[\d.]+|[\w\-]+)(?::\d+)?\//, rule.host + '/') video_url = re.sub(r'https?://(?:[\d.]+|[\w\-]+)(?::\d+)?/', f'{self.host}/', video_url) result = { "parse": 0, "url": video_url, "header": self.header, "playUrl": "" } return result else: print(f"Warning: Could not extract video URL from {play_url}") # 尝试寻找其他可能的视频URL # 匹配 player.open("...") 调用 js_open_matches = re.findall(r'player\.open\s*\(\s*["\']([^"\']+)["\']\s*\)', html) if js_open_matches: video_url = js_open_matches[0] if not video_url.startswith('http'): video_url = self.host + '/' + video_url.lstrip('/') result = { "parse": 0, "url": video_url, "header": self.header, "playUrl": "" } return result # 如果还是找不到,返回错误 return { "parse": 1, "url": play_url } except Exception as e: print(f"Error in playerContent: {str(e)}") import traceback traceback.print_exc() return { "parse": 1, "url": id } def getpq(self, data): try: return pq(data) except Exception as e: print(f"Error parsing data: {str(e)}") return pq(data.encode('utf-8')) def fetch(self, url, headers=None): """ 发送HTTP请求 """ session = requests.Session() if headers: session.headers.update(headers) else: session.headers.update(self.header) # 增加超时时间以适应响应较慢的服务 response = session.get(url, timeout=25) response.encoding = self.encoding return response def post(self, url, headers=None, data=None): """ 发送POST请求 """ session = requests.Session() if headers: session.headers.update(headers) else: session.headers.update(self.header) response = session.post(url, data=data, timeout=25) response.encoding = self.encoding return response