@ -204,35 +204,51 @@ class Spider(Spider):
if ' 导演: ' in info_text :
if ' 导演: ' in info_text :
director_match = re . search ( r ' 导演:([^/]+) ' , info_text )
director_match = re . search ( r ' 导演:([^/]+) ' , info_text )
if director_match : vod [ ' vod_director ' ] = director_match . group ( 1 ) . strip ( )
if director_match : vod [ ' vod_director ' ] = director_match . group ( 1 ) . strip ( )
playFrom = [ ]
playFrom = [ ]
playList = [ ]
playList = [ ]
h3_elements = doc ( ' h3 ' )
play_links = doc ( ' a[href*= " /play/ " ] ' )
# 改进的播放线路提取逻辑
for i in range ( h3_elements . length ) :
vodlist_heads = doc ( ' .stui-vodlist__head ' )
header_elem = h3_elements . eq ( i )
for i in range ( vodlist_heads . length ) :
header_text = header_elem . text ( ) . strip ( )
head = vodlist_heads . eq ( i )
if any ( keyword in header_text for keyword in [ ' 播放 ' , ' 下载 ' , ' BD5 ' , ' UC ' , ' 夸克 ' ] ) :
h3_elem = head . find ( ' h3 ' )
playFrom . append ( header_text )
if h3_elem . length == 0 :
vodItems = [ ]
continue
for j in range ( play_links . length ) :
try :
header_text = h3_elem . text ( ) . strip ( )
link = play_links . eq ( j )
if not any ( keyword in header_text for keyword in [ ' 播放 ' , ' 下载 ' , ' BD5 ' , ' UC ' , ' 夸克 ' ] ) :
href = link . attr ( ' href ' )
continue
name = link . text ( ) . strip ( )
if not href or not name : continue
playFrom . append ( header_text )
tId_match = re . search ( r ' /play/([^.]+) \ .html ' , href )
vodItems = [ ]
if not tId_match : continue
tId = tId_match . group ( 1 )
# 提取当前播放线路下的所有播放链接
if ' BD5 ' in header_text and ' -1- ' in tId : vodItems . append ( name + " $ " + tId )
play_links = head . find ( ' a[href*= " /play/ " ] ' )
elif ' UC ' in header_text and ' -2- ' in tId : vodItems . append ( name + " $ " + tId )
for j in range ( play_links . length ) :
elif ' 夸克 ' in header_text and ' -3- ' in tId : vodItems . append ( name + " $ " + tId )
try :
except : continue
link = play_links . eq ( j )
if vodItems : playList . append ( ' # ' . join ( vodItems ) )
href = link . attr ( ' href ' )
else : playList . append ( " " )
name = link . text ( ) . strip ( )
if not href or not name :
continue
tId_match = re . search ( r ' /play/([^.]+) \ .html ' , href )
if not tId_match :
continue
tId = tId_match . group ( 1 )
vodItems . append ( name + " $ " + tId )
except :
continue
playList . append ( ' # ' . join ( vodItems ) if vodItems else " " )
vod [ ' vod_play_from ' ] = ' $$$ ' . join ( playFrom ) if playFrom else " "
vod [ ' vod_play_from ' ] = ' $$$ ' . join ( playFrom ) if playFrom else " "
vod [ ' vod_play_url ' ] = ' $$$ ' . join ( playList ) if playList else " "
vod [ ' vod_play_url ' ] = ' $$$ ' . join ( playList ) if playList else " "
result = { ' list ' : [ vod ] }
result = { ' list ' : [ vod ] }
return result
return result
def searchContent ( self , key , quick ) :
def searchContent ( self , key , quick ) :
url = ' https://www.libvio.site/index.php/ajax/suggest?mid=1&wd= {0} ' . format ( key )
url = ' https://www.libvio.site/index.php/ajax/suggest?mid=1&wd= {0} ' . format ( key )
rsp = self . _fetch_with_cache ( url , headers = self . header )
rsp = self . _fetch_with_cache ( url , headers = self . header )
@ -383,46 +399,51 @@ class Spider(Spider):
}
}
header = { " Referer " : " https://www.libvio.site " , " User-Agent " : " Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 " }
header = { " Referer " : " https://www.libvio.site " , " User-Agent " : " Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 " }
def playerContent(self, flag, id, vipFlags):
    """Resolve a play id into a player descriptor dict.

    :param flag: play-from flag selected by the client (not used here).
    :param id: either a ready "push://" URL to pass straight through, or a
               play token used to build the /play/{id}.html page URL.
    :param vipFlags: VIP flags (not used here).
    :return: dict with "parse", "playUrl", "url" and "header" keys.
    """
    # Already a push:// link (e.g. a cloud-drive URL resolved on a previous
    # round trip): hand it to the player directly, no page fetch needed.
    if id.startswith('push://'):
        return {"parse": 0, "playUrl": "", "url": id, "header": ""}
    url = 'https://www.libvio.site/play/{0}.html'.format(id)
    try:
        rsp = self._fetch_with_cache(url, headers=self.header)
        if not rsp:
            # Fetch failed: let the client-side sniffer parse the page itself.
            return {"parse": 1, "playUrl": "", "url": url, "header": ujson.dumps(self.header)}
        # Cloud-drive sources (UC / Quark) need share-link extraction;
        # everything else goes through the regular BD5 player parser.
        if self._is_cloud_drive(id):
            return self._handle_cloud_drive(url, rsp, id)
        return self._handle_bd5_player(url, rsp, id)
    except Exception as e:
        print(f"Player parse error: {e}")
        return {"parse": 1, "playUrl": "", "url": url, "header": ujson.dumps(self.header)}
def _is_cloud_drive ( self , id ) :
parts = id . split ( ' - ' )
if len ( parts ) > = 2 :
source_type = parts [ 1 ]
return source_type in [ ' 2 ' , ' 3 ' ]
return False
def _handle_cloud_drive ( self , url , rsp , id ) :
def _handle_cloud_drive ( self , url , rsp , id ) :
result = { }
try :
try :
doc = self . _parse_html_fast ( rsp . text )
iframe_src = doc ( ' iframe ' ) . attr ( ' src ' )
if iframe_src :
try :
iframe_content = self . _fetch_with_cache ( iframe_src , headers = self . header )
if not iframe_content : raise Exception ( " Iframe fetch failed " )
iframe_doc = self . _parse_html_fast ( iframe_content . text )
uc_link = iframe_doc ( ' a[href*= " drive.uc.cn " ] ' ) . attr ( ' href ' )
if uc_link : return { " parse " : 0 , " playUrl " : " " , " url " : uc_link , " header " : " " }
quark_link = iframe_doc ( ' a[href*= " pan.quark.cn " ] ' ) . attr ( ' href ' )
if quark_link : return { " parse " : 0 , " playUrl " : " " , " url " : quark_link , " header " : " " }
except Exception as e : print ( f " iframe parse failed: { e } " )
page_text = rsp . text
page_text = rsp . text
# Added \b for word boundaries to avoid partial matches
uc_match = re . search ( r ' https://drive \ .uc \ .cn/s/[^ " \ s]+? \ b ' , page_text )
# 首先尝试从JavaScript变量中提取网盘链接
if uc_match : return { " parse " : 0 , " playUrl " : " " , " url " : uc_match . group ( 0 ) , " header " : " " }
script_pattern = r ' var player_[^=]*= \ s*( { [^}]+}) '
quark_match = re . search ( r ' https://pan \ .quark \ .cn/s/[^ " \ s]+? \ b ' , page_text )
matches = re . findall ( script_pattern , page_text )
if quark_match : return { " parse " : 0 , " playUrl " : " " , " url " : quark_match . group ( 0 ) , " header " : " " }
except Exception as e : print ( f " Cloud drive parse error: { e } " )
for match in matches :
return { " parse " : 1 , " playUrl " : " " , " url " : url , " header " : ujson . dumps ( self . header ) }
try :
player_data = ujson . loads ( match )
from_value = player_data . get ( ' from ' , ' ' )
url_value = player_data . get ( ' url ' , ' ' )
if from_value == ' kuake ' and url_value :
# 夸克网盘
drive_url = url_value . replace ( ' \\ / ' , ' / ' )
return { " parse " : 0 , " playUrl " : " " , " url " : f " push:// { drive_url } " , " header " : " " }
elif from_value == ' uc ' and url_value :
# UC网盘
drive_url = url_value . replace ( ' \\ / ' , ' / ' )
return { " parse " : 0 , " playUrl " : " " , " url " : f " push:// { drive_url } " , " header " : " " }
except :
continue
except Exception as e :
print ( f " Cloud drive parse error: { e } " )
# 如果所有网盘解析都失败, 尝试BD5播放源
return self . _handle_bd5_player ( url , rsp , id )
def _handle_bd5_player ( self , url , rsp , id ) :
def _handle_bd5_player ( self , url , rsp , id ) :
try :
try :
doc = self . _parse_html_fast ( rsp . text )
doc = self . _parse_html_fast ( rsp . text )