http://182.254.8.83/vwecam.gtimg.com/1006_d81d60f3c83844a5ad6a184149d4ccbb.f0.mp4?sha=78A27CF4908AB546C4ED08708B57168D62190991&ptype=http&%3bvkey=F164A3A5C6358B4A5B0EDAEE4A9F2BFD7D12A99F8D0B632A1F50E6DA9634DDCA7E8D97D47F674CD161E1C14190324B27AC0EF147F2AB27FE&%3bsdtfrom=v1000&%3bowner=3154028129&ocid=1291850924&ocid=3775189258
<a class="img-item " data-cmd="qz_popup" href="https://user.qzone.qq.com/3154028129/311/1006_fe0b209c194c44dca48c1a8e5fd4ccbb" data-v_itemid="1006_fe0b209c194c44dca48c1a8e5fd4ccbb" data-v_type="1" data-v_picinfo_url="https://b244.photo.store.qq.com/psb?/V10EoZxv3I7ltU/bDCXj4CDH6Rzc0H8OXVcb7o2v.JLtMXICXNA5VUGwU0!/c/dPQAAAAAAAAA&bo=gAJwAQAAAAARF9M!" data-v_picinfo_width="480" data-v_picinfo_height="276" data-v_vidiourl="" data-v_vidioswfurl="http://vwecam.gtimg.com/1006_fe0b209c194c44dca48c1a8e5fd4ccbb.f0.mp4?ptype=http&vkey=6E57E873130938EDA40637E14C2C82503BC1835CDD1BF14AD7ECB95F648A9BB9E1DBB80525036F753803A2F7DF1F13BFDC54F51BA5242F54&sdtfrom=v1000&owner=3154028129" data-v_h265="" data-v_source_website="" data-v_writefrom="" data-videotype="mood" data-clicklog="video" hotclickpath="" hotdomain="" data-version="3" data-param="3&videosrc=http%3A%2F%2Fvwecam.gtimg.com%2F1006_fe0b209c194c44dca48c1a8e5fd4ccbb.f0.mp4%3Fptype%3Dhttp%26vkey%3D6E57E873130938EDA40637E14C2C82503BC1835CDD1BF14AD7ECB95F648A9BB9E1DBB80525036F753803A2F7DF1F13BFDC54F51BA5242F54%26sdtfrom%3Dv1000%26owner%3D3154028129&type=1&org_vidio_url=" data-src="/qzone/app/mood/richinfo_view.html" data-width="512" data-height="512" data-type="popup" data-title="" data-config="" data-extendinfo1="" data-extendinfo2="" data-extendinfo3="" data-extendinfo4="" data-vfeed-id="vfeed_3154028129_311_61a6febb83fc6a5a825f0700_" data-newplayer="1" data-newplayer-id="vfeed_3154028129_311_61a6febb83fc6a5a825f0700_" style="background-color: rgb(0, 0, 0); 560px; height: 322px;"><div class="video-img j-videofeed-imgctn" style=" 560px; height: 322px;"><img src="http://b244.photo.store.qq.com/psb?/V10EoZxv3I7ltU/bDCXj4CDH6Rzc0H8OXVcb7o2v.JLtMXICXNA5VUGwU0!/c/dPQAAAAAAAAA&bo=gAJwAQAAAAARF9M!" 
data-oriwidth="640" data-oriheight="368" style=" 560px; height: 322px; margin: 0px;"><i class="ui-icon icon-media-play j-videofeed-icon-play" style="display: none;"></i><span class="video-loading j-videofeed-icon-loading" style="display: none;"><i class="inner"></i></span></div><div style="position: absolute; left: 0px; top: 0px; 560px; height: 322px; overflow: hidden;" class="j-videofeed-flashctn" data-need_hide_when_inited="1" data-vpjs-video-id="1516968272464"><div style=" 100%; height: 100%; overflow: hidden; background-color: rgb(0, 0, 0); position: relative; top: 0px; left: 0px;" id="vpjs-playerContainer-1516968272464" data-vpjs-video-id="1516968272464" class="vpjs-playerContainer"><div id="vpjs-videoContainer-1516968272464" style=" 100%; height: 100%; position: relative; z-index: 0;"> <video id="vpjs-video-1516968272464" webkit-playsinline="true" playsinline="true" x-webkit-airplay="true" muted="muted" autoplay="" preload="auto" src="http://vwecam.gtimg.com/1006_fe0b209c194c44dca48c1a8e5fd4ccbb.f0.mp4?ptype=http&vkey=6E57E873130938EDA40637E14C2C82503BC1835CDD1BF14AD7ECB95F648A9BB9E1DBB80525036F753803A2F7DF1F13BFDC54F51BA5242F54&sdtfrom=v1000&owner=3154028129" style=" 100%; height: auto; margin-top: 0%; position: absolute; left: 0; top: 0"> <source src="http://vwecam.gtimg.com/1006_fe0b209c194c44dca48c1a8e5fd4ccbb.f0.mp4?ptype=http&vkey=6E57E873130938EDA40637E14C2C82503BC1835CDD1BF14AD7ECB95F648A9BB9E1DBB80525036F753803A2F7DF1F13BFDC54F51BA5242F54&sdtfrom=v1000&owner=3154028129"> Your browser does not support the video tag. </video> </div><div id="vpjs-videoPoster-1516968272464" style="display: none;"> <div style="background:url(http://b244.photo.store.qq.com/psb?/V10EoZxv3I7ltU/bDCXj4CDH6Rzc0H8OXVcb7o2v.JLtMXICXNA5VUGwU0!/c/dPQAAAAAAAAA&bo=gAJwAQAAAAARF9M!) 
no-repeat center; background-size: contain; position: absolute; top: 0; left: 0; bottom: 0; right: 0;"> </div></div><div id="vpjs-videoMask-1516968272464"> <div style="top: 0; left: 0; bottom: 0; position: absolute; right: 0;"></div></div><div id="vpjs-videoControlBar-1516968272464" class="vpjs-videoControlBar vpjs-fade vpjs-fadein" style="position: absolute; left: 0px; right: 0px; top: 0px; bottom: 0px; display: block;"> <div class="vpjs-controls"> <div class="vpjs-controls-bottom"> <!-- 直播进度 S--> <div class="vpjs-progress-pane"> <div class="vpjs-controls-progress"> <div class="vpjs-progress-bar" style=" 540px;"> <i class="vpjs-ui-icon vpjs-progress-icon-pointer"></i> </div> </div> </div> <div class="vpjs-controls-time"> <!-- 播放按钮 --> <i class="vpjs-ui-icon vpjs-icon-play"></i> <span class="vpjs-time"> <b class="j-cur-time-text">00:10</b>/<span class="j-total-time-text">00:10</span> </span> </div> <div class="vpjs-controls-other"> <div class="vpjs-controls-btn vpjs-left"> <i class="vpjs-ui-icon vpjs-icon-fullscreen"></i></div><div class="vpjs-controls-btn vpjs-left"> <i class="vpjs-ui-icon vpjs-icon-silence j-voice-icon"></i> <div class="vpjs-voice-progress" style="visibility: hidden;"> <div class="vpjs-progress-bar j-voice-progress-bar" style="height: 8px;"> <i class="vpjs-ui-icon vpjs-voice-icon-pointer"></i> </div> </div></div></div> </div> </div> <div class="control-tips" style="display: none;">点击查看更多</div></div><div id="vpjs-videoLoading-1516968272464" style="display: none;"> <div style="background: url(//qzonestyle.gtimg.cn/aoi/img/video-loading-bg.png) no-repeat center; background-size: cover; 40px; height: 24px; position: absolute; left: 50%; top: 50%; margin-left: -20px; margin-top: -12px;"> <i style="background: url(//qzonestyle.gtimg.cn/aoi/img/video-loading.gif); 40px; height: 24px;"></i> </div></div><div id="vpjs-videoBigPlayButton-1516968272464" style="display: block;"> <span style="background: 
url(//qzonestyle.gtimg.cn/qzone/hybrid/common/videoPlayer/img/big-play-button.png) no-repeat center; background-size: cover; position: absolute; top: 50%; left: 50%; 60px; height: 60px; margin-left: -30px; margin-top: -30px; cursor: pointer;"></span></div><div id="vpjs-videoError-1516968272464" style="display: none;"> <div style="position: absolute; left: 0; right: 0; top: 0; bottom: 0; background: url(//qzonestyle.gtimg.cn/aoi/img/live/people-empty.png) no-repeat center; background-size: cover; z-index: 99;"> <p style="position: absolute; left: 50%; top: 50%; text-align: center; 200px; margin-left: -100px; margin-top: -9px;"> <span class="vpjs-error-message" style="vertical-align: middle;">视频播放失败 </span> </p> </div></div></div></div></a>
from selenium import webdriver
import os
import time
import pymysql
from bs4 import BeautifulSoup
# MySQL connection settings read by mysql_fetch() and mysql_write().
h = 'localhost'  # server host
pt = 3306        # server port
u = 'root'       # login user
p = 'root'       # login password
db = 'qqzone'    # database (schema) name
def mysql_fetch(sql, args=None):
    """Execute *sql* on the configured MySQL database and return every row.

    A new connection is opened for each call using the module-level
    settings ``h``, ``pt``, ``u``, ``p`` and ``db``.

    Args:
        sql: SQL statement text handed to ``cursor.execute``.
        args: Optional parameters forwarded to ``cursor.execute`` so values
            can be bound by the driver instead of being formatted into
            *sql*. ``None`` (the default) executes *sql* as-is.

    Returns:
        The result of ``cursor.fetchall()``, or an empty tuple when the
        connection cannot be opened (the error is printed).

    Raises:
        Whatever ``cursor.execute`` / ``conn.commit`` raise; the connection
        is closed before the exception propagates.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8mb4')
    except Exception as e:
        # Best-effort: a failed connection is reported and yields no rows.
        print(e)
        return ()
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, args)
            # Read the rows while the cursor and connection are still open.
            rows = cursor.fetchall()
        conn.commit()
        return rows
    finally:
        # Always release the connection, including when execute() fails.
        conn.close()
def mysql_write(sql, args=None):
    """Execute a data-modifying *sql* statement and commit it.

    A new connection is opened for each call using the module-level
    settings ``h``, ``pt``, ``u``, ``p`` and ``db``.

    Args:
        sql: SQL statement text handed to ``cursor.execute``.
        args: Optional parameters forwarded to ``cursor.execute`` so values
            can be bound by the driver instead of being formatted into
            *sql*. ``None`` (the default) executes *sql* as-is.

    Returns:
        0 after a successful commit, 1 when the connection cannot be opened
        (the error is printed).

    Raises:
        Whatever ``cursor.execute`` / ``conn.commit`` raise; the connection
        is closed before the exception propagates.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8mb4')
    except Exception as e:
        # Best-effort: a failed connection is reported via the return code.
        print(e)
        return 1
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql, args)
        conn.commit()
    finally:
        # Always release the connection, including when execute() fails.
        conn.close()
    return 0
# mp4
# Drive Firefox through a Qzone page, log in if prompted, scroll the feed
# and save the feed iframe's HTML to 'qqzong.vedio.tmp.0html'.
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

driver = webdriver.Firefox()
try:
    # Author's note on the target accounts: coco 706386164, cd 3154028129.
    target_qq_d = {'c': '704', 'd': '329'}
    # The original looked up target_qq_d['dz'], a key that does not exist,
    # so the script died with KeyError before navigating anywhere.
    # NOTE(review): 'd' is assumed to be the intended ("cd") entry - confirm.
    # NOTE(review): login credentials are hard-coded here.
    qq_u, qq_p, target_qq = 'g1om', 'ziy4', target_qq_d['d']

    # Open the target user's Qzone page.
    driver.get('http://user.qzone.qq.com/{}/'.format(target_qq))
    time.sleep(5)

    # After the wait, decide whether a login is required by checking for the
    # login container element.
    try:
        driver.find_element(By.ID, 'login_div')
        a = True
    except NoSuchElementException:
        a = False
    print(a)

    if a:
        # The login DIV exists: switch into its frame and submit the
        # account/password form.
        driver.switch_to.frame('login_frame')
        driver.find_element(By.ID, 'switcher_plogin').click()
        driver.find_element(By.ID, 'u').clear()  # user name box
        driver.find_element(By.ID, 'u').send_keys(qq_u)
        driver.find_element(By.ID, 'p').clear()
        driver.find_element(By.ID, 'p').send_keys(qq_p)
        driver.find_element(By.ID, 'login_button').click()
        time.sleep(3)
    driver.implicitly_wait(3)

    # The check for access permission (presence of element id
    # 'QM_OwnerInfo_Icon') is disabled; access is assumed.
    b = True
    if b:
        time.sleep(2)
        # Frame holding the feed. 'app_canvas_frame' was the alternative the
        # author assigned first and immediately overwrote.
        myframe = 'QM_Feeds_Iframe'
        driver.refresh()
        time.sleep(3)

        # Scroll to the bottom repeatedly so more feed items get loaded.
        # Author's notes: Toutiao (no iframe) can be scrolled indefinitely;
        # Qzone posts (iframe) come in a fixed batch of 20 and error out
        # after 2 scrolls.
        js = 'window.scrollTo(0,document.body.scrollHeight)'
        try:
            for isc in range(30):
                time.sleep(2)
                driver.execute_script(js)
        except Exception as e:
            print('window.scrollTo-->', e)

        driver.switch_to.frame(myframe)
        time.sleep(3)

        # Persist the frame's page source for the offline parsing step.
        with open('qqzong.vedio.tmp.0html', 'w', encoding='utf-8') as fw:
            fw.write(driver.page_source)
finally:
    # Close the browser even when a step above raised.
    try:
        driver.quit()
    except Exception as e:
        print(e)
# Parse the saved feed snapshot and insert one row per post into
# qqzoneshuoshuo (words, imgurls, time_site, time_script).
sql_str_l, sql = [], 'INSERT INTO qqzoneshuoshuo (words,imgurls,time_site,time_script) VALUES '
# Fallback text inside the <video> tag; removed from the post's words.
not_support_s = 'Your browser does not support the video tag.'

with open('qqzong.vedio.tmp.0html', 'r', encoding='utf-8') as fo:
    soup = BeautifulSoup(fo, 'html.parser')

for post in soup.find_all('li', class_='f-single f-s-s'):
    # Handle each post on its own so one malformed item does not discard
    # every post that follows it.
    try:
        i_txt = post.text
        f_nick = post.find('div', class_='f-nick').text
        info_detail = post.find('div', class_='info-detail').text

        video_link = post.find('a', attrs={'data-v_vidioswfurl': True})
        if video_link is not None:
            # Video post: keep the video URL only.
            media_l = [video_link.attrs['data-v_vidioswfurl']]
        else:
            # Image post: the first <img> is skipped (presumably the
            # author's avatar - TODO confirm); empty when there are no more.
            media_l = [img.attrs['src'] for img in post.find_all('img')[1:]]

        # Strip the nickname, the time/site line and the video fallback text
        # from the full text to leave only the post's words.
        words = i_txt.replace('{}{}'.format(f_nick, ' '), '').replace(info_detail, '').replace(not_support_s, '')

        # Scraped text is untrusted: escape it before embedding it in the
        # double-quoted SQL literals, otherwise a quote or backslash in a
        # post breaks (or injects into) the statement.
        sql_part = '( "{}","{}","{}","{}" )'.format(
            pymysql.converters.escape_string(words),
            pymysql.converters.escape_string(','.join(media_l)),
            pymysql.converters.escape_string(info_detail),
            int(time.time()))
        sql_str_l.append(sql_part)
    except Exception as e:
        print(e)

if sql_str_l:
    sql = '{}{}'.format(sql, ','.join(sql_str_l))
    print(sql)
    try:
        mysql_write(sql)
    except Exception as e:
        print(e)
else:
    # "INSERT ... VALUES " with no tuples is not valid SQL; skip the write.
    print('no posts parsed, nothing to insert')
获取视频地址所在的html中的值
保存原理
# Download the video behind `url` and save it as 'my.mp4'.
# The original called requests.get(), but `requests` is never imported in
# this file; the standard library is used instead.
import shutil
import urllib.request

url = 'http://vwecam.gtimg.com/1006_9e83353154174dba9cc28a72b2c3ccbb.f0.mp4?ptype=http&vkey=05BFF9D7555A2A0E224402DCC6946D6DC3AB905326DBCA4D3EEA80C3F1904B9414C8C75586A994D87A0359AD51F1B51ED639C20B1E7AD58B&sdtfrom=v1000&owner=3154028129'
# urlopen() raises on HTTP error statuses, so an error page is never saved
# as the video; the body is streamed to disk rather than held in memory.
with urllib.request.urlopen(url, timeout=30) as r, open('my.mp4', 'wb') as fw:
    shutil.copyfileobj(r, fw)