Python实现网页截图(PyQT5)过程解析


Posted in Python onAugust 12, 2019

方案说明

功能要求:实现网页加载后将页面截取成长图片

涉及模块:PyQT5 PIL

逻辑说明:

1:完成窗口设置,利用PyQT5 QWebEngineView加载网页地址,待网页加载完成后,调用check_pag;

class MainWindow(QMainWindow):
  def __init__(self, parent=None):
    super(MainWindow, self).__init__(parent)
    self.setWindowTitle('易哈佛')
    self.temp_height = 0
    self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False) # 禁用最大化,最小化
    # self.setWindowFlag(Qt.WindowStaysOnTopHint, True) # 窗口顶置
    self.setWindowFlag(Qt.FramelessWindowHint, True) # 窗口无边框
  def urlScreenShot(self, url):
    self.browser = QWebEngineView()
    self.browser.load(QUrl(url))
    geometry = self.chose_screen()
    self.setGeometry(geometry)
    self.browser.loadFinished.connect(self.check_page)
    self.setCentralWidget(self.browser)
  def get_page_size(self):
    size = self.browser.page().contentsSize()
    self.set_height = size.height()
    self.set_width = size.width()
    return size.width(), size.height()
  def chose_screen(self):
    width, height = 750, 1370
    desktop = QApplication.desktop()
    screen_count = desktop.screenCount()
    for i in range(0, screen_count):
      rect = desktop.availableGeometry(i)
      s_width, s_height = rect.width(), rect.height()
      if s_width > width and s_height > height:
        return QRect(rect.left(), rect.top(), width, height)
    return QRect(0, 0, width, height)
if __name__ == '__main__':
  app = QApplication(sys.argv)
  win = MainWindow()
  win.show()
  app.exit(app.exec_())

2:收集页面高度,并计算分次截屏的次数和余量高度;实例化图片合并工具,设置定时器,超时信号发出后,执行exe_command;

def check_page(self):
    p_width, p_height = self.get_page_size()
    self.page, self.over_flow_size = divmod(p_height, self.height())
    if self.page == 0:
      self.page = 1
    self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
    self.timer = QTimer(self)
    self.timer.timeout.connect(self.exe_command)
    self.timer.setInterval(400)
    self.timer.start()

3:exe_command用来控制截图次数,并在每次截图完成后控制网页向下滑屏幕的高度;所有的页面都已截取时,完成图片合并。

def exe_command(self):
    if self.page > 0:
      self.screen_shot()
      self.run_js()
    elif self.page < 0:
      self.timer.stop()
      self.ssm.image_merge()
      self.close()
    elif self.over_flow_size > 0:
      self.screen_shot()
    self.page -= 1    
  def run_js(self):
    script = """
      var scroll = function (dHeight) {
      var t = document.documentElement.scrollTop
      var h = document.documentElement.scrollHeight
      dHeight = dHeight || 0
      var current = t + dHeight
      if (current > h) {
        window.scrollTo(0, document.documentElement.clientHeight)
       } else {
        window.scrollTo(0, current)
       }
      }
    """
    command = script + '\n scroll({})'.format(self.height())
    self.browser.page().runJavaScript(command)

4:screen_shot在每次截图完成后将图片保存,并将图片对象由图片合并根据保存到列表中。

def screen_shot(self):
    screen = QApplication.primaryScreen()
    winid = self.browser.winId()
    pix = screen.grabWindow(int(winid))
    name = '{}/temp.png'.format(self.ssm.root_path)
    pix.save(name)
    self.ssm.add_im(name)

5:截图合并工具,在每次截图完成后将图片对象保存,完成余量截图的重绘和截图的合并。

class ScreenShotMerge():
  def __init__(self, page, over_flow_size):
    self.im_list = []
    self.page = page
    self.over_flow_size = over_flow_size
    self.get_path()

  def get_path(self):
    self.root_path = Path(__file__).parent.joinpath('temp')
    if not self.root_path.exists():
      self.root_path.mkdir(parents=True)
    self.save_path = self.root_path.joinpath('merge.png')

  def add_im(self, path):
    if len(self.im_list) == self.page:
      im = self.reedit_image(path)
    else:
      im = Image.open(path)
    im.save('{}/{}.png'.format(self.root_path, len(self.im_list) + 1))
    self.im_list.append(im)

  def get_new_size(self):
    max_width = 0
    total_height = 0
    # 计算合成后图片的宽度(以最宽的为准)和高度
    for img in self.im_list:
      width, height = img.size
      if width > max_width:
        max_width = width
      total_height += height
    return max_width, total_height

  def image_merge(self, ):
    if len(self.im_list) > 1:
      max_width, total_height = self.get_new_size()
      # 产生一张空白图
      new_img = Image.new('RGB', (max_width - 15, total_height), 255)
      x = y = 0
      for img in self.im_list:
        width, height = img.size
        new_img.paste(img, (x, y))
        y += height
      new_img.save(self.save_path)
      print('截图成功:', self.save_path)
    else:
      obj = self.im_list[0]
      width, height = obj.size
      left, top, right, bottom = 0, 0, width, height
      box = (left, top, right, bottom)
      region = obj.crop(box)
      new_img = Image.new('RGB', (width, height), 255)
      new_img.paste(region, box)
      new_img.save(self.save_path)
      print('截图成功:', self.save_path)

  def reedit_image(self, path):
    obj = Image.open(path)
    width, height = obj.size
    left, top, right, bottom = 0, height - self.over_flow_size, width, height
    box = (left, top, right, bottom)
    region = obj.crop(box)
    return region

截图功能完整代码

#!/usr/bin/env python
# -*- coding:UTF-8 -*-
# Author:Leslie-x
import sys
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PIL import Image
from pathlib import Path
class ScreenShotMerge():
  def __init__(self, page, over_flow_size):
    self.im_list = []
    self.page = page
    self.over_flow_size = over_flow_size
    self.get_path()
  def get_path(self):
    self.root_path = Path(__file__).parent.joinpath('temp')
    if not self.root_path.exists():
      self.root_path.mkdir(parents=True)
    self.save_path = self.root_path.joinpath('merge.png')
  def add_im(self, path):
    if len(self.im_list) == self.page:
      im = self.reedit_image(path)
    else:
      im = Image.open(path)
    im.save('{}/{}.png'.format(self.root_path, len(self.im_list) + 1))
    self.im_list.append(im)
  def get_new_size(self):
    max_width = 0
    total_height = 0
    # 计算合成后图片的宽度(以最宽的为准)和高度
    for img in self.im_list:
      width, height = img.size
      if width > max_width:
        max_width = width
      total_height += height
    return max_width, total_height
  def image_merge(self, ):
    if len(self.im_list) > 1:
      max_width, total_height = self.get_new_size()
      # 产生一张空白图
      new_img = Image.new('RGB', (max_width - 15, total_height), 255)
      x = y = 0
      for img in self.im_list:
        width, height = img.size
        new_img.paste(img, (x, y))
        y += height
      new_img.save(self.save_path)
      print('截图成功:', self.save_path)
    else:
      obj = self.im_list[0]
      width, height = obj.size
      left, top, right, bottom = 0, 0, width, height
      box = (left, top, right, bottom)
      region = obj.crop(box)
      new_img = Image.new('RGB', (width, height), 255)
      new_img.paste(region, box)
      new_img.save(self.save_path)
      print('截图成功:', self.save_path)
  def reedit_image(self, path):
    obj = Image.open(path)
    width, height = obj.size
    left, top, right, bottom = 0, height - self.over_flow_size, width, height
    box = (left, top, right, bottom)
    region = obj.crop(box)
    return region
class MainWindow(QMainWindow):
  def __init__(self, parent=None):
    super(MainWindow, self).__init__(parent)
    self.setWindowTitle('易哈佛')
    self.temp_height = 0
    self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False) # 禁用最大化,最小化
    # self.setWindowFlag(Qt.WindowStaysOnTopHint, True) # 窗口顶置
    self.setWindowFlag(Qt.FramelessWindowHint, True) # 窗口无边框
  def urlScreenShot(self, url):
    self.browser = QWebEngineView()
    self.browser.load(QUrl(url))
    geometry = self.chose_screen()
    self.setGeometry(geometry)
    self.browser.loadFinished.connect(self.check_page)
    self.setCentralWidget(self.browser)
  def get_page_size(self):
    size = self.browser.page().contentsSize()
    self.set_height = size.height()
    self.set_width = size.width()
    return size.width(), size.height()
  def chose_screen(self):
    width, height = 750, 1370
    desktop = QApplication.desktop()
    screen_count = desktop.screenCount()
    for i in range(0, screen_count):
      rect = desktop.availableGeometry(i)
      s_width, s_height = rect.width(), rect.height()
      if s_width > width and s_height > height:
        return QRect(rect.left(), rect.top(), width, height)
    return QRect(0, 0, width, height)
  def check_page(self):
    p_width, p_height = self.get_page_size()
    self.page, self.over_flow_size = divmod(p_height, self.height())
    if self.page == 0:
      self.page = 1
    self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
    self.timer = QTimer(self)
    self.timer.timeout.connect(self.exe_command)
    self.timer.setInterval(400)
    self.timer.start()
  def exe_command(self):
    if self.page > 0:
      self.screen_shot()
      self.run_js()

    elif self.page < 0:
      self.timer.stop()
      self.ssm.image_merge()
      self.close()

    elif self.over_flow_size > 0:
      self.screen_shot()
    self.page -= 1

  def run_js(self):
    script = """
      var scroll = function (dHeight) {
      var t = document.documentElement.scrollTop
      var h = document.documentElement.scrollHeight
      dHeight = dHeight || 0
      var current = t + dHeight
      if (current > h) {
        window.scrollTo(0, document.documentElement.clientHeight)
       } else {
        window.scrollTo(0, current)
       }
      }
    """
    command = script + '\n scroll({})'.format(self.height())
    self.browser.page().runJavaScript(command)

  def screen_shot(self):
    screen = QApplication.primaryScreen()
    winid = self.browser.winId()
    pix = screen.grabWindow(int(winid))
    name = '{}/temp.png'.format(self.ssm.root_path)
    pix.save(name)
    self.ssm.add_im(name)

if __name__ == '__main__':
  url = 'http://blog.sina.com.cn/lm/rank/focusbang//'
  app = QApplication(sys.argv)
  win = MainWindow()
  win.urlScreenShot(url)
  win.show()
  app.exit(app.exec_())

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
在Python中使用HTML模版的教程
Apr 29 Python
Python 绘图和可视化详细介绍
Feb 11 Python
Python3安装Pymongo详细步骤
May 26 Python
python 拷贝特定后缀名文件,并保留原始目录结构的实例
Apr 27 Python
Python实现的求解最大公约数算法示例
May 03 Python
用Python PIL实现几个简单的图片特效
Jan 18 Python
Django+Xadmin构建项目的方法步骤
Mar 06 Python
11个Python3字典内置方法大全与示例汇总
May 13 Python
Python re正则表达式元字符分组()用法分享
Feb 10 Python
Python print不能立即打印的解决方式
Feb 19 Python
Python 下载Bing壁纸的示例
Sep 29 Python
通用的Django注册功能模块实现方法
Feb 05 Python
python实现知乎高颜值图片爬取
Aug 12 #Python
python3 enum模块的应用实例详解
Aug 12 #Python
Python一键查找iOS项目中未使用的图片、音频、视频资源
Aug 12 #Python
django+echart数据动态显示的例子
Aug 12 #Python
Flask框架学习笔记之使用Flask实现表单开发详解
Aug 12 #Python
Flask框架学习笔记之表单基础介绍与表单提交方式
Aug 12 #Python
python内存管理机制原理详解
Aug 12 #Python
You might like
phpMyAdmin安装并配置允许空密码登录
2015/07/04 PHP
PHP mysql事务问题实例分析
2016/01/18 PHP
PHP共享内存使用与信号控制实例分析
2018/05/09 PHP
PHP写API输出的时用echo的原因详解
2019/04/28 PHP
微信公众号之主动给用户发送消息功能
2019/06/22 PHP
jQuery对指定元素中指定字符串进行替换的方法
2015/03/17 Javascript
基于JS实现的倒计时程序实例
2015/07/24 Javascript
jQuery实现的多滑动门,多选项卡效果代码
2016/03/28 Javascript
第十篇BootStrap轮播插件使用详解
2016/06/21 Javascript
JS中解决谷歌浏览器记住密码输入框颜色改变功能
2017/02/13 Javascript
老生常谈angularjs中的$state.go
2017/04/24 Javascript
js中的闭包实例展示
2018/11/01 Javascript
vue项目刷新当前页面的三种方法
2018/12/04 Javascript
浅谈bootstrap layer.open中end的使用方法
2019/09/12 Javascript
解决Vue中使用keepAlive不缓存问题
2020/08/04 Javascript
python中对list去重的多种方法
2014/09/18 Python
Python中的FTP通信模块ftplib的用法整理
2016/07/08 Python
使用Python进行二进制文件读写的简单方法(推荐)
2016/09/12 Python
pandas将DataFrame的列变成行索引的方法
2018/04/10 Python
Python针对给定列表中元素进行翻转操作的方法分析
2018/04/27 Python
Python3连接SQLServer、Oracle、MySql的方法
2018/06/28 Python
Python简单获取二维数组行列数的方法示例
2018/12/21 Python
mac使用python识别图形验证码功能
2020/01/10 Python
Pyqt5 关于流式布局和滚动条的综合使用示例代码
2020/03/24 Python
python文件排序的方法总结
2020/09/13 Python
Python制作一个仿QQ办公版的图形登录界面
2020/09/22 Python
selenium如何定位span元素的实现
2021/01/13 Python
基于css3 animate制作绚丽的动画效果
2015/11/24 HTML / CSS
蔻驰西班牙官网:COACH西班牙
2019/01/16 全球购物
俄罗斯EPL钻石珠宝店:ЭПЛ
2019/10/22 全球购物
梅西百货官网:Macy’s
2020/08/04 全球购物
关于Java String的一道面试题
2013/09/29 面试题
班级活动策划书
2014/02/06 职场文书
经营目标管理责任书
2014/07/25 职场文书
政法干警核心价值观心得体会
2014/09/11 职场文书
2015年学校团委工作总结
2015/05/26 职场文书