php正则替换处理HTML页面的方法


Posted in PHP onJune 17, 2015

本文实例讲述了php正则替换处理HTML页面的方法。分享给大家供大家参考。具体如下:

<?php
if(!defined('BASEPATH')) exit('No direct script access allowed');
 /**
 * HTML替换处理类,考虑如下几种替换
 * 1. img src : '/<img(.+?)src=([\'\" ])?(.+?)([ >]+?)/i'
 * 2. a href : '/<a(.+?)href=([\'\" ])?(.+?)([ >]+?)/i'
 * 3. ifram.src : '/<iframe(.+?)src=([\'\" ])?(.+?)([ >]+?)/i'
 * 4. frame src : '/<frame(.+?)src=([\'\" ])?(.+?)([ >]+?)/i'
 * 5. js : '/window.open([( ]+?)([\'" ]+?)(.+?)([ )+?])/i'
 * 6. css : '/background(.+?)url([( ])([\'" ]+?)(.+?)([ )+?])/i'
 */
 class Myreplace {
 private $moudle_array = array('udata','tdata','tresult','dresult');
 private $content;
 private $relative_dirname;
 private $projectid;
 private $moudle;
 function __construct() {
  $this->CI = &get_instance ();
 }
 /**
  * 替换
  * @param string $content HTML内容
  * @param string $relative 相对路径
  * @param int $projectid 项目id
  * @moudle string $moudle 模板标识: udata,tdata,tresult,dresult
  */
 public function my_replace($content,$relative,$projectid,$moudle) {
  $this->content = $content;
  $this->relative_dirname = $relative;
  $this->projectid = $projectid;
  if(in_array(strtolower($moudle),$this->moudle_array))
  $this->moudle = $moudle;
  else exit;
  switch($this->moudle) {
  case 'udata':
   $this->CI->load->model('mupload_data','model');
   break;
  case 'tdata':
   $this->CI->load->model('taskdata','model');
   break;
  case 'tresult':
   $this->CI->load->model('taskresult','model');
   break;
  case 'dresult':
   $this->CI->load->model('dmsresult','model');
   break;
  default:
   break;
  }
  $pattern = '/<img(.+?)src=([\'\" ])?(.+?)([ >]+?)/i';
  $content = preg_replace_callback( $pattern, array($this, 'image_replace') , $content );
  $pattern = '/<a(.+?)href=([\'\" ])?(.+?)([ >]+?)/i';
  $content = preg_replace_callback( $pattern, array($this, 'html_replace') , $content );
  $pattern = '/<iframe(.+?)src=([\'\" ])?(.+?)([ >]+?)/i';
  $content = preg_replace_callback( $pattern, array($this, 'iframe_replace') , $content );
  $pattern = '/<frame(.+?)src=([\'\" ])?(.+?)([ >]+?)/i'; 
  $content = preg_replace_callback( $pattern, array($this, 'frame_replace'), $content );
  $pattern = '/window.open([( ]+?)([\'" ]+?)(.+?)([ )]+?)/i';
  $content = preg_replace_callback( $pattern, array($this, 'js_replace'), $content );
  $pattern = '/background(.+?)url([( ])([\'" ]+?)(.+?)([ )+?])/i';
  $content = preg_replace_callback( $pattern, array($this, 'css_replace'), $content);
  return $content;
 }
 private function image_replace($matches) {
  if(count($matches) < 4) return '';
  if( empty($matches[3]) ) return '';
  $matches[3] = rtrim($matches[3],'\'"/');
  //获取图片的id
  $parent_dir_num = substr_count( $matches[3], '../');
  $relative_dirname = $this->relative_dirname;
  for($i=0; $i<$parent_dir_num; $i++) {
  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
  }
  $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($matches[3],'./');
  $image_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
  //输出
  if( !empty($image_id) ) {
  if($this->moudle == 'dresult') {
   return "<img".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readpic/$image_id?pid=".$this->projectid .$matches[2]. $matches[4];
  } else {
   return "<img".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/picfile/$image_id?pid=".$this->projectid .$matches[2]. $matches[4];
  }
  } else {
  return "<img".$matches[1]."src=".$matches[2].$matches[3].$matches[2].$matches[4];
  }
 }
 private function html_replace( $matches ) {
  if(count($matches) < 4) return '';
  if( empty($matches[3]) ) return '';
  //如果href的链接($matches[3])以http或www或mailto开始,则不进行处理
  //if(preg_match('/^[http|www|mailto](.+?)/i',$matches[3])) 
  // return "<a".$matches[1]."href=".$matches[2].$matches[3].$matches[4];
  $matches[3] = rtrim($matches[3],'\'"/');
  //处理锚点
  if(substr_count($matches[3],'#')>0) 
  $matches[3] = substr($matches[3],0,strrpos($matches[3],'#'));
  //获取html的id
  $parent_dir_num = substr_count( $matches[3], '../');
  $relative_dirname = $this->relative_dirname;
  for($i=0; $i<$parent_dir_num; $i++) {
  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
  }
  $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($matches[3],'./');
  $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
  //输出
  if( !empty($txtfile_id ) ) {
  if($this->moudle == 'dresult') {
   return "<a".$matches[1]."href=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id?pid=".$this->projectid .$matches[2].$matches[4];
  } else {
   return "<a".$matches[1]."href=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id?pid=".$this->projectid .$matches[2].$matches[4];
  }
  } else {
  return "<a".$matches[1]."href=".$matches[2].$matches[3].$matches[2].$matches[4];
  }
 }
 private function iframe_replace( $matches ) {
  if(count($matches) < 4) return '';
  if( empty($matches[3]) ) return '';
  $matches[3] = rtrim($matches[3],'\'"/');
  //处理锚点
  if(substr_count($matches[3],'#')>0) 
  $matches[3] = substr($matches[3],0,strrpos($matches[3],'#'));
  //获取html的id
  $parent_dir_num = substr_count( $matches[3], '../');
  $relative_dirname = $this->relative_dirname;
  for($i=0; $i<$parent_dir_num; $i++) {
  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
  }
  $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($matches[3],'./');
  $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
  //输出
  if( !empty($txtfile_id ) ) {
  if($this->moudle == 'dresult') { 
   return "<iframe".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id?pid=".$this->projectid .$matches[2].$matches[4];
  } else {
   return "<iframe".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id?pid=".$this->projectid .$matches[2].$matches[4];
  }
  } else {
  return "<iframe".$matches[1]."src=".$matches[2].$matches[3].$matches[2].$matches[4];
  }
 }
 private function frame_replace( $matches ) {  
  if(count($matches) < 4) return '';
  if( empty($matches[3]) ) return '';
  $matches[3] = rtrim($matches[3],'\'"/');
  //处理锚点
  if(substr_count($matches[3],'#')>0) 
  $matches[3] = substr($matches[3],0,strrpos($matches[3],'#'));
  //获取html的id
  $parent_dir_num = substr_count( $matches[3], '../');
  $relative_dirname = $this->relative_dirname;
  for($i=0; $i<$parent_dir_num; $i++) {
  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
  }
  $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($matches[3],'./');
  $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
  //输出
  if( !empty($txtfile_id ) ) {
  if($this->moudle == 'dresult') { 
   return "<frame".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id?pid=".$this->projectid.$matches[2].$matches[4];
  } else {
   return "<frame".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id?pid=".$this->projectid.$matches[2].$matches[4];
  }
  } else {
  return "<frame".$matches[1]."src=".$matches[2].$matches[3].$matches[2].$matches[4];
  }
 }
 private function js_replace( $matches ){
  if(count($matches) < 4) return '';
  if( empty($matches[3]) ) return '';
  //处理链接
  $arr_html = split(',',$matches[3]);
  $href = $arr_html[0];
  $other = '';
  for($i=0; $i<count($arr_html); $i++)
  $other = $arr_html[$i].", ";
  $other = rtrim($other,"\, ");
  $href =rtrim($href,'\'\"');
  //处理锚点
  if(substr_count($href,'#')>0) 
  return "window.open".$matches[1].$matches[2].$matches[3].$matches[4];;
  //获取html的id
  $parent_dir_num = substr_count( $href, '../');
  $relative_dirname = $this->relative_dirname;
  for($i=0; $i<$parent_dir_num; $i++) {
  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
  }
  $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($href,'./');
  $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
  //输出
  if( !empty($txtfile_id ) ) {
  if($this->moudle == 'dresult') { 
   return "window.open".$matches[1].$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id?pid=".$this->projectid.$matches[2].','.$other.$matches[4];
  } else {
   return "window.open".$matches[1].$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id?pid=".$this->projectid.$matches[2].','.$other.$matches[4];
  }
  } else {
  return "window.open".$matches[1].$matches[2].$matches[3].$matches[4];
  }
 }
 private function css_replace( $matches ) {
  if(count($matches) < 5) return '';
  if( empty($matches[4]) ) return '';
  
  $matches[4] = rtrim($matches[4],'\'"/');
  //获取图片的id
  $parent_dir_num = substr_count( $matches[4], '../');
  $relative_dirname = $this->relative_dirname;
  for($i=0; $i<$parent_dir_num; $i++) {
  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );
  }
  $relativepath = rtrim($relative_dirname,'/') . '/'.ltrim($matches[4],'./');
  $image_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);
  //输出
  if( !empty($image_id) ) {
  if($this->moudle == 'dresult') {
   return "background".$matches[1]."url".$matches[2].$matches[3].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readpic/$image_id?pid=".$this->projectid .$matches[3]. $matches[5];
  } else {
   return "background".$matches[1]."url".$matches[2].$matches[3].$this->CI->config->item("base_url")."cdms/".$this->moudle."/picfile/$image_id?pid=".$this->projectid .$matches[3]. $matches[5];
  }
  } else {
  return "background".$matches[1]."url".$matches[2].$matches[3].$matches[4].$matches[3].$matches[5];
  }
 }
 }
/* End of Myreplace.php */
/* Location: /application/libraries/Myreplace.php */
PHP 相关文章推荐
提高PHP编程效率 引入缓存机制提升性能
Feb 15 PHP
AMFPHP php远程调用(RPC, Remote Procedure Call)工具 快速入门教程
May 10 PHP
php实现的常见排序算法汇总
Sep 08 PHP
自己写的兼容低于PHP 5.5版本的array_column()函数
Oct 24 PHP
php使用fputcsv()函数csv文件读写数据的方法
Jan 06 PHP
php使用socket post数据到其它web服务器的方法
Jun 02 PHP
浅谈PHP中output_buffering
Jul 13 PHP
WordPress中给文章添加自定义字段及后台编辑功能区域
Dec 19 PHP
[原创]php常用字符串输出方法分析(echo,print,printf及sprintf)
Jul 09 PHP
ThinkPHP 模板引擎使用详解
May 07 PHP
PHP利用DWZ.CN服务生成短网址
Aug 11 PHP
php去除deprecated的实例方法
Nov 17 PHP
PHP排序算法类实例
Jun 17 #PHP
php准确获取文件MIME类型的方法
Jun 17 #PHP
php操作MongoDB类实例
Jun 17 #PHP
PHP实现的购物车类实例
Jun 17 #PHP
CodeIgniter实现从网站抓取图片并自动下载到文件夹里的方法
Jun 17 #PHP
PHP基于MySQL数据库实现对象持久层的方法
Jun 17 #PHP
php使用curl打开https网站的方法
Jun 17 #PHP
You might like
一步一步学习PHP(7) php 字符串相关应用
2010/03/05 PHP
php函数的常用方法及注意之处小结
2011/07/10 PHP
浅谈php数组array_change_key_case() 函数和array_chunk()函数
2016/10/22 PHP
php给数组赋值的实例方法
2019/09/26 PHP
用javascript动态调整iframe高度的方法
2007/03/06 Javascript
Javascript 复制数组实现代码
2009/11/26 Javascript
JS代码放在head和body中的区别分析
2011/12/01 Javascript
javascript 随机展示头像实现代码
2011/12/06 Javascript
jquery submit ie6下失效的原因分析及解决方法
2013/11/15 Javascript
JavaScript设计模式学习之“类式继承”
2015/03/12 Javascript
JavaScript使用indexOf获得子字符串在字符串中位置的方法
2015/04/06 Javascript
最简单的JavaScript图片轮播代码(两种方法)
2015/12/18 Javascript
jQuery+ajax实现滚动到页面底部自动加载图文列表效果(类似图片懒加载)
2016/06/07 Javascript
vue的基本用法与常见指令
2017/08/15 Javascript
JavaScript贪吃蛇小组件实例代码
2017/08/20 Javascript
Parcel.js + Vue 2.x 极速零配置打包体验教程
2017/12/24 Javascript
解决vue this.$forceUpdate() 处理页面刷新问题(v-for循环值刷新等)
2018/07/26 Javascript
Node.js JSON模块用法实例分析
2019/01/04 Javascript
微信小程序wx.getUserInfo授权获取用户信息(头像、昵称)的实现
2020/08/19 Javascript
[40:04]Secret vs Infamous 2019国际邀请赛淘汰赛 败者组 BO3 第二场 8.23
2019/09/05 DOTA
使用httplib模块来制作Python下HTTP客户端的方法
2015/06/19 Python
Python实现Mysql数据库连接池实例详解
2017/04/11 Python
图文详解Django使用Pycharm连接MySQL数据库
2019/08/09 Python
Python使用uuid库生成唯一标识ID
2020/02/12 Python
IntelliJ 中配置 Anaconda的过程图解
2020/06/01 Python
详解python使用金山词霸的翻译功能(调试工具断点的使用)
2021/01/07 Python
小学门卫岗位职责
2013/12/17 职场文书
宿舍违规检讨书
2014/01/12 职场文书
选秀节目策划方案
2014/06/06 职场文书
天猫活动策划方案
2014/08/21 职场文书
领导班子作风建设剖析材料
2014/10/11 职场文书
2015年世界无烟日活动总结
2015/02/10 职场文书
公务员个人总结
2015/02/12 职场文书
预备党员转正意见
2015/06/01 职场文书
2016年校园重阳节广播稿
2015/12/18 职场文书
php去除deprecated的实例方法
2021/11/17 PHP