python分析apache访问日志脚本分享


Posted in Python onFebruary 26, 2015
#!/usr/bin/env python
# coding=utf-8
 
#------------------------------------------------------
# Name:     Apache 日志分析脚本
# Purpose:   此脚本只用来分析Apache的访问日志
# Version:   2.0
# Author:    LEO
# Created:   2013-4-26
# Modified:   2013-5-4
# Copyright:  (c) LEO 2013
#------------------------------------------------------
 
import sys
import time
 
#该类是用来打印格式
class displayFormat(object):
 
  def format_size(self,size):
    '''格式化流量单位'''
    KB = 1024     
    MB = 1048576    
    GB = 1073741824  
    TB = 1099511627776
    if size >= TB :
      size = str(size / TB) + 'T'
    elif size < KB :
      size = str(size) + 'B'
    elif size >= GB and size < TB:
      size = str(size / GB) + 'G'
    elif size >= MB and size < GB :
      size = str(size / MB) + 'M'
    else :
      size = str(size / KB) + 'K'
    return size
 
  formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'
 
  def transverse_line(self) :
    '''输出横线'''
    print self.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10)
 
  def head(self):
    '''输出头部信息'''
    print self.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503')
 
  def error_print(self) :
    '''输出错误信息'''
    print
    print 'Usage : ' + sys.argv[0] + ' ApacheLogFilePath [Number]'
    print
    sys.exit(1)
 
  def execut_time(self):
    '''输出脚本执行的时间'''
    print
    print "Script Execution Time: %.3f second" % time.clock()
    print
 
#该类是用来生成主机信息的字典
class hostInfo(object):
   
  host_info = ['200','404','500','302','304','503','403','times','size']
 
  def __init__(self,host):
    self.host = host = {}.fromkeys(self.host_info,0)
 
  def increment(self,status_times_size,is_size):
    '''该方法是用来给host_info中的各个值加1'''
    if status_times_size == 'times':
      self.host['times'] += 1
    elif is_size:
      self.host['size'] = self.host['size'] + status_times_size
    else:
      self.host[status_times_size] += 1
 
  def get_value(self,value):
    '''该方法是取到各个主机信息中对应的值'''
    return self.host[value]
 
#该类是用来分析文件
class fileAnalysis(object):
  def __init__(self):
    '''初始化一个空字典'''
    self.report_dict = {}
    self.total_request_times,self.total_traffic,self.total_200, 
    self.total_404,self.total_500,self.total_403,self.total_302, 
    self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0
 
  def split_eachline_todict(self,line):
    '''分割文件中的每一行,并返回一个字典'''
    split_line = line.split()
    split_dict = {'remote_host':split_line[0],'status':split_line[-2],'bytes_sent':split_line[-1],}
    return split_dict
 
  def generate_log_report(self,logfile):
    '''读取文件,分析split_eachline_todict方法生成的字典'''
    for line in logfile:
      try:
        line_dict = self.split_eachline_todict(line)
        host = line_dict['remote_host']
        status = line_dict['status']
      except ValueError :
        continue
      except IndexError :
        continue
 
      if host not in self.report_dict :
        host_info_obj = hostInfo(host)
        self.report_dict[host] = host_info_obj
      else :
        host_info_obj = self.report_dict[host]
 
      host_info_obj.increment('times',False)  
      if status in host_info_obj.host_info : 
        host_info_obj.increment(status,False) 
      try:
        bytes_sent = int(line_dict['bytes_sent']) 
      except ValueError:
        bytes_sent = 0
      host_info_obj.increment(bytes_sent,True)
    return self.report_dict
 
  def return_sorted_list(self,true_dict):
    '''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序'''
    for host_key in true_dict :
      host_value = true_dict[host_key]
      times = host_value.get_value('times') 
      self.total_request_times = self.total_request_times + times 
      size = host_value.get_value('size') 
      self.total_traffic = self.total_traffic + size 
 
      o200 = host_value.get_value('200')
      o404 = host_value.get_value('404')
      o500 = host_value.get_value('500')
      o403 = host_value.get_value('403')
      o302 = host_value.get_value('302')
      o304 = host_value.get_value('304')
      o503 = host_value.get_value('503')
 
      true_dict[host_key] = {'200':o200,'404':o404,'500':o500,'403':o403,'302':o302,'304':o304, 
                  '503':o503,'times':times,'size':size}
 
      self.total_200 = self.total_200 + o200
      self.total_404 = self.total_404 + o404
      self.total_500 = self.total_500 + o500
      self.total_302 = self.total_302 + o302
      self.total_304 = self.total_304 + o304
      self.total_503 = self.total_503 + o503
 
    sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],t[1]['size']),reverse=True)
    return sorted_list
 
class Main(object):
  def main(self) :
    '''主调函数'''
    display_format = displayFormat()
    arg_length = len(sys.argv)
    if arg_length == 1 :
      display_format.error_print()
    elif arg_length == 2 or arg_length == 3:
      infile_name = sys.argv[1]
      try :
        infile = open(infile_name,'r')
        if arg_length == 3 :
          lines = int(sys.argv[2])
        else :
          lines = 0
      except IOError,e :
        print
        print e
        display_format.error_print()
      except ValueError :
        print
        print "Please Enter A Volid Number !!"
        display_format.error_print()
    else :
      display_format.error_print()
 
    fileAnalysis_obj = fileAnalysis()
    not_true_dict = fileAnalysis_obj.generate_log_report(infile)
    log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
    total_ip = len(log_report)
    if lines :
      log_report = log_report[0:lines]
    infile.close()
 
    print
    total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic)
    total_request_times = fileAnalysis_obj.total_request_times
    print 'Total IP: %s  Total Traffic: %s  Total Request Times: %d' 
       % (total_ip,total_traffic,total_request_times)
    print
    display_format.head()
    display_format.transverse_line()
 
    for host in log_report :
      times = host[1]['times']
      times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100
      print display_format.formatstring % (host[0],
                         display_format.format_size(host[1]['size']),
                         times,str(times_percent)[0:5],
                         host[1]['200'],host[1]['404'],
                         host[1]['500'],host[1]['403'],
                         host[1]['302'],host[1]['304'],host[1]['503'])
                         
    if (not lines) or total_ip == lines :
      display_format.transverse_line()
      print display_format.formatstring % (total_ip,total_traffic, 
                         total_request_times,'100%',
                         fileAnalysis_obj.total_200,
                         fileAnalysis_obj.total_404,
                         fileAnalysis_obj.total_500, 
                         fileAnalysis_obj.total_403,
                         fileAnalysis_obj.total_302, 
                         fileAnalysis_obj.total_304,
                         fileAnalysis_obj.total_503)
    display_format.execut_time()
 
if __name__ == '__main__':
  main_obj = Main()
  main_obj.main()
Python 相关文章推荐
python dict remove数组删除(del,pop)
Mar 24 Python
利用Python脚本在Nginx和uwsgi上部署MoinMoin的教程
May 05 Python
python 读文件,然后转化为矩阵的实例
Apr 23 Python
Python从ZabbixAPI获取信息及实现Zabbix-API 监控的方法
Sep 17 Python
Python3.4学习笔记之列表、数组操作示例
Mar 01 Python
python 哈希表实现简单python字典代码实例
Sep 27 Python
python生成特定分布数的实例
Dec 05 Python
python目标检测给图画框,bbox画到图上并保存案例
Mar 10 Python
Python依赖包迁移到断网环境操作
Jul 13 Python
Matlab使用Plot函数实现数据动态显示方法总结
Feb 25 Python
Python 实现Mac 屏幕截图详解
Oct 05 Python
Python3使用Qt5来实现简易的五子棋小游戏
May 02 Python
Python构造函数及解构函数介绍
Feb 26 #Python
python中的__slots__使用示例
Feb 26 #Python
Python map和reduce函数用法示例
Feb 26 #Python
Python中运行并行任务技巧
Feb 26 #Python
Python通过递归遍历出集合中所有元素的方法
Feb 25 #Python
Python THREADING模块中的JOIN()方法深入理解
Feb 18 #Python
python持久性管理pickle模块详细介绍
Feb 18 #Python
You might like
php代码运行时间查看类代码分享
2011/08/06 PHP
phpmyadmin显示utf8_general_ci中文乱码的问题终级篇
2013/04/08 PHP
php实现使用正则将文本中的网址转换成链接标签
2014/12/03 PHP
为你的网站增加亮点的9款jQuery插件推荐
2011/05/03 Javascript
js判断浏览器版本以及浏览器内核的方法
2015/01/20 Javascript
JavaScript替换当前页面的方法
2015/04/03 Javascript
Javascript中的return作用及javascript return关键字用法详解
2015/11/05 Javascript
AngularJs html compiler详解及示例代码
2016/09/01 Javascript
Vue.2.0.5过渡效果使用技巧
2017/03/16 Javascript
深入浅出es6模板字符串
2017/08/26 Javascript
VUE长按事件需求详解
2017/10/18 Javascript
JS实现监控微信小程序的原理
2018/06/15 Javascript
nodejs 生成和导出 word的实例代码
2018/07/31 NodeJs
angularjs中判断ng-repeat是否迭代完的实例
2018/09/12 Javascript
原生JS实现简单的无缝自动轮播效果
2018/09/26 Javascript
微信小程序webview组件交互,内联h5页面并网页实现微信支付实现解析
2019/08/16 Javascript
vue中v-for循环选中点击的元素并对该元素添加样式操作
2020/07/17 Javascript
Python内置数据类型详解
2014/08/18 Python
Python读取和处理文件后缀为.sqlite的数据文件(实例讲解)
2017/06/27 Python
Python cookbook(字符串与文本)在字符串的开头或结尾处进行文本匹配操作
2018/04/20 Python
python实现雪花飘落效果实例讲解
2019/06/18 Python
python机器学习包mlxtend的安装和配置详解
2019/08/21 Python
Python利用matplotlib绘制约数个数统计图示例
2019/11/26 Python
Pandas —— resample()重采样和asfreq()频度转换方式
2020/02/26 Python
TensorFlow的reshape操作 tf.reshape的实现
2020/04/19 Python
python实现mask矩阵示例(根据列表所给元素)
2020/07/30 Python
深入浅析HTML5中的SVG
2015/11/27 HTML / CSS
中西医结合临床医学专业大学生自荐信
2013/09/28 职场文书
关于幼儿的自我评价
2013/12/18 职场文书
弘扬民族精神演讲稿
2014/05/07 职场文书
经营理念口号
2014/06/21 职场文书
人口与计划生育目标管理责任书
2014/07/29 职场文书
2015年中秋节演讲稿
2015/03/20 职场文书
工商行政处罚决定书
2015/06/24 职场文书
文明医院的标语集锦!
2019/07/24 职场文书
《鲁滨逊漂流记》之六读后感(4篇)
2019/09/29 职场文书