python分析nignx访问日志脚本分享


Posted in Python onFebruary 26, 2015
#!/usr/bin/env python 
# coding=utf-8 
 
#------------------------------------------------------ 
# Name:     nginx 日志分析脚本 
# Purpose:   此脚本只用来分析nginx的访问日志 
# Version:   1.0 
# Author:    LEO 
# Created:   2013-05-07 
# Modified:   2013-05-07 
# Copyright:  (c) LEO 2013 
#------------------------------------------------------ 
 
import sys 
import time 
 
#该类是用来打印格式 
class displayFormat(object): 
 
  def format_size(self,size): 
    '''''格式化流量单位''' 
    KB = 1024      #KB -> B B是字节 
    MB = 1048576    #MB -> B 
    GB = 1073741824   #GB -> B 
    TB = 1099511627776 #TB -> B 
    if size >= TB : 
      size = str(size / TB) + 'T' 
    elif size < KB : 
      size = str(size) + 'B' 
    elif size >= GB and size < TB: 
      size = str(size / GB) + 'G' 
    elif size >= MB and size < GB : 
      size = str(size / MB) + 'M' 
    else : 
      size = str(size / KB) + 'K' 
    return size 
 
  #定义字符串格式化 
  formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s' 
 
  def transverse_line(self) : 
    '''''输出横线''' 
    print self.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10) 
 
  def head(self): 
    '''''输出头部信息''' 
    print self.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503') 
 
  def error_print(self) : 
    '''''输出错误信息''' 
    print 
    print 'Usage : ' + sys.argv[0] + ' NginxLogFilePath [Number]' 
    print 
    sys.exit(1) 
 
  def execut_time(self): 
    '''''输出脚本执行的时间''' 
    print 
    print "Script Execution Time: %.3f second" % time.clock() 
    print 
 
#该类是用来生成主机信息的字典 
class hostInfo(object): 
  host_info = ['200','404','500','302','304','503','403','times','size'] 
 
  def __init__(self,host): 
    self.host = host = {}.fromkeys(self.host_info,0) 
 
  def increment(self,status_times_size,is_size): 
    '''''该方法是用来给host_info中的各个值加1''' 
    if status_times_size == 'times': 
      self.host['times'] += 1 
    elif is_size: 
      self.host['size'] = self.host['size'] + status_times_size 
    else: 
      self.host[status_times_size] += 1 
 
  def get_value(self,value): 
    '''''该方法是取到各个主机信息中对应的值''' 
    return self.host[value] 
 
#该类是用来分析文件 
class fileAnalysis(object): 
  def __init__(self): 
    '''''初始化一个空字典''' 
    self.report_dict = {} 
    self.total_request_times,self.total_traffic,self.total_200, 
    self.total_404,self.total_500,self.total_403,self.total_302, 
    self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0 
 
  def split_eachline_todict(self,line): 
    '''''分割文件中的每一行,并返回一个字典''' 
    split_line = line.split() 
    split_dict = {'remote_host':split_line[0],'status':split_line[8], 
           'bytes_sent':split_line[9],} 
    return split_dict 
 
  def generate_log_report(self,logfile): 
    '''''读取文件,分析split_eachline_todict方法生成的字典''' 
    for line in logfile: 
      try: 
        line_dict = self.split_eachline_todict(line) 
        host = line_dict['remote_host'] 
        status = line_dict['status'] 
      except ValueError : 
        continue 
      except IndexError : 
        continue 
 
      if host not in self.report_dict : 
        host_info_obj = hostInfo(host) 
        self.report_dict[host] = host_info_obj 
      else : 
        host_info_obj = self.report_dict[host] 
 
      host_info_obj.increment('times',False) 
      if status in host_info_obj.host_info : 
        host_info_obj.increment(status,False) 
      try: 
        bytes_sent = int(line_dict['bytes_sent']) 
      except ValueError: 
        bytes_sent = 0 
      host_info_obj.increment(bytes_sent,True) 
    return self.report_dict 
 
  def return_sorted_list(self,true_dict): 
    '''''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序''' 
    for host_key in true_dict : 
      host_value = true_dict[host_key] 
      times = host_value.get_value('times')            
      self.total_request_times = self.total_request_times + times 
      size = host_value.get_value('size')            
      self.total_traffic = self.total_traffic + size  
 
      o200 = host_value.get_value('200') 
      o404 = host_value.get_value('404') 
      o500 = host_value.get_value('500') 
      o403 = host_value.get_value('403') 
      o302 = host_value.get_value('302') 
      o304 = host_value.get_value('304') 
      o503 = host_value.get_value('503') 
 
      true_dict[host_key] = {'200':o200,'404':o404,'500':o500, 
                  '403':o403,'302':o302,'304':o304, 
                  '503':o503,'times':times,'size':size} 
 
      self.total_200 = self.total_200 + o200 
      self.total_404 = self.total_404 + o404 
      self.total_500 = self.total_500 + o500 
      self.total_302 = self.total_302 + o302 
      self.total_304 = self.total_304 + o304 
      self.total_503 = self.total_503 + o503 
 
    sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],
                               t[1]['size']),reverse=True) 
 
    return sorted_list 
 
class Main(object): 
  def main(self) : 
    '''''主调函数''' 
    display_format = displayFormat() 
    arg_length = len(sys.argv) 
    if arg_length == 1 : 
      display_format.error_print() 
    elif arg_length == 2 or arg_length == 3: 
      infile_name = sys.argv[1] 
      try : 
        infile = open(infile_name,'r') 
        if arg_length == 3 : 
          lines = int(sys.argv[2]) 
        else : 
          lines = 0 
      except IOError,e : 
        print 
        print e 
        display_format.error_print() 
      except ValueError : 
        print 
        print "Please Enter A Volid Number !!" 
        display_format.error_print() 
    else : 
      display_format.error_print() 
 
    fileAnalysis_obj = fileAnalysis() 
    not_true_dict = fileAnalysis_obj.generate_log_report(infile) 
    log_report = fileAnalysis_obj.return_sorted_list(not_true_dict) 
    total_ip = len(log_report) 
    if lines : 
      log_report = log_report[0:lines] 
    infile.close() 
 
    print 
    total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic) 
    total_request_times = fileAnalysis_obj.total_request_times 
    print 'Total IP: %s  Total Traffic: %s  Total Request Times: %d' 
       % (total_ip,total_traffic,total_request_times) 
    print 
    display_format.head() 
    display_format.transverse_line() 
 
    for host in log_report : 
      times = host[1]['times'] 
      times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100 
      print display_format.formatstring % (host[0],
                         display_format.format_size(host[1]['size']),
                         times,str(times_percent)[0:5],
                         host[1]['200'],host[1]['404'],
                         host[1]['500'],host[1]['403'],
                         host[1]['302'],host[1]['304'],host[1]['503']) 
                         
    if (not lines) or total_ip == lines : 
      display_format.transverse_line() 
      print display_format.formatstring % (total_ip,total_traffic, 
                         total_request_times,'100%',
                         fileAnalysis_obj.total_200,
                         fileAnalysis_obj.total_404,
                         fileAnalysis_obj.total_500, 
                         fileAnalysis_obj.total_403,
                         fileAnalysis_obj.total_302, 
                         fileAnalysis_obj.total_304,
                         fileAnalysis_obj.total_503) 
 
    display_format.execut_time() 
 
if __name__ == '__main__': 
  main_obj = Main() 
  main_obj.main()
Python 相关文章推荐
python写入xml文件的方法
May 08 Python
Python语言实现百度语音识别API的使用实例
Dec 13 Python
Python基于ThreadingTCPServer创建多线程代理的方法示例
Jan 11 Python
Python实现App自动签到领取积分功能
Sep 29 Python
使用Python批量修改文件名的代码实例
Jan 24 Python
Python pip替换为阿里源的方法步骤
Jul 02 Python
python命令行工具Click快速掌握
Jul 04 Python
python自动化测试无法启动谷歌浏览器问题
Oct 10 Python
用 Python 制作地球仪的方法
Apr 24 Python
浅谈Python 钉钉报警必备知识系统讲解
Aug 17 Python
python 如何区分return和yield
Sep 22 Python
python模拟点击在ios中实现的实例讲解
Nov 26 Python
python分析apache访问日志脚本分享
Feb 26 #Python
Python构造函数及解构函数介绍
Feb 26 #Python
python中的__slots__使用示例
Feb 26 #Python
Python map和reduce函数用法示例
Feb 26 #Python
Python中运行并行任务技巧
Feb 26 #Python
Python通过递归遍历出集合中所有元素的方法
Feb 25 #Python
Python THREADING模块中的JOIN()方法深入理解
Feb 18 #Python
You might like
PHP文件打开、关闭、写入的判断与执行代码
2011/05/24 PHP
ThinkPHP的RBAC(基于角色权限控制)深入解析
2013/06/17 PHP
限制ckeditor上传图片文件大小的方法
2013/11/15 PHP
php二维数组排序方法(array_multisort usort)
2013/12/25 PHP
PHP根据传入参数合并多个JS和CSS文件的简单实现
2014/06/13 PHP
PHP中set error handler函数用法小结
2015/11/11 PHP
php短信接口代码
2016/05/13 PHP
PHP 等比例缩放图片详解及实例代码
2016/09/18 PHP
Laravel中unique和exists验证规则的优化详解
2018/01/28 PHP
php用xpath解析html的代码实例讲解
2019/02/14 PHP
javaScript矢量图表库-gRaphael几行代码实现精美的条形图/饼图/点图/曲线图
2013/01/09 Javascript
JS 按钮点击触发(兼容IE、火狐)
2013/08/07 Javascript
JavaScript严格模式详解
2017/01/16 Javascript
javascript实现数据双向绑定的三种方式小结
2017/03/09 Javascript
Angularjs自定义指令Directive详解
2017/05/27 Javascript
Vue仿支付宝支付功能
2018/05/25 Javascript
vue 实现左右拖拽元素并且不超过他的父元素的宽度
2018/11/30 Javascript
Vue.js中 v-model 指令的修饰符详解
2018/12/03 Javascript
python中wx将图标显示在右下角的脚本代码
2013/03/08 Python
在Python中使用zlib模块进行数据压缩的教程
2015/06/26 Python
Python面向对象程序设计OOP深入分析【构造函数,组合类,工具类等】
2019/01/05 Python
Python实现简单查找最长子串功能示例
2019/02/26 Python
python3实现的zip格式压缩文件夹操作示例
2019/08/17 Python
python kafka 多线程消费者&amp;手动提交实例
2019/12/21 Python
Python基于stuck实现scoket文件传输
2020/04/02 Python
Jupyter Notebook输出矢量图实例
2020/04/14 Python
css3中背景尺寸background-size详解
2014/09/02 HTML / CSS
AHAVA美国官方网站:死海海泥护肤品牌
2016/10/18 全球购物
优秀电子工程系毕业生求职信
2014/05/24 职场文书
大学生违纪检讨书300字
2014/10/25 职场文书
2016教师节问候语
2015/11/10 职场文书
2016教师给学生的毕业寄语
2015/12/04 职场文书
社区宣传标语口号
2015/12/26 职场文书
ORACLE数据库应用开发的三十个注意事项
2021/06/07 Oracle
python编程简单几行代码实现视频转换Gif示例
2021/10/05 Python
Python捕获、播放和保存摄像头视频并提高视频清晰度和对比度
2022/04/14 Python