logstats.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. Print stats for log files in a list of directories
  5. This program takes a list of log file directories and prints various statistics
  6. for the files in the directories
  7. """
  8. __author__ = 'George Jones'
  9. __maintainer__ = 'George Jones'
  10. __email__ = 'gmj@pobox.com'
  11. __version__ = '0.0.1'
  12. import optparse
  13. import os
  14. import fnmatch
  15. import sys
  16. from os import path
  17. import time
  18. import gzip
  19. # Setup
  20. def parse_args(argv):
  21. global optp
  22. usage = """
  23. %prog [--quiet] <dir_name> [<dir_name> ...]"""
  24. # Parse arguments.
  25. optp = optparse.OptionParser(description=__doc__.strip(), version=__version__,
  26. usage=usage)
  27. optp.add_option('-d', '--debug', help="Print debugging output",
  28. action='store_true')
  29. optp.add_option('-q', '--quiet', help="be quiet! (For use with scripts/cron)",
  30. action='store_true')
  31. optp.add_option('-n', '--byname', help="Compare file dates lexically by filename, not mtime (not implemetned)",
  32. action='store_true')
  33. optp.add_option('-f', '--files', type='string', action='append',
  34. metavar='<files',
  35. default="*.log" ,
  36. help="files to match (regexp), example: '2015-10-*.log', default: '*.log'")
  37. (opts, args) = optp.parse_args()
  38. return opts, args
  39. def p_error(msg=None):
  40. optp.print_help()
  41. if msg:
  42. optp.error(msg)
  43. sys.exit(1)
  44. def main():
  45. # Parse arguments
  46. global opts
  47. opts, args = parse_args(sys.argv)
  48. if (len(args) == 0):
  49. p_error("need at least one directory name")
  50. sys.exit(1)
  51. if opts.debug:
  52. print("args", args)
  53. # print header
  54. print("|".join(["","dir","totalFiles","totalSize","totalLines","avgRecord","oldestTime","oldestFile","newestTime","newestFile",""]))
  55. # Enumaerate log files of interest
  56. for dir in args:
  57. # initialize totals for this directory
  58. dirTotalsFiles = 0
  59. dirTotalsSize = 0
  60. dirTotalsLines = 0
  61. dirTotalsOldestTimestamp = 4000000000 # 'Tue Oct 2 03:06:40 2096'.
  62. # If this code is running after that...
  63. # George Jones <gmj@pobox.com> Sat Oct 31 10:21:59 2015
  64. dirTotalsOldestName = ""
  65. dirTotalsNewestTimestamp = 0
  66. dirTotalsNewestName = ""
  67. if opts.debug:
  68. print("dir", dir)
  69. if not os.path.isdir(dir):
  70. sys.stderr.write(dir, ' is not a directory, skipping'+ str(e))
  71. next
  72. files = [dir + "/" + f for f in os.listdir(dir)
  73. if
  74. (path.isfile(dir + "/" + f) and
  75. (fnmatch.fnmatch(f,opts.files) or
  76. fnmatch.fnmatch(f,opts.files + ".*.gz")))]
  77. if opts.debug:
  78. print("files", files)
  79. for file in files:
  80. # handle compressed files if requested
  81. # Get direct info about the file
  82. statinfo = os.stat(file)
  83. if file.endswith(".gz"):
  84. num_lines = sum(1 for line in gzip.open(file,'rb'))
  85. if opts.debug:
  86. sys.stderr.write('Compressed file ' + file + ' has ' + str(num_lines) + ' lines' + "\n")
  87. else:
  88. try:
  89. num_lines = sum(1 for line in open(file))
  90. except Exception as e:
  91. sys.stderr.write('Unable to count lines in ' + file + ' '+ str(e) + "\n")
  92. num_lines = 0
  93. if opts.debug:
  94. print("file:", file, "size", statinfo.st_size, "num_lines", num_lines, "mtime:", time.ctime(statinfo.st_mtime))
  95. # compute derived statistics about the file
  96. if num_lines > 0:
  97. avgLineSize = statinfo.st_size / num_lines
  98. else:
  99. avgLineSize = 0
  100. # Add info to summary list
  101. dirTotalsFiles += 1
  102. dirTotalsSize += statinfo.st_size
  103. dirTotalsLines += num_lines
  104. # keep track of oldest and newest files
  105. if opts.byname:
  106. if dirTotalsOldestName == "" or file < dirTotalsOldestName:
  107. if opts.debug:
  108. print("New Oldest File ", file)
  109. dirTotalsOldestName = file
  110. dirTotalsOldestTimestamp = 0
  111. if dirTotalsNewestName == "" or file > dirTotalsNewestName:
  112. if opts.debug:
  113. print("New Newest File ", file)
  114. dirTotalsNewestName = file
  115. dirTotalsNewestTimestamp = 0
  116. else:
  117. if statinfo.st_mtime < dirTotalsOldestTimestamp:
  118. if opts.debug:
  119. print(statinfo.st_mtime, "<", dirTotalsOldestTimestamp)
  120. print("New Oldest Timestamp: ", dirTotalsOldestTimestamp)
  121. dirTotalsOldestTimestamp = statinfo.st_mtime
  122. dirTotalsOldestName = file
  123. if statinfo.st_mtime > dirTotalsNewestTimestamp:
  124. if opts.debug:
  125. print(statinfo.st_mtime, ">", dirTotalsNewestTimestamp)
  126. print("New Newest Timestamp: ", dirTotalsNewestTimestamp)
  127. dirTotalsNewestTimestamp = statinfo.st_mtime
  128. dirTotalsNewestName = file
  129. # compute derived statistics about the directory
  130. if dirTotalsLines > 0:
  131. dirAvgRecord = dirTotalsSize / dirTotalsLines
  132. else:
  133. dirAvgRecord = 0
  134. # print directory summary
  135. print("|".join(["",dir, str(dirTotalsFiles),str(dirTotalsSize),str(dirTotalsLines),str(dirAvgRecord),time.ctime(dirTotalsOldestTimestamp),dirTotalsOldestName,time.ctime(dirTotalsNewestTimestamp),dirTotalsNewestName,""]))
  136. if __name__ == '__main__':
  137. main()