This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.collection.mutable | |
import _root_.java.text.SimpleDateFormat | |
import _root_.java.util.{TimeZone, Calendar, Date, Locale} | |
import _root_.java.io._ | |
import java.util.regex._ | |
import java.text.DecimalFormat | |
import java.text.NumberFormat | |
import java.lang.StringBuffer | |
import scala.collection.mutable._ | |
import scala.io.Source | |
import org.apache.commons.lang.time.DurationFormatUtils | |
import org.apache.commons.math.stat.Frequency | |
import org.apache.commons.math.stat.descriptive.SynchronizedSummaryStatistics | |
/**
 * A Scala script for generating summary statistics and frequency distributions from a log file.
 *
 * Usage:
 *
 *   scala -cp .:commons-lang-2.5.jar:commons-math-2.0.jar StatisticsGenerator.scala <log file location>
 *
 * Notes:
 *  - The log file must have entries containing the format:
 *      <any content>context=<some context string> time=<# of milliseconds>ms
 *  - Requires Scala 2.8.0.Beta and above.
 *  - Loads the entire file into memory, so you will want to increase the heap for large files, e.g.
 *      JAVA_OPTS="-Xmx512m" scala -cp .:commons-lang-2.5.jar:commons-math-2.0.jar StatisticsGenerator.scala ./webtest.log > ./webtest-timings.txt
 *
 * Neil Figg
 */
// Entry point: run the workflow when exactly one argument (the log file
// location) was supplied, otherwise report the problem and show the usage.
if (args.length == 1) {
  println("processing log file " + args(0))
  main
} else {
  println("incorrect number of args were supplied.")
  printUsage
}
// The workflow: parse the log file named by the first argument, group the
// timings by context, build one PerformanceStat per context and print each
// aggregate report to standard output.
def main = {
  val timingsByContext = reduceTimings(mapTimings(args(0)))
  for (stat <- genertateStatistics(timingsByContext)) {
    println(stat.getAggregateReport())
  }
}
/**
 * Reads the log file and emits one Timing for every line that contains a
 * `context=<label> time=<n>ms` fragment. Lines without such a fragment are
 * skipped.
 *
 * @param filename location of the log file to read
 * @return the Timings of all matching lines, in file order
 */
def mapTimings(filename: String): List[Timing] = {
  val p = Pattern.compile("context=(.*) time=(\\d+)ms")
  // NOTE(review): Source.fromPath appears to exist only in Scala 2.8 pre-releases;
  // fromFile(java.io.File) is used instead so the script also runs on released versions.
  val source = Source.fromFile(new java.io.File(filename))
  try {
    // toList reads every line up front, so closing the source afterwards is safe.
    source.getLines().toList.flatMap { line =>
      val m = p.matcher(line)
      // toLong matches the type of Timing.time; toInt failed on values above Int.MaxValue.
      if (m.find) List(Timing(m.group(1), m.group(2).toLong)) else Nil
    }
  } finally {
    // Release the underlying file handle even when reading or parsing fails.
    source.close()
  }
}
/**
 * Groups the response times by the context they were recorded for.
 *
 * @param timings the parsed log entries
 * @return for each context, its response times; each time is prepended as it
 *         is encountered, so every list is in reverse encounter order
 */
def reduceTimings(timings: List[Timing]): Map[String, List[Long]] = {
  val noTimings = Map[String, List[Long]]()
  timings.foldLeft(noTimings) { case (byContext, Timing(context, time)) =>
    val seenSoFar = byContext.getOrElse(context, Nil)
    byContext + (context -> (time :: seenSoFar))
  }
}
/**
 * Wraps every context and its response times in a PerformanceStat.
 *
 * @param timings response times keyed by context
 * @return one PerformanceStat per map entry, in the map's iteration order
 */
def genertateStatistics(timings: Map[String, List[Long]]): List[PerformanceStat] = {
  timings.toList.map { case (context, times) =>
    new PerformanceStat(context, times)
  }
}
/**
 * Prints the command line needed to run this script.
 *
 * The classpath lists the commons-lang and commons-math jars, which are the
 * two third-party libraries this script imports.
 */
def printUsage = {
  println("Usage:")
  println("scala -cp .:commons-lang-2.5.jar:commons-math-2.0.jar StatisticsGenerator.scala <log file location>")
}
/**
 * One key-value pair <context, timing> taken from a log entry.
 *
 * @param context the context label of the entry
 * @param time    the time recorded for that context, in milliseconds
 */
case class Timing(
  var context: String,
  var time: Long
)
/**
 * Holds the response times recorded for one named context and renders
 * summary statistics and a cumulative frequency distribution for them.
 *
 * @param name label printed at the top of the aggregate report
 */
class PerformanceStat(var name: String) {
  // Response times in milliseconds; addResponseTime prepends new entries.
  private var values = List[Long]()
  private val newLine: String = System.getProperty("line.separator")

  /** Creates a stat that starts out with the given response times. */
  def this(name: String, times: List[Long]) = {
    this(name)
    this.values = times
  }

  def getName = name

  def setName(value: String): Unit = { name = value }

  /** Records one more response time. */
  def addResponseTime(responseTime: Long): Unit = {
    values = responseTime :: values
  }

  /** Builds a fresh Frequency populated with every recorded response time. */
  def generateFrequencyDistribution(): Frequency = {
    val frequency = new Frequency()
    values.foreach(frequency.addValue(_))
    frequency
  }

  /** Builds a fresh SynchronizedSummaryStatistics populated with every recorded response time. */
  def generateSummaryStatistics(): SynchronizedSummaryStatistics = {
    val summaryStatistics = new SynchronizedSummaryStatistics()
    values.foreach(summaryStatistics.addValue(_))
    summaryStatistics
  }

  /**
   * Renders the full report: name, generation timestamp, number of calls,
   * summary statistics and frequency distribution.
   */
  def getAggregateReport(): String = {
    // Timestamp of report generation (not of the log entries).
    def getDateTime(): String =
      new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date())

    // One value is recorded per call, so the list size is the call count;
    // no need to build a statistics object just to read its counter.
    val recordCounter = values.size
    val sb = new StringBuffer()
    sb.append(newLine).append(name).append(newLine)
    sb.append("----------------------------------------------------").append(newLine).append(newLine)
    sb.append("Date: ").append(getDateTime()).append(newLine)
    sb.append("# of calls: ").append(recordCounter).append(newLine).append(newLine)
    sb.append(getSummaryStatisticsReport()).append(newLine)
    sb.append(getFrequencyDistributionReport()).append(newLine).append(newLine)
    sb.toString()
  }

  /** Renders max, min, mean and standard deviation, each formatted with two decimals and an "ms" suffix. */
  def getSummaryStatisticsReport(): String = {
    val summaryStatistics = generateSummaryStatistics()
    val decimalFormat = new DecimalFormat("####,###.00")
    def inMillis(value: Double): String = decimalFormat.format(value) + "ms"

    val sb = new StringBuffer()
    sb.append("Summary Statistics").append(newLine)
    sb.append("Max: ").append(inMillis(summaryStatistics.getMax())).append(newLine)
    sb.append("Min: ").append(inMillis(summaryStatistics.getMin())).append(newLine)
    sb.append("Mean: ").append(inMillis(summaryStatistics.getMean())).append(newLine)
    sb.append("Standard Deviation: ").append(inMillis(summaryStatistics.getStandardDeviation())).append(newLine)
    sb.toString()
  }

  /**
   * Renders, for a fixed list of time bounds, the cumulative count and
   * cumulative percentage of response times up to each bound.
   */
  def getFrequencyDistributionReport(): String = {
    val frequencyDistribution = generateFrequencyDistribution()
    val recordCounter = values.size
    val timePeriods = List(50, 100, 150, 200, 250, 300, 400, 500, 1000, 2000, 3000, 60000)
    val percentFormatter = NumberFormat.getPercentInstance()
    val sb = new StringBuffer()
    sb.append("Frequency Distribution").append(newLine)
    // NOTE(review): commons-math documents getCumFreq/getCumPct as counting values
    // less than OR EQUAL to the bound, so the "< " label is slightly off at the
    // boundary. The label is kept unchanged because it is part of the report output.
    timePeriods.foreach { timePeriod =>
      sb.append("< " + timePeriod + "ms: ")
      sb.append(frequencyDistribution.getCumFreq(timePeriod))
      sb.append("/").append(recordCounter)
      sb.append(" (").append(percentFormatter.format(frequencyDistribution.getCumPct(timePeriod))).append(")")
      sb.append(newLine)
    }
    sb.toString()
  }
}