additional regex format checks added

more cases now processed correctly
This commit is contained in:
Alexander Oleinik 2015-09-13 10:44:00 +00:00
parent 6fcc3b8cf8
commit cc8a29c4b2
1 changed files with 96 additions and 49 deletions

View File

@ -29,6 +29,11 @@ type HttpStats struct {
codes map[int]int codes map[int]int
} }
// CombinedEntry holds the two values extracted from a single parsed log line.
type CombinedEntry struct {
// time is the entry timestamp, parsed from the regexp group named "time".
time time.Time
// code is the HTTP status code, converted from the regexp group named "code".
code int
}
var sampleConfig = ` var sampleConfig = `
# List of virtualhosts for http codes collecting # List of virtualhosts for http codes collecting
# (each section for one virtualhost, none for disable collecting codes) # (each section for one virtualhost, none for disable collecting codes)
@ -115,40 +120,74 @@ func SearchStringInSlice(a string, list []string) bool {
return false return false
} }
func (n Webservercodes) ParseRegex(regex string, f io.ReadSeeker) (*regexp.Regexp, error) { func (n Webservercodes) CombineKeysValues(keys []string, values []string) (*CombinedEntry, error) {
if rx, err := regexp.Compile(regex); err == nil { if len(values) < len(keys) {
return nil, errors.New("Not enough substrings")
keys := rx.SubexpNames(); }
if SearchStringInSlice("time", keys) && SearchStringInSlice("code", keys) {
items := map[string]string{}
reader := reverse.NewScanner(f) for k, v := range keys {
if (reader.Scan()) { items[v] = values[k]
// we will check regexp validity by scan the last log line }
// and parse it, assuming that other lines will match as well
parsedLine := rx.FindStringSubmatch(reader.Text()) combined := CombinedEntry{}
if len(parsedLine) >= 3 { if logDt, ok := items["time"]; ok {
if time, err := time.Parse("02/Jan/2006:15:04:05 -0700", logDt); err == nil {
logDt := parsedLine[1] combined.time = time
if _, err := time.Parse("02/Jan/2006:15:04:05 -0700", logDt); err == nil {
return rx, nil
} else {
return nil, errors.New("Time must be in apache %t format. Example: '02/Jan/2006:15:04:05 -0700'")
}
} else {
return nil, errors.New("Cannot find matches for regex in log line")
}
} else {
// if not Scanned, file is empty, so we don't need to return regex error
return rx, nil
}
} else { } else {
return nil, errors.New("Regexp must define 'time' and 'code' fields") return nil, errors.New("Time must be in apache %t format. Example: '02/Jan/2006:15:04:05 -0700'")
} }
} else { } else {
return nil, err return nil, errors.New("Time is absent in log line")
} }
if _, ok := items["code"]; ok {
code, _ := strconv.Atoi(items["code"])
combined.code = code
} else {
return nil, errors.New("Http code is absent in log line")
}
return &combined, nil
}
func (n Webservercodes) ValidateRegexp(regex string, f io.ReadSeeker) (*regexp.Regexp, []string, error) {
keys := []string{}
var rx *regexp.Regexp
var err error
if rx, err = regexp.Compile(regex); err != nil {
// error in case of malformed regexp
return nil, keys, err
}
keys = rx.SubexpNames();
if !(SearchStringInSlice("time", keys) && SearchStringInSlice("code", keys)) {
// error if either of the fields 'time' or 'code' is not defined
return nil, keys, errors.New("Regexp must define 'time' and 'code' fields")
}
// check regexp validity by scanning the last log line and parsing it,
// assuming that the other lines will match as well
reader := reverse.NewScanner(f)
if (!reader.Scan()) {
// if nothing was scanned the file is empty, so there is no regex error to return
return rx, keys, nil
}
strings := rx.FindStringSubmatch(reader.Text())
if len(strings) == 0 {
// error if regexp mismatch
return nil, keys, errors.New("Log entries are not match regexp")
}
if _, err := n.CombineKeysValues(keys, strings); err != nil {
// error if no values for 'time' or 'code' are found in parsed log line
return nil, keys, err
}
return rx, keys, nil
} }
func (n *Webservercodes) ParseHttpCodes(file string, regex string, duration time.Duration) (*HttpStats, error) { func (n *Webservercodes) ParseHttpCodes(file string, regex string, duration time.Duration) (*HttpStats, error) {
@ -156,36 +195,44 @@ func (n *Webservercodes) ParseHttpCodes(file string, regex string, duration time
stats := HttpStats{codes: make(map[int]int)} stats := HttpStats{codes: make(map[int]int)}
if f, err := os.Open(file); err == nil { if f, err := os.Open(file); err == nil {
defer f.Close() defer f.Close()
if rx, err := n.ParseRegex(regex, f); err == nil { if rx, keys, err := n.ValidateRegexp(regex, f); err == nil {
var text, logDt string
var parsedLine []string
curTime := time.Now() curTime := time.Now()
reader := reverse.NewScanner(f) errorsCounter := 0
errorsMax := 100 // there is something wrong if more than errorsMax parse errors
var vastedLoop bool
var strings []string
reader := reverse.NewScanner(f)
for reader.Scan() { for reader.Scan() {
text = reader.Text() vastedLoop = false
strings = rx.FindStringSubmatch(reader.Text())
parsedLine = rx.FindStringSubmatch(text) if len(strings) > 0 {
if len(parsedLine) > 0 { if parsedLine, err := n.CombineKeysValues(keys, strings); err == nil {
logDt = parsedLine[1] if curTime.Sub(parsedLine.time) > duration {
if time, err := time.Parse("02/Jan/2006:15:04:05 -0700", logDt); err == nil {
if curTime.Sub(time) > duration {
break break
} }
if code, err := strconv.Atoi(parsedLine[2]); err == nil { if _, ok := stats.codes[parsedLine.code]; ok {
if _, ok := stats.codes[code]; ok { stats.codes[parsedLine.code]++
stats.codes[code]++ } else {
} else { stats.codes[parsedLine.code] = 1
stats.codes[code] = 1
}
} }
} else {
vastedLoop = true
} }
} else {
vastedLoop = true
} }
if vastedLoop {
errorsCounter++
}
if errorsCounter >= errorsMax {
break
}
}
if errorsCounter >= errorsMax {
return nil, errors.New("Too many entries with wrong format in log file. Check regex_parsestring")
} }
} else { } else {
return nil, err return nil, err