feat: add better metrics for errors

This commit is contained in:
Stefan Martinov 2020-11-23 11:33:36 +01:00
parent 24c65ba8df
commit ffa04ace36
3 changed files with 47 additions and 14 deletions

View File

@ -1,6 +1,7 @@
package repo package repo
import ( import (
"context"
"errors" "errors"
"fmt" "fmt"
"io" "io"
@ -28,13 +29,14 @@ func (repo *Repo) updateRoutine() {
for { for {
select { select {
case resChan := <-repo.updateInProgressChannel: case resChan := <-repo.updateInProgressChannel:
logger.Log.Info("waiting for update to complete", zap.String("chan", fmt.Sprintf("%p", resChan))) log := logger.Log.With(zap.String("chan", fmt.Sprintf("%p", resChan)))
log.Info("Waiting for update to complete")
start := time.Now() start := time.Now()
repoRuntime, errUpdate := repo.update() repoRuntime, errUpdate := repo.update(context.Background())
if errUpdate != nil { if errUpdate != nil {
logger.Log.Error("Failed to update contentserver", zap.Error(errUpdate)) log.Error("Failed to update content server from routine", zap.Error(errUpdate))
status.M.UpdatesFailedCounter.WithLabelValues().Inc() status.M.UpdatesFailedCounter.WithLabelValues(errUpdate.Error()).Inc()
} else { } else {
status.M.UpdatesCompletedCounter.WithLabelValues().Inc() status.M.UpdatesCompletedCounter.WithLabelValues().Inc()
} }
@ -45,7 +47,7 @@ func (repo *Repo) updateRoutine() {
} }
duration := time.Since(start) duration := time.Since(start)
logger.Log.Info("update completed", zap.Duration("duration", duration), zap.String("chan", fmt.Sprintf("%p", resChan))) log.Info("Update completed", zap.Duration("duration", duration))
status.M.UpdateDuration.WithLabelValues().Observe(duration.Seconds()) status.M.UpdateDuration.WithLabelValues().Observe(duration.Seconds())
} }
} }
@ -163,7 +165,11 @@ func wireAliases(directory map[string]*content.RepoNode) error {
// loadNodesFromJSON decodes the bytes held in repo.jsonBuf into a map of
// *content.RepoNode keyed by string.
// Before this commit (left column): the json.Unmarshal error is returned as-is
// together with the map.
// After this commit (right column): a decode error is logged through
// logger.Log.Error and replaced by the fixed error "failed to deserialize nodes",
// and nil is returned instead of the map.
func (repo *Repo) loadNodesFromJSON() (nodes map[string]*content.RepoNode, err error) { func (repo *Repo) loadNodesFromJSON() (nodes map[string]*content.RepoNode, err error) {
nodes = make(map[string]*content.RepoNode) nodes = make(map[string]*content.RepoNode)
err = json.Unmarshal(repo.jsonBuf.Bytes(), &nodes) err = json.Unmarshal(repo.jsonBuf.Bytes(), &nodes)
return nodes, err if err != nil {
logger.Log.Error("Failed to deserialize nodes", zap.Error(err))
return nil, errors.New("failed to deserialize nodes")
}
return nodes, nil
} }
func (repo *Repo) tryToRestoreCurrent() (err error) { func (repo *Repo) tryToRestoreCurrent() (err error) {
@ -174,14 +180,17 @@ func (repo *Repo) tryToRestoreCurrent() (err error) {
return repo.loadJSONBytes() return repo.loadJSONBytes()
} }
func (repo *Repo) get(URL string) (err error) { func (repo *Repo) get(URL string) error {
response, err := http.Get(URL) response, err := repo.httpClient.Get(URL)
if err != nil { if err != nil {
return err logger.Log.Error("Failed to get", zap.Error(err))
return errors.New("failed to get repo")
} }
defer response.Body.Close() defer response.Body.Close()
if response.StatusCode != http.StatusOK { if response.StatusCode != http.StatusOK {
return fmt.Errorf("bad HTTP Response: %q", response.Status) logger.Log.Error(fmt.Sprintf("Bad HTTP Response %q, want %q", response.Status, http.StatusOK))
return errors.New("bad response code")
} }
// Log.Info(ansi.Red + "RESETTING BUFFER" + ansi.Reset) // Log.Info(ansi.Red + "RESETTING BUFFER" + ansi.Reset)
@ -189,10 +198,15 @@ func (repo *Repo) get(URL string) (err error) {
// Log.Info(ansi.Green + "LOADING DATA INTO BUFFER" + ansi.Reset) // Log.Info(ansi.Green + "LOADING DATA INTO BUFFER" + ansi.Reset)
_, err = io.Copy(&repo.jsonBuf, response.Body) _, err = io.Copy(&repo.jsonBuf, response.Body)
return err if err != nil {
logger.Log.Error("Failed to copy IO stream", zap.Error(err))
return errors.New("failed to copy IO stream")
}
return nil
} }
func (repo *Repo) update() (repoRuntime int64, err error) { func (repo *Repo) update(ctx context.Context) (repoRuntime int64, err error) {
startTimeRepo := time.Now().UnixNano() startTimeRepo := time.Now().UnixNano()
err = repo.get(repo.server) err = repo.get(repo.server)
repoRuntime = time.Now().UnixNano() - startTimeRepo repoRuntime = time.Now().UnixNano() - startTimeRepo
@ -263,8 +277,8 @@ func (repo *Repo) loadNodes(newNodes map[string]*content.RepoNode) error {
logger.Log.Debug("loading nodes for dimension", zap.String("dimension", dimension)) logger.Log.Debug("loading nodes for dimension", zap.String("dimension", dimension))
loadErr := repo.updateDimension(dimension, newNode) loadErr := repo.updateDimension(dimension, newNode)
if loadErr != nil { if loadErr != nil {
logger.Log.Debug("failed to load", zap.String("dimension", dimension), zap.Error(loadErr)) logger.Log.Error("Failed to update dimension", zap.String("dimension", dimension), zap.Error(loadErr))
return loadErr return errors.New("failed to update dimension")
} }
} }
dimensionIsValid := func(dimension string) bool { dimensionIsValid := func(dimension string) bool {

View File

@ -2,9 +2,11 @@ package repo
import ( import (
"bytes" "bytes"
"crypto/tls"
"errors" "errors"
"fmt" "fmt"
"io" "io"
"net/http"
"os" "os"
"strings" "strings"
"time" "time"
@ -41,6 +43,8 @@ type Repo struct {
// jsonBytes []byte // jsonBytes []byte
jsonBuf bytes.Buffer jsonBuf bytes.Buffer
httpClient *http.Client
} }
type repoDimension struct { type repoDimension struct {
@ -62,6 +66,7 @@ func NewRepo(server string, varDir string) *Repo {
history: newHistory(varDir), history: newHistory(varDir),
dimensionUpdateChannel: make(chan *repoDimension), dimensionUpdateChannel: make(chan *repoDimension),
dimensionUpdateDoneChannel: make(chan error), dimensionUpdateDoneChannel: make(chan error),
httpClient: getDefaultHTTPClient(2 * time.Minute),
updateInProgressChannel: make(chan chan updateResponse, 0), updateInProgressChannel: make(chan chan updateResponse, 0),
} }
@ -76,9 +81,22 @@ func NewRepo(server string, varDir string) *Repo {
repo.recovered = true repo.recovered = true
logger.Log.Info("restored previous repo content") logger.Log.Info("restored previous repo content")
} }
return repo return repo
} }
// getDefaultHTTPClient returns a new *http.Client whose overall request
// timeout is the given duration. Each call builds a fresh client backed by its
// own transport, which:
//   - disables HTTP keep-alives,
//   - limits the TLS handshake to 5 seconds,
//   - skips TLS certificate verification.
//
// NOTE(review): InsecureSkipVerify disables certificate and host name checks,
// so TLS connections made with this client are open to man-in-the-middle
// attacks. Presumably this is here for self-signed endpoints; confirm it is
// still required before relying on this client for untrusted networks.
func getDefaultHTTPClient(timeout time.Duration) *http.Client {
	transport := &http.Transport{
		TLSHandshakeTimeout: 5 * time.Second,
		TLSClientConfig:     &tls.Config{InsecureSkipVerify: true},
		DisableKeepAlives:   true,
	}
	return &http.Client{
		Timeout:   timeout,
		Transport: transport,
	}
}
// Recovered returns the value of repo.recovered. In the visible part of
// NewRepo that flag is set to true right before the "restored previous repo
// content" log line. The method is identical in both columns of this diff.
func (repo *Repo) Recovered() bool { func (repo *Repo) Recovered() bool {
return repo.recovered return repo.recovered
} }

View File

@ -61,6 +61,7 @@ func newMetrics() *Metrics {
UpdatesFailedCounter: newCounterVec( UpdatesFailedCounter: newCounterVec(
"updates_failed_count", "updates_failed_count",
"Number of updates that failed due to an error", "Number of updates that failed due to an error",
metricLabelError,
), ),
UpdateDuration: newSummaryVec( UpdateDuration: newSummaryVec(
"update_duration_seconds", "update_duration_seconds",