feat: add better metrics for errors

This commit is contained in:
Stefan Martinov 2020-11-23 11:33:36 +01:00
parent 24c65ba8df
commit ffa04ace36
3 changed files with 47 additions and 14 deletions

View File

@ -1,6 +1,7 @@
package repo
import (
"context"
"errors"
"fmt"
"io"
@ -28,13 +29,14 @@ func (repo *Repo) updateRoutine() {
for {
select {
case resChan := <-repo.updateInProgressChannel:
logger.Log.Info("waiting for update to complete", zap.String("chan", fmt.Sprintf("%p", resChan)))
log := logger.Log.With(zap.String("chan", fmt.Sprintf("%p", resChan)))
log.Info("Waiting for update to complete")
start := time.Now()
repoRuntime, errUpdate := repo.update()
repoRuntime, errUpdate := repo.update(context.Background())
if errUpdate != nil {
logger.Log.Error("Failed to update contentserver", zap.Error(errUpdate))
status.M.UpdatesFailedCounter.WithLabelValues().Inc()
log.Error("Failed to update content server from routine", zap.Error(errUpdate))
status.M.UpdatesFailedCounter.WithLabelValues(errUpdate.Error()).Inc()
} else {
status.M.UpdatesCompletedCounter.WithLabelValues().Inc()
}
@ -45,7 +47,7 @@ func (repo *Repo) updateRoutine() {
}
duration := time.Since(start)
logger.Log.Info("update completed", zap.Duration("duration", duration), zap.String("chan", fmt.Sprintf("%p", resChan)))
log.Info("Update completed", zap.Duration("duration", duration))
status.M.UpdateDuration.WithLabelValues().Observe(duration.Seconds())
}
}
@ -163,7 +165,11 @@ func wireAliases(directory map[string]*content.RepoNode) error {
func (repo *Repo) loadNodesFromJSON() (nodes map[string]*content.RepoNode, err error) {
nodes = make(map[string]*content.RepoNode)
err = json.Unmarshal(repo.jsonBuf.Bytes(), &nodes)
return nodes, err
if err != nil {
logger.Log.Error("Failed to deserialize nodes", zap.Error(err))
return nil, errors.New("failed to deserialize nodes")
}
return nodes, nil
}
func (repo *Repo) tryToRestoreCurrent() (err error) {
@ -174,14 +180,17 @@ func (repo *Repo) tryToRestoreCurrent() (err error) {
return repo.loadJSONBytes()
}
func (repo *Repo) get(URL string) (err error) {
response, err := http.Get(URL)
func (repo *Repo) get(URL string) error {
response, err := repo.httpClient.Get(URL)
if err != nil {
return err
logger.Log.Error("Failed to get", zap.Error(err))
return errors.New("failed to get repo")
}
defer response.Body.Close()
if response.StatusCode != http.StatusOK {
return fmt.Errorf("bad HTTP Response: %q", response.Status)
logger.Log.Error(fmt.Sprintf("Bad HTTP Response %q, want %q", response.Status, http.StatusOK))
return errors.New("bad response code")
}
// Log.Info(ansi.Red + "RESETTING BUFFER" + ansi.Reset)
@ -189,10 +198,15 @@ func (repo *Repo) get(URL string) (err error) {
// Log.Info(ansi.Green + "LOADING DATA INTO BUFFER" + ansi.Reset)
_, err = io.Copy(&repo.jsonBuf, response.Body)
return err
if err != nil {
logger.Log.Error("Failed to copy IO stream", zap.Error(err))
return errors.New("failed to copy IO stream")
}
return nil
}
func (repo *Repo) update() (repoRuntime int64, err error) {
func (repo *Repo) update(ctx context.Context) (repoRuntime int64, err error) {
startTimeRepo := time.Now().UnixNano()
err = repo.get(repo.server)
repoRuntime = time.Now().UnixNano() - startTimeRepo
@ -263,8 +277,8 @@ func (repo *Repo) loadNodes(newNodes map[string]*content.RepoNode) error {
logger.Log.Debug("loading nodes for dimension", zap.String("dimension", dimension))
loadErr := repo.updateDimension(dimension, newNode)
if loadErr != nil {
logger.Log.Debug("failed to load", zap.String("dimension", dimension), zap.Error(loadErr))
return loadErr
logger.Log.Error("Failed to update dimension", zap.String("dimension", dimension), zap.Error(loadErr))
return errors.New("failed to update dimension")
}
}
dimensionIsValid := func(dimension string) bool {

View File

@ -2,9 +2,11 @@ package repo
import (
"bytes"
"crypto/tls"
"errors"
"fmt"
"io"
"net/http"
"os"
"strings"
"time"
@ -41,6 +43,8 @@ type Repo struct {
// jsonBytes []byte
jsonBuf bytes.Buffer
httpClient *http.Client
}
type repoDimension struct {
@ -62,6 +66,7 @@ func NewRepo(server string, varDir string) *Repo {
history: newHistory(varDir),
dimensionUpdateChannel: make(chan *repoDimension),
dimensionUpdateDoneChannel: make(chan error),
httpClient: getDefaultHTTPClient(2 * time.Minute),
updateInProgressChannel: make(chan chan updateResponse, 0),
}
@ -76,9 +81,22 @@ func NewRepo(server string, varDir string) *Repo {
repo.recovered = true
logger.Log.Info("restored previous repo content")
}
return repo
}
// getDefaultHTTPClient returns a new *http.Client whose overall request
// timeout is the given timeout.
//
// The client's transport disables keep-alives and limits the TLS handshake to
// five seconds.
//
// NOTE(review): InsecureSkipVerify is set to true, so the server's TLS
// certificate chain and host name are not verified. Confirm that this is
// intentional for the endpoints this client talks to.
func getDefaultHTTPClient(timeout time.Duration) *http.Client {
	transport := &http.Transport{
		TLSHandshakeTimeout: 5 * time.Second,
		TLSClientConfig:     &tls.Config{InsecureSkipVerify: true},
		DisableKeepAlives:   true,
	}
	return &http.Client{
		Timeout:   timeout,
		Transport: transport,
	}
}
// Recovered reports the current value of the repo's recovered flag.
func (repo *Repo) Recovered() bool {
return repo.recovered
}

View File

@ -61,6 +61,7 @@ func newMetrics() *Metrics {
UpdatesFailedCounter: newCounterVec(
"updates_failed_count",
"Number of updates that failed due to an error",
metricLabelError,
),
UpdateDuration: newSummaryVec(
"update_duration_seconds",