diff --git a/tsdb/head.go b/tsdb/head.go index b28f5aca5..f3e0a7f6c 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -590,6 +590,7 @@ func (h *Head) Init(minValidTime int64) error { snapIdx, snapOffset := -1, 0 refSeries := make(map[chunks.HeadSeriesRef]*memSeries) + snapshotLoaded := false if h.opts.EnableMemorySnapshotOnShutdown { level.Info(h.logger).Log("msg", "Chunk snapshot is enabled, replaying from the snapshot") // If there are any WAL files, there should be at least one WAL file with an index that is current or newer @@ -619,6 +620,7 @@ func (h *Head) Init(minValidTime int64) error { var err error snapIdx, snapOffset, refSeries, err = h.loadChunkSnapshot() if err == nil { + snapshotLoaded = true level.Info(h.logger).Log("msg", "Chunk snapshot loading time", "duration", time.Since(start).String()) } if err != nil { @@ -636,26 +638,36 @@ func (h *Head) Init(minValidTime int64) error { } mmapChunkReplayStart := time.Now() - mmappedChunks, oooMmappedChunks, lastMmapRef, err := h.loadMmappedChunks(refSeries) - if err != nil { - // TODO(codesome): clear out all m-map chunks here for refSeries. - level.Error(h.logger).Log("msg", "Loading on-disk chunks failed", "err", err) - if _, ok := errors.Cause(err).(*chunks.CorruptionErr); ok { - h.metrics.mmapChunkCorruptionTotal.Inc() - } - - // Discard snapshot data since we need to replay the WAL for the missed m-map chunks data. - snapIdx, snapOffset = -1, 0 - - // If this fails, data will be recovered from WAL. - // Hence we wont lose any data (given WAL is not corrupt). - mmappedChunks, oooMmappedChunks, lastMmapRef, err = h.removeCorruptedMmappedChunks(err) + var ( + mmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk + oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk + lastMmapRef chunks.ChunkDiskMapperRef + err error + ) + if snapshotLoaded || h.wal != nil { + // If snapshot was not loaded and if there is no WAL, then m-map chunks will be discarded + // anyway. So we only load m-map chunks when it won't be discarded. + mmappedChunks, oooMmappedChunks, lastMmapRef, err = h.loadMmappedChunks(refSeries) if err != nil { - return err + // TODO(codesome): clear out all m-map chunks here for refSeries. + level.Error(h.logger).Log("msg", "Loading on-disk chunks failed", "err", err) + if _, ok := errors.Cause(err).(*chunks.CorruptionErr); ok { + h.metrics.mmapChunkCorruptionTotal.Inc() + } + + // Discard snapshot data since we need to replay the WAL for the missed m-map chunks data. + snapIdx, snapOffset = -1, 0 + + // If this fails, data will be recovered from WAL. + // Hence we wont lose any data (given WAL is not corrupt). + mmappedChunks, oooMmappedChunks, lastMmapRef, err = h.removeCorruptedMmappedChunks(err) + if err != nil { + return err + } } + level.Info(h.logger).Log("msg", "On-disk memory mappable chunks replay completed", "duration", time.Since(mmapChunkReplayStart).String()) } - level.Info(h.logger).Log("msg", "On-disk memory mappable chunks replay completed", "duration", time.Since(mmapChunkReplayStart).String()) if h.wal == nil { level.Info(h.logger).Log("msg", "WAL not found") return nil