diff --git a/perforator/agent/collector/pkg/machine/programstate/lang.go b/perforator/agent/collector/pkg/machine/programstate/lang.go index d7049f398..9a01aff25 100644 --- a/perforator/agent/collector/pkg/machine/programstate/lang.go +++ b/perforator/agent/collector/pkg/machine/programstate/lang.go @@ -41,9 +41,23 @@ func (s *State) DeletePthreadConfig(id unwinder.BinaryId) error { return s.maps.PthreadStorage.Delete(&id) } -// TODO: we can use batch lookups into bpf maps func (s *State) SymbolizeInterpeter(key *unwinder.SymbolKey) (res unwinder.Symbol, exists bool) { err := s.maps.InterpreterSymbols.Lookup(key, &res) exists = (err == nil) return } + +// SymbolizeInterpreterBatch resolves |keys| symbol keys via individual Lookup calls. +// BPF_MAP_TYPE_LRU_HASH does not support BPF_MAP_LOOKUP_BATCH for arbitrary key sets, +// so per-key Lookup is the correct approach here. +// Intended to be invoked for LRU cache misses only (see Symbolizer.SymbolizeBatch), +// so |keys| ≤ n where n is the full stack depth. +// found[i] == false iff keys[i] is absent from the eBPF map. 
+func (s *State) SymbolizeInterpreterBatch(keys []unwinder.SymbolKey) (symbols []unwinder.Symbol, found []bool) { + symbols = make([]unwinder.Symbol, len(keys)) + found = make([]bool, len(keys)) + for i := range keys { + found[i] = s.maps.InterpreterSymbols.Lookup(&keys[i], &symbols[i]) == nil + } + return +} diff --git a/perforator/agent/collector/pkg/profiler/stack_processor.go b/perforator/agent/collector/pkg/profiler/stack_processor.go index d526b7c4c..d7feaad6f 100644 --- a/perforator/agent/collector/pkg/profiler/stack_processor.go +++ b/perforator/agent/collector/pkg/profiler/stack_processor.go @@ -33,18 +33,25 @@ func newPHPSampleStackProcessor(symbolizer *symbolizer.Symbolizer) *sampleStackP } func (s *sampleStackProcessor) Process(builder *profile.SampleBuilder, stack *unwinder.InterpreterStack) interpreterStackMetrics { - processFrame := s.getFrameProcessor() + n := int(stack.Len) mtr := interpreterStackMetrics{} - for i := 0; i < int(stack.Len); i++ { + // Collect all symbol keys up front; SymbolizeBatch resolves them via + // LRU cache (no syscall) then per-miss eBPF Lookup — before profile construction. 
+ keys := make([]unwinder.SymbolKey, n) + for i := range n { + keys[i] = stack.Frames[i].SymbolKey + } + prefetched := s.interpreterSymbolizer.SymbolizeBatch(keys) + + processFrame := s.getFrameProcessor() + for i := range n { loc := builder.AddInterpreterLocation(&profile.InterpreterLocationKey{ ObjectAddress: stack.Frames[i].SymbolKey.ObjectAddr, Linestart: stack.Frames[i].SymbolKey.Linestart, }) - loc.SetMapping().SetPath(string(s.langMapping)).Finish() - processFrame(s, &mtr, loc, &stack.Frames[i]) - + processFrame(s, &mtr, loc, &stack.Frames[i], prefetched[i]) loc.Finish() mtr.framesCount++ } @@ -52,9 +59,8 @@ func (s *sampleStackProcessor) Process(builder *profile.SampleBuilder, stack *un return mtr } -func processFrameCommon(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame) { - symbol, exists := s.interpreterSymbolizer.Symbolize(&frame.SymbolKey) - if !exists { +func processFrameCommon(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame, sym *symbolizer.Symbol) { + if sym == nil { mtr.unsymbolizedFramesCount++ loc.AddFrame(). SetName(models.UnsymbolizedInterpreterLocation). @@ -64,22 +70,21 @@ func processFrameCommon(s *sampleStackProcessor, mtr *interpreterStackMetrics, l } loc.AddFrame(). - SetName(symbol.Name). - SetFilename(symbol.FileName). + SetName(sym.Name). + SetFilename(sym.FileName). SetStartLine(int64(frame.SymbolKey.Linestart)). 
Finish() } -func processPythonFrame(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame) { +func processPythonFrame(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame, sym *symbolizer.Symbol) { if frame.SymbolKey.Linestart == -1 { loc.AddFrame().SetName(python_models.PythonTrampolineFrame).Finish() return } - - processFrameCommon(s, mtr, loc, frame) + processFrameCommon(s, mtr, loc, frame, sym) } -func (s *sampleStackProcessor) getFrameProcessor() func(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame) { +func (s *sampleStackProcessor) getFrameProcessor() func(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame, sym *symbolizer.Symbol) { switch s.langMapping { case profile.PythonSpecialMapping: return processPythonFrame diff --git a/perforator/internal/linguist/symbolizer/symbolizer.go b/perforator/internal/linguist/symbolizer/symbolizer.go index 341887adf..f4f99c627 100644 --- a/perforator/internal/linguist/symbolizer/symbolizer.go +++ b/perforator/internal/linguist/symbolizer/symbolizer.go @@ -178,6 +178,27 @@ func extractNameAndFilenameSlices(symbol *unwinder.Symbol) (nameBytes, filenameB return } +// decodeSymbol converts a raw eBPF symbol into a Go Symbol. +// Returns nil for unknown codepoint sizes. 
+func (s *Symbolizer) decodeSymbol(raw *unwinder.Symbol) *Symbol { + nameBytes, filenameBytes := extractNameAndFilenameSlices(raw) + var name, fileName string + switch raw.CodepointSize { + case 1: + name = copy.ZeroTerminatedString(nameBytes) + fileName = copy.ZeroTerminatedString(filenameBytes) + case 2: + name = s.decodeUTF16(nameBytes) + fileName = s.decodeUTF16(filenameBytes) + case 4: + name = s.decodeUTF32(nameBytes) + fileName = s.decodeUTF32(filenameBytes) + default: + return nil + } + return &Symbol{Name: name, FileName: fileName} +} + func (s *Symbolizer) Symbolize(key *unwinder.SymbolKey) (*Symbol, bool) { if symbol, ok := s.cache.Get(*key); ok { s.metrics.cacheHits.Inc() @@ -186,33 +207,58 @@ func (s *Symbolizer) Symbolize(key *unwinder.SymbolKey) (*Symbol, bool) { s.metrics.cacheMisses.Inc() - symbol, exists := s.state.SymbolizeInterpeter(key) + raw, exists := s.state.SymbolizeInterpeter(key) if !exists { return nil, false } - var name, fileName string - nameBytes, filenameBytes := extractNameAndFilenameSlices(&symbol) - - switch symbol.CodepointSize { - case 1: - name = copy.ZeroTerminatedString(nameBytes) - fileName = copy.ZeroTerminatedString(filenameBytes) - case 2: - name = s.decodeUTF16(nameBytes) - fileName = s.decodeUTF16(filenameBytes) - case 4: - name = s.decodeUTF32(nameBytes) - fileName = s.decodeUTF32(filenameBytes) - default: + sym := s.decodeSymbol(&raw) + if sym == nil { return nil, false } - newSymbol := &Symbol{ - Name: name, - FileName: fileName, + _ = s.cache.Add(*key, sym) + return sym, true +} + +// SymbolizeBatch resolves all keys in two passes: LRU cache hits first, +// then individual eBPF Lookup calls for cache misses only. +// Syscalls: O(n) per-frame → O(|miss|) per sample; at warm-cache rates |miss| ≪ n. +// results[i] == nil iff keys[i] is absent from both cache and eBPF map. 
+func (s *Symbolizer) SymbolizeBatch(keys []unwinder.SymbolKey) []*Symbol { + results := make([]*Symbol, len(keys)) + + // Pass 1: serve hits from LRU cache. + missIdx := make([]int, 0, len(keys)) + for i, key := range keys { + if sym, ok := s.cache.Get(key); ok { + s.metrics.cacheHits.Inc() + results[i] = sym + } else { + s.metrics.cacheMisses.Inc() + missIdx = append(missIdx, i) + } + } + if len(missIdx) == 0 { + return results } - _ = s.cache.Add(*key, newSymbol) - return newSymbol, true + // Pass 2: fetch cache misses from eBPF map (one Lookup per miss). + missKeys := make([]unwinder.SymbolKey, len(missIdx)) + for j, idx := range missIdx { + missKeys[j] = keys[idx] + } + rawSymbols, found := s.state.SymbolizeInterpreterBatch(missKeys) + for j, idx := range missIdx { + if !found[j] { + continue + } + sym := s.decodeSymbol(&rawSymbols[j]) + if sym == nil { + continue + } + _ = s.cache.Add(missKeys[j], sym) + results[idx] = sym + } + return results }