Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion perforator/agent/collector/pkg/machine/programstate/lang.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,23 @@ func (s *State) DeletePthreadConfig(id unwinder.BinaryId) error {
return s.maps.PthreadStorage.Delete(&id)
}

// SymbolizeInterpeter looks up key in the InterpreterSymbols map.
// res is the destination the lookup wrote into; exists is true only when the
// lookup returned a nil error.
//
// TODO: we can use batch lookups into bpf maps
func (s *State) SymbolizeInterpeter(key *unwinder.SymbolKey) (res unwinder.Symbol, exists bool) {
	if err := s.maps.InterpreterSymbols.Lookup(key, &res); err != nil {
		return res, false
	}
	return res, true
}

// SymbolizeInterpreterBatch looks up every element of keys in the
// InterpreterSymbols map, issuing one Lookup call per key, and returns two
// slices parallel to keys: symbols[i] is the lookup destination for keys[i]
// and found[i] reports whether that Lookup returned a nil error.
//
// Per the original author's note, BPF_MAP_LOOKUP_BATCH cannot be used to fetch
// an arbitrary set of keys from this map, hence the per-key calls. The method
// is meant to be called for cache misses only (see Symbolizer.SymbolizeBatch).
func (s *State) SymbolizeInterpreterBatch(keys []unwinder.SymbolKey) (symbols []unwinder.Symbol, found []bool) {
	count := len(keys)
	symbols = make([]unwinder.Symbol, count)
	found = make([]bool, count)
	for idx := 0; idx < count; idx++ {
		err := s.maps.InterpreterSymbols.Lookup(&keys[idx], &symbols[idx])
		found[idx] = err == nil
	}
	return symbols, found
}
33 changes: 19 additions & 14 deletions perforator/agent/collector/pkg/profiler/stack_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,28 +33,34 @@ func newPHPSampleStackProcessor(symbolizer *symbolizer.Symbolizer) *sampleStackP
}

// Process adds one interpreter location per frame of stack to builder and
// returns the metrics collected while doing so (framesCount is incremented
// once per frame).
// NOTE(review): this span is a rendered diff, so the removed and the added
// variants of some statements appear next to each other (both loop headers,
// both processFrame calls).
func (s *sampleStackProcessor) Process(builder *profile.SampleBuilder, stack *unwinder.InterpreterStack) interpreterStackMetrics {
processFrame := s.getFrameProcessor()
n := int(stack.Len)
mtr := interpreterStackMetrics{}

for i := 0; i < int(stack.Len); i++ {
// Gather the symbol key of every frame first and resolve them in a single
// SymbolizeBatch call before any location is built.
// NOTE(review): keys are gathered for all frames, including those that
// processPythonFrame handles as trampolines (Linestart == -1) without ever
// reading the prefetched symbol; confirm these extra lookups are intended.
keys := make([]unwinder.SymbolKey, n)
for i := range n {
keys[i] = stack.Frames[i].SymbolKey
}
// prefetched[i] is the symbol resolved for keys[i].
prefetched := s.interpreterSymbolizer.SymbolizeBatch(keys)

// The frame processor is chosen once per stack, not once per frame.
processFrame := s.getFrameProcessor()
for i := range n {
loc := builder.AddInterpreterLocation(&profile.InterpreterLocationKey{
ObjectAddress: stack.Frames[i].SymbolKey.ObjectAddr,
Linestart: stack.Frames[i].SymbolKey.Linestart,
})

// Every interpreter location is attributed to the language's mapping path.
loc.SetMapping().SetPath(string(s.langMapping)).Finish()
processFrame(s, &mtr, loc, &stack.Frames[i])

processFrame(s, &mtr, loc, &stack.Frames[i], prefetched[i])
loc.Finish()
mtr.framesCount++
}

return mtr
}

func processFrameCommon(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame) {
symbol, exists := s.interpreterSymbolizer.Symbolize(&frame.SymbolKey)
if !exists {
func processFrameCommon(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame, sym *symbolizer.Symbol) {
if sym == nil {
mtr.unsymbolizedFramesCount++
loc.AddFrame().
SetName(models.UnsymbolizedInterpreterLocation).
Expand All @@ -64,22 +70,21 @@ func processFrameCommon(s *sampleStackProcessor, mtr *interpreterStackMetrics, l
}

loc.AddFrame().
SetName(symbol.Name).
SetFilename(symbol.FileName).
SetName(sym.Name).
SetFilename(sym.FileName).
SetStartLine(int64(frame.SymbolKey.Linestart)).
Finish()
}

// processPythonFrame handles a single Python frame: a frame whose
// SymbolKey.Linestart is -1 is emitted as a frame named
// python_models.PythonTrampolineFrame; any other frame is passed on to
// processFrameCommon.
// NOTE(review): this span is a rendered diff, so both the previous and the
// new signature and processFrameCommon call are shown.
func processPythonFrame(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame) {
func processPythonFrame(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame, sym *symbolizer.Symbol) {
// Trampoline frames get a fixed name and are not counted in mtr.
if frame.SymbolKey.Linestart == -1 {
loc.AddFrame().SetName(python_models.PythonTrampolineFrame).Finish()
return
}

processFrameCommon(s, mtr, loc, frame)
processFrameCommon(s, mtr, loc, frame, sym)
}

func (s *sampleStackProcessor) getFrameProcessor() func(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame) {
func (s *sampleStackProcessor) getFrameProcessor() func(s *sampleStackProcessor, mtr *interpreterStackMetrics, loc *profile.LocationBuilder, frame *unwinder.InterpreterFrame, sym *symbolizer.Symbol) {
switch s.langMapping {
case profile.PythonSpecialMapping:
return processPythonFrame
Expand Down
86 changes: 66 additions & 20 deletions perforator/internal/linguist/symbolizer/symbolizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,27 @@ func extractNameAndFilenameSlices(symbol *unwinder.Symbol) (nameBytes, filenameB
return
}

// decodeSymbol builds a Symbol from the name and file name stored in raw.
// raw.CodepointSize selects the decoding: 1 reads zero-terminated strings,
// 2 goes through decodeUTF16 and 4 through decodeUTF32.
// Any other codepoint size yields nil.
func (s *Symbolizer) decodeSymbol(raw *unwinder.Symbol) *Symbol {
	rawName, rawFileName := extractNameAndFilenameSlices(raw)

	switch raw.CodepointSize {
	case 1:
		return &Symbol{
			Name:     copy.ZeroTerminatedString(rawName),
			FileName: copy.ZeroTerminatedString(rawFileName),
		}
	case 2:
		return &Symbol{
			Name:     s.decodeUTF16(rawName),
			FileName: s.decodeUTF16(rawFileName),
		}
	case 4:
		return &Symbol{
			Name:     s.decodeUTF32(rawName),
			FileName: s.decodeUTF32(rawFileName),
		}
	}

	// Unknown codepoint size: the symbol cannot be decoded.
	return nil
}

func (s *Symbolizer) Symbolize(key *unwinder.SymbolKey) (*Symbol, bool) {
if symbol, ok := s.cache.Get(*key); ok {
s.metrics.cacheHits.Inc()
Expand All @@ -186,33 +207,58 @@ func (s *Symbolizer) Symbolize(key *unwinder.SymbolKey) (*Symbol, bool) {

s.metrics.cacheMisses.Inc()

symbol, exists := s.state.SymbolizeInterpeter(key)
raw, exists := s.state.SymbolizeInterpeter(key)
if !exists {
return nil, false
}

var name, fileName string
nameBytes, filenameBytes := extractNameAndFilenameSlices(&symbol)

switch symbol.CodepointSize {
case 1:
name = copy.ZeroTerminatedString(nameBytes)
fileName = copy.ZeroTerminatedString(filenameBytes)
case 2:
name = s.decodeUTF16(nameBytes)
fileName = s.decodeUTF16(filenameBytes)
case 4:
name = s.decodeUTF32(nameBytes)
fileName = s.decodeUTF32(filenameBytes)
default:
sym := s.decodeSymbol(&raw)
if sym == nil {
return nil, false
}

newSymbol := &Symbol{
Name: name,
FileName: fileName,
_ = s.cache.Add(*key, sym)
return sym, true
}

// SymbolizeBatch resolves every element of keys and returns the symbols in the
// same order. results[i] is nil when keys[i] could not be resolved: it is
// absent from both the cache and the eBPF map, or its stored symbol has an
// unknown codepoint size.
//
// Resolution runs in two passes. Cache hits are served first; the remaining
// keys are then fetched through State.SymbolizeInterpreterBatch, decoded and
// added to the cache. A key that occurs several times among the misses (for
// example the frames of a recursive call) is fetched and decoded only once,
// and the resulting *Symbol is shared by all of its positions.
// Every position that is not served from the cache counts as one cache miss.
func (s *Symbolizer) SymbolizeBatch(keys []unwinder.SymbolKey) []*Symbol {
	results := make([]*Symbol, len(keys))

	// Pass 1: serve hits from the cache and collect the distinct missing keys.
	var (
		missIdx  []int                      // positions in keys not served from the cache
		missKeys []unwinder.SymbolKey       // distinct missing keys, in first-seen order
		slotOf   map[unwinder.SymbolKey]int // missing key -> its index in missKeys
	)
	for i := range keys {
		if sym, ok := s.cache.Get(keys[i]); ok {
			s.metrics.cacheHits.Inc()
			results[i] = sym
			continue
		}
		s.metrics.cacheMisses.Inc()
		missIdx = append(missIdx, i)

		// Allocate the dedup map lazily so the all-hits path stays allocation-free.
		if slotOf == nil {
			slotOf = make(map[unwinder.SymbolKey]int)
		}
		if _, seen := slotOf[keys[i]]; !seen {
			slotOf[keys[i]] = len(missKeys)
			missKeys = append(missKeys, keys[i])
		}
	}
	if len(missIdx) == 0 {
		return results
	}

	// Pass 2: one eBPF lookup and one decode per distinct missing key.
	rawSymbols, found := s.state.SymbolizeInterpreterBatch(missKeys)
	decoded := make([]*Symbol, len(missKeys))
	for j := range missKeys {
		if !found[j] {
			continue
		}
		sym := s.decodeSymbol(&rawSymbols[j])
		if sym == nil {
			continue
		}
		// The value returned by Add is not needed here.
		_ = s.cache.Add(missKeys[j], sym)
		decoded[j] = sym
	}

	// Fan the decoded symbols back out to every position that missed.
	for _, idx := range missIdx {
		results[idx] = decoded[slotOf[keys[idx]]]
	}
	return results
}