Source file src/runtime/mprof.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Malloc profiling.
     6  // Patterned after tcmalloc's algorithms; shorter code.
     7  
     8  package runtime
     9  
    10  import (
    11  	"runtime/internal/atomic"
    12  	"unsafe"
    13  )
    14  
// proflock is the mutex the profiling functions in this file hold while
// reading or updating the bucket lists and the mProf state.
//
// NOTE(rsc): Everything here could use cas if contention became an issue.
var proflock mutex
    17  
    18  // All memory allocations are local and do not escape outside of the profiler.
    19  // The profiler is forbidden from referring to garbage-collected memory.
    20  
const (
	// profile types; stkbucket links a new bucket onto mbuckets,
	// bbuckets, or xbuckets according to this value.
	memProfile bucketType = 1 + iota
	blockProfile
	mutexProfile

	// size of bucket hash table (number of chains in buckhash)
	buckHashSize = 179999

	// max depth of stack to record in bucket
	maxStack = 32
)

// bucketType identifies the profile (memory, block, or mutex) that a
// bucket belongs to.
type bucketType int
    35  
// A bucket holds per-call-stack profiling information.
// The representation is a bit sleazy, inherited from C.
// This struct defines the bucket header. It is followed in
// memory by the stack words and then the actual record
// data, either a memRecord or a blockRecord.
//
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
//
// No heap pointers.
//
//go:notinheap
type bucket struct {
	next    *bucket    // next bucket on the same buckhash chain
	allnext *bucket    // next bucket on the per-profile list (mbuckets, bbuckets, or xbuckets)
	typ     bucketType // memProfile, blockProfile, or mutexProfile
	hash    uintptr    // hash of the stack and size, as computed by stkbucket
	size    uintptr    // size value passed to stkbucket when the bucket was created
	nstk    uintptr    // number of stack words stored after the header
}
    56  
// A memRecord is the bucket data for a bucket of type memProfile,
// part of the memory profile.
type memRecord struct {
	// The following complex 3-stage scheme of stats accumulation
	// is required to obtain a consistent picture of mallocs and frees
	// for some point in time.
	// The problem is that mallocs come in real time, while frees
	// come only after a GC during concurrent sweeping. So if we would
	// naively count them, we would get a skew toward mallocs.
	//
	// Hence, we delay information to get consistent snapshots as
	// of mark termination. Allocations count toward the next mark
	// termination's snapshot, while sweep frees count toward the
	// previous mark termination's snapshot:
	//
	//              MT          MT          MT          MT
	//             .·|         .·|         .·|         .·|
	//          .·˙  |      .·˙  |      .·˙  |      .·˙  |
	//       .·˙     |   .·˙     |   .·˙     |   .·˙     |
	//    .·˙        |.·˙        |.·˙        |.·˙        |
	//
	//       alloc → ▲ ← free
	//               ┠┅┅┅┅┅┅┅┅┅┅┅P
	//       C+2     →    C+1    →  C
	//
	//                   alloc → ▲ ← free
	//                           ┠┅┅┅┅┅┅┅┅┅┅┅P
	//                   C+2     →    C+1    →  C
	//
	// Since we can't publish a consistent snapshot until all of
	// the sweep frees are accounted for, we wait until the next
	// mark termination ("MT" above) to publish the previous mark
	// termination's snapshot ("P" above). To do this, allocation
	// and free events are accounted to *future* heap profile
	// cycles ("C+n" above) and we only publish a cycle once all
	// of the events from that cycle must be done. Specifically:
	//
	// Mallocs are accounted to cycle C+2.
	// Explicit frees are accounted to cycle C+2.
	// GC frees (done during sweeping) are accounted to cycle C+1.
	//
	// After mark termination, we increment the global heap
	// profile cycle counter and accumulate the stats from cycle C
	// into the active profile.

	// active is the currently published profile. A profiling
	// cycle can be accumulated into active once it's complete.
	active memRecordCycle

	// future records the profile events we're counting for cycles
	// that have not yet been published. This is a ring buffer
	// indexed by the global heap profile cycle C and stores
	// cycles C, C+1, and C+2. Unlike active, these counts are
	// only for a single cycle; they are not cumulative across
	// cycles.
	//
	// We store cycle C here because there's a window between when
	// C becomes the active cycle and when we've flushed it to
	// active.
	future [3]memRecordCycle
}
   118  
   119  // memRecordCycle
   120  type memRecordCycle struct {
   121  	allocs, frees           uintptr
   122  	alloc_bytes, free_bytes uintptr
   123  }
   124  
   125  // add accumulates b into a. It does not zero b.
   126  func (a *memRecordCycle) add(b *memRecordCycle) {
   127  	a.allocs += b.allocs
   128  	a.frees += b.frees
   129  	a.alloc_bytes += b.alloc_bytes
   130  	a.free_bytes += b.free_bytes
   131  }
   132  
// A blockRecord is the bucket data for a bucket of type blockProfile,
// which is used in blocking and mutex profiles.
type blockRecord struct {
	// count is the number of events; it is a float64 because
	// saveblockevent may add a fractional rate/cycles weight.
	count float64
	// cycles is the accumulated cycle total added by saveblockevent.
	cycles int64
}
   139  
   140  var (
   141  	mbuckets  *bucket // memory profile buckets
   142  	bbuckets  *bucket // blocking profile buckets
   143  	xbuckets  *bucket // mutex profile buckets
   144  	buckhash  *[179999]*bucket
   145  	bucketmem uintptr
   146  
   147  	mProf struct {
   148  		// All fields in mProf are protected by proflock.
   149  
   150  		// cycle is the global heap profile cycle. This wraps
   151  		// at mProfCycleWrap.
   152  		cycle uint32
   153  		// flushed indicates that future[cycle] in all buckets
   154  		// has been flushed to the active profile.
   155  		flushed bool
   156  	}
   157  )
   158  
// mProfCycleWrap is the value at which mProf.cycle wraps back to zero
// (see mProf_NextCycle). It is a multiple of len(memRecord{}.future), so
// cycle%len(future) keeps advancing through the ring in order across
// the wrap.
const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)
   160  
   161  // newBucket allocates a bucket with the given type and number of stack entries.
   162  func newBucket(typ bucketType, nstk int) *bucket {
   163  	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0))
   164  	switch typ {
   165  	default:
   166  		throw("invalid profile bucket type")
   167  	case memProfile:
   168  		size += unsafe.Sizeof(memRecord{})
   169  	case blockProfile, mutexProfile:
   170  		size += unsafe.Sizeof(blockRecord{})
   171  	}
   172  
   173  	b := (*bucket)(persistentalloc(size, 0, &memstats.buckhash_sys))
   174  	bucketmem += size
   175  	b.typ = typ
   176  	b.nstk = uintptr(nstk)
   177  	return b
   178  }
   179  
   180  // stk returns the slice in b holding the stack.
   181  func (b *bucket) stk() []uintptr {
   182  	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
   183  	return stk[:b.nstk:b.nstk]
   184  }
   185  
   186  // mp returns the memRecord associated with the memProfile bucket b.
   187  func (b *bucket) mp() *memRecord {
   188  	if b.typ != memProfile {
   189  		throw("bad use of bucket.mp")
   190  	}
   191  	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
   192  	return (*memRecord)(data)
   193  }
   194  
   195  // bp returns the blockRecord associated with the blockProfile bucket b.
   196  func (b *bucket) bp() *blockRecord {
   197  	if b.typ != blockProfile && b.typ != mutexProfile {
   198  		throw("bad use of bucket.bp")
   199  	}
   200  	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(uintptr(0)))
   201  	return (*blockRecord)(data)
   202  }
   203  
   204  // Return the bucket for stk[0:nstk], allocating new bucket if needed.
   205  func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket {
   206  	if buckhash == nil {
   207  		buckhash = (*[buckHashSize]*bucket)(sysAlloc(unsafe.Sizeof(*buckhash), &memstats.buckhash_sys))
   208  		if buckhash == nil {
   209  			throw("runtime: cannot allocate memory")
   210  		}
   211  	}
   212  
   213  	// Hash stack.
   214  	var h uintptr
   215  	for _, pc := range stk {
   216  		h += pc
   217  		h += h << 10
   218  		h ^= h >> 6
   219  	}
   220  	// hash in size
   221  	h += size
   222  	h += h << 10
   223  	h ^= h >> 6
   224  	// finalize
   225  	h += h << 3
   226  	h ^= h >> 11
   227  
   228  	i := int(h % buckHashSize)
   229  	for b := buckhash[i]; b != nil; b = b.next {
   230  		if b.typ == typ && b.hash == h && b.size == size && eqslice(b.stk(), stk) {
   231  			return b
   232  		}
   233  	}
   234  
   235  	if !alloc {
   236  		return nil
   237  	}
   238  
   239  	// Create new bucket.
   240  	b := newBucket(typ, len(stk))
   241  	copy(b.stk(), stk)
   242  	b.hash = h
   243  	b.size = size
   244  	b.next = buckhash[i]
   245  	buckhash[i] = b
   246  	if typ == memProfile {
   247  		b.allnext = mbuckets
   248  		mbuckets = b
   249  	} else if typ == mutexProfile {
   250  		b.allnext = xbuckets
   251  		xbuckets = b
   252  	} else {
   253  		b.allnext = bbuckets
   254  		bbuckets = b
   255  	}
   256  	return b
   257  }
   258  
   259  func eqslice(x, y []uintptr) bool {
   260  	if len(x) != len(y) {
   261  		return false
   262  	}
   263  	for i, xi := range x {
   264  		if xi != y[i] {
   265  			return false
   266  		}
   267  	}
   268  	return true
   269  }
   270  
   271  // mProf_NextCycle publishes the next heap profile cycle and creates a
   272  // fresh heap profile cycle. This operation is fast and can be done
   273  // during STW. The caller must call mProf_Flush before calling
   274  // mProf_NextCycle again.
   275  //
   276  // This is called by mark termination during STW so allocations and
   277  // frees after the world is started again count towards a new heap
   278  // profiling cycle.
   279  func mProf_NextCycle() {
   280  	lock(&proflock)
   281  	// We explicitly wrap mProf.cycle rather than depending on
   282  	// uint wraparound because the memRecord.future ring does not
   283  	// itself wrap at a power of two.
   284  	mProf.cycle = (mProf.cycle + 1) % mProfCycleWrap
   285  	mProf.flushed = false
   286  	unlock(&proflock)
   287  }
   288  
   289  // mProf_Flush flushes the events from the current heap profiling
   290  // cycle into the active profile. After this it is safe to start a new
   291  // heap profiling cycle with mProf_NextCycle.
   292  //
   293  // This is called by GC after mark termination starts the world. In
   294  // contrast with mProf_NextCycle, this is somewhat expensive, but safe
   295  // to do concurrently.
   296  func mProf_Flush() {
   297  	lock(&proflock)
   298  	if !mProf.flushed {
   299  		mProf_FlushLocked()
   300  		mProf.flushed = true
   301  	}
   302  	unlock(&proflock)
   303  }
   304  
   305  func mProf_FlushLocked() {
   306  	c := mProf.cycle
   307  	for b := mbuckets; b != nil; b = b.allnext {
   308  		mp := b.mp()
   309  
   310  		// Flush cycle C into the published profile and clear
   311  		// it for reuse.
   312  		mpc := &mp.future[c%uint32(len(mp.future))]
   313  		mp.active.add(mpc)
   314  		*mpc = memRecordCycle{}
   315  	}
   316  }
   317  
   318  // mProf_PostSweep records that all sweep frees for this GC cycle have
   319  // completed. This has the effect of publishing the heap profile
   320  // snapshot as of the last mark termination without advancing the heap
   321  // profile cycle.
   322  func mProf_PostSweep() {
   323  	lock(&proflock)
   324  	// Flush cycle C+1 to the active profile so everything as of
   325  	// the last mark termination becomes visible. *Don't* advance
   326  	// the cycle, since we're still accumulating allocs in cycle
   327  	// C+2, which have to become C+1 in the next mark termination
   328  	// and so on.
   329  	c := mProf.cycle
   330  	for b := mbuckets; b != nil; b = b.allnext {
   331  		mp := b.mp()
   332  		mpc := &mp.future[(c+1)%uint32(len(mp.future))]
   333  		mp.active.add(mpc)
   334  		*mpc = memRecordCycle{}
   335  	}
   336  	unlock(&proflock)
   337  }
   338  
   339  // Called by malloc to record a profiled block.
   340  func mProf_Malloc(p unsafe.Pointer, size uintptr) {
   341  	var stk [maxStack]uintptr
   342  	nstk := callers(4, stk[:])
   343  	lock(&proflock)
   344  	b := stkbucket(memProfile, size, stk[:nstk], true)
   345  	c := mProf.cycle
   346  	mp := b.mp()
   347  	mpc := &mp.future[(c+2)%uint32(len(mp.future))]
   348  	mpc.allocs++
   349  	mpc.alloc_bytes += size
   350  	unlock(&proflock)
   351  
   352  	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
   353  	// This reduces potential contention and chances of deadlocks.
   354  	// Since the object must be alive during call to mProf_Malloc,
   355  	// it's fine to do this non-atomically.
   356  	systemstack(func() {
   357  		setprofilebucket(p, b)
   358  	})
   359  }
   360  
   361  // Called when freeing a profiled block.
   362  func mProf_Free(b *bucket, size uintptr) {
   363  	lock(&proflock)
   364  	c := mProf.cycle
   365  	mp := b.mp()
   366  	mpc := &mp.future[(c+1)%uint32(len(mp.future))]
   367  	mpc.frees++
   368  	mpc.free_bytes += size
   369  	unlock(&proflock)
   370  }
   371  
   372  var blockprofilerate uint64 // in CPU ticks
   373  
   374  // SetBlockProfileRate controls the fraction of goroutine blocking events
   375  // that are reported in the blocking profile. The profiler aims to sample
   376  // an average of one blocking event per rate nanoseconds spent blocked.
   377  //
   378  // To include every blocking event in the profile, pass rate = 1.
   379  // To turn off profiling entirely, pass rate <= 0.
   380  func SetBlockProfileRate(rate int) {
   381  	var r int64
   382  	if rate <= 0 {
   383  		r = 0 // disable profiling
   384  	} else if rate == 1 {
   385  		r = 1 // profile everything
   386  	} else {
   387  		// convert ns to cycles, use float64 to prevent overflow during multiplication
   388  		r = int64(float64(rate) * float64(tickspersecond()) / (1000 * 1000 * 1000))
   389  		if r == 0 {
   390  			r = 1
   391  		}
   392  	}
   393  
   394  	atomic.Store64(&blockprofilerate, uint64(r))
   395  }
   396  
   397  func blockevent(cycles int64, skip int) {
   398  	if cycles <= 0 {
   399  		cycles = 1
   400  	}
   401  
   402  	rate := int64(atomic.Load64(&blockprofilerate))
   403  	if blocksampled(cycles, rate) {
   404  		saveblockevent(cycles, rate, skip+1, blockProfile)
   405  	}
   406  }
   407  
   408  // blocksampled returns true for all events where cycles >= rate. Shorter
   409  // events have a cycles/rate random chance of returning true.
   410  func blocksampled(cycles, rate int64) bool {
   411  	if rate <= 0 || (rate > cycles && int64(fastrand())%rate > cycles) {
   412  		return false
   413  	}
   414  	return true
   415  }
   416  
   417  func saveblockevent(cycles, rate int64, skip int, which bucketType) {
   418  	gp := getg()
   419  	var nstk int
   420  	var stk [maxStack]uintptr
   421  	if gp.m.curg == nil || gp.m.curg == gp {
   422  		nstk = callers(skip, stk[:])
   423  	} else {
   424  		nstk = gcallers(gp.m.curg, skip, stk[:])
   425  	}
   426  	lock(&proflock)
   427  	b := stkbucket(which, 0, stk[:nstk], true)
   428  
   429  	if which == blockProfile && cycles < rate {
   430  		// Remove sampling bias, see discussion on http://golang.org/cl/299991.
   431  		b.bp().count += float64(rate) / float64(cycles)
   432  		b.bp().cycles += rate
   433  	} else {
   434  		b.bp().count++
   435  		b.bp().cycles += cycles
   436  	}
   437  	unlock(&proflock)
   438  }
   439  
   440  var mutexprofilerate uint64 // fraction sampled
   441  
   442  // SetMutexProfileFraction controls the fraction of mutex contention events
   443  // that are reported in the mutex profile. On average 1/rate events are
   444  // reported. The previous rate is returned.
   445  //
   446  // To turn off profiling entirely, pass rate 0.
   447  // To just read the current rate, pass rate < 0.
   448  // (For n>1 the details of sampling may change.)
   449  func SetMutexProfileFraction(rate int) int {
   450  	if rate < 0 {
   451  		return int(mutexprofilerate)
   452  	}
   453  	old := mutexprofilerate
   454  	atomic.Store64(&mutexprofilerate, uint64(rate))
   455  	return int(old)
   456  }
   457  
   458  //go:linkname mutexevent sync.event
   459  func mutexevent(cycles int64, skip int) {
   460  	if cycles < 0 {
   461  		cycles = 0
   462  	}
   463  	rate := int64(atomic.Load64(&mutexprofilerate))
   464  	// TODO(pjw): measure impact of always calling fastrand vs using something
   465  	// like malloc.go:nextSample()
   466  	if rate > 0 && int64(fastrand())%rate == 0 {
   467  		saveblockevent(cycles, rate, skip+1, mutexProfile)
   468  	}
   469  }
   470  
   471  // Go interface to profile data.
   472  
   473  // A StackRecord describes a single execution stack.
   474  type StackRecord struct {
   475  	Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
   476  }
   477  
   478  // Stack returns the stack trace associated with the record,
   479  // a prefix of r.Stack0.
   480  func (r *StackRecord) Stack() []uintptr {
   481  	for i, v := range r.Stack0 {
   482  		if v == 0 {
   483  			return r.Stack0[0:i]
   484  		}
   485  	}
   486  	return r.Stack0[0:]
   487  }
   488  
   489  // MemProfileRate controls the fraction of memory allocations
   490  // that are recorded and reported in the memory profile.
   491  // The profiler aims to sample an average of
   492  // one allocation per MemProfileRate bytes allocated.
   493  //
   494  // To include every allocated block in the profile, set MemProfileRate to 1.
   495  // To turn off profiling entirely, set MemProfileRate to 0.
   496  //
   497  // The tools that process the memory profiles assume that the
   498  // profile rate is constant across the lifetime of the program
   499  // and equal to the current value. Programs that change the
   500  // memory profiling rate should do so just once, as early as
   501  // possible in the execution of the program (for example,
   502  // at the beginning of main).
   503  var MemProfileRate int = defaultMemProfileRate(512 * 1024)
   504  
   505  // defaultMemProfileRate returns 0 if disableMemoryProfiling is set.
   506  // It exists primarily for the godoc rendering of MemProfileRate
   507  // above.
   508  func defaultMemProfileRate(v int) int {
   509  	if disableMemoryProfiling {
   510  		return 0
   511  	}
   512  	return v
   513  }
   514  
   515  // disableMemoryProfiling is set by the linker if runtime.MemProfile
   516  // is not used and the link type guarantees nobody else could use it
   517  // elsewhere.
   518  var disableMemoryProfiling bool
   519  
   520  // A MemProfileRecord describes the live objects allocated
   521  // by a particular call sequence (stack trace).
   522  type MemProfileRecord struct {
   523  	AllocBytes, FreeBytes     int64       // number of bytes allocated, freed
   524  	AllocObjects, FreeObjects int64       // number of objects allocated, freed
   525  	Stack0                    [32]uintptr // stack trace for this record; ends at first 0 entry
   526  }
   527  
   528  // InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
   529  func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }
   530  
   531  // InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
   532  func (r *MemProfileRecord) InUseObjects() int64 {
   533  	return r.AllocObjects - r.FreeObjects
   534  }
   535  
   536  // Stack returns the stack trace associated with the record,
   537  // a prefix of r.Stack0.
   538  func (r *MemProfileRecord) Stack() []uintptr {
   539  	for i, v := range r.Stack0 {
   540  		if v == 0 {
   541  			return r.Stack0[0:i]
   542  		}
   543  	}
   544  	return r.Stack0[0:]
   545  }
   546  
   547  // MemProfile returns a profile of memory allocated and freed per allocation
   548  // site.
   549  //
   550  // MemProfile returns n, the number of records in the current memory profile.
   551  // If len(p) >= n, MemProfile copies the profile into p and returns n, true.
   552  // If len(p) < n, MemProfile does not change p and returns n, false.
   553  //
   554  // If inuseZero is true, the profile includes allocation records
   555  // where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
   556  // These are sites where memory was allocated, but it has all
   557  // been released back to the runtime.
   558  //
   559  // The returned profile may be up to two garbage collection cycles old.
   560  // This is to avoid skewing the profile toward allocations; because
   561  // allocations happen in real time but frees are delayed until the garbage
   562  // collector performs sweeping, the profile only accounts for allocations
   563  // that have had a chance to be freed by the garbage collector.
   564  //
   565  // Most clients should use the runtime/pprof package or
   566  // the testing package's -test.memprofile flag instead
   567  // of calling MemProfile directly.
   568  func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
   569  	lock(&proflock)
   570  	// If we're between mProf_NextCycle and mProf_Flush, take care
   571  	// of flushing to the active profile so we only have to look
   572  	// at the active profile below.
   573  	mProf_FlushLocked()
   574  	clear := true
   575  	for b := mbuckets; b != nil; b = b.allnext {
   576  		mp := b.mp()
   577  		if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
   578  			n++
   579  		}
   580  		if mp.active.allocs != 0 || mp.active.frees != 0 {
   581  			clear = false
   582  		}
   583  	}
   584  	if clear {
   585  		// Absolutely no data, suggesting that a garbage collection
   586  		// has not yet happened. In order to allow profiling when
   587  		// garbage collection is disabled from the beginning of execution,
   588  		// accumulate all of the cycles, and recount buckets.
   589  		n = 0
   590  		for b := mbuckets; b != nil; b = b.allnext {
   591  			mp := b.mp()
   592  			for c := range mp.future {
   593  				mp.active.add(&mp.future[c])
   594  				mp.future[c] = memRecordCycle{}
   595  			}
   596  			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
   597  				n++
   598  			}
   599  		}
   600  	}
   601  	if n <= len(p) {
   602  		ok = true
   603  		idx := 0
   604  		for b := mbuckets; b != nil; b = b.allnext {
   605  			mp := b.mp()
   606  			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
   607  				record(&p[idx], b)
   608  				idx++
   609  			}
   610  		}
   611  	}
   612  	unlock(&proflock)
   613  	return
   614  }
   615  
   616  // Write b's data to r.
   617  func record(r *MemProfileRecord, b *bucket) {
   618  	mp := b.mp()
   619  	r.AllocBytes = int64(mp.active.alloc_bytes)
   620  	r.FreeBytes = int64(mp.active.free_bytes)
   621  	r.AllocObjects = int64(mp.active.allocs)
   622  	r.FreeObjects = int64(mp.active.frees)
   623  	if raceenabled {
   624  		racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), funcPC(MemProfile))
   625  	}
   626  	if msanenabled {
   627  		msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
   628  	}
   629  	copy(r.Stack0[:], b.stk())
   630  	for i := int(b.nstk); i < len(r.Stack0); i++ {
   631  		r.Stack0[i] = 0
   632  	}
   633  }
   634  
   635  func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
   636  	lock(&proflock)
   637  	for b := mbuckets; b != nil; b = b.allnext {
   638  		mp := b.mp()
   639  		fn(b, b.nstk, &b.stk()[0], b.size, mp.active.allocs, mp.active.frees)
   640  	}
   641  	unlock(&proflock)
   642  }
   643  
// BlockProfileRecord describes blocking events originated
// at a particular call sequence (stack trace).
type BlockProfileRecord struct {
	Count  int64 // event count, copied from the bucket's blockRecord
	Cycles int64 // accumulated cycles, copied from the bucket's blockRecord
	StackRecord
}
   651  
   652  // BlockProfile returns n, the number of records in the current blocking profile.
   653  // If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
   654  // If len(p) < n, BlockProfile does not change p and returns n, false.
   655  //
   656  // Most clients should use the runtime/pprof package or
   657  // the testing package's -test.blockprofile flag instead
   658  // of calling BlockProfile directly.
   659  func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
   660  	lock(&proflock)
   661  	for b := bbuckets; b != nil; b = b.allnext {
   662  		n++
   663  	}
   664  	if n <= len(p) {
   665  		ok = true
   666  		for b := bbuckets; b != nil; b = b.allnext {
   667  			bp := b.bp()
   668  			r := &p[0]
   669  			r.Count = int64(bp.count)
   670  			// Prevent callers from having to worry about division by zero errors.
   671  			// See discussion on http://golang.org/cl/299991.
   672  			if r.Count == 0 {
   673  				r.Count = 1
   674  			}
   675  			r.Cycles = bp.cycles
   676  			if raceenabled {
   677  				racewriterangepc(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0), getcallerpc(), funcPC(BlockProfile))
   678  			}
   679  			if msanenabled {
   680  				msanwrite(unsafe.Pointer(&r.Stack0[0]), unsafe.Sizeof(r.Stack0))
   681  			}
   682  			i := copy(r.Stack0[:], b.stk())
   683  			for ; i < len(r.Stack0); i++ {
   684  				r.Stack0[i] = 0
   685  			}
   686  			p = p[1:]
   687  		}
   688  	}
   689  	unlock(&proflock)
   690  	return
   691  }
   692  
   693  // MutexProfile returns n, the number of records in the current mutex profile.
   694  // If len(p) >= n, MutexProfile copies the profile into p and returns n, true.
   695  // Otherwise, MutexProfile does not change p, and returns n, false.
   696  //
   697  // Most clients should use the runtime/pprof package
   698  // instead of calling MutexProfile directly.
   699  func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
   700  	lock(&proflock)
   701  	for b := xbuckets; b != nil; b = b.allnext {
   702  		n++
   703  	}
   704  	if n <= len(p) {
   705  		ok = true
   706  		for b := xbuckets; b != nil; b = b.allnext {
   707  			bp := b.bp()
   708  			r := &p[0]
   709  			r.Count = int64(bp.count)
   710  			r.Cycles = bp.cycles
   711  			i := copy(r.Stack0[:], b.stk())
   712  			for ; i < len(r.Stack0); i++ {
   713  				r.Stack0[i] = 0
   714  			}
   715  			p = p[1:]
   716  		}
   717  	}
   718  	unlock(&proflock)
   719  	return
   720  }
   721  
   722  // ThreadCreateProfile returns n, the number of records in the thread creation profile.
   723  // If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
   724  // If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
   725  //
   726  // Most clients should use the runtime/pprof package instead
   727  // of calling ThreadCreateProfile directly.
   728  func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
   729  	first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
   730  	for mp := first; mp != nil; mp = mp.alllink {
   731  		n++
   732  	}
   733  	if n <= len(p) {
   734  		ok = true
   735  		i := 0
   736  		for mp := first; mp != nil; mp = mp.alllink {
   737  			p[i].Stack0 = mp.createstack
   738  			i++
   739  		}
   740  	}
   741  	return
   742  }
   743  
// runtime_goroutineProfileWithLabels forwards to goroutineProfileWithLabels.
// The linkname directive below exposes it to package runtime/pprof.
//
//go:linkname runtime_goroutineProfileWithLabels runtime/pprof.runtime_goroutineProfileWithLabels
func runtime_goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
	return goroutineProfileWithLabels(p, labels)
}
   748  
// goroutineProfileWithLabels returns n, the number of goroutines counted
// with the world stopped (the caller plus every other goroutine accepted
// by isOK). If n <= len(p) it also stores one stack record per counted
// goroutine in p, with the calling goroutine first, and returns ok = true.
//
// labels may be nil. If labels is non-nil, it must have the same length as p.
// A labels slice of a different length is ignored (treated as nil).
// When labels is used, labels[i] receives the labels pointer of the
// goroutine recorded in p[i].
func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
	if labels != nil && len(labels) != len(p) {
		labels = nil
	}
	gp := getg()

	// isOK reports whether gp1 should be counted as one of the "other"
	// goroutines: not the caller, not dead, and not a system goroutine.
	isOK := func(gp1 *g) bool {
		// Checking isSystemGoroutine here makes GoroutineProfile
		// consistent with both NumGoroutine and Stack.
		return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false)
	}

	stopTheWorld("profile")

	// World is stopped, no locking required.
	// Start at 1 to account for the calling goroutine, which isOK excludes.
	n = 1
	forEachGRace(func(gp1 *g) {
		if isOK(gp1) {
			n++
		}
	})

	if n <= len(p) {
		ok = true
		// r and lbl are cursors over the unfilled tails of p and labels.
		r, lbl := p, labels

		// Save current goroutine.
		sp := getcallersp()
		pc := getcallerpc()
		systemstack(func() {
			saveg(pc, sp, gp, &r[0])
		})
		r = r[1:]

		// If we have a place to put our goroutine labelmap, insert it there.
		if labels != nil {
			lbl[0] = gp.labels
			lbl = lbl[1:]
		}

		// Save other goroutines.
		forEachGRace(func(gp1 *g) {
			if !isOK(gp1) {
				return
			}

			if len(r) == 0 {
				// Should be impossible, but better to return a
				// truncated profile than to crash the entire process.
				return
			}
			// pc and sp of ^uintptr(0) are passed for goroutines other
			// than the caller, whose pc/sp were captured above.
			saveg(^uintptr(0), ^uintptr(0), gp1, &r[0])
			if labels != nil {
				lbl[0] = gp1.labels
				lbl = lbl[1:]
			}
			r = r[1:]
		})
	}

	startTheWorld()
	return n, ok
}
   813  
// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
// If len(p) < n, GoroutineProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling GoroutineProfile directly.
func GoroutineProfile(p []StackRecord) (n int, ok bool) {

	// No labels are requested, so pass nil for the labels slice.
	return goroutineProfileWithLabels(p, nil)
}
   824  
   825  func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
   826  	n := gentraceback(pc, sp, 0, gp, 0, &r.Stack0[0], len(r.Stack0), nil, nil, 0)
   827  	if n < len(r.Stack0) {
   828  		r.Stack0[n] = 0
   829  	}
   830  }
   831  
// Stack formats a stack trace of the calling goroutine into buf
// and returns the number of bytes written to buf.
// If all is true, Stack formats stack traces of all other goroutines
// into buf after the trace for the current goroutine.
//
// When all is true the world is stopped for the duration of the call.
// If buf is empty, nothing is formatted and Stack returns 0.
func Stack(buf []byte, all bool) int {
	if all {
		stopTheWorld("stack trace")
	}

	n := 0
	if len(buf) > 0 {
		gp := getg()
		// Capture the caller's sp and pc here, before switching to
		// the system stack.
		sp := getcallersp()
		pc := getcallerpc()
		systemstack(func() {
			g0 := getg()
			// Force traceback=1 to override GOTRACEBACK setting,
			// so that Stack's results are consistent.
			// GOTRACEBACK is only about crash dumps.
			g0.m.traceback = 1
			// Zero length with capacity len(buf): presumably the
			// traceback output is collected in writebuf — NOTE(review):
			// confirm against the print implementation.
			g0.writebuf = buf[0:0:len(buf)]
			goroutineheader(gp)
			traceback(pc, sp, 0, gp)
			if all {
				tracebackothers(gp)
			}
			g0.m.traceback = 0
			// The number of bytes written is the final length of writebuf.
			n = len(g0.writebuf)
			g0.writebuf = nil
		})
	}

	if all {
		startTheWorld()
	}
	return n
}
   869  
   870  // Tracing of alloc/free/gc.
   871  
// tracelock is held by tracealloc, tracefree, and tracegc while they print.
var tracelock mutex
   873  
   874  func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) {
   875  	lock(&tracelock)
   876  	gp := getg()
   877  	gp.m.traceback = 2
   878  	if typ == nil {
   879  		print("tracealloc(", p, ", ", hex(size), ")\n")
   880  	} else {
   881  		print("tracealloc(", p, ", ", hex(size), ", ", typ.string(), ")\n")
   882  	}
   883  	if gp.m.curg == nil || gp == gp.m.curg {
   884  		goroutineheader(gp)
   885  		pc := getcallerpc()
   886  		sp := getcallersp()
   887  		systemstack(func() {
   888  			traceback(pc, sp, 0, gp)
   889  		})
   890  	} else {
   891  		goroutineheader(gp.m.curg)
   892  		traceback(^uintptr(0), ^uintptr(0), 0, gp.m.curg)
   893  	}
   894  	print("\n")
   895  	gp.m.traceback = 0
   896  	unlock(&tracelock)
   897  }
   898  
   899  func tracefree(p unsafe.Pointer, size uintptr) {
   900  	lock(&tracelock)
   901  	gp := getg()
   902  	gp.m.traceback = 2
   903  	print("tracefree(", p, ", ", hex(size), ")\n")
   904  	goroutineheader(gp)
   905  	pc := getcallerpc()
   906  	sp := getcallersp()
   907  	systemstack(func() {
   908  		traceback(pc, sp, 0, gp)
   909  	})
   910  	print("\n")
   911  	gp.m.traceback = 0
   912  	unlock(&tracelock)
   913  }
   914  
   915  func tracegc() {
   916  	lock(&tracelock)
   917  	gp := getg()
   918  	gp.m.traceback = 2
   919  	print("tracegc()\n")
   920  	// running on m->g0 stack; show all non-g0 goroutines
   921  	tracebackothers(gp)
   922  	print("end tracegc\n")
   923  	print("\n")
   924  	gp.m.traceback = 0
   925  	unlock(&tracelock)
   926  }
   927  

View as plain text