Source file src/runtime/netpoll.go

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build aix || darwin || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris || windows
     6  // +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris windows
     7  
     8  package runtime
     9  
    10  import (
    11  	"runtime/internal/atomic"
    12  	"unsafe"
    13  )
    14  
    15  // Integrated network poller (platform-independent part).
    16  // A particular implementation (epoll/kqueue/port/AIX/Windows)
    17  // must define the following functions:
    18  //
    19  // func netpollinit()
    20  //     Initialize the poller. Only called once.
    21  //
    22  // func netpollopen(fd uintptr, pd *pollDesc) int32
    23  //     Arm edge-triggered notifications for fd. The pd argument is to pass
    24  //     back to netpollready when fd is ready. Return an errno value.
    25  //
    26  // func netpollclose(fd uintptr) int32
    27  //     Disable notifications for fd. Return an errno value.
    28  //
    29  // func netpoll(delta int64) gList
    30  //     Poll the network. If delta < 0, block indefinitely. If delta == 0,
    31  //     poll without blocking. If delta > 0, block for up to delta nanoseconds.
    32  //     Return a list of goroutines built by calling netpollready.
    33  //
    34  // func netpollBreak()
    35  //     Wake up the network poller, assumed to be blocked in netpoll.
    36  //
    37  // func netpollIsPollDescriptor(fd uintptr) bool
    38  //     Reports whether fd is a file descriptor used by the poller.
    39  
// Error codes returned by runtime_pollReset and runtime_pollWait.
// These must match the values in internal/poll/fd_poll_runtime.go.
// Within this file they are produced by netpollcheckerr.
const (
	pollNoError        = 0 // no error
	pollErrClosing     = 1 // descriptor is closed (pd.closing is set)
	pollErrTimeout     = 2 // I/O timeout (the deadline for the requested mode is negative)
	pollErrNotPollable = 3 // general error polling descriptor (pd.everr; reported for reads only)
)
    48  
// pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
// goroutines respectively. Each semaphore is a uintptr that is in one of the
// following states:
// pdReady - io readiness notification is pending;
//           a goroutine consumes the notification by changing the state to nil.
// pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
//          the goroutine commits to park by changing the state to G pointer,
//          or, alternatively, concurrent io notification changes the state to pdReady,
//          or, alternatively, concurrent timeout/close changes the state to nil.
// G pointer - the goroutine is blocked on the semaphore;
//             io notification or timeout/close changes the state to pdReady or nil respectively
//             and unparks the goroutine.
// nil - none of the above (stored as 0).
//
// Any stored value other than 0, pdReady and pdWait is interpreted as a G pointer
// (see netpollunblock, which converts such a value back to *g).
const (
	pdReady uintptr = 1
	pdWait  uintptr = 2
)
    65  
// pollBlockSize is the target size, in bytes, of the chunk of persistent
// memory that (*pollCache).alloc requests when its free list is empty.
// The chunk is carved into pollBlockSize/sizeof(pollDesc) descriptors
// (at least one).
const pollBlockSize = 4 * 1024
    67  
// Network poller descriptor.
//
// No heap pointers.
//
//go:notinheap
type pollDesc struct {
	link *pollDesc // in pollcache, protected by pollcache.lock

	// The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
	// This fully covers the rseq, wseq, rt and wt variables. fd is constant throughout the
	// pollDesc lifetime.
	// pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
	// proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated
	// in a lock-free way by all operations.
	// TODO(golang.org/issue/49008): audit these lock-free fields for continued correctness.
	// NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
	// that will blow up when GC starts moving objects.
	//
	// Deadline convention for rd and wd, as used by pollSetDeadline, netpolldeadlineimpl
	// and netpollcheckerr: > 0 is an armed absolute deadline in nanotime units,
	// 0 means no deadline, and < 0 means the deadline has expired.
	lock    mutex // protects the following fields
	fd      uintptr
	closing bool
	everr   bool      // marks event scanning error happened
	user    uint32    // user settable cookie
	rseq    uintptr   // protects from stale read timers
	rg      uintptr   // pdReady, pdWait, G waiting for read or nil. Accessed atomically.
	rt      timer     // read deadline timer (set if rt.f != nil)
	rd      int64     // read deadline
	wseq    uintptr   // protects from stale write timers
	wg      uintptr   // pdReady, pdWait, G waiting for write or nil. Accessed atomically.
	wt      timer     // write deadline timer
	wd      int64     // write deadline
	self    *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
}
    99  
// pollCache is a free list of pollDesc objects, linked through pollDesc.link.
// lock guards first.
type pollCache struct {
	lock  mutex
	first *pollDesc
	// PollDesc objects must be type-stable,
	// because we can get ready notification from epoll/kqueue
	// after the descriptor is closed/reused.
	// Stale notifications are detected using the rseq/wseq variables,
	// which are incremented when deadlines are changed or the descriptor is reused.
}
   109  
var (
	// netpollInitLock serializes the one-time call to netpollinit
	// made by netpollGenericInit.
	netpollInitLock mutex
	// netpollInited is set to 1 (atomically) once netpollinit has returned.
	netpollInited uint32

	// pollcache is the global free list of pollDesc objects.
	pollcache pollCache
	// netpollWaiters is incremented by netpollblockcommit when a goroutine
	// commits to parking on a descriptor and decremented by netpollgoready.
	netpollWaiters uint32
)
   117  
// poll_runtime_pollServerInit, which is internal/poll.runtime_pollServerInit,
// makes sure the network poller has been initialized by delegating to
// netpollGenericInit.
//go:linkname poll_runtime_pollServerInit internal/poll.runtime_pollServerInit
func poll_runtime_pollServerInit() {
	netpollGenericInit()
}
   122  
   123  func netpollGenericInit() {
   124  	if atomic.Load(&netpollInited) == 0 {
   125  		lockInit(&netpollInitLock, lockRankNetpollInit)
   126  		lock(&netpollInitLock)
   127  		if netpollInited == 0 {
   128  			netpollinit()
   129  			atomic.Store(&netpollInited, 1)
   130  		}
   131  		unlock(&netpollInitLock)
   132  	}
   133  }
   134  
// netpollinited reports whether netpollGenericInit has completed the
// one-time poller initialization (netpollInited is non-zero).
func netpollinited() bool {
	return atomic.Load(&netpollInited) != 0
}
   138  
// poll_runtime_isPollServerDescriptor, which is
// internal/poll.runtime_isPollServerDescriptor, reports whether fd is a
// descriptor being used by netpoll. The answer comes from the
// platform-specific netpollIsPollDescriptor.
//go:linkname poll_runtime_isPollServerDescriptor internal/poll.runtime_isPollServerDescriptor
func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
	return netpollIsPollDescriptor(fd)
}
   146  
   147  //go:linkname poll_runtime_pollOpen internal/poll.runtime_pollOpen
   148  func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
   149  	pd := pollcache.alloc()
   150  	lock(&pd.lock)
   151  	wg := atomic.Loaduintptr(&pd.wg)
   152  	if wg != 0 && wg != pdReady {
   153  		throw("runtime: blocked write on free polldesc")
   154  	}
   155  	rg := atomic.Loaduintptr(&pd.rg)
   156  	if rg != 0 && rg != pdReady {
   157  		throw("runtime: blocked read on free polldesc")
   158  	}
   159  	pd.fd = fd
   160  	pd.closing = false
   161  	pd.everr = false
   162  	pd.rseq++
   163  	atomic.Storeuintptr(&pd.rg, 0)
   164  	pd.rd = 0
   165  	pd.wseq++
   166  	atomic.Storeuintptr(&pd.wg, 0)
   167  	pd.wd = 0
   168  	pd.self = pd
   169  	unlock(&pd.lock)
   170  
   171  	errno := netpollopen(fd, pd)
   172  	if errno != 0 {
   173  		pollcache.free(pd)
   174  		return nil, int(errno)
   175  	}
   176  	return pd, 0
   177  }
   178  
   179  //go:linkname poll_runtime_pollClose internal/poll.runtime_pollClose
   180  func poll_runtime_pollClose(pd *pollDesc) {
   181  	if !pd.closing {
   182  		throw("runtime: close polldesc w/o unblock")
   183  	}
   184  	wg := atomic.Loaduintptr(&pd.wg)
   185  	if wg != 0 && wg != pdReady {
   186  		throw("runtime: blocked write on closing polldesc")
   187  	}
   188  	rg := atomic.Loaduintptr(&pd.rg)
   189  	if rg != 0 && rg != pdReady {
   190  		throw("runtime: blocked read on closing polldesc")
   191  	}
   192  	netpollclose(pd.fd)
   193  	pollcache.free(pd)
   194  }
   195  
   196  func (c *pollCache) free(pd *pollDesc) {
   197  	lock(&c.lock)
   198  	pd.link = c.first
   199  	c.first = pd
   200  	unlock(&c.lock)
   201  }
   202  
   203  // poll_runtime_pollReset, which is internal/poll.runtime_pollReset,
   204  // prepares a descriptor for polling in mode, which is 'r' or 'w'.
   205  // This returns an error code; the codes are defined above.
   206  //go:linkname poll_runtime_pollReset internal/poll.runtime_pollReset
   207  func poll_runtime_pollReset(pd *pollDesc, mode int) int {
   208  	errcode := netpollcheckerr(pd, int32(mode))
   209  	if errcode != pollNoError {
   210  		return errcode
   211  	}
   212  	if mode == 'r' {
   213  		atomic.Storeuintptr(&pd.rg, 0)
   214  	} else if mode == 'w' {
   215  		atomic.Storeuintptr(&pd.wg, 0)
   216  	}
   217  	return pollNoError
   218  }
   219  
   220  // poll_runtime_pollWait, which is internal/poll.runtime_pollWait,
   221  // waits for a descriptor to be ready for reading or writing,
   222  // according to mode, which is 'r' or 'w'.
   223  // This returns an error code; the codes are defined above.
   224  //go:linkname poll_runtime_pollWait internal/poll.runtime_pollWait
   225  func poll_runtime_pollWait(pd *pollDesc, mode int) int {
   226  	errcode := netpollcheckerr(pd, int32(mode))
   227  	if errcode != pollNoError {
   228  		return errcode
   229  	}
   230  	// As for now only Solaris, illumos, and AIX use level-triggered IO.
   231  	if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" {
   232  		netpollarm(pd, mode)
   233  	}
   234  	for !netpollblock(pd, int32(mode), false) {
   235  		errcode = netpollcheckerr(pd, int32(mode))
   236  		if errcode != pollNoError {
   237  			return errcode
   238  		}
   239  		// Can happen if timeout has fired and unblocked us,
   240  		// but before we had a chance to run, timeout has been reset.
   241  		// Pretend it has not happened and retry.
   242  	}
   243  	return pollNoError
   244  }
   245  
   246  //go:linkname poll_runtime_pollWaitCanceled internal/poll.runtime_pollWaitCanceled
   247  func poll_runtime_pollWaitCanceled(pd *pollDesc, mode int) {
   248  	// This function is used only on windows after a failed attempt to cancel
   249  	// a pending async IO operation. Wait for ioready, ignore closing or timeouts.
   250  	for !netpollblock(pd, int32(mode), true) {
   251  	}
   252  }
   253  
// poll_runtime_pollSetDeadline, which is internal/poll.runtime_pollSetDeadline,
// updates the read and/or write deadline of pd. mode is 'r', 'w' or 'r'+'w'
// and selects which deadline(s) are replaced by d.
//
// A positive d is a delay relative to nanotime() and is converted to an
// absolute deadline; zero and negative values are stored unchanged.
// After the update, a positive deadline has a timer armed for it, a
// non-positive deadline has no timer, and a negative deadline additionally
// wakes the goroutine (if any) currently parked in that mode.
// When the read and write deadlines are equal and positive, a single timer
// (pd.rt, running netpollDeadline) serves both and pd.wt is not armed.
//
// The call is a no-op if pd is already closing.
//go:linkname poll_runtime_pollSetDeadline internal/poll.runtime_pollSetDeadline
func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
	lock(&pd.lock)
	if pd.closing {
		unlock(&pd.lock)
		return
	}
	// Remember the previous deadlines and whether they shared one timer,
	// so that unchanged timers are left alone below.
	rd0, wd0 := pd.rd, pd.wd
	combo0 := rd0 > 0 && rd0 == wd0
	if d > 0 {
		// Convert the relative delay into an absolute nanotime value.
		d += nanotime()
		if d <= 0 {
			// If the user has a deadline in the future, but the delay calculation
			// overflows, then set the deadline to the maximum possible value.
			d = 1<<63 - 1
		}
	}
	if mode == 'r' || mode == 'r'+'w' {
		pd.rd = d
	}
	if mode == 'w' || mode == 'r'+'w' {
		pd.wd = d
	}
	// combo: both deadlines are armed and identical, so the read timer
	// fires netpollDeadline, which expires both directions at once.
	combo := pd.rd > 0 && pd.rd == pd.wd
	rtf := netpollReadDeadline
	if combo {
		rtf = netpollDeadline
	}
	if pd.rt.f == nil {
		// No read timer is currently armed.
		if pd.rd > 0 {
			pd.rt.f = rtf
			// Copy current seq into the timer arg.
			// Timer func will check the seq against current descriptor seq,
			// if they differ the descriptor was reused or timers were reset.
			pd.rt.arg = pd.makeArg()
			pd.rt.seq = pd.rseq
			resettimer(&pd.rt, pd.rd)
		}
	} else if pd.rd != rd0 || combo != combo0 {
		pd.rseq++ // invalidate current timers
		if pd.rd > 0 {
			modtimer(&pd.rt, pd.rd, 0, rtf, pd.makeArg(), pd.rseq)
		} else {
			deltimer(&pd.rt)
			pd.rt.f = nil
		}
	}
	if pd.wt.f == nil {
		// No write timer is currently armed; in combo mode the read
		// timer covers the write deadline, so none is needed.
		if pd.wd > 0 && !combo {
			pd.wt.f = netpollWriteDeadline
			pd.wt.arg = pd.makeArg()
			pd.wt.seq = pd.wseq
			resettimer(&pd.wt, pd.wd)
		}
	} else if pd.wd != wd0 || combo != combo0 {
		pd.wseq++ // invalidate current timers
		if pd.wd > 0 && !combo {
			modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd.makeArg(), pd.wseq)
		} else {
			deltimer(&pd.wt)
			pd.wt.f = nil
		}
	}
	// If we set the new deadline in the past, unblock currently pending IO if any.
	var rg, wg *g
	if pd.rd < 0 || pd.wd < 0 {
		atomic.StorepNoWB(noescape(unsafe.Pointer(&wg)), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
		if pd.rd < 0 {
			rg = netpollunblock(pd, 'r', false)
		}
		if pd.wd < 0 {
			wg = netpollunblock(pd, 'w', false)
		}
	}
	unlock(&pd.lock)
	// Make the unblocked goroutines runnable only after pd.lock is released.
	if rg != nil {
		netpollgoready(rg, 3)
	}
	if wg != nil {
		netpollgoready(wg, 3)
	}
}
   336  
// poll_runtime_pollUnblock, which is internal/poll.runtime_pollUnblock,
// marks pd as closing, wakes the goroutines (if any) parked on it for
// reading and for writing, and deletes both deadline timers.
// It throws if pd is already marked closing.
//go:linkname poll_runtime_pollUnblock internal/poll.runtime_pollUnblock
func poll_runtime_pollUnblock(pd *pollDesc) {
	lock(&pd.lock)
	if pd.closing {
		throw("runtime: unblock on closing polldesc")
	}
	pd.closing = true
	// Bump both sequence numbers so that a deadline timer that fires
	// from now on is recognized as stale by netpolldeadlineimpl.
	pd.rseq++
	pd.wseq++
	var rg, wg *g
	atomic.StorepNoWB(noescape(unsafe.Pointer(&rg)), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock
	rg = netpollunblock(pd, 'r', false)
	wg = netpollunblock(pd, 'w', false)
	if pd.rt.f != nil {
		deltimer(&pd.rt)
		pd.rt.f = nil
	}
	if pd.wt.f != nil {
		deltimer(&pd.wt)
		pd.wt.f = nil
	}
	unlock(&pd.lock)
	// Make the unblocked goroutines runnable only after pd.lock is released.
	if rg != nil {
		netpollgoready(rg, 3)
	}
	if wg != nil {
		netpollgoready(wg, 3)
	}
}
   366  
   367  // netpollready is called by the platform-specific netpoll function.
   368  // It declares that the fd associated with pd is ready for I/O.
   369  // The toRun argument is used to build a list of goroutines to return
   370  // from netpoll. The mode argument is 'r', 'w', or 'r'+'w' to indicate
   371  // whether the fd is ready for reading or writing or both.
   372  //
   373  // This may run while the world is stopped, so write barriers are not allowed.
   374  //go:nowritebarrier
   375  func netpollready(toRun *gList, pd *pollDesc, mode int32) {
   376  	var rg, wg *g
   377  	if mode == 'r' || mode == 'r'+'w' {
   378  		rg = netpollunblock(pd, 'r', true)
   379  	}
   380  	if mode == 'w' || mode == 'r'+'w' {
   381  		wg = netpollunblock(pd, 'w', true)
   382  	}
   383  	if rg != nil {
   384  		toRun.push(rg)
   385  	}
   386  	if wg != nil {
   387  		toRun.push(wg)
   388  	}
   389  }
   390  
   391  func netpollcheckerr(pd *pollDesc, mode int32) int {
   392  	if pd.closing {
   393  		return pollErrClosing
   394  	}
   395  	if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) {
   396  		return pollErrTimeout
   397  	}
   398  	// Report an event scanning error only on a read event.
   399  	// An error on a write event will be captured in a subsequent
   400  	// write call that is able to report a more specific error.
   401  	if mode == 'r' && pd.everr {
   402  		return pollErrNotPollable
   403  	}
   404  	return pollNoError
   405  }
   406  
   407  func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
   408  	r := atomic.Casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp)))
   409  	if r {
   410  		// Bump the count of goroutines waiting for the poller.
   411  		// The scheduler uses this to decide whether to block
   412  		// waiting for the poller if there is nothing else to do.
   413  		atomic.Xadd(&netpollWaiters, 1)
   414  	}
   415  	return r
   416  }
   417  
// netpollgoready decrements netpollWaiters and then marks gp ready to run
// via goready. traceskip is passed on to goready increased by one;
// presumably this accounts for this wrapper's own frame in trace
// stacks — confirm against goready.
func netpollgoready(gp *g, traceskip int) {
	atomic.Xadd(&netpollWaiters, -1)
	goready(gp, traceskip+1)
}
   422  
// netpollblock parks the calling goroutine on pd's read or write semaphore
// (mode 'w' selects wg, any other value selects rg) until it is woken.
//
// returns true if IO is ready, or false if timedout or closed
// waitio - wait only for completed IO, ignore errors
// Concurrent calls to netpollblock in the same mode are forbidden, as pollDesc
// can hold only a single waiting goroutine for each mode.
func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	// set the gpp semaphore to pdWait
	for {
		// Consume notification if already ready.
		if atomic.Casuintptr(gpp, pdReady, 0) {
			return true
		}
		if atomic.Casuintptr(gpp, 0, pdWait) {
			break
		}

		// Double check that this isn't corrupt; otherwise we'd loop
		// forever.
		if v := atomic.Loaduintptr(gpp); v != pdReady && v != 0 {
			throw("runtime: double wait")
		}
	}

	// need to recheck error states after setting gpp to pdWait
	// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
	// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
	if waitio || netpollcheckerr(pd, mode) == 0 {
		// netpollblockcommit swaps pdWait for this goroutine's pointer
		// as part of parking.
		gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
	}
	// be careful to not lose concurrent pdReady notification
	old := atomic.Xchguintptr(gpp, 0)
	// At this point the semaphore may only hold 0, pdReady or pdWait;
	// anything larger would be a G pointer left behind.
	if old > pdWait {
		throw("runtime: corrupted polldesc")
	}
	return old == pdReady
}
   463  
// netpollunblock updates pd's read or write semaphore (mode 'w' selects wg,
// any other value selects rg) and returns the goroutine that was parked on
// it, or nil if there was none. It does not make that goroutine runnable;
// the caller does.
//
// With ioready set the semaphore becomes pdReady, otherwise (timeout/close)
// it becomes 0. Nothing is changed when the semaphore is already pdReady,
// or when it is 0 and ioready is false.
func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	for {
		old := atomic.Loaduintptr(gpp)
		if old == pdReady {
			return nil
		}
		if old == 0 && !ioready {
			// Only set pdReady for ioready. runtime_pollWait
			// will check for timeout/cancel before waiting.
			return nil
		}
		var new uintptr
		if ioready {
			new = pdReady
		}
		// Retry from the top if the semaphore changed under us.
		if atomic.Casuintptr(gpp, old, new) {
			if old == pdWait {
				// The waiter had not committed to parking yet,
				// so there is no goroutine to hand back.
				old = 0
			}
			return (*g)(unsafe.Pointer(old))
		}
	}
}
   492  
// netpolldeadlineimpl is the shared body of the deadline timer functions
// netpollDeadline, netpollReadDeadline and netpollWriteDeadline.
// seq is the sequence number recorded when the timer was armed; read and
// write select which deadline(s) expire. For each selected direction the
// deadline is set to -1 (expired), the timer function is cleared and the
// goroutine parked in that direction, if any, is woken.
func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
	lock(&pd.lock)
	// Seq arg is seq when the timer was set.
	// If it's stale, ignore the timer event.
	currentSeq := pd.rseq
	if !read {
		currentSeq = pd.wseq
	}
	if seq != currentSeq {
		// The descriptor was reused or timers were reset.
		unlock(&pd.lock)
		return
	}
	var rg *g
	if read {
		if pd.rd <= 0 || pd.rt.f == nil {
			throw("runtime: inconsistent read deadline")
		}
		pd.rd = -1
		atomic.StorepNoWB(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock
		rg = netpollunblock(pd, 'r', false)
	}
	var wg *g
	if write {
		// && binds tighter than ||: a missing write timer is only an
		// inconsistency when this is not the combined read+write timer,
		// which pollSetDeadline arms in pd.rt.
		if pd.wd <= 0 || pd.wt.f == nil && !read {
			throw("runtime: inconsistent write deadline")
		}
		pd.wd = -1
		atomic.StorepNoWB(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock
		wg = netpollunblock(pd, 'w', false)
	}
	unlock(&pd.lock)
	// Make the unblocked goroutines runnable only after pd.lock is released.
	if rg != nil {
		netpollgoready(rg, 0)
	}
	if wg != nil {
		netpollgoready(wg, 0)
	}
}
   532  
// netpollDeadline is the timer function pollSetDeadline installs in pd.rt
// when the read and write deadlines are equal and positive; it expires
// both. arg must hold a *pollDesc (see (*pollDesc).makeArg).
func netpollDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
}
   536  
// netpollReadDeadline is the timer function for the read deadline only.
// arg must hold a *pollDesc (see (*pollDesc).makeArg).
func netpollReadDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
}
   540  
// netpollWriteDeadline is the timer function for the write deadline only.
// arg must hold a *pollDesc (see (*pollDesc).makeArg).
func netpollWriteDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
}
   544  
   545  func (c *pollCache) alloc() *pollDesc {
   546  	lock(&c.lock)
   547  	if c.first == nil {
   548  		const pdSize = unsafe.Sizeof(pollDesc{})
   549  		n := pollBlockSize / pdSize
   550  		if n == 0 {
   551  			n = 1
   552  		}
   553  		// Must be in non-GC memory because can be referenced
   554  		// only from epoll/kqueue internals.
   555  		mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
   556  		for i := uintptr(0); i < n; i++ {
   557  			pd := (*pollDesc)(add(mem, i*pdSize))
   558  			pd.link = c.first
   559  			c.first = pd
   560  		}
   561  	}
   562  	pd := c.first
   563  	c.first = pd.link
   564  	lockInit(&pd.lock, lockRankPollDesc)
   565  	unlock(&c.lock)
   566  	return pd
   567  }
   568  
   569  // makeArg converts pd to an interface{}.
   570  // makeArg does not do any allocation. Normally, such
   571  // a conversion requires an allocation because pointers to
   572  // go:notinheap types (which pollDesc is) must be stored
   573  // in interfaces indirectly. See issue 42076.
   574  func (pd *pollDesc) makeArg() (i interface{}) {
   575  	x := (*eface)(unsafe.Pointer(&i))
   576  	x._type = pdType
   577  	x.data = unsafe.Pointer(&pd.self)
   578  	return
   579  }
   580  
var (
	// pdEface is an interface value holding a nil *pollDesc. It exists
	// only so that the type descriptor can be read from it below.
	pdEface interface{} = (*pollDesc)(nil)
	// pdType is the type word of pdEface; (*pollDesc).makeArg stores it
	// in the interfaces it builds by hand.
	pdType *_type = efaceOf(&pdEface)._type
)
   585  

View as plain text