Source file
src/runtime/os_linux.go
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "runtime/internal/atomic"
11 "runtime/internal/syscall"
12 "unsafe"
13 )
14
15
16
17
// sigPerThreadSyscall is the signal that syscall_runtime_doAllThreadsSyscall
// sends (via signalM) to every other thread to ask it to run the pending
// per-thread system call.
const sigPerThreadSyscall = _SIGRTMIN + 1
19
// mOS holds the Linux-specific state that this file reads and writes through
// an *m (for example mp.profileTimer and mp.needPerThreadSyscall).
type mOS struct {
	// profileTimer is the id of the timer that setThreadCPUProfiler obtained
	// from timer_create. It is only meaningful while profileTimerValid is
	// true; setThreadCPUProfiler resets both when it deletes the timer.
	profileTimer      int32
	profileTimerValid atomic.Bool

	// needPerThreadSyscall is set to 1 by syscall_runtime_doAllThreadsSyscall
	// before it signals the thread, and reset to 0 by runPerThreadSyscall once
	// the thread has executed the requested system call.
	needPerThreadSyscall atomic.Uint8
}
35
36
// futex is declared without a body, so its implementation is supplied outside
// this file. Callers in this file pass the address of the futex word, an
// operation (_FUTEX_WAIT_PRIVATE or _FUTEX_WAKE_PRIVATE), a value, an optional
// timeout pointer, and nil/0 for addr2 and val3. futexwakeup treats a negative
// return value as a failure.
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
38
39
40
41
42
43
44
45
46
47
// Operation codes passed as the op argument of futex. The wait (0) and wake
// (1) operations are always combined with _FUTEX_PRIVATE_FLAG here.
// NOTE(review): values presumably mirror FUTEX_WAIT/FUTEX_WAKE/
// FUTEX_PRIVATE_FLAG from futex(2) — confirm before changing.
const (
	_FUTEX_PRIVATE_FLAG = 128
	_FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG
	_FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG
)
53
54
55
56
57
58
59
60
61
62 func futexsleep(addr *uint32, val uint32, ns int64) {
63
64
65
66
67
68 if ns < 0 {
69 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
70 return
71 }
72
73 var ts timespec
74 ts.setNsec(ns)
75 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
76 }
77
78
79
80
81 func futexwakeup(addr *uint32, cnt uint32) {
82 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
83 if ret >= 0 {
84 return
85 }
86
87
88
89
90 systemstack(func() {
91 print("futexwakeup addr=", addr, " returned ", ret, "\n")
92 })
93
94 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
95 }
96
97 func getproccount() int32 {
98
99
100
101
102
103
104
105 const maxCPUs = 64 * 1024
106 var buf [maxCPUs / 8]byte
107 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
108 if r < 0 {
109 return 1
110 }
111 n := int32(0)
112 for _, v := range buf[:r] {
113 for v != 0 {
114 n += int32(v & 1)
115 v >>= 1
116 }
117 }
118 if n == 0 {
119 n = 1
120 }
121 return n
122 }
123
124
// Flag bits for the clone system call.
// NOTE(review): the values appear to mirror the CLONE_* constants documented
// in clone(2) — confirm against the kernel headers before changing any.
const (
	_CLONE_VM             = 0x100
	_CLONE_FS             = 0x200
	_CLONE_FILES          = 0x400
	_CLONE_SIGHAND        = 0x800
	_CLONE_PTRACE         = 0x2000
	_CLONE_VFORK          = 0x4000
	_CLONE_PARENT         = 0x8000
	_CLONE_THREAD         = 0x10000
	_CLONE_NEWNS          = 0x20000
	_CLONE_SYSVSEM        = 0x40000
	_CLONE_SETTLS         = 0x80000
	_CLONE_PARENT_SETTID  = 0x100000
	_CLONE_CHILD_CLEARTID = 0x200000
	_CLONE_UNTRACED       = 0x800000
	_CLONE_CHILD_SETTID   = 0x1000000
	_CLONE_STOPPED        = 0x2000000
	_CLONE_NEWUTS         = 0x4000000
	_CLONE_NEWIPC         = 0x8000000

	// cloneFlags is the flag set that newosproc and newosproc0 pass to clone
	// when creating a thread: the VM, FS, FILES, SIGHAND, SYSVSEM and THREAD
	// bits.
	cloneFlags = _CLONE_VM |
		_CLONE_FS |
		_CLONE_FILES |
		_CLONE_SIGHAND |
		_CLONE_SYSVSEM |
		_CLONE_THREAD
)
159
160
// clone is declared without a body, so its implementation is supplied outside
// this file. Callers here pass cloneFlags, the top of the new thread's stack,
// the m and g for the new thread (nil in newosproc0) and the entry function.
// A negative return value is treated as a failure by both callers.
func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
162
163
164
165
// newosproc creates a new OS thread for mp with clone. The thread is given
// the high end of mp.g0's stack as its stack pointer and mstart as its entry
// point. If the thread cannot be created, the failure is printed and the
// runtime throws.
func newosproc(mp *m) {
	stk := unsafe.Pointer(mp.g0.stack.hi)

	// Debug tracing; disabled by the constant condition.
	if false {
		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
	}

	// Install sigset_all as the signal mask for the duration of the clone and
	// restore the previous mask afterwards.
	// NOTE(review): presumably so the new thread starts with signals blocked
	// until it has initialised itself — confirm.
	var oset sigset
	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
	ret := retryOnEAGAIN(func() int32 {
		r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
		// Normalise the result for retryOnEAGAIN: 0 on success, otherwise the
		// negated (i.e. positive) clone result, which is printed as errno
		// below.
		if r >= 0 {
			return 0
		}
		return -r
	})
	sigprocmask(_SIG_SETMASK, &oset, nil)

	if ret != 0 {
		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
		if ret == _EAGAIN {
			println("runtime: may need to increase max user processes (ulimit -u)")
		}
		throw("newosproc")
	}
}
198
199
200
201
202 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
203 stack := sysAlloc(stacksize, &memstats.stacks_sys)
204 if stack == nil {
205 writeErrStr(failallocatestack)
206 exit(1)
207 }
208 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
209 if ret < 0 {
210 writeErrStr(failthreadcreate)
211 exit(1)
212 }
213 }
214
// Tags of auxiliary-vector entries. sysauxv stops scanning at _AT_NULL and
// itself handles _AT_RANDOM, _AT_PAGESZ and _AT_SECURE; every tag is also
// forwarded to archauxv and vdsoauxv.
const (
	_AT_NULL   = 0
	_AT_PAGESZ = 6
	_AT_HWCAP  = 16
	_AT_SECURE = 23
	_AT_RANDOM = 25
	_AT_HWCAP2 = 26
)
223
// procAuxv is the NUL-terminated path that sysargs opens when the auxiliary
// vector found after the environment yields no entries.
var procAuxv = []byte("/proc/self/auxv\x00")

// addrspace_vec is the one-byte result buffer sysargs passes to mincore while
// probing for the page size.
var addrspace_vec [1]byte

// mincore is declared without a body; its implementation is supplied outside
// this file. sysargs treats a zero return as success.
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

// auxvreadbuf receives the contents of /proc/self/auxv in sysargs; when that
// path is taken, auxv ends up as a slice of this array.
var auxvreadbuf [128]uintptr
231
// sysargs locates the auxiliary vector and feeds it to sysauxv.
//
// It first looks directly after the argv and environment pointer arrays. If
// that yields no entries it falls back to reading /proc/self/auxv, and if that
// file cannot be opened it only determines physPageSize by probing with
// mincore.
func sysargs(argc int32, argv **byte) {
	n := argc + 1

	// Starting just past argv's terminating nil, advance over the following
	// pointers (the environment) until the next nil.
	for argv_index(argv, n) != nil {
		n++
	}

	// Step over that nil terminator.
	n++

	// argv+n is now treated as the start of the auxiliary vector. The huge
	// array type is only a way to slice raw memory; sysauxv stops at _AT_NULL.
	auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))

	if pairs := sysauxv(auxvp[:]); pairs != 0 {
		// Cap the slice so it covers exactly the pairs that were parsed.
		auxv = auxvp[: pairs*2 : pairs*2]
		return
	}

	// No entries were found in memory; try the procfs copy instead.
	fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
	if fd < 0 {
		// The file is unavailable. Map a scratch region and call mincore at
		// increasing power-of-two offsets from 4 KiB up; the first offset at
		// which mincore succeeds is recorded as the physical page size.
		// NOTE(review): this relies on mincore rejecting addresses that are
		// not page-aligned — confirm against mincore(2).
		const size = 256 << 10
		p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if err != 0 {
			return
		}
		var n uintptr
		for n = 4 << 10; n < size; n <<= 1 {
			err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
			if err == 0 {
				physPageSize = n
				break
			}
		}
		// No offset succeeded: fall back to the size of the scratch region.
		if physPageSize == 0 {
			physPageSize = size
		}
		munmap(p, size)
		return
	}

	n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
	closefd(fd)
	if n < 0 {
		return
	}

	// Force a terminator into the last tag slot so sysauxv always stops
	// inside the buffer, even if the read filled it completely.
	auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
	pairs := sysauxv(auxvreadbuf[:])
	auxv = auxvreadbuf[: pairs*2 : pairs*2]
}
289
290
291
// startupRandomData is set by sysauxv to the 16 bytes referenced by the
// _AT_RANDOM auxiliary-vector entry, when that entry is present.
// getRandomData uses it in preference to /dev/urandom.
var startupRandomData []byte


// secureMode is set by sysauxv to whether the _AT_SECURE entry's value is 1.
var secureMode bool
296
297 func sysauxv(auxv []uintptr) (pairs int) {
298 var i int
299 for ; auxv[i] != _AT_NULL; i += 2 {
300 tag, val := auxv[i], auxv[i+1]
301 switch tag {
302 case _AT_RANDOM:
303
304
305 startupRandomData = (*[16]byte)(unsafe.Pointer(val))[:]
306
307 case _AT_PAGESZ:
308 physPageSize = val
309
310 case _AT_SECURE:
311 secureMode = val == 1
312 }
313
314 archauxv(tag, val)
315 vdsoauxv(tag, val)
316 }
317 return i / 2
318 }
319
// sysTHPSizePath is the NUL-terminated sysfs path that getHugePageSize reads
// to obtain the huge page size.
var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
321
322 func getHugePageSize() uintptr {
323 var numbuf [20]byte
324 fd := open(&sysTHPSizePath[0], 0 , 0)
325 if fd < 0 {
326 return 0
327 }
328 ptr := noescape(unsafe.Pointer(&numbuf[0]))
329 n := read(fd, ptr, int32(len(numbuf)))
330 closefd(fd)
331 if n <= 0 {
332 return 0
333 }
334 n--
335 v, ok := atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
336 if !ok || v < 0 {
337 v = 0
338 }
339 if v&(v-1) != 0 {
340
341 return 0
342 }
343 return uintptr(v)
344 }
345
// osinit performs the Linux-specific start-up initialisation visible here: it
// records the CPU count and the huge page size, adjusts sigsetAllExiting when
// cgo is in use, and then runs the architecture-specific hook.
func osinit() {
	ncpu = getproccount()
	physHugePageSize = getHugePageSize()
	if iscgo {
		// Remove signals 32, 33 and 34 from sigsetAllExiting.
		// NOTE(review): presumably these are signals reserved by the C
		// library's threading implementation, which must stay unblocked —
		// confirm against the upstream comment.
		sigdelset(&sigsetAllExiting, 32)
		sigdelset(&sigsetAllExiting, 33)
		sigdelset(&sigsetAllExiting, 34)
	}
	osArchInit()
}
369
// urandom_dev is the NUL-terminated path of the device that getRandomData
// reads when no start-up random data is available.
var urandom_dev = []byte("/dev/urandom\x00")
371
372 func getRandomData(r []byte) {
373 if startupRandomData != nil {
374 n := copy(r, startupRandomData)
375 extendRandom(r, n)
376 return
377 }
378 fd := open(&urandom_dev[0], 0 , 0)
379 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
380 closefd(fd)
381 extendRandom(r, int(n))
382 }
383
// goenvs is the Linux entry point for environment set-up; it simply delegates
// to goenvs_unix.
func goenvs() {
	goenvs_unix()
}
387
388
389
390
391
392
393
// libpreinit calls initsig with its argument set to true.
// NOTE(review): presumably this is the early signal set-up used when the
// runtime is loaded as a library — confirm against initsig and the callers.
func libpreinit() {
	initsig(true)
}
397
398
399
400 func mpreinit(mp *m) {
401 mp.gsignal = malg(32 * 1024)
402 mp.gsignal.m = mp
403 }
404
// gettid is declared without a body; its implementation is supplied outside
// this file. minit stores its result as the current M's procid.
func gettid() uint32
406
407
408
409 func minit() {
410 minitSignals()
411
412
413
414
415 getg().m.procid = uint64(gettid())
416 }
417
418
419
420
// unminit is the counterpart of minit for signal state; it only calls
// unminitSignals. The procid recorded by minit is left untouched.
func unminit() {
	unminitSignals()
}
424
425
426
// mdestroy has an empty body: nothing is released for mp on Linux here.
func mdestroy(mp *m) {
}
429
430
431
432
433
// The following are declared without bodies; their implementations are
// supplied outside this file. setsig takes their addresses with
// abi.FuncPCABI0: sigreturn__sigaction as the sa_restorer on 386/amd64, and
// sigtramp or cgoSigtramp (when iscgo) as the installed handler.
func sigreturn__sigaction()
func sigtramp()
func cgoSigtramp()
437
438
// sigaltstack is declared without a body; implemented outside this file.
func sigaltstack(new, old *stackt)


// setitimer is declared without a body; implemented outside this file.
func setitimer(mode int32, new, old *itimerval)


// timer_create is declared without a body; implemented outside this file.
// setThreadCPUProfiler treats a non-zero result as failure.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32


// timer_settime is declared without a body; implemented outside this file.
// setThreadCPUProfiler treats a non-zero result as failure and prints its
// negation as the errno.
func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32


// timer_delete is declared without a body; implemented outside this file.
// setThreadCPUProfiler treats a non-zero result as failure and prints its
// negation as the errno.
func timer_delete(timerid int32) int32


// rtsigprocmask is declared without a body; implemented outside this file.
// sigprocmask calls it with size set to the size of a sigset.
func rtsigprocmask(how int32, new, old *sigset, size int32)
455
456
457
// sigprocmask forwards to rtsigprocmask, supplying the size of a sigset as
// the final argument. rtsigprocmask returns nothing, so no error is reported
// to the caller.
func sigprocmask(how int32, new, old *sigset) {
	rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
}
461
// raise and raiseproc are declared without bodies; implemented outside this
// file.
func raise(sig uint32)
func raiseproc(sig uint32)


// sched_getaffinity is declared without a body; implemented outside this
// file. getproccount treats a negative result as failure and a non-negative
// result as the number of mask bytes written to buf.
func sched_getaffinity(pid, len uintptr, buf *byte) int32

// osyield is declared without a body; implemented outside this file.
func osyield()
468
469
// osyield_no_g simply calls osyield.
// NOTE(review): the name suggests it is for callers running without a g —
// confirm against its call sites.
func osyield_no_g() {
	osyield()
}
473
// pipe2 is declared without a body; implemented outside this file. It returns
// two descriptors (r, w) and an errno.
func pipe2(flags int32) (r, w int32, errno int32)
475
476
477 func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
478 r, _, err := syscall.Syscall6(syscall.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
479 return int32(r), int32(err)
480 }
481
// Size limits, not referenced elsewhere in this file.
// NOTE(review): presumably the byte sizes of the kernel's siginfo and
// sigevent structures, used where those types are defined — confirm.
const (
	_si_max_size    = 128
	_sigev_max_size = 64
)
486
487
488
489 func setsig(i uint32, fn uintptr) {
490 var sa sigactiont
491 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
492 sigfillset(&sa.sa_mask)
493
494
495
496 if GOARCH == "386" || GOARCH == "amd64" {
497 sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
498 }
499 if fn == abi.FuncPCABIInternal(sighandler) {
500 if iscgo {
501 fn = abi.FuncPCABI0(cgoSigtramp)
502 } else {
503 fn = abi.FuncPCABI0(sigtramp)
504 }
505 }
506 sa.sa_handler = fn
507 sigaction(i, &sa, nil)
508 }
509
510
511
512 func setsigstack(i uint32) {
513 var sa sigactiont
514 sigaction(i, nil, &sa)
515 if sa.sa_flags&_SA_ONSTACK != 0 {
516 return
517 }
518 sa.sa_flags |= _SA_ONSTACK
519 sigaction(i, &sa, nil)
520 }
521
522
523
524 func getsig(i uint32) uintptr {
525 var sa sigactiont
526 sigaction(i, nil, &sa)
527 return sa.sa_handler
528 }
529
530
531
532
// setSignalstackSP stores sp into s.ss_sp. The store goes through an unsafe
// cast to *uintptr, so it writes the raw address regardless of the declared
// type of the ss_sp field.
func setSignalstackSP(s *stackt, sp uintptr) {
	*(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
}
536
537
// fixsigcode has an empty body on Linux: the signal code in c is left as is.
func (c *sigctxt) fixsigcode(sig uint32) {
}
540
541
542
543
544 func sysSigaction(sig uint32, new, old *sigactiont) {
545 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
546
547
548
549
550
551
552
553
554
555
556
557 if sig != 32 && sig != 33 && sig != 64 {
558
559 systemstack(func() {
560 throw("sigaction failed")
561 })
562 }
563 }
564 }
565
566
567
568
// rt_sigaction is declared without a body; implemented outside this file.
// sysSigaction treats a non-zero result as failure.
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32

// getpid and tgkill are declared without bodies; implemented outside this
// file. signalM uses them together to signal one specific thread.
func getpid() int
func tgkill(tgid, tid, sig int)
573
574
// signalM sends signal sig to the thread of mp, using tgkill with the current
// process id and mp.procid as the thread id. mp.procid must already have been
// set (minit does this); tgkill returns nothing, so failures are not reported.
func signalM(mp *m, sig int) {
	tgkill(getpid(), int(mp.procid), sig)
}
578
579
580
581
582
583
584
585
586 func validSIGPROF(mp *m, c *sigctxt) bool {
587 code := int32(c.sigcode())
588 setitimer := code == _SI_KERNEL
589 timer_create := code == _SI_TIMER
590
591 if !(setitimer || timer_create) {
592
593
594
595 return true
596 }
597
598 if mp == nil {
599
600
601
602
603
604
605
606
607
608
609
610
611 return setitimer
612 }
613
614
615
616 if mp.profileTimerValid.Load() {
617
618
619
620
621
622 return timer_create
623 }
624
625
626 return setitimer
627 }
628
// setProcessCPUProfiler configures process-wide CPU profiling at hz by
// delegating to setProcessCPUProfilerTimer.
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
632
// setThreadCPUProfiler configures CPU profiling for the current thread at hz
// samples per second, using a per-thread timer that delivers _SIGPROF.
//
// Any existing per-thread timer is deleted first. With hz == 0 nothing new is
// created. If timer_create fails the function returns quietly, leaving
// profileTimerValid false; failures of timer_delete or timer_settime throw.
func setThreadCPUProfiler(hz int32) {
	mp := getg().m
	mp.profilehz = hz

	// Tear down the previous timer, if any. The valid flag is cleared before
	// the timer is deleted, so validSIGPROF stops accepting timer_create
	// signals for this thread from this point on.
	if mp.profileTimerValid.Load() {
		timerid := mp.profileTimer
		mp.profileTimerValid.Store(false)
		mp.profileTimer = 0

		ret := timer_delete(timerid)
		if ret != 0 {
			print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
			throw("timer_delete")
		}
	}

	if hz == 0 {
		// Profiling is being switched off; there is nothing to set up.
		return
	}

	// The timer period is 1e9/hz nanoseconds. The first expiration is offset
	// by 1 plus a fastrandn result bounded by the period.
	// NOTE(review): presumably the random offset keeps threads from sampling
	// in lockstep — confirm against the upstream comment.
	spec := new(itimerspec)
	spec.it_value.setNsec(1 + int64(fastrandn(uint32(1e9/hz))))
	spec.it_interval.setNsec(1e9 / int64(hz))

	// Ask for _SIGPROF to be delivered to this specific thread, identified by
	// the procid recorded in minit, driven by the thread's CPU-time clock.
	var timerid int32
	var sevp sigevent
	sevp.notify = _SIGEV_THREAD_ID
	sevp.signo = _SIGPROF
	sevp.sigev_notify_thread_id = int32(mp.procid)
	ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
	if ret != 0 {
		// Not fatal: with profileTimerValid still false, validSIGPROF keeps
		// accepting setitimer-sourced signals for this thread.
		return
	}

	ret = timer_settime(timerid, 0, spec, nil)
	if ret != 0 {
		print("runtime: failed to configure profiling timer; timer_settime(", timerid,
			", 0, {interval: {",
			spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
			spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
		throw("timer_settime")
	}

	// Publish the timer id before marking it valid.
	mp.profileTimer = timerid
	mp.profileTimerValid.Store(true)
}
702
703
704
// perThreadSyscallArgs describes the system call that
// syscall_runtime_doAllThreadsSyscall wants every thread to execute, together
// with the results each thread is expected to obtain.
type perThreadSyscallArgs struct {
	// trap and a1..a6 are passed unchanged to syscall.Syscall6.
	trap uintptr
	a1   uintptr
	a2   uintptr
	a3   uintptr
	a4   uintptr
	a5   uintptr
	a6   uintptr
	// r1 and r2 are the results obtained on the initiating thread;
	// runPerThreadSyscall requires every other thread to get the same values.
	r1 uintptr
	r2 uintptr
}
716
717
718
719
720
721
// perThreadSyscall is the request currently being broadcast.
// syscall_runtime_doAllThreadsSyscall fills it in before signalling the other
// threads and resets it to the zero value once they have all finished;
// runPerThreadSyscall reads it on each signalled thread.
var perThreadSyscall perThreadSyscallArgs
723
724
725
726
727
728
729
730
731
// syscall_runtime_doAllThreadsSyscall executes the system call given by trap
// and a1..a6 on the calling thread and, if that succeeds, on every other
// thread listed in allm.
//
// It returns the r1, r2 and errno obtained on the calling thread. If that
// first call fails, the other threads are not asked to run it. Each other
// thread must obtain the same r1/r2 with no error, otherwise
// runPerThreadSyscall terminates the program. The function panics when cgo is
// in use.
func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
	if iscgo {
		panic("doAllThreadsSyscall not supported with cgo enabled")
	}

	// Stop the world for the whole operation; it is restarted on every exit
	// path below.
	stopTheWorld(stwAllThreadsSyscall)

	// Hold allocmLock until the broadcast is finished.
	// NOTE(review): presumably this keeps new Ms from being added while allm
	// is walked below — confirm against allocmLock's definition.
	allocmLock.lock()

	// Keep this goroutine on its current M, so that the thread which made the
	// initial call is the one skipped (by procid) in the loops below.
	acquirem()

	// Run the call on this thread first. Its results become the reference
	// values for all other threads.
	r1, r2, errno := syscall.Syscall6(trap, a1, a2, a3, a4, a5, a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is not compared on these architectures; force it to zero here
		// and in runPerThreadSyscall so the comparison is consistent.
		r2 = 0
	}
	if errno != 0 {
		// The call failed locally: undo the set-up in reverse order and
		// report the failure without involving other threads.
		releasem(getg().m)
		allocmLock.unlock()
		startTheWorld()
		return r1, r2, errno
	}

	// Publish the request for the signalled threads to pick up.
	perThreadSyscall = perThreadSyscallArgs{
		trap: trap,
		a1:   a1,
		a2:   a2,
		a3:   a3,
		a4:   a4,
		a5:   a5,
		a6:   a6,
		r1:   r1,
		r2:   r2,
	}

	// Wait until every M has a non-zero procid, i.e. its thread has reached
	// the point in minit where the thread id is recorded. signalM needs the
	// procid to target the thread.
	for mp := allm; mp != nil; mp = mp.alllink {
		for atomic.Load64(&mp.procid) == 0 {
			// The thread has not recorded its id yet; let it run.
			osyield()
		}
	}

	// Flag and signal every thread except this one; each runs the call from
	// runPerThreadSyscall.
	gp := getg()
	tid := gp.m.procid
	for mp := allm; mp != nil; mp = mp.alllink {
		if atomic.Load64(&mp.procid) == tid {
			// This thread has already made the call above.
			continue
		}
		mp.needPerThreadSyscall.Store(1)
		signalM(mp, sigPerThreadSyscall)
	}

	// Wait for each signalled thread to clear its flag, which
	// runPerThreadSyscall does after a successful, matching call.
	for mp := allm; mp != nil; mp = mp.alllink {
		if mp.procid == tid {
			continue
		}
		for mp.needPerThreadSyscall.Load() != 0 {
			osyield()
		}
	}

	// All threads are done; clear the published request.
	perThreadSyscall = perThreadSyscallArgs{}

	releasem(getg().m)
	allocmLock.unlock()
	startTheWorld()

	return r1, r2, errno
}
878
879
880
881
882
883
884
885 func runPerThreadSyscall() {
886 gp := getg()
887 if gp.m.needPerThreadSyscall.Load() == 0 {
888 return
889 }
890
891 args := perThreadSyscall
892 r1, r2, errno := syscall.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
893 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
894
895 r2 = 0
896 }
897 if errno != 0 || r1 != args.r1 || r2 != args.r2 {
898 print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
899 print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
900 fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
901 }
902
903 gp.m.needPerThreadSyscall.Store(0)
904 }
905
// Signal codes that sigFromUser treats as meaning "sent by a user".
// NOTE(review): values presumably mirror SI_USER and SI_TKILL from the Linux
// siginfo definitions — confirm before changing.
const (
	_SI_USER  = 0
	_SI_TKILL = -6
)
910
911
912
913
914
915 func (c *sigctxt) sigFromUser() bool {
916 code := int32(c.sigcode())
917 return code == _SI_USER || code == _SI_TKILL
918 }
919
View as plain text