sysmon详解

前言

本系列基于golang 1.20.4版本,本文还未写完,仅参考使用！！！

1.sysmon设计原理

流程如下所示 //todo,具体逻辑后面有时间看

+--------------+     +-------+     +-----------+     +--------+     +-----------+     +---------+     +--------+     +----+
| runtime.main | --> | newm1 | --> | newosproc | --> | sysmon | --> | checkdead | --> | netpoll | --> | retake | --> | gc |
+--------------+     +-------+     +-----------+     +--------+     +-----------+     +---------+     +--------+     +----+

sysmon 检查死锁

//todo,具体逻辑后面有时间看

sysmon 计时器

//todo,具体逻辑后面有时间看

sysmon netpoll

上章schdule的时候讲了,逻辑一样

sysmon 抢占式调度

重点看下

当p处于_Prunning || _Psyscall状态时, 如果上一次调度时间过去了10ms,通过preemptone(pp)抢占p
当p处于_Psyscall时,1 p不为空或者不存在空闲p时或系统调用时间超过10ms,通过runtime.handoffp抢占p

type sysmontick struct {
	//调度次数
    schedtick   uint32
	//上次调度时间
    schedwhen   int64
	//系统调用次数
    syscalltick uint32
	//系统调用时间
    syscallwhen int64
}

func retake(now int64) uint32 {
	n := 0
	lock(&allpLock)
	//遍历所有的p
	for i := 0; i < len(allp); i++ {
		pp := allp[i]
		if pp == nil {
			continue
		}
		pd := &pp.sysmontick
		s := pp.status
		sysretake := false
		//p的状态为_Prunning/_Psyscall,如果上一次调度时间过去了10ms,
		if s == _Prunning || s == _Psyscall {
			// Preempt G if it's running for too long.
			t := int64(pp.schedtick)
			if int64(pd.schedtick) != t {
				pd.schedtick = uint32(t)
				pd.schedwhen = now
			} else if pd.schedwhen+forcePreemptNS <= now {
				//抢占该p
				preemptone(pp)
				sysretake = true
			}
		}
		//p状态为_Psyscall,也就是执行syscall的时间,超过了10ms时
		if s == _Psyscall {
			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
			t := int64(pp.syscalltick)
			if !sysretake && int64(pd.syscalltick) != t {
				pd.syscalltick = uint32(t)
				pd.syscallwhen = now
				continue
			}
			//如果p的运行队列为空,且超过10ms
			if runqempty(pp) && sched.nmspinning.Load()+sched.npidle.Load() > 0 && pd.syscallwhen+10*1000*1000 > now {
				continue
			}
			// Drop allpLock so we can take sched.lock.
			unlock(&allpLock)
			incidlelocked(-1)
			if atomic.Cas(&pp.status, s, _Pidle) {
				if trace.enabled {
					traceGoSysBlock(pp)
					traceProcStop(pp)
				}
				n++
				pp.syscalltick++
				//通过startm将p与新的m绑定
				handoffp(pp)
			}
			incidlelocked(1)
			lock(&allpLock)
		}
	}
	unlock(&allpLock)
	return uint32(n)
}

sysmon 垃圾回收

//todo 后面有空再看

sysmon代码

func sysmon() {
	lock(&sched.lock)
	//不计入死锁的m数量
	sched.nmsys++
	//检测死锁
	checkdead()
	unlock(&sched.lock)

	lasttrace := int64(0)
	//for循环次数
	idle := 0 // how many cycles in succession we had not wokeup somebody
	delay := uint32(0)

	for {
		if idle == 0 { // start with 20us sleep...
			delay = 20
		} else if idle > 50 { // start doubling the sleep after 1ms...
			delay *= 2
		}
		if delay > 10*1000 { // up to 10ms
			delay = 10 * 1000
		}
		//挂起当前线程
		usleep(delay)

		// sysmon should not enter deep sleep if schedtrace is enabled so that
		// it can print that information at the right time.
		//
		// It should also not enter deep sleep if there are any active P's so
		// that it can retake P's from syscalls, preempt long running G's, and
		// poll the network if all P's are busy for long stretches.
		//
		// It should wakeup from deep sleep if any P's become active either due
		// to exiting a syscall or waking up due to a timer expiring so that it
		// can resume performing those duties. If it wakes from a syscall it
		// resets idle and delay as a bet that since it had retaken a P from a
		// syscall before, it may need to do it again shortly after the
		// application starts work again. It does not reset idle when waking
		// from a timer to avoid adding system load to applications that spend
		// most of their time sleeping.
		now := nanotime()
		if debug.schedtrace <= 0 && (sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs) {
			lock(&sched.lock)
			if sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs {
				syscallWake := false
				next := timeSleepUntil()
				if next > now {
					sched.sysmonwait.Store(true)
					unlock(&sched.lock)
					// Make wake-up period small enough
					// for the sampling to be correct.
					sleep := forcegcperiod / 2
					if next-now < sleep {
						sleep = next - now
					}
					shouldRelax := sleep >= osRelaxMinNS
					if shouldRelax {
						osRelax(true)
					}
					syscallWake = notetsleep(&sched.sysmonnote, sleep)
					if shouldRelax {
						osRelax(false)
					}
					lock(&sched.lock)
					sched.sysmonwait.Store(false)
					noteclear(&sched.sysmonnote)
				}
				if syscallWake {
					idle = 0
					delay = 20
				}
			}
			unlock(&sched.lock)
		}

		lock(&sched.sysmonlock)
		// Update now in case we blocked on sysmonnote or spent a long time
		// blocked on schedlock or sysmonlock above.
		now = nanotime()

		// trigger libc interceptors if needed
		if *cgo_yield != nil {
			asmcgocall(*cgo_yield, nil)
		}
		//网络轮询,上章讲schdule的时候讲过,流程类似
		// poll network if not polled for more than 10ms
		lastpoll := sched.lastpoll.Load()
		if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
			sched.lastpoll.CompareAndSwap(lastpoll, now)
			list := netpoll(0) // non-blocking - returns list of goroutines
			if !list.empty() {
				// Need to decrement number of idle locked M's
				// (pretending that one more is running) before injectglist.
				// Otherwise it can lead to the following situation:
				// injectglist grabs all P's but before it starts M's to run the P's,
				// another M returns from syscall, finishes running its G,
				// observes that there is no work to do and no other running M's
				// and reports deadlock.
				incidlelocked(-1)
				injectglist(&list)
				incidlelocked(1)
			}
		}
		
		if scavenger.sysmonWake.Load() != 0 {
			// Kick the scavenger awake if someone requested it.
			scavenger.wake()
		}
		// retake P's blocked in syscalls
		// and preempt long running G's
		if retake(now) != 0 {
			idle = 0
		} else {
			idle++
		}
		//垃圾回收
		// check if we need to force a GC
		if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && forcegc.idle.Load() {
			lock(&forcegc.lock)
			forcegc.idle.Store(false)
			var list gList
			list.push(forcegc.g)
			injectglist(&list)
			unlock(&forcegc.lock)
		}
		if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now {
			lasttrace = now
			schedtrace(debug.scheddetail > 0)
		}
		unlock(&sched.sysmonlock)
	}
}