// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build aix || (solaris && !illumos)

// This code implements the filelock API using POSIX 'fcntl' locks, which attach
// to an (inode, process) pair rather than a file descriptor. To avoid unlocking
// files prematurely when the same file is opened through different descriptors,
// we allow only one read-lock at a time.
//
// Most platforms provide some alternative API, such as an 'flock' system call
// or an F_OFD_SETLK command for 'fcntl', that allows for better concurrency and
// does not require per-inode bookkeeping in the application.

package filelock

import (
	"errors"
	"io"
	"io/fs"
	"math/rand"
	"sync"
	"syscall"
	"time"
)

type lockType int16

const (
	readLock  lockType = syscall.F_RDLCK
	writeLock lockType = syscall.F_WRLCK
)

type inode = uint64 // type of syscall.Stat_t.Ino

type inodeLock struct {
	owner File
	queue []<-chan File
}

var (
	mu     sync.Mutex
	inodes = map[File]inode{}
	locks  = map[inode]inodeLock{}
)

func lock(f File, lt lockType) (err error) {
	// POSIX locks apply per inode and process, and the lock for an inode is
	// released when *any* descriptor for that inode is closed. So we need to
	// synchronize access to each inode internally, and must serialize lock and
	// unlock calls that refer to the same inode through different descriptors.
	fi, err := f.Stat()
	if err != nil {
		return err
	}
	ino := fi.Sys().(*syscall.Stat_t).Ino

	mu.Lock()
	if i, dup := inodes[f]; dup && i != ino {
		mu.Unlock()
		return &fs.PathError{
			Op:   lt.String(),
			Path: f.Name(),
			Err:  errors.New("inode for file changed since last Lock or RLock"),
		}
	}
	inodes[f] = ino

	var wait chan File
	l := locks[ino]
	if l.owner == f {
		// This file already owns the lock, but the call may change its lock type.
	} else if l.owner == nil {
		// No owner: it's ours now.
		l.owner = f
	} else {
		// Already owned: add a channel to wait on.
		wait = make(chan File)
		l.queue = append(l.queue, wait)
	}
	locks[ino] = l
	mu.Unlock()

	if wait != nil {
		wait <- f
	}

	// Spurious EDEADLK errors arise on platforms that compute deadlock graphs at
	// the process, rather than thread, level. Consider processes P and Q, with
	// threads P.1, P.2, and Q.3. The following trace is NOT a deadlock, but will be
	// reported as a deadlock on systems that consider only process granularity:
	//
	// 	P.1 locks file A.
	// 	Q.3 locks file B.
	// 	Q.3 blocks on file A.
	// 	P.2 blocks on file B. (This is erroneously reported as a deadlock.)
	// 	P.1 unlocks file A.
	// 	Q.3 unblocks and locks file A.
	// 	Q.3 unlocks files A and B.
	// 	P.2 unblocks and locks file B.
	// 	P.2 unlocks file B.
	//
	// These spurious errors were observed in practice on AIX and Solaris in
	// cmd/go: see https://golang.org/issue/32817.
	//
	// We work around this bug by treating EDEADLK as always spurious. If there
	// really is a lock-ordering bug between the interacting processes, it will
	// become a livelock instead, but that's not appreciably worse than if we had
	// a proper flock implementation (which generally does not even attempt to
	// diagnose deadlocks).
	//
	// In the above example, that changes the trace to:
	//
	// 	P.1 locks file A.
	// 	Q.3 locks file B.
	// 	Q.3 blocks on file A.
	// 	P.2 spuriously fails to lock file B and goes to sleep.
	// 	P.1 unlocks file A.
	// 	Q.3 unblocks and locks file A.
	// 	Q.3 unlocks files A and B.
	// 	P.2 wakes up and locks file B.
	// 	P.2 unlocks file B.
	//
	// We know that the retry loop will not introduce a *spurious* livelock
	// because, according to the POSIX specification, EDEADLK is only to be
	// returned when “the lock is blocked by a lock from another process”.
	// If that process is blocked on some lock that we are holding, then the
	// resulting livelock is due to a real deadlock (and would manifest as such
	// when using, for example, the flock implementation of this package).
	// If the other process is *not* blocked on some other lock that we are
	// holding, then it will eventually release the requested lock.

	nextSleep := 1 * time.Millisecond
	const maxSleep = 500 * time.Millisecond
	for {
		err = setlkw(f.Fd(), lt)
		if err != syscall.EDEADLK {
			break
		}
		time.Sleep(nextSleep)

		nextSleep += nextSleep
		if nextSleep > maxSleep {
			nextSleep = maxSleep
		}
		// Apply 10% jitter to avoid synchronizing collisions when we finally unblock.
		nextSleep += time.Duration((0.1*rand.Float64() - 0.05) * float64(nextSleep))
	}

	if err != nil {
		unlock(f)
		return &fs.PathError{
			Op:   lt.String(),
			Path: f.Name(),
			Err:  err,
		}
	}

	return nil
}

func unlock(f File) error {
	var owner File

	mu.Lock()
	ino, ok := inodes[f]
	if ok {
		owner = locks[ino].owner
	}
	mu.Unlock()

	if owner != f {
		panic("unlock called on a file that is not locked")
	}

	err := setlkw(f.Fd(), syscall.F_UNLCK)

	mu.Lock()
	l := locks[ino]
	if len(l.queue) == 0 {
		// No waiters: remove the map entry.
		delete(locks, ino)
	} else {
		// The first waiter is sending us their file now.
		// Receive it and update the queue.
		l.owner = <-l.queue[0]
		l.queue = l.queue[1:]
		locks[ino] = l
	}
	delete(inodes, f)
	mu.Unlock()

	return err
}

// setlkw calls FcntlFlock with F_SETLKW for the entire file indicated by fd.
func setlkw(fd uintptr, lt lockType) error {
	for {
		err := syscall.FcntlFlock(fd, syscall.F_SETLKW, &syscall.Flock_t{
			Type:   int16(lt),
			Whence: io.SeekStart,
			Start:  0,
			Len:    0, // All bytes.
		})
		if err != syscall.EINTR {
			return err
		}
	}
}