From 1ed9931af152ba0c5fb9d0e1208b0cdd257cfcf1 Mon Sep 17 00:00:00 2001 From: Tonis Tiigi Date: Fri, 20 Oct 2023 14:38:41 -0700 Subject: [PATCH] llbsolver: fix possible deadlock in history listen The events for currently active builds were sent through the pubsub channel instead of directly to the current request, as is done for completed builds, for example. This meant that if there were more active builds running than the pubsub channel buffer size (32), the sends would block. Because the history API mutex is held during this process, it would eventually block the requests for builds that try to update their history records. Signed-off-by: Tonis Tiigi --- solver/llbsolver/history.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/solver/llbsolver/history.go b/solver/llbsolver/history.go index ac0a5dd6524b..d7ca60a08936 100644 --- a/solver/llbsolver/history.go +++ b/solver/llbsolver/history.go @@ -762,6 +762,9 @@ func (h *HistoryQueue) Listen(ctx context.Context, req *controlapi.BuildHistoryR }() } + // make a copy of events for active builds so we don't keep a lock during grpc send + actives := make([]*controlapi.BuildHistoryEvent, 0, len(h.active)) + for _, e := range h.active { if req.Ref != "" && e.Ref != req.Ref { continue @@ -769,7 +772,7 @@ func (h *HistoryQueue) Listen(ctx context.Context, req *controlapi.BuildHistoryR if _, ok := h.deleted[e.Ref]; ok { continue } - sub.send(&controlapi.BuildHistoryEvent{ + actives = append(actives, &controlapi.BuildHistoryEvent{ Type: controlapi.BuildHistoryEventType_STARTED, Record: e, }) @@ -777,6 +780,12 @@ func (h *HistoryQueue) Listen(ctx context.Context, req *controlapi.BuildHistoryR h.mu.Unlock() + for _, e := range actives { + if err := f(e); err != nil { + return err + } + } + if !req.ActiveOnly { events := []*controlapi.BuildHistoryEvent{} if err := h.opt.DB.View(func(tx *bolt.Tx) error {