From 1a04cf359c5b5638b8b8cb5bf151e70abcb97e5a Mon Sep 17 00:00:00 2001 From: Atin Mukherjee Date: Tue, 8 Jan 2019 20:22:30 +0530 Subject: [PATCH 1/4] brick-mux : fix spurious socket connect failure issue during volume stop Change the logic of using the parent socket file to send the detach request by picking it up from /proc//cmdline where pid is the process id of the parent brick. During Multiplex, this would avoid creating a hard link of a socket file. Credits : avishwan@redhat.com for helping with the utility function Fixes: #1468 Signed-off-by: Atin Mukherjee --- glusterd2/brick/glusterfsd.go | 10 +++++----- glusterd2/brickmux/demultiplex.go | 17 +++++++++++++---- glusterd2/brickmux/multiplex.go | 8 -------- glusterd2/brickmux/utils.go | 27 +++++++++++++++++++++++++-- 4 files changed, 43 insertions(+), 19 deletions(-) diff --git a/glusterd2/brick/glusterfsd.go b/glusterd2/brick/glusterfsd.go index 10e48cd5e..30f184517 100644 --- a/glusterd2/brick/glusterfsd.go +++ b/glusterd2/brick/glusterfsd.go @@ -38,7 +38,7 @@ type Glusterfsd struct { // Externally consumable using methods of Glusterfsd interface binarypath string args []string - socketfilepath string + Socketfilepath string pidfilepath string // For internal use @@ -89,8 +89,8 @@ func (b *Glusterfsd) Args() []string { // SocketFile returns path to the brick socket file used for IPC. func (b *Glusterfsd) SocketFile() string { - if b.socketfilepath != "" { - return b.socketfilepath + if b.Socketfilepath != "" { + return b.Socketfilepath } // First we form a key @@ -101,10 +101,10 @@ func (b *Glusterfsd) SocketFile() string { // Example: /var/run/gluster/.socket glusterdSockDir := config.GetString("rundir") hash := strconv.FormatUint(xxhash.Sum64String(key), 16) - b.socketfilepath = path.Join(glusterdSockDir, hash+".socket") + b.Socketfilepath = path.Join(glusterdSockDir, hash+".socket") // FIXME: The brick can no longer clean this up on clean shut down - return b.socketfilepath + return b.Socketfilepath } // PidFile returns path to the pid file of the brick process diff --git a/glusterd2/brickmux/demultiplex.go b/glusterd2/brickmux/demultiplex.go index 17a6fc6e9..373cc9b0d 100644 --- a/glusterd2/brickmux/demultiplex.go +++ b/glusterd2/brickmux/demultiplex.go @@ -3,7 +3,6 @@ package brickmux import ( "fmt" "os" - "time" "github.com/gluster/glusterd2/glusterd2/brick" "github.com/gluster/glusterd2/glusterd2/daemon" @@ -27,16 +26,26 @@ func IsLastBrickInProc(b brick.Brickinfo) bool { // Demultiplex sends a detach request to the brick process which the // specified brick is multiplexed onto. func Demultiplex(b brick.Brickinfo) error { - + var pidOnFile int log.WithField("brick", b.String()).Debug("get brick daemon for demultiplex process") brickDaemon, err := brick.NewGlusterfsd(b) if err != nil { return err } + if pidOnFile, err = daemon.ReadPidFromFile(brickDaemon.PidFile()); err == nil { + log.WithFields(log.Fields{"brick": b.String(), + "pidfile": brickDaemon.PidFile()}).Error("Failed to read the pidfile") + return err + } + brickDaemon.Socketfilepath, err = glusterdGetSockFromBrickPid(pidOnFile) + if err != nil { + log.WithFields(log.Fields{"brick": b.String(), + "pid": pidOnFile}).Error("Failed to get the socket file of the glusterfsd process") + return err + } log.WithFields(log.Fields{"brick": b.String(), "socketFile": brickDaemon.SocketFile()}).Debug("starting demultiplex process") - client, err := daemon.GetRPCClient(brickDaemon) if err != nil { return err @@ -62,7 +71,7 @@ func Demultiplex(b brick.Brickinfo) error { // There might be some changes on glusterfsd side related to socket // files used while brick signout, // make appropriate changes once glusterfsd side is fixed. - time.Sleep(time.Millisecond * 200) + //time.Sleep(time.Millisecond * 200) log.WithField("brick", b.String()).Debug("deleting brick socket and pid file") os.Remove(brickDaemon.PidFile()) diff --git a/glusterd2/brickmux/multiplex.go b/glusterd2/brickmux/multiplex.go index c234c3ffc..c0309888a 100644 --- a/glusterd2/brickmux/multiplex.go +++ b/glusterd2/brickmux/multiplex.go @@ -3,7 +3,6 @@ package brickmux import ( "fmt" "net/rpc" - "os" "github.com/gluster/glusterd2/glusterd2/brick" "github.com/gluster/glusterd2/glusterd2/daemon" @@ -87,13 +86,6 @@ func Multiplex(b brick.Brickinfo, v *volume.Volinfo, volumes []*volume.Volinfo, return err } - // create Unix Domain Socket hardlink - os.Remove(brickProc.SocketFile()) - if err := os.Link(targetBrickProc.SocketFile(), brickProc.SocketFile()); err != nil { - undoMultiplex(client, &b) - return err - } - // create duplicate pidfile for the multiplexed brick ok, pid := daemon.IsRunning(targetBrickProc) if !ok { diff --git a/glusterd2/brickmux/utils.go b/glusterd2/brickmux/utils.go index 48784af05..eafa8d416 100644 --- a/glusterd2/brickmux/utils.go +++ b/glusterd2/brickmux/utils.go @@ -1,10 +1,12 @@ package brickmux import ( + "fmt" + config "github.com/spf13/viper" + "io/ioutil" "os" "path" - - config "github.com/spf13/viper" + "strings" ) // getBrickVolfilePath returns path correspponding to the volfileID. Since, brick volfiles are now stored on @@ -20,3 +22,24 @@ func getBrickVolfilePath(volfileID string) (string, error) { return volfilePath, nil } + +// glusterdGetSockFromBrickPid returns the socket file from the /proc/ +// filesystem for the concerned process running with the same pid +func glusterdGetSockFromBrickPid(pid int) (string, error) { + content, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid)) + if err != nil { + return "", err + } + + parts := strings.Split(string(content), "\x00") + prevPart := "" + socketFile := "" + for _, p := range parts { + if prevPart == "-S" { + socketFile = p + break + } + prevPart = p + } + return socketFile, nil +} From 6b2e2698667879e489a178f9566fcc854a624505 Mon Sep 17 00:00:00 2001 From: Atin Mukherjee Date: Tue, 8 Jan 2019 20:36:46 +0530 Subject: [PATCH 2/4] brick-mux : remove removal of socket file during Demultiplex Signed-off-by: Atin Mukherjee --- glusterd2/brickmux/demultiplex.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/glusterd2/brickmux/demultiplex.go b/glusterd2/brickmux/demultiplex.go index 373cc9b0d..1afba4c24 100644 --- a/glusterd2/brickmux/demultiplex.go +++ b/glusterd2/brickmux/demultiplex.go @@ -75,8 +75,6 @@ func Demultiplex(b brick.Brickinfo) error { log.WithField("brick", b.String()).Debug("deleting brick socket and pid file") os.Remove(brickDaemon.PidFile()) - os.Remove(brickDaemon.SocketFile()) - log.WithField("brick", b.String()).Debug("deleted brick socket and pid file") return nil } From 91e5c8751d4d41480bf5f78d8903b004f374614b Mon Sep 17 00:00:00 2001 From: Atin Mukherjee Date: Tue, 8 Jan 2019 21:20:57 +0530 Subject: [PATCH 3/4] brick-mux : remove sleep from every demultiplexing brick requests Signed-off-by: Atin Mukherjee --- glusterd2/brickmux/demultiplex.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/glusterd2/brickmux/demultiplex.go b/glusterd2/brickmux/demultiplex.go index 1afba4c24..212eb5508 100644 --- a/glusterd2/brickmux/demultiplex.go +++ b/glusterd2/brickmux/demultiplex.go @@ -67,12 +67,6 @@ func Demultiplex(b brick.Brickinfo) error { } log.WithField("brick", b.String()).Debug("detach request succeeded with result") - // TODO: Find an alternative to substitute the sleep. - // There might be some changes on glusterfsd side related to socket - // files used while brick signout, - // make appropriate changes once glusterfsd side is fixed. - //time.Sleep(time.Millisecond * 200) - log.WithField("brick", b.String()).Debug("deleting brick socket and pid file") os.Remove(brickDaemon.PidFile()) From cedf0cb435b5ca53cc8141baed07078894945d2e Mon Sep 17 00:00:00 2001 From: Atin Mukherjee Date: Thu, 10 Jan 2019 14:01:32 +0530 Subject: [PATCH 4/4] brick-mux : fix error handling Fix error handling from ReadFromPidFile call. Also added a check to bypass detach if the brick pid is -1. Signed-off-by: Atin Mukherjee --- glusterd2/brickmux/demultiplex.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/glusterd2/brickmux/demultiplex.go b/glusterd2/brickmux/demultiplex.go index 212eb5508..d7399bfc4 100644 --- a/glusterd2/brickmux/demultiplex.go +++ b/glusterd2/brickmux/demultiplex.go @@ -32,11 +32,17 @@ func Demultiplex(b brick.Brickinfo) error { if err != nil { return err } - if pidOnFile, err = daemon.ReadPidFromFile(brickDaemon.PidFile()); err == nil { + if pidOnFile, err = daemon.ReadPidFromFile(brickDaemon.PidFile()); err != nil { log.WithFields(log.Fields{"brick": b.String(), "pidfile": brickDaemon.PidFile()}).Error("Failed to read the pidfile") return err + } + if pidOnFile == -1 { + log.WithFields(log.Fields{"brick": b.String(), + "pidfile": brickDaemon.PidFile()}).Error("Pid is -1") + return err + } brickDaemon.Socketfilepath, err = glusterdGetSockFromBrickPid(pidOnFile) if err != nil {