@@ -34,6 +34,7 @@ type RepMgr struct {
3434 PrimaryRegion string
3535 Region string
3636 PrivateIP string
37+ MachineID string
3738 DataDir string
3839 DatabaseName string
3940 Credentials admin.Credential
@@ -161,10 +162,12 @@ func (r *RepMgr) setDefaults() error {
161162 return err
162163 }
163164
165+ hostname := r .machineIdToDNS (r .MachineID )
166+
164167 conf := ConfigMap {
165168 "node_id" : nodeID ,
166- "node_name" : fmt .Sprintf ("'%s'" , r . PrivateIP ),
167- "conninfo" : fmt .Sprintf ("'host=%s port=%d user=%s dbname=%s connect_timeout=5'" , r . PrivateIP , r .Port , r .Credentials .Username , r .DatabaseName ),
169+ "node_name" : fmt .Sprintf ("'%s'" , hostname ),
170+ "conninfo" : fmt .Sprintf ("'host=%s port=%d user=%s dbname=%s connect_timeout=5'" , hostname , r .Port , r .Credentials .Username , r .DatabaseName ),
168171 "data_directory" : fmt .Sprintf ("'%s'" , r .DataDir ),
169172 "failover" : "'automatic'" ,
170173 "use_replication_slots" : "yes" ,
@@ -276,7 +279,7 @@ func (*RepMgr) restartDaemon() error {
276279}
277280
278281func (r * RepMgr ) daemonRestartRequired (m * Member ) bool {
279- return m .Hostname != r .PrivateIP
282+ return m .Hostname != r .MachineID
280283}
281284
282285func (r * RepMgr ) unregisterWitness (id int ) error {
@@ -301,14 +304,14 @@ func (r *RepMgr) rejoinCluster(hostname string) error {
301304 return err
302305}
303306
304- func (r * RepMgr ) clonePrimary (ipStr string ) error {
307+ func (r * RepMgr ) clonePrimary (hostname string ) error {
305308 cmdStr := fmt .Sprintf ("mkdir -p %s" , r .DataDir )
306309 if _ , err := utils .RunCommand (cmdStr , "postgres" ); err != nil {
307310 return fmt .Errorf ("failed to create pg directory: %s" , err )
308311 }
309312
310313 cmdStr = fmt .Sprintf ("repmgr -h %s -p %d -d %s -U %s -f %s standby clone -c -F" ,
311- ipStr ,
314+ hostname ,
312315 r .Port ,
313316 r .DatabaseName ,
314317 r .Credentials .Username ,
@@ -322,6 +325,21 @@ func (r *RepMgr) clonePrimary(ipStr string) error {
322325 return nil
323326}
324327
328+ func (r * RepMgr ) regenReplicationConf (ctx context.Context ) error {
329+ // TODO: do we need -c?
330+ if _ , err := utils .RunCmd (ctx , "postgres" ,
331+ "repmgr" , "--replication-conf-only" ,
332+ "-h" , "" ,
333+ "-p" , fmt .Sprint (r .Port ),
334+ "-d" , r .DatabaseName ,
335+ "-U" , r .Credentials .Username ,
336+ "-f" , r .ConfigPath ,
337+ "standby" , "clone" , "-F" ); err != nil {
338+ return fmt .Errorf ("failed to regenerate replication conf: %s" , err )
339+ }
340+ return nil
341+ }
342+
325343type Member struct {
326344 ID int
327345 Hostname string
@@ -431,26 +449,56 @@ func (*RepMgr) MemberByHostname(ctx context.Context, pg *pgx.Conn, hostname stri
431449 return & member , nil
432450}
433451
452+ // MemberBy6PN returns a member by its 6PN address.
453+ func (r * RepMgr ) MemberBy6PN (ctx context.Context , pg * pgx.Conn , ip string ) (* Member , error ) {
454+ members , err := r .Members (ctx , pg )
455+ if err != nil {
456+ return nil , err
457+ }
458+
459+ resolver := privnet .GetResolver ()
460+ var lastErr error
461+ for _ , member := range members {
462+ ips , err := resolver .LookupIPAddr (ctx , member .Hostname )
463+ if err != nil {
464+ lastErr = err
465+ continue
466+ }
467+
468+ for _ , addr := range ips {
469+ if addr .IP .String () == ip {
470+ return & member , nil
471+ }
472+ }
473+ }
474+
475+ if lastErr != nil {
476+ return nil , fmt .Errorf ("no matches found for %s, and error encountered: %s" , ip , lastErr )
477+ }
478+
479+ return nil , nil
480+ }
481+
434482func (r * RepMgr ) ResolveMemberOverDNS (ctx context.Context ) (* Member , error ) {
435- ips , err := r .InRegionPeerIPs (ctx )
483+ machineIds , err := r .InRegionPeerMachines (ctx )
436484 if err != nil {
437485 return nil , err
438486 }
439487
440488 var target * Member
441489
442- for _ , ip := range ips {
443- if ip . String () == r .PrivateIP {
490+ for _ , machineId := range machineIds {
491+ if machineId == r .MachineID {
444492 continue
445493 }
446494
447- conn , err := r .NewRemoteConnection (ctx , ip . String ( ))
495+ conn , err := r .NewRemoteConnection (ctx , r . machineIdToDNS ( machineId ))
448496 if err != nil {
449497 continue
450498 }
451499 defer func () { _ = conn .Close (ctx ) }()
452500
453- member , err := r .MemberByHostname (ctx , conn , ip . String ( ))
501+ member , err := r .MemberByHostname (ctx , conn , r . machineIdToDNS ( machineId ))
454502 if err != nil {
455503 continue
456504 }
@@ -477,6 +525,21 @@ func (r *RepMgr) InRegionPeerIPs(ctx context.Context) ([]net.IPAddr, error) {
477525 return privnet .AllPeers (ctx , targets )
478526}
479527
528+ func (r * RepMgr ) InRegionPeerMachines (ctx context.Context ) ([]string , error ) {
529+ machines , err := privnet .AllMachines (ctx , r .AppName )
530+ if err != nil {
531+ return nil , err
532+ }
533+
534+ var machineIDs []string
535+ for _ , machine := range machines {
536+ if machine .Region == r .PrimaryRegion {
537+ machineIDs = append (machineIDs , machine .Id )
538+ }
539+ }
540+ return machineIDs , nil
541+ }
542+
480543func (r * RepMgr ) HostInRegion (ctx context.Context , hostname string ) (bool , error ) {
481544 ips , err := r .InRegionPeerIPs (ctx )
482545 if err != nil {
@@ -514,3 +577,11 @@ func (r *RepMgr) UnregisterMember(member Member) error {
514577func (r * RepMgr ) eligiblePrimary () bool {
515578 return r .Region == r .PrimaryRegion
516579}
580+
581+ func (r * RepMgr ) machineIdToDNS (nodeName string ) string {
582+ if len (nodeName ) != 14 {
583+ panic ("invalid machine id" )
584+ }
585+
586+ return fmt .Sprintf ("%s.vm.%s.internal" , nodeName , r .AppName )
587+ }
0 commit comments