Skip to content

Commit e5a9a0b

Browse files
authored
Improve LDAP desktop discovery (#30463)
In environments with many desktops where some DNS queries time out, we can run into situations where the discovery loop doesn't complete before the desktop's 10m TTL expires, causing desktops to disappear and reappear sporadically.
- Increase the TTL for LDAP-discovered desktops to 30m. This won't harm UX, since desktops that stop being discovered are already purged prior to their expiration.
- Perform both DNS queries (via the default resolver and by hitting the LDAP server directly) in parallel. This reduces the maximum time a single lookup can take from 20s to 5s.
1 parent 2d164f6 commit e5a9a0b

File tree

1 file changed

+54
-13
lines changed

1 file changed

+54
-13
lines changed

lib/srv/desktop/discovery.go

+54-13
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"context"
1919
"fmt"
2020
"net"
21+
"net/netip"
2122
"strings"
2223
"time"
2324

@@ -183,21 +184,58 @@ func (s *WindowsService) applyLabelsFromLDAP(entry *ldap.Entry, labels map[strin
183184

184185
// lookupDesktop does a DNS lookup for the provided hostname.
185186
// It checks using the default system resolver first, and falls
186-
// back to making a DNS query of the configured LDAP server
187-
// if the system resolver fails.
188-
func (s *WindowsService) lookupDesktop(ctx context.Context, hostname string) (addrs []string, err error) {
189-
tctx, cancel := context.WithTimeout(ctx, 10*time.Second)
190-
defer cancel()
187+
// back to the configured LDAP server if the system resolver fails.
188+
func (s *WindowsService) lookupDesktop(ctx context.Context, hostname string) ([]string, error) {
189+
stringAddrs := func(addrs []netip.Addr) []string {
190+
result := make([]string, 0, len(addrs))
191+
for _, addr := range addrs {
192+
result = append(result, addr.String())
193+
}
194+
return result
195+
}
196+
197+
const queryTimeout = 5 * time.Second
191198

192-
addrs, err = net.DefaultResolver.LookupHost(tctx, hostname)
193-
if err == nil && len(addrs) > 0 {
194-
return addrs, nil
199+
queryResolver := func(resolver *net.Resolver, resolverName string) chan []netip.Addr {
200+
ch := make(chan []netip.Addr, 1)
201+
if resolver != nil {
202+
go func() {
203+
tctx, cancel := context.WithTimeout(ctx, queryTimeout)
204+
defer cancel()
205+
206+
addrs, err := resolver.LookupNetIP(tctx, "ip4", hostname)
207+
if err != nil {
208+
s.cfg.Log.Debugf("DNS lookup for %v failed with %s resolver: %v",
209+
hostname, resolverName, err)
210+
}
211+
if len(addrs) > 0 {
212+
ch <- addrs
213+
}
214+
}()
215+
}
216+
return ch
195217
}
196-
if s.dnsResolver == nil {
197-
return nil, trace.NewAggregate(err, trace.Errorf("DNS lookup for %q failed and there's no LDAP server to fallback to", hostname))
218+
219+
// kick off both DNS queries in parallel
220+
defaultResult := queryResolver(net.DefaultResolver, "default")
221+
ldapResult := queryResolver(s.dnsResolver, "LDAP")
222+
223+
// wait 5 seconds for the default resolver to return
224+
select {
225+
case addrs := <-defaultResult:
226+
return stringAddrs(addrs), nil
227+
case <-s.cfg.Clock.After(5 * time.Second):
228+
}
229+
230+
// If we didn't get a result from the default resolver,
231+
// the result from the LDAP resolver is either available
232+
// now or we're done. There's no more waiting.
233+
select {
234+
case addrs := <-ldapResult:
235+
return stringAddrs(addrs), nil
236+
default:
237+
return nil, trace.Errorf("could not resolve %v in time", hostname)
198238
}
199-
s.cfg.Log.WithError(err).Debugf("DNS lookup for %q failed, falling back to LDAP server", hostname)
200-
return s.dnsResolver.LookupHost(ctx, hostname)
201239
}
202240

203241
// ldapEntryToWindowsDesktop generates the Windows Desktop resource
@@ -237,6 +275,9 @@ func (s *WindowsService) ldapEntryToWindowsDesktop(ctx context.Context, entry *l
237275
return nil, trace.Wrap(err)
238276
}
239277

240-
desktop.SetExpiry(s.cfg.Clock.Now().UTC().Add(apidefaults.ServerAnnounceTTL))
278+
// We use a longer TTL for discovered desktops, because the reconciler will manually
279+
// purge them if they stop being detected, and discovery of large Windows fleets can
280+
// take a long time.
281+
desktop.SetExpiry(s.cfg.Clock.Now().UTC().Add(apidefaults.ServerAnnounceTTL * 3))
241282
return desktop, nil
242283
}

0 commit comments

Comments
 (0)