--- a/resolv/nss_dns/dns-host.c +++ a/resolv/nss_dns/dns-host.c @@ -1031,7 +1031,10 @@ gaih_getanswer_slice (const querybuf *answer, int anslen, const char *qname, int h_namelen = 0; if (ancount == 0) - return NSS_STATUS_NOTFOUND; + { + *h_errnop = HOST_NOT_FOUND; + return NSS_STATUS_NOTFOUND; + } while (ancount-- > 0 && cp < end_of_message && had_error == 0) { @@ -1208,7 +1211,14 @@ gaih_getanswer_slice (const querybuf *answer, int anslen, const char *qname, /* Special case here: if the resolver sent a result but it only contains a CNAME while we are looking for a T_A or T_AAAA record, we fail with NOTFOUND instead of TRYAGAIN. */ - return canon == NULL ? NSS_STATUS_TRYAGAIN : NSS_STATUS_NOTFOUND; + if (canon != NULL) + { + *h_errnop = HOST_NOT_FOUND; + return NSS_STATUS_NOTFOUND; + } + + *h_errnop = NETDB_INTERNAL; + return NSS_STATUS_TRYAGAIN; } @@ -1222,11 +1232,101 @@ gaih_getanswer (const querybuf *answer1, int anslen1, const querybuf *answer2, enum nss_status status = NSS_STATUS_NOTFOUND; + /* Combining the NSS status of two distinct queries requires some + compromise and attention to symmetry (A or AAAA queries can be + returned in any order). What follows is a breakdown of how this + code is expected to work and why. We discuss only SUCCESS, + TRYAGAIN, NOTFOUND and UNAVAIL, since they are the only returns + that apply (though RETURN and MERGE exist). We make a distinction + between TRYAGAIN (recoverable) and TRYAGAIN' (not-recoverable). + A recoverable TRYAGAIN is almost always due to buffer size issues + and returns ERANGE in errno and the caller is expected to retry + with a larger buffer. + + Lastly, you may be tempted to make significant changes to the + conditions in this code to bring about symmetry between responses. + Please don't change anything without due consideration for + expected application behaviour. 
Some of the synthesized responses + aren't very well thought out and sometimes appear to imply that + IPv4 responses are always answer 1, and IPv6 responses are always + answer 2, but that's not true (see the implementation of send_dg + and send_vc to see that responses can arrive in any order, particularly + for UDP). However, we expect it holds roughly enough of the time + that this code works, but certainly needs to be fixed to make this + a more robust implementation. + + ---------------------------------------------- + | Answer 1 Status / | Synthesized | Reason | + | Answer 2 Status | Status | | + |--------------------------------------------| + | SUCCESS/SUCCESS | SUCCESS | [1] | + | SUCCESS/TRYAGAIN | TRYAGAIN | [5] | + | SUCCESS/TRYAGAIN' | SUCCESS | [1] | + | SUCCESS/NOTFOUND | SUCCESS | [1] | + | SUCCESS/UNAVAIL | SUCCESS | [1] | + | TRYAGAIN/SUCCESS | TRYAGAIN | [2] | + | TRYAGAIN/TRYAGAIN | TRYAGAIN | [2] | + | TRYAGAIN/TRYAGAIN' | TRYAGAIN | [2] | + | TRYAGAIN/NOTFOUND | TRYAGAIN | [2] | + | TRYAGAIN/UNAVAIL | TRYAGAIN | [2] | + | TRYAGAIN'/SUCCESS | SUCCESS | [3] | + | TRYAGAIN'/TRYAGAIN | TRYAGAIN | [3] | + | TRYAGAIN'/TRYAGAIN' | TRYAGAIN' | [3] | + | TRYAGAIN'/NOTFOUND | TRYAGAIN' | [3] | + | TRYAGAIN'/UNAVAIL | UNAVAIL | [3] | + | NOTFOUND/SUCCESS | SUCCESS | [3] | + | NOTFOUND/TRYAGAIN | TRYAGAIN | [3] | + | NOTFOUND/TRYAGAIN' | TRYAGAIN' | [3] | + | NOTFOUND/NOTFOUND | NOTFOUND | [3] | + | NOTFOUND/UNAVAIL | UNAVAIL | [3] | + | UNAVAIL/SUCCESS | UNAVAIL | [4] | + | UNAVAIL/TRYAGAIN | UNAVAIL | [4] | + | UNAVAIL/TRYAGAIN' | UNAVAIL | [4] | + | UNAVAIL/NOTFOUND | UNAVAIL | [4] | + | UNAVAIL/UNAVAIL | UNAVAIL | [4] | + ---------------------------------------------- + + [1] If the first response is a success we return success. + This ignores the state of the second answer and in fact + incorrectly sets errno and h_errno to that of the second + answer.
However because the response is a success we ignore + *errnop and *h_errnop (though that means you touched errno on + success). We are being conservative here and returning the + likely IPv4 response in the first answer as a success. + + [2] If the first response is a recoverable TRYAGAIN we return + that instead of looking at the second response. The + expectation here is that we have failed to get an IPv4 response + and should retry both queries. + + [3] If the first response was not a SUCCESS and the second + response is not NOTFOUND (had a SUCCESS, need to TRYAGAIN, + or failed entirely e.g. TRYAGAIN' and UNAVAIL) then use the + result from the second response, otherwise the first response's + status is used. Again we have some odd side-effects when the + second response is NOTFOUND because we overwrite *errnop and + *h_errnop that means that a first answer of NOTFOUND might see + its *errnop and *h_errnop values altered. Whether it matters + in practice that a first response NOTFOUND has the wrong + *errnop and *h_errnop is undecided. + + [4] If the first response is UNAVAIL we return that instead of + looking at the second response. The expectation here is that + it will have failed similarly e.g. configuration failure. + + [5] Testing this code is complicated by the fact that truncated + second response buffers might be returned as SUCCESS if the + first answer is a SUCCESS. To fix this we add symmetry to + TRYAGAIN with the second response. If the second response + is a recoverable error we now return TRYAGAIN even if the first + response was SUCCESS.
*/ + if (anslen1 > 0) status = gaih_getanswer_slice(answer1, anslen1, qname, &pat, &buffer, &buflen, errnop, h_errnop, ttlp, &first); + if ((status == NSS_STATUS_SUCCESS || status == NSS_STATUS_NOTFOUND || (status == NSS_STATUS_TRYAGAIN /* We want to look at the second answer in case of an @@ -1242,8 +1342,15 @@ gaih_getanswer (const querybuf *answer1, int anslen1, const querybuf *answer2, &pat, &buffer, &buflen, errnop, h_errnop, ttlp, &first); + /* Use the second response status in some cases. */ if (status != NSS_STATUS_SUCCESS && status2 != NSS_STATUS_NOTFOUND) status = status2; + /* Do not return a truncated second response (unless it was + unavoidable e.g. unrecoverable TRYAGAIN). */ + if (status == NSS_STATUS_SUCCESS + && (status2 == NSS_STATUS_TRYAGAIN + && *errnop == ERANGE && *h_errnop != NO_RECOVERY)) + status = NSS_STATUS_TRYAGAIN; } return status; --- a/resolv/res_query.c +++ a/resolv/res_query.c @@ -396,6 +396,7 @@ __libc_res_nsearch(res_state statp, { free (*answerp2); *answerp2 = NULL; + *nanswerp2 = 0; *answerp2_malloced = 0; } } @@ -447,6 +448,7 @@ __libc_res_nsearch(res_state statp, { free (*answerp2); *answerp2 = NULL; + *nanswerp2 = 0; *answerp2_malloced = 0; } @@ -521,6 +523,7 @@ __libc_res_nsearch(res_state statp, { free (*answerp2); *answerp2 = NULL; + *nanswerp2 = 0; *answerp2_malloced = 0; } if (saved_herrno != -1) --- a/resolv/res_send.c +++ a/resolv/res_send.c @@ -1,3 +1,20 @@ +/* Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + /* * Copyright (c) 1985, 1989, 1993 * The Regents of the University of California. All rights reserved. @@ -355,6 +372,8 @@ __libc_res_nsend(res_state statp, const u_char *buf, int buflen, #ifdef USE_HOOKS if (__glibc_unlikely (statp->qhook || statp->rhook)) { if (anssiz < MAXPACKET && ansp) { + /* Always allocate MAXPACKET, callers expect + this specific size. */ u_char *buf = malloc (MAXPACKET); if (buf == NULL) return (-1); @@ -630,6 +649,77 @@ get_nsaddr (res_state statp, int n) return (struct sockaddr *) (void *) &statp->nsaddr_list[n]; } +/* The send_vc function is responsible for sending a DNS query over TCP + to the nameserver numbered NS from the res_state STATP i.e. + EXT(statp).nssocks[ns]. The function supports sending both IPv4 and + IPv6 queries serially on the same socket. + + Please note that for TCP there is no way to disable sending both + queries, unlike UDP, which honours RES_SNGLKUP and RES_SNGLKUPREOP + and sends the queries serially and waits for the result after each + sent query. This implementation should be corrected to honour these + options. + + Please also note that for TCP we send both queries over the same + socket one after another. This technically violates best practice + since the server is allowed to read the first query, respond, and + then close the socket (to service another client). If the server + does this, then the remaining second query in the socket data buffer + will cause the server to send the client an RST which will arrive + asynchronously and the client's OS will likely tear down the socket + receive buffer resulting in a potentially short read and lost + response data.
This will force the client to retry the query again, + and this process may repeat until all servers and connection resets + are exhausted and then the query will fail. It's not known if this + happens with any frequency in real DNS server implementations. This + implementation should be corrected to use two sockets by default for + parallel queries. + + The query stored in BUF of BUFLEN length is sent first followed by + the query stored in BUF2 of BUFLEN2 length. Queries are sent + serially on the same socket. + + Answers to the query are stored firstly in *ANSP up to a max of + *ANSSIZP bytes. If more than *ANSSIZP bytes are needed and ANSCP + is non-NULL (to indicate that modifying the answer buffer is allowed) + then malloc is used to allocate a new response buffer and ANSCP and + ANSP will both point to the new buffer. If more than *ANSSIZP bytes + are needed but ANSCP is NULL, then as much of the response as + possible is read into the buffer, but the results will be truncated. + When truncation happens because of a small answer buffer the DNS + packet's header field TC will be set to 1, indicating a truncated + message and the rest of the socket data will be read and discarded. + + Answers to the query are stored secondly in *ANSP2 up to a max of + *ANSSIZP2 bytes, with the actual response length stored in + *RESPLEN2. If more than *ANSSIZP2 bytes are needed and ANSP2 + is non-NULL (required for a second query) then malloc is used to + allocate a new response buffer, *ANSSIZP2 is set to the new buffer + size and *ANSP2_MALLOCED is set to 1. + + The ANSP2_MALLOCED argument will eventually be removed as the + change in buffer pointer can be used to detect the buffer has + changed and that the caller should use free on the new buffer. + + Note that the answers may arrive in any order from the server and + therefore the first and second answer buffers may not correspond to + the first and second queries.
+ + It is not supported to call this function with a non-NULL ANSP2 + but a NULL ANSCP. Put another way, you can call send_vc with a + single unmodifiable buffer or two modifiable buffers, but no other + combination is supported. + + It is the caller's responsibility to free the malloc allocated + buffers by detecting that the pointers have changed from their + original values i.e. *ANSCP or *ANSP2 has changed. + + If errors are encountered then *TERRNO is set to an appropriate + errno value and a zero result is returned for a recoverable error, + and a less-than zero result is returned for a non-recoverable error. + + If no errors are encountered then *TERRNO is left unmodified and + the length of the first response in bytes is returned. */ static int send_vc(res_state statp, const u_char *buf, int buflen, const u_char *buf2, int buflen2, @@ -639,11 +729,7 @@ send_vc(res_state statp, { const HEADER *hp = (HEADER *) buf; const HEADER *hp2 = (HEADER *) buf2; - u_char *ans = *ansp; - int orig_anssizp = *anssizp; - // XXX REMOVE - // int anssiz = *anssizp; - HEADER *anhp = (HEADER *) ans; + HEADER *anhp = (HEADER *) *ansp; struct sockaddr *nsap = get_nsaddr (statp, ns); int truncating, connreset, n; /* On some architectures compiler might emit a warning indicating @@ -731,6 +817,8 @@ send_vc(res_state statp, * Receive length & response */ int recvresp1 = 0; + /* Skip the second response if there is no second query. + To do that we mark the second response as received. */ int recvresp2 = buf2 == NULL; uint16_t rlen16; read_len: @@ -767,40 +855,14 @@ send_vc(res_state statp, u_char **thisansp; int *thisresplenp; if ((recvresp1 | recvresp2) == 0 || buf2 == NULL) { + /* We have not received any responses + yet or we only have one response to + receive. */ thisanssizp = anssizp; thisansp = anscp ?: ansp; assert (anscp != NULL || ansp2 == NULL); thisresplenp = &resplen; } else { - if (*anssizp != MAXPACKET) { - /* No buffer allocated for the first - reply.
We can try to use the rest - of the user-provided buffer. */ -#if __GNUC_PREREQ (4, 7) - DIAG_PUSH_NEEDS_COMMENT; - DIAG_IGNORE_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); -#endif -#if _STRING_ARCH_unaligned - *anssizp2 = orig_anssizp - resplen; - *ansp2 = *ansp + resplen; -#else - int aligned_resplen - = ((resplen + __alignof__ (HEADER) - 1) - & ~(__alignof__ (HEADER) - 1)); - *anssizp2 = orig_anssizp - aligned_resplen; - *ansp2 = *ansp + aligned_resplen; -#endif -#if __GNUC_PREREQ (4, 7) - DIAG_POP_NEEDS_COMMENT; -#endif - } else { - /* The first reply did not fit into the - user-provided buffer. Maybe the second - answer will. */ - *anssizp2 = orig_anssizp; - *ansp2 = *ansp; - } - thisanssizp = anssizp2; thisansp = ansp2; thisresplenp = resplen2; @@ -804,10 +870,14 @@ send_vc(res_state statp, anhp = (HEADER *) *thisansp; *thisresplenp = rlen; - if (rlen > *thisanssizp) { - /* Yes, we test ANSCP here. If we have two buffers - both will be allocatable. */ - if (__glibc_likely (anscp != NULL)) { + /* Is the answer buffer too small? */ + if (*thisanssizp < rlen) { + /* If the current buffer is not the static + user-supplied buffer then we can reallocate + it. */ + if (thisansp != NULL && thisansp != ansp) { + /* Always allocate MAXPACKET, callers expect + this specific size. */ u_char *newp = malloc (MAXPACKET); if (newp == NULL) { *terrno = ENOMEM; @@ -819,6 +889,9 @@ send_vc(res_state statp, if (thisansp == ansp2) *ansp2_malloced = 1; anhp = (HEADER *) newp; + /* A uint16_t can't be larger than MAXPACKET + thus it's safe to allocate MAXPACKET but + read RLEN bytes instead. */ len = rlen; } else { Dprint(statp->options & RES_DEBUG, @@ -948,6 +1021,66 @@ reopen (res_state statp, int *terrno, int ns) return 1; } +/* The send_dg function is responsible for sending a DNS query over UDP + to the nameserver numbered NS from the res_state STATP i.e. + EXT(statp).nssocks[ns].
The function supports IPv4 and IPv6 queries + along with the ability to send the query in parallel for both stacks + (default) or serially (RES_SNGLKUP). It also supports serial lookup + with a close and reopen of the socket used to talk to the server + (RES_SNGLKUPREOP) to work around broken name servers. + + The query stored in BUF of BUFLEN length is sent first followed by + the query stored in BUF2 of BUFLEN2 length. Queries are sent + in parallel (default) or serially (RES_SNGLKUP or RES_SNGLKUPREOP). + + Answers to the query are stored firstly in *ANSP up to a max of + *ANSSIZP bytes. If more than *ANSSIZP bytes are needed and ANSCP + is non-NULL (to indicate that modifying the answer buffer is allowed) + then malloc is used to allocate a new response buffer and ANSCP and + ANSP will both point to the new buffer. If more than *ANSSIZP bytes + are needed but ANSCP is NULL, then as much of the response as + possible is read into the buffer, but the results will be truncated. + When truncation happens because of a small answer buffer the DNS + packet's header field TC will be set to 1, indicating a truncated + message, while the rest of the UDP packet is discarded. + + Answers to the query are stored secondly in *ANSP2 up to a max of + *ANSSIZP2 bytes, with the actual response length stored in + *RESPLEN2. If more than *ANSSIZP2 bytes are needed and ANSP2 + is non-NULL (required for a second query) then malloc is used to + allocate a new response buffer, *ANSSIZP2 is set to the new buffer + size and *ANSP2_MALLOCED is set to 1. + + The ANSP2_MALLOCED argument will eventually be removed as the + change in buffer pointer can be used to detect the buffer has + changed and that the caller should use free on the new buffer. + + Note that the answers may arrive in any order from the server and + therefore the first and second answer buffers may not correspond to + the first and second queries.
+ + It is not supported to call this function with a non-NULL ANSP2 + but a NULL ANSCP. Put another way, you can call send_dg with a + single unmodifiable buffer or two modifiable buffers, but no other + combination is supported. + + It is the caller's responsibility to free the malloc allocated + buffers by detecting that the pointers have changed from their + original values i.e. *ANSCP or *ANSP2 has changed. + + If an answer is truncated because of UDP datagram DNS limits then + *V_CIRCUIT is set to 1 and the return value is non-zero to indicate to + the caller to retry with TCP. The value *GOTSOMEWHERE is set to 1 + if any progress was made reading a response from the nameserver and + is used by the caller to distinguish between ECONNREFUSED and + ETIMEDOUT (the latter if *GOTSOMEWHERE is 1). + + If errors are encountered then *TERRNO is set to an appropriate + errno value and a zero result is returned for a recoverable error, + and a less-than zero result is returned for a non-recoverable error. + + If no errors are encountered then *TERRNO is left unmodified and + the length of the first response in bytes is returned. */ static int send_dg(res_state statp, const u_char *buf, int buflen, const u_char *buf2, int buflen2, @@ -957,8 +1090,6 @@ send_dg(res_state statp, { const HEADER *hp = (HEADER *) buf; const HEADER *hp2 = (HEADER *) buf2; - u_char *ans = *ansp; - int orig_anssizp = *anssizp; struct timespec now, timeout, finish; struct pollfd pfd[1]; int ptimeout; @@ -991,6 +1122,8 @@ send_dg(res_state statp, int need_recompute = 0; int nwritten = 0; int recvresp1 = 0; + /* Skip the second response if there is no second query. + To do that we mark the second response as received.
*/ int recvresp2 = buf2 == NULL; pfd[0].fd = EXT(statp).nssocks[ns]; pfd[0].events = POLLOUT; @@ -1154,55 +1287,56 @@ send_dg(res_state statp, int *thisresplenp; if ((recvresp1 | recvresp2) == 0 || buf2 == NULL) { + /* We have not received any responses + yet or we only have one response to + receive. */ thisanssizp = anssizp; thisansp = anscp ?: ansp; assert (anscp != NULL || ansp2 == NULL); thisresplenp = &resplen; } else { - if (*anssizp != MAXPACKET) { - /* No buffer allocated for the first - reply. We can try to use the rest - of the user-provided buffer. */ -#if _STRING_ARCH_unaligned - *anssizp2 = orig_anssizp - resplen; - *ansp2 = *ansp + resplen; -#else - int aligned_resplen - = ((resplen + __alignof__ (HEADER) - 1) - & ~(__alignof__ (HEADER) - 1)); - *anssizp2 = orig_anssizp - aligned_resplen; - *ansp2 = *ansp + aligned_resplen; -#endif - } else { - /* The first reply did not fit into the - user-provided buffer. Maybe the second - answer will. */ - *anssizp2 = orig_anssizp; - *ansp2 = *ansp; - } - thisanssizp = anssizp2; thisansp = ansp2; thisresplenp = resplen2; } if (*thisanssizp < MAXPACKET - /* Yes, we test ANSCP here. If we have two buffers - both will be allocatable. */ - && anscp + /* If the current buffer is not the static + user-supplied buffer then we can reallocate + it. */ + && (thisansp != NULL && thisansp != ansp) #ifdef FIONREAD + /* Is the size too small? */ && (ioctl (pfd[0].fd, FIONREAD, thisresplenp) < 0 || *thisanssizp < *thisresplenp) #endif ) { + /* Always allocate MAXPACKET, callers expect + this specific size. */ u_char *newp = malloc (MAXPACKET); if (newp != NULL) { - *anssizp = MAXPACKET; - *thisansp = ans = newp; + *thisanssizp = MAXPACKET; + *thisansp = newp; if (thisansp == ansp2) *ansp2_malloced = 1; } } + /* We could end up with truncation if anscp was NULL + (not allowed to change caller's buffer) and the + response buffer size is too small.
This isn't a + reliable way to detect truncation because the ioctl + may be an inaccurate report of the UDP message size. + Therefore we use this only to issue debug output. + To do truncation accurately with UDP we need + MSG_TRUNC which is only available on Linux. We + can abstract out the Linux-specific feature in the + future to detect truncation. */ + if (__glibc_unlikely (*thisanssizp < *thisresplenp)) { + Dprint(statp->options & RES_DEBUG, + (stdout, ";; response may be truncated (UDP)\n") + ); + } + HEADER *anhp = (HEADER *) *thisansp; socklen_t fromlen = sizeof(struct sockaddr_in6); assert (sizeof(from) <= fromlen);