/*
 * Copyright (c) 1999-2003 Caucho Technology.  All rights reserved.
 *
 * Caucho Technology permits modification and use of this file in
 * source and binary form ("the Software") subject to the Caucho
 * Developer Source License 1.1 ("the License") which accompanies
 * this file.  The License is also available at
 *   http://www.caucho.com/download/cdsl1-1.xtp
 *
 * In addition to the terms of the License, the following conditions
 * must be met:
 *
 * 1. Each copy or derived work of the Software must preserve the copyright
 *    notice and this notice unmodified.
 *
 * 2. Each copy of the Software in source or binary form must include 
 *    an unmodified copy of the License in a plain ASCII text file named
 *    LICENSE.
 *
 * 3. Caucho reserves all rights to its names, trademarks and logos.
 *    In particular, the names "Resin" and "Caucho" are trademarks of
 *    Caucho and may not be used to endorse products derived from
 *    this software.  "Resin" and "Caucho" may not appear in the names
 *    of products derived from this software.
 *
 * This Software is provided "AS IS," without a warranty of any kind. 
 * ALL EXPRESS OR IMPLIED REPRESENTATIONS AND WARRANTIES, INCLUDING ANY
 * IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
 * OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED.
 *
 * CAUCHO TECHNOLOGY AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES
 * SUFFERED BY LICENSEE OR ANY THIRD PARTY AS A RESULT OF USING OR
 * DISTRIBUTING SOFTWARE. IN NO EVENT WILL CAUCHO OR ITS LICENSORS BE LIABLE
 * FOR ANY LOST REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL,
 * CONSEQUENTIAL, INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND
 * REGARDLESS OF THE THEORY OF LIABILITY, ARISING OUT OF THE USE OF OR
 * INABILITY TO USE SOFTWARE, EVEN IF HE HAS BEEN ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGES.      
 *
 * @author Scott Ferguson
 */

#include <stdlib.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>

#ifdef WIN32
#include <winsock2.h>
#else
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <unistd.h>
#endif

#ifdef OPENSSL
/* SSLeay stuff */
#include <openssl/rsa.h>       
#include <openssl/crypto.h>
#include <openssl/x509.h>
#include <openssl/pem.h>
#include <openssl/ssl.h>
#include <openssl/err.h>
#endif

#include "cse.h"

/* Port used by cse_open() when an srun has no positive port configured. */
#define DEFAULT_PORT 6802
/* Default srun->dead_time: how long a failed srun is skipped (compared
 * against the caller's "now"; presumably seconds -- confirm with callers). */
#define DEAD_TIME 120
/* Default srun->live_time: maximum idle age of a pooled connection
 * (same time unit as "now"). */
#define LIVE_TIME 10
/* Default srun->connect_timeout; cse_connect() treats it as seconds. */
#define CONNECT_TIMEOUT 2

/* Table of every srun created by cse_add_common_host(). */
static srun_t *g_srun_list[4096];
/* Number of leading g_srun_list entries that are in use. */
static int g_srun_count;
/* Set to 1 once ssl_open() has registered the OpenSSL algorithms. */
static int g_ssl_init;

/**
 * Open hook for plain (non-SSL) connections.
 *
 * Nothing has to be negotiated, so the stream counts as open whenever
 * it already holds a valid socket descriptor.
 *
 * @param stream stream whose socket was just connected
 * @return 1 if stream->socket is non-negative, 0 otherwise.
 */
static int
std_open(stream_t *stream)
{
  if (stream->socket < 0)
    return 0;

  return 1;
}

/**
 * Read hook for plain (non-SSL) connections.
 *
 * @param s stream to read from
 * @param buf destination buffer
 * @param length maximum number of bytes to read
 * @return whatever recv() returns for the stream's socket.
 */
static int
std_read(stream_t *s, char *buf, int length)
{
  int received = recv(s->socket, buf, length, 0);

  return received;
}

/**
 * Write hook for plain (non-SSL) connections.
 *
 * @param s stream to write to
 * @param buf bytes to send
 * @param length number of bytes in buf
 * @return whatever send() returns for the stream's socket.
 */
static int
std_write(stream_t *s, const char *buf, int length)
{
  int sent = send(s->socket, buf, length, 0);

  return sent;
}

/**
 * Close hook for plain (non-SSL) connections.
 *
 * @param socket descriptor to close
 * @param ssl unused; present so the signature matches the SSL close hook
 * @return the result of closesocket().
 */
static int
std_close(int socket, void *ssl)
{
  int status = closesocket(socket);

  return status;
}

#ifdef OPENSSL

/**
 * Open hook for SSL connections: runs the client-side SSL handshake
 * over the stream's already-connected socket.
 *
 * If the stream has no SSL context yet, one is created (initializing
 * the OpenSSL library on first use).  On every failure path the socket
 * is closed and stream->socket is reset to -1.
 *
 * NOTE(review): the context is built with SSLv2_client_method(), while
 * cse_add_common_host() builds srun->ssl with TLSv1_client_method();
 * SSLv2 is obsolete -- confirm which context is meant to be used here.
 * NOTE(review): cse_open() clears stream->ssl_ctx before calling this
 * hook, so a new SSL_CTX appears to be created per connection and is
 * never freed in this file -- verify ownership.
 *
 * @param stream stream whose socket has just been connected
 * @return 1 if the handshake completed, 0 otherwise.
 */
static int
ssl_open(stream_t *stream)
{
  SSL_CTX *ctx;
  SSL *ssl;
  int fd;
  SSL_METHOD *meth;

  fd = stream->socket;
  LOG(("Trying ssl %d\n", fd));
  if (fd < 0)
    return 0;

  if (! stream->ssl_ctx) {
    if (! g_ssl_init) {
      OpenSSL_add_ssl_algorithms();
      SSL_load_error_strings();
      g_ssl_init = 1;
    }
    meth = SSLv2_client_method();
    ctx = SSL_CTX_new(meth);

    if (! ctx) {
      stream->socket = -1;
      closesocket(fd);
      LOG(("Failed SSL context\n"));
      return 0;
    }

    stream->ssl_ctx = ctx;
  }

  /* Non-null here: either it pre-existed or it was just created. */
  ctx = stream->ssl_ctx;

  ssl = SSL_new(ctx);

  if (! ssl) {
    stream->socket = -1;
    closesocket(fd);
    LOG(("Can't allocate ssl\n"));
    return 0;
  }

  SSL_set_fd(ssl, fd);

  /*
   * SSL_connect() returns 1 only when the handshake succeeded; 0 means
   * the handshake failed but was shut down cleanly, and < 0 is a fatal
   * error.  Both non-success cases must be treated as failure.
   */
  if (SSL_connect(ssl) <= 0) {
    LOG(("Can't open SSL connection %p\n", ssl));
    ERR_print_errors_fp(stderr);
    stream->socket = -1;
    closesocket(fd);
    SSL_free(ssl);
    return 0;
  }

  LOG(("Connect with ssl %d\n", fd));

  stream->ssl = ssl;

  return 1;
}

/**
 * Read hook for SSL connections.
 *
 * @param s stream to read from
 * @param buf destination buffer
 * @param length maximum number of bytes to read
 * @return -1 if the stream has no SSL session, otherwise the result of
 *         SSL_read().
 */
static int
ssl_read(stream_t *s, char *buf, int length)
{
  SSL *session = s->ssl;

  if (session)
    return SSL_read(session, buf, length);

  return -1;
}

/**
 * Write hook for SSL connections.
 *
 * @param s stream to write to
 * @param buf bytes to send
 * @param length number of bytes in buf
 * @return -1 if the stream has no SSL session, otherwise the result of
 *         SSL_write().
 */
static int
ssl_write(stream_t *s, const char *buf, int length)
{
  SSL *session = s->ssl;

  if (session)
    return SSL_write(session, (char *) buf, length);

  return -1;
}

/**
 * Close hook for SSL connections: releases the SSL session, if any,
 * and then closes the socket.
 *
 * @param socket descriptor to close
 * @param ssl SSL session attached to the socket, or NULL
 * @return the result of closesocket().
 */
static int
ssl_close(int socket, void *ssl)
{
  int status;

  if (ssl != 0)
    SSL_free(ssl);

  status = closesocket(socket);

  return status;
}
#endif

/**
 * Closes the stream's connection, if it has one.
 *
 * The stream's socket is reset to -1 before anything else, so calling
 * this on an already-closed stream is a no-op.
 *
 * @param s stream to close
 * @param msg short tag for the log line, describing why it is closed
 */
void
cse_close(stream_t *s, char *msg)
{
  int socket = s->socket;
  /* cse_recycle() calls this when s->srun or its srun is missing. */
  srun_t *srun = s->srun ? s->srun->srun : 0;

  s->socket = -1;

  if (socket < 0)
    return;

  LOG(("close %d %s\n", socket, msg));

  cse_kill_socket_cleanup(socket, s->web_pool);

  if (srun && srun->close)
    srun->close(socket, s->ssl);
  else
    closesocket(socket); /* no close hook reachable: plain close */
}

#ifdef WIN32

/**
 * Windows: connects a new TCP socket to sin, waiting at most
 * srun->connect_timeout seconds for the connection to complete.
 *
 * @param sin address and port to connect to
 * @param srun supplies connect_timeout
 * @return the connected socket (switched back to blocking mode), or -1
 *         if the socket could not be created or the connect failed or
 *         timed out.
 */
static int
cse_connect(struct sockaddr_in *sin, srun_t *srun)
{
  unsigned int sock;
  unsigned long is_nonblock;

  sock = socket(AF_INET, SOCK_STREAM, 0);

  if (sock == INVALID_SOCKET) {
    LOG(("mod_caucho can't create socket.\n"));
    return -1; /* bad socket */
  }

  /* Non-blocking mode, so connect() returns without waiting. */
  is_nonblock = 1;
  ioctlsocket(sock, FIONBIO, &is_nonblock);
  /* A non-zero result means the connect did not complete immediately. */
  if (connect(sock, (struct sockaddr *) sin, sizeof(struct sockaddr_in))) {
    WSAEVENT event = WSACreateEvent();
    WSANETWORKEVENTS networkResult;
    int result;

    /* Wait for the FD_CONNECT notification, up to the timeout (ms). */
    WSAEventSelect(sock, event, FD_CONNECT);
    result = WSAWaitForMultipleEvents(1, &event, 0,
                                      srun->connect_timeout * 1000, 0);
    WSAEnumNetworkEvents(sock, event, &networkResult);
    /* Detach the event from the socket before releasing it. */
    WSAEventSelect(sock, 0, 0);
    WSACloseEvent(event);

    /* Fail on timeout or when the connect itself reported an error. */
    if (result != WSA_WAIT_EVENT_0 ||
 	networkResult.iErrorCode[FD_CONNECT_BIT] != NO_ERROR) {
      closesocket(sock);

      return -1;
    }
  }

  /* Back to blocking mode for normal reads and writes. */
  is_nonblock = 0;
  ioctlsocket(sock, FIONBIO, &is_nonblock);
  LOG(("connect %d\n", sock));

  return sock;
}

#else

/**
 * POSIX: connects a new TCP socket to sin, waiting at most
 * srun->connect_timeout seconds for the connection to complete.
 *
 * The socket is made non-blocking for the connect and select() is used
 * to wait for completion; the original file flags are restored before
 * a connected socket is returned.
 *
 * @param sin address and port to connect to
 * @param srun supplies connect_timeout
 * @return the connected socket, or -1 on failure or timeout.
 */
static int
cse_connect(struct sockaddr_in *sin, srun_t *srun)
{
  int sock;
  fd_set write_fds;
  struct timeval timeout;
  int flags;
  int error = 0;
  socklen_t len = sizeof(error); /* getsockopt() requires socklen_t */

  sock = socket(AF_INET, SOCK_STREAM, 0);

  if (sock < 0) {
    LOG(("mod_caucho can't create socket.\n"));
    return -1; /* bad socket */
  }

  /*
   * FD_SET() on a descriptor >= FD_SETSIZE writes outside the fd_set,
   * so select() cannot be used for it.  Fall back to a plain blocking
   * connect (no timeout) rather than corrupting memory.
   */
  if (sock >= FD_SETSIZE) {
    if (! connect(sock, (const struct sockaddr *) sin, sizeof(*sin)))
      return sock;

    LOG(("connect quickfailed %x %d %d\n", sin->sin_addr.s_addr,
         ntohs(sin->sin_port), errno));

    close(sock);

    return -1;
  }

  flags = fcntl(sock, F_GETFL);
  if (flags < 0)
    flags = 0; /* F_GETFL failed; avoid setting every flag bit below */

  fcntl(sock, F_SETFL, O_NONBLOCK|flags);
  FD_ZERO(&write_fds);
  FD_SET(sock, &write_fds);

  timeout.tv_sec = srun->connect_timeout;
  timeout.tv_usec = 0;

  if (! connect(sock, (const struct sockaddr *) sin, sizeof(*sin))) {
    /* Connected immediately. */
    fcntl(sock, F_SETFL, flags);

    return sock;
  }
  else if (errno != EWOULDBLOCK && errno != EINPROGRESS) {
    LOG(("connect quickfailed %x %d %d\n", sin->sin_addr.s_addr,
         ntohs(sin->sin_port), errno));

    close(sock);

    return -1;
  }
  else if (select(sock + 1, 0, &write_fds, 0, &timeout) <= 0) {
    /* Timed out (0) or select() itself failed (< 0). */
    LOG(("timeout %x %d %d\n", sin->sin_addr.s_addr,
         ntohs(sin->sin_port), errno));

    close(sock);

    return -1;
  }
  else if (! FD_ISSET(sock, &write_fds) ||
           getsockopt(sock, SOL_SOCKET, SO_ERROR, &error, &len) < 0 ||
           error) {
    /* Writable but SO_ERROR is set: the asynchronous connect failed. */
    LOG(("connect failed %x %d %d\n", sin->sin_addr.s_addr,
         ntohs(sin->sin_port), errno));
    close(sock);

    return -1;
  }
  else {
    fcntl(sock, F_SETFL, flags);

    LOG(("connect %x:%d -> %d\n",
         sin->sin_addr.s_addr, ntohs(sin->sin_port), sock));

    return sock;
  }
}

#endif

/**
 * Connects a new TCP socket to sin with an ordinary blocking connect,
 * i.e. without any connect timeout.
 *
 * @param sin address and port to connect to
 * @return the connected socket, or -1 if the socket could not be
 *         created or the connect failed.
 */
static int
cse_connect_wait(struct sockaddr_in *sin)
{
  int fd = socket(AF_INET, SOCK_STREAM, 0);

  if (fd < 0) {
    LOG(("mod_caucho can't create socket.\n"));
    return -1; /* bad socket */
  }

  if (connect(fd, (const struct sockaddr *) sin, sizeof(*sin)) != 0) {
    LOG(("cse_connect_wait can't connect %x %d %d\n", sin->sin_addr.s_addr,
         ntohs(sin->sin_port), errno));

    closesocket(fd);

    return -1;
  }

  return fd;
}

/**
 * Initializes the stream and opens a new connection to the given srun.
 *
 * @param s stream to initialize
 * @param config configuration the stream belongs to
 * @param srun_item server entry to connect to
 * @param web_pool opaque pool handle stored on the stream
 * @param wait non-zero to use a blocking connect without a timeout
 *        (also used when the srun has no positive connect_timeout)
 * @return 0 if the item has no srun or the connect failed; otherwise
 *         the result of the srun's open hook.
 */
int
cse_open(stream_t *s, config_t *config, srun_item_t *srun_item,
         void *web_pool, int wait)
{
  struct sockaddr_in sin;
  srun_t *srun = srun_item->srun;

  if (! srun)
    return 0;

  s->config = config;
  s->pool = config->p;
  s->web_pool = web_pool;
  s->update_count = config->update_count;
  s->socket = -1;
  s->ssl = 0;
  s->ssl_ctx = 0;
  s->write_length = 0;
  s->read_length = 0;
  s->read_offset = 0;
  s->srun = srun_item;
  s->sent_data = 0;

  /* Zero padding / platform-specific fields before filling it in. */
  memset(&sin, 0, sizeof(sin));
  sin.sin_family = AF_INET;
  if (srun->host)
    sin.sin_addr = *srun->host;
  else
    sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

  if (srun->port <= 0)
    srun->port = DEFAULT_PORT;

  /* Ports are unsigned 16-bit; a signed cast mangles values > 32767. */
  sin.sin_port = htons((unsigned short) srun->port);

  if (wait || srun->connect_timeout <= 0)
    s->socket = cse_connect_wait(&sin);
  else
    s->socket = cse_connect(&sin, srun);

  if (s->socket < 0)
    return 0;

  /* Log from sin: srun->host may be NULL (loopback default above). */
  LOG(("open new connection %d %x:%d\n", s->socket,
       (unsigned int) sin.sin_addr.s_addr, srun->port));

  return srun->open(s);
}

/**
 * Writes everything in the stream's write buffer to the srun.
 *
 * The write hook is called repeatedly until the whole buffer has been
 * sent.  If a write returns zero or a negative value, the stream is
 * closed.
 *
 * @param s the buffered stream to flush.
 * @return 0 once the buffer is empty, -1 if a write failed.
 */
int
cse_flush(stream_t *s)
{
  char *cursor = s->write_buf;
  int remaining;

  for (remaining = s->write_length; remaining > 0; ) {
    int written = s->srun->srun->write(s, cursor, remaining);

    if (written <= 0) {
      cse_close(s, "flush");

      return -1;
    }

    cursor += written;
    remaining -= written;
  }

  s->write_length = 0;
  s->sent_data = 1;

  return 0;
}

/**
 * Sends any pending output and then refills the read buffer.
 *
 * The write and the read are done together so that a request which
 * fails can still be retried on another srun: sent_data is only set
 * once a response has actually been read.
 *
 * @param s the buffered stream
 * @return the number of bytes now in the read buffer, or -1 if the
 *         stream is closed, the pending output was not written in a
 *         single call, or the read returned no data (in the last two
 *         cases the stream is closed).
 */
int
cse_fill_buffer(stream_t *s)
{
  if (s->socket < 0)
    return -1;

  /* Push out pending output first. */
  if (s->write_length > 0) {
    int written;

    LOG(("write %d %d\n", s->socket, s->write_length));
    written = s->srun->srun->write(s, s->write_buf, s->write_length);

    if (written != s->write_length) {
      cse_close(s, "flush");

      return -1;
    }
  }

  s->read_offset = 0;
  s->read_length = s->srun->srun->read(s, s->read_buf, BUF_LENGTH);

  if (s->read_length > 0) {
    s->sent_data = 1;
    s->write_length = 0;

    return s->read_length;
  }

  cse_close(s, "fill_buffer");

  return -1;
}

/**
 * Reads the next byte from the stream, refilling the read buffer when
 * it has been consumed.
 *
 * @param s the buffered stream
 * @return the next element of the read buffer, or -1 if the buffer
 *         could not be refilled.
 */
int
cse_read_byte(stream_t *s)
{
  int exhausted = s->read_offset >= s->read_length;

  if (exhausted && cse_fill_buffer(s) < 0)
    return -1;

  return s->read_buf[s->read_offset++];
}

/**
 * Appends data to the stream's write buffer, flushing as needed.
 *
 * If the data does not fit in the remaining buffer space, the buffer
 * is flushed first.  Payloads of BUF_LENGTH bytes or more are then
 * written straight to the srun instead of being buffered.
 *
 * Errors are not reported to the caller; on a write failure the stream
 * is closed (s->socket becomes -1) and sent_data is set.
 *
 * @param s the buffered stream
 * @param buf data to write
 * @param length number of bytes in buf; values <= 0 write nothing
 */
void
cse_write(stream_t *s, const char *buf, int length)
{
  /* XXX: writev??? */

  if (length <= 0)
    return;

  if (s->write_length + length > BUF_LENGTH) {
    if (s->write_length > 0) {
      if (cse_flush(s) < 0) {
        s->sent_data = 1;
        return;
      }
    }

    if (length >= BUF_LENGTH) {
      s->sent_data = 1;

      /*
       * The write hook may accept fewer bytes than requested, so keep
       * writing until everything is sent (same policy as cse_flush()).
       */
      while (length > 0) {
        int len = s->srun->srun->write(s, buf, length);

        if (len <= 0) {
          cse_close(s, "write");
          return;
        }

        buf += len;
        length -= len;
      }

      return;
    }
  }

  memcpy(s->write_buf + s->write_length, buf, length);
  s->write_length += length;
}

/**
 * Reads exactly len bytes from the stream into buf, refilling the read
 * buffer as often as necessary.
 *
 * @param s the buffered stream
 * @param buf destination, at least len bytes long
 * @param len number of bytes to read
 * @return 1 when all bytes were copied (including len <= 0), -1 if the
 *         read buffer could not be refilled.
 */
int
cse_read_all(stream_t *s, char *buf, int len)
{
  char *dest = buf;
  int remaining = len;

  while (remaining > 0) {
    int available;
    int chunk;

    if (s->read_offset >= s->read_length && cse_fill_buffer(s) < 0)
      return -1;

    available = s->read_length - s->read_offset;
    chunk = remaining < available ? remaining : available;

    memcpy(dest, s->read_buf + s->read_offset, chunk);

    s->read_offset += chunk;
    dest += chunk;
    remaining -= chunk;
  }

  return 1;
}

/**
 * Discards the next len bytes of the stream, refilling the read buffer
 * as often as necessary.
 *
 * @param s the buffered stream
 * @param len number of bytes to discard
 * @return 1 when all bytes were skipped (including len <= 0), -1 if
 *         the read buffer could not be refilled.
 */
int
cse_skip(stream_t *s, int len)
{
  int remaining = len;

  while (remaining > 0) {
    int available;
    int chunk;

    if (s->read_offset >= s->read_length && cse_fill_buffer(s) < 0)
      return -1;

    available = s->read_length - s->read_offset;
    chunk = remaining < available ? remaining : available;

    s->read_offset += chunk;
    remaining -= chunk;
  }

  return 1;
}

/**
 * Reads a readlen-byte value from the stream into buf as a
 * NUL-terminated string, truncating it if the buffer is too small.
 * Bytes that do not fit are consumed and discarded.
 *
 * Assumes buflen >= 1 and readlen >= 0 (not checked here).
 *
 * @param s the buffered stream
 * @param buf destination buffer
 * @param buflen total size of buf in bytes, including the terminator
 * @param readlen number of bytes to consume from the stream
 * @return readlen if the read succeeded, 0 otherwise.
 */
int
cse_read_limit(stream_t *s, char *buf, int buflen, int readlen)
{
  int result;

  /*
   * Strictly greater: the terminator needs buf[readlen], which only
   * exists when the buffer is larger than the data.
   */
  if (buflen > readlen) {
    result = cse_read_all(s, buf, readlen);
    buf[readlen] = 0;
  }
  else {
    /* Too large: keep what fits, terminate in the last slot, drop the rest. */
    result = cse_read_all(s, buf, buflen);
    buf[buflen - 1] = 0;
    cse_skip(s, readlen - buflen);
  }

  return result > 0 ? readlen : 0;
}

/**
 * Writes one packet to the srun: a 4-byte header made of the packet
 * code and the length as a 24-bit big-endian number, followed by the
 * payload.
 *
 * @param s stream to srun
 * @param code packet code
 * @param buf payload bytes
 * @param length payload length; when negative only the header is written
 */
void
cse_write_packet(stream_t *s, char code, const char *buf, int length)
{
  char header[4];
  int shift = 16;
  int i;

  header[0] = code;
  for (i = 1; i < 4; i++) {
    header[i] = (length >> shift) & 0xff;
    shift -= 8;
  }

  cse_write(s, header, 4);

  if (length < 0)
    return;

  cse_write(s, buf, length);
}

/**
 * Writes a NUL-terminated string to the srun as one packet; the
 * terminator itself is not sent.  A NULL string writes nothing.
 *
 * @param s stream to srun
 * @param code packet code
 * @param buf string to send, or NULL
 */
void
cse_write_string(stream_t *s, char code, const char *buf)
{
  if (! buf)
    return;

  cse_write_packet(s, code, buf, strlen(buf));
}

/**
 * Reads one packet from the srun and stores its payload in buf as a
 * NUL-terminated string.
 *
 * The packet header is a code byte followed by a 24-bit big-endian
 * payload length.  Payload bytes that do not fit in the buffer are
 * consumed and discarded.
 *
 * @param s the buffered stream
 * @param buf destination buffer
 * @param length total size of buf in bytes, including the terminator;
 *        when <= 0 nothing is stored in buf
 * @return the packet code, or -1 if the stream failed while reading
 *         the header or the payload.
 */
int
cse_read_string(stream_t *s, char *buf, int length)
{
  int code;
  int l1, l2, l3;
  int read_length;
  int copy_length;

  code = cse_read_byte(s);
  l1 = cse_read_byte(s) & 0xff;
  l2 = cse_read_byte(s) & 0xff;
  l3 = cse_read_byte(s) & 0xff;
  read_length = (l1 << 16) + (l2 << 8) + (l3);

  /* A failed header read closes the stream; detect it here. */
  if (s->socket < 0) {
    if (length > 0)
      *buf = 0;
    return -1;
  }

  /* Reserve one byte for the terminator; never go negative. */
  copy_length = length > 0 ? length - 1 : 0;
  if (copy_length > read_length)
    copy_length = read_length;

  if (cse_read_all(s, buf, copy_length) < 0) {
    if (length > 0)
      *buf = 0;
    return -1;
  }

  if (length > 0)
    buf[copy_length] = 0;

  /* Drop whatever did not fit so the next packet starts aligned. */
  cse_skip(s, read_length - copy_length);

  return code;
}

/**
 * Maps one character of a session id to its numeric value:
 * 'a'-'z' -> 0-25, 'A'-'Z' -> 26-51, '0'-'9' -> 52-61, '_' -> 62 and
 * '-' -> 63.  The result is used to find the JVM that owns a session.
 *
 * @param code character to decode
 * @return the value in the range 0-63, or -1 for any other character.
 */
static int
decode(char code)
{
  if ('a' <= code && code <= 'z')
    return code - 'a';

  if ('A' <= code && code <= 'Z')
    return 26 + (code - 'A');

  if ('0' <= code && code <= '9')
    return 52 + (code - '0');

  switch (code) {
  case '_':
    return 62;
  case '-':
    return 63;
  default:
    return -1;
  }
}

/**
 * Extracts the session and backup indexes from a cookie or URL.
 *
 * Looks for the first occurrence of cookie in source and decodes the
 * two characters that follow it: the first gives the session index,
 * the second the backup index.
 *
 * @param source string to search
 * @param cookie prefix that precedes the session id
 * @param backup receives the decoded backup index (-1 if the character
 *        is missing or invalid); left untouched when cookie is not found
 * @return the decoded session index, or -1 if cookie is not found or
 *         the character after it is not a valid session character.
 */
int
cse_session_from_string(char *source, char *cookie, int *backup)
{
  char *match = strstr(source, cookie);
  size_t len;

  if (! match)
    return -1;

  len = strlen(cookie);

  /*
   * The cookie is the very end of the string: there is no session id,
   * and match[len + 1] would be past the terminator.
   */
  if (match[len] == 0) {
    *backup = -1;
    return -1;
  }

  *backup = decode(match[len + 1]);

  return decode(match[len]);
}

/**
 * Returns the shared srun for hostname:port, creating and registering
 * it in g_srun_list if it does not exist yet.
 *
 * A new srun gets the default timeouts and the plain-socket hooks.
 * When ssl is set and OpenSSL support is compiled in, the SSL hooks
 * are installed instead, provided an SSL context can be created.
 *
 * @param config configuration, used to create the srun's lock
 * @param hostname host name to resolve
 * @param port srun port
 * @param ssl non-zero to request an SSL connection
 * @return the srun, or 0 if the table is full, memory is exhausted or
 *         the host name cannot be resolved.
 */
static srun_t *
cse_add_common_host(config_t *config, const char *hostname, int port, int ssl)
{
  int i;
  struct hostent *hostent;
  srun_t *srun;
  char *hostname_copy;
  struct in_addr *addr;

  LOG(("adding host %s:%d %s\n", hostname, port, ssl ? "ssl" : ""));

  /* Reuse an existing entry for the same host and port. */
  for (i = 0; i < g_srun_count; i++) {
    srun_t *old = g_srun_list[i];

    if (old && old->hostname &&
        ! strcmp(old->hostname, hostname) && old->port == port) {
      LOG(("old host %d %p\n", i, (void *) old));
      return old;
    }
  }

  if (g_srun_count >= 4096) {
    ERR(("too many hosts\n"));
    return 0;
  }

  /*
   * The slot may already hold a zeroed srun left over from an earlier
   * call whose host lookup failed; reuse it in that case.
   */
  srun = g_srun_list[g_srun_count];
  if (! srun) {
    srun = calloc(1, sizeof(*srun));
    if (! srun) {
      ERR(("out of memory adding host\n"));
      return 0;
    }
    g_srun_list[g_srun_count] = srun;
  }

  hostent = gethostbyname(hostname);
  if (! hostent || ! hostent->h_addr)
    return 0;

  hostname_copy = strdup(hostname);
  addr = malloc(sizeof(*addr));
  if (! hostname_copy || ! addr) {
    free(hostname_copy);
    free(addr);
    ERR(("out of memory adding host\n"));
    return 0;
  }
  memcpy(addr, hostent->h_addr, sizeof(*addr));

  srun->hostname = hostname_copy;
  srun->host = addr;
  srun->port = port;
  srun->conn_head = 0;
  srun->conn_tail = 0;
  srun->max_sockets = 32;

  srun->connect_timeout = CONNECT_TIMEOUT;
  srun->live_time = LIVE_TIME;
  srun->dead_time = DEAD_TIME;

  srun->open = std_open;
  srun->read = std_read;
  srun->write = std_write;
  srun->close = std_close;

#ifdef OPENSSL
  if (ssl) {
    SSL_CTX* ctx;
    SSL_METHOD *meth;

    SSLeay_add_ssl_algorithms();
    meth = TLSv1_client_method();
    SSL_load_error_strings();
    ctx = SSL_CTX_new(meth);

    if (ctx) {
      srun->ssl = ctx;
      srun->open = ssl_open;
      srun->read = ssl_read;
      srun->write = ssl_write;
      srun->close = ssl_close;
    }
    else {
      /* The plain-socket hooks installed above stay in place. */
      ERR(("can't initialize ssl"));
    }
  }
#endif

  srun->lock = cse_create_lock(config);

  g_srun_count++;

  LOG(("new host %d %p%s\n", g_srun_count, (void *) srun, ssl ? " ssl" : ""));
  return srun;
}

/**
 * Adds a new host to the configuration.
 *
 * The host is stored at index session (or appended when session is
 * negative), growing config->srun_list as needed.  All entries sharing
 * the same group id get a common, freshly built group array.
 *
 * The id and group_id pointers are stored as given, not copied.
 *
 * @param config configuration to update
 * @param hostname host name of the srun
 * @param port port of the srun
 * @param session index for the host, or negative to append
 * @param id server id stored on the entry
 * @param group_id group name; NULL is treated as ""
 * @param is_backup non-zero if the host is a backup
 * @param is_ssl non-zero to connect with SSL
 * @return the configured entry, or 0 if the host could not be added.
 */
srun_item_t *
cse_add_host_int(config_t *config, const char *hostname,
                 int port, int session, char *id, char *group_id,
                 int is_backup, int is_ssl)
{
  srun_t *srun;
  srun_item_t *srun_item;
  srun_item_t **group;
  int index;
  int resized = 0;
  int group_size;
  int i, j;

  index = session >= 0 ? session : config->srun_size;

  /* Resize if too many hosts. */
  while (index >= config->srun_capacity) {
    int capacity = config->srun_capacity;
    srun_item_t *srun_list;

    if (capacity == 0)
      capacity = 16;

    srun_list = (srun_item_t *) cse_alloc(config->p,
                                          2 * capacity * sizeof(srun_item_t));
    if (! srun_list) {
      cse_error(config, "Resin can't find host %s\n", hostname);
      return 0;
    }

    memset(srun_list, 0, 2 * capacity * sizeof(srun_item_t));
    if (config->srun_list)
      memcpy(srun_list, config->srun_list, capacity * sizeof(srun_item_t));
    config->srun_capacity = 2 * capacity;
    config->srun_list = srun_list;
    resized = 1;
  }

  /*
   * The entries moved to a new array, so the group arrays built by
   * earlier calls still point into the old one.  Re-point every
   * entry's own slot at its new location.
   */
  if (resized) {
    for (i = 0; i < config->srun_size; i++) {
      srun_item_t *item = &config->srun_list[i];

      if (item->group &&
          item->group_index >= 0 && item->group_index < item->group_size)
        item->group[item->group_index] = item;
    }
  }

  srun = cse_add_common_host(config, hostname, port, is_ssl);

  if (! srun) {
    cse_error(config, "Resin can't find host %s\n", hostname);

    return 0;
  }

  if (! group_id)
    group_id = "";

  /*
   * Use the pointer already stored for an equal group name, so group
   * membership can be tested by pointer comparison below.
   */
  for (i = 0; i < config->srun_size; i++) {
    if (config->srun_list[i].group_id &&
        ! strcmp(config->srun_list[i].group_id, group_id)) {
      group_id = config->srun_list[i].group_id;
    }
  }

  srun_item = &config->srun_list[index];
  srun_item->srun = srun;
  srun_item->is_backup = is_backup;
  srun_item->id = id;
  srun_item->group_id = group_id;
  srun_item->session = index;
  srun_item->is_valid = 1;

  if (index >= config->srun_size)
    config->srun_size = index + 1;

  /*
   * Count members only after the new entry is in place: counting
   * beforehand counts an overwritten slot twice and leaves trailing
   * group[] entries uninitialized.
   */
  group_size = 0;
  for (i = 0; i < config->srun_size; i++) {
    if (config->srun_list[i].group_id == group_id)
      group_size++;
  }

  /* Configure all servers in the group. */
  group = (srun_item_t **) cse_alloc(config->p,
                                     group_size * sizeof(srun_item_t *));
  if (! group) {
    cse_error(config, "Resin can't find host %s\n", hostname);
    return 0;
  }

  j = 0;
  for (i = 0; i < config->srun_size; i++) {
    if (config->srun_list[i].group_id == group_id) {
      config->srun_list[i].group = group;
      config->srun_list[i].group_index = j;
      config->srun_list[i].group_size = group_size;
      group[j++] = &config->srun_list[i];
    }
  }

  return srun_item;
}

/**
 * Attaches a pooled socket to the stream and resets the stream's
 * buffers, so it can be used for a new request.  The srun is marked
 * alive as a side effect.
 *
 * @param s stream to set up
 * @param config configuration the stream belongs to
 * @param srun server entry that owns the socket
 * @param socket pooled socket descriptor
 * @param ssl SSL session that belongs to the socket, or NULL
 * @param request_time unused
 * @param web_pool opaque pool handle stored on the stream
 */
static void
cse_reuse(stream_t *s, config_t *config, srun_item_t *srun,
          int socket, void *ssl,
          int request_time, void *web_pool)
{
  /* connection */
  s->socket = socket;
  s->ssl = ssl;
  s->srun = srun;

  /* configuration */
  s->config = config;
  s->pool = config->p;
  s->web_pool = web_pool;
  s->update_count = config->update_count;

  /* buffer state */
  s->write_length = 0;
  s->read_length = 0;
  s->read_offset = 0;
  s->sent_data = 0;

  srun->srun->is_dead = 0;

  LOG(("reopen %d\n", s->socket));
}

/**
 * Returns the stream's socket to its srun's connection pool so a later
 * request can reuse it.
 *
 * The socket is pooled only if it is open, the configuration has not
 * been updated since the stream was opened, and the pool ring has a
 * free slot.  Otherwise the connection is closed.
 *
 * @param s stream whose connection is being released
 * @param now current time, stored as the connection's idle start
 */
void
cse_recycle(stream_t *s, unsigned int now)
{
  int fd = s->socket;
  srun_t *srun = 0;

  if (s->srun)
    srun = s->srun->srun;

  if (! srun) {
    cse_close(s, "recycle");
    return;
  }

  cse_lock(srun->lock);

  if (fd >= 0 && s->config->update_count == s->update_count) {
    int slot = srun->conn_head;
    int following = (slot + 1) % CONN_POOL_SIZE;

    /* The ring is full when advancing the head would reach the tail. */
    if (following != srun->conn_tail) {
      s->socket = -1;
      cse_kill_socket_cleanup(fd, s->web_pool);
      srun->conn_pool[slot].socket = fd;
      srun->conn_pool[slot].ssl = s->ssl;
      srun->conn_pool[slot].last_time = now;
      srun->conn_head = following;
      cse_unlock(srun->lock);
      LOG(("recycle %d\n", fd));
      return;
    }
  }

  cse_unlock(srun->lock);

  if (fd < 0)
    return;

  LOG(("close2 %d update1:%d update2:%d max-sock:%d\n",
       fd, s->config->update_count, s->update_count,
       srun->max_sockets));

  cse_close(s, "recycle");
}

/**
 * Closes every pooled connection of the srun and empties its pool.
 *
 * @param config unused
 * @param srun server whose pooled connections are closed
 * @param now unused
 */
void
close_srun(config_t *config, srun_t *srun, unsigned int now)
{
  int tail;

  cse_lock(srun->lock);

  for (tail = srun->conn_tail;
       tail != srun->conn_head;
       tail = (tail + 1) % CONN_POOL_SIZE) {
    struct conn_t *conn = &srun->conn_pool[tail];

    /* Log the descriptor, not the struct: "%d" needs an int. */
    LOG(("close timeout %d\n", conn->socket));
    srun->close(conn->socket, conn->ssl);
  }
  srun->conn_head = srun->conn_tail = 0;

  cse_unlock(srun->lock);
}

/**
 * Tries to take a pooled connection of the srun for the stream.
 *
 * The pool is searched from the most recently recycled connection
 * backwards.  Connections idle for longer than srun->live_time are
 * closed and dropped on the way; the first one still fresh is attached
 * to the stream.
 *
 * @param s stream to set up
 * @param config configuration the stream belongs to
 * @param srun_item server entry whose pool is searched
 * @param now current time, compared with each connection's last_time
 * @param web_pool opaque pool handle stored on the stream
 * @return 1 if a pooled connection was attached, 0 otherwise.
 */
static int
cse_reuse_socket(stream_t *s, config_t *config, srun_item_t *srun_item,
                 unsigned int now, void *web_pool)
{
  int head;
  int next_head;
  srun_t *srun = srun_item->srun;

  /* Check before the log line below, which dereferences srun. */
  if (! srun)
    return 0;

  LOG(("reuse head:%d tail:%d\n", srun->conn_head, srun->conn_tail));

  /* Quick unlocked test for an empty pool. */
  if (srun->conn_head == srun->conn_tail)
    return 0;

  cse_lock(srun->lock);
  for (head = srun->conn_head;
       head != srun->conn_tail;
       head = next_head) {
    struct conn_t *conn;

    /* Step back one slot in the ring. */
    next_head = (head + CONN_POOL_SIZE - 1) % CONN_POOL_SIZE;

    conn = &srun->conn_pool[next_head];

    if (now > conn->last_time + srun->live_time) {
      LOG(("closing idle socket:%d\n", conn->socket));
      srun->close(conn->socket, conn->ssl);
    }
    else {
      int socket;
      void *ssl;

      socket = conn->socket;
      ssl = conn->ssl;
      srun->conn_head = next_head;

      cse_reuse(s, config, srun_item, socket, ssl, now, web_pool);
      cse_unlock(srun->lock);

      return 1;
    }
  }

  /* Everything was idle and has been closed: the pool is now empty. */
  srun->conn_head = head;
  cse_unlock(srun->lock);

  return 0;
}

/**
 * Closes the pooled connections of every srun in the configuration,
 * empties the pools, and resets the configuration's host count to 0.
 *
 * @param config configuration whose hosts are closed
 */
void
cse_close_sockets(config_t *config)
{
  int i;

  for (i = 0; i < config->srun_size; i++) {
    srun_t *srun = config->srun_list[i].srun;
    int pos;

    if (srun) {
      cse_lock(srun->lock);

      pos = srun->conn_tail;
      while (pos != srun->conn_head) {
        struct conn_t *conn = &srun->conn_pool[pos];
        int fd = conn->socket;

        if (fd >= 0)
          srun->close(fd, conn->ssl);

        pos = (pos + 1) % CONN_POOL_SIZE;
      }

      srun->conn_head = 0;
      srun->conn_tail = 0;

      cse_unlock(srun->lock);
    }
  }

  config->srun_size = 0;
}

/**
 * Closes the pooled connections of every srun registered in
 * g_srun_list and empties their pools.
 */
void
cse_close_all()
{
  int i;

  for (i = 0; i < g_srun_count; i++) {
    srun_t *srun = g_srun_list[i];
    int pos;

    if (srun) {
      cse_lock(srun->lock);

      pos = srun->conn_tail;
      while (pos != srun->conn_head) {
        struct conn_t *conn = &srun->conn_pool[pos];
        int fd = conn->socket;

        if (fd >= 0)
          srun->close(fd, conn->ssl);

        pos = (pos + 1) % CONN_POOL_SIZE;
      }

      srun->conn_head = 0;
      srun->conn_tail = 0;

      cse_unlock(srun->lock);
    }
  }
}

/**
 * Picks the host that should serve a request without a session.
 *
 * The round-robin position is advanced to the next non-backup host.
 * Starting there, the live host with the fewest active sockets is
 * chosen; hosts that are marked dead and still inside their dead_time
 * window are skipped.  As a side effect, pooled connections of the
 * visited hosts that have been idle for live_time or longer are closed.
 *
 * @param config configuration holding the host list
 * @param now current time
 * @return index into config->srun_list of the selected host.
 */
static int
select_host(config_t *config, unsigned int now)
{
  int size;
  int round_robin;
  int i;
  int best_srun;
  int best_cost = 0x7fffffff;

  size = config->srun_size;
  if (size < 1)
    size = 1;

  /* First use: start the rotation at a pseudo-random position. */
  if (config->round_robin_index < 0) {
    /* Convert via size_t: casting a pointer straight to int truncates. */
    srand((unsigned int) (65521 * time(0) + getpid() + (size_t) config));
    round_robin = rand();
    if (round_robin < 0)
      round_robin = -round_robin;

    config->round_robin_index = round_robin % size;
  }

  round_robin = (config->round_robin_index + 1) % size;

  /* Skip over backups, which only serve when the primaries cannot. */
  for (i = 0; i < size; i++) {
    srun_item_t *srun_item = &config->srun_list[round_robin];

    if (! srun_item->is_backup)
      break;

    round_robin = (round_robin + 1) % size;
  }

  config->round_robin_index = round_robin;
  best_srun = round_robin;

  for (i = 0; i < size; i++) {
    int index = (i + round_robin) % size;
    srun_item_t *srun_item = &config->srun_list[index];
    srun_t *srun = srun_item->srun;
    int tail;
    int cost;

    if (! srun)
      continue;

    cost = srun->active_sockets;

    if (srun->is_dead && now < srun->fail_time + srun->dead_time)
      continue;
    else if (cost < best_cost) {
      best_srun = index;
      best_cost = cost;
    }

    /*
     * Close idle connections.  The pool ring is shared with
     * cse_recycle() and cse_reuse_socket(), which change it under
     * srun->lock, so hold the same lock here.
     */
    cse_lock(srun->lock);

    for (tail = srun->conn_tail;
         tail != srun->conn_head;
         tail = (tail + 1) % CONN_POOL_SIZE) {
      struct conn_t *conn = &srun->conn_pool[tail];

      /* Entries are ordered oldest first; stop at the first fresh one. */
      if (now < conn->last_time + srun->live_time)
        break;

      srun->close(conn->socket, conn->ssl);
      srun->conn_tail = (tail + 1) % CONN_POOL_SIZE;
    }

    cse_unlock(srun->lock);
  }

  return best_srun;
}

/**
 * Opens a connection to one member of the owner's group.
 *
 * A negative offset selects the owner itself.  Otherwise offset picks
 * one of the other group members, counted from the owner's position;
 * this fails if the group has fewer than two members.
 *
 * A pooled connection is preferred over a new one.  If a new connect
 * fails, the srun is marked dead as of now.
 *
 * @param s stream to open
 * @param config configuration the stream belongs to
 * @param owner_item entry that owns the session
 * @param offset negative for the owner, else selector for a group peer
 * @param now current time
 * @param web_pool opaque pool handle stored on the stream
 * @param ignore_dead non-zero to skip a server still inside its
 *        dead_time window; zero forces a blocking connect attempt
 * @return 1 if the stream is connected, 0 otherwise.
 */
static int
open_connection_group(stream_t *s, config_t *config,
                      srun_item_t *owner_item, int offset,
                      unsigned int now, void *web_pool,
                      int ignore_dead)
{
  srun_item_t *target;
  srun_t *srun;

  if (offset < 0) {
    target = owner_item;
  }
  else {
    int members = owner_item->group_size;
    int step;

    if (members < 2)
      return 0;

    /* step is in 1..members-1, so the owner itself is never picked. */
    step = offset % (members - 1) + 1;
    target = owner_item->group[(owner_item->group_index + step) % members];
  }

  srun = target->srun;

  if (! srun)
    return 0;

  if (cse_reuse_socket(s, config, target, now, web_pool)) {
    srun->is_dead = 0;
    return 1;
  }

  if (ignore_dead &&
      srun->is_dead && now < srun->fail_time + srun->dead_time)
    return 0;

  if (cse_open(s, config, target, web_pool, ! ignore_dead)) {
    s->srun = target;
    srun->is_dead = 0;
    return 1;
  }

  srun->is_dead = 1;
  srun->fail_time = now;

  return 0;
}

/**
 * Tries every configured host in turn, starting at index host, until
 * one yields a connection.
 *
 * For each host a pooled connection is tried first.  With ignore_dead
 * set, backups and servers still inside their dead_time window are not
 * given a new connect attempt.  A failed connect marks the srun dead
 * as of now.
 *
 * @param s stream to open
 * @param config configuration holding the host list
 * @param host index of the first host to try
 * @param now current time
 * @param web_pool opaque pool handle stored on the stream
 * @param ignore_dead non-zero to skip backups and dead servers; zero
 *        forces a blocking connect attempt on every host
 * @return 1 if the stream is connected, 0 otherwise.
 */
static int
open_connection_any_host(stream_t *s, config_t *config, int host,
                         unsigned int now, void *web_pool, int ignore_dead)
{
  int i;
  int size = config->srun_size;

  if (size < 1)
    size = 1;

  for (i = 0; i < size; i++) {
    srun_item_t *candidate = config->srun_list + (host + i) % size;
    srun_t *srun = candidate->srun;

    if (! srun)
      continue;

    if (cse_reuse_socket(s, config, candidate, now, web_pool)) {
      srun->is_dead = 0;
      return 1;
    }

    if (ignore_dead) {
      if (candidate->is_backup)
        continue;

      if (srun->is_dead && now < srun->fail_time + srun->dead_time)
        continue;
    }

    if (cse_open(s, config, candidate, web_pool, ! ignore_dead)) {
      s->srun = candidate;
      srun->is_dead = 0;
      return 1;
    }

    srun->is_dead = 1;
    srun->fail_time = now;
  }

  return 0;
}

/**
 * Opens a connection for a request that carries a session.
 *
 * Finds the first host whose session index matches and tries, in this
 * order: the owner, then its backup, both skipping dead servers; then
 * the owner and the backup again with a forced connect.
 *
 * @param s stream to open
 * @param config configuration holding the host list
 * @param session_index session index decoded from the request
 * @param backup_index backup selector decoded from the request
 * @param now current time
 * @param web_pool opaque pool handle stored on the stream
 * @return 1 if the stream is connected, 0 if no host owns the session
 *         or every attempt failed.
 */
static int
open_session_host(stream_t *s, config_t *config,
                  int session_index, int backup_index,
                  unsigned int now, void *web_pool)
{
  int host;
  int size = config->srun_size;

  if (size < 1)
    size = 1;

  for (host = 0; host < size; host++) {
    srun_item_t *owner = &config->srun_list[host];

    if (owner->session != session_index)
      continue;

    /* Short-circuit: the first successful attempt ends the chain. */
    return (open_connection_group(s, config, owner, -1,
                                  now, web_pool, 1) ||
            open_connection_group(s, config, owner, backup_index,
                                  now, web_pool, 1) ||
            open_connection_group(s, config, owner, -1,
                                  now, web_pool, 0) ||
            open_connection_group(s, config, owner, backup_index,
                                  now, web_pool, 0));
  }

  return 0;
}

/**
 * Opens a connection for a request.
 *
 * A request with a session is first sent to the session's owner or its
 * backup.  If that fails, or there is no session, a host is selected
 * by load and all hosts are tried from there: first skipping dead
 * servers and backups, then including them.
 *
 * @param s stream to open
 * @param config configuration holding the host list
 * @param session_index session index, or negative for no session
 * @param backup_index backup selector for the session
 * @param now current time
 * @param web_pool opaque pool handle stored on the stream
 * @return 1 if the stream is connected, 0 otherwise.
 */
static int
open_connection(stream_t *s, config_t *config,
                int session_index, int backup_index,
                unsigned int now, void *web_pool)
{
  int host;

  if (session_index >= 0 &&
      open_session_host(s, config, session_index, backup_index,
                        now, web_pool))
    return 1;

  host = select_host(config, now);

  if (host < 0)
    host = -host;

  /* try opening while ignoring dead servers and backups */
  if (open_connection_any_host(s, config, host, now, web_pool, 1))
    return 1;

  /* otherwise try the dead servers and backups too */
  return open_connection_any_host(s, config, host, now, web_pool, 0);
}

/**
 * Resets the stream and opens a connection to a suitable srun.
 *
 * When sticky sessions are disabled in the configuration, the session
 * index is ignored.  On success a cleanup is registered for the
 * socket with web_pool.
 *
 * @param s stream to initialize and connect
 * @param config configuration holding the host list
 * @param session_index session index, or negative for no session
 * @param backup_index backup selector for the session
 * @param now current time
 * @param web_pool opaque pool handle stored on the stream
 * @return 1 if the stream is connected, 0 otherwise.
 */
int
cse_open_connection(stream_t *s, config_t *config,
                    int session_index, int backup_index,
                    unsigned int now, void *web_pool)
{
  /* connection */
  s->socket = -1;
  s->srun = 0;

  /* configuration */
  s->config = config;
  s->pool = config->p;
  s->web_pool = web_pool;
  s->update_count = config->update_count;

  /* buffer state */
  s->write_length = 0;
  s->read_length = 0;
  s->read_offset = 0;
  s->sent_data = 0;

  if (config->disable_sticky_sessions)
    session_index = -1;

  if (! open_connection(s, config, session_index, backup_index,
                        now, web_pool))
    return 0;

  cse_set_socket_cleanup(s->socket, web_pool);

  return 1;
}
