/***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Thu Oct 4 21:49:09 CEST 2001: ported to g++ 3.0 * Sun Apr 7 11:59:03 CEST 2002: Handle URLs with missing node * Mon Jul 22 13:53:02 CEST 2002: Made finaly reading from STDIN work. */ /***************************************************************************/ #include #include #include #include #include #include #include #include #include #ifdef SYS_POLL_MISSING /* { */ struct pollfd { int fd; /* file descriptor */ short events; /* requested events */ short revents; /* returned events */ }; extern "C" int poll(struct pollfd *ufds, unsigned int nfds, int timeout); #define POLLIN 0x0001 /* There is data to read */ #define POLLPRI 0x0002 /* There is urgent data to read */ #define POLLOUT 0x0004 /* Writing now will not block */ #define POLLERR 0x0008 /* Error condition */ #define POLLHUP 0x0010 /* Hung up */ #define POLLNVAL 0x0020 /* Invalid request: fd not open */ #else /* } { */ #include #endif /* } */ #include #include #include #include #include "urlistream.h" using std::ios; /* ------------------------------------------------------------------------- */ /* * Compile with * * CC -DTESTING urlistream.C -o urlcat */ /* ------------------------------------------------------------------------- */ void urlistream::open( const char *url, int timeout /* = default_timeout */ // Milliseconds ) { if (is_open()) close(); /* * "http:"
URL? */ if (!memcmp(url, "http:", 5)) { http_open(url + 5, timeout); return; } /* * "file:" URL? */ if (!memcmp(url, "file:", 5)) { file_open(url + 5); return; } /* * is the URL a bare file name? */ if (strchr(url, ':') == NULL) { file_open(url); return; } open_error_ = "Unknown protocol (only \"file:\" and \"http:\" allowed)"; } void urlistream::open( const string &url, int timeout /* = default_timeout */ // Milliseconds ) { open(url.c_str(), timeout); } /* ------------------------------------------------------------------------- */ void urlistream::http_open( const char *address, // The URL portion after "http:" int timeout // Milliseconds ) { /* * Break up the HTTP address: * * "//" [ ":" ] * * A missing node is interpreted as node "/" - Arno */ char host_name[100]; char port_name[100]; char node_name[1000]; { const char *p = address; char *q; if (*p++ != '/' || *p++ != '/') { open_error_ = "HTTP address does not begin with \"//\""; return; } for (q = host_name; *p && *p != ':' && *p != '/'; ++p) { if (q < host_name + sizeof(host_name) - 1) *q++ = *p; } *q = '\0'; if (*p == ':') { ++p; for (q = port_name; *p && *p != '/'; ++p) { if (q < port_name + sizeof(port_name) - 1) *q++ = *p; } *q = '\0'; } else { strcpy(port_name, "80"); } for (q = node_name; *p && *p != '#'; ++p) { if (q < node_name + sizeof(node_name) - 1) *q++ = *p; } *q = '\0'; if (!node_name[0]) { strcpy(node_name, "/"); } } struct sockaddr_in soc_address; soc_address.sin_family = AF_INET; /* * Parse the host name. */ { const char *p; int dot_count = 0; for (p = host_name; *p; ++p) { if (*p == '.') { ++dot_count; } else if (!isdigit(*p)) break; } if (*p == '\0' && dot_count == 3) { soc_address.sin_addr.s_addr = inet_addr(host_name); } else { struct hostent *h = gethostbyname(host_name); if ( h == 0 || h->h_addrtype != AF_INET || h->h_length != sizeof(struct in_addr) ) { open_error_ = "Could not resolve host name"; return; } soc_address.sin_addr = *(struct in_addr *) h->h_addr; } } /* * Parse the port name. */ if (isdigit(port_name[0])) { soc_address.sin_port = htons(atoi(port_name)); } else { struct servent *s = getservbyname(port_name, 0); soc_address.sin_port = htons(s ? s->s_port : 80); } /* * Strip the "#anchor" suffix from the node name. */ { char *p = strchr(node_name, '#'); if (p) *p = '\0'; } /* * On-the-fly definition of "FileHandle" which closes a UNIX file descriptor * on destruction. */ class FileHandle { public: FileHandle() : fd(-1) {} ~FileHandle() { if (fd != -1) ::close(fd); } operator int() { return fd; } int operator=(int x) { return (fd = x); } private: int fd; } fd; fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { open_error_ = strerror(errno); return; } /* * Make the socket non-blocking, so the "connect()" can be canceled. This * means that when we issue the "connect()" we should NOT have to wait for * the accept on the other end. */ if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { open_error_ = strerror(errno); return; } /* * Issue the "connect()". Since the server can't do an instantaneous * "accept()" and we are non-blocking, this will almost certainly return * a negative status. */ if (connect( fd, (struct sockaddr *) &soc_address, sizeof(soc_address) ) == -1) { if (errno != EINPROGRESS && errno != EAGAIN) { open_error_ = strerror(errno); return; } for (;;) { struct pollfd p; int res; p.fd = fd; p.events = POLLOUT; res = poll(&p, 1, timeout); /* * Interrupted "poll()"? */ if (res == -1 && errno == EINTR) continue; /* * Check for errors. */ if (res == -1 && errno != EALREADY) { open_error_ = strerror(errno); return; } /* * Check for timeout. */ if (res == 0) { open_error_ = "\"connect()\" timed out"; return; } /* * Extra check here for connection success, if we try to * connect again, and get EISCONN, it means we have a * successful connection. * * Notice: On SINIX 5.43 B2000, the "poll()" returns "1" when the * timeout occurs (!?). When we call "poll()", we encounter a SIGPIPE * (!?). If we ignore it, "poll()" returns EINVAL (22) (!?). */ void (*sigpipe_handler)(int) = signal(SIGPIPE, SIG_IGN); /* { */ res = connect(fd, (struct sockaddr *) &soc_address, sizeof(soc_address)); (void) signal(SIGPIPE, sigpipe_handler); /* } */ if (res == -1 && errno == EISCONN) break; if (res == -1 && errno != EALREADY) { open_error_ = strerror(errno); return; } } } /* * Make the socket blocking again on good "connect()". */ if (fcntl(fd, F_SETFL, 0) == -1) { open_error_ = strerror(errno); return; } /* * Issue the HTTP request. * * Notice: "GET xyz" means "return the document without a header". */ char command[4 + (sizeof(node_name) - 1) + 4 + 1]; sprintf(command, "GET %s\r\n\r\n", node_name); ssize_t command_length = strlen(command); if (::write(fd, command, command_length) != command_length) { open_error_ = "Error sending HTTP GET request"; return; } /* * Attach the file descriptor to the ifstream. */ fd_ = fd; fd = -1; // ...so that it is not implicitly "::close()"'d. } /* ------------------------------------------------------------------------- */ void urlistream::file_open(const char *file_name) { fd_ = !strcmp(file_name, "-") ? ::dup(0) : ::open(file_name, O_RDONLY); open_error_ = strerror(errno); } /* ------------------------------------------------------------------------- */ const char * urlistream::open_error() const { return open_error_ ? open_error_ : "No error"; } /* ------------------------------------------------------------------------- */ int urlistream::get() { char ch; int ret = ::read(fd_, &ch, 1); return (ret > 0 ? ch : -1); } #ifdef TESTING /* { */ int main(int argc, char **argv) { if (argc < 2) { cerr << "Usage: urlcat [ ... ]" << endl; exit(1); } for (int i = 1; i < argc; ++i) { urlistream uis(argv[i]); if (!uis.is_open()) { cerr << "Opening \"" << argv[i] << "\": " << uis.open_error() << endl; exit(1); } for (;;) { int c = uis.get(); if (c == EOF) break; cout << (char) c; } } return 0; } #endif /* } */ /* ------------------------------------------------------------------------- */