question archive Accordingly, you will use a class called "WebPageGetter" whose purpose is to retrieve and display data obtained from static web pages on the internet! The WebPageGetter class is instantiated by supplying a constant string representing the hostname of the website to visit
Subject:Computer SciencePrice: Bought3
Accordingly, you will use a class called "WebPageGetter" whose purpose is to retrieve and display data obtained from static web pages on the internet! The WebPageGetter class is instantiated by supplying a constant string representing the hostname of the website to visit.
The WebPageGetter class is written in C++ and works in both Windows and Mac OS X/Linux environments.
To use the program on a non-Windows platform, it will be necessary to comment
out line #5 in the web.h file:
#define WINDOWS
For example:
WebPageGetter wpg1("www.bitsandbots.ca");
The WebPageGetter class also makes available the following member
functions:
void setHostName(const char *);
This function changes the hostname to visit.
void setPage(const char *);
This function sets the page to visit to a specific page on the host.
void startHtml(const char *file, const char *title);
This function accepts 2 constant strings representing the name of a file
and the title of the webpage that is being visited.
The function creates the starting HTML tags needed to make a
html page that can be viewed by a web browser by writing the data to
the file, but MUST be used in connection with its counterpart function:
endHtml(const char *file) in order to make a complete web page.
void endHtml(const char *);
This function accepts a constant string representing the name of a file.
The function creates the ending HTML tags needed to complete the html
page by appending them to the file whose name is sent to the function
(see example below).
char* getPage( );
This function retrieves the html page from the host and returns
an address to the allocated memory.
For example, the following program:
#include <iostream>
using namespace std;
// Example driver: fetches one page from the host and prints its body
// on standard output.
int main( ) {
    WebPageGetter wpg1("www.bitsandbots.ca");
    wpg1.setPage("danny/messages/message1");

    // getPage( ) hands back the address of the text it retrieved;
    // nothing is printed when that address is null.
    char *page = wpg1.getPage( );
    if (page != 0) {
        cout << page << endl;
    }
    return 0;
}
would retrieve the data at: www.bitsandbots.ca/danny/messages/message1
and display it on standard output.
I have already written the class "WebPageGetter" (see below), so you will only
be creating a new class that is derived from this class.
web.h
web.cpp
(CODES BELOW).
You will make a new class called "WebWriter" that is derived from "WebPageGetter"
and does the following:
1. Stores a char pointer to hold the address of a string in memory
as well as a char array (1024 character limit) that holds the name of a
file.
2. Has a constructor that accepts a hostname and calls the base class constructor
accordingly.
3. Has the following member function:
writePage(const char *pageLink);
This function accepts a web link on the hostname specified when the
object was created and does the following:
a) Extracts and stores the name of the file from the end of the pageLink string
as follows:
Assuming pageLink is: "danny/messages/message1", then the filename
to be stored would be: "message1.html" (starts reading from the end).
If, on the other hand, pageLink is simply: "message1", then the filename
to be stored would still be: "message1.html".
b) Calls the appropriate function from the base class to retrieve the
web page and stores the address in the class's pointer member.
NOTE: You do not need to consider memory for this class, the WebPageGetter
class takes care of all dynamic memory allocation and removal for you.
c) Next, this function calls the base class's startHtml( ) function by supplying
it the filename as both parameters.
d) Opens the file for appending (appending ONLY) and writes all of the content
being pointed to by the char pointer to the file (character by character)
and then closes the file.
e) Finally, this function calls the base class's endHtml( ) function by supplying
it the filename.
--- web.h file ----
// web.h
#define URLLIMIT 1024
// comment the line below for usage on NON windows platforms (Mac OS or Linux)
// #define WINDOWS
#ifdef WINDOWS
#include <winsock.h>
#else
#define SOCKET_ERROR -1
#define INVALID_SOCKET -1
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <unistd.h>
// int close(int);
#endif
/*
 * WebPageGetter
 *
 * Retrieves pages from a web host over a TCP connection to port 80 and
 * keeps every retrieved page body in memory until the object is destroyed.
 * It can also write the opening and closing HTML needed to wrap retrieved
 * text in a viewable page (startHtml / endHtml).
 *
 * The object owns the buffers stored in 'pages' (allocated with new[] in
 * fetchData( ), released in the destructor). Copying is therefore disabled:
 * a member-wise copy would leave two objects deleting the same buffers.
 */
class WebPageGetter {
private:
#ifdef WINDOWS
    WSADATA wsaData; /* The WSADATA structure contains information about
                        the Windows Sockets implementation. */
#endif
    char *pages[URLLIMIT*4];        // bodies of the pages fetched so far (owned)
    int sd, fileSize, pagesVisited; // socket descriptor, length of the body being
                                    // fetched, number of used slots in 'pages'
    struct sockaddr_in server;      // address of the host, filled in by init( )
    struct hostent *he;             // result of the host name lookup
    struct in_addr **addr_list;     // address list taken from 'he'
    char ip[17], host[URLLIMIT], page[URLLIMIT]; // dotted IP, host name, page path
    char buf[2], url[URLLIMIT*4];   // receive scratch byte, HTTP request text

    int init(const char *);         // resolves the host and prepares 'server'
    void initPages( );              // empties 'pages' and resets pagesVisited
    int fetchData(int);             // performs one HTTP GET; see web.cpp

    // Not copyable: declared private and intentionally left undefined so an
    // accidental copy fails to build instead of double-deleting 'pages'.
    WebPageGetter(const WebPageGetter &);
    WebPageGetter& operator=(const WebPageGetter &);
public:
    WebPageGetter( );                            // default host and page
    WebPageGetter(const char *);                 // host name to visit
    void setHostName(const char *);              // changes the host to visit
    void setPage(const char *);                  // selects the page on the host
    void startHtml(const char *, const char *);  // (file, title): writes opening HTML
    void endHtml(const char *);                  // (file): appends closing HTML
    char* getPage( );                            // fetches the page; the returned
                                                 // memory stays owned by this object
    ~WebPageGetter( );
};
--- .cpp file ---
// web.cpp
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include "web.h"
/*
 * Resolves 'hostName' and prepares the 'server' address used by later
 * connections (IPv4, port 80). Also resets 'fileSize' to 0.
 *
 * hostName: name of the host to look up.
 * Returns 1 on success and 0 on failure. On failure a message is written
 * to stderr, 'ip' is left as an empty string and 'server' is left zeroed,
 * so no stale or uninitialised address is kept.
 */
int WebPageGetter::init(const char *hostName) {
    fileSize = 0;

    // Start from a defined state so that a failed lookup can never leave
    // garbage behind for a later connect( ).
    memset(&server, 0, sizeof(server));
    ip[0] = '\0';

#ifdef WINDOWS
    /* The WSAStartup function initiates use of
       WS2_32.DLL by a process, and returns zero if successful.
    */
    if(WSAStartup(MAKEWORD(1, 1), &wsaData)) {
        fprintf(stderr, "Error initializing Windows sockets...\n");
        return 0;
    }
#endif

    /* using gethostbyname( ) to convert hostname to ip address */
    he = gethostbyname(hostName);
    if(NULL == he) {
        // Must stop here: 'he' is dereferenced below.
        fprintf(stderr, "Error getting hostname: %s\n", hostName);
        return 0;
    }

    addr_list = (struct in_addr **) he->h_addr_list;
    if(NULL == addr_list || NULL == addr_list[0]) {
        // A lookup without any address gives nothing to connect to.
        fprintf(stderr, "Error getting hostname: %s\n", hostName);
        return 0;
    }
    strcpy(ip, inet_ntoa(*addr_list[0]));

    // initialize sockaddr_in members
    server.sin_family = AF_INET;
    server.sin_port = htons(80); /* assuming web traffic on port 80 */
    server.sin_addr.s_addr = inet_addr(ip);
    memset(&(server.sin_zero), '\0', 8);
    return 1;
}
/*
 * Marks every slot of 'pages' as empty and resets the count of
 * pages retrieved so far.
 */
void WebPageGetter::initPages( ) {
    const int slotCount = URLLIMIT * 4;
    int slot = 0;
    while(slot < slotCount) {
        pages[slot] = NULL;
        ++slot;
    }
    pagesVisited = 0;
}
int WebPageGetter::fetchData(int pass = 0) {
int n, index, okToWrite = 0, fSize;
char buf[2], prev, next, file[21];
/* create socket and connect to server */
sd = socket(AF_INET, SOCK_STREAM, 0);
connect(sd, (struct sockaddr *)&server, sizeof(struct sockaddr_in));
sprintf(url, "GET /%s HTTP/1.0rnHost: %srnrn", page, host);
send(sd, url, strlen(url), 0);
memset(&(buf), '', sizeof(buf)); /* zero out the 'buf' array */
// allocate memory for pointer after the first pass
if(pass) {
pagesVisited++;
pages[pagesVisited - 1] = new char[fileSize + 1];
}
n = recv(sd, buf, 1, 0);
if(n == SOCKET_ERROR) {
fprintf(stderr, "Error: Could not receive data from socket...n");
#ifdef WINDOWS
WSAGetLastError( );
closesocket(sd);
#else
close(sd);
#endif
exit(EXIT_FAILURE);
}
prev = next = '?'; // used to bypass header and response code
index = fSize = 0;
while(n) {
prev = buf[0];
if(next == 'n' && prev == 'r') {
n = recv(sd, buf, 1, 0); // receive data from the server 1 byte at a time
n = recv(sd, buf, 1, 0);
okToWrite = 1;
}
if(okToWrite) {
if(pass)
pages[pagesVisited - 1][index++] = buf[0];
fSize++;
}
next = prev;
n = recv(sd, buf, 1, 0);
}
if(pass)
pages[pagesVisited - 1][index++] = '';
#ifdef WINDOWS
closesocket(sd);
WSACleanup( );
#else
close(sd);
#endif
return fSize;
}
/*
 * Default constructor: targets "index.html" on "www.google.com",
 * starts with an empty page list and resolves the host.
 */
WebPageGetter::WebPageGetter( ) {
    static const char defaultHost[] = "www.google.com";
    static const char defaultPage[] = "index.html";

    initPages( );
    strcpy(page, defaultPage);
    strcpy(host, defaultHost);
    init(host);
}
/*
 * Constructs a getter for the given host.
 *
 * hostName: NUL-terminated name of the host to visit (must not be null).
 *           Names longer than the 'host' buffer are truncated to fit.
 *
 * The page path starts out empty, so a getPage( ) issued before any
 * setPage( ) requests "/" instead of reading an uninitialised buffer.
 */
WebPageGetter::WebPageGetter(const char *hostName) {
    // Bounded copy: 'host' has a fixed size.
    strncpy(host, hostName, sizeof(host) - 1);
    host[sizeof(host) - 1] = '\0';
    page[0] = '\0';
    init(host);
    initPages( );
}
/*
 * Releases every page buffer recorded in 'pages', newest first.
 */
WebPageGetter::~WebPageGetter( ) {
    int slot = pagesVisited;
    while(slot-- > 0) {
        // delete [ ] on a null pointer does nothing, so no test is needed
        delete [ ] pages[slot];
    }
}
/*
 * Changes the host to visit and resolves it again.
 *
 * hostName: NUL-terminated name of the new host (must not be null).
 *           Names longer than the 'host' buffer are truncated to fit.
 * A message is written to stderr when the host cannot be initialised.
 */
void WebPageGetter::setHostName(const char *hostName) {
    // Bounded copy: 'host' has a fixed size.
    strncpy(host, hostName, sizeof(host) - 1);
    host[sizeof(host) - 1] = '\0';
    if(!init(host))
        fprintf(stderr, "Error initializing socket/network for host: %s\n", host);
}
/*
 * Selects the page to request from the current host and re-initialises
 * the connection data for that host.
 *
 * webPage: NUL-terminated page path, without a leading '/' (must not be
 *          null). Paths longer than the 'page' buffer are truncated to fit.
 * A message is written to stderr when the host cannot be initialised.
 */
void WebPageGetter::setPage(const char *webPage) {
    // Bounded copy: 'page' has a fixed size.
    strncpy(page, webPage, sizeof(page) - 1);
    page[sizeof(page) - 1] = '\0';
    if(!init(host))
        fprintf(stderr, "Error initializing socket/network for host: %s\n", host);
}
/*
 * Retrieves the current page in two requests: the first measures the
 * length of the body, the second stores the body in a new entry of 'pages'.
 * Returns the address of that entry; the memory is released by the
 * destructor.
 */
char* WebPageGetter::getPage( ) {
    const int bodyLength = fetchData( ); // counting pass
    fileSize = bodyLength;
    fetchData(1);                        // storing pass
    char *latest = pages[pagesVisited - 1];
    return latest;
}
/*
 * Creates (or truncates) 'file' and writes the opening part of an
 * XHTML 1.0 Strict page: XML declaration, DOCTYPE, <html>, a <head>
 * carrying 'title', and the opening <body><pre>.
 *
 * file:  name of the file to write.
 * title: text placed inside the <title> element (written as given).
 *
 * Terminates the program with a message on stderr if the file cannot be
 * opened. The page is only complete once endHtml( ) has been called on
 * the same file.
 */
void WebPageGetter::startHtml(const char *file, const char *title) {
    FILE *fp;
    fp = fopen(file, "w");
    if(fp == NULL) {
        fprintf(stderr, "startHtml( )...\nFILE I/O Error... Could not not write to file: '%s'\n", file);
        exit(EXIT_FAILURE);
    }
    fprintf(fp, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
    fprintf(fp, " <!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n");
    fprintf(fp, " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n");
    fprintf(fp, "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n");
    fprintf(fp, "<head><title>%s</title></head>\n", title);
    fprintf(fp, "<body><pre>\n");
    fclose(fp);
}
/*
 * Appends the closing part of the page to 'file': the end of the <pre>
 * block, a paragraph holding the W3C validator link and badge, and the
 * closing </body></html>.
 *
 * file: name of the file to append to; the opening part is expected to
 *       have been written with startHtml( ).
 *
 * Terminates the program with a message on stderr if the file cannot be
 * opened.
 */
void WebPageGetter::endHtml(const char *file) {
    FILE *fp;
    fp = fopen(file, "a");
    if(fp == NULL) {
        fprintf(stderr, "endHtml( )...\nFILE I/O Error... Could not not append to file: '%s'\n", file);
        exit(EXIT_FAILURE);
    }
    fprintf(fp, "</pre><p>\n");
    fprintf(fp, "<a href=\"http://validator.w3.org/check?uri=referer\">\n");
    fprintf(fp, "<img src=\"http://www.w3.org/Icons/valid-xhtml10\" ");
    fprintf(fp, " border=\"0\" alt=\"Valid XHTML 1.0!\" height=\"31\" width=\"88\" /></a>\n");
    fprintf(fp, "</p>\n");
    fprintf(fp, "</body></html>\n");
    fclose(fp);
}