CS110: Principles of Computer Systems
Winter 2021-2022
Stanford University
Instructors: Nick Troccoli and Jerry Cain
Introduction to Networking
Servers / HTTP
HTTP and APIs
Networking System Calls / Library Functions
assign6: implement an HTTP Proxy that sits between a client device and a web server to monitor, block or modify web traffic.
web-get is a program that, given a URL, downloads a single document (HTML document, image, video, etc.) and saves a copy of it to the current working directory.
/* Connects to the given host on the default HTTP port, sends an HTTP/1.0
 * GET request for the given path, and passes the response stream to
 * readResponse, which saves the payload under the name getFileName(path)
 * produces. Prints an error and returns early if the connection fails.
 */
static void fetchContent(const string& host, const string& path) {
  // Create a connection to the server on the HTTP port
  int socketDescriptor = createClientSocket(host, kDefaultHTTPPort);
  if (socketDescriptor == kClientSocketError) {
    cerr << "Could not connect to host named \"" << host << "\"." << endl;
    return;
  }

  // Wrap the descriptor in a stream so we can use << and getline on it
  sockbuf socketBuffer(socketDescriptor);
  iosockstream socketStream(&socketBuffer);

  // Send our request (using HTTP/1.0 for simpler requests).
  // Every line, including the blank line that ends the request, ends in CRLF,
  // and we flush so the request actually leaves our buffer.
  socketStream << "GET " << path << " HTTP/1.0\r\n";
  socketStream << "Host: " << host << "\r\n";
  socketStream << "\r\n" << flush;

  readResponse(socketStream, getFileName(path));
}
Step 2: Send an HTTP GET request to the server for that resource
Note: It's standard HTTP-protocol practice that each line, including the blank line that marks the end of the request, ends in CRLF (short for carriage-return-line-feed), which is '\r' followed by '\n'. We must also flush so the buffered request is actually sent!
Step 3: Read through the server's HTTP response and save its payload data to a file
/* Consumes the status line and headers of an HTTP response (none of that
 * information is needed here), then passes the stream, now positioned at the
 * payload, to readAndSavePayload along with the destination filename.
 */
static void readResponse(iosockstream& socketStream, const string& filename) {
  // The header section ends at the first line that is empty or just "\r"
  // (getline already consumed the "\n").
  string headerLine;
  do {
    headerLine.clear();
    getline(socketStream, headerLine);
  } while (!headerLine.empty() && headerLine != "\r");

  readAndSavePayload(socketStream, filename);
}
We keep reading lines until we encounter one that is empty or "\r" (getline consumes the \n). That means we have gotten to the payload. We include line.empty() in case the server forgot the "\r".
Let's write a web application for finding valid scrabble words given certain letters.
We are going to build a web application that lets users find valid scrabble words given certain letters.
scrabble-word-finder-server is a server that can respond to requests for an HTML page, and requests for a list of words given specified letters.
Step 1: open a server socket and listen for incoming HTTP requests
/* Entry point for the server: opens a server socket on the port given as the
 * first command-line argument, then accepts connections forever, scheduling
 * each one on a thread pool to be handled by handleRequest.
 */
int main(int argc, char *argv[]) {
  // The port is required; calling atoi on a missing argv[1] would read through NULL
  if (argc < 2) {
    cerr << "Usage: " << (argc > 0 ? argv[0] : "scrabble-word-finder-server") << " <port>" << endl;
    return 1;
  }

  unsigned short port = atoi(argv[1]);
  int serverSocket = createServerSocket(port);
  cout << "Server listening on port " << port << "." << endl;

  ThreadPool pool(kNumThreads);
  while (true) {
    int clientDescriptor = accept(serverSocket, NULL, NULL);
    // Capture the descriptor by value: the task runs after this iteration ends
    pool.schedule([clientDescriptor]() {
      sockbuf socketBuffer(clientDescriptor); // destructor closes socket
      iosockstream socketStream(&socketBuffer);
      handleRequest(socketStream);
    });
  }
  return 0;
}
Step 2: when we receive a request, parse it to see whether its path is "/" or "/words?...".
// Handles one HTTP request arriving on socketStream: parses the request line,
// separates the query parameters from the path, skips the headers, and then
// dispatches on the path.
static void handleRequest(iosockstream& socketStream) {
string method; // e.g. GET
string path; // e.g. / or /words?letters=abc
string protocol; // e.g. HTTP/1.0
// The request line is three whitespace-separated tokens
socketStream >> method >> path >> protocol;
// Extract just the query params, e.g. "key=value" in "/url?key=value"
size_t queryParamsStart = path.find("?");
string queryParams = "";
if (queryParamsStart != string::npos) {
// Everything after the '?' is the query; everything before it stays as the path
queryParams = path.substr(queryParamsStart + 1);
path = path.substr(0, queryParamsStart);
}
...
...
// read in the rest of the lines/headers, though we don't need it for anything
// (the first getline consumes what is left of the request line after the >> extraction)
string newline;
getline(socketStream, newline);
// Headers end at the first line that is empty or just "\r"
while (true) {
string line;
getline(socketStream, line);
if (line.empty() || line == "\r") break;
}
if (path == "/") {
...
} else if (...)
Step 2: when we receive a request, parse it to see whether its path is "/" or "/words?...".
Step 3: If it's for "/", read in the file "scrabble-word-finder.html" and send the HTML back.
// The payload and its format differ depending on what was requested
string payload;
string contentType;
if (path == "/") {
// send back HTML file
// Streaming the file's buffer into a stringstream copies the whole file into a string
// NOTE(review): there is no check that the file opened - confirm an empty payload is acceptable in that case
ifstream fileStream("scrabble-word-finder.html");
std::stringstream fileStringStream;
fileStringStream << fileStream.rdbuf();
payload = fileStringStream.str();
contentType = "text/html; charset=UTF-8";
} else if (...) {
...
}
// Each branch only fills in payload and contentType; the response is sent here
sendResponse(socketStream, payload, contentType);
Step 3: If it's for "/", read in the file "scrabble-word-finder.html" and send the HTML back.
/* Writes a complete "200 OK" HTTP response to socketStream: the status line,
 * the Content-Type and Content-Length headers, the blank line that ends the
 * headers, and then the payload itself. The stream is flushed at the end.
 */
static void sendResponse(iosockstream& socketStream, const string& payload, const string& contentType) {
  // Status line and headers; every line ends in CRLF
  socketStream << "HTTP/1.1 200 OK\r\n"
               << "Content-Type: " << contentType << "\r\n"
               << "Content-Length: " << payload.size() << "\r\n"
               << "\r\n"; // blank line separates the headers from the payload

  // Payload, then flush so the response actually leaves our buffer
  socketStream << payload;
  socketStream << flush;
}
Step 4: If it's for "/words?letters=XXXX", compute a list of valid words with those letters and send it back in JSON format.
// The payload and its format differ depending on what was requested
string payload;
string contentType;
if (path == "/") {
...
} else if (path == "/words" && queryParams.find("letters=") != string::npos) {
// compute valid words with these letters and send them back as JSON
// The letters are everything that follows "letters=" in the query string.
// NOTE(review): substr runs to the end of queryParams, so any parameter after
// letters= would be included - confirm only this one parameter is ever sent
string letters = queryParams.substr(queryParams.find("letters=") + string("letters=").length());
// presumably the word finder expects its letters sorted - TODO confirm
sort(letters.begin(), letters.end());
vector<string> formableWords;
findFormableWords(letters, formableWords);
payload = constructJSONPayload(formableWords);
// NOTE(review): the registered media type for JSON is application/json -
// confirm text/javascript is intentional
contentType = "text/javascript; charset=UTF-8";
} else {
...
}
// Each branch only fills in payload and contentType; the response is sent here
sendResponse(socketStream, payload, contentType);
{ "possibilities": ["word1", "word2"] }
We need a way to get a list of valid words given a set of characters.
We have implemented a custom function called subprocess:
subprocess_t subprocess(char *argv[], bool supplyChildInput, bool ingestChildOutput);
subprocess spawns a child process to run the specified command, and can optionally set up pipes we can use to write to the child's STDIN and/or read from its STDOUT.
It returns a struct containing:
Step 4: Otherwise, compute a list of valid words with those letters and send it back in JSON format.
static void findFormableWords(const string& letters, vector<string>& formableWords) {
// Make an argument array for the command subprocess should run
const char *command[] = {"./scrabble-word-finder", letters.c_str(), NULL};
subprocess_t sp = subprocess(const_cast<char **>(command), false, true);
// Make a stream around the file descriptor so we can read lines with getline
stdio_filebuf<char> inbuf(sp.ingestfd, ios::in);
istream instream(&inbuf);
while (true) {
// Read the next line and add it to the list of formable words
string word;
getline(instream, word);
if (instream.fail()) break;
formableWords.push_back(word);
}
// Make sure to only return from this function once the process has finished
waitpid(sp.pid, NULL, 0);
}
Step 4: Otherwise, compute a list of valid words with those letters and send it back in JSON format.
// The payload and its format differ depending on what was requested
string payload;
string contentType;
if (path == "/") {
...
} else if (path == "/words" && queryParams.find("letters=") != string::npos) {
// compute valid words with these letters and send them back as JSON
// The letters are everything that follows "letters=" in the query string.
// NOTE(review): substr runs to the end of queryParams, so any parameter after
// letters= would be included - confirm only this one parameter is ever sent
string letters = queryParams.substr(queryParams.find("letters=") + string("letters=").length());
// presumably the word finder expects its letters sorted - TODO confirm
sort(letters.begin(), letters.end());
vector<string> formableWords;
findFormableWords(letters, formableWords);
payload = constructJSONPayload(formableWords);
// NOTE(review): the registered media type for JSON is application/json -
// confirm text/javascript is intentional
contentType = "text/javascript; charset=UTF-8";
} else {
...
}
// Each branch only fills in payload and contentType; the response is sent here
sendResponse(socketStream, payload, contentType);
Step 4: Otherwise, compute a list of valid words with those letters and send it back in JSON format.
/* Writes text to out as the contents of a JSON string literal, escaping
 * quotes, backslashes and control characters so the result stays valid JSON.
 */
static void appendJSONEscaped(ostringstream& out, const string& text) {
  static const char kHexDigits[] = "0123456789abcdef";
  for (unsigned char ch : text) {
    switch (ch) {
      case '"':  out << "\\\""; break;
      case '\\': out << "\\\\"; break;
      case '\n': out << "\\n"; break;
      case '\r': out << "\\r"; break;
      case '\t': out << "\\t"; break;
      default:
        if (ch < 0x20) {
          // Remaining control characters must be written as \u00XX
          out << "\\u00" << kHexDigits[ch >> 4] << kHexDigits[ch & 0xF];
        } else {
          out << static_cast<char>(ch);
        }
    }
  }
}

/* Builds the JSON payload listing the given words, in the form
 *   { "possibilities": ["word1", "word2"] }
 * An empty vector produces an empty JSON array. Each word is escaped so that
 * a quote, backslash or control character cannot break the JSON.
 */
static string constructJSONPayload(const vector<string>& possibilities) {
  /* An ostringstream is like cout, but it doesn't print to the screen;
   * when you're done adding to the stream, you can convert it to a
   * string.
   */
  ostringstream payload;
  payload << "{" << '\n';
  payload << " \"possibilities\": [";
  // Append each word, preceded by a comma for all but the first word
  for (size_t i = 0; i < possibilities.size(); i++) {
    if (i > 0) payload << ", ";
    payload << "\"";
    appendJSONEscaped(payload, possibilities[i]);
    payload << "\"";
  }
  payload << "]" << '\n' << "}" << '\n';
  return payload.str();
}
{ "possibilities": ["word1", "word2"] }
/* Sends a "200 OK" HTTP response over socketStream. The status line and the
 * Content-Type / Content-Length headers come first, then a blank line, then
 * the payload; the stream is flushed once everything has been written.
 */
static void sendResponse(iosockstream& socketStream, const string& payload, const string& contentType) {
  // Headers: each line is terminated by CRLF, and an empty line ends them
  socketStream << "HTTP/1.1 200 OK\r\n"
               << "Content-Type: " << contentType << "\r\n"
               << "Content-Length: " << payload.size() << "\r\n"
               << "\r\n";

  // Body, followed by a flush to push the whole response out
  socketStream << payload << flush;
}
Step 4: Otherwise, compute a list of valid words with those letters and send it back in JSON format.
<form action="javascript:void(0);">
<label for="letters" class="form-label">Letters:</label>
<input type="text" class="form-control" id="letters" name="letters"><br>
<button type="submit" class="btn btn-primary" onclick="getWords()">Submit</button>
</form>
<br />
<div id="scrabbleWords"></div>
<script>
// Reads the letters typed into the form, asks the server for the words that
// can be formed from them, and lists the results in the "scrabbleWords" div.
// Any error from the request or from parsing the JSON is logged to the console.
function getWords() {
  const letters = document.getElementById("letters").value;
  // Encode the input so characters such as '&', '#' or spaces cannot alter the query string
  fetch("/words?letters=" + encodeURIComponent(letters), {method: "GET"})
    .then(data => data.json())
    .then(res => {
      // Keep the accumulator local instead of leaking an implicit global
      let possibilitiesStr = "";
      for (let i = 0; i < res.possibilities.length; i++) {
        possibilitiesStr += res.possibilities[i] + "<br>";
      }
      document.getElementById("scrabbleWords").innerHTML = "<p>Scrabble words:</p>" + possibilitiesStr;
    })
    .catch(error => console.log(error));
}
</script>
Let's see the underlying system calls and library functions needed to implement createClientSocket and createServerSocket!
We have used createClientSocket in client programs so far to connect to servers. It gives us back a descriptor we can use to read/write data.
But how is the createClientSocket helper function actually implemented?
/* Connects to the server, reads a single line of data from it, and prints
 * that line. Reports an error and exits with a nonzero status if the
 * connection cannot be made.
 */
int main(int argc, char *argv[]) {
  // Open a connection to the server
  int socketDescriptor = createClientSocket("myth64.stanford.edu", 12345);
  if (socketDescriptor < 0) {
    // A negative value is not a usable descriptor, so there is nothing to read from
    cerr << "Could not connect to host named \"myth64.stanford.edu\"." << endl;
    return 1;
  }

  // Read in the data from the server (sockbuf destructor closes descriptor)
  sockbuf socketBuffer(socketDescriptor);
  iosockstream socketStream(&socketBuffer);
  string timeline;
  getline(socketStream, timeline);

  // Print the data from the server
  cout << timeline << endl;
  return 0;
}
int createClientSocket(const string& host, unsigned short port);
int createClientSocket(const string& host, unsigned short port);
int createClientSocket(const string& host, unsigned short port);
sizeof(struct in_addr) and the third the AF_INET constant.
Each function returns a struct hostent with host's info (or NULL if error).
struct hostent *gethostbyname(const char *name);
struct hostent *gethostbyname2(const char *name, int af);
struct hostent *gethostbyaddr(const char *addr, int len, int type);
Wait a minute - gethostbyname and gethostbyname2 will give back different info (IPv4 vs. IPv6 addresses). How can the return type be the same?
struct hostent *gethostbyname(const char *name);
struct hostent *gethostbyname2(const char *name, int af);
struct hostent *gethostbyaddr(const char *addr, int len, int type);
// represents an IP Address
struct in_addr {
  unsigned int s_addr; // stored in network byte order (big endian)
};
// represents a host's info; this is the record that gethostbyname,
// gethostbyname2 and gethostbyaddr return a pointer to
struct hostent {
// official name of host
char *h_name;
// NULL-terminated list of aliases
char **h_aliases;
// host address type (typically AF_INET for IPv4)
int h_addrtype;
// address length (typically 4, or sizeof(struct in_addr) for IPv4)
int h_length;
// NULL-terminated list of IP addresses
// This is really a struct in_addr ** when hostent contains IPv4 addresses,
// so code that reads it casts the entries to struct in_addr *
char **h_addr_list;
};
gethostbyname()
Note: h_addr_list is typed to be a char * array, but for IPv4 records it's really a struct in_addr **, so we cast it to that in our code.
Why the confusion?
int createClientSocket(const string& host, unsigned short port) {
// Look up the host's info by name; gethostbyname gives back NULL on error,
// in which case we report failure by returning -1
struct hostent *he = gethostbyname(host.c_str());
if (he == NULL) return -1;
...
int socket(int domain, int type, int protocol);
int createClientSocket(const string& host, unsigned short port) {
...
// Create a socket endpoint; socket returns a descriptor, and a negative
// value is treated as failure here
int s = socket(AF_INET, SOCK_STREAM, 0);
if (s < 0) return -1;
...
The socket function creates a socket endpoint and returns a descriptor.
int connect(int clientfd, const struct sockaddr *addr, socklen_t addrlen);
connect connects the specified socket to the specified address.
int connect(int clientfd, const struct sockaddr *addr, socklen_t addrlen);
There are actually multiple different types of address we may want to pass in: sockaddr_in and sockaddr_in6. How can we handle these possibilities? C doesn't support inheritance or templates.
int connect(int clientfd, const struct sockaddr *addr, socklen_t addrlen);
We will make the parameter type a "parent type" called sockaddr, which will have the same memory layout as sockaddr_in and sockaddr_in6. Its structure is a 2 byte type field followed by 14 bytes of something. Both sockaddr_in and sockaddr_in6 will start with that 2 byte type field, and use the remaining 14 bytes for whatever they want.
// Generic "parent" socket address type; connect takes a pointer to this.
// Specific records such as sockaddr_in and sockaddr_in6 begin with the same
// 2-byte family field, which tells what struct type it really is.
struct sockaddr { // generic socket
unsigned short sa_family; // protocol family for socket
char sa_data[14];
// address data (and defines full size to be 16 bytes)
};
struct sockaddr_in { // IPv4 socket address record
unsigned short sin_family; // always AF_INET for IPv4; identifies the real struct type
unsigned short sin_port; // port number, in network byte order
struct in_addr sin_addr; // the IPv4 address
unsigned char sin_zero[8]; // the remaining 8 bytes, unused
};
struct sockaddr_in6 { // IPv6 socket address record
unsigned short sin6_family; // always AF_INET6 for IPv6; identifies the real struct type
unsigned short sin6_port; // port number, in network byte order
unsigned int sin6_flowinfo; // not needed for our purposes
struct in6_addr sin6_addr; // the IPv6 address
unsigned int sin6_scope_id; // not needed for our purposes
};
struct sockaddr_in { // IPv4 socket address record
unsigned short sin_family; // always AF_INET for IPv4; identifies the real struct type
unsigned short sin_port; // port number, in network byte order
struct in_addr sin_addr; // the IPv4 address
unsigned char sin_zero[8]; // the remaining 8 bytes, unused
};
The sin_family field should always be initialized to be AF_INET for IPv4 to distinguish what struct type it really is.
The sin_port field stores a port number in network byte order.
The sin_addr field stores the IPv4 address.
The sin_zero field represents the remaining 8 bytes that are unused.
The sin6_family field should always be initialized to be AF_INET6 for IPv6 to distinguish what struct type it really is.
The sin6_port field stores a port number in network byte order.
The sin6_addr field stores the IPv6 address.
sin6_flowinfo and sin6_scope_id are beyond the scope of what we need, so we'll ignore them.
struct sockaddr_in6 { // IPv6 socket address record
unsigned short sin6_family; // always AF_INET6 for IPv6; identifies the real struct type
unsigned short sin6_port; // port number, in network byte order
unsigned int sin6_flowinfo; // not needed for our purposes
struct in6_addr sin6_addr; // the IPv6 address
unsigned int sin6_scope_id; // not needed for our purposes
};
int createClientSocket(const string& host, unsigned short port) {
...
// Build the IPv4 address record for the server; zero it first so that
// every byte we don't explicitly set is 0
struct sockaddr_in address;
memset(&address, 0, sizeof(address));
address.sin_family = AF_INET;
// htons converts the port to network byte order
address.sin_port = htons(port);
// h_addr is #define for h_addr_list[0]
// (typed as char *, but for IPv4 it really points at a struct in_addr)
address.sin_addr = *((struct in_addr *)he->h_addr);
// connect takes the generic sockaddr type, so cast our IPv4 record to it;
// a return value of 0 means the connection succeeded
if (connect(s, (struct sockaddr *) &address, sizeof(address)) == 0) return s;
...
htons is "host to network short" - it converts to network byte order, which may or may not be the same as the byte order your machine uses.
/* Opens a TCP connection to the given host and port over IPv4.
 * Returns a connected socket descriptor on success, or -1 if the host
 * cannot be looked up, the socket cannot be created, or the connection fails.
 */
int createClientSocket(const string& host, unsigned short port) {
  // Look up the host's info by name; there is nothing to connect to without it
  struct hostent *hostInfo = gethostbyname(host.c_str());
  if (hostInfo == NULL) return -1;

  int clientSocket = socket(AF_INET, SOCK_STREAM, 0);
  if (clientSocket < 0) return -1;

  // Fill in the IPv4 address record, starting from all zeros
  struct sockaddr_in serverAddress;
  memset(&serverAddress, 0, sizeof(serverAddress));
  serverAddress.sin_family = AF_INET;
  serverAddress.sin_port = htons(port); // network byte order
  // h_addr is #define for h_addr_list[0]
  serverAddress.sin_addr = *reinterpret_cast<struct in_addr *>(hostInfo->h_addr);

  // connect expects the generic sockaddr type
  const struct sockaddr *genericAddress = reinterpret_cast<const struct sockaddr *>(&serverAddress);
  if (connect(clientSocket, genericAddress, sizeof(serverAddress)) != 0) {
    // Don't leak the descriptor when the connection attempt fails
    close(clientSocket);
    return -1;
  }
  return clientSocket;
}
Next time: Distributed systems and MapReduce