Network Proxy with O(1) LFU Caching
Moksh Jain, 16IT221
Nishanth Hebbar, 16IT234
Suyash Ghuge, 16IT114
Abhishek Kamal, 16IT202
Code at: github.com/MJ10/DSA-Project
Network Proxy

In a network, a proxy acts as an intermediary for requests from clients requesting resources from some server.
-
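Web proxies forward HTTP requests. The request from the client is a regular HTTP request, except that the request line carries the full URL of the resource; the proxy reads the target host and port from that URL.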
class Server:
    """
    Forwarding HTTP proxy.

    Accepts TCP connections, reads one request from each client, connects
    to the host named in the request line and relays the response back.
    """

    def __init__(self, config):
        """
        Initializes a server object

        config : mapping read with the keys 'HOST_NAME', 'BIND_PORT',
                 'MAX_REQUEST_LENGTH' and 'CONNECTION_TIMEOUT'
        """
        # dictionary to store all the active connections
        self.__clients = {}
        self.config = config
        # create and setup TCP socket
        self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # NOTE(review): setup_socket is defined outside the lines shown here;
        # presumably it binds and starts listening - confirm.
        self.setup_socket()

    def listen(self):
        """
        Listens for connections to the server

        Every accepted connection is handled by proxy_thread on its own
        daemon thread, so worker threads never keep the process alive.
        """
        print('Proxy Server started on {}:{}'.format(self.config['HOST_NAME'],
                                                     self.config['BIND_PORT']))
        while True:
            (client_socket, client_address) = self.server_socket.accept()
            # daemon=True replaces the deprecated Thread.setDaemon(True)
            d = threading.Thread(name=self._get_client_name(client_address),
                                 target=self.proxy_thread,
                                 args=(client_socket, client_address),
                                 daemon=True)
            d.start()
        # NOTE(review): unreachable as written - the loop above only ends by
        # raising. Kept so the intended call is not lost; confirm where
        # shutdown should really be invoked.
        self.shutdown(0, 0)

    @staticmethod
    def _split_host_port(url):
        """
        Splits the URL of a request line into (host, port)

        url    : bytes, with or without a leading 'scheme://'
        return : (host as bytes, port as int); port is 80 when the URL
                 names none
        raises : ValueError if the port part is not a number
        """
        scheme_end = url.find(b'://')
        rest = url if scheme_end == -1 else url[scheme_end + 3:]
        port_pos = rest.find(b':')
        path_pos = rest.find(b'/')
        if path_pos == -1:
            path_pos = len(rest)
        # a ':' that only shows up after the first '/' belongs to the path
        if port_pos == -1 or path_pos < port_pos:
            return rest[:path_pos], 80
        return rest[:port_pos], int(rest[port_pos + 1:path_pos])

    def proxy_thread(self, conn, client_addr):
        """
        Handles connections from browsers

        conn        : connected client socket; always closed before returning
        client_addr : client address, only used for logging
        return      : None
        """
        req = conn.recv(self.config['MAX_REQUEST_LENGTH'])
        line1 = req.split(b'\n')[0]
        x = line1.split(b' ')
        if len(x) <= 1:
            # no URL in the request line: nothing to forward, but the client
            # socket must still be released
            conn.close()
            return
        try:
            webserver, port = self._split_host_port(x[1])
        except ValueError:
            # non-numeric port: refuse the request instead of letting the
            # thread die with the client socket still open
            conn.close()
            self.log("WARNING", client_addr, "Malformed request " + str(line1))
            return
        # bound before the try so the cleanup below can always test it
        s = None
        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.settimeout(self.config['CONNECTION_TIMEOUT'])
            s.connect((webserver, port))
            s.sendall(req)
            while True:
                data = s.recv(self.config['MAX_REQUEST_LENGTH'])
                if not data:
                    break
                # sendall: send() may transmit only part of the buffer
                conn.sendall(data)
        except socket.error:
            self.log("WARNING", client_addr, "Peer Reset " + str(line1))
        finally:
            if s is not None:
                s.close()
            conn.close()
LFU Cache
Least Frequently Used (LFU) is a type of cache algorithm, used to manage memory in a computer.
Operations:
1. Insert (Set)
2. Retrieve (Get)
3. Evict (Remove)
1
2
3
Frequency List

- It is a doubly linked list in which each node groups the cache elements that share one access frequency; each node in turn holds a doubly linked list of those elements.
class FrequencyList:
    """
    Linked List containing frequency nodes of different frequencies

    self.head is a sentinel FrequencyNode; the real frequency nodes follow
    it. Each frequency node keeps its elements in a second linked list
    that hangs off its own sentinel, `children`.
    """

    def __init__(self):
        """
        Initializes Frequency List object
        """
        self.head = FrequencyNode()

    def insert_new(self, obj):
        """
        Insert new element into the linked list with its access frequency = 1

        obj : cache object; obj.parent is set to the ListNode created for it
        """
        temp = self.head
        key_node = ListNode()
        key_node.data = obj
        obj.parent = key_node
        # compare by value: `is not 1` tests object identity, which is an
        # implementation detail for ints and a SyntaxWarning on Python 3.8+
        if temp.next is None or temp.next.value != 1:
            # no frequency-1 node yet: create it right behind the sentinel
            node = FrequencyNode()
            node.value = 1
            node.prev = temp
            if temp.next is not None:
                node.next = temp.next
                temp.next.prev = node
            temp.next = node
            key_node.parent_node = node
            node.children.next = key_node
            key_node.prev = node.children
        else:
            key_node.parent_node = temp.next
            self.insert_node(key_node, temp.next)

    def insert_node(self, list_node, frequency_node):
        """
        Inserts ListNode into the given frequency node in the first position
        """
        old_first = frequency_node.children.next
        # always overwrite `next`: a node moved over from another frequency
        # node must not keep pointing at its former neighbour when the
        # target list is empty
        list_node.next = old_first
        list_node.prev = frequency_node.children
        frequency_node.children.next = list_node
        if old_first is not None:
            old_first.prev = list_node

    def lookup(self, obj):
        """
        Frequency of the element searched for is updated and element is
        inserted accordingly into its new position

        obj : cache object previously registered through insert_new
        """
        temp = obj.parent
        temp1 = temp.parent_node
        # NOTE(review): delete_node and new_frequency_node are called here but
        # defined outside the lines shown; presumably they unlink a ListNode
        # and create the value+1 node right after temp1 - confirm.
        if temp1.next:
            if temp1.value == temp1.next.value - 1:
                # the node for frequency + 1 already exists
                self.delete_node(temp)
                temp.parent_node = temp1.next
                self.insert_node(temp, temp1.next)
            else:
                temp2 = self.new_frequency_node(temp1)
                self.delete_node(temp)
                temp.parent_node = temp2
                self.insert_node(temp, temp2)
        else:
            # temp1 is the last frequency node: append the next one
            temp2 = FrequencyNode()
            temp2.prev = temp1
            temp1.next = temp2
            temp2.value = temp1.value + 1
            self.delete_node(temp)
            temp.parent_node = temp2
            self.insert_node(temp, temp2)
        if not temp1.children.next:
            # the old frequency node is empty now; every branch above left
            # temp1.next set, so both neighbours exist
            temp1.prev.next = temp1.next
            temp1.next.prev = temp1.prev

    def delete_obj(self):
        """
        Deletes the first element of the least frequently used node in the
        frequency List

        return : the object stored in the removed element, or None if the
                 list holds no element
        """
        lowest = self.head.next
        if lowest is None:
            return None
        child = lowest.children.next
        if child is None:
            return None
        child.prev.next = child.next
        if child.next is not None:
            child.next.prev = child.prev
        if lowest.children.next is None:
            # drop the emptied frequency node; it may be the only one, so
            # the successor must be checked before touching its prev link
            self.head.next = lowest.next
            if lowest.next is not None:
                lowest.next.prev = self.head
        return child.data
Hash Table
- The Hash Table used in the data structure is a standard hash table.
- The Hash Table is assumed to be collision-free, which allows O(1) runtime operations.
- Jenkins's one-at-a-time hash function is used to compute the hash of a given key.
class HashTable:
    """
    A hash table for accessing the cache contents

    Designed as 'collision-free': every slot holds at most one entry and
    inserting into an occupied slot replaces its entry. Lookups compare the
    stored key, so a collision is reported as a miss instead of returning
    another key's entry.
    """

    def __init__(self, size=1009):
        """
        Returns a new hash table of size 'size'
        """
        self.MAX = size
        self.size = 0
        self.table = [HashNode() for _ in range(size)]

    def insert(self, obj):
        """
        Insert object obj to the table

        obj : object with a `key` attribute; obj.hash_reference is set to
              the slot that now holds it
        """
        slot = self.table[self.hash(obj.key)]
        obj.hash_reference = slot
        slot.entry = obj
        self.size += 1

    def search(self, key):
        """
        Access the object with given key

        return : the stored object whose key equals `key`, else None
        """
        entry = self.table[self.hash(key)].entry
        # the slot may hold a different key that hashes to the same index
        if entry is not None and entry.key == key:
            return entry
        return None

    def remove(self, obj):
        """
        Remove object from hash table

        The slot is only cleared while it still holds `obj`; if a colliding
        insert replaced it in the meantime, the newer entry is kept.
        """
        self.size -= 1
        if obj.hash_reference.entry is obj:
            obj.hash_reference.entry = None

    def hash(self, key):
        """
        Calculates index for key using Jenkins's one-at-a-time hash function

        key    : str
        return : int in range(self.MAX)
        """
        # keep the state at 32 bits as the algorithm specifies; without the
        # mask the integer gains about 10 bits per character
        mask = 0xFFFFFFFF
        hash_code = 0
        for char in key:
            hash_code = (hash_code + ord(char)) & mask
            hash_code = (hash_code + (hash_code << 10)) & mask
            hash_code ^= (hash_code >> 6)
        hash_code = (hash_code + (hash_code << 3)) & mask
        hash_code ^= (hash_code >> 11)
        hash_code = (hash_code + (hash_code << 15)) & mask
        return hash_code % self.MAX
LFU Cache Implementation
class LFUCache:
    """
    LFU cache built from a hash table (lookup by key) and a frequency
    list (eviction order)
    """

    def __init__(self):
        """
        Creates an empty cache
        """
        self.table = HashTable()
        self.list = FrequencyList()

    def add(self, key, data):
        """
        Store data under key; data for a key that is already cached is
        appended to the data stored so far
        """
        existing = self.table.search(key)
        if existing:
            existing.data += data
            return
        # unseen key: register it in the frequency list, then in the table
        entry = CacheObject(key, data)
        self.list.insert_new(entry)
        self.table.insert(entry)

    def evict(self):
        """
        Drop the item the frequency list reports as least frequently used
        """
        victim = self.list.delete_obj()
        if not victim:
            return
        self.table.remove(victim)

    def retrieve(self, key):
        """
        Return the data cached under 'key' and count the access, or
        return None if nothing is cached for it
        """
        hit = self.table.search(key)
        if not hit:
            return None
        self.list.lookup(hit)
        return hit.data
Cache in the Proxy
# Excerpt (parts elided with "# ..."): shows where the cache is trimmed.
# One entry is evicted whenever the hash table's size counter has reached
# 90% of its slot count (MAX).
# NOTE(review): presumably this sits inside Server.listen; the `self`
# parameter is missing from the signature shown here - confirm.
def listen():
# ...
if self.cache.table.size >= int(self.cache.table.MAX * 0.9):
self.cache.evict()
# ...
# Excerpt (parts elided with "# ..."): shows how the request handler uses the
# cache. URLs matched by self.regex are answered from the cache when present;
# otherwise every chunk received from the origin server is added to the cache
# under the URL and forwarded to the client.
def proxy_thread(self, conn, client_addr):
# ...
# Check if the file requested is css/js file
if bool(self.regex.findall(url.decode())):
# send cached version if present
data = self.cache.retrieve(url.decode())
if data:
# NOTE(review): LFUCache.retrieve as shown in this file returns
# cache_obj.data, so `data.data` reads an attribute of the payload itself -
# confirm which of the two is intended.
conn.send(data.data)
conn.close()
print('Retrieving from cache: ' + url.decode())
else:
# ...
while True:
data = s.recv(self.config['MAX_REQUEST_LENGTH'])
if len(data)>0:
# each chunk is added under the same key; LFUCache.add appends to the
# data already stored for an existing key
self.cache.add(url.decode(), data)
conn.send(data)
# ...
Comparison


Limitations
- One obvious limitation of the proposed data structure is that the Hash Table is assumed to be collision-free, which is what allows O(1) retrieval from it. In practice, a completely collision-free Hash Table is not achievable.
- A major limitation of the proxy server is that it currently supports only HTTP requests, but most of the websites now use HTTPS.
Future Improvements
- Since the proxy currently only works with HTTP websites, support for HTTPS can be added.
- A provision can be added to the proxy to save the current data in the cache to disk when closed, and reload it into memory when the proxy is restarted.
References
http://dhruvbird.com/lfu.pdf
https://docs.python.org/3/library/threading.html
https://docs.python.org/3.6/howto/sockets.html
dsa-project
By Moksh Jain
dsa-project
- 512