Network Proxy with O(1) LFU Caching

Moksh Jain, 16IT221

Nishanth Hebbar, 16IT234

Suyash Ghuge, 16IT114

Abhishek Kamal, 16IT202

Code at: github.com/MJ10/DSA-Project

Network Proxy

In a network, a proxy acts as an intermediary for requests from clients requesting resources from some server. 

 

  • Web proxies forward HTTP requests. The request from the client is the same as a regular HTTP request.

class Server:
    """
    Threaded HTTP forward proxy.

    Accepts client connections on a TCP socket and relays each request to
    the web server named in its request line, streaming the response back.

    NOTE(review): setup_socket, _get_client_name, log and shutdown are used
    below but are not defined in this excerpt.
    """

    def __init__(self, config):
        """
        Initializes a server object

        config : mapping read for 'HOST_NAME', 'BIND_PORT',
                 'MAX_REQUEST_LENGTH' and 'CONNECTION_TIMEOUT'
        """
        # dictionary to store all the active connections
        self.__clients = {}
        self.config = config
        # create and setup TCP socket
        self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.setup_socket()

    def listen(self):
        """
        Listens for connections to the server

        Every accepted connection is served by proxy_thread on its own
        daemon thread, so worker threads never keep the process alive.
        """
        print('Proxy Server started on {}:{}'.format(self.config['HOST_NAME'],
                                                     self.config['BIND_PORT']))
        while True:
            client_socket, client_address = self.server_socket.accept()
            # daemon=True replaces the deprecated Thread.setDaemon(True)
            worker = threading.Thread(
                name=self._get_client_name(client_address),
                target=self.proxy_thread,
                args=(client_socket, client_address),
                daemon=True)
            worker.start()
        # Not reachable while the loop above has no exit; kept from the
        # original so the intended clean-up call stays visible.
        self.shutdown(0, 0)

    @staticmethod
    def _parse_target(url):
        """
        Splits a request URL (bytes) into (host, port); port defaults to 80

        Raises ValueError when the port part is not a valid integer.
        """
        scheme_pos = url.find(b'://')
        rest = url if scheme_pos == -1 else url[scheme_pos + 3:]
        port_pos = rest.find(b':')
        path_pos = rest.find(b'/')
        if path_pos == -1:
            path_pos = len(rest)
        # a ':' that only appears after the first '/' belongs to the path
        if port_pos == -1 or path_pos < port_pos:
            return rest[:path_pos], 80
        return rest[:port_pos], int(rest[port_pos + 1:path_pos])

    def proxy_thread(self, conn, client_addr):
        """
        Handles connections from browsers

        Reads one request from conn, forwards it to the target web server
        and relays the response. conn is always closed before returning,
        including when the request is malformed.
        """
        req = conn.recv(self.config['MAX_REQUEST_LENGTH'])
        line1 = req.split(b'\n')[0]
        parts = line1.split(b' ')
        if len(parts) < 2:
            # no URL in the request line: nothing to forward
            conn.close()
            return

        try:
            webserver, port = self._parse_target(parts[1])
        except ValueError:
            # non-numeric port; previously this killed the thread and
            # left the client socket open
            conn.close()
            self.log("WARNING", client_addr, "Bad Request " + str(line1))
            return

        # bound before the try so the clean-up never sees an unbound name
        upstream = None
        try:
            upstream = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            upstream.settimeout(self.config['CONNECTION_TIMEOUT'])
            upstream.connect((webserver, port))
            upstream.sendall(req)
            while True:
                data = upstream.recv(self.config['MAX_REQUEST_LENGTH'])
                if not data:
                    break
                # sendall: send() may transmit only part of the chunk
                conn.sendall(data)
        except socket.error:
            self.log("WARNING", client_addr, "Peer Reset " + str(line1))
        finally:
            if upstream is not None:
                upstream.close()
            conn.close()

LFU Cache

Least Frequently Used (LFU) is a cache replacement algorithm used to manage memory in a computer: when space is needed, the item that has been accessed the fewest times is evicted first.

Operations:

1. Insert (Set)

2. Retrieve (Get)

3. Evict (Remove)

1

2

3

Frequency List

  • It is a doubly linked list in which each node holds the cache elements that share one access frequency; the elements of each node are themselves kept in a doubly linked list.
class FrequencyList:
    """
    Linked List containing frequency nodes of different frequencies

    self.head is a sentinel FrequencyNode; the nodes after it each carry a
    'value' (access frequency) and a 'children' sentinel that heads the list
    of ListNodes with that frequency.

    NOTE(review): lookup relies on delete_node and new_frequency_node, which
    are not defined in this excerpt.
    """

    def __init__(self):
        """
        Initializes Frequency List object
        """
        self.head = FrequencyNode()

    def insert_new(self, obj):
        """
        Insert new element into the linked list with its access frequency = 1

        Wraps obj in a ListNode (also stored as obj.parent) and places it in
        the frequency-1 node, creating that node right after the head when it
        does not exist yet.
        """
        head = self.head
        key_node = ListNode()
        key_node.data = obj
        obj.parent = key_node

        first = head.next
        # compare by value: 'is not 1' tested object identity, not equality
        if first is None or first.value != 1:
            node = FrequencyNode()
            node.value = 1
            node.prev = head
            if first is not None:
                node.next = first
                first.prev = node
            head.next = node
            first = node

        key_node.parent_node = first
        self.insert_node(key_node, first)

    def insert_node(self, list_node, frequency_node):
        """
        Inserts ListNode into the given frequency node in the first position
        """
        sentinel = frequency_node.children
        successor = sentinel.next
        # always overwrite 'next' so a node moved from another bucket cannot
        # keep pointing at its former neighbour
        list_node.next = successor
        list_node.prev = sentinel
        sentinel.next = list_node
        if successor is not None:
            successor.prev = list_node

    def lookup(self, obj):
        """
        Frequency of the element searched for is updated and element is
        inserted accordingly into its new position

        The element moves to the node whose value is one higher than its
        current node; that node is created when missing. The old node is
        unlinked if the move left it empty.
        """
        item = obj.parent
        current = item.parent_node
        if current.next:
            if current.value == current.next.value - 1:
                # the next node already represents frequency + 1
                self.delete_node(item)
                item.parent_node = current.next
                self.insert_node(item, current.next)
            else:
                # gap in frequencies: presumably new_frequency_node inserts
                # a node right after 'current' -- confirm against its code
                target = self.new_frequency_node(current)
                self.delete_node(item)
                item.parent_node = target
                self.insert_node(item, target)
        else:
            # 'current' is the last node: append a node for frequency + 1
            target = FrequencyNode()
            target.prev = current
            current.next = target
            target.value = current.value + 1
            self.delete_node(item)
            item.parent_node = target
            self.insert_node(item, target)

        if not current.children.next:
            # the old bucket is empty now, so drop it from the list
            current.prev.next = current.next
            if current.next is not None:
                current.next.prev = current.prev

    def delete_obj(self):
        """
        Deletes the first element of the least frequently used node in the
        frequency List

        return : the data of the removed element, or None when there is
                 nothing to remove
        """
        lowest = self.head.next
        if lowest is None:
            return None

        child = lowest.children.next
        if child is not None:
            child.prev.next = child.next
            if child.next:
                child.next.prev = child.prev

        if lowest.children.next is None:
            # bucket is empty: unlink it; it may be the only bucket, in
            # which case there is no following node to re-point
            self.head.next = lowest.next
            if lowest.next is not None:
                lowest.next.prev = self.head

        return child.data if child is not None else None

Hash Table

  • The Hash Table used in the Data Structure is a standard Hash Table.

 

  • The Hash Table is assumed to be collision free, allowing O(1) runtime operations.

 

  • Jenkins's one-at-a-time hash function is used to compute the hash of a given key.
class HashTable:
    """
    A hash table for accessing the cache contents
    Assumed 'collision-free': every slot holds at most one object and no
    chaining or probing is done. When two keys do hash to the same slot the
    newer object replaces the older one, and lookups compare keys so the
    wrong object is never returned.
    """
    def __init__(self, size=1009):
        """
        Returns a new hash table of size 'size'
        """
        self.MAX = size   # number of slots
        self.size = 0     # number of occupied slots
        self.table = [HashNode() for _ in range(size)]

    def insert(self, obj):
        """
        Insert object obj to the table

        obj.key selects the slot; obj.hash_reference is set to that slot.
        """
        slot = self.table[self.hash(obj.key)]
        # count a slot only once: overwriting an occupied slot (same key
        # again, or a collision) must not inflate the size
        if slot.entry is None:
            self.size += 1
        obj.hash_reference = slot
        slot.entry = obj

    def search(self, key):
        """
        Access the object with given key

        Returns None when the slot is empty or is occupied by an object
        stored under a different key.
        """
        entry = self.table[self.hash(key)].entry
        if entry is not None and entry.key == key:
            return entry
        return None

    def remove(self, obj):
        """
        Remove object from hash table

        Does nothing when obj no longer occupies its slot (for example after
        another object replaced it), so the current occupant is kept.
        """
        slot = getattr(obj, 'hash_reference', None)
        if slot is not None and slot.entry is obj:
            slot.entry = None
            self.size -= 1

    def hash(self, key):
        """
        Calculates index for key using Jenkins's one-at-a-time hash function

        NOTE(review): the intermediate value is not truncated to 32 bits, so
        it grows with the key length; results are left unchanged here.
        """
        hash_code = 0
        for char in key:
            hash_code += ord(char)
            hash_code += (hash_code << 10)
            hash_code ^= (hash_code >> 6)
        hash_code += (hash_code << 3)
        hash_code ^= (hash_code >> 11)
        hash_code += (hash_code << 15)
        return hash_code % self.MAX

LFU Cache Implementation

class LFUCache:
    """
    LFU cache combining a hash table (lookup by key) with a frequency list
    (tracks access counts and picks the item to evict)
    """
    def __init__(self):
        """
        Creates the empty hash table and frequency list
        """
        self.table = HashTable()
        self.list = FrequencyList()

    def add(self, key, data):
        """
        Store data under key; data for an existing key is appended to it
        """
        existing = self.table.search(key)
        if not existing:
            # first time this key is seen: register it in both structures
            entry = CacheObject(key, data)
            self.list.insert_new(entry)
            self.table.insert(entry)
            return
        existing.data += data

    def evict(self):
        """
        Drop the least frequently used item from the cache
        """
        victim = self.list.delete_obj()
        if not victim:
            return
        self.table.remove(victim)

    def retrieve(self, key):
        """
        Return the data stored under 'key' and count the access,
        or return None when the key is not cached
        """
        found = self.table.search(key)
        if not found:
            return None
        self.list.lookup(found)
        return found.data

Cache in the Proxy

def listen():
    # Excerpt showing where eviction is triggered; the '# ...' lines stand
    # for code that is not shown.
    # NOTE(review): the body uses 'self' but the signature declares no
    # parameter -- presumably this is a fragment of Server.listen(self);
    # confirm before reusing it as-is.
    # ...
    # Evict one item once the table holds at least 90% of its slot count.
    if self.cache.table.size >= int(self.cache.table.MAX * 0.9):
        self.cache.evict()
    # ...

def proxy_thread(self, conn, client_addr):
    """
    Excerpt: serves a request from the cache when possible, otherwise
    relays the response from the web server and caches it chunk by chunk.
    The '# ...' lines stand for code that is not shown.
    """

    # ...

    # decode once; the text form is both the regex input and the cache key
    url_text = url.decode()

    # Check if the file requested is css/js file
    if bool(self.regex.findall(url_text)):
        # send cached version if present
        data = self.cache.retrieve(url_text)
        if data:
            # retrieve() already returns the cached payload, so it is sent
            # directly; sendall() keeps large payloads from being truncated
            conn.sendall(data)
            conn.close()
            print('Retrieving from cache: ' + url_text)
        else:
            # ...
                while True:
                    data = s.recv(self.config['MAX_REQUEST_LENGTH'])
                    if len(data)>0:
                        # chunks for the same URL are appended in the cache
                        self.cache.add(url_text, data)
                        conn.sendall(data)
            # ...

Comparison

Limitations

  • One obvious limitation of the proposed data structure is that the Hash Table is assumed to be collision free, which is what allows O(1) retrieval from the Hash Table. But in practice, it is not possible to have a completely collision free Hash Table.

 

  • A major limitation of the proxy server is that it currently supports only HTTP requests, but most of the websites now use HTTPS.

Future Improvements

  • Since the proxy currently only works with HTTP websites, support for HTTPS can be added.

 

  • A provision can be added to the proxy to save the current data in the cache to disk when closed, and reload it into memory when the proxy is restarted.

References

http://dhruvbird.com/lfu.pdf

 

https://docs.python.org/3/library/threading.html

 

https://docs.python.org/3.6/howto/sockets.html

dsa-project

By Moksh Jain

dsa-project

  • 512