From c8e5d1d73e5b620f27eccc4c686b0bb1e0d0a81c Mon Sep 17 00:00:00 2001
From: Muhammad Jihad Rinaldi <jihadmjr@gmail.com>
Date: Thu, 16 Apr 2020 01:41:57 +0700
Subject: [PATCH] update readme and push code collmd5

---
 CollisionAttackMD5/README.md          |  66 +++++++++
 CollisionAttackMD5/c_demo.c           |  31 +++++
 CollisionAttackMD5/clean.sh           |   5 +
 CollisionAttackMD5/coll.py            | 174 ++++++++++++++++++++++++
 CollisionAttackMD5/gen_coll_c.py      |  63 +++++++++
 CollisionAttackMD5/gen_coll_python.py |  71 ++++++++++
 CollisionAttackMD5/gen_coll_test.py   |  25 ++++
 CollisionAttackMD5/md5.py             | 187 ++++++++++++++++++++++++++
 README.md                             |   5 +-
 9 files changed, 626 insertions(+), 1 deletion(-)
 create mode 100644 CollisionAttackMD5/README.md
 create mode 100644 CollisionAttackMD5/c_demo.c
 create mode 100755 CollisionAttackMD5/clean.sh
 create mode 100644 CollisionAttackMD5/coll.py
 create mode 100755 CollisionAttackMD5/gen_coll_c.py
 create mode 100755 CollisionAttackMD5/gen_coll_python.py
 create mode 100755 CollisionAttackMD5/gen_coll_test.py
 create mode 100755 CollisionAttackMD5/md5.py

diff --git a/CollisionAttackMD5/README.md b/CollisionAttackMD5/README.md
new file mode 100644
index 0000000..50e3ab8
--- /dev/null
+++ b/CollisionAttackMD5/README.md
@@ -0,0 +1,66 @@
+# Python MD5 Collision Library
+for CS 460 SP15
+
+I don't currently have a creative name for this library. I was hoping to use MD5SuperCollider, but that's [taken](https://github.com/culmor30/MD5-SuperCollider).
+
+## Installation
+First, this library currently only runs on Linux because it uses some unix commands (tested with Ubuntu). It may work on Mac, but that is untested. The library is designed for Python 3 and it requires the boost library headers as a dependency. Obviously standard C compilation packages including gcc are also required. One of the demos outputs Python 2 scripts (not feasible to do in v3), so also have Python 2 to make that work.
+
+```sh
+$ sudo apt-get install libboost-all-dev
+$ sudo apt-get install python # python 2
+$ sudo apt-get install python3
+```
+
+## Description
+This library is contained within `coll.py` (which depends on `md5.py`) and can be used to generate a collection of different files that all have the same MD5 hash. To accomplish this task, I used the [chosen prefix attack](https://en.wikipedia.org/wiki/Collision_attack#Chosen-prefix_collision_attack) from [Marc Stevens'](https://marc-stevens.nl/p/hashclash/) fastcoll program. His program allows one to supply the intermediate hash value (IHV) of the MD5 function and get a pair of 2-block chunks (128 bytes each) that are different, yet after those 2 block iterations the IHVs will be the same again.
+
+The effect of this attack is that given some prefix (starting) data, we can come up with a pair of possible suffix data that are different yet result in the same hash overall when concatenated onto the starting data. `MD5(prefix + collision1a) = MD5(prefix + collision1b)`. We can even extend the above by the inherent linear progression property of MD5. We can use the suffix pair above to generate more complex files: `MD5(prefix + collision1a + moreData) = MD5(prefix + collision1b + moreData)`.
+
+Or in the general form: `MD5(prefix + collision1{a,b} + moreData1 + collision2{a,b} + moreData2 + ... + collisionN{a,b} + moreDataN)`, where for each `{a,b}` you have a choice of `a` or `b`. That means given some N collisions, 2^N files with the same MD5 can be generated. Or from the other perspective, if you are looking to generate N files, you need ceil(log2(N)) collisions (for example, 8 collisions are enough for 213 files).
+
+Unfortunately, it is very cumbersome to do interesting things directly with fastcoll and the command line. My library provides a Collider class and other utility functions so you can easily write a script to generate files with the same MD5. 
+
+### My work
+- Automatic download and compilation of fastcoll
+- Wrote a Python 3 compatible MD5 library that exposes internals including the current IHV (Python's hashlib does NOT)
+- Utility functions
+    - Pad data to MD5 block size
+    - A filter generator that will not allow collision blocks if they contain certain binary substrings
+    - Direct access to a wrapper around fastcoll that takes in an IHV and outputs a tuple of the 2 collision parts.
+- Collider class
+    - Pass in data chunks to build the file
+    - Call `diverge()` to place a collision after all the currently supplied data
+    - Use `get_collisions()` to get an iterable over each possible colliding total file (as data)
+        - Only one file is generated in memory at a time
+    - Sanity check functions to help adhere to MD5 block boundaries
+- 3 demos, see below
+
+## Demos
+Each demo is a Python 3 script that begins with `gen_`.
+```sh
+$ python3 gen_<scriptname>.py # or
+$ ./gen_<scriptname>.py
+```
+
+All output files of the demos start with `out_`. This is useful to delete them or check that the MD5 sums are equal. clean.sh removes all output files.
+```sh
+$ ./clean.sh
+$ md5sum out_* # check if related files have the same MD5
+$ sha256sum out_* # related files with the same MD5 sum should have DIFFERENT SHA256
+```
+All demos have comments, so you can read more about how they work in the code. 
+
+**Note that each collision needed will take about 3s-15s to generate! It's not that fast.**
+
+### Large Amount of Collisions Demo
+This demo is in `gen_coll_test.py`. It generates 213 files that have a mix of text and binary data (the binary data is from the collision blocks). Since we need 8 collisions for that, it may take a minute or three to run, but progress is printed. It showcases the simplicity of using the library by completing the task in 25 lines (some blank).
+
+### Python Script Collision Demo
+This demo is in `gen_coll_python.py`. It generates two python files `out_py_good.py` and `out_py_evil.py`. The good one prints `good` to the console, while the evil one prints `evil`. It's not hard to imagine this being used for much more nefarious purposes. Caveat: the behaviour of both good and evil is contained in both files and easily visible upon reading them. This is because the only degree of freedom we get between the two files is choosing the collision blocks.
+
+### Binary Collision Demo
+This demo is in `gen_coll_c.py`. It first compiles a special C program in `c_demo.c`. Then it generates two executables `out_c_good` and `out_c_evil`. The good one prints `good` to the console (via the `good()` function), while the evil one prints `evil` (via the `evil()` function). These programs are of course harder to open up and examine, but still their behaviour can be determined by working through the disassembly. Obfuscation and anti-debugger techniques could provide more help.
+
+## Future Work
+I have taken a look at Marc Stevens' more recent HashClash project, which includes a more sophisticated attack on MD5 that can converge two unrelated pieces of data by adding specially chosen blocks to each. Unfortunately, I was not able to sort out configuration and execution issues with the software. It is much more complex than fastcoll and requires dozens of configuration parameters and the passing through of data between 5 different programs that do steps of the attack in parallel. Still, I am not completely deterred at this point, and will likely make support for it an addition to this project after the semester.
diff --git a/CollisionAttackMD5/c_demo.c b/CollisionAttackMD5/c_demo.c
new file mode 100644
index 0000000..ed22b4c
--- /dev/null
+++ b/CollisionAttackMD5/c_demo.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <string.h>
+
+#define BLOCK "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" \
+              "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" \
+              "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" \
+
+void good();
+void evil();
+              
+int main(int argc, char** argv) {
+    char *first = BLOCK "+";
+    char *second = BLOCK "-";
+    /* Both literals begin with the same 192 '%' bytes; gen_coll_c.py rewrites them in the compiled binary. */
+    if (memcmp(first, second, 192) == 0) {
+        good();
+    }
+    else {
+        evil();
+    }
+
+    return 0;
+}
+
+void good() {
+   puts("good"); 
+}
+
+void evil() {
+   puts("evil"); 
+}
diff --git a/CollisionAttackMD5/clean.sh b/CollisionAttackMD5/clean.sh
new file mode 100755
index 0000000..076ead2
--- /dev/null
+++ b/CollisionAttackMD5/clean.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+# clean up all generated files (-f: do not fail when there is nothing to remove)
+
+rm -f out_*
\ No newline at end of file
diff --git a/CollisionAttackMD5/coll.py b/CollisionAttackMD5/coll.py
new file mode 100644
index 0000000..18179c2
--- /dev/null
+++ b/CollisionAttackMD5/coll.py
@@ -0,0 +1,174 @@
+import os
+import zipfile
+import urllib.request
+import io
+import sys
+from md5 import MD5
+import itertools
+import binascii
+
+FASTCOLL_LOC = 'https://www.win.tue.nl/hashclash/fastcoll_v1.0.0.5-1_source.zip'
+FASTCOLL_PLACE = 'fastcoll'
+
+# grab fastcoll, we need boost library dev (header file) packages installed to compile it
+# sudo apt-get install libboost-all-dev
+if not os.path.exists(os.path.join(FASTCOLL_PLACE, 'fastcoll')):
+    # the compiled binary (not just the directory) is the marker, so a failed build is retried
+    print('Grabbing fastcoll')
+    with urllib.request.urlopen(FASTCOLL_LOC) as resp:
+        mf = io.BytesIO(resp.read())
+    with zipfile.ZipFile(mf) as zf:
+        os.makedirs(FASTCOLL_PLACE, exist_ok=True)
+        zf.extractall(FASTCOLL_PLACE)
+
+    print('Compiling fastcoll')
+    back = os.getcwd()
+    os.chdir(FASTCOLL_PLACE)
+    with open('Makefile', 'w') as mkf:
+        mkf.write("""fastcoll:\n\tg++ -O3 *.cpp -lboost_filesystem -lboost_program_options -lboost_system -o fastcoll\n""")
+    r = os.system('make')
+    os.chdir(back)
+    if r == 0:
+        print('done preparing fastcoll')
+    else:
+        raise Exception('could not compile fastcoll')
+
+### utilities for payload construction
+    
+def md5pad(b, ch=b'\0'):
+    '''Return the padding (repeated ch) that aligns data b to the 64-byte MD5 block size.'''
+    return md5lpad(len(b), ch)
+
+def md5lpad(l, ch=b'\0'):
+    '''Return the padding (repeated ch) that brings a length of l up to a multiple of 64.'''
+    # -l % 64 is the distance to the next multiple of 64, and 0 when l is already aligned
+    padl = -l % 64
+    return ch * padl
+
+### block filter utilities
+
+def filter_disallow_binstrings(strs):
+    '''Build a block filter that accepts a block only if none of strs occurs inside it.'''
+    def out_filter(b):
+        return not any(bad in b for bad in strs)
+    return out_filter
+    
+### generating a collision
+    
+def collide(ihv):
+    '''Returns a tuple pair of binary block alternatives that still result in same MD5 from the IHV.
+
+    ihv is the IHV as bytes. Raises RuntimeError if fastcoll exits with a non-zero status.'''
+    # This is very hackish, but what else can be done when the fastcoll license is so restrictive?
+    ivhex = binascii.hexlify(ihv).decode()
+    f0, f1 = 'out-{}-0'.format(ivhex), 'out-{}-1'.format(ivhex)
+    # developer toggle of whether to display output
+    plus = '  > /dev/null 2>&1'
+    back = os.getcwd()
+    os.chdir(FASTCOLL_PLACE)
+    try:
+        # ivhex is hex digits only, so it is safe to interpolate into the shell command
+        r = os.system('./fastcoll --ihv {} -o {} {}{}'.format(ivhex, f0, f1, plus))
+        if r != 0:
+            raise RuntimeError('fastcoll failed with status {}'.format(r))
+        with open(f0, 'rb') as f0d:
+            b0 = f0d.read()
+        with open(f1, 'rb') as f1d:
+            b1 = f1d.read()
+    finally:
+        # removing the temporary outputs is best-effort; the cwd is always restored
+        for name in (f0, f1):
+            try:
+                os.remove(name)
+            except OSError:
+                pass
+        os.chdir(back)
+    return b0, b1
+
+### very helpful stateful Collider class
+
+class Collider:
+    '''Helper class to generate files with multiple chosen prefix collisions efficiently'''
+
+    def __init__(self, data=b'', pad=b'\0', blockfilter=lambda x: True):
+        '''Generate a new collider with starting data, default padding, and default collision block filters'''
+        self.alldata = [b'']  # data chunks; chunk k is emitted before collision k (see get_collisions)
+        self.div = []  # one (b0, b1) pair of alternative blocks per diverge() call
+        self.dlen = 0  # bytes consumed so far, counting one alternative per collision
+
+        self.pad = pad
+        self.blockfilter = blockfilter
+        self.digester = MD5()  # running hash over the data plus the first alternative of each pair
+
+        if isinstance(data, str):
+            self.strcat(data)
+        else:
+            self.bincat(data)
+
+    def bincat(self, data):
+        '''Add binary data to the working binary state'''
+        self.dlen += len(data)
+        self.digester.update(data)
+        self.alldata[-1] += data
+
+    def strcat(self, s):
+        '''Add string data to the working binary state'''
+        self.bincat(s.encode())
+
+    def padnow(self, pad=None):
+        '''Pad the current working data on the end to a multiple of md5 block size (64 bytes)'''
+        if not pad:
+            pad = self.pad
+        ndata = md5lpad(self.dlen, pad)
+        self.bincat(ndata)
+
+    def diverge(self, pad=None, blockfilter=None):
+        '''Place a choice of 2 different sets of 128 bytes that still keep the running md5
+        hash (via chosen prefix). Beforehand, the current data will be padded if necessary'''
+        if not pad:
+            pad = self.pad
+        if not blockfilter:
+            blockfilter = self.blockfilter
+        self.padnow(pad)
+        # open a fresh chunk so data added after this call lands behind the collision blocks
+        self.alldata.append(b'')
+
+        # run until the blockfilter passes
+        while True:
+            b0, b1 = collide(self.digester.ihv())
+            if blockfilter(b0) and blockfilter(b1):
+                break
+        # only b0 is hashed: collide() promises that b1 results in the same MD5 state
+        self.dlen += len(b0)
+        self.digester.update(b0)
+        self.div.append((b0, b1))
+
+    def assert_aligned(self):
+        '''Raise AssertionError unless the total consumed data is aligned to the md5 block size (64 bytes)'''
+        if self.dlen % 64 != 0:
+            raise AssertionError('data is not aligned to 64 bytes')  # explicit raise survives python -O
+
+    def safe_diverge(self, pad=None, blockfilter=None):
+        '''Only diverge if we don't need to pad. Otherwise we fail with an assertion error.'''
+        self.assert_aligned()
+        self.diverge(pad, blockfilter)
+
+    def get_collisions(self, count=None, lsb_last=True):
+        '''Generator that returns colliding data in succession (at most count items; all if count is None)'''
+        if count is None:
+            count = 2 ** len(self.div)
+
+        for i,bincode in enumerate(itertools.product(range(2), repeat=len(self.div))):
+            if i >= count:
+                break
+            if not lsb_last:
+                bincode = tuple(reversed(bincode))
+
+            blocks = map(lambda i: self.div[i][bincode[i]], range(len(bincode)))
+            zip_data = itertools.zip_longest(self.alldata, blocks, fillvalue=b'')
+            out = b''.join(b''.join(e) for e in zip_data)
+            yield out
+
+    def get_last_coll(self):
+        '''Get both blocks from the last found collision'''
+        return self.div[-1]
\ No newline at end of file
diff --git a/CollisionAttackMD5/gen_coll_c.py b/CollisionAttackMD5/gen_coll_c.py
new file mode 100755
index 0000000..7e5ec81
--- /dev/null
+++ b/CollisionAttackMD5/gen_coll_c.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+
+from coll import Collider, md5pad, filter_disallow_binstrings
+import os, sys
+
+# First compile the C code into a binary
+temp = 'out_c_demo_temp'
+
+os.system('gcc c_demo.c -o {}'.format(temp))
+
+with open(temp, 'rb') as tempfile:
+    compdata = bytearray(tempfile.read())
+    
+    
+first = None
+second = None
+
+# Find strings in binary:
+# We find where in the first one we can put a collision pair (aligned to 64 bytes).
+# The second string gets a copy of the first from the pair,
+# and it is put at the same offset into the first string.
+for i in range(0, len(compdata), 64):
+    s = compdata[i:i+128]
+    if s != b'%' * 128:
+        continue
+        
+    for q in range(i,i+(64*3+2)):
+        if compdata[q] == ord('+') or compdata[q] == ord('-'):
+            startchars = q-(64*3)
+            if not first:
+                first = i
+                offset = i - startchars
+            else:
+                second = startchars + offset
+                
+            compdata[q] = 0
+            break
+        
+
+if not (first and second):
+    raise Exception('error: did not find marker strings')
+
+# Splice in the collision blocks according to the obtained offsets
+collider = Collider(blockfilter=filter_disallow_binstrings([b'\0']))
+collider.bincat(compdata[:first])
+collider.safe_diverge()
+c1, c2 = collider.get_last_coll()
+collider.bincat(compdata[first+128:second] + c1 + compdata[second+128:])
+
+# Write out good and evil binaries
+cols = collider.get_collisions()
+
+GOOD = 'out_c_good'
+EVIL = 'out_c_evil'
+
+with open(GOOD,  'wb') as good:
+    good.write(next(cols))
+    
+with open(EVIL, 'wb') as evil:
+    evil.write(next(cols))
+
+os.system('chmod +x {} {}'.format(GOOD, EVIL))
+os.remove(temp)
diff --git a/CollisionAttackMD5/gen_coll_python.py b/CollisionAttackMD5/gen_coll_python.py
new file mode 100755
index 0000000..1e17ea9
--- /dev/null
+++ b/CollisionAttackMD5/gen_coll_python.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+from coll import Collider, md5pad, filter_disallow_binstrings
+import os
+
+# We generate a file of the form:
+"""
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+#
+
+diff = '''<one of 2 collision blocks>'''
+same = '''<first of the 2 collision blocks>'''
+
+if (same == diff):
+    print "good"
+
+else:
+    print "evil"
+"""
+
+
+collider = Collider(blockfilter=filter_disallow_binstrings([b'\0', b"'''"]))
+
+prefix1 = b"""#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+#"""
+prefix2 = b"""\ndiff = '''"""
+
+# Ensure prefix is a multiple of 64 bytes
+prefix = prefix1 + md5pad(prefix1 + prefix2, b' ') + prefix2
+
+# Load the first half of the collision files that opens the 'diff' variable declaration
+collider.bincat(prefix)
+# Fill in the 'diff' variable with 2 different blocks that may be chosen
+collider.safe_diverge()
+
+postfix = b"""'''
+same = '''"""
+
+c1, c2 = collider.get_last_coll()
+
+postfix += c1
+
+postfix += b"""'''
+
+if (same == diff):
+    print "good"
+
+else:
+    print "evil"
+
+"""
+
+# Close the 'diff' variable string and declare the 'same' variable to always have the 1st collision block
+# Thus for one file: same == diff, but for the other: same != diff
+collider.bincat(postfix)
+
+# Write out the good and evil scripts
+cols = collider.get_collisions()
+
+GOOD = 'out_py_good.py'
+EVIL = 'out_py_evil.py'
+
+with open(GOOD,  'wb') as good:
+    good.write(next(cols))
+    
+with open(EVIL, 'wb') as evil:
+    evil.write(next(cols))
+
+os.system('chmod +x {} {}'.format(GOOD, EVIL))
\ No newline at end of file
diff --git a/CollisionAttackMD5/gen_coll_test.py b/CollisionAttackMD5/gen_coll_test.py
new file mode 100755
index 0000000..9a5f9e9
--- /dev/null
+++ b/CollisionAttackMD5/gen_coll_test.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+
+from coll import Collider, md5pad, filter_disallow_binstrings
+
+# Generate a 213-way collision as a test
+c = Collider(pad=b' ', blockfilter=filter_disallow_binstrings([b'\0']))
+# begin the output files with hello world text
+c.strcat('Hello world.')
+
+# Diverge 8 times. That means 2^8 possibilities
+for i in range(8):
+    print('Stage {} of 8'.format(i+1))
+    # we fork into 2 different possibilities of collision blocks (128 byte garbage each) here
+    c.diverge()
+    # place some text in the middle of each divergence
+    c.strcat('More text: {}\n'.format(i))
+
+c.strcat('\nFinal.')
+
+# Select the first 213 collisions to output to file
+for i,data in enumerate(c.get_collisions(count=213)):
+    with open('out_test_%03d.txt' % i, 'wb') as f:
+        f.write(data)
+        
+print('Done')
\ No newline at end of file
diff --git a/CollisionAttackMD5/md5.py b/CollisionAttackMD5/md5.py
new file mode 100755
index 0000000..7b67fb2
--- /dev/null
+++ b/CollisionAttackMD5/md5.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+
+"""An implementation of MD5 that exposes internals and is directly built up
+from mathematical primitives from the MD5 specification.
+
+It achieves about 500KB/s, or 1/1000x of GNU md5sum.
+Thus, this implementation is not well suited to large amounts of hashing.
+Instead, the point is access to internals."""
+
+__date__ = '2015-07-02'
+__version__ = 0.8
+
+import math
+import binascii
+
+# util
+bin_to_words = lambda x: [x[4*i:4*(i+1)] for i in range(len(x)//4)]
+words_to_bin = lambda x: b''.join(x)
+word_to_int = lambda x: int.from_bytes(x, 'little')
+int_to_word = lambda x: x.to_bytes(4, 'little')
+bin_to_int = lambda x: list(map(word_to_int, bin_to_words(x)))
+int_to_bin = lambda x: words_to_bin(map(int_to_word, x))
+mod32bit = lambda x: x % 2**32
+rotleft = lambda x,n: (x << n) | (x >> (32-n))
+
+# initial state
+IHV0_HEX = '0123456789abcdeffedcba9876543210'
+IHV0 = bin_to_int(binascii.unhexlify(IHV0_HEX.encode()))
+
+# parameters
+BLOCK_SIZE = 64 # 512 bits (64 bytes)
+ROUNDS = BLOCK_SIZE
+
+# addition constants
+AC = [int(2**32 * abs(math.sin(t+1))) for t in range(ROUNDS)]
+
+# rotation constants
+RC = [7,12,17,22] * 4 + [5,9,14,20] * 4 + [4,11,16,23] * 4 + [6,10,15,21] * 4
+
+# non-linear functions
+F = lambda x,y,z: (x & y) ^ (~x & z)
+G = lambda x,y,z: (z & x) ^ (~z & y)
+H = lambda x,y,z: x ^ y ^ z
+I = lambda x,y,z: y ^ (x | ~z)
+Fx = [F] * 16 + [G] * 16 + [H] * 16 + [I] * 16
+
+# data selection
+M1 = lambda t: t
+M2 = lambda t: (1 + 5*t) % 16
+M3 = lambda t: (5 + 3*t) % 16
+M4 = lambda t: (7*t) % 16
+Mx = [M1] * 16 + [M2] * 16 + [M3] * 16 + [M4] * 16
+Wx = [mxi(i) for i,mxi in enumerate(Mx)]
+
+# iterations and function composition
+RoundQNext = lambda w,q,i: mod32bit(q[0] + rotleft(mod32bit(Fx[i](q[0],q[1],q[2]) + q[3] + AC[i] + w[Wx[i]]), RC[i]))
+DoRounds = lambda w,q,i: DoRounds(w, [RoundQNext(w,q,i)] + q[:3], i+1) if (i < ROUNDS) else q
+MD5CompressionInt = lambda ihvs, b: [mod32bit(ihvsi + qi) for ihvsi,qi in zip(ihvs, DoRounds(bin_to_int(b),ihvs,0))]
+arrSh = lambda x: [x[1],x[2],x[3],x[0]]
+arrUs = lambda x: [x[3],x[0],x[1],x[2]]
+MD5Compression = lambda ihv, b: arrUs(MD5CompressionInt(arrSh(ihv),b))
+
+
+class MD5:
+    """Implementation of MD5
+    
+    Expected outputs:
+    >>> MD5(b'').hexdigest()
+    'd41d8cd98f00b204e9800998ecf8427e'
+    >>> MD5(b'a').hexdigest()
+    '0cc175b9c0f1b6a831c399e269772661'
+    >>> MD5(b'abc').hexdigest()
+    '900150983cd24fb0d6963f7d28e17f72'
+    >>> MD5(b'message digest').hexdigest()
+    'f96b697d7cb7938d525a2f31aaf161d0'
+    >>> MD5(b'abcdefghijklmnopqrstuvwxyz').hexdigest()
+    'c3fcd3d76192e4007dfb496cca67e13b'
+    >>> MD5(b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789').hexdigest()
+    'd174ab98d277d9f5a5611c2c9f419d9f'
+    >>> MD5(b'12345678901234567890123456789012345678901234567890123456789012345678901234567890').hexdigest()
+    '57edf4a22be3c955ac49da2e2107b67a'
+    """
+    
+    def __init__(self, data=None):
+        self._ihv = IHV0
+        self.bits = 0
+        self.buf = b''
+        if data:
+            self.update(data)
+    
+    def update(self, data):
+        self.bits += len(data) * 8
+        self.buf += data
+        while len(self.buf) >= BLOCK_SIZE:
+           to_compress, self.buf = self.buf[:BLOCK_SIZE], self.buf[BLOCK_SIZE:]
+           self._ihv = MD5Compression(self._ihv, to_compress)
+    
+    def digest(self):
+        # total received bytes
+        total_bytes = (self.bits // 8)
+        
+        # we deduct 1 extra byte for the 1 bit from the zero padding length
+        zerolen = (56 - (total_bytes + 1)) % 64
+        
+        pad = bytes([0x80] + [0] * zerolen) + (total_bytes * 8).to_bytes(8, 'little')
+
+        temp = MD5()
+        temp._ihv = self._ihv 
+        temp.update(self.buf + pad)
+        digest_value = temp._ihv
+        
+        return int_to_bin(digest_value)
+        
+        
+    def hexdigest(self):
+        return binascii.hexlify(self.digest()).decode()
+    
+    def ihv(self):
+        return int_to_bin(self._ihv)
+    
+    def hexihv(self):
+        """Get the current IHV in hex
+        
+        >>> MD5().hexihv() == IHV0_HEX
+        True
+        >>> MD5(b'test').hexihv() == IHV0_HEX
+        True
+        >>> MD5(b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!?').hexihv()
+        '9d39fa2529070110ab7f132e7a9cacf3'
+        """
+        return binascii.hexlify(self.ihv()).decode()
+
+def md5(data=None):
+    return MD5(data)
+
+
+if __name__ == '__main__':
+    # Testing
+    
+    # check the standard MD5 suite expected outputs in the class docstring
+    print('Doctests')
+    import doctest
+    doctest.testmod(verbose=True)
+    print()
+    
+    print('Unittests')
+    import unittest
+    import hashlib
+    
+    class _TestMD5(unittest.TestCase):
+        """Checks this module's md5() against hashlib.md5 and against itself when fed in pieces."""
+        def test_against_reference_implementation(self):
+            AMOUNT = 1024
+            import random, string
+            rand = random.Random()
+            rand.seed(4)
+            # both generators draw from the seeded instance so every run hashes the same inputs
+            randstring = lambda n: ''.join(rand.choice(string.ascii_uppercase + string.digits) for _ in range(n)).encode()
+            randbin = lambda n: bytes((rand.getrandbits(8) for i in range(n)))
+
+            for i in range(AMOUNT):
+                rlen = rand.randrange(4, 15)
+                randtype = rand.choice([randstring, randbin])
+                to_hash = randtype(rlen)
+                expected = hashlib.md5(to_hash).hexdigest()
+                got = md5(to_hash).hexdigest()
+                self.assertEqual(expected, got, 'hashes for {} do not match'.format(to_hash))
+
+        def test_boundary_padding(self):
+            for i in range(196):
+                to_hash = b'a' * i
+                expected = hashlib.md5(to_hash).hexdigest()
+                got = md5(to_hash).hexdigest()
+                self.assertEqual(expected, got, 'hashes for {} do not match'.format(to_hash.decode()))
+
+        def test_hashing_resume(self):
+            basestr = b'asdfjkl;' * 32
+            expected = md5(basestr).hexdigest()
+            for i in range(len(basestr)):
+                a,b = basestr[:i], basestr[i:]
+                hashobj = md5(a)
+                discard = hashobj.digest()  # taking a digest mid-stream must not disturb the running state
+                hashobj.update(b)
+                got = hashobj.hexdigest()
+                self.assertEqual(expected, got, 'hashes split on index {} do not match'.format(i))
+        
+    unittest.main(verbosity=2)
diff --git a/README.md b/README.md
index fe1694a..e3c2186 100644
--- a/README.md
+++ b/README.md
@@ -11,4 +11,7 @@ Anggota
 2. Ilham Rizky Akbar
 3. Muhammad Fakhruddin Hafizh
 4. Muhammad Jihad Rinaldi
-5. Muhammad Sulthan Rafi Shaquille
\ No newline at end of file
+5. Muhammad Sulthan Rafi Shaquille
+
+Referensi Code
+https://github.com/thereal1024/python-md5-collision
\ No newline at end of file
-- 
GitLab