From b59d48c390543e5d98979c5374552fdddb06b98f Mon Sep 17 00:00:00 2001
From: mda <mika.dabelza@ui.ac.id>
Date: Sun, 28 Mar 2021 09:20:40 +0700
Subject: [PATCH] check keys and words in kbbi

---
 vigenere_copy.py | 242 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 157 insertions(+), 85 deletions(-)

diff --git a/vigenere_copy.py b/vigenere_copy.py
index 07cf4d0..200faab 100644
--- a/vigenere_copy.py
+++ b/vigenere_copy.py
@@ -19,6 +19,7 @@ https://inventwithpython.com/vigenereHacker.py
 
 import string
 import json
+import random
 
 
 MAX_KEY_LENGTH = 50
@@ -30,13 +31,13 @@ vals = {'A': 0, 'B': 1, 'C': 2, 'D': 3,
         'Q': 16, 'R': 17, 'S': 18, 'T': 19,
         'U': 20, 'V': 21, 'W': 22, 'X': 23,
         'Y': 24, 'Z': 25}
-nums = { 0: 'A', 1: 'B', 2: 'C', 3: 'D',
-         4: 'E', 5: 'F', 6: 'G', 7: 'H',
-         8: 'I', 9: 'J', 10: 'K', 11: 'L',
-         12: 'M', 13: 'N', 14: 'O', 15: 'P',
-         16: 'Q', 17: 'R', 18: 'S', 19: 'T',
-         20: 'U', 21: 'V', 22: 'W', 23: 'X',
-         24: 'Y', 25: 'Z'}
+nums = {0: 'A', 1: 'B', 2: 'C', 3: 'D',
+        4: 'E', 5: 'F', 6: 'G', 7: 'H',
+        8: 'I', 9: 'J', 10: 'K', 11: 'L',
+        12: 'M', 13: 'N', 14: 'O', 15: 'P',
+        16: 'Q', 17: 'R', 18: 'S', 19: 'T',
+        20: 'U', 21: 'V', 22: 'W', 23: 'X',
+        24: 'Y', 25: 'Z'}
 
 # freq = {'A': 0.2039, 'B': 0.0264, 'C': 0.0076, 'D': 0.05,
 #         'E': 0.0828, 'F': 0.0021, 'G': 0.0366, 'H': 0.0274,
@@ -46,32 +47,39 @@ nums = { 0: 'A', 1: 'B', 2: 'C', 3: 'D',
 #         'U': 0.0462, 'V': 0.0018, 'W': 0.0048, 'X': 0.0003,
 #         'Y': 0.0188, 'Z': 0.0004}
 
+
 def letterToVal(letter):
     letter = letter.upper()
     return vals[letter]
 
+
 def valToLetter(val):
     return nums[val]
 
+
 def getSq(s):
-    expCount=dict.fromkeys(range(26), 0)
-    chiSq=0
+    expCount = dict.fromkeys(range(26), 0)
+    chiSq = 0
     for y in range(26):
-        expCount[y]=freq[valToLetter(y)]*len(s)
+        expCount[y] = freq[valToLetter(y)]*len(s)
 
     for y in range(26):
-            chiSq= chiSq+(((s.count(valToLetter(y))-expCount[y])**2)/expCount[y])
+        chiSq = chiSq + \
+            (((s.count(valToLetter(y))-expCount[y])**2)/expCount[y])
 
     return chiSq
 
+
 def getKeyLetter(c):
     return valToLetter((min(c, key=lambda k: c[k])))
 
+
 def encrypt(p, k):
     output = ''
     keyIndex = 0
     for x in p:
-        output = output + valToLetter((letterToVal(x)+letterToVal(k[keyIndex]))%26)
+        output = output + \
+            valToLetter((letterToVal(x)+letterToVal(k[keyIndex])) % 26)
         if keyIndex == len(k)-1:
             keyIndex = 0
         else:
@@ -79,11 +87,13 @@ def encrypt(p, k):
 
     return output
 
+
 def decrypt(c, k):
     output = ''
     keyIndex = 0
     for x in c:
-        output = output + valToLetter((letterToVal(x)-letterToVal(k[keyIndex]))%26)
+        output = output + \
+            valToLetter((letterToVal(x)-letterToVal(k[keyIndex])) % 26)
         if keyIndex == len(k)-1:
             keyIndex = 0
         else:
@@ -91,15 +101,16 @@ def decrypt(c, k):
 
     return output
 
+
 def findKey(keyLength, stripcipher):
-    #Store nth letter strings in dictionary for frequency analysis
-    keys = dict.fromkeys(range(keyLength),'')
+    # Store nth letter strings in dictionary for frequency analysis
+    keys = dict.fromkeys(range(keyLength), '')
     for j in range(keyLength):
         for i in range(j, len(stripcipher), keyLength):
-            keys[j]=keys[j]+stripcipher[i]
+            keys[j] = keys[j]+stripcipher[i]
 
-    #Deciphers each letter of strings of every nth letter using each letter in the
-    #alphabet. Stores output in dictionaries within masterDictionary.
+    # Deciphers each letter of strings of every nth letter using each letter in the
+    # alphabet. Stores output in dictionaries within masterDictionary.
     masterDictionary = {}
     for i in range(keyLength):
         masterDictionary['d'+str(i)] = dict.fromkeys(range(26), '')
@@ -107,16 +118,18 @@ def findKey(keyLength, stripcipher):
     for x in range(keyLength):
         for j in range(26):
             for i in keys[x]:
-                masterDictionary['d'+str(x)][j] = masterDictionary['d'+str(x)][j]+valToLetter(((letterToVal(i))-j)%26)
+                masterDictionary['d'+str(x)][j] = masterDictionary['d' +
+                                                                   str(x)][j]+valToLetter(((letterToVal(i))-j) % 26)
 
-    #Perform Chi Square analysis on the deciphered strings and store in masterCipher
+    # Perform Chi Square analysis on the deciphered strings and store in masterCipher
     masterCipher = {}
     for i in range(keyLength):
         masterCipher['c'+str(i)] = dict.fromkeys(range(26), '')
 
     for i in range(keyLength):
         for j in range(26):
-            masterCipher['c'+str(i)][j] = getSq(masterDictionary['d'+str(i)][j])
+            masterCipher['c' +
+                         str(i)][j] = getSq(masterDictionary['d'+str(i)][j])
 
     '''
     Guess secret key based on Chi Sq. Analysis and return results
@@ -126,18 +139,20 @@ def findKey(keyLength, stripcipher):
         key = key + getKeyLetter(masterCipher['c'+str(i)])
     return key, masterCipher
 
+
 '''
 The following Kasiski Analysis code is modified from Invent With Python:
 https://inventwithpython.com/vigenereHacker.py
 '''
+
+
 def findRepeatSequencesSpacings(message):
     # Goes through the message and finds any 3 to 5 letter sequences
     # that are repeated. Returns a dict with the keys of the sequence and
     # values of a list of spacings (num of letters between the repeats).
 
-
     # Compile a list of seqLen-letter sequences found in the message.
-    seqSpacings = {} # keys are sequences, values are list of int spacings
+    seqSpacings = {}  # keys are sequences, values are list of int spacings
     for seqLen in range(3, 6):
         for seqStart in range(len(message) - seqLen):
             # Determine what the sequence is, and store it in seq
@@ -148,7 +163,7 @@ def findRepeatSequencesSpacings(message):
                 if message[i:i + seqLen] == seq:
                     # Found a repeated sequence.
                     if seq not in seqSpacings:
-                        seqSpacings[seq] = [] # initialize blank list
+                        seqSpacings[seq] = []  # initialize blank list
 
                     # Append the spacing distance between the repeated
                     # sequence and the original sequence.
@@ -162,13 +177,13 @@ def getUsefulFactors(num):
     # returns [2, 72, 3, 48, 4, 36, 6, 24, 8, 18, 9, 16, 12]
 
     if num < 2:
-        return [] # numbers less than 2 have no useful factors
+        return []  # numbers less than 2 have no useful factors
 
-    factors = [] # the list of factors found
+    factors = []  # the list of factors found
 
     # When finding factors, you only need to check the integers up to
     # MAX_KEY_LENGTH.
-    for i in range(2, MAX_KEY_LENGTH + 1): # don't test 1
+    for i in range(2, MAX_KEY_LENGTH + 1):  # don't test 1
         if num % i == 0:
             factors.append(i)
             factors.append(int(num / i))
@@ -183,7 +198,7 @@ def getItemAtIndexOne(x):
 
 def getMostCommonFactors(seqFactors):
     # First, get a count of how many times a factor occurs in seqFactors.
-    factorCounts = {} # key is a factor, value is how often if occurs
+    factorCounts = {}  # key is a factor, value is how often if occurs
 
     # seqFactors keys are sequences, values are lists of factors of the
     # spacings. seqFactors has a value like: {'GFD': [2, 3, 4, 6, 9, 12,
@@ -203,7 +218,7 @@ def getMostCommonFactors(seqFactors):
         if factor <= MAX_KEY_LENGTH:
             # factorsByCount is a list of tuples: (factor, factorCount)
             # factorsByCount has a value like: [(3, 497), (2, 487), ...]
-            factorsByCount.append( (factor, factorCounts[factor]) )
+            factorsByCount.append((factor, factorCounts[factor]))
 
     # Sort the list by the factor count.
     factorsByCount.sort(key=getItemAtIndexOne, reverse=True)
@@ -236,11 +251,13 @@ def kasiskiExamination(ciphertext):
 
     return allLikelyKeyLengths, factorsByCount
 
+
 def read_json(filename):
-    with open(filename) as json_file: 
-        freq = json.load(json_file) 
+    with open(filename) as json_file:
+        freq = json.load(json_file)
     return freq
 
+
 def text_cleaner(cipherText):
     spacePunctuationList = {}
     uppercaseList = []
@@ -261,7 +278,7 @@ def restore_text(spacePunctuationList, uppercaseList, text):
     text = text.lower()
     for i in spacePunctuationList.keys():
         text = text[:i] + spacePunctuationList[i] + text[i:]
-    
+
     for i in uppercaseList:
         temp_text = text[:i] + text[i].upper()
         if i != len(text)-1:
@@ -271,94 +288,149 @@ def restore_text(spacePunctuationList, uppercaseList, text):
     return text
 
 
+def binary_search(arr, x):
+    """Return an index of x in the ascending-sorted list arr, or -1.
+
+    arr must already be sorted ascending; the lookup is only meaningful
+    on sorted input, exactly as with the hand-written loop it replaces.
+    """
+    from bisect import bisect_left
+
+    # bisect_left yields the leftmost insertion point for x, so x is
+    # present exactly when that slot exists and already holds x.
+    pos = bisect_left(arr, x)
+    if pos < len(arr) and arr[pos] == x:
+        return pos
+    return -1
+
+
+def offer_answer(key, masterCipher, decrypted):
+    """Show a candidate; on request print tables (reads global kasiskiMaster)."""
+    print("\nFound Decryption Key: " + key)
+    print("Decrypted Message: " + decrypted)
+    prompt = "Display Kasiski analysis and frequency analysis? y/n: "
+    if input(prompt).lower() == "y":
+        print("Kasiski Analysis Table: \nKey Length: \t Score:")
+        for factor, score in kasiskiMaster:
+            print(str(factor) + "\t \t" + str(score))
+        # Report len(key); str(x) used to print a leftover Kasiski row.
+        print("Chi Square Analysis Table given Key Length " + str(len(key)))
+        for label, scores in masterCipher.items():
+            print("Key Letter: " + str(label))
+            print("Letter: \t Chi Square:")
+            for y, chi in scores.items():
+                print(valToLetter(y) + "\t  \t" + str(chi))
+
+
 '''
 Program Start
 '''
-#set up frequency table
+# set up frequency table
 freq = read_json("letter_percentage_web.json")
+# Word list used by binary_search(); assumed pre-sorted - TODO confirm.
+with open('kumpulan_kata.txt', 'r') as fl:
+    words = fl.read().splitlines()
 
-ans=True
+ans = True
 while ans:
     print("Vignere Cipher")
-    print ("""
+    print("""
     1. Encrypt
     2. Decrypt
     3. Crack Ciphertext
     4. Exit/Quit
     """)
 
-    ans=input("What would you like to do? ") 
-    
-    if ans=="1": 
+    ans = input("What would you like to do? ")
+
+    if ans == "1":
         print("\nEncrypt")
         plaintext = input("Enter complete plaintext: ")
         key = input("Enter key: ").upper()
-        cleaned_plaintext, spacePunctuationList, uppercaseList = text_cleaner(plaintext)
-        ciphertext = encrypt(cleaned_plaintext,key)
-        ciphertext = restore_text(spacePunctuationList, uppercaseList, ciphertext)
+        cleaned_plaintext, spacePunctuationList, uppercaseList = text_cleaner(
+            plaintext)
+        ciphertext = encrypt(cleaned_plaintext, key)
+        ciphertext = restore_text(
+            spacePunctuationList, uppercaseList, ciphertext)
         print('ciphertext: ', ciphertext)
-    
-    elif ans=="2":
+
+    elif ans == "2":
         print("\nDecrypt")
         ciphertext = input("Enter complete ciphertext: ")
         key = input("Enter key: ").upper()
-        cleaned_ciphertext, spacePunctuationList, uppercaseList = text_cleaner(ciphertext)
-        plaintext = decrypt(cleaned_ciphertext,key)
-        plaintext = restore_text(spacePunctuationList, uppercaseList, plaintext)
+        cleaned_ciphertext, spacePunctuationList, uppercaseList = text_cleaner(
+            ciphertext)
+        plaintext = decrypt(cleaned_ciphertext, key)
+        plaintext = restore_text(spacePunctuationList,
+                                 uppercaseList, plaintext)
         print('plain text: ', plaintext)
-    
-    elif ans=="3":
+
+    elif ans == "3":
 
         print("\nCrack")
         ciphertext = input("Enter complete ciphertext: ")
-        stripcipher, spacePunctuationList, uppercaseList = text_cleaner(ciphertext)
-        print(stripcipher)
+        stripcipher, spacePunctuationList, uppercaseList = text_cleaner(
+            ciphertext)
         allLikelyKeyLengths, kasiskiMaster = kasiskiExamination(stripcipher)
         print("Attempting decryption process using likely key lengths...")
-        keys = [None]*len(allLikelyKeyLengths)
-        for x,idx in allLikelyKeyLengths:
-            (key, masterCipher) = findKey(x, stripcipher)            
-            print("Found Decryption Key: " + key)
+        keys = []
+        found = False
+        for x in range(len(allLikelyKeyLengths)):
+            (key, masterCipher) = findKey(allLikelyKeyLengths[x], stripcipher)
             decrypted = decrypt(stripcipher, key)
-            decrypted = restore_text(spacePunctuationList, uppercaseList, decrypted)
-            print("Decrypted Message: " + decrypted)
-            display = input("Display Kasiski analysis and frequency analysis? y/n: ").lower()
-            if display == "y":
-                print("Kasiski Analysis Table: ")
-                print("Key Length: \t Score:")
-                for x in kasiskiMaster:
-                    print(str(x[0]) + "\t \t" + str(x[1]))
-                print("Chi Square Analysis Table given Key Length " + str(x))
-                for x in masterCipher:
-                    print("Key Letter: "+ str(x))
-                    print("Letter: \t Chi Square:")
-                    for y in masterCipher[x]:
-                        print(valToLetter(y) + "\t  \t" + str(masterCipher[x][y]))
-
-            #Check decryption, continue if incorrect
-            opt = input("Is this decryption correct? y/n: ").lower()
-            if opt == "y":
-                break
+            decrypted = restore_text(
+                spacePunctuationList, uppercaseList, decrypted)
+            if binary_search(words, key.lower()) != -1:
+                offer_answer(key, masterCipher, decrypted)
+                opt = input("Is this decryption correct? y/n: ").lower()
+                if opt == "y":
+                    found = True
+                    break
+                else:
+                    print("Trying next key length...")
             else:
-                print("Trying next key length...")
-
-    
-    elif ans=="4":
-      print("\nGoodbye")
-      ans = False 
-    elif ans !="":
-      print("\nNot Valid Choice Try again") 
-    
-    print("\n")
+                # Spot-check random words of the candidate plaintext against
+                # the word list; with no words at all, nothing is sampled.
+                tokens = decrypted.split()
+                strip_punct = str.maketrans('', '', string.punctuation)
+                for _ in range(min(len(tokens), 1 if len(tokens) <= 3 else 3)):
+                    guess = random.choice(tokens).translate(strip_punct)
+                    if binary_search(words, guess) != -1:
+                        offer_answer(key, masterCipher, decrypted)
+                        opt = input("Is this decryption correct? y/n: ").lower()
+                        found = opt == "y"
+                        if not found:
+                            print("Trying next key length...")
+                        break
+                else:  # never offered: keep the candidate for the fallback
+                    keys.append((key, masterCipher, decrypted))
+                if found:  # confirmed: stop trying further key lengths
+                    break
+
+        if(not found):
+            for x in keys:
+                # Check decryption, continue if incorrect
+                offer_answer(x[0], x[1], x[2])
+                opt = input("Is this decryption correct? y/n: ").lower()
+                if opt == "y":
+                    break
+                else:
+                    print("Trying next key length...")
+
+    elif ans == "4":
+        print("\nGoodbye")
+        ans = False
+    elif ans != "":
+        print("\nNot Valid Choice Try again")
 
+    print("\n")
 
 
-#collect ciphertext from user and remove spaces and punctuation
+# collect ciphertext from user and remove spaces and punctuation
 # ciphertext = input("Enter complete ciphertext: ")
 # stripcipher,spaceList = text_cleaner(ciphertext)
 
 
-
 # #find possible key lengths
 # allLikelyKeyLengths, kasiskiMaster = kasiskiExamination(stripcipher)
 
-- 
GitLab