MD5详解
MD5(Message Digest Algorithm 5)是逆向中常见的一种签名生成算法,主要特点如下:
- 输入任意长度,输出 128 bit 固定长度的信息摘要。
- 运算速度快,MD5 中的运算以位运算(异或,与,或,非等)。
- 不可逆但已经发现碰撞。
算法原理
填充数据
首先需要对输入的数据进行填充,填充的第一位为 1,其余位为 0,使其最终长度模 512 的结果为 448(填充必须进行,哪怕长度正好满足要求,也会再填充 512 bit 的数据)。剩余 64 bit 填入原始长度。这时数据长度可以整除 512,将每 512 bit 数据作为一个分组。
分组数据运算
首先赋值四个初始链接变量,作为后续操作的基础:
A: 0x67452301 B: 0xefcdab89 C: 0x98badcfe D: 0x10325476
随后要提到四个非线性函数:
F(X,Y,Z)=(X&Y)|(~X&Z)
G(X,Y,Z)=(X&Z)|(Y&~Z)
H(X,Y,Z)=X^Y^Z
I(X,Y,Z)=Y^(X|~Z)
基于这四种非线性函数,MD5 会对每个分组进行 4 轮总共 64 次操作,以下操作的 a,b,c,d
初值等于上面的初始链接变量:
// 第一轮 FF(a,b,c,d,Mi,s,tj) 表示a=b+((a+F(b,c,d)+Mi+tj)<<<s),其中<<<表示循环左移
// Mi表示消息的子分组,因为每个分组为512bit,可以分为16个32bit的子分组
// tj表示常数数组的特定元素,由4294967296*abs(sin(j))的整数部分得到
FF(a,b,c,d,M0,7,0xd76aa478)
FF(d,a,b,c,M1,12,0xe8c7b756)
FF(c,d,a,b,M2,17,0×242070db)
FF(b,c,d,a,M3,22,0xc1bdceee)
FF(a,b,c,d,M4,7,0xf57c0faf)
FF(d,a,b,c,M5,12,0×4787c62a)
FF(c,d,a,b,M6,17,0xa8304613)
FF(b,c,d,a,M7,22,0xfd469501)
FF(a,b,c,d,M8,7,0×698098d8)
FF(d,a,b,c,M9,12,0×8b44f7af)
FF(c,d,a,b,M10,17,0xffff5bb1)
FF(b,c,d,a,M11,22,0×895cd7be)
FF(a,b,c,d,M12,7,0×6b901122)
FF(d,a,b,c,M13,12,0xfd987193)
FF(c,d,a,b,M14,17,0xa679438e)
FF(b,c,d,a,M15,22,0×49b40821)
// 第二轮 GG(a,b,c,d,Mi,s,tj) 表示 a=b+((a+G(b,c,d)+Mi+tj)<<<s)
GG(a,b,c,d,M1,5,0xf61e2562)
GG(d,a,b,c,M6,9,0xc040b340)
GG(c,d,a,b,M11,14,0×265e5a51)
GG(b,c,d,a,M0,20,0xe9b6c7aa)
GG(a,b,c,d,M5,5,0xd62f105d)
GG(d,a,b,c,M10,9,0×02441453)
GG(c,d,a,b,M15,14,0xd8a1e681)
GG(b,c,d,a,M4,20,0xe7d3fbc8)
GG(a,b,c,d,M9,5,0×21e1cde6)
GG(d,a,b,c,M14,9,0xc33707d6)
GG(c,d,a,b,M3,14,0xf4d50d87)
GG(b,c,d,a,M8,20,0×455a14ed)
GG(a,b,c,d,M13,5,0xa9e3e905)
GG(d,a,b,c,M2,9,0xfcefa3f8)
GG(c,d,a,b,M7,14,0×676f02d9)
GG(b,c,d,a,M12,20,0×8d2a4c8a)
// 第三轮 HH(a,b,c,d,Mi,s,tj) 表示 a=b+((a+H(b,c,d)+Mi+tj)<<<s)
HH(a,b,c,d,M5,4,0xfffa3942)
HH(d,a,b,c,M8,11,0×8771f681)
HH(c,d,a,b,M11,16,0×6d9d6122)
HH(b,c,d,a,M14,23,0xfde5380c)
HH(a,b,c,d,M1,4,0xa4beea44)
HH(d,a,b,c,M4,11,0×4bdecfa9)
HH(c,d,a,b,M7,16,0xf6bb4b60)
HH(b,c,d,a,M10,23,0xbebfbc70)
HH(a,b,c,d,M13,4,0×289b7ec6)
HH(d,a,b,c,M0,11,0xeaa127fa)
HH(c,d,a,b,M3,16,0xd4ef3085)
HH(b,c,d,a,M6,23,0×04881d05)
HH(a,b,c,d,M9,4,0xd9d4d039)
HH(d,a,b,c,M12,11,0xe6db99e5)
HH(c,d,a,b,M15,16,0×1fa27cf8)
HH(b,c,d,a,M2,23,0xc4ac5665)
// 第四轮 II(a,b,c,d,Mi,s,tj) 表示a=b+((a+I(b,c,d)+Mi+tj)<<<s)
II(a,b,c,d,M0,6,0xf4292244)
II(d,a,b,c,M7,10,0×432aff97)
II(c,d,a,b,M14,15,0xab9423a7)
II(b,c,d,a,M5,21,0xfc93a039)
II(a,b,c,d,M12,6,0×655b59c3)
II(d,a,b,c,M3,10,0×8f0ccc92)
II(c,d,a,b,M10,15,0xffeff47d)
II(b,c,d,a,M1,21,0×85845dd1)
II(a,b,c,d,M8,6,0×6fa87e4f)
II(d,a,b,c,M15,10,0xfe2ce6e0)
II(c,d,a,b,M6,15,0xa3014314)
II(b,c,d,a,M13,21,0×4e0811a1)
II(a,b,c,d,M4,6,0xf7537e82)
II(d,a,b,c,M11,10,0xbd3af235)
II(c,d,a,b,M2,15,0×2ad7d2bb)
II(b,c,d,a,M9,21,0xeb86d391)
对一个分组执行 64 次操作后,更新 A,B,C,D
与 a,b,c,d
值:
A=A+a
B=B+b
C=C+c
D=D+d
a=A
b=B
c=C
d=D
将新的 a,b,c,d
参与到下一个分组的运算中,并不断更新 A,B,C,D
的值,当所有的分组运算完毕后,将 A,B,C,D
拼接起来并按照小端序展示即可。
MD5 Python 实现
import binascii
import sys
import os.path
SV = [0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf,
0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af,
0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e,
0x49b40821, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6,
0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8,
0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122,
0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, 0xd9d4d039,
0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 0xf4292244, 0x432aff97,
0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d,
0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391]
# 根据ascil编码把字符转成对应的二进制
def binvalue(val, bitsize):
binval = bin(val)[2:] if isinstance(val, int) else bin(ord(val))[2:]
if len(binval) > bitsize:
raise ("binary value larger than the expected size")
while len(binval) < bitsize:
binval = "0" + binval
return binval
def string_to_bit_array(text):
array = list()
for char in text:
binval = binvalue(char, 8)
array.extend([int(x) for x in list(binval)])
return array
# 循环左移
def leftCircularShift(k, bits):
bits = bits % 32
k = k % (2 ** 32)
upper = (k << bits) % (2 ** 32)
result = upper | (k >> (32 - (bits)))
return (result)
# 分块
def blockDivide(block, chunks):
result = []
size = len(block) // chunks
for i in range(0, chunks):
result.append(int.from_bytes(block[i * size:(i + 1) * size], byteorder="little"))
return result
# F函数作用于“比特位”上
# if x then y else z
def F(X, Y, Z):
compute = ((X & Y) | ((~X) & Z))
return compute
# if z then x else y
def G(X, Y, Z):
return ((X & Z) | (Y & (~Z)))
# if X = Y then Z else ~Z
def H(X, Y, Z):
return (X ^ Y ^ Z)
def I(X, Y, Z):
return (Y ^ (X | (~Z)))
# 四个F函数
def FF(a, b, c, d, M, s, t):
result = b + leftCircularShift((a + F(b, c, d) + M + t), s)
return (result)
def GG(a, b, c, d, M, s, t):
result = b + leftCircularShift((a + G(b, c, d) + M + t), s)
return (result)
def HH(a, b, c, d, M, s, t):
result = b + leftCircularShift((a + H(b, c, d) + M + t), s)
return (result)
def II(a, b, c, d, M, s, t):
result = b + leftCircularShift((a + I(b, c, d) + M + t), s)
return (result)
# 数据转换
def fmt8(num):
bighex = "{0:08x}".format(num)
binver = binascii.unhexlify(bighex)
result = "{0:08x}".format(int.from_bytes(binver, byteorder='little'))
return (result)
# 计算比特长度
def bitlen(bitstring):
return len(bitstring) * 8
def md5sum(msg):
# 计算比特长度,如果内容过长,64个比特放不下。就取低64bit。
msgLen = bitlen(msg) % (2 ** 64)
# 先填充一个0x80,其实是先填充一个1,后面跟对应个数的0,因为一个明文的编码至少需要8比特,所以直接填充 0b10000000即0x80
msg = msg + b'\x80' # 0x80 = 1000 0000
# 似乎各种编码,即使是一个字母,都至少得1个字节,即8bit才能表示,所以不会出现原文55bit,pad1就满足的情况?可是不对呀,要是二进制文件呢?
# 填充0到满足要求为止。
zeroPad = (448 - (msgLen + 8) % 512) % 512
zeroPad //= 8
msg = msg + b'\x00' * zeroPad + msgLen.to_bytes(8, byteorder='little')
# 计算循环轮数,512个为一轮
msgLen = bitlen(msg)
iterations = msgLen // 512
# 初始化变量
# 算法魔改的第一个点,也是最明显的点
# A = 0x67452301
# B = 0xefcdab89
# C = 0x98badcfe
# D = 0x10325476
# MD5的主体就是对abcd进行n次的迭代,所以得有个初始值,可以随便选,也可以用默认的魔数,这个改起来毫无风险,所以大家爱魔改它,甚至改这个都不算魔改。
# main loop
for i in range(0, iterations):
a = A
b = B
c = C
d = D
block = msg[i * 64:(i + 1) * 64]
# 明文的处理,顺便调整了一下端序
M = blockDivide(block, 16)
# Rounds
a = FF(a, b, c, d, M[0], 7, SV[0])
d = FF(d, a, b, c, M[1], 12, SV[1])
c = FF(c, d, a, b, M[2], 17, SV[2])
b = FF(b, c, d, a, M[3], 22, SV[3])
a = FF(a, b, c, d, M[4], 7, SV[4])
d = FF(d, a, b, c, M[5], 12, SV[5])
c = FF(c, d, a, b, M[6], 17, SV[6])
b = FF(b, c, d, a, M[7], 22, SV[7])
a = FF(a, b, c, d, M[8], 7, SV[8])
d = FF(d, a, b, c, M[9], 12, SV[9])
c = FF(c, d, a, b, M[10], 17, SV[10])
b = FF(b, c, d, a, M[11], 22, SV[11])
a = FF(a, b, c, d, M[12], 7, SV[12])
d = FF(d, a, b, c, M[13], 12, SV[13])
c = FF(c, d, a, b, M[14], 17, SV[14])
b = FF(b, c, d, a, M[15], 22, SV[15])
a = GG(a, b, c, d, M[1], 5, SV[16])
d = GG(d, a, b, c, M[6], 9, SV[17])
c = GG(c, d, a, b, M[11], 14, SV[18])
b = GG(b, c, d, a, M[0], 20, SV[19])
a = GG(a, b, c, d, M[5], 5, SV[20])
d = GG(d, a, b, c, M[10], 9, SV[21])
c = GG(c, d, a, b, M[15], 14, SV[22])
b = GG(b, c, d, a, M[4], 20, SV[23])
a = GG(a, b, c, d, M[9], 5, SV[24])
d = GG(d, a, b, c, M[14], 9, SV[25])
c = GG(c, d, a, b, M[3], 14, SV[26])
b = GG(b, c, d, a, M[8], 20, SV[27])
a = GG(a, b, c, d, M[13], 5, SV[28])
d = GG(d, a, b, c, M[2], 9, SV[29])
c = GG(c, d, a, b, M[7], 14, SV[30])
b = GG(b, c, d, a, M[12], 20, SV[31])
a = HH(a, b, c, d, M[5], 4, SV[32])
d = HH(d, a, b, c, M[8], 11, SV[33])
c = HH(c, d, a, b, M[11], 16, SV[34])
b = HH(b, c, d, a, M[14], 23, SV[35])
a = HH(a, b, c, d, M[1], 4, SV[36])
d = HH(d, a, b, c, M[4], 11, SV[37])
c = HH(c, d, a, b, M[7], 16, SV[38])
b = HH(b, c, d, a, M[10], 23, SV[39])
a = HH(a, b, c, d, M[13], 4, SV[40])
d = HH(d, a, b, c, M[0], 11, SV[41])
c = HH(c, d, a, b, M[3], 16, SV[42])
b = HH(b, c, d, a, M[6], 23, SV[43])
a = HH(a, b, c, d, M[9], 4, SV[44])
d = HH(d, a, b, c, M[12], 11, SV[45])
c = HH(c, d, a, b, M[15], 16, SV[46])
b = HH(b, c, d, a, M[2], 23, SV[47])
a = II(a, b, c, d, M[0], 6, SV[48])
d = II(d, a, b, c, M[7], 10, SV[49])
c = II(c, d, a, b, M[14], 15, SV[50])
b = II(b, c, d, a, M[5], 21, SV[51])
a = II(a, b, c, d, M[12], 6, SV[52])
d = II(d, a, b, c, M[3], 10, SV[53])
c = II(c, d, a, b, M[10], 15, SV[54])
b = II(b, c, d, a, M[1], 21, SV[55])
a = II(a, b, c, d, M[8], 6, SV[56])
d = II(d, a, b, c, M[15], 10, SV[57])
c = II(c, d, a, b, M[6], 15, SV[58])
b = II(b, c, d, a, M[13], 21, SV[59])
a = II(a, b, c, d, M[4], 6, SV[60])
d = II(d, a, b, c, M[11], 10, SV[61])
c = II(c, d, a, b, M[2], 15, SV[62])
b = II(b, c, d, a, M[9], 21, SV[63])
A = (A + a) % (2 ** 32)
B = (B + b) % (2 ** 32)
C = (C + c) % (2 ** 32)
D = (D + d) % (2 ** 32)
result = fmt8(A) + fmt8(B) + fmt8(C) + fmt8(D)
return result
if __name__ == "__main__":
data = str("123456").encode("UTF-8")
print("plainText: ", data)
print("result: ", md5sum(data))