一次pyc文件恢复
Python Pyc的文件格式
[compile.h]
/* Bytecode object */
/* Layout of a compiled code object. The IPython session further down reads
 * co_consts, co_varnames, co_names and co_code from the object that
 * marshal.load() returns.
 * NOTE: the field comments call co_consts and co_names "list", but in that
 * session both are printed as tuples. */
typedef struct {
PyObject_HEAD
int co_argcount; /* #arguments, except *args */
int co_nlocals; /* #local variables */
int co_stacksize; /* #entries needed for evaluation stack */
int co_flags; /* CO_..., see below */
PyObject *co_code; /* instruction opcodes */
PyObject *co_consts; /* list (constants used) */
PyObject *co_names; /* list of strings (names used) */
PyObject *co_varnames; /* tuple of strings (local variable names) */
PyObject *co_freevars; /* tuple of strings (free variable names) */
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
/* The rest doesn't count for hash/cmp */
PyObject *co_filename; /* string (where it was loaded from) */
PyObject *co_name; /* string (name, for reference) */
int co_firstlineno; /* first source line number */
PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) */
} PyCodeObject;
加载pyc co_code
In [1]: import dis,marshal
In [2]: f=open('third.pyc','rb')
In [3]: f.read(4)
Out[3]: '\x03\xf3\r\n'
In [4]: f.read(4)
Out[4]: '\xf1\xe1S\\'
In [5]: code = marshal.load(f)
In [6]: code.co_consts
Out[6]:
(-1,
None,
'+',
'/',
'FcjTCgD1EffEm2rPC3bTyL5Wu2bKBI9KAZrwFgrUygHN',
<code object encode at 0x7f0420ee7f30, file "third.py", line 7>,
"Welcome to Processor's Python Classroom Part 3&4!\n",
'qi shi wo jiu shi lan cai ba liang dao ti fang zai yi qi.',
"Now let's start the origin of Python!\n",
'Plz Input Your Flag:\n',
2,
0,
1,
'',
"You're right! ",
"You're Wrong! ")
In [7]: code.co_varnames
Out[7]: ()
In [8]: code.co_names
Out[8]:
('string',
'list',
'letters',
'digits',
'dec',
'encode',
'raw_input',
'enc',
'lst',
'reverse',
'len',
'llen',
'range',
'i',
'chr',
'ord',
'enc2',
'join',
'enc3')
In [9]: code.co_code
Out[9]: 'q\x03\x00q\t\x00d\x0f\x00q\x0e\x00Gdd\x00\x00d\x01\x00l\x00\x00Z\x00\x00e\x01\x00e\x00\x00j\x02\x00\x83\x01\x00e\x01\x00e\x00\x00j\x03\x00\x83\x01\x00\x17d\x02\x00d\x03\x00g\x02\x00\x17Z\x02\x00d\x04\x00Z\x04\x00d\x05\x00\x84\x00\x00Z\x05\x00d\x06\x00GHd\x07\x00GHd\x08\x00GHd\t\x00GHe\x06\x00\x83\x00\x00Z\x07\x00e\x01\x00e\x07\x00\x83\x01\x00Z\x08\x00e\x08\x00j\t\x00\x83\x00\x00\x01e\n\x00e\x08\x00\x83\x01\x00Z\x0b\x00xc\x00e\x0c\x00e\x0b\x00\x83\x01\x00D]U\x00Z\r\x00e\r\x00d\n\x00\x16d\x0b\x00k\x02\x00r\xc4\x00e\x0e\x00e\x0f\x00e\x08\x00e\r\x00\x19\x83\x01\x00d\n\x00\x18\x83\x01\x00e\x08\x00e\r\x00<n\x00\x00e\x0e\x00e\x0f\x00e\x08\x00e\r\x00\x19\x83\x01\x00d\x0c\x00\x17\x83\x01\x00e\x08\x00e\r\x00<q\x8d\x00Wd\r\x00Z\x10\x00e\x10\x00j\x11\x00e\x08\x00\x83\x01\x00Z\x10\x00e\x05\x00e\x10\x00\x83\x01\x00Z\x12\x00e\x12\x00e\x04\x00k\x02\x00r\x1b\x01d\x0e\x00GHn\x05\x00d\x0f\x00GHd\x01\x00S'
使用 `dis` 库对 `co_code` 进行反汇编
In [10]: dis.dis(code.co_code)
0 JUMP_ABSOLUTE 3
>> 3 JUMP_ABSOLUTE 9
6 LOAD_CONST 15 (15)
>> 9 JUMP_ABSOLUTE 14
12 PRINT_ITEM
13 LOAD_CONST 100 (100)
16 STOP_CODE
17 LOAD_CONST 1 (1)
20 IMPORT_NAME 0 (0)
23 STORE_NAME 0 (0)
26 LOAD_NAME 1 (1)
29 LOAD_NAME 0 (0)
32 LOAD_ATTR 2 (2)
35 CALL_FUNCTION 1
38 LOAD_NAME 1 (1)
41 LOAD_NAME 0 (0)
44 LOAD_ATTR 3 (3)
47 CALL_FUNCTION 1
50 BINARY_ADD
51 LOAD_CONST 2 (2)
54 LOAD_CONST 3 (3)
57 BUILD_LIST 2
60 BINARY_ADD
61 STORE_NAME 2 (2)
64 LOAD_CONST 4 (4)
67 STORE_NAME 4 (4)
70 LOAD_CONST 5 (5)
73 MAKE_FUNCTION 0
76 STORE_NAME 5 (5)
79 LOAD_CONST 6 (6)
82 PRINT_ITEM
83 PRINT_NEWLINE
84 LOAD_CONST 7 (7)
87 PRINT_ITEM
88 PRINT_NEWLINE
89 LOAD_CONST 8 (8)
92 PRINT_ITEM
93 PRINT_NEWLINE
94 LOAD_CONST 9 (9)
97 PRINT_ITEM
98 PRINT_NEWLINE
99 LOAD_NAME 6 (6)
102 CALL_FUNCTION 0
105 STORE_NAME 7 (7)
108 LOAD_NAME 1 (1)
111 LOAD_NAME 7 (7)
114 CALL_FUNCTION 1
117 STORE_NAME 8 (8)
120 LOAD_NAME 8 (8)
123 LOAD_ATTR 9 (9)
126 CALL_FUNCTION 0
129 POP_TOP
130 LOAD_NAME 10 (10)
133 LOAD_NAME 8 (8)
136 CALL_FUNCTION 1
139 STORE_NAME 11 (11)
142 SETUP_LOOP 99 (to 244)
145 LOAD_NAME 12 (12)
148 LOAD_NAME 11 (11)
151 CALL_FUNCTION 1
154 GET_ITER
155 FOR_ITER 85 (to 243)
158 STORE_NAME 13 (13)
161 LOAD_NAME 13 (13)
164 LOAD_CONST 10 (10)
167 BINARY_MODULO
168 LOAD_CONST 11 (11)
171 COMPARE_OP 2 (==)
174 POP_JUMP_IF_FALSE 196
177 LOAD_NAME 14 (14)
180 LOAD_NAME 15 (15)
183 LOAD_NAME 8 (8)
186 LOAD_NAME 13 (13)
189 BINARY_SUBSCR
190 CALL_FUNCTION 1
193 LOAD_CONST 10 (10)
>> 196 BINARY_SUBTRACT
197 CALL_FUNCTION 1
200 LOAD_NAME 8 (8)
203 LOAD_NAME 13 (13)
206 STORE_SUBSCR
207 JUMP_FORWARD 0 (to 210)
>> 210 LOAD_NAME 14 (14)
213 LOAD_NAME 15 (15)
216 LOAD_NAME 8 (8)
219 LOAD_NAME 13 (13)
222 BINARY_SUBSCR
223 CALL_FUNCTION 1
226 LOAD_CONST 12 (12)
229 BINARY_ADD
230 CALL_FUNCTION 1
233 LOAD_NAME 8 (8)
236 LOAD_NAME 13 (13)
239 STORE_SUBSCR
240 JUMP_ABSOLUTE 141
>> 243 POP_BLOCK
>> 244 LOAD_CONST 13 (13)
247 STORE_NAME 16 (16)
250 LOAD_NAME 16 (16)
253 LOAD_ATTR 17 (17)
256 LOAD_NAME 8 (8)
259 CALL_FUNCTION 1
262 STORE_NAME 16 (16)
265 LOAD_NAME 5 (5)
268 LOAD_NAME 16 (16)
271 CALL_FUNCTION 1
274 STORE_NAME 18 (18)
277 LOAD_NAME 18 (18)
280 LOAD_NAME 4 (4)
>> 283 COMPARE_OP 2 (==)
286 POP_JUMP_IF_FALSE 283
289 LOAD_CONST 14 (14)
292 PRINT_ITEM
293 PRINT_NEWLINE
294 JUMP_FORWARD 5 (to 302)
297 LOAD_CONST 15 (15)
300 PRINT_ITEM
301 PRINT_NEWLINE
>> 302 LOAD_CONST 1 (1)
305 RETURN_VALUE
刚开始走了一些弯路,通读了 opcode,结果发现并不需要:只要把开头用来对抗 `uncompyle2` 的那段混淆字节去掉,并相应修改 `co_code` 的长度,即可正常反编译。期望修改后的反汇编结果开头为(`LOAD_CONST` 占 3 字节,所以第二条指令的偏移是 3)
0 LOAD_CONST 0 (0)
3 LOAD_CONST 1 (1)
...
使用 `hexdump` 查看文件,先对照几个 opcode 的字节编码(1 字节操作码 + 2 字节小端参数):
0x64 操作为LOAD_CONST,用法举例:LOAD_CONST 1 HEX: 640100
0x71 操作为JUMP_ABSOLUTE,用法举例:JUMP_ABSOLUTE 14 HEX: 710e00
0x65 操作为LOAD_NAME,用法举例:LOAD_NAME 1 HEX: 650100
...
修改原始pyc
通过 opcode 及 `hexdump` 可以确定:当前 `co_code` 长度为 `0x132`(小端存放,低两个字节位于文件偏移 `0x1a`、`0x1b`,即 `32 01`);`0x1e` 到 `0x2c`(左闭右开,共 14 字节)这部分为混淆代码,直接从 16 进制数据中删除,然后把 `co_code` 长度改为 `0x132-(0x2c-0x1e)=0x124`,即把 `32 01` 改为 `24 01`,保存代码
In [1]: with open('third.pyc','r') as f:
...: dt = f.read()
...:
In [2]: dt = dt[:0x1a]+'\x24'+dt[0x1b:0x1e]+dt[0x2c:]
In [3]: with open('third_test2.pyc', 'w') as f:
...: f.write(dt)
...:
然后使用 `uncompyle2 third_test2.pyc > third_source.py` 进行反编译,
源码如下:
# 2019.02.16 14:17:20 CST
#Embedded file name: third.py
import string
# Lookup table indexed by encode(): string.letters, then the digits, then '+' and '/'.
letters = list(string.letters) + list(string.digits) + ['+', '/']
# Expected value: the script prints "You're right! " when the encoded input equals this.
dec = 'FcjTCgD1EffEm2rPC3bTyL5Wu2bKBI9KAZrwFgrUygHN'
def encode(input_str):
    # Base64-style encoder that looks characters up in the module-level
    # `letters` table. Indentation restored; statements are unchanged from
    # the decompiler output.
    # Render every input character as an 8-character binary string.
    str_ascii_list = [ '{:0>8}'.format(str(bin(ord(i))).replace('0b', '')) for i in input_str ]
    output_str = ''
    equal_num = 0
    while str_ascii_list:
        # Consume three characters (24 bits) per iteration.
        temp_list = str_ascii_list[:3]
        if len(temp_list) != 3:
            # Short final group: pad with zero bytes, counting one '=' per pad.
            while len(temp_list) < 3:
                equal_num += 1
                temp_list += ['00000000']
        temp_str = ''.join(temp_list)
        # Split the 24 bits into four 6-bit table indices.
        temp_str_list = [ temp_str[x:x + 6] for x in [0,
         6,
         12,
         18] ]
        temp_str_list = [ int(x, 2) for x in temp_str_list ]
        if equal_num:
            # Drop the indices that consist only of padding bits.
            temp_str_list = temp_str_list[0:4 - equal_num]
        output_str += ''.join([ letters[x] for x in temp_str_list ])
        str_ascii_list = str_ascii_list[3:]
    output_str = output_str + '=' * equal_num
    return output_str
print "Welcome to Processor's Python Classroom Part 3&4!\n"
print 'qi shi wo jiu shi lan cai ba liang dao ti fang zai yi qi.'
print "Now let's start the origin of Python!\n"
print 'Plz Input Your Flag:\n'
enc = raw_input()
lst = list(enc)
lst.reverse()
llen = len(lst)
for i in range(llen):
if i % 2 == 0:
lst[i] = chr(ord(lst[i]) - 2)
lst[i] = chr(ord(lst[i]) + 1)
enc2 = ''
enc2 = enc2.join(lst)
enc3 = encode(enc2)
if enc3 == dec:
print "You're right! "
else:
print "You're Wrong! "
# +++ okay decompyling third_test2.pyc
# decompiled 1 files: 1 okay, 0 failed, 0 verify failed
# 2019.02.16 14:17:21 CST
至此,代码已经还原,剩下的题目就很简单了。
解读代码
`encode` 函数实现了一个 base64,这里有一点点坑:它使用的编码表是 `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/`(小写字母在前),并非标准的 `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/`,解码前把密文中的字母大小写互换,就可以直接用标准 base64 解码
solve.py↓
#!/usr/bin/python
# -*- coding: utf-8 -*-
def decode(input_str):
    """Recover the input that the challenge script encodes to *input_str*.

    The challenge reverses the input, shifts characters at even positions
    by -1 and at odd positions by +1, then base64-encodes the result with
    an alphabet that lists lowercase letters before uppercase ones. This
    function undoes those steps in the opposite order.

    Args:
        input_str: Text produced by the challenge's encoder.

    Returns:
        The decoded text as a native string.
    """
    # Imported locally because this script has no import section.
    import base64

    # The only difference from the standard alphabet is that the two letter
    # ranges trade places, so swapping case yields standard base64 text.
    standard_b64 = input_str.swapcase()

    # bytearray iterates as integers on both Python 2 and Python 3.
    raw = bytearray(base64.b64decode(standard_b64))

    # Undo the per-position shift: +1 at even indices, -1 at odd indices.
    chars = [
        chr(value + 1) if index % 2 == 0 else chr(value - 1)
        for index, value in enumerate(raw)
    ]

    # The encoder reversed the input before shifting; reverse back last.
    chars.reverse()
    return ''.join(chars)
if __name__ == '__main__':
    # Target string copied from the decompiled third.py.
    dec = 'FcjTCgD1EffEm2rPC3bTyL5Wu2bKBI9KAZrwFgrUygHN'
    # A parenthesised single-argument print works on Python 2 and Python 3.
    print(decode(dec))