MoR03r MoR03r's Blog
一次pyc文件恢复
发表于 2019-2-16 | CTF

一次pyc文件恢复

Python Pyc的文件格式

 [compile.h]
    /* Bytecode object */
    typedef struct {
            PyObject_HEAD
            int co_argcount;        /* #arguments, except *args */
            int co_nlocals;     /* #local variables */
            int co_stacksize;       /* #entries needed for evaluation stack */
            int co_flags;       /* CO_..., see below */
            PyObject *co_code;      /* instruction opcodes */
            PyObject *co_consts;    /* list (constants used) */
            PyObject *co_names;     /* list of strings (names used) */
            PyObject *co_varnames;  /* tuple of strings (local variable names) */
            PyObject *co_freevars;  /* tuple of strings (free variable names) */
            PyObject *co_cellvars;      /* tuple of strings (cell variable names) */
            /* The rest doesn't count for hash/cmp */
            PyObject *co_filename;  /* string (where it was loaded from) */
            PyObject *co_name;      /* string (name, for reference) */
            int co_firstlineno;     /* first source line number */
            PyObject *co_lnotab;    /* string (encoding addr<->lineno mapping) */
    } PyCodeObject;

加载pyc co_code

 In [1]: import dis,marshal

    In [2]: f=open('third.pyc')

    In [3]: f.read(4)
    Out[3]: '\x03\xf3\r\n'

    In [4]: f.read(4)
    Out[4]: '\xf1\xe1S\\'

    In [5]: code = marshal.load(f)

    In [6]: code.co_consts
    Out[6]: 
    (-1,
     None,
     '+',
     '/',
     'FcjTCgD1EffEm2rPC3bTyL5Wu2bKBI9KAZrwFgrUygHN',
     <code object encode at 0x7f0420ee7f30, file "third.py", line 7>,
     "Welcome to Processor's Python Classroom Part 3&4!\n",
     'qi shi wo jiu shi lan cai ba liang dao ti fang zai yi qi.',
     "Now let's start the origin of Python!\n",
     'Plz Input Your Flag:\n',
     2,
     0,
     1,
     '',
     "You're right! ",
     "You're Wrong! ")

    In [7]: code.co_varnames
    Out[7]: ()

    In [8]: code.co_names
    Out[8]: 
    ('string',
     'list',
     'letters',
     'digits',
     'dec',
     'encode',
     'raw_input',
     'enc',
     'lst',
     'reverse',
     'len',
     'llen',
     'range',
     'i',
     'chr',
     'ord',
     'enc2',
     'join',
     'enc3')

    In [9]: code.co_code
    Out[9]: 'q\x03\x00q\t\x00d\x0f\x00q\x0e\x00Gdd\x00\x00d\x01\x00l\x00\x00Z\x00\x00e\x01\x00e\x00\x00j\x02\x00\x83\x01\x00e\x01\x00e\x00\x00j\x03\x00\x83\x01\x00\x17d\x02\x00d\x03\x00g\x02\x00\x17Z\x02\x00d\x04\x00Z\x04\x00d\x05\x00\x84\x00\x00Z\x05\x00d\x06\x00GHd\x07\x00GHd\x08\x00GHd\t\x00GHe\x06\x00\x83\x00\x00Z\x07\x00e\x01\x00e\x07\x00\x83\x01\x00Z\x08\x00e\x08\x00j\t\x00\x83\x00\x00\x01e\n\x00e\x08\x00\x83\x01\x00Z\x0b\x00xc\x00e\x0c\x00e\x0b\x00\x83\x01\x00D]U\x00Z\r\x00e\r\x00d\n\x00\x16d\x0b\x00k\x02\x00r\xc4\x00e\x0e\x00e\x0f\x00e\x08\x00e\r\x00\x19\x83\x01\x00d\n\x00\x18\x83\x01\x00e\x08\x00e\r\x00<n\x00\x00e\x0e\x00e\x0f\x00e\x08\x00e\r\x00\x19\x83\x01\x00d\x0c\x00\x17\x83\x01\x00e\x08\x00e\r\x00<q\x8d\x00Wd\r\x00Z\x10\x00e\x10\x00j\x11\x00e\x08\x00\x83\x01\x00Z\x10\x00e\x05\x00e\x10\x00\x83\x01\x00Z\x12\x00e\x12\x00e\x04\x00k\x02\x00r\x1b\x01d\x0e\x00GHn\x05\x00d\x0f\x00GHd\x01\x00S'

使用dis库对co_code进行解释

 In [10]: dis.dis(code.co_code)
                        0 JUMP_ABSOLUTE       3
            >>    3 JUMP_ABSOLUTE       9
                        6 LOAD_CONST         15 (15)
            >>    9 JUMP_ABSOLUTE      14
                    12 PRINT_ITEM     
                    13 LOAD_CONST        100 (100)
                    16 STOP_CODE      
                    17 LOAD_CONST          1 (1)
                    20 IMPORT_NAME         0 (0)
                    23 STORE_NAME          0 (0)
                    26 LOAD_NAME           1 (1)
                    29 LOAD_NAME           0 (0)
                    32 LOAD_ATTR           2 (2)
                    35 CALL_FUNCTION       1
                    38 LOAD_NAME           1 (1)
                    41 LOAD_NAME           0 (0)
                    44 LOAD_ATTR           3 (3)
                    47 CALL_FUNCTION       1
                    50 BINARY_ADD     
                    51 LOAD_CONST          2 (2)
                    54 LOAD_CONST          3 (3)
                    57 BUILD_LIST          2
                    60 BINARY_ADD     
                    61 STORE_NAME          2 (2)
                    64 LOAD_CONST          4 (4)
                    67 STORE_NAME          4 (4)
                    70 LOAD_CONST          5 (5)
                    73 MAKE_FUNCTION       0
                    76 STORE_NAME          5 (5)
                    79 LOAD_CONST          6 (6)
                    82 PRINT_ITEM     
                    83 PRINT_NEWLINE  
                    84 LOAD_CONST          7 (7)
                    87 PRINT_ITEM     
                    88 PRINT_NEWLINE  
                    89 LOAD_CONST          8 (8)
                    92 PRINT_ITEM     
                    93 PRINT_NEWLINE  
                    94 LOAD_CONST          9 (9)
                    97 PRINT_ITEM     
                    98 PRINT_NEWLINE  
                    99 LOAD_NAME           6 (6)
                    102 CALL_FUNCTION       0
                    105 STORE_NAME          7 (7)
                    108 LOAD_NAME           1 (1)
                    111 LOAD_NAME           7 (7)
                    114 CALL_FUNCTION       1
                    117 STORE_NAME          8 (8)
                    120 LOAD_NAME           8 (8)
                    123 LOAD_ATTR           9 (9)
                    126 CALL_FUNCTION       0
                    129 POP_TOP        
                    130 LOAD_NAME          10 (10)
                    133 LOAD_NAME           8 (8)
                    136 CALL_FUNCTION       1
                    139 STORE_NAME         11 (11)
                    142 SETUP_LOOP         99 (to 244)
                    145 LOAD_NAME          12 (12)
                    148 LOAD_NAME          11 (11)
                    151 CALL_FUNCTION       1
                    154 GET_ITER       
                    155 FOR_ITER           85 (to 243)
                    158 STORE_NAME         13 (13)
                    161 LOAD_NAME          13 (13)
                    164 LOAD_CONST         10 (10)
                    167 BINARY_MODULO  
                    168 LOAD_CONST         11 (11)
                    171 COMPARE_OP          2 (==)
                    174 POP_JUMP_IF_FALSE   196
                    177 LOAD_NAME          14 (14)
                    180 LOAD_NAME          15 (15)
                    183 LOAD_NAME           8 (8)
                    186 LOAD_NAME          13 (13)
                    189 BINARY_SUBSCR  
                    190 CALL_FUNCTION       1
                    193 LOAD_CONST         10 (10)
            >>  196 BINARY_SUBTRACT
                    197 CALL_FUNCTION       1
                    200 LOAD_NAME           8 (8)
                    203 LOAD_NAME          13 (13)
                    206 STORE_SUBSCR   
                    207 JUMP_FORWARD        0 (to 210)
            >>  210 LOAD_NAME          14 (14)
                    213 LOAD_NAME          15 (15)
                    216 LOAD_NAME           8 (8)
                    219 LOAD_NAME          13 (13)
                    222 BINARY_SUBSCR  
                    223 CALL_FUNCTION       1
                    226 LOAD_CONST         12 (12)
                    229 BINARY_ADD     
                    230 CALL_FUNCTION       1
                    233 LOAD_NAME           8 (8)
                    236 LOAD_NAME          13 (13)
                    239 STORE_SUBSCR   
                    240 JUMP_ABSOLUTE     141
            >>  243 POP_BLOCK      
            >>  244 LOAD_CONST         13 (13)
                    247 STORE_NAME         16 (16)
                    250 LOAD_NAME          16 (16)
                    253 LOAD_ATTR          17 (17)
                    256 LOAD_NAME           8 (8)
                    259 CALL_FUNCTION       1
                    262 STORE_NAME         16 (16)
                    265 LOAD_NAME           5 (5)
                    268 LOAD_NAME          16 (16)
                    271 CALL_FUNCTION       1
                    274 STORE_NAME         18 (18)
                    277 LOAD_NAME          18 (18)
                    280 LOAD_NAME           4 (4)
            >>  283 COMPARE_OP          2 (==)
                    286 POP_JUMP_IF_FALSE   283
                    289 LOAD_CONST         14 (14)
                    292 PRINT_ITEM     
                    293 PRINT_NEWLINE  
                    294 JUMP_FORWARD        5 (to 302)
                    297 LOAD_CONST         15 (15)
                    300 PRINT_ITEM     
                    301 PRINT_NEWLINE  
            >>  302 LOAD_CONST          1 (1)
                    305 RETURN_VALUE

刚开始走了一些弯路,通读了opcode,结果发现并不需要,只要将反uncompyle2的部分去掉,修改co_code长度即可正常反编译,期望修改后的opcode首行为

 0 LOAD_CONST 0(0)
        3 LOAD_CONST 1(1)
        ...

使用hexdump查看文件

https://www.kkzevip.com/content/uploadfile/201902/b0151550298542.png

0x64 操作为LOAD_CONST,用法举例:LOAD_CONST 1 HEX: 640100

0x71 操作为JUMP_ABSOLUTE,用法举例:JUMP_ABSOLUTE 14 HEX: 710e00

0x65 操作为LOAD_NAME,用法举例:LOAD_NAME 1 HEX: 650100

...

修改原始pyc

通过opcode和hexdump可以确定,当前co_code长度为0x132(此处为小端显示,位于0x1a~0x1b位置),0x1e~0x2c(左闭右开)这部分为混淆代码,直接从16进制数据中删除,然后修改co_code长度为0x132-(0x2c-0x1e)=0x124,即将长度字段的低字节改为24(小端显示为24 01),保存代码

 In [1]: with open('third.pyc','r') as f:
        ...:     dt = f.read()
        ...:     

    In [2]: dt = dt[:0x1a]+'\x24'+dt[0x1b:0x1e]+dt[0x2c:]

    In [3]: with open('third_test2.pyc', 'w') as f:
        ...:     f.write(dt)
        ...:

然后使用uncompyle2 third_test2.pyc > third_source.py进行反编译

源码如下:

 # 2019.02.16 14:17:20 CST
    #Embedded file name: third.py
    import string
    letters = list(string.letters) + list(string.digits) + ['+', '/']
    dec = 'FcjTCgD1EffEm2rPC3bTyL5Wu2bKBI9KAZrwFgrUygHN'

    def encode(input_str):
            # Reviewer annotations (not part of the decompiler output):
            # Base64-style encoder whose output alphabet is the module-level
            # `letters` list instead of the standard Base64 alphabet.

            # Render every input character as a zero-padded 8-bit binary string.
            str_ascii_list = [ '{:0>8}'.format(str(bin(ord(i))).replace('0b', '')) for i in input_str ]
            output_str = ''
            # Number of zero bytes appended to complete the last 3-byte group;
            # also the number of '=' characters appended to the result.
            equal_num = 0
            while str_ascii_list:
                    # Consume the input three bytes (24 bits) at a time.
                    temp_list = str_ascii_list[:3]
                    if len(temp_list) != 3:
                            # Short group: pad with zero bytes and count them.
                            while len(temp_list) < 3:
                                    equal_num += 1
                                    temp_list += ['00000000']

                    # Join the 24 bits and cut them into four 6-bit groups.
                    temp_str = ''.join(temp_list)
                    temp_str_list = [ temp_str[x:x + 6] for x in [0,
                    6,
                    12,
                    18] ]
                    # Each 6-bit group becomes an index (0..63) into `letters`.
                    temp_str_list = [ int(x, 2) for x in temp_str_list ]
                    if equal_num:
                            # Drop the trailing groups that consist only of padding bits.
                            temp_str_list = temp_str_list[0:4 - equal_num]
                    output_str += ''.join([ letters[x] for x in temp_str_list ])
                    str_ascii_list = str_ascii_list[3:]

            # Mark the padding with '=' characters, one per padded byte.
            output_str = output_str + '=' * equal_num
            return output_str


    print "Welcome to Processor's Python Classroom Part 3&4!\n"
    print 'qi shi wo jiu shi lan cai ba liang dao ti fang zai yi qi.'
    print "Now let's start the origin of Python!\n"
    print 'Plz Input Your Flag:\n'
    enc = raw_input()
    lst = list(enc)
    lst.reverse()
    llen = len(lst)
    for i in range(llen):
            if i % 2 == 0:
                    lst[i] = chr(ord(lst[i]) - 2)
            lst[i] = chr(ord(lst[i]) + 1)

    enc2 = ''
    enc2 = enc2.join(lst)
    enc3 = encode(enc2)
    if enc3 == dec:
            print "You're right! "
    else:
            print "You're Wrong! "
    # +++ okay decompyling third_test2.pyc 
    # decompiled 1 files: 1 okay, 0 failed, 0 verify failed
    # 2019.02.16 14:17:21 CST

至此,代码已经还原,剩下的题目就很简单了。

解读代码

encode函数实现了一个base64,这里有一点点坑:这里的base64编码表为abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/,并非原生的ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/。两张表只是大小写字母的顺序互换,因此把密文中的字母大小写互换后,直接用标准base64解码即可

solve.py↓

 #!/usr/bin/python
    # -*- coding: utf-8 -*-
    def decode(input_str):
        """Invert the challenge's encoding and return the original input string.

        The challenge reverses the input, shifts each character (even index:
        -2 then +1, odd index: +1) and encodes the result with a Base64
        variant whose alphabet is a-z, A-Z, 0-9, '+', '/'.  This function
        undoes those steps in the opposite order.

        Works on both Python 2 and Python 3.

        :param input_str: text produced by the challenge's ``encode``.
        :return: the plaintext that was fed to the challenge.
        """
        # Function-scope import: the script has no import section to extend.
        import base64

        # The custom alphabet is the standard one with upper/lower case
        # exchanged, so swapping the case yields ordinary Base64 text.
        standard_b64 = input_str.swapcase()

        # bytearray yields integers on both Python 2 and Python 3.
        raw = bytearray(base64.b64decode(standard_b64))

        # Net forward shift was -1 on even indices and +1 on odd ones;
        # apply the opposite shift here.
        chars = [
            chr(value + 1) if index % 2 == 0 else chr(value - 1)
            for index, value in enumerate(raw)
        ]

        # The challenge reversed the input before shifting it.
        chars.reverse()
        return ''.join(chars)
    if __name__ == '__main__':
        dec = 'FcjTCgD1EffEm2rPC3bTyL5Wu2bKBI9KAZrwFgrUygHN'
        # Parenthesised single-argument print is valid on Python 2 and 3.
        print(decode(dec))

参考文章

http://butian.360.cn/School/content?id=429

https://0x48.pw/2017/03/20/0x2f

TOP