格式化Hive语法树(python)

为了便于阅读,给用 explain 得到的语法树加上缩进。这段代码只做一件事:按括号的嵌套层级加上缩进。

效果

下面是执行 explain select key from kv mykv join test mytest on (mykv.key == mytest.id); 语句得到的语法树(格式化之后):

(TOK_QUERY 
    (TOK_FROM 
        (TOK_JOIN 
            (TOK_TABREF 
                (TOK_TABNAME kv)
             mykv)
         
            (TOK_TABREF 
                (TOK_TABNAME test)
             mytest)
         
            (== 
                (. 
                    (TOK_TABLE_OR_COL mykv)
                 key)
             
                (. 
                    (TOK_TABLE_OR_COL mytest)
                 id)
            )
        )
    )
 
    (TOK_INSERT 
        (TOK_DESTINATION 
            (TOK_DIR TOK_TMP_FILE)
        )
     
        (TOK_SELECT 
            (TOK_SELEXPR 
                (TOK_TABLE_OR_COL key)
            )
        )
    )
)

代码如下

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created on 2012-5-20

@author: fatkun
'''
import sys

# Sample syntax tree, obtained from Hive with:
#   explain select key from kv mykv join test mytest on (mykv.key == mytest.id);
# The embedded line breaks only keep the source readable; each one is
# preceded by the single space that separates the neighbouring tokens.
original_str = (
    "(TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF \n"
    "(TOK_TABNAME kv) mykv) (TOK_TABREF (TOK_TABNAME test) mytest) \n"
    "(== (. (TOK_TABLE_OR_COL mykv) key) (. (TOK_TABLE_OR_COL mytest) id)))) \n"
    "(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) "
    "(TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)))))"
)

# Trim surrounding whitespace and drop the line breaks so the formatter
# below sees the tree as one continuous line.
tmp_str = ''.join(original_str.strip().split('\n'))

def my_print(mystr):
    """Write ``mystr`` to standard output exactly as given.

    Nothing is appended to the text (no newline, no separator), which lets
    the caller assemble an output line piece by piece.
    """
    stream = sys.stdout
    stream.write(mystr)

def print_indent(indent_level):
    """Print the indentation for ``indent_level`` nesting levels.

    Each level is four spaces. A level of zero or below prints no spaces.
    """
    unit = ' ' * 4
    my_print(unit * indent_level)


# Walk the one-line tree character by character, tracking the current
# parenthesis nesting depth in ``indent_level``.
indent_level = 0
for char in tmp_str:
    if char not in ('(', ')'):
        # Anything that is not a parenthesis is echoed unchanged.
        my_print(char)
        continue

    if char == ')':
        # Closing paren: leave the current level, print the paren, break the
        # line, then pre-indent the new line one level shallower still, so
        # whatever follows starts at the column of the enclosing node's '('.
        indent_level -= 1
        my_print(char)
        my_print('\n')
        print_indent(indent_level - 1)
        continue

    # Opening paren: start a new line, indent to the current depth, print
    # the paren, and descend one level for the node's contents.
    my_print('\n')
    print_indent(indent_level)
    my_print(char)
    indent_level += 1
        
updated 2024-11-30