Python 操作 Hive
安装依赖库
pip install thrift pure-sasl thrift_sasl future pyhive
导包
from pyhive import hive
from TCLIService.ttypes import TOperationState
连接Hive服务器
def get_hive_connection(host='localhost', port=10000, username='your_username', database='default'):
    """Open a PyHive connection and report the outcome on stdout.

    Args:
        host: Host name or address passed to ``hive.Connection``.
        port: Port passed to ``hive.Connection``.
        username: User name passed to ``hive.Connection``.
        database: Database name passed to ``hive.Connection``.

    Returns:
        The ``hive.Connection`` object on success, or ``None`` when any
        exception is raised while connecting (the error is printed, not
        re-raised).
    """
    settings = {
        'host': host,
        'port': port,
        'username': username,
        'database': database,
    }
    try:
        connection = hive.Connection(**settings)
        print(f"成功连接到Hive服务器: {host}:{port}")
    except Exception as exc:
        # Best-effort helper: callers detect failure through the None return.
        print(f"连接失败: {exc}")
        return None
    return connection
# Connect to the Hive server used in this walkthrough and show the handle.
conn = get_hive_connection(host='36.41.67.11', port=10000, username='root', database='test')
print(conn)
成功连接到Hive服务器: 36.41.67.11:10000
<pyhive.hive.Connection object at 0x0000023723785C40>
执行查询并返回结果
def execute_query(connection, query):
    """Run a query on an open connection and return every result row.

    Args:
        connection: Object providing ``cursor()``. A falsy value (for
            example the ``None`` returned by a failed connect) makes the
            function print a notice and return an empty list.
        query: Statement text handed to ``cursor.execute``.

    Returns:
        Whatever ``cursor.fetchall()`` returns, or ``[]`` when there is no
        connection or any step raises. Errors are printed, never re-raised.
    """
    if not connection:
        print("没有可用的连接")
        return []

    # Bind the name before the try block so the finally clause can always
    # inspect it, even when connection.cursor() itself raises. Without this
    # the cleanup would raise UnboundLocalError and hide the real error.
    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute(query)
        status = cursor.poll().operationState
        if status == TOperationState.FINISHED_STATE:
            print("查询成功执行")
        else:
            print(f"查询状态: {status}")
        return cursor.fetchall()
    except Exception as e:
        print(f"查询执行失败: {e}")
        return []
    finally:
        # Close only a cursor that was actually created.
        if cursor is not None:
            cursor.close()
# Count students per gender, then print each returned row on its own line.
sql = 'select gender, count(1) num from student group by gender'
result = execute_query(conn, sql)
print("\n查询结果:")
for row in result:
    print(row)
查询成功执行
查询结果:
('女', 9)
('男', 11)