Python 操作 Hive

发布于:2025-07-03 ⋅ 阅读:(21) ⋅ 点赞:(0)

Python 操作 Hive

安装依赖库

pip install thrift pure-sasl thrift_sasl future pyhive

导包

from pyhive import hive
from TCLIService.ttypes import TOperationState

连接Hive服务器

# 连接Hive服务器
def get_hive_connection(host='localhost', port=10000, username='your_username', database='default'):
  """Open a PyHive connection and report the outcome on stdout.

  Args:
    host: Host name or IP address passed to ``hive.Connection``.
    port: Port passed to ``hive.Connection``.
    username: User name passed to ``hive.Connection``.
    database: Database name passed to ``hive.Connection``.

  Returns:
    The ``hive.Connection`` object on success, or ``None`` when any
    exception is raised while connecting (the error is printed, not raised).
  """
  # Collect the settings first so the constructor call stays on one line.
  settings = {
    'host': host,
    'port': port,
    'username': username,
    'database': database,
  }
  try:
    hive_conn = hive.Connection(**settings)
    print(f"成功连接到Hive服务器: {host}:{port}")
    return hive_conn
  except Exception as err:
    # Best-effort helper: callers get None instead of an exception.
    print(f"连接失败: {err}")
    return None
# Connect to the demo Hive server and show the resulting connection object
# (get_hive_connection returns None when the attempt fails).
conn = get_hive_connection(host='36.41.67.11', port=10000, username='root', database='test')
print(conn)
成功连接到Hive服务器: 36.41.67.11:10000
<pyhive.hive.Connection object at 0x0000023723785C40>

执行查询并返回结果

# 执行查询并返回结果
def execute_query(connection, query):
  """Run ``query`` on ``connection`` and return every fetched row.

  Args:
    connection: Connection object exposing ``cursor()``; a falsy value
      (e.g. ``None``) is treated as "no connection available".
    query: SQL text handed to ``cursor.execute``.

  Returns:
    The result of ``cursor.fetchall()``, or an empty list when there is no
    connection or when any step raises (the error is printed, not raised).
  """
  if not connection:
    print("没有可用的连接")
    return []

  # Bind the name before entering the try block: if connection.cursor()
  # itself raises, the finally clause must still be able to inspect it
  # instead of failing with UnboundLocalError and masking the handler's
  # return value.
  cursor = None
  try:
    cursor = connection.cursor()
    cursor.execute(query)

    # Report the operation state returned by the server.
    status = cursor.poll().operationState
    if status == TOperationState.FINISHED_STATE:
      print("查询成功执行")
    else:
      print(f"查询状态: {status}")

    return cursor.fetchall()
  except Exception as e:
    print(f"查询执行失败: {e}")
    return []
  finally:
    # Close the cursor whenever one was actually created.
    if cursor is not None:
      cursor.close()
# Count the rows of the student table per gender.
sql = 'select gender, count(1) num from student group by gender'
result = execute_query(conn, sql)

# Print a header, then one fetched row per line.
print("\n查询结果:")
for gender_count in result:
  print(gender_count)
查询成功执行

查询结果:
('女', 9)
('男', 11)