但行好事
莫论前程❤

hive常用命令

创建新表

hive> CREATE TABLE t_hive (a int, b int, c int) 
      ROW FORMAT DELIMITED
      FIELDS TERMINATED BY '\$';

创建表的时候同时赋值

hive> create table result 
    row format delimited 
    fields terminated by '\t' 
    as select t1.account, t1.income, t1.expenses, t1.surplus, t2.name from user_info t2 
    join (select account, sum(income) as income, sum(expenses) as expenses, 
          sum(income-expenses
         ) as surplus 
    from trade_detail group by account) t1 on(t1.account = t2.account);

指定分区

hive> CREATE TABLE t_hive (a int, b int, c int)
      PARTITIONED BY (P_DAY_ID INT)                     -- 指定分区字段
      ROW FORMAT DELIMITED 
      FIELDS TERMINATED BY '\$'                         -- 指定分隔符
      STORED AS TEXTFILE                                -- 指定文件类型
      LOCATION '/datacenter/stg/stg_agentinfo/';          -- 指定文件存放位置 hdfs文件路径

导入数据t_hive.txt到t_hive表

导入本地文件追加到t_hive表中

hive> LOAD DATA LOCAL INPATH '/home/cos/demo/t_hive.txt' INTO TABLE t_hive ;

导入本地文件覆盖到t_hive表中

hive> LOAD DATA LOCAL INPATH '/home/cos/demo/t_hive.txt' OVERWRITE INTO TABLE t_hive;

从HDFS加载数据

hive> LOAD DATA INPATH '/user/hive/warehouse/t_hive/t_hive.txt' OVERWRITE INTO TABLE t_hive2;

正则匹配表名

hive>show tables '*t*';

增加一个字段

hive> ALTER TABLE t_hive ADD COLUMNS (new_col String);

重命名表名

hive> ALTER TABLE t_hive RENAME TO t_hadoop;

从其他表导入数据(基本的插入语法)

hive> INSERT OVERWRITE TABLE t_hive2 SELECT * FROM t_hive ;
hive> insert overwrite table tablename [partition(partcol1=val1, partcol2=val2)] select_statement
      from t_statement

创建表并从其他表导入数据

hive> CREATE TABLE t_hive AS SELECT * FROM t_hive2 ;

仅复制表结构不导数据

hive> CREATE TABLE t_hive3 LIKE t_hive;

通过Hive导出到本地文件系统

hive> INSERT OVERWRITE LOCAL DIRECTORY '/tmp/t_hive' SELECT * FROM t_hive;

Hive查询HiveQL

from ( select b,c as c2 from t_hive) t select t.b, t.c2 limit 2;
select b,c from t_hive limit 2;

创建视图

hive> CREATE VIEW v_hive AS SELECT a,b FROM t_hive;

删表

drop table if exists t_hft;

创建分区表

DROP TABLE IF EXISTS t_hft;
CREATE TABLE t_hft(
SecurityID STRING,
tradeTime STRING,
PreClosePx DOUBLE
) PARTITIONED BY (tradeDate INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

导入分区数据

hive> load data local inpath '/home/BlueBreeze/data/t_hft_1.csv' overwrite into table t_hft partition(tradeDate=20130627);

查看分区表

hive> SHOW PARTITIONS t_hft;

将mysql中的数据直接保存到Hive中

sqoop export --connect jdbc:mysql://192.168.8.103:3306/hmbbs --username root --password hadoop --export-dir '/user/hive/warehouse/pv_2013_05_31/000000_0' --table pv

更新表的名称

hive> alter table source RENAME TO target;

添加新一列

alter table invites add columns (new_col2 INT COMMENT 'a comment');

删除表:

DROP TABLE records;

删除表中数据,但要保持表的结构定义

dfs -rm -r /user/hive/warehouse/records;

显示所有函数

show functions;

查看函数用法

describe function substr;

内连接

SELECT sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);

查看hive为某个查询使用多少个MapReduce作业

Explain SELECT sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);

外连接

SELECT sales.*, things.* FROM sales LEFT OUTER JOIN things ON (sales.id = things.id);
SELECT sales.*, things.* FROM sales RIGHT OUTER JOIN things ON (sales.id = things.id);
SELECT sales.*, things.* FROM sales FULL OUTER JOIN things ON (sales.id = things.id);

创建视图

hive> CREATE VIEW valid_records AS SELECT * FROM records2 WHERE temperature !=9999;

查看视图详细信息

hive> DESCRIBE EXTENDED valid_records;
赞(0) 打赏
未经允许不得转载:刘鹏博客 » hive常用命令
分享到: 更多 (0)

评论 1

评论前必须登录!

 

    觉得文章有用就打赏一下文章作者

    支付宝扫一扫打赏

    微信扫一扫打赏