0% found this document useful (0 votes)
6 views5 pages

Revision Solution

Uploaded by

Asvini
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
6 views5 pages

Revision Solution

Uploaded by

Asvini
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 5

*Cloudera Folks Revision*

==================
Slide 5
==================

=========
Insert the data
=========

mysql -uroot -pcloudera


drop database if exists zeyodbrev;
create database zeyodbrev;
use zeyodbrev;

CREATE TABLE customerrev(custid INT,firstname VARCHAR(20),lastname VARCHAR(20),city


varchar(50),age int,createdt date,transactamt int );

insert into customerrev values(1,'Arun','Kumar','chennai',33,'2015-09-20',100000);


insert into customerrev values(2,'srini','vasan','chennai',33,'2015-09-21',10000);
insert into customerrev values(3,'vasu','devan','banglore',39,'2015-09-22',90000);
insert into customerrev values(4,'mohamed','imran','hyderabad',33,'2015-09-
23',1000);
insert into customerrev values(5,'arun','basker','chennai',23,'2015-09-24',200000);
insert into customerrev values(6,'arun1','basker','chennai',23,'2015-09-
24',200000);
insert into customerrev values(7,'srini','vasan','chennai',33,'2015-09-21',10000);
insert into customerrev values(8,'vasu','devan','banglore',39,'2015-09-22',90000);
insert into customerrev values(9,'mohamed','imran','hyderabad',33,'2015-09-
23',1000);
insert into customerrev values(10,'mohamed','imran','hyderabad',33,'2015-09-
23',1000);

select * from customerrev;


quit

sqoop import --connect jdbc:mysql://localhost/zeyodbrev --username root --password


cloudera --table customerrev -m 1 --delete-target-dir --target-dir
/user/cloudera/revimport

hadoop fs -ls /user/cloudera/revimport


hadoop fs -cat /user/cloudera/revimport/*

==================
Slide 6 --- import chennai with two columns
==================

sqoop import --connect jdbc:mysql://localhost/zeyodbrev --username root --password


cloudera --table customerrev -m 1 --where "city='chennai'" --columns
firstname,lastname --delete-target-dir --target-dir /user/cloudera/whereimport

hadoop fs -ls /user/cloudera/whereimport


hadoop fs -cat /user/cloudera/whereimport/*

==================
Slide 7 --- same with query
==================

sqoop import --connect jdbc:mysql://localhost/zeyodbrev --username root --password


cloudera -m 1 --query "select firstname,lastname from customerrev where
city='chennai' and \$CONDITIONS" --delete-target-dir --target-dir
/user/cloudera/queryimport;

hadoop fs -ls /user/cloudera/queryimport


hadoop fs -cat /user/cloudera/queryimport/*

==================
Slide 8 --- Insert 11 and 12 alone
==================

========Normal Import===========

sqoop import --connect jdbc:mysql://localhost/zeyodbrev --username root --password


cloudera --table customerrev -m 1 --delete-target-dir --target-dir
/user/cloudera/inimport

hadoop fs -ls /user/cloudera/inimport


hadoop fs -cat /user/cloudera/inimport/part-m-00000

========Add records===========

mysql -uroot -pcloudera


use zeyodbrev;
insert into customerrev values(11,'mohamed','imran','hyderabad',33,'2015-09-
23',1000);
insert into customerrev values(12,'mohamed','imran','hyderabad',33,'2015-09-
23',1000);
select * from customerrev;
quit

sqoop import --connect jdbc:mysql://localhost/zeyodbrev --username root --password


cloudera --table customerrev -m 1 --target-dir /user/cloudera/inimport --
incremental append --check-column custid --last-value 10

hadoop fs -ls /user/cloudera/inimport


hadoop fs -cat /user/cloudera/inimport/part-m-00001

==================
Slide 9 --- Job
==================

echo -n cloudera>/home/cloudera/pfile
hadoop fs -rmr /user/cloudera/incrementalappendjob

sqoop job --create injobrev -- import --connect jdbc:mysql://localhost/zeyodbrev --


username root --password-file file:///home/cloudera/pfile --table customerrev -m 1
--target-dir /user/cloudera/incrementalappendjob --incremental append --check-
column custid --last-value 0;

sqoop job --list

sqoop job --exec injobrev

hadoop fs -ls /user/cloudera/incrementalappendjob


hadoop fs -cat /user/cloudera/incrementalappendjob/part-m-00000

sqoop job --show injobrev

==================
Slide 10 --- Parquet
==================

sqoop import --connect jdbc:mysql://localhost/zeyodbrev --username root --password


cloudera --table customerrev -m 1 --delete-target-dir --target-dir
/user/cloudera/parquetimport --as-parquetfile

hadoop fs -ls /user/cloudera/parquetimport

==================
Slide 11 --- Parquet
==================

Sequence File Format --

TextFile - Readable,Row format, Huge in size,Query takes time


Avro- Row format, 60% Compression,Query takes time,Schema evolution
Parquet - Columnar, 60-80%Compression, query faster, Predicate pushdown,Target
System
orc - Columnar,90%Compression, query faster but not as parquet,Historical storage

==================
Slide 12 --- Parquet
==================

sqoop import --connect jdbc:mysql://localhost/zeyodbrev --username root --password


cloudera --table customerrev -m 2 --split-by custid --delete-target-dir --target-
dir /user/cloudera/mulmappers

hadoop fs -ls /user/cloudera/mulmappers

==================
Slide 14 Hive
==================

Hive is layer on HDFS


ETL- SQL and ELT - Hive
Load Time parsing and Query time parsing

==================
Slide 15 Hive
==================

hive --type and Go Inside

create database if not exists revhivedb;


use revhivedb;
create table revhivetab(id int,name string,chk string) row format delimited fields
terminated by ',';
quit;
hadoop fs -ls /user/hive/warehouse/
hadoop fs -ls /user/hive/warehouse/revhivedb.db/

==================
Slide 16 Hive
==================

cd
echo 1,Sai,I>INDTxns
echo 2,zeyo,I>>INDTxns
hive --type and Go Inside

use revhivedb;
load data local inpath '/home/cloudera/INDTxns' into table revhivetab;
select * from revhivetab;
quit;

==================
Slide 17 Hive
==================

cd
echo 1,Sai,I>INDTxns
echo 2,zeyo,I>>INDTxns
hadoop fs -rmr /user/cloudera/ldir
hadoop fs -mkdir /user/cloudera/ldir
hadoop fs -put INDTxns /user/cloudera/ldir

hive --type and Go Inside

create database if not exists revhivedb;


use revhivedb;
create table loctab(id int,name string,chk string) row format delimited fields
terminated by ',' location '/user/cloudera/ldir';
select * from loctab;
drop table loctab;
quit;

hadoop fs -ls /user/cloudera ------------u will not see ldir

==================
Slide 17 Hive
==================

cd
echo 1,Sai,I>INDTxns
echo 2,zeyo,I>>INDTxns
hadoop fs -rmr /user/cloudera/ldir
hadoop fs -mkdir /user/cloudera/ldir
hadoop fs -put INDTxns /user/cloudera/ldir

hive --type and Go Inside

create database if not exists revhivedb;


use revhivedb;
create external table loctabexternal(id int,name string,chk string) row format
delimited fields terminated by ',' location '/user/cloudera/ldir';
select * from loctabexternal;
drop table loctabexternal;
quit;

hadoop fs -ls /user/cloudera/ ------------u will see ldir

You might also like