clickhouse集群安装
单机安装
- yum install yum-utils
- rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG
- yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo
- sudo yum install clickhouse-server clickhouse-client
配置文件
vim /etc/clickhouse-server/config.xml
<!-- Cluster definition: three shards, each with a single replica. -->
<remote_servers>
    <clickhouse_3shards_1replicas>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>10.0.5.153</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <!-- internal_replication is a shard-level setting: it must be a direct child of <shard>, not of <replica>. -->
            <internal_replication>true</internal_replication>
            <replica>
                <host>10.0.5.154</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <internal_replication>true</internal_replication>
            <replica>
                <host>10.0.5.155</host>
                <!-- NOTE(review): the other two shards use port 9000; confirm 9001 is intended for this host. -->
                <port>9001</port>
            </replica>
        </shard>
    </clickhouse_3shards_1replicas>
</remote_servers>
<!-- ZooKeeper ensemble: three nodes on the same host, distinguished by port. -->
<zookeeper>
    <node>
        <host>10.0.5.37</host>
        <port>2181</port>
    </node>
    <node>
        <host>10.0.5.37</host>
        <port>2182</port>
    </node>
    <node>
        <host>10.0.5.37</host>
        <port>2183</port>
    </node>
</zookeeper>
<!-- The three <macros> blocks below carry different shard/replica values.
     Presumably each machine keeps only the block whose <replica> matches its own address (TODO confirm). -->
<macros>
    <shard>01</shard>
    <replica>10.0.5.153</replica>
</macros>
<macros>
    <shard>02</shard>
    <replica>10.0.5.154</replica>
</macros>
<macros>
    <shard>03</shard>
    <replica>10.0.5.155</replica>
</macros>
使用scp命令将配置文件传到另外两台机器上(传完后注意按各机器修改macros中的shard和replica):
scp /etc/clickhouse-server/config.xml root@10.0.5.153:/etc/clickhouse-server/config.xml
scp /etc/clickhouse-server/config.xml root@10.0.5.154:/etc/clickhouse-server/config.xml
clickhouse安装位置
- 配置文件: /etc/clickhouse-server/config.xml
- data存储: /var/lib/clickhouse/
开启zk集群
这个参考我之前发布的下载资源,windows下直接使用cmd即可打开zk集群服务。
启动Server(有点坑)
- 指明配置文件的情况下,如果是root账号,要使用 sudo -u clickhouse /usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml 启动,否则会提示 Application: DB::Exception: Effective user of the process (root) does not match the owner of the data (clickhouse). Run under 'sudo -u clickhouse'。一旦首次是用root账号直接指定配置文件启动的(即启动方式错了),之后再用systemctl也无法启动了。
此时需要修改相关目录的属主和属组:
- chgrp clickhouse clickhouse -R
- chgrp clickhouse /var/lib/clickhouse -R
- chown clickhouse /var/lib/clickhouse -R
- chown clickhouse /var/log/clickhouse-server -R
- chgrp clickhouse /var/log/clickhouse-server -R
客户端连接
# -m是多行模式,分号+回车执行当前语句。
clickhouse-client --port=xxx --host=xxx --user=xxx --password=xxx -m
查看集群是否搭建成功
在客户端中执行 select * from system.clusters; 如果结果中能看到 clickhouse_3shards_1replicas 的三个分片,说明集群配置已生效。
基础SQL
-- Create the database on the cluster.
CREATE DATABASE a9_mixed
    ON CLUSTER clickhouse_3shards_1replicas;
-- Next: create a distributed table on top of a table that already holds data.
-- Here t_log_goods is a table that already exists, with data, on the machines.
-- Distributed table over the existing table a9_mixed.t_log_goods, created via ON CLUSTER.
-- The original note wrote the AS clause as `[as a9_mixed.t_log_goods]`; the square brackets
-- were notation for "optional", not SQL, and must not be typed literally.
-- NOTE(review): the table name is unqualified, so it is created in the session's current
-- database; the SHOW CREATE output that follows reports a9_mixed -- confirm the session uses it.
CREATE TABLE t_log_goods_all
    ON CLUSTER clickhouse_3shards_1replicas
    AS a9_mixed.t_log_goods
    ENGINE = Distributed(clickhouse_3shards_1replicas, a9_mixed, t_log_goods);
-- The statement reported by SHOW CREATE TABLE is:
-- Explicit form of the distributed table: full column list, no ON CLUSTER clause.
CREATE TABLE a9_mixed.t_log_goods_all
(
    `val`       Int32,
    `new`       Int32,
    `flag`      Int32,
    `rname`     String,
    `optime`    Int32,
    `level`     Int32,
    `goodsid`   Int32,
    `old`       Int32,
    `rid`       Int64,
    `platform`  Int32,
    `sid`       Int32,
    `quality`   Int32,
    `uid`       Int32,
    `guid`      Int64,
    `id`        Int32,
    `sdk`       String,
    `power`     Int64,
    `seri`      Int32,
    `goodstype` Int32
)
ENGINE = Distributed('clickhouse_3shards_1replicas', 'a9_mixed', 't_log_goods');
-- Re-running the statement above on one machine creates t_log_goods_all only on the machine
-- that executes the SQL; the other machines do not get the table.
-- Drop the distributed table:
DROP TABLE t_log_goods_all
    ON CLUSTER clickhouse_3shards_1replicas;
-- Next: create a local table, then a distributed table that corresponds to it.
-- Local MergeTree table, created via ON CLUSTER; partitioned by the year-month of EventDate.
CREATE TABLE t_local
    ON CLUSTER clickhouse_3shards_1replicas
(
    EventDate DateTime,
    CounterID UInt32,
    UserID    UInt32
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate);
-- The corresponding distributed table:
-- Distributed table over t_local; CounterID is passed as the fourth engine argument.
-- NOTE(review): the engine points at database test01, while t_local is created with an
-- unqualified name and the rest of this note uses a9_mixed -- confirm that t_local really
-- lives in test01 on every shard.
CREATE TABLE t_logical_Distributed
    ON CLUSTER clickhouse_3shards_1replicas
(
    EventDate DateTime,
    CounterID UInt32,
    UserID    UInt32
)
ENGINE = Distributed(clickhouse_3shards_1replicas, test01, t_local, CounterID);
-- Write data into the distributed table; the distributed table ultimately spreads the rows
-- across the corresponding shards.
-- Sample rows written through the distributed table.
-- The column list is explicit so the statement does not depend on the table's column order.
INSERT INTO t_logical_Distributed (EventDate, CounterID, UserID)
VALUES
    ('2021-01-16 00:00:00', 5, 5),
    ('2021-02-10 00:00:00', 6, 6),
    ('2021-03-10 00:00:00', 4, 4);