发布于 2015-11-09 14:01:22 | 647 次阅读 | 评论: 0 | 来源: PHPERZ

这里有新鲜出炉的ElasticSearch权威指南,程序狗速度看过来!

ElasticSearch 基于Lucene的搜索引擎

ElasticSearch是一个基于Lucene构建的开源、分布式、RESTful搜索引擎。它为云计算环境而设计,能够做到实时搜索,稳定、可靠、快速,安装使用方便,并支持通过HTTP使用JSON进行数据索引。当我们建立一个网站或应用程序并要添加搜索功能时,令我们受打击的是:搜索是一件很难做好的事情。我们希望搜索解决方案要快;希望它零配置,并且搜索模式完全自由(schema-free);希望能够简单地通过HTTP使用JSON来索引数据;希望搜索服务器始终可用;希望能够从一台服务器起步并扩展到数百台;我们要实时搜索,要简单的多租户支持,还希望得到一个面向云的解决方案。Elasticsearch旨在解决以上所有问题,以及更多其他问题。


方案:

  • 使用HAproxy,当其中一台ElasticSearch Master宕掉时,ElasticSearch集群会自动将运行正常的节点提升为Master,但HAproxy不会将失败的请求重新分发到新的Master Node。不知道是不是我的HAproxy配置有问题,求助一下网友们。(放弃治疗)
  • 使用ElasticSearch内建配置,架构为:单search load balancer双coordinator若干workhorse。先后在200并发Index、200并发Update测试下(跑在虚拟机下,线程太多就卡爆了),并前后分别测试了Down掉一台主coordinator、Down掉一台workhorse,都没有引起数据异常,集群工作正常

先贴一下使用HAproxy搭建集群失败的配置吧:

#全局配置
global
        daemon
        nbproc 4
        pidfile /tmp/haproxy.pid

#默认配置
defaults
        mode http				#默认的模式mode { tcp|http|health },tcp是4层,http是7层,health只会返回OK
        retries 2				#两次连接失败就认为是服务器不可用,也可以通过后面设置
        option redispatch			#当serverId对应的服务器挂掉后,强制定向到其他健康的服务器
        option httpclose			#每次请求/响应完成后即关闭连接(HAProxy会在两个方向上补充"Connection: close"头),不使用keep-alive;此选项与cookie无关
        #option abortonclose			#当服务器负载很高的时候,自动结束掉当前队列处理比较久的链接
        maxconn 4096				#默认的最大连接数
        timeout connect 5000ms			#连接超时
        timeout client 30000ms			#客户端超时
        timeout server 30000ms			#服务器超时
        timeout check 2000			#心跳检测超时
        log 127.0.0.1 local0 err #[err warning info debug]

#统计页面配置
listen admin_stats
        bind 0.0.0.0:8888               #监听端口
        mode http                       #http的7层模式
        option httplog                  #采用http日志格式
        #log 127.0.0.1 local0 err
        maxconn 10
        stats refresh 30s               #统计页面自动刷新时间
        stats uri /                     #统计页面url
        stats realm XingCloud\ Haproxy  #统计页面密码框上提示文本
        stats auth admin:admin          #统计页面用户名和密码设置
        #stats hide-version             #隐藏统计页面上HAProxy的版本信息

#ElasticSearch Frontend
frontend eshttp
	bind 0.0.0.0:9200
	mode tcp
	use_backend eshttp_server

#ElasticSearch Backend
backend eshttp_server
	server eshttp1 		vm12:9200 cookie 1 check inter 2000 rise 3 fall 3 weight 2
	server eshttp2 		vm13:9200 cookie 2 check inter 2000 rise 3 fall 3 weight 1
	server eshttp3_bk 	vm14:9200 cookie 3 check inter 1000 rise 3 fall 3 backup

采用ElasticSearch搭建集群的关键几个配置:

search load balancer:

cluster.name: harold					#集群名称
node.name: "harold_lb"					#节点名称

# 3. You want this node to be neither master nor data node, but
#    to act as a "search load balancer" (fetching data from nodes,
#    aggregating results, etc.)
#
node.master: false
node.data: false

discovery.zen.ping.unicast.hosts: ["vm11", "vm12", "vm13", "vm14", "vm15", "vm16"]

coordinator:

cluster.name: harold					#集群名称
node.name: "harold_coordinator_1"		   #节点名称

# 2. You want this node to only serve as a master: to not store any data and
#    to have free resources. This will be the "coordinator" of your cluster.
#
node.master: true
node.data: false

discovery.zen.ping.unicast.hosts: ["vm11", "vm12", "vm13", "vm14", "vm15", "vm16"]

workhorse:

cluster.name: harold					#集群名称
node.name: "harold_data_1"				#节点名称

# 1. You want this node to never become a master node, only to hold data.
#    This will be the "workhorse" of your cluster.
#
node.master: false
node.data: true

discovery.zen.ping.unicast.hosts: ["vm11", "vm12", "vm13", "vm14", "vm15", "vm16"]

配置完,启动后,/_plugin/head/页面应该是这个样子:

这样配置完的集群应该就是类似这样的:

可以使用curl在创建Index时指定主分片和复制分片的数量:

curl -XPUT -d'{"settings":{"number_of_shards":6, "number_of_replicas":1}}' http://vm11:9200/app1

Tip:

number_of_shards 主分片在集群中的总数量
number_of_replicas 每个主分片的复制分片数量

#复制分片在今后的分布式集群变化过程中,随时都可以根据业务进行新增或减少:
curl -XPUT -d'{"number_of_replicas":2}' http://vm11:9200/app1/_settings
#另外,向一个尚不存在的索引新增文档时,ElasticSearch会自动创建该索引;为避免发生这种情况,可以在配置文件中添加:
action.auto_create_index: false

删除Index:

curl -XDELETE http://vm11:9200/app1

在当前版本中,这样组建集群会有一个小问题:

当单独把Master Coordinator Down掉后,/_plugin/head/插件页面会是这个样子:

但可喜的是,并不影响集群与集群客户端之间数据的CRUD操作。
数据有所改变而且较长一段时间后(大约十几分钟?),/_plugin/head/插件页面会恢复正常。

贴下PHP操作ElasticSearch的多进程并发测试代码吧,做下记录:

<?php
/**
 * Console command that load-tests an ElasticSearch cluster from many
 * concurrent worker processes (one swoole_process per worker).
 *
 * The document id range [0, $sum) is split evenly across $process workers.
 * handle() currently runs the insert scenario only; update() and gets() are
 * kept as alternative scenarios that can be swapped in from handle().
 *
 * NOTE(review): the parent class is presumably Illuminate\Console\Command
 * (the $signature / handle() layout matches it) -- confirm against the
 * file's imports, which are not part of this listing.
 */
class es extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'es:test';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Command description.';

    /** @var string[] ElasticSearch endpoints handed to the client builder. */
    private $hosts = ["vm11:9200"];

    /** @var string Index the test documents are written to. */
    private $index = "app1";

    /** @var string Mapping type of the test documents. */
    private $type = "users1";

    /** @var int Number of worker processes to fork. */
    private $process = 200;

    /** @var int Total number of documents handled across all workers. */
    private $sum = 10000;

    /** @var int|null Documents per worker; set by handle() after validation. */
    private $num_per_proc;

    /**
     * Create a new command instance.
     *
     * Deliberately free of validation and of die(): console frameworks
     * construct every registered command, so terminating here would take
     * down unrelated commands as well. The checks live in handle().
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Build an ElasticSearch client bound to the configured hosts.
     *
     * @return mixed The client object produced by ClientBuilder::build().
     */
    private function buildClient()
    {
        $builder = new ClientBuilder();
        $builder->setHosts($this->hosts);

        return $builder->build();
    }

    /**
     * Start every worker, print its PID, then block until all started
     * workers have exited.
     *
     * Reaping the children with swoole_process::wait() keeps the parent
     * alive until the test has actually finished and avoids leaving zombie
     * processes behind.
     *
     * @param \swoole_process[] $workers Workers that have not been started yet.
     *
     * @return void
     */
    private function runWorkers(array $workers)
    {
        $running = 0;
        foreach ($workers as $worker) {
            $pid = $worker->start();
            if ($pid === false) {
                // start() reports a failed fork with false; do not count it
                // as a child that has to be waited for.
                echo "failed to start worker process\n";
                continue;
            }
            echo $pid . "\n";
            $running++;
        }

        // One blocking wait() per successfully started child.
        for (; $running > 0; $running--) {
            \swoole_process::wait();
        }
    }

    /**
     * Insert scenario: every worker indexes its own contiguous slice of
     * document ids with randomly generated field values.
     *
     * @return void
     */
    private function insert()
    {
        $letters = str_split("abcdefghijklmnopqrstuvwxyz");
        $lastLetter = count($letters) - 1;
        $birthYears = range(1961, 2010);
        $userTypes = ['1', '2', '3', '4'];

        $workers = [];
        for ($p = 0; $p < $this->process; $p++) {
            $workers[] = new \swoole_process(function () use ($birthYears, $userTypes, $letters, $lastLetter, $p) {
                // Built inside the child so that no client state is shared
                // between forked processes.
                $client = $this->buildClient();

                $first = $this->num_per_proc * $p;
                $end = $this->num_per_proc * ($p + 1);
                for ($i = $first; $i < $end; $i++) {
                    // Four random lowercase letters.
                    $name = '';
                    for ($k = 0; $k < 4; $k++) {
                        $name .= $letters[mt_rand(0, $lastLetter)];
                    }

                    $client->index([
                        'index' => $this->index,
                        'type' => $this->type,
                        'id' => $i,
                        'body' => [
                            'birth_year' => $birthYears[array_rand($birthYears)],
                            'type' => $userTypes[array_rand($userTypes)],
                            'name' => $name,
                            'height' => mt_rand(150, 200),
                            'weight' => mt_rand(40, 200),
                            'test' => 1,
                            'userid' => $i
                        ]
                    ]);
                }
            });
        }

        $this->runWorkers($workers);
    }

    /**
     * Update scenario: every worker fetches its own page of documents
     * (sorted by userid) and increments each document's "test" counter.
     *
     * Each new counter value is also appended to /tmp/s as a trace.
     *
     * @return void
     */
    private function update()
    {
        $workers = [];
        for ($i = 0; $i < $this->process; $i++) {
            $workers[] = new \swoole_process(function () use ($i) {
                // Built inside the child so that no client state is shared
                // between forked processes.
                $client = $this->buildClient();

                $response = $client->search([
                    'index' => $this->index,
                    'type' => $this->type,
                    'size' => $this->num_per_proc,
                    'from' => $this->num_per_proc * $i,
                    'sort' => "userid:asc"
                ]);

                foreach ($response['hits']['hits'] as $hit) {
                    $test = $hit['_source']['test'];
                    $test++;
                    file_put_contents("/tmp/s", $test . "\n", FILE_APPEND);

                    $client->update([
                        'index' => $this->index,
                        'type' => $this->type,
                        'id' => $hit['_id'],
                        'body' => [
                            'doc' => [
                                'test' => $test
                            ]
                        ]
                    ]);
                }
            });
        }

        $this->runWorkers($workers);
    }

    /**
     * Read-back scenario: fetch 5000 documents starting at offset 500
     * (sorted by userid) and dump each document's "test" counter.
     *
     * @return void
     */
    private function gets()
    {
        $client = $this->buildClient();

        $response = $client->search([
            'index' => $this->index,
            'type' => $this->type,
            'size' => 5000,
            'from' => 500,
            'sort' => "userid:asc"
        ]);

        foreach ($response['hits']['hits'] as $hit) {
            var_dump($hit['_source']['test']);
        }
    }

    /**
     * Execute the console command.
     *
     * Validates that the documents can be split evenly across the workers,
     * then runs the insert scenario.
     *
     * @return int 0 on success, 1 when $sum cannot be divided evenly by $process.
     */
    public function handle()
    {
        // The $process < 1 guard also prevents a modulo/division by zero.
        if ($this->process < 1 || $this->sum % $this->process !== 0) {
            echo "invalid num. \n";

            return 1;
        }
        $this->num_per_proc = $this->sum / $this->process;

        $this->insert();

        return 0;
    }
}


最新网友评论  共有(0)条评论 发布评论 返回顶部

Copyright © 2007-2017 PHPERZ.COM All Rights Reserved   冀ICP备14009818号  版权声明  广告服务