Huge Discounts on Mobiles, Books, Cameras, Computers etc: @Flipkart
Flipkart.com

Thursday, April 16, 2015

Getting started with ElasticSearch

Elasticsearch is a search server based on Apache Lucene. It provides a distributed, multitenant-capable full-text search engine with a RESTful web interface and schema-free JSON documents. Elasticsearch is developed in Java and is released as open source under the terms of the Apache License. Elasticsearch is the second most popular enterprise search engine, at the time of this post.

Elasticsearch can be used to search all kinds of documents. It provides scalable search, has near real-time search, and supports multitenancy. Elasticsearch is distributed, which means that indices can be divided into shards and each shard can have zero or more replicas. Each node hosts one or more shards, and acts as a coordinator to delegate operations to the correct shard(s). Rebalancing and routing are done automatically. Notable users of Elasticsearch include Wikimedia, GitHub, Stack Exchange, Netflix, and The Guardian.

But Elasticsearch is not just for mega-corporations. It has enabled many startups like Datadog and Klout to prototype ideas and to turn them into scalable solutions. Elasticsearch can run on your laptop, or scale out to hundreds of servers and petabytes of data. No individual part of Elasticsearch is new or revolutionary. Full-text search has been done before, as have analytics systems and distributed databases. The revolution is the combination of these individually useful parts into a single, coherent, real-time application. It has a low barrier to entry for the new user, but can keep pace with you as your skills and needs grow.

You can find a very useful Getting Started Guide here:
http://www.elastic.co/guide/en/elasticsearch/guide/master/getting-started.html

If you are impatient, this YouTube video helps you get started quickly: Getting started with ElasticSearch

Note: The syntax of the settings shown at 24:54 of the video doesn't work in version 1.5.1 (the version I used) because the settings format has changed. Refer to this link for the correct usage: Using Synonyms

I would suggest the Sense plugin for Chrome to easily explore Elasticsearch:
https://chrome.google.com/webstore/search/sense
https://www.found.no/foundation/Sense-Elasticsearch-interface/

Alternatives are RESTClient for Firefox and Fiddler for pretty much any platform.

You can use the head plugin for Elasticsearch to monitor and manage the server.

You can find all the REST commands below for easy copy-paste if you want to follow along with the video and try them hands-on.

Add Sample Data


POST /places/restaurant
{
    "name": "Joes Italiana",
    "description": "Best pasta around",
    "address": {
        "street": "464 S Main St",
        "city": "Los Angeles",
        "state": "CA",
        "zip": "90013"
    },
    "location": { "lat": 34.023954, "lon": -118.3927072 },
    "tags": ["italian", "spaghetti", "pasta"],
    "rating": 4.5
}

POST /places/restaurant
{
    "name": "Jose's Taco Shop",
    "description": "Best Tacos in SoCal",
    "address": {
        "street": "950 Vine St",
        "city": "Los Angeles",
        "state": "CA",
        "zip": "90038"
    },
    "location": { "lat": 34.088186, "lon": -118.326603 },
    "tags": ["mexican", "tacos", "burritos"],
    "rating": 4.0
}

POST /places/restaurant
{
    "name": "Berry's Burritos",
    "description": "Best Burritos in New York!",
    "address": {
        "street": "230 W 4th St",
        "city": "New York",
        "state": "NY",
        "zip": "10014"
    },
    "location": { "lat": 40.7543385, "lon": -73.976313 },
    "tags": ["mexican", "tacos", "burritos"],
    "rating": 4.3
}

POST /places/restaurant
{
    "name": "Steve's Italian Restaurant",
    "description": "Great food, great atmosphere",
    "address": {
        "street": "46 W 46th St",
        "city": "New York",
        "state": "NY",
        "zip": "10036"
    },
    "location": { "lat": 40.751624, "lon": -73.9783865 },
    "tags": ["italian", "spaghetti", "pasta"],
    "rating": 3.5
}

Various forms of Search and Filter

POST /places/restaurant/_search
{
    "query": {
        "match_all": {}
    }
}

POST /places/restaurant/_search
{
    "query": {
        "query_string": {
            "query": "tacos"
        }
    }
}

POST /places/restaurant/_search
{
    "query": {
        "query_string": {
            "query": "tacos",
            "fields": ["tags"]
        }
    }
}

POST /places/restaurant/_search
{
    "query": {
        "query_string": {
            "query": "taco",
            "fields": ["name"]
        }
    }
}

POST /places/restaurant/_search
{
    "query": {
        "filtered": {
            "query": {
                "query_string": {
                    "query": "tacos",
                    "fields": ["tags"]
                }
            },
            "filter": {
                "range": {
                    "rating": {
                        "gte": 4.0
                    }
                }
            }
        }
    }
}

POST /places/restaurant/_search
{
    "query": {
        "filtered": {
            "filter": {
                "range": {
                    "rating": {
                        "gte": 4.0
                    }
                }
            }
        }
    }
}

POST /places/restaurant/_search
{
    "query": {
        "filtered": {
            "query": {
                "match": {
                    "address.state": "ny"
                }
            },
            "filter": {
                "range": {
                    "rating": {
                        "gte": 4.0
                    }
                }
            }
        }
    }
}

Clear Index

DELETE /places

Rebuild index with Synonym support

POST /places
{
    "settings": {
        "analysis": {
            "filter": {
                "synonym": {
                    "type": "synonym",
                    "synonyms_path": "synonyms.txt",
                    "ignore_case": true
                }
            },
            "analyzer": {
                "synonym": {
                    "tokenizer": "whitespace",
                    "filter": ["synonym"]
                }
            }
        }
    },
    "mappings": {
        "restaurant": {
            "_all": {
                "enabled": true
            },
            "properties": {
                "address": {
                    "properties": {
                        "state": {
                            "type": "string",
                            "analyzer": "synonym"
                        }
                    }
                },
                "location": {
                    "type": "geo_point"
                }
            }
        }
    }
}

Bulk Insert of Data 

POST /places/restaurant/_bulk
{"index":{}}
{ "name" : "Joes Italiana", "description": "Best pasta around", "address": { "street":"464 S Main St", "city":"Los Angeles", "state":"CA", "zip":"90013" }, "location": { "lat": 34.023954, "lon": -118.3927072 }, "tags":["italian","spaghetti","pasta"], "rating": 4.5 }
{"index":{}}
{ "name" : "Jose's Taco Shop", "description": "Best Tacos in SoCal", "address": { "street":"950 Vine St", "city":"Los Angeles", "state":"CA", "zip":"90038" }, "location": { "lat": 34.088186, "lon": -118.326603 }, "tags":["mexican","tacos","burritos"], "rating": 4.0 }
{"index":{}}
{ "name" : "Berry's Burritos", "description": "Best Burritos in New York!", "address": { "street":"230 W 4th St", "city":"New York", "state":"NY", "zip":"10014" }, "location": { "lat": 40.7543385, "lon": -73.976313 }, "tags":["mexican","tacos","burritos"], "rating": 4.3 }
{"index":{}}
{ "name" : "Steve's Italian Restaurant", "description": "Great food, great atmosphere", "address": { "street":"46 W 46th St", "city":"New York", "state":"NY", "zip":"10036" }, "location": { "lat": 40.751624, "lon": -73.9783865 }, "tags":["italian","spaghetti","pasta"], "rating": 3.5 }

Synonym Search

POST /places/restaurant/_search
{
    "query": {
        "filtered": {
            "query": {
                "match": {
                    "address.state": "new york"
                }
            },
            "filter": {
                "range": {
                    "rating": {
                        "gte": 4.0
                    }
                }
            }
        }
    }
}

Geospatial Search

POST /places/restaurant/_search
{
    "query": {
        "filtered": {
            "filter": {
                "geo_distance": {
                    "distance": "100km",
                    "location": { "lat": 40.7894537, "lon": -73.9481288 }
                }
            }
        }
    }
}

POST /places/restaurant/_search
{
    "query": {
        "filtered": {
            "filter": {
                "bool": {
                    "must": [
                        {
                            "range": {
                                "rating": {
                                    "gte": 4.0
                                }
                            }
                        },
                        {
                            "geo_distance": {
                                "distance": "100km",
                                "location": { "lat": 40.7894537, "lon": -73.9481288 }
                            }
                        }
                    ]
                }
            }
        }
    }
}

Getting just Count

POST /places/restaurant/_count
{
    "query": {
        "query_string": {
            "query": "tacos"
        }
    }
}

Paged Data

POST /places/restaurant/_search?size=1&from=0
{
    "query": {
        "query_string": {
            "query": "tacos"
        }
    }
}

1 comment:

  1. hi,

    Very nice article. It is really helpful for all developer and other.

    I would like add more details regarding how to used ElasticSearch with your favourite programming language.

    Check out - http://www.multidots.com/what-is-elasticsearch

    How to integrate with PHP?

    You can find PHP client api on github: https://github.com/elastic/elasticsearch-php


    Thank you




    ReplyDelete