Support for metadata search api and its testing suite

2017-11-07 12:20:51 +05:30 · 2017-11-07 12:20:51 +05:30 · 714c1782ac
parent 94ce03cbec
commit 714c1782ac
7 changed files with 169 additions and 22 deletions
--- a/bigchaindb/backend/mongodb/query.py
+++ b/bigchaindb/backend/mongodb/query.py
@@ -366,9 +366,12 @@ def get_new_blocks_feed(conn, start_block_id):

@register_query(MongoDBConnection)
 def text_search(conn, search, *, language='english', case_sensitive=False,
-                diacritic_sensitive=False, text_score=False, limit=0):
+                diacritic_sensitive=False, text_score=False, limit=0, table=None):
+    if table is None:
+        table = 'assets'
+
    cursor = conn.run(
-        conn.collection('assets')
+        conn.collection(table)
        .find({'$text': {
                '$search': search,
                '$language': language,
@@ -381,7 +384,7 @@ def text_search(conn, search, *, language='english', case_sensitive=False,
    if text_score:
        return cursor

-    return (_remove_text_score(asset) for asset in cursor)
+    return (_remove_text_score(obj) for obj in cursor)


 def _remove_text_score(asset):
--- a/bigchaindb/backend/query.py
+++ b/bigchaindb/backend/query.py
@@ -387,7 +387,7 @@ def get_new_blocks_feed(connection, start_block_id):

@singledispatch
 def text_search(conn, search, *, language='english', case_sensitive=False,
-                diacritic_sensitive=False, text_score=False, limit=0):
+                diacritic_sensitive=False, text_score=False, limit=0, table=None):
    """Return all the assets that match the text search.

    The results are sorted by text score.
--- a/bigchaindb/core.py
+++ b/bigchaindb/core.py
@@ -672,7 +672,7 @@ class Bigchain(object):
        """
        return backend.query.write_metadata(self.connection, metadata)

-    def text_search(self, search, *, limit=0):
+    def text_search(self, search, *, limit=0, table=None):
        """
        Return an iterator of assets that match the text search

@@ -683,12 +683,16 @@ class Bigchain(object):
        Returns:
            iter: An iterator of assets that match the text search.
        """
-        assets = backend.query.text_search(self.connection, search, limit=limit)
+        if table is None:
+            table = 'assets'
+
+        objects = backend.query.text_search(self.connection, search, limit=limit,
+                                            table=table)

        # TODO: This is not efficient. There may be a more efficient way to
        #       query by storing block ids with the assets and using fastquery.
        #       See https://github.com/bigchaindb/bigchaindb/issues/1496
-        for asset in assets:
-            tx, status = self.get_transaction(asset['id'], True)
+        for obj in objects:
+            tx, status = self.get_transaction(obj['id'], True)
            if status == self.TX_VALID:
-                yield asset
+                yield obj
--- a/bigchaindb/web/routes.py
+++ b/bigchaindb/web/routes.py
@@ -2,6 +2,7 @@
 from flask_restful import Api
 from bigchaindb.web.views import (
    assets,
+    metadata,
    blocks,
    info,
    statuses,
@@ -27,6 +28,7 @@ def r(*args, **kwargs):
 ROUTES_API_V1 = [
    r('/', info.ApiV1Index),
    r('assets/', assets.AssetListApi),
+    r('metadata/', metadata.MetadataApi),
    r('blocks/<string:block_id>', blocks.BlockApi),
    r('blocks/', blocks.BlockListApi),
    r('statuses/', statuses.StatusApi),
--- a/bigchaindb/web/views/metadata.py
+++ b/bigchaindb/web/views/metadata.py
@@ -0,0 +1,50 @@
+"""This module provides the API endpoint for searching transaction metadata.
+
+For more information please refer to the documentation: http://bigchaindb.com/http-api
+"""
+import logging
+
+from flask_restful import reqparse, Resource
+from flask import current_app
+
+from bigchaindb.backend.exceptions import OperationError
+from bigchaindb.web.views.base import make_error
+
+logger = logging.getLogger(__name__)
+
+
+class MetadataApi(Resource):
+    def get(self):
+        """API endpoint to perform a text search on transaction metadata.
+
+        Args:
+            search (str): Text search string to query the text index
+            limit (int, optional): Limit the number of returned documents.
+
+        Return:
+            A list of metadata that match the query, or a 400 error if the
+            search string is empty or the backend raises OperationError.
+        """
+        parser = reqparse.RequestParser()
+        parser.add_argument('search', type=str, required=True)
+        parser.add_argument('limit', type=int)
+        args = parser.parse_args()
+
+        if not args['search']:
+            return make_error(400, 'text_search cannot be empty')
+        if not args['limit']:
+            # Absent (or zero) limit: let text_search use its own default.
+            del args['limit']
+        args['table'] = 'metadata'
+
+        pool = current_app.config['bigchain_pool']
+        with pool() as bigchain:
+            try:
+                # text_search is a generator: consume it while the pooled
+                # instance is held. This only works with MongoDB as the backend
+                return list(bigchain.text_search(**args))
+            except OperationError as e:
+                return make_error(
+                    400,
+                    '({}): {}'.format(type(e).__name__, e)
+                )
--- a/tests/backend/mongodb/test_queries.py
+++ b/tests/backend/mongodb/test_queries.py
@@ -529,13 +529,14 @@ def test_get_assets():
    assert list(cursor.sort('id', pymongo.ASCENDING)) == assets[::2]


-def test_text_search():
+@pytest.mark.parametrize("table", ['assets', 'metadata'])
+def test_text_search(table):
    from bigchaindb.backend import connect, query
    conn = connect()

    # Example data and tests cases taken from the mongodb documentation
    # https://docs.mongodb.com/manual/reference/operator/query/text/
-    assets = [
+    objects = [
        {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50},
        {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
        {'id': 3, 'subject': 'Baking a cake', 'author': 'abc', 'views': 90},
@@ -547,17 +548,17 @@ def test_text_search():
    ]

    # insert the assets
-    conn.db.assets.insert_many(deepcopy(assets), ordered=False)
+    conn.db[table].insert_many(deepcopy(objects), ordered=False)

    # test search single word
-    assert list(query.text_search(conn, 'coffee')) == [
+    assert list(query.text_search(conn, 'coffee', table=table)) == [
        {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50},
        {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
        {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10},
    ]

    # match any of the search terms
-    assert list(query.text_search(conn, 'bake coffee cake')) == [
+    assert list(query.text_search(conn, 'bake coffee cake', table=table)) == [
        {'author': 'abc', 'id': 3, 'subject': 'Baking a cake', 'views': 90},
        {'author': 'xyz', 'id': 1, 'subject': 'coffee', 'views': 50},
        {'author': 'xyz', 'id': 4, 'subject': 'baking', 'views': 100},
@@ -566,48 +567,48 @@ def test_text_search():
    ]

    # search for a phrase
-    assert list(query.text_search(conn, '\"coffee shop\"')) == [
+    assert list(query.text_search(conn, '\"coffee shop\"', table=table)) == [
        {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
    ]

    # exclude documents that contain a term
-    assert list(query.text_search(conn, 'coffee -shop')) == [
+    assert list(query.text_search(conn, 'coffee -shop', table=table)) == [
        {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50},
        {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10},
    ]

    # search different language
-    assert list(query.text_search(conn, 'leche', language='es')) == [
+    assert list(query.text_search(conn, 'leche', language='es', table=table)) == [
        {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200},
        {'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10}
    ]

    # case and diacritic insensitive search
-    assert list(query.text_search(conn, 'сы́рники CAFÉS')) == [
+    assert list(query.text_search(conn, 'сы́рники CAFÉS', table=table)) == [
        {'id': 6, 'subject': 'Сырники', 'author': 'jkl', 'views': 80},
        {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200},
        {'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10}
    ]

    # case sensitive search
-    assert list(query.text_search(conn, 'Coffee', case_sensitive=True)) == [
+    assert list(query.text_search(conn, 'Coffee', case_sensitive=True, table=table)) == [
        {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
    ]

    # diacritic sensitive search
-    assert list(query.text_search(conn, 'CAFÉ', diacritic_sensitive=True)) == [
+    assert list(query.text_search(conn, 'CAFÉ', diacritic_sensitive=True, table=table)) == [
        {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200},
    ]

    # return text score
-    assert list(query.text_search(conn, 'coffee', text_score=True)) == [
+    assert list(query.text_search(conn, 'coffee', text_score=True, table=table)) == [
        {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50, 'score': 1.0},
        {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5, 'score': 0.75},
        {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10, 'score': 0.75},
    ]

    # limit search result
-    assert list(query.text_search(conn, 'coffee', limit=2)) == [
+    assert list(query.text_search(conn, 'coffee', limit=2, table=table)) == [
        {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50},
        {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
    ]
--- a/tests/web/test_metadata.py
+++ b/tests/web/test_metadata.py
@@ -0,0 +1,87 @@
+import pytest
+
+METADATA_ENDPOINT = '/api/v1/metadata/'
+
+
+def test_get_metadata_with_empty_text_search(client):
+    res = client.get(METADATA_ENDPOINT + '?search=')  # search present but empty
+    assert res.json == {'status': 400,
+                        'message': 'text_search cannot be empty'}
+    assert res.status_code == 400
+
+
+def test_get_metadata_with_missing_text_search(client):
+    res = client.get(METADATA_ENDPOINT)  # no 'search' query parameter at all
+    assert res.status_code == 400
+
+
+@pytest.mark.genesis
+def test_get_metadata(client, b):
+    from bigchaindb.models import Transaction
+    from bigchaindb.backend.mongodb.connection import MongoDBConnection
+
+    if isinstance(b.connection, MongoDBConnection):
+        # test returns empty list when no metadata is found
+        res = client.get(METADATA_ENDPOINT + '?search=abc')
+        assert res.json == []
+        assert res.status_code == 200
+
+        # create a transaction carrying both an asset and metadata
+        asset = {'msg': 'abc'}
+        metadata = {'key': 'my_meta'}
+        tx = Transaction.create([b.me], [([b.me], 1)], metadata=metadata,
+                                asset=asset).sign([b.me_private])
+        # create block
+        block = b.create_block([tx])
+        b.write_block(block)
+        # vote valid
+        vote = b.vote(block.id, b.get_last_voted_block().id, True)
+        b.write_vote(vote)
+
+        # test that metadata is returned
+        res = client.get(METADATA_ENDPOINT + '?search=my_meta')
+        assert res.status_code == 200
+        assert len(res.json) == 1
+        assert res.json[0] == {
+            'key': 'my_meta',
+            'id': tx.id
+        }
+    else:
+        # test that the correct error is returned if not running MongoDB
+        res = client.get(METADATA_ENDPOINT + '?search=my_meta')
+        assert res.status_code == 400
+        assert res.json['message'].startswith('(OperationError)')
+
+
+@pytest.mark.genesis
+def test_get_metadata_limit(client, b):
+    from bigchaindb.models import Transaction
+    from bigchaindb.backend.mongodb.connection import MongoDBConnection
+
+    if isinstance(b.connection, MongoDBConnection):
+        # create two transactions, each with its own asset and metadata
+        asset1 = {'msg': 'abc 1'}
+        meta1 = {'key': 'meta 1'}
+        tx1 = Transaction.create([b.me], [([b.me], 1)], metadata=meta1,
+                                 asset=asset1).sign([b.me_private])
+
+        asset2 = {'msg': 'abc 2'}
+        meta2 = {'key': 'meta 2'}
+        tx2 = Transaction.create([b.me], [([b.me], 1)], metadata=meta2,
+                                 asset=asset2).sign([b.me_private])
+        # create block
+        block = b.create_block([tx1, tx2])
+        b.write_block(block)
+        # vote valid
+        vote = b.vote(block.id, b.get_last_voted_block().id, True)
+        b.write_vote(vote)
+
+        # test that both metadata documents are returned without limit
+        res = client.get(METADATA_ENDPOINT + '?search=meta')
+        assert res.status_code == 200
+        assert len(res.json) == 2
+
+        # test that only one metadata document is returned when using limit=1
+        res = client.get(METADATA_ENDPOINT + '?search=meta&limit=1')
+        assert res.status_code == 200
+        assert len(res.json) == 1