Add support for the metadata search API and its test suite

This commit is contained in:
kansi 2017-11-07 12:20:51 +05:30
parent 94ce03cbec
commit 714c1782ac
7 changed files with 169 additions and 22 deletions

View File

@ -366,9 +366,12 @@ def get_new_blocks_feed(conn, start_block_id):
@register_query(MongoDBConnection)
def text_search(conn, search, *, language='english', case_sensitive=False,
diacritic_sensitive=False, text_score=False, limit=0):
diacritic_sensitive=False, text_score=False, limit=0, table=None):
if table is None:
table = 'assets'
cursor = conn.run(
conn.collection('assets')
conn.collection(table)
.find({'$text': {
'$search': search,
'$language': language,
@ -381,7 +384,7 @@ def text_search(conn, search, *, language='english', case_sensitive=False,
if text_score:
return cursor
return (_remove_text_score(asset) for asset in cursor)
return (_remove_text_score(obj) for obj in cursor)
def _remove_text_score(asset):

View File

@ -387,7 +387,7 @@ def get_new_blocks_feed(connection, start_block_id):
@singledispatch
def text_search(conn, search, *, language='english', case_sensitive=False,
diacritic_sensitive=False, text_score=False, limit=0):
diacritic_sensitive=False, text_score=False, limit=0, table=None):
"""Return all the assets that match the text search.
The results are sorted by text score.

View File

@ -672,7 +672,7 @@ class Bigchain(object):
"""
return backend.query.write_metadata(self.connection, metadata)
def text_search(self, search, *, limit=0):
def text_search(self, search, *, limit=0, table=None):
"""
Return an iterator of assets that match the text search
@ -683,12 +683,16 @@ class Bigchain(object):
Returns:
iter: An iterator of assets that match the text search.
"""
assets = backend.query.text_search(self.connection, search, limit=limit)
if table is None:
table = 'assets'
objects = backend.query.text_search(self.connection, search, limit=limit,
table=table)
# TODO: This is not efficient. There may be a more efficient way to
# query by storing block ids with the assets and using fastquery.
# See https://github.com/bigchaindb/bigchaindb/issues/1496
for asset in assets:
tx, status = self.get_transaction(asset['id'], True)
for obj in objects:
tx, status = self.get_transaction(obj['id'], True)
if status == self.TX_VALID:
yield asset
yield obj

View File

@ -2,6 +2,7 @@
from flask_restful import Api
from bigchaindb.web.views import (
assets,
metadata,
blocks,
info,
statuses,
@ -27,6 +28,7 @@ def r(*args, **kwargs):
ROUTES_API_V1 = [
r('/', info.ApiV1Index),
r('assets/', assets.AssetListApi),
r('metadata/', metadata.MetadataApi),
r('blocks/<string:block_id>', blocks.BlockApi),
r('blocks/', blocks.BlockListApi),
r('statuses/', statuses.StatusApi),

View File

@ -0,0 +1,50 @@
"""This module provides the blueprint for some basic API endpoints.
For more information please refer to the documentation: http://bigchaindb.com/http-api
"""
import logging
from flask_restful import reqparse, Resource
from flask import current_app
from bigchaindb.backend.exceptions import OperationError
from bigchaindb.web.views.base import make_error
logger = logging.getLogger(__name__)
class MetadataApi(Resource):
    """REST resource exposing text search over transaction metadata."""

    def get(self):
        """Run a text search against the ``metadata`` table.

        Query-string arguments:
            search (str): Text search string to query the text index.
                Required; an empty value is rejected with a 400 error.
            limit (int, optional): Limit the number of returned documents.
                A missing or zero value is not forwarded to the search.

        Return:
            A list of metadata documents that match the query, or a 400
            error response when the search string is empty or the backend
            raises ``OperationError``.
        """
        request_parser = reqparse.RequestParser()
        request_parser.add_argument('search', type=str, required=True)
        request_parser.add_argument('limit', type=int)
        parsed = request_parser.parse_args()

        if not parsed['search']:
            return make_error(400, 'text_search cannot be empty')

        # Only forward ``limit`` when the caller supplied a truthy value, so
        # that text_search falls back to its own default otherwise.
        search_kwargs = {'search': parsed['search'], 'table': 'metadata'}
        if parsed['limit']:
            search_kwargs['limit'] = parsed['limit']

        bigchain_pool = current_app.config['bigchain_pool']
        with bigchain_pool() as bigchain:
            matches = bigchain.text_search(**search_kwargs)

        # NOTE(review): text_search appears to be lazy, so the backend query
        # presumably runs here, after the pooled instance has been released.
        # Confirm that using the instance past the ``with`` block is intended.
        try:
            # This only works with MongoDB as the backend
            return list(matches)
        except OperationError as exc:
            message = '({}): {}'.format(type(exc).__name__, exc)
            return make_error(400, message)

View File

@ -529,13 +529,14 @@ def test_get_assets():
assert list(cursor.sort('id', pymongo.ASCENDING)) == assets[::2]
def test_text_search():
@pytest.mark.parametrize("table", ['assets', 'metadata'])
def test_text_search(table):
from bigchaindb.backend import connect, query
conn = connect()
# Example data and tests cases taken from the mongodb documentation
# https://docs.mongodb.com/manual/reference/operator/query/text/
assets = [
objects = [
{'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50},
{'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
{'id': 3, 'subject': 'Baking a cake', 'author': 'abc', 'views': 90},
@ -547,17 +548,17 @@ def test_text_search():
]
# insert the assets
conn.db.assets.insert_many(deepcopy(assets), ordered=False)
conn.db[table].insert_many(deepcopy(objects), ordered=False)
# test search single word
assert list(query.text_search(conn, 'coffee')) == [
assert list(query.text_search(conn, 'coffee', table=table)) == [
{'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50},
{'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
{'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10},
]
# match any of the search terms
assert list(query.text_search(conn, 'bake coffee cake')) == [
assert list(query.text_search(conn, 'bake coffee cake', table=table)) == [
{'author': 'abc', 'id': 3, 'subject': 'Baking a cake', 'views': 90},
{'author': 'xyz', 'id': 1, 'subject': 'coffee', 'views': 50},
{'author': 'xyz', 'id': 4, 'subject': 'baking', 'views': 100},
@ -566,48 +567,48 @@ def test_text_search():
]
# search for a phrase
assert list(query.text_search(conn, '\"coffee shop\"')) == [
assert list(query.text_search(conn, '\"coffee shop\"', table=table)) == [
{'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
]
# exclude documents that contain a term
assert list(query.text_search(conn, 'coffee -shop')) == [
assert list(query.text_search(conn, 'coffee -shop', table=table)) == [
{'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50},
{'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10},
]
# search different language
assert list(query.text_search(conn, 'leche', language='es')) == [
assert list(query.text_search(conn, 'leche', language='es', table=table)) == [
{'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200},
{'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10}
]
# case and diacritic insensitive search
assert list(query.text_search(conn, 'сы́рники CAFÉS')) == [
assert list(query.text_search(conn, 'сы́рники CAFÉS', table=table)) == [
{'id': 6, 'subject': 'Сырники', 'author': 'jkl', 'views': 80},
{'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200},
{'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10}
]
# case sensitive search
assert list(query.text_search(conn, 'Coffee', case_sensitive=True)) == [
assert list(query.text_search(conn, 'Coffee', case_sensitive=True, table=table)) == [
{'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
]
# diacritic sensitive search
assert list(query.text_search(conn, 'CAFÉ', diacritic_sensitive=True)) == [
assert list(query.text_search(conn, 'CAFÉ', diacritic_sensitive=True, table=table)) == [
{'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200},
]
# return text score
assert list(query.text_search(conn, 'coffee', text_score=True)) == [
assert list(query.text_search(conn, 'coffee', text_score=True, table=table)) == [
{'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50, 'score': 1.0},
{'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5, 'score': 0.75},
{'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10, 'score': 0.75},
]
# limit search result
assert list(query.text_search(conn, 'coffee', limit=2)) == [
assert list(query.text_search(conn, 'coffee', limit=2, table=table)) == [
{'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50},
{'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5},
]

View File

@ -0,0 +1,87 @@
import pytest
METADATA_ENDPOINT = '/api/v1/metadata/'
def test_get_metadata_with_empty_text_search(client):
    """An empty ``search`` parameter must be rejected with a 400 error."""
    expected_body = {
        'status': 400,
        'message': 'text_search cannot be empty',
    }

    response = client.get(METADATA_ENDPOINT + '?search=')

    assert response.json == expected_body
    assert response.status_code == 400
def test_get_metadata_with_missing_text_search(client):
    """A request without any ``search`` parameter must return a 400."""
    response = client.get(METADATA_ENDPOINT)
    status = response.status_code
    assert status == 400
@pytest.mark.genesis
def test_get_metadata(client, b):
    """Metadata of a valid transaction is found through the endpoint.

    On a non-MongoDB backend the endpoint is expected to answer with a 400
    whose message starts with ``(OperationError)``.
    """
    from bigchaindb.models import Transaction
    from bigchaindb.backend.mongodb.connection import MongoDBConnection

    if not isinstance(b.connection, MongoDBConnection):
        # text search is unavailable: the endpoint must report the error
        response = client.get(METADATA_ENDPOINT + '?search=my_meta')
        assert response.status_code == 400
        assert response.json['message'].startswith('(OperationError)')
        return

    # nothing has been written yet, so the search comes back empty
    response = client.get(METADATA_ENDPOINT + '?search=abc')
    assert response.json == []
    assert response.status_code == 200

    # build and sign a transaction that carries the metadata to search for
    asset = {'msg': 'abc'}
    metadata = {'key': 'my_meta'}
    tx = Transaction.create(
        [b.me],
        [([b.me], 1)],
        metadata=metadata,
        asset=asset,
    ).sign([b.me_private])

    # put the transaction in a block and store it
    block = b.create_block([tx])
    b.write_block(block)

    # cast a valid vote for the block
    previous_block_id = b.get_last_voted_block().id
    vote = b.vote(block.id, previous_block_id, True)
    b.write_vote(vote)

    # the metadata must now be returned, tagged with the transaction id
    response = client.get(METADATA_ENDPOINT + '?search=my_meta')
    assert response.status_code == 200
    found = response.json
    assert len(found) == 1
    assert found[0] == {'key': 'my_meta', 'id': tx.id}
@pytest.mark.genesis
def test_get_metadata_limit(client, b):
    """The ``limit`` query parameter caps the number of returned documents.

    The test body only runs when the backend connection is MongoDB.
    """
    from bigchaindb.models import Transaction
    from bigchaindb.backend.mongodb.connection import MongoDBConnection

    if not isinstance(b.connection, MongoDBConnection):
        return

    # two transactions whose metadata both match the search term "meta"
    payloads = [
        ({'msg': 'abc 1'}, {'key': 'meta 1'}),
        ({'msg': 'abc 2'}, {'key': 'meta 2'}),
    ]
    transactions = [
        Transaction.create(
            [b.me],
            [([b.me], 1)],
            metadata=meta,
            asset=asset,
        ).sign([b.me_private])
        for asset, meta in payloads
    ]

    # store both transactions in a single block
    block = b.create_block(transactions)
    b.write_block(block)

    # cast a valid vote for the block
    previous_block_id = b.get_last_voted_block().id
    vote = b.vote(block.id, previous_block_id, True)
    b.write_vote(vote)

    # without a limit both documents are returned; limit=1 returns only one
    expectations = (
        ('?search=meta', 2),
        ('?search=meta&limit=1', 1),
    )
    for query_string, expected_count in expectations:
        response = client.get(METADATA_ENDPOINT + query_string)
        assert response.status_code == 200
        assert len(response.json) == expected_count