1
0
mirror of https://github.com/bigchaindb/bigchaindb.git synced 2024-06-28 00:27:45 +02:00

Merge remote-tracking branch 'origin/master' into minor-fixes-to-aws-deploy-scripts

This commit is contained in:
Troy McConaghy 2017-02-03 10:46:12 +01:00
commit b6f3fb4307
21 changed files with 3 additions and 345 deletions

View File

@ -1,13 +1,11 @@
import multiprocessing as mp
import uuid
import json
import argparse
import csv
import time
import logging
import rethinkdb as r
from os.path import expanduser
from bigchaindb.common.transaction import Transaction
from bigchaindb import Bigchain
@ -48,15 +46,6 @@ def run_add_backlog(args):
workers.start()
def run_set_statsd_host(args):
with open(expanduser('~') + '/.bigchaindb', 'r') as f:
conf = json.load(f)
conf['statsd']['host'] = args.statsd_host
with open(expanduser('~') + '/.bigchaindb', 'w') as f:
json.dump(conf, f)
def run_gather_metrics(args):
# setup a rethinkdb connection
conn = r.connect(args.bigchaindb_host, 28015, 'bigchain')
@ -126,12 +115,6 @@ def main():
default='minimal',
help='Payload size')
# set statsd host
statsd_parser = subparsers.add_parser('set-statsd-host',
help='Set statsd host')
statsd_parser.add_argument('statsd_host', metavar='statsd_host', default='localhost',
help='Hostname of the statsd server')
# metrics
metrics_parser = subparsers.add_parser('gather-metrics',
help='Gather metrics to a csv file')
@ -149,4 +132,3 @@ def main():
if __name__ == '__main__':
main()

View File

@ -28,14 +28,6 @@ def put_benchmark_utils():
put('benchmark_utils.py')
@task
@parallel
def set_statsd_host(statsd_host='localhost'):
run('python3 benchmark_utils.py set-statsd-host {}'.format(statsd_host))
print('update configuration')
run('bigchaindb show-config')
@task
@parallel
def prepare_backlog(num_transactions=10000):

View File

@ -15,7 +15,6 @@ Then:
```bash
fab put_benchmark_utils
fab set_statsd_host:<hostname of the statsd server>
fab prepare_backlog:<num txs per node> # wait for process to finish
fab start_bigchaindb
```
```

View File

@ -26,10 +26,6 @@ Entry point for the BigchainDB process, after initialization. All subprocesses
Methods for managing the configuration, including loading configuration files, automatically generating the configuration, and keeping the configuration consistent across BigchainDB instances.
### [`monitor.py`](./monitor.py)
Code for monitoring speed of various processes in BigchainDB via `statsd` and Grafana. [See documentation.](https://docs.bigchaindb.com/projects/server/en/latest/clusters-feds/monitoring.html)
## Folders
### [`pipelines`](./pipelines)

View File

@ -41,11 +41,6 @@ config = {
'private': None,
},
'keyring': [],
'statsd': {
'host': 'localhost',
'port': 8125,
'rate': 0.01,
},
'backlog_reassign_delay': 120
}

View File

@ -105,12 +105,6 @@ def run_configure(args, skip_if_exists=False):
input_on_stderr('Database {}? (default `{}`): '.format(key, val)) \
or val
for key in ('host', 'port', 'rate'):
val = conf['statsd'][key]
conf['statsd'][key] = \
input_on_stderr('Statsd {}? (default `{}`): '.format(key, val)) \
or val
val = conf['backlog_reassign_delay']
conf['backlog_reassign_delay'] = \
input_on_stderr(('Stale transaction reassignment delay (in '

View File

@ -1,32 +0,0 @@
from platform import node
import statsd
import bigchaindb
from bigchaindb import config_utils
class Monitor(statsd.StatsClient):
"""Set up statsd monitoring."""
def __init__(self, *args, **kwargs):
"""Overrides statsd client, fixing prefix to messages and loading configuration
Args:
*args: arguments (identical to Statsclient)
**kwargs: keyword arguments (identical to Statsclient)
"""
config_utils.autoconfigure()
if not kwargs:
kwargs = {}
# set prefix, parameters from configuration file
if 'prefix' not in kwargs:
kwargs['prefix'] = '{hostname}.'.format(hostname=node())
if 'host' not in kwargs:
kwargs['host'] = bigchaindb.config['statsd']['host']
if 'port' not in kwargs:
kwargs['port'] = bigchaindb.config['statsd']['port']
super().__init__(*args, **kwargs)

View File

@ -13,8 +13,6 @@ from bigchaindb import utils
from bigchaindb import Bigchain
from bigchaindb.web.routes import add_routes
from bigchaindb.monitor import Monitor
# TODO: Figure out if we do we need all this boilerplate.
class StandaloneApplication(gunicorn.app.base.BaseApplication):
@ -65,7 +63,6 @@ def create_app(*, debug=False, threads=4):
app.debug = debug
app.config['bigchain_pool'] = utils.pool(Bigchain, size=threads)
app.config['monitor'] = Monitor()
add_routes(app)

View File

@ -23,7 +23,6 @@ from bigchaindb.common.exceptions import (
ValidationError,
)
import bigchaindb
from bigchaindb.models import Transaction
from bigchaindb.web.views.base import make_error
from bigchaindb.web.views import parameters
@ -72,7 +71,6 @@ class TransactionListApi(Resource):
A ``dict`` containing the data about the transaction.
"""
pool = current_app.config['bigchain_pool']
monitor = current_app.config['monitor']
# `force` will try to format the body of the POST request even if the
# `content-type` header is not set to `application/json`
@ -109,8 +107,6 @@ class TransactionListApi(Resource):
'Invalid transaction ({}): {}'.format(type(e).__name__, e)
)
else:
rate = bigchaindb.config['statsd']['rate']
with monitor.timer('write_transaction', rate=rate):
bigchain.write_transaction(tx_obj)
bigchain.write_transaction(tx_obj)
return tx, 202

View File

@ -1,89 +0,0 @@
# -*- coding: utf-8 -*-
"""A Fabric fabfile with functionality to install Docker,
install Docker Compose, and run a BigchainDB monitoring server
(using the docker-compose-monitor.yml file)
"""
from __future__ import with_statement, unicode_literals
from fabric.api import sudo, env
from fabric.api import task
from fabric.operations import put, run
from ssh_key import ssh_key_path
# Ignore known_hosts
# http://docs.fabfile.org/en/1.10/usage/env.html#disable-known-hosts
env.disable_known_hosts = True
env.user = 'ubuntu'
env.key_filename = ssh_key_path
@task
def install_docker_engine():
"""Install Docker on an EC2 Ubuntu 14.04 instance
Example:
fab --fabfile=fabfile-monitor.py \
--hosts=ec2-52-58-106-17.eu-central-1.compute.amazonaws.com \
install_docker_engine
"""
# install prerequisites
sudo('apt-get update')
sudo('apt-get -y install apt-transport-https ca-certificates linux-image-extra-$(uname -r) apparmor')
# install docker repositories
sudo('apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 \
--recv-keys 58118E89F3A912897C070ADBF76221572C52609D')
sudo("echo 'deb https://apt.dockerproject.org/repo ubuntu-trusty main' | \
sudo tee /etc/apt/sources.list.d/docker.list")
# install docker engine
sudo('apt-get update')
sudo('apt-get -y install docker-engine')
# add ubuntu user to the docker group
sudo('usermod -aG docker ubuntu')
@task
def install_docker_compose():
"""Install Docker Compose on an EC2 Ubuntu 14.04 instance
Example:
fab --fabfile=fabfile-monitor.py \
--hosts=ec2-52-58-106-17.eu-central-1.compute.amazonaws.com \
install_docker_compose
"""
sudo('curl -L https://github.com/docker/compose/releases/download/1.7.0/docker-compose-`uname \
-s`-`uname -m` > /usr/local/bin/docker-compose')
sudo('chmod +x /usr/local/bin/docker-compose')
@task
def install_docker():
"""Install Docker and Docker Compose on an EC2 Ubuntu 14.04 instance
Example:
fab --fabfile=fabfile-monitor.py \
--hosts=ec2-52-58-106-17.eu-central-1.compute.amazonaws.com \
install_docker
"""
install_docker_engine()
install_docker_compose()
@task
def run_monitor():
"""Run BigchainDB monitor on an EC2 Ubuntu 14.04 instance
Example:
fab --fabfile=fabfile-monitor.py \
--hosts=ec2-52-58-106-17.eu-central-1.compute.amazonaws.com \
run_monitor
"""
# copy docker-compose-monitor to the ec2 instance
put('../docker-compose-monitor.yml')
run('INFLUXDB_DATA=/influxdb-data docker-compose -f docker-compose-monitor.yml up -d')

View File

@ -1,28 +0,0 @@
version: '2'
services:
influxdb:
image: tutum/influxdb
ports:
- "8083:8083"
- "8086:8086"
- "8090"
- "8099"
environment:
PRE_CREATE_DB: "telegraf"
volumes:
- $INFLUXDB_DATA:/data
grafana:
image: bigchaindb/grafana-bigchaindb-docker
tty: true
ports:
- "3000:3000"
environment:
INFLUXDB_HOST: "influxdb"
statsd:
image: bigchaindb/docker-telegraf-statsd
ports:
- "8125:8125/udp"
environment:
INFLUXDB_HOST: "influxdb"

View File

@ -44,11 +44,6 @@ Port 161 is the default SNMP port (usually UDP, sometimes TCP). SNMP is used, fo
Port 443 is the default HTTPS port (TCP). You may need to open it up for outbound requests (and inbound responses) temporarily because some RethinkDB installation instructions use wget over HTTPS to get the RethinkDB GPG key. Package managers might also get some packages using HTTPS.
## Port 8125
If you set up a [cluster-monitoring server](../clusters-feds/monitoring.html), then StatsD will send UDP packets to Telegraf (on the monitoring server) via port 8125.
## Port 8080
Port 8080 is the default port used by RethinkDB for its administrative web (HTTP) interface (TCP). While you _can_, you shouldn't allow traffic from arbitrary external sources. You can still use the RethinkDB web interface by binding it to localhost and then accessing it via a SOCKS proxy or reverse proxy; see "Binding the web interface port" on [the RethinkDB page about securing your cluster](https://rethinkdb.com/docs/security/).
@ -76,8 +71,3 @@ Port 29015 is the default port for RethinkDB intracluster connections (TCP). It
## Other Ports
On Linux, you can use commands such as `netstat -tunlp` or `lsof -i` to get a sense of currently open/listening ports and connections, and the associated processes.
## Cluster-Monitoring Server
If you set up a [cluster-monitoring server](../clusters-feds/monitoring.html) (running Telegraf, InfluxDB & Grafana), Telegraf will listen on port 8125 for UDP packets from StatsD, and the Grafana web dashboard will use port 3000. (Those are the default ports.)

View File

@ -64,50 +64,6 @@ For a super lax, somewhat risky, anything-can-enter security group, add these ru
If you want to set up a more secure security group, see the [Notes for Firewall Setup](../appendices/firewall-notes.html).
## Deploy a BigchainDB Monitor
This step is optional.
One way to monitor a BigchainDB cluster is to use the monitoring setup described in the [Monitoring](monitoring.html) section of this documentation. If you want to do that, then you may want to deploy the monitoring server first, so you can tell your BigchainDB nodes where to send their monitoring data.
You can deploy a monitoring server on AWS. To do that, go to the AWS EC2 Console and launch an instance:
1. Choose an AMI: select Ubuntu Server 16.04 LTS.
2. Choose an Instance Type: a t2.micro will suffice.
3. Configure Instance Details: you can accept the defaults, but feel free to change them.
4. Add Storage: A "Root" volume type should already be included. You _could_ store monitoring data there (e.g. in a folder named `/influxdb-data`) but we will attach another volume and store the monitoring data there instead. Select "Add New Volume" and an EBS volume type.
5. Tag Instance: give your instance a memorable name.
6. Configure Security Group: choose your bigchaindb security group.
7. Review and launch your instance.
When it asks, choose an existing key pair: the one you created earlier (named `bigchaindb`).
Give your instance some time to launch and become able to accept SSH connections. You can see its current status in the AWS EC2 Console (in the "Instances" section). SSH into your instance using something like:
```text
cd deploy-cluster-aws
ssh -i pem/bigchaindb.pem ubuntu@ec2-52-58-157-229.eu-central-1.compute.amazonaws.com
```
where `ec2-52-58-157-229.eu-central-1.compute.amazonaws.com` should be replaced by your new instance's EC2 hostname. (To get that, go to the AWS EC2 Console, select Instances, click on your newly-launched instance, and copy its "Public DNS" name.)
Next, create a file system on the attached volume, make a directory named `/influxdb-data`, and set the attached volume's mount point to be `/influxdb-data`. For detailed instructions on how to do that, see the AWS documentation for [Making an Amazon EBS Volume Available for Use](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-using-volumes.html).
Then install Docker and Docker Compose:
```text
# in a Python 2.5-2.7 virtual environment where fabric, boto3, etc. are installed
fab --fabfile=fabfile-monitor.py --hosts=<EC2 hostname> install_docker
```
After Docker is installed, we can run the monitor with:
```text
fab --fabfile=fabfile-monitor.py --hosts=<EC2 hostname> run_monitor
```
For more information about monitoring (e.g. how to view the Grafana dashboard in your web browser), see the [Monitoring](monitoring.html) section of this documentation.
To configure a BigchainDB node to send monitoring data to the monitoring server, change the statsd host in the configuration of the BigchainDB node. The section on [Configuring a BigchainDB Node](../server-reference/configuration.html) explains how you can do that. (For example, you can change the statsd host in `$HOME/.bigchaindb`.)
## Deploy a BigchainDB Cluster
### Step 1

View File

@ -7,5 +7,4 @@ Clusters & Federations
set-up-a-federation
backup
aws-testing-cluster
monitoring

View File

@ -1,40 +0,0 @@
# Cluster Monitoring
BigchainDB uses [StatsD](https://github.com/etsy/statsd) for cluster monitoring. We require some additional infrastructure to take full advantage of its functionality:
* an agent to listen for metrics: [Telegraf](https://github.com/influxdata/telegraf),
* a time-series database: [InfluxDB](https://www.influxdata.com/time-series-platform/influxdb/), and
* a frontend to display analytics: [Grafana](http://grafana.org/).
We put each of those inside its own Docker container. The whole system is illustrated below.
![BigchainDB monitoring system diagram: Application metrics flow from servers running BigchainDB to Telegraf to InfluxDB to Grafana](../_static/monitoring_system_diagram.png)
For ease of use, we've created a Docker [_Compose file_](https://docs.docker.com/compose/compose-file/) (named `docker-compose-monitor.yml`) to define the monitoring system setup. To use it, just go to the top `bigchaindb` directory and run:
```text
$ docker-compose -f docker-compose-monitor.yml build
$ docker-compose -f docker-compose-monitor.yml up
```
It is also possible to mount a host directory as a data volume for InfluxDB
by setting the `INFLUXDB_DATA` environment variable:
```text
$ INFLUXDB_DATA=/data docker-compose -f docker-compose-monitor.yml up
```
You can view the Grafana dashboard in your web browser at:
[http://localhost:3000/dashboard/script/bigchaindb_dashboard.js](http://localhost:3000/dashboard/script/bigchaindb_dashboard.js)
(You may want to replace `localhost` with another hostname in that URL, e.g. the hostname of a remote monitoring server.)
The login and password are `admin` by default. If BigchainDB is running and processing transactions, you should see analytics—if not, [start BigchainDB](../dev-and-test/setup-run-node.html#run-bigchaindb) and load some test transactions:
```text
$ bigchaindb load
```
then refresh the page after a few seconds.
If you're not interested in monitoring, don't worry: BigchainDB will function just fine without any monitoring setup.
Feel free to modify the [custom Grafana dashboard](https://github.com/rhsimplex/grafana-bigchaindb-docker/blob/master/bigchaindb_dashboard.js) to your liking!

View File

@ -19,9 +19,6 @@ For convenience, here's a list of all the relevant environment variables (docume
`BIGCHAINDB_SERVER_BIND`<br>
`BIGCHAINDB_SERVER_WORKERS`<br>
`BIGCHAINDB_SERVER_THREADS`<br>
`BIGCHAINDB_STATSD_HOST`<br>
`BIGCHAINDB_STATSD_PORT`<br>
`BIGCHAINDB_STATSD_RATE`<br>
`BIGCHAINDB_CONFIG_PATH`<br>
`BIGCHAINDB_BACKLOG_REASSIGN_DELAY`<br>
@ -151,23 +148,6 @@ export BIGCHAINDB_SERVER_THREADS=5
}
```
## statsd.host, statsd.port & statsd.rate
These settings are used to configure where, and how often, [StatsD](https://github.com/etsy/statsd) should send data for [cluster monitoring](../clusters-feds/monitoring.html) purposes. `statsd.host` is the hostname of the monitoring server, where StatsD should send its data. `statsd.port` is the port. `statsd.rate` is the fraction of transaction operations that should be sampled. It's a float between 0.0 and 1.0.
**Example using environment variables**
```text
export BIGCHAINDB_STATSD_HOST="http://monitor.monitors-r-us.io"
export BIGCHAINDB_STATSD_PORT=8125
export BIGCHAINDB_STATSD_RATE=0.01
```
**Example config file snippet: the default**
```js
"statsd": {"host": "localhost", "port": 8125, "rate": 0.01}
```
## backlog_reassign_delay
Specifies how long, in seconds, transactions can remain in the backlog before being reassigned. Long-waiting transactions must be reassigned because the assigned node may no longer be responsive. The default duration is 120 seconds.

View File

@ -62,14 +62,6 @@ resource "aws_security_group" "node_sg1" {
cidr_blocks = ["0.0.0.0/0"]
}
# StatsD
ingress {
from_port = 8125
to_port = 8125
protocol = "udp"
cidr_blocks = ["0.0.0.0/0"]
}
# Future: Don't allow port 8080 for the RethinkDB web interface.
# Use a SOCKS proxy or reverse proxy instead.

View File

@ -65,7 +65,6 @@ install_requires = [
'pymongo~=3.4',
'pysha3==1.0.0',
'cryptoconditions>=0.5.0',
'statsd>=3.2.1',
'python-rapidjson>=0.0.8',
'logstats>=0.2.1',
'flask>=0.10.1',

View File

@ -35,7 +35,6 @@ def mock_bigchaindb_backup_config(monkeypatch):
config = {
'keypair': {},
'database': {'host': 'host', 'port': 12345, 'name': 'adbname'},
'statsd': {'host': 'host', 'port': 12345, 'rate': 0.1},
'backlog_reassign_delay': 5
}
monkeypatch.setattr('bigchaindb._config', config)

View File

@ -167,11 +167,6 @@ def test_autoconfigure_read_both_from_file_and_env(monkeypatch, request):
'private': None,
},
'keyring': KEYRING.split(':'),
'statsd': {
'host': 'localhost',
'port': 8125,
'rate': 0.01,
},
'backlog_reassign_delay': 5
}

View File

@ -1,14 +0,0 @@
from platform import node
def test_monitor_class_init_defaults():
import bigchaindb
from bigchaindb.monitor import Monitor
monitor = Monitor()
assert monitor
assert len(monitor._addr) == 2
# TODO get value from config
# assert monitor._addr[0] == bigchaindb.config['statsd']['host']
assert monitor._addr[0] == '127.0.0.1'
assert monitor._addr[1] == bigchaindb.config['statsd']['port']
assert monitor._prefix == node() + '.'