Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions docs/import_export_datasources.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
Importing and Exporting Datasources
===================================

The superset cli allows you to import and export datasources from and to YAML.
Datasources include both databases and druid clusters. The data is expected to be organized in the following hierarchy: ::

.
├──databases
| ├──database_1
| | ├──table_1
| | | ├──columns
| | | | ├──column_1
| | | | ├──column_2
| | | | └──... (more columns)
| | | └──metrics
| | | ├──metric_1
| | | ├──metric_2
| | | └──... (more metrics)
| | └── ... (more tables)
| └── ... (more databases)
└──druid_clusters
├──cluster_1
| ├──datasource_1
| | ├──columns
| | | ├──column_1
| | | ├──column_2
| | | └──... (more columns)
| | └──metrics
| | ├──metric_1
| | ├──metric_2
| | └──... (more metrics)
| └── ... (more datasources)
└── ... (more clusters)


Exporting Datasources to YAML
-----------------------------
You can print your current datasources to stdout by running: ::

superset export_datasources


To save your datasources to a file run: ::

superset export_datasources -f <filename>


By default, default (null) values will be omitted. Use the ``-d`` flag to include them.
If you want back references to be included (e.g. a column to include the table id
it belongs to) use the ``-b`` flag.

Alternatively, you can export datasources using the UI:

1. Open **Sources** -> **Databases** to export all tables associated to a single or multiple databases. (**Tables** for one or more tables, **Druid Clusters** for clusters, **Druid Datasources** for datasources)
2. Select the items you would like to export
3. Click **Actions** -> **Export to YAML**
4. If you want to import an item that you exported through the UI, you will need to nest it inside its parent element, e.g. a ``database`` needs to be nested under ``databases``, and a ``table`` needs to be nested inside a ``database`` element.

Exporting the complete supported YAML schema
--------------------------------------------
In order to obtain an exhaustive list of all fields you can import using the YAML import run: ::

superset export_datasource_schema

Again, you can use the ``-b`` flag to include back references.


Importing Datasources from YAML
-------------------------------
In order to import datasources from one or more YAML files, run: ::

superset import_datasources -p <path or filename>

If you supply a path all files ending with ``*.yaml`` or ``*.yml`` will be parsed.
You can apply additional flags, e.g.: ::

superset import_datasources -p <path> -r

The ``-r`` flag makes the import search the supplied path recursively.

The sync flag ``-s`` takes parameters in order to sync the supplied elements with
your file. Be careful: this can delete the contents of your meta database. Example: ::

superset import_datasources -p <path / filename> -s columns,metrics

This will sync all ``metrics`` and ``columns`` for all datasources found in the
``<path / filename>`` in the Superset meta database. This means columns and metrics
not specified in YAML will be deleted. If you add ``tables`` to ``columns,metrics``,
those will be synchronised as well.


If you don't supply the sync flag (``-s``) importing will only add and update (override) fields.
E.g. you can add a ``verbose_name`` to the column ``ds`` in the table ``random_time_series`` from the example datasets
by saving the following YAML to file and then running the ``import_datasources`` command. ::

databases:
- database_name: main
tables:
- table_name: random_time_series
columns:
- column_name: ds
verbose_name: datetime

2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,11 @@ def get_git_sha():
'markdown==2.6.8',
'pandas==0.20.3',
'parsedatetime==2.0.0',
'pathlib2==2.3.0',
'pydruid==0.3.1',
'PyHive>=0.4.0',
'python-dateutil==2.6.0',
'pyyaml>=3.11',
'requests==2.17.3',
'simplejson==3.10.0',
'six==1.10.0',
Expand Down
81 changes: 80 additions & 1 deletion superset/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@
from datetime import datetime
import logging
from subprocess import Popen
from sys import stdout

from colorama import Fore, Style
from flask_migrate import MigrateCommand
from flask_script import Manager
from pathlib2 import Path
import yaml

from superset import app, db, security, utils
from superset import app, db, dict_import_export_util, security, utils

config = app.config
celery_app = utils.get_celery_app(config)
Expand Down Expand Up @@ -178,6 +181,82 @@ def refresh_druid(datasource, merge):
session.commit()


@manager.option(
    '-p', '--path', dest='path',
    help='Path to a single YAML file or path containing multiple YAML '
         'files to import (*.yaml or *.yml)')
@manager.option(
    '-s', '--sync', dest='sync', default='',
    help='comma seperated list of element types to synchronize '
         'e.g. "metrics,columns" deletes metrics and columns in the DB '
         'that are not specified in the YAML file')
@manager.option(
    '-r', '--recursive', dest='recursive', action='store_true',
    help='recursively search the path for yaml files')
def import_datasources(path, sync, recursive=False):
    """Import datasources from YAML"""
    # NOTE: the docstring above is intentionally left as a one-liner; the CLI
    # framework may surface it verbatim as the command description.
    #
    # path:      a single YAML file, or a directory whose *.yaml / *.yml
    #            files are imported.
    # sync:      comma separated element type names handed to
    #            ``import_from_dict`` as a list via its ``sync`` argument.
    # recursive: when ``path`` is a directory, also descend into
    #            sub-directories.
    #
    # Each file is imported independently: a failure is logged (with
    # traceback) and the remaining files are still processed.
    if not path:
        # Path(None) would raise a TypeError; report the missing option.
        logging.error('No path supplied, use -p <path or filename>')
        return

    # ''.split(',') yields [''], so drop empty entries and stray whitespace
    # to hand over a clean (possibly empty) list of element types.
    sync_array = [
        name.strip() for name in (sync or '').split(',') if name.strip()
    ]

    p = Path(path)
    files = []
    if p.is_file():
        files.append(p)
    elif p.exists():
        find = p.rglob if recursive else p.glob
        files.extend(find('*.yaml'))
        files.extend(find('*.yml'))
    else:
        logging.error('Path %s does not exist', p)

    for f in files:
        logging.info('Importing datasources from file %s', f)
        try:
            with f.open() as data_stream:
                # SECURITY: yaml.load can instantiate arbitrary Python
                # objects from tagged YAML. The exporters write with
                # yaml.safe_dump, so safe_load reads everything they emit
                # while rejecting python-specific tags in supplied files.
                dict_import_export_util.import_from_dict(
                    db.session,
                    yaml.safe_load(data_stream),
                    sync=sync_array)
        except Exception:
            # Best effort per file: keep going, but keep the traceback.
            logging.exception(
                'Error when importing datasources from file %s', f)


@manager.option(
    '-f', '--datasource-file', default=None, dest='datasource_file',
    help='Specify the the file to export to')
@manager.option(
    '-p', '--print', action='store_true', dest='print_stdout',
    help='Print YAML to stdout')
@manager.option(
    '-b', '--back-references', action='store_true', dest='back_references',
    help='Include parent back references')
@manager.option(
    '-d', '--include-defaults', action='store_true', dest='include_defaults',
    help='Include fields containing defaults')
def export_datasources(print_stdout, datasource_file,
                       back_references, include_defaults):
    """Export datasources to YAML"""
    # Output routing:
    #   * stdout is written when explicitly requested (-p) or when no
    #     target file was given;
    #   * the file is written whenever a target file was given.
    # Both outputs may therefore be produced by a single invocation.
    exported = dict_import_export_util.export_to_dict(
        session=db.session,
        recursive=True,
        back_references=back_references,
        include_defaults=include_defaults)

    wants_stdout = print_stdout or not datasource_file
    if wants_stdout:
        yaml.safe_dump(exported, stdout, default_flow_style=False)

    if not datasource_file:
        return

    logging.info('Exporting datasources to %s', datasource_file)
    with open(datasource_file, 'w') as out_file:
        yaml.safe_dump(exported, out_file, default_flow_style=False)


@manager.option(
    '-b', '--back-references', action='store_true', dest='back_references',
    help='Include parent back references')
def export_datasource_schema(back_references):
    """Export datasource YAML schema to stdout"""
    # The flag is opt-in (store_true): back references are included only
    # when -b is passed, matching the option's help text and the identical
    # -b option of ``export_datasources``. With store_false the flag was
    # inverted: passing -b switched back references off.
    #
    # back_references: forwarded unchanged to ``export_schema_to_dict``.
    data = dict_import_export_util.export_schema_to_dict(
        back_references=back_references)
    yaml.safe_dump(data, stdout, default_flow_style=False)


@manager.command
def update_datasources_cache():
"""Refresh sqllab datasources cache"""
Expand Down
30 changes: 24 additions & 6 deletions superset/connectors/druid/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@

from superset import conf, db, import_util, sm, utils
from superset.connectors.base.models import BaseColumn, BaseDatasource, BaseMetric
from superset.models.helpers import AuditMixinNullable, QueryResult, set_perm
from superset.models.helpers import (
AuditMixinNullable, ImportMixin, QueryResult, set_perm,
)
from superset.utils import (
DimSelector, DTTM_ALIAS, flasher, MetricPermException,
)
Expand Down Expand Up @@ -60,7 +62,7 @@ def __init__(self, name, post_aggregator):
self.post_aggregator = post_aggregator


class DruidCluster(Model, AuditMixinNullable):
class DruidCluster(Model, AuditMixinNullable, ImportMixin):

"""ORM object referencing the Druid clusters"""

Expand All @@ -81,6 +83,11 @@ class DruidCluster(Model, AuditMixinNullable):
metadata_last_refreshed = Column(DateTime)
cache_timeout = Column(Integer)

export_fields = ('cluster_name', 'coordinator_host', 'coordinator_port',
'coordinator_endpoint', 'broker_host', 'broker_port',
'broker_endpoint', 'cache_timeout')
export_children = ['datasources']

def __repr__(self):
return self.verbose_name if self.verbose_name else self.cluster_name

Expand Down Expand Up @@ -219,6 +226,7 @@ class DruidColumn(Model, BaseColumn):
"""ORM model for storing Druid datasource column metadata"""

__tablename__ = 'columns'
__table_args__ = (UniqueConstraint('column_name', 'datasource_id'),)

datasource_id = Column(
Integer,
Expand All @@ -233,8 +241,9 @@ class DruidColumn(Model, BaseColumn):
export_fields = (
'datasource_id', 'column_name', 'is_active', 'type', 'groupby',
'count_distinct', 'sum', 'avg', 'max', 'min', 'filterable',
'description', 'dimension_spec_json',
'description', 'dimension_spec_json', 'verbose_name',
)
export_parent = 'datasource'

def __repr__(self):
return self.column_name
Expand Down Expand Up @@ -360,6 +369,7 @@ class DruidMetric(Model, BaseMetric):
"""ORM object referencing Druid metrics for a datasource"""

__tablename__ = 'metrics'
__table_args__ = (UniqueConstraint('metric_name', 'datasource_id'),)
datasource_id = Column(
Integer,
ForeignKey('datasources.id'))
Expand All @@ -374,6 +384,7 @@ class DruidMetric(Model, BaseMetric):
'metric_name', 'verbose_name', 'metric_type', 'datasource_id',
'json', 'description', 'is_restricted', 'd3format',
)
export_parent = 'datasource'

@property
def expression(self):
Expand Down Expand Up @@ -409,6 +420,7 @@ class DruidDatasource(Model, BaseDatasource):
"""ORM object referencing Druid datasources (tables)"""

__tablename__ = 'datasources'
__table_args__ = (UniqueConstraint('datasource_name', 'cluster_name'),)

type = 'druid'
query_langtage = 'json'
Expand Down Expand Up @@ -438,6 +450,9 @@ class DruidDatasource(Model, BaseDatasource):
'cluster_name', 'offset', 'cache_timeout', 'params',
)

export_parent = 'cluster'
export_children = ['columns', 'metrics']

@property
def database(self):
return self.cluster
Expand Down Expand Up @@ -556,9 +571,12 @@ def int_or_0(v):
v2nums = [int_or_0(n) for n in v2.split('.')]
v1nums = (v1nums + [0, 0, 0])[:3]
v2nums = (v2nums + [0, 0, 0])[:3]
return v1nums[0] > v2nums[0] or \
(v1nums[0] == v2nums[0] and v1nums[1] > v2nums[1]) or \
(v1nums[0] == v2nums[0] and v1nums[1] == v2nums[1] and v1nums[2] > v2nums[2])
return (
v1nums[0] > v2nums[0] or
(v1nums[0] == v2nums[0] and v1nums[1] > v2nums[1]) or
(v1nums[0] == v2nums[0] and v1nums[1] == v2nums[1] and
v1nums[2] > v2nums[2])
)

def latest_metadata(self):
"""Returns segment metadata from the latest segment"""
Expand Down
6 changes: 3 additions & 3 deletions superset/connectors/druid/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from superset.views.base import (
BaseSupersetView, DatasourceFilter, DeleteMixin,
get_datasource_exist_error_mgs, ListWidgetWithCheckboxes, SupersetModelView,
validate_json,
validate_json, YamlExportMixin,
)
from . import models

Expand Down Expand Up @@ -122,7 +122,7 @@ def post_update(self, metric):
appbuilder.add_view_no_menu(DruidMetricInlineView)


class DruidClusterModelView(SupersetModelView, DeleteMixin): # noqa
class DruidClusterModelView(SupersetModelView, DeleteMixin, YamlExportMixin): # noqa
datamodel = SQLAInterface(models.DruidCluster)

list_title = _('List Druid Cluster')
Expand Down Expand Up @@ -168,7 +168,7 @@ def _delete(self, pk):
category_icon='fa-database',)


class DruidDatasourceModelView(DatasourceModelView, DeleteMixin): # noqa
class DruidDatasourceModelView(DatasourceModelView, DeleteMixin, YamlExportMixin): # noqa
datamodel = SQLAInterface(models.DruidDatasource)

list_title = _('List Druid Datasource')
Expand Down
Loading