Skip to content

merge doesn't work on csv uri #1480

@pratapvardhan

Description

@pratapvardhan

I am trying a reduction operation with merge: bz.merge(**{'a': data['petal_width'].sum(), 'b': data['petal_length'].sum()}).

It works with a SQL URI but fails with a CSV URI.

In [498]: datasql = bz.Data('sqlite:///iris.db::iris')

In [499]: datacsv = bz.Data('iris.csv')

In [500]: bz.merge(**{'a': datasql['petal_width'].sum(), 'b': datasql['petal_length'].sum()})
Out[500]:
       a      b
0  179.9  563.7

In [501]: bz.merge(**{'a': datacsv['petal_width'].sum(), 'b': datacsv['petal_length'].sum()})
Out[501]: ---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
d:\apps\anaconda2\lib\site-packages\IPython\core\formatters.pyc in __call__(self, obj)
    697                 type_pprinters=self.type_printers,
    698                 deferred_pprinters=self.deferred_printers)
--> 699             printer.pretty(obj)
    700             printer.flush()
    701             return stream.getvalue()

d:\apps\anaconda2\lib\site-packages\IPython\lib\pretty.pyc in pretty(self, obj)
    381                             if callable(meth):
    382                                 return meth(obj, self, cycle)
--> 383             return _default_pprint(obj, self, cycle)
    384         finally:
    385             self.end_group()

d:\apps\anaconda2\lib\site-packages\IPython\lib\pretty.pyc in _default_pprint(obj, p, cycle)
    501     if _safe_getattr(klass, '__repr__', None) not in _baseclass_reprs:
    502         # A user-provided repr. Find newlines and replace them with p.break_()
--> 503         _repr_pprint(obj, p, cycle)
    504         return
    505     p.begin_group(1, '<')

d:\apps\anaconda2\lib\site-packages\IPython\lib\pretty.pyc in _repr_pprint(obj, p, cycle)
    692     """A pprint that just redirects to the normal repr function."""
    693     # Find newlines and replace them with p.break_()
--> 694     output = repr(obj)
    695     for idx,output_line in enumerate(output.splitlines()):
    696         if idx:

d:\apps\anaconda2\lib\site-packages\blaze\interactive.pyc in expr_repr(expr, n)
    266                              isscalar(expr.dshape.measure) or
    267                              isinstance(expr.dshape.measure, Map))):
--> 268         return repr_tables(expr, 10)
    269
    270     # Smallish arrays

d:\apps\anaconda2\lib\site-packages\blaze\interactive.pyc in repr_tables(expr, n)
    199
    200 def repr_tables(expr, n=10):
--> 201     result = concrete_head(expr, n).rename(columns={None: ''})
    202
    203     if isinstance(result, (DataFrame, Series)):

d:\apps\anaconda2\lib\site-packages\blaze\interactive.pyc in concrete_head(expr, n)
    181         return odo(head, object)
    182     elif isrecord(expr.dshape.measure):
--> 183         return odo(head, DataFrame)
    184     else:
    185         df = odo(head, DataFrame)

d:\apps\anaconda2\lib\site-packages\odo\odo.pyc in odo(source, target, **kwargs)
     89     odo.append.append      - Add things onto existing things
     90     """
---> 91     return into(target, source, **kwargs)

d:\apps\anaconda2\lib\site-packages\multipledispatch\dispatcher.pyc in __call__(self, *args, **kwargs)
    162             self._cache[types] = func
    163         try:
--> 164             return func(*args, **kwargs)
    165
    166         except MDNotImplementedError:

d:\apps\anaconda2\lib\site-packages\blaze\interactive.pyc in into(a, b, **kwargs)
    310 @dispatch((object, type, str, unicode), Expr)
    311 def into(a, b, **kwargs):
--> 312     result = compute(b, **kwargs)
    313     kwargs['dshape'] = b.dshape
    314     return into(a, result, **kwargs)

d:\apps\anaconda2\lib\site-packages\multipledispatch\dispatcher.pyc in __call__(self, *args, **kwargs)
    162             self._cache[types] = func
    163         try:
--> 164             return func(*args, **kwargs)
    165
    166         except MDNotImplementedError:

d:\apps\anaconda2\lib\site-packages\blaze\interactive.pyc in compute(expr, **kwargs)
    166         raise ValueError("No data resources found")
    167     else:
--> 168         return compute(expr, resources, **kwargs)
    169
    170

d:\apps\anaconda2\lib\site-packages\multipledispatch\dispatcher.pyc in __call__(self, *args, **kwargs)
    162             self._cache[types] = func
    163         try:
--> 164             return func(*args, **kwargs)
    165
    166         except MDNotImplementedError:

d:\apps\anaconda2\lib\site-packages\blaze\compute\core.pyc in compute(expr, d, **kwargs)
    404         d4 = d3
    405
--> 406     result = top_then_bottom_then_top_again_etc(expr3, d4, **kwargs)
    407     if post_compute_:
    408         result = post_compute_(expr3, result, scope=d4)

d:\apps\anaconda2\lib\site-packages\blaze\compute\core.pyc in top_then_bottom_then_top_again_etc(expr, scope, **kwargs)
    166
    167     # 2. Compute from the bottom until there is a data type change
--> 168     expr2, scope2 = bottom_up_until_type_break(expr, scope, **kwargs)
    169
    170     # 3. Re-optimize data and expressions

d:\apps\anaconda2\lib\site-packages\blaze\compute\core.pyc in bottom_up_until_type_break(expr, scope, **kwargs)
    308     #    (this is the bottom part of bottom up)
    309     exprs, new_scopes = zip(*[bottom_up_until_type_break(i, scope, **kwargs)
--> 310                               for i in inputs])
    311
    312     # 2. Form new (much shallower) expression and new (more computed) scope

d:\apps\anaconda2\lib\site-packages\blaze\compute\core.pyc in bottom_up_until_type_break(expr, scope, **kwargs)
    332     try:
    333         return leaf, {leaf: compute_up(new_expr, *_data, scope=new_scope,
--> 334                                        **kwargs)}
    335     except NotImplementedError:
    336         return new_expr, new_scope

d:\apps\anaconda2\lib\site-packages\multipledispatch\dispatcher.pyc in __call__(self, *args, **kwargs)
    162             self._cache[types] = func
    163         try:
--> 164             return func(*args, **kwargs)
    165
    166         except MDNotImplementedError:

d:\apps\anaconda2\lib\site-packages\blaze\compute\pandas.pyc in compute_up(t, df, scope, **kwargs)
    576     scope = merge_dicts(scope or {}, {subexpression: df})
    577     children = [compute(_child, scope) for _child in t.children]
--> 578     return pd.concat(children, axis=1)
    579
    580

d:\apps\anaconda2\lib\site-packages\pandas\tools\merge.pyc in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
    832                        keys=keys, levels=levels, names=names,
    833                        verify_integrity=verify_integrity,
--> 834                        copy=copy)
    835     return op.get_result()
    836

d:\apps\anaconda2\lib\site-packages\pandas\tools\merge.pyc in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
    888         for obj in objs:
    889             if not isinstance(obj, NDFrame):
--> 890                 raise TypeError("cannot concatenate a non-NDFrame object")
    891
    892             # consolidate

TypeError: cannot concatenate a non-NDFrame object

This was tested on blaze 0.9.1 and pandas 0.18.0

In [502]: bz.__version__
Out[502]: u'0.9.1'

In [503]: bz.pd.__version__
Out[503]: u'0.18.0'

The issue persists on master (0.10.0rc.xx.x) too.

In [8]: bz.__version__
Out[8]: '0.10.0rc4+0.g6961844.dirty'

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions