Fix pydatavec for Python 3 and fix Python 2 install problems (#8422)
parent
29990b1214
commit
d9e9733387
|
@ -62,3 +62,7 @@ doc_sources/
|
||||||
doc_sources_*
|
doc_sources_*
|
||||||
|
|
||||||
*.pyc
|
*.pyc
|
||||||
|
|
||||||
|
# Python virtual environments
|
||||||
|
venv/
|
||||||
|
venv2/
|
|
@ -26,7 +26,7 @@ pydl4j.validate_datavec_jars()
|
||||||
# -------------JVM starts here-------------
|
# -------------JVM starts here-------------
|
||||||
from jnius import autoclass
|
from jnius import autoclass
|
||||||
|
|
||||||
|
JString = autoclass("java.lang.String")
|
||||||
JSchema = autoclass('org.datavec.api.transform.schema.Schema')
|
JSchema = autoclass('org.datavec.api.transform.schema.Schema')
|
||||||
SchemaBuilder = autoclass('org/datavec/api/transform/schema/Schema$Builder')
|
SchemaBuilder = autoclass('org/datavec/api/transform/schema/Schema$Builder')
|
||||||
|
|
||||||
|
@ -43,7 +43,6 @@ CategoricalColumnCondition = autoclass(
|
||||||
'org.datavec.api.transform.condition.column.CategoricalColumnCondition')
|
'org.datavec.api.transform.condition.column.CategoricalColumnCondition')
|
||||||
DoubleColumnCondition = autoclass(
|
DoubleColumnCondition = autoclass(
|
||||||
'org.datavec.api.transform.condition.column.DoubleColumnCondition')
|
'org.datavec.api.transform.condition.column.DoubleColumnCondition')
|
||||||
#FloatColumnCondition = autoclass('org.datavec.api.transform.condition.column.FloatColumnCondition')
|
|
||||||
StringColumnCondition = autoclass(
|
StringColumnCondition = autoclass(
|
||||||
'org.datavec.api.transform.condition.column.StringColumnCondition')
|
'org.datavec.api.transform.condition.column.StringColumnCondition')
|
||||||
|
|
||||||
|
|
|
@ -71,8 +71,7 @@ class Schema(object):
|
||||||
return schema
|
return schema
|
||||||
|
|
||||||
def to_java(self):
|
def to_java(self):
|
||||||
from .java_classes import SchemaBuilder
|
from .java_classes import SchemaBuilder, JString, JFloat, JDouble
|
||||||
from .java_classes import JFloat, JDouble
|
|
||||||
builder = SchemaBuilder()
|
builder = SchemaBuilder()
|
||||||
for c in self.columns:
|
for c in self.columns:
|
||||||
meta = self.columns[c]
|
meta = self.columns[c]
|
||||||
|
@ -80,19 +79,20 @@ class Schema(object):
|
||||||
col_name = c
|
col_name = c
|
||||||
col_args = meta[1:]
|
col_args = meta[1:]
|
||||||
if col_type == "string":
|
if col_type == "string":
|
||||||
builder.addColumnString(col_name)
|
builder.addColumnString(JString(col_name))
|
||||||
elif col_type == "categorical":
|
elif col_type == "categorical":
|
||||||
builder.addColumnCategorical(col_name, *col_args)
|
col_args = [JString(arg) for arg in col_args]
|
||||||
|
builder.addColumnCategorical(JString(col_name), *col_args)
|
||||||
else:
|
else:
|
||||||
# numerics
|
# numerical data
|
||||||
num_type = col_type[0].upper() + col_type[1:]
|
num_type = col_type[0].upper() + col_type[1:]
|
||||||
f = getattr(builder, 'addColumn' + num_type)
|
f = getattr(builder, 'addColumn' + num_type)
|
||||||
col_args = list(col_args)
|
col_args = list(col_args)
|
||||||
if num_type in ('Float', 'Double'):
|
if num_type in ('Float', 'Double'):
|
||||||
jtype = eval('J' + num_type)
|
java_type = eval('J' + num_type)
|
||||||
for i, a in enumerate(col_args):
|
for i, a in enumerate(col_args):
|
||||||
if type(a) in [int, float]:
|
if type(a) in [int, float]:
|
||||||
col_args[i] = jtype(a)
|
col_args[i] = java_type(a)
|
||||||
f(col_name, *col_args)
|
f(col_name, *col_args)
|
||||||
return builder.build()
|
return builder.build()
|
||||||
|
|
||||||
|
|
|
@ -20,10 +20,11 @@ from .conditions import *
|
||||||
from .schema import Schema
|
from .schema import Schema
|
||||||
import warnings
|
import warnings
|
||||||
import logging
|
import logging
|
||||||
|
from .java_classes import JString
|
||||||
|
|
||||||
|
|
||||||
def _dq(x):
|
def _dq(x):
|
||||||
return "\"" + x.replace("\"", "\\\"") + "\""
|
return "JString(\"" + x.replace("\"", "\\\"") + "\")"
|
||||||
|
|
||||||
|
|
||||||
def _to_camel(x, first_upper=False):
|
def _to_camel(x, first_upper=False):
|
||||||
|
@ -151,14 +152,14 @@ class TransformProcess(object):
|
||||||
else:
|
else:
|
||||||
new_d[k] = old_d[k]
|
new_d[k] = old_d[k]
|
||||||
self.final_schema.columns = new_d
|
self.final_schema.columns = new_d
|
||||||
self.add_step("renameColumn", column, new_name)
|
self.add_step("renameColumn", JString(column), JString(new_name))
|
||||||
if not self.inplace:
|
if not self.inplace:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def string_to_time(self, column, format="YYY-MM-DD HH:mm:ss.SSS", time_zone="UTC"):
|
def string_to_time(self, column, format="YYY-MM-DD HH:mm:ss.SSS", time_zone="UTC"):
|
||||||
self.final_schema.columns[column][0] = "DateTime"
|
self.final_schema.columns[column][0] = "DateTime"
|
||||||
self.add_step("exec", "stringToTimeTransform({}, {}, {})".format(
|
py_string = "stringToTimeTransform({}, {}, {})".format(_dq(column), _dq(format), "DateTimeZone." + time_zone)
|
||||||
_dq(column), _dq(format), "DateTimeZone." + time_zone))
|
self.add_step("exec", py_string)
|
||||||
if not self.inplace:
|
if not self.inplace:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
@ -184,7 +185,7 @@ class TransformProcess(object):
|
||||||
if self.final_schema.columns[column][0] != 'string':
|
if self.final_schema.columns[column][0] != 'string':
|
||||||
raise Exception(
|
raise Exception(
|
||||||
'Can not apply append_string transform to column {} because it is not a string column'.format(column))
|
'Can not apply append_string transform to column {} because it is not a string column'.format(column))
|
||||||
self.add_step('appendStringColumnTransform', column, string)
|
self.add_step('appendStringColumnTransform', JString(column), JString(string))
|
||||||
if not self.inplace:
|
if not self.inplace:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
@ -378,6 +379,7 @@ class TransformProcess(object):
|
||||||
tp.steps = config['steps'][:]
|
tp.steps = config['steps'][:]
|
||||||
return tp
|
return tp
|
||||||
|
|
||||||
|
# TODO from_java is used in konduit a lot
|
||||||
def to_java(self):
|
def to_java(self):
|
||||||
from .java_classes import TransformProcessBuilder
|
from .java_classes import TransformProcessBuilder
|
||||||
from .java_classes import ConditionOp
|
from .java_classes import ConditionOp
|
||||||
|
@ -407,6 +409,7 @@ class TransformProcess(object):
|
||||||
from .java_classes import Arrays
|
from .java_classes import Arrays
|
||||||
from .java_classes import ReducerBuilder
|
from .java_classes import ReducerBuilder
|
||||||
from .java_classes import ReduceOp
|
from .java_classes import ReduceOp
|
||||||
|
from .java_classes import JString
|
||||||
|
|
||||||
jschema = self.schema.to_java()
|
jschema = self.schema.to_java()
|
||||||
builder = TransformProcessBuilder(jschema)
|
builder = TransformProcessBuilder(jschema)
|
||||||
|
|
|
@ -22,7 +22,7 @@ from setuptools import find_packages
|
||||||
|
|
||||||
|
|
||||||
setup(name='pydatavec',
|
setup(name='pydatavec',
|
||||||
version='0.1.1',
|
version='0.1.2',
|
||||||
description='Python interface for DataVec',
|
description='Python interface for DataVec',
|
||||||
long_description='Python interface for DataVec',
|
long_description='Python interface for DataVec',
|
||||||
|
|
||||||
|
@ -39,7 +39,12 @@ setup(name='pydatavec',
|
||||||
url='https://github.com/deeplearning4j/deeplearning4j.git',
|
url='https://github.com/deeplearning4j/deeplearning4j.git',
|
||||||
license='Apache',
|
license='Apache',
|
||||||
setup_requires=['Cython', 'pytest-runner'],
|
setup_requires=['Cython', 'pytest-runner'],
|
||||||
install_requires=['Cython', 'requests', 'pydl4j', 'numpy'],
|
install_requires=[
|
||||||
|
'Cython',
|
||||||
|
'requests',
|
||||||
|
'pydl4j',
|
||||||
|
'numpy<=1.16.4', # For compatibility with python 2
|
||||||
|
],
|
||||||
extras_require={
|
extras_require={
|
||||||
'spark': ['pyspark'],
|
'spark': ['pyspark'],
|
||||||
'tests': ['pytest',
|
'tests': ['pytest',
|
||||||
|
|
Loading…
Reference in New Issue