Fix pydatavec for Python 3 and fix Python 2 install problems (#8422)
parent
29990b1214
commit
d9e9733387
|
@ -61,4 +61,8 @@ nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/tensorflow/
|
|||
doc_sources/
|
||||
doc_sources_*
|
||||
|
||||
*.pyc
|
||||
*.pyc
|
||||
|
||||
# Python virtual environments
|
||||
venv/
|
||||
venv2/
|
|
@ -26,7 +26,7 @@ pydl4j.validate_datavec_jars()
|
|||
# -------------JVM starts here-------------
|
||||
from jnius import autoclass
|
||||
|
||||
|
||||
JString = autoclass("java.lang.String")
|
||||
JSchema = autoclass('org.datavec.api.transform.schema.Schema')
|
||||
SchemaBuilder = autoclass('org/datavec/api/transform/schema/Schema$Builder')
|
||||
|
||||
|
@ -43,7 +43,6 @@ CategoricalColumnCondition = autoclass(
|
|||
'org.datavec.api.transform.condition.column.CategoricalColumnCondition')
|
||||
DoubleColumnCondition = autoclass(
|
||||
'org.datavec.api.transform.condition.column.DoubleColumnCondition')
|
||||
#FloatColumnCondition = autoclass('org.datavec.api.transform.condition.column.FloatColumnCondition')
|
||||
StringColumnCondition = autoclass(
|
||||
'org.datavec.api.transform.condition.column.StringColumnCondition')
|
||||
|
||||
|
|
|
@ -71,8 +71,7 @@ class Schema(object):
|
|||
return schema
|
||||
|
||||
def to_java(self):
|
||||
from .java_classes import SchemaBuilder
|
||||
from .java_classes import JFloat, JDouble
|
||||
from .java_classes import SchemaBuilder, JString, JFloat, JDouble
|
||||
builder = SchemaBuilder()
|
||||
for c in self.columns:
|
||||
meta = self.columns[c]
|
||||
|
@ -80,19 +79,20 @@ class Schema(object):
|
|||
col_name = c
|
||||
col_args = meta[1:]
|
||||
if col_type == "string":
|
||||
builder.addColumnString(col_name)
|
||||
builder.addColumnString(JString(col_name))
|
||||
elif col_type == "categorical":
|
||||
builder.addColumnCategorical(col_name, *col_args)
|
||||
col_args = [JString(arg) for arg in col_args]
|
||||
builder.addColumnCategorical(JString(col_name), *col_args)
|
||||
else:
|
||||
# numerics
|
||||
# numerical data
|
||||
num_type = col_type[0].upper() + col_type[1:]
|
||||
f = getattr(builder, 'addColumn' + num_type)
|
||||
col_args = list(col_args)
|
||||
if num_type in ('Float', 'Double'):
|
||||
jtype = eval('J' + num_type)
|
||||
java_type = eval('J' + num_type)
|
||||
for i, a in enumerate(col_args):
|
||||
if type(a) in [int, float]:
|
||||
col_args[i] = jtype(a)
|
||||
col_args[i] = java_type(a)
|
||||
f(col_name, *col_args)
|
||||
return builder.build()
|
||||
|
||||
|
|
|
@ -20,10 +20,11 @@ from .conditions import *
|
|||
from .schema import Schema
|
||||
import warnings
|
||||
import logging
|
||||
from .java_classes import JString
|
||||
|
||||
|
||||
def _dq(x):
|
||||
return "\"" + x.replace("\"", "\\\"") + "\""
|
||||
return "JString(\"" + x.replace("\"", "\\\"") + "\")"
|
||||
|
||||
|
||||
def _to_camel(x, first_upper=False):
|
||||
|
@ -151,14 +152,14 @@ class TransformProcess(object):
|
|||
else:
|
||||
new_d[k] = old_d[k]
|
||||
self.final_schema.columns = new_d
|
||||
self.add_step("renameColumn", column, new_name)
|
||||
self.add_step("renameColumn", JString(column), JString(new_name))
|
||||
if not self.inplace:
|
||||
return self
|
||||
|
||||
def string_to_time(self, column, format="YYY-MM-DD HH:mm:ss.SSS", time_zone="UTC"):
|
||||
self.final_schema.columns[column][0] = "DateTime"
|
||||
self.add_step("exec", "stringToTimeTransform({}, {}, {})".format(
|
||||
_dq(column), _dq(format), "DateTimeZone." + time_zone))
|
||||
py_string = "stringToTimeTransform({}, {}, {})".format(_dq(column), _dq(format), "DateTimeZone." + time_zone)
|
||||
self.add_step("exec", py_string)
|
||||
if not self.inplace:
|
||||
return self
|
||||
|
||||
|
@ -184,7 +185,7 @@ class TransformProcess(object):
|
|||
if self.final_schema.columns[column][0] != 'string':
|
||||
raise Exception(
|
||||
'Can not apply append_string transform to column {} because it is not a string column'.format(column))
|
||||
self.add_step('appendStringColumnTransform', column, string)
|
||||
self.add_step('appendStringColumnTransform', JString(column), JString(string))
|
||||
if not self.inplace:
|
||||
return self
|
||||
|
||||
|
@ -378,6 +379,7 @@ class TransformProcess(object):
|
|||
tp.steps = config['steps'][:]
|
||||
return tp
|
||||
|
||||
# TODO from_java is used in konduit a lot
|
||||
def to_java(self):
|
||||
from .java_classes import TransformProcessBuilder
|
||||
from .java_classes import ConditionOp
|
||||
|
@ -407,6 +409,7 @@ class TransformProcess(object):
|
|||
from .java_classes import Arrays
|
||||
from .java_classes import ReducerBuilder
|
||||
from .java_classes import ReduceOp
|
||||
from .java_classes import JString
|
||||
|
||||
jschema = self.schema.to_java()
|
||||
builder = TransformProcessBuilder(jschema)
|
||||
|
|
|
@ -22,7 +22,7 @@ from setuptools import find_packages
|
|||
|
||||
|
||||
setup(name='pydatavec',
|
||||
version='0.1.1',
|
||||
version='0.1.2',
|
||||
description='Python interface for DataVec',
|
||||
long_description='Python interface for DataVec',
|
||||
|
||||
|
@ -39,7 +39,12 @@ setup(name='pydatavec',
|
|||
url='https://github.com/deeplearning4j/deeplearning4j.git',
|
||||
license='Apache',
|
||||
setup_requires=['Cython', 'pytest-runner'],
|
||||
install_requires=['Cython', 'requests', 'pydl4j', 'numpy'],
|
||||
install_requires=[
|
||||
'Cython',
|
||||
'requests',
|
||||
'pydl4j',
|
||||
'numpy<=1.16.4', # For compatibility with python 2
|
||||
],
|
||||
extras_require={
|
||||
'spark': ['pyspark'],
|
||||
'tests': ['pytest',
|
||||
|
|
Loading…
Reference in New Issue