fix pydatavec for python 3... and python2 install problems (#8422)

master
Max Pumperla 2019-11-20 08:20:04 +01:00 committed by GitHub
parent 29990b1214
commit d9e9733387
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 28 additions and 17 deletions

6
.gitignore vendored
View File

@@ -61,4 +61,8 @@ nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/tensorflow/
doc_sources/
doc_sources_*
*.pyc
*.pyc
# Python virtual environments
venv/
venv2/

View File

@@ -26,7 +26,7 @@ pydl4j.validate_datavec_jars()
# -------------JVM starts here-------------
from jnius import autoclass
JString = autoclass("java.lang.String")
JSchema = autoclass('org.datavec.api.transform.schema.Schema')
SchemaBuilder = autoclass('org/datavec/api/transform/schema/Schema$Builder')
@@ -43,7 +43,6 @@ CategoricalColumnCondition = autoclass(
'org.datavec.api.transform.condition.column.CategoricalColumnCondition')
DoubleColumnCondition = autoclass(
'org.datavec.api.transform.condition.column.DoubleColumnCondition')
#FloatColumnCondition = autoclass('org.datavec.api.transform.condition.column.FloatColumnCondition')
StringColumnCondition = autoclass(
'org.datavec.api.transform.condition.column.StringColumnCondition')

View File

@@ -71,8 +71,7 @@ class Schema(object):
return schema
def to_java(self):
from .java_classes import SchemaBuilder
from .java_classes import JFloat, JDouble
from .java_classes import SchemaBuilder, JString, JFloat, JDouble
builder = SchemaBuilder()
for c in self.columns:
meta = self.columns[c]
@@ -80,19 +79,20 @@ class Schema(object):
col_name = c
col_args = meta[1:]
if col_type == "string":
builder.addColumnString(col_name)
builder.addColumnString(JString(col_name))
elif col_type == "categorical":
builder.addColumnCategorical(col_name, *col_args)
col_args = [JString(arg) for arg in col_args]
builder.addColumnCategorical(JString(col_name), *col_args)
else:
# numerics
# numerical data
num_type = col_type[0].upper() + col_type[1:]
f = getattr(builder, 'addColumn' + num_type)
col_args = list(col_args)
if num_type in ('Float', 'Double'):
jtype = eval('J' + num_type)
java_type = eval('J' + num_type)
for i, a in enumerate(col_args):
if type(a) in [int, float]:
col_args[i] = jtype(a)
col_args[i] = java_type(a)
f(col_name, *col_args)
return builder.build()

View File

@@ -20,10 +20,11 @@ from .conditions import *
from .schema import Schema
import warnings
import logging
from .java_classes import JString
def _dq(x):
return "\"" + x.replace("\"", "\\\"") + "\""
return "JString(\"" + x.replace("\"", "\\\"") + "\")"
def _to_camel(x, first_upper=False):
@@ -151,14 +152,14 @@ class TransformProcess(object):
else:
new_d[k] = old_d[k]
self.final_schema.columns = new_d
self.add_step("renameColumn", column, new_name)
self.add_step("renameColumn", JString(column), JString(new_name))
if not self.inplace:
return self
def string_to_time(self, column, format="YYY-MM-DD HH:mm:ss.SSS", time_zone="UTC"):
self.final_schema.columns[column][0] = "DateTime"
self.add_step("exec", "stringToTimeTransform({}, {}, {})".format(
_dq(column), _dq(format), "DateTimeZone." + time_zone))
py_string = "stringToTimeTransform({}, {}, {})".format(_dq(column), _dq(format), "DateTimeZone." + time_zone)
self.add_step("exec", py_string)
if not self.inplace:
return self
@@ -184,7 +185,7 @@ class TransformProcess(object):
if self.final_schema.columns[column][0] != 'string':
raise Exception(
'Can not apply append_string transform to column {} because it is not a string column'.format(column))
self.add_step('appendStringColumnTransform', column, string)
self.add_step('appendStringColumnTransform', JString(column), JString(string))
if not self.inplace:
return self
@@ -378,6 +379,7 @@ class TransformProcess(object):
tp.steps = config['steps'][:]
return tp
# TODO from_java is used in konduit a lot
def to_java(self):
from .java_classes import TransformProcessBuilder
from .java_classes import ConditionOp
@@ -407,6 +409,7 @@ class TransformProcess(object):
from .java_classes import Arrays
from .java_classes import ReducerBuilder
from .java_classes import ReduceOp
from .java_classes import JString
jschema = self.schema.to_java()
builder = TransformProcessBuilder(jschema)

View File

@@ -22,7 +22,7 @@ from setuptools import find_packages
setup(name='pydatavec',
version='0.1.1',
version='0.1.2',
description='Python interface for DataVec',
long_description='Python interface for DataVec',
@@ -39,7 +39,12 @@ setup(name='pydatavec',
url='https://github.com/deeplearning4j/deeplearning4j.git',
license='Apache',
setup_requires=['Cython', 'pytest-runner'],
install_requires=['Cython', 'requests', 'pydl4j', 'numpy'],
install_requires=[
'Cython',
'requests',
'pydl4j',
'numpy<=1.16.4', # For compatibility with python 2
],
extras_require={
'spark': ['pyspark'],
'tests': ['pytest',