diff --git a/.gitignore b/.gitignore index d6217555f..501c270c4 100644 --- a/.gitignore +++ b/.gitignore @@ -61,4 +61,8 @@ nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/tensorflow/ doc_sources/ doc_sources_* -*.pyc \ No newline at end of file +*.pyc + +# Python virtual environments +venv/ +venv2/ \ No newline at end of file diff --git a/pydatavec/pydatavec/java_classes.py b/pydatavec/pydatavec/java_classes.py index 4031c9257..bd934779f 100644 --- a/pydatavec/pydatavec/java_classes.py +++ b/pydatavec/pydatavec/java_classes.py @@ -26,7 +26,7 @@ pydl4j.validate_datavec_jars() # -------------JVM starts here------------- from jnius import autoclass - +JString = autoclass("java.lang.String") JSchema = autoclass('org.datavec.api.transform.schema.Schema') SchemaBuilder = autoclass('org/datavec/api/transform/schema/Schema$Builder') @@ -43,7 +43,6 @@ CategoricalColumnCondition = autoclass( 'org.datavec.api.transform.condition.column.CategoricalColumnCondition') DoubleColumnCondition = autoclass( 'org.datavec.api.transform.condition.column.DoubleColumnCondition') -#FloatColumnCondition = autoclass('org.datavec.api.transform.condition.column.FloatColumnCondition') StringColumnCondition = autoclass( 'org.datavec.api.transform.condition.column.StringColumnCondition') diff --git a/pydatavec/pydatavec/schema.py b/pydatavec/pydatavec/schema.py index 1bfb56594..47a9e665c 100644 --- a/pydatavec/pydatavec/schema.py +++ b/pydatavec/pydatavec/schema.py @@ -71,8 +71,7 @@ class Schema(object): return schema def to_java(self): - from .java_classes import SchemaBuilder - from .java_classes import JFloat, JDouble + from .java_classes import SchemaBuilder, JString, JFloat, JDouble builder = SchemaBuilder() for c in self.columns: meta = self.columns[c] @@ -80,19 +79,20 @@ class Schema(object): col_name = c col_args = meta[1:] if col_type == "string": - builder.addColumnString(col_name) + builder.addColumnString(JString(col_name)) elif col_type == "categorical": - builder.addColumnCategorical(col_name, *col_args) + col_args = [JString(arg) for arg in col_args] + builder.addColumnCategorical(JString(col_name), *col_args) else: - # numerics + # numerical data num_type = col_type[0].upper() + col_type[1:] f = getattr(builder, 'addColumn' + num_type) col_args = list(col_args) if num_type in ('Float', 'Double'): - jtype = eval('J' + num_type) + java_type = eval('J' + num_type) for i, a in enumerate(col_args): if type(a) in [int, float]: - col_args[i] = jtype(a) + col_args[i] = java_type(a) f(col_name, *col_args) return builder.build() diff --git a/pydatavec/pydatavec/transform_process.py b/pydatavec/pydatavec/transform_process.py index 041bb6bc4..b5ccd7620 100644 --- a/pydatavec/pydatavec/transform_process.py +++ b/pydatavec/pydatavec/transform_process.py @@ -20,10 +20,11 @@ from .conditions import * from .schema import Schema import warnings import logging +from .java_classes import JString def _dq(x): - return "\"" + x.replace("\"", "\\\"") + "\"" + return "JString(\"" + x.replace("\"", "\\\"") + "\")" def _to_camel(x, first_upper=False): @@ -151,14 +152,14 @@ class TransformProcess(object): else: new_d[k] = old_d[k] self.final_schema.columns = new_d - self.add_step("renameColumn", column, new_name) + self.add_step("renameColumn", JString(column), JString(new_name)) if not self.inplace: return self def string_to_time(self, column, format="YYY-MM-DD HH:mm:ss.SSS", time_zone="UTC"): self.final_schema.columns[column][0] = "DateTime" - self.add_step("exec", "stringToTimeTransform({}, {}, {})".format( - _dq(column), _dq(format), "DateTimeZone." + time_zone)) + py_string = "stringToTimeTransform({}, {}, {})".format(_dq(column), _dq(format), "DateTimeZone." + time_zone) + self.add_step("exec", py_string) if not self.inplace: return self @@ -184,7 +185,7 @@ class TransformProcess(object): if self.final_schema.columns[column][0] != 'string': raise Exception( 'Can not apply append_string transform to column {} because it is not a string column'.format(column)) - self.add_step('appendStringColumnTransform', column, string) + self.add_step('appendStringColumnTransform', JString(column), JString(string)) if not self.inplace: return self @@ -378,6 +379,7 @@ class TransformProcess(object): tp.steps = config['steps'][:] return tp + # TODO from_java is used in konduit a lot def to_java(self): from .java_classes import TransformProcessBuilder from .java_classes import ConditionOp @@ -407,6 +409,7 @@ class TransformProcess(object): from .java_classes import Arrays from .java_classes import ReducerBuilder from .java_classes import ReduceOp + from .java_classes import JString jschema = self.schema.to_java() builder = TransformProcessBuilder(jschema) diff --git a/pydatavec/setup.py b/pydatavec/setup.py index cfe14da33..db109b6dd 100644 --- a/pydatavec/setup.py +++ b/pydatavec/setup.py @@ -22,7 +22,7 @@ from setuptools import find_packages setup(name='pydatavec', - version='0.1.1', + version='0.1.2', description='Python interface for DataVec', long_description='Python interface for DataVec', @@ -39,7 +39,12 @@ setup(name='pydatavec', url='https://github.com/deeplearning4j/deeplearning4j.git', license='Apache', setup_requires=['Cython', 'pytest-runner'], - install_requires=['Cython', 'requests', 'pydl4j', 'numpy'], + install_requires=[ + 'Cython', + 'requests', + 'pydl4j', + 'numpy<=1.16.4', # For compatibility with python 2 + ], extras_require={ 'spark': ['pyspark'], 'tests': ['pytest',