Python – cannot load a saved StringIndexer model from HDFS

After fitting a StringIndexer on my training data, I want to save the fitted index model so that I can reuse it on my prediction data. Saving it to an HDFS path succeeds, but when I try to load it I get the error shown below. How can I load the index model correctly?

stringIndexerPath = "hdfs://users/stringindexer"
stringindex_model.write().overwrite().save(stringIndexerPath) 
loadedIndexer = StringIndexer.load(stringIndexerPath)
Output exceeds the size limit. Open the full output data in a text editor
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
/ldap_home/yafeng.cheng/myproject/project_user_profile/apps/project_occupation/analyze/housewife/housewife_case_study.ipynb Cell 55' in <cell line: 3>()
      1 # from pyspark.ml.pipeline import PipelineModel
      2 # loadedIndexer = PipelineModel.load(stringIndexerPath)
----> 3 loadedIndexer = StringIndexer.load(stringIndexerPath)

File /usr/share/spark3/python/pyspark/ml/util.py:332, in MLReadable.load(cls, path)
    329 @classmethod
    330 def load(cls, path):
    331     """Reads an ML instance from the input path, a shortcut of `read().load(path)`."""
--> 332     return cls.read().load(path)

File /usr/share/spark3/python/pyspark/ml/util.py:282, in JavaMLReader.load(self, path)
    280 if not isinstance(path, str):
    281     raise TypeError("path should be a string, got type %s" % type(path))
--> 282 java_obj = self._jread.load(path)
    283 if not hasattr(self._clazz, "_from_java"):
    284     raise NotImplementedError("This Java ML type cannot be loaded into Python currently: %r"
    285                               % self._clazz)

File /usr/share/spark3/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py:1304, in JavaMember.__call__(self, *args)
   1298 command = proto.CALL_COMMAND_NAME +
   1299     self.command_header +
   1300     args_command +
...
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Thread.java:748)
{
    "name": "Py4JJavaError",
    "message": "An error occurred while calling o3064.load.\n: java.lang.NoSuchMethodException: org.apache.spark.ml.feature.StringIndexerModel.<init>(java.lang.String)\n\tat java.lang.Class.getConstructor0(Class.java:3082)\n\tat java.lang.Class.getConstructor(Class.java:1825)\n\tat org.apache.spark.ml.util.DefaultParamsReader.load(ReadWrite.scala:468)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.lang.Thread.run(Thread.java:748)\n",
    "stack": "u001b[0;31m---------------------------------------------------------------------------u001b[0mnu001b[0;31mPy4JJavaErroru001b[0m                             Traceback (most recent call last)nu001b[1;32m/user/case_study.ipynb Cell 55'u001b[0m in u001b[0;36m<cell line: 1>u001b[0;34m()u001b[0mnu001b[0;32m----> <a href="vscode-notebook-cell://ssh-remote%2Bdev01/user/case_study.ipynb#ch0000094vscode-remote?line=0">1</a>u001b[0m loadedIndexer u001b[39m=u001b[39m StringIndexeru001b[39m.u001b[39;49mload(stringIndexerPath)nnFile u001b[0;32m/usr/share/spark3/python/pyspark/ml/util.py:332u001b[0m, in u001b[0;36mMLReadable.loadu001b[0;34m(cls, path)u001b[0mnu001b[1;32m    329u001b[0m u001b[39m@classmethodu001b[39mnu001b[1;32m    330u001b[0m u001b[39mdefu001b[39;00m u001b[39mloadu001b[39m(u001b[39mclsu001b[39m, path):nu001b[1;32m    331u001b[0m     u001b[39m"""Reads an ML instance from the input path, a shortcut of `read().load(path)`."""u001b[39;00mnu001b[0;32m--> 332u001b[0m     u001b[39mreturnu001b[39;00m u001b[39mclsu001b[39;49mu001b[39m.u001b[39;49mread()u001b[39m.u001b[39;49mload(path)nnFile u001b[0;32m/usr/share/spark3/python/pyspark/ml/util.py:282u001b[0m, in u001b[0;36mJavaMLReader.loadu001b[0;34m(self, path)u001b[0mnu001b[1;32m    280u001b[0m u001b[39mifu001b[39;00m u001b[39mnotu001b[39;00m u001b[39misinstanceu001b[39m(path, u001b[39mstru001b[39m):nu001b[1;32m    281u001b[0m     u001b[39mraiseu001b[39;00m u001b[39mTypeErroru001b[39;00m(u001b[39m"u001b[39mu001b[39mpath should be a string, got type u001b[39mu001b[39m%su001b[39;00mu001b[39m"u001b[39m u001b[39m%u001b[39m u001b[39mtypeu001b[39m(path))nu001b[0;32m--> 282u001b[0m java_obj u001b[39m=u001b[39m u001b[39mselfu001b[39;49mu001b[39m.u001b[39;49m_jreadu001b[39m.u001b[39;49mload(path)nu001b[1;32m    283u001b[0m u001b[39mifu001b[39;00m u001b[39mnotu001b[39;00m u001b[39mhasattru001b[39m(u001b[39mselfu001b[39mu001b[39m.u001b[39m_clazz, 
u001b[39m"u001b[39mu001b[39m_from_javau001b[39mu001b[39m"u001b[39m):nu001b[1;32m    284u001b[0m     u001b[39mraiseu001b[39;00m u001b[39mNotImplementedErroru001b[39;00m(u001b[39m"u001b[39mu001b[39mThis Java ML type cannot be loaded into Python currently: u001b[39mu001b[39m%ru001b[39;00mu001b[39m"u001b[39mnu001b[1;32m    285u001b[0m                               u001b[39m%u001b[39m u001b[39mselfu001b[39mu001b[39m.u001b[39m_clazz)nnFile u001b[0;32m/usr/share/spark3/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py:1304u001b[0m, in u001b[0;36mJavaMember.__call__u001b[0;34m(self, *args)u001b[0mnu001b[1;32m   1298u001b[0m command u001b[39m=u001b[39m protou001b[39m.u001b[39mCALL_COMMAND_NAME u001b[39m+u001b[39m\nu001b[1;32m   1299u001b[0m     u001b[39mselfu001b[39mu001b[39m.u001b[39mcommand_header u001b[39m+u001b[39m\nu001b[1;32m   1300u001b[0m     args_command u001b[39m+u001b[39m\nu001b[1;32m   1301u001b[0m     protou001b[39m.u001b[39mEND_COMMAND_PARTnu001b[1;32m   1303u001b[0m answer u001b[39m=u001b[39m u001b[39mselfu001b[39mu001b[39m.u001b[39mgateway_clientu001b[39m.u001b[39msend_command(command)nu001b[0;32m-> 1304u001b[0m return_value u001b[39m=u001b[39m get_return_value(nu001b[1;32m   1305u001b[0m     answer, u001b[39mselfu001b[39;49mu001b[39m.u001b[39;49mgateway_client, u001b[39mselfu001b[39;49mu001b[39m.u001b[39;49mtarget_id, u001b[39mselfu001b[39;49mu001b[39m.u001b[39;49mname)nu001b[1;32m   1307u001b[0m u001b[39mforu001b[39;00m temp_arg u001b[39minu001b[39;00m temp_args:nu001b[1;32m   1308u001b[0m     temp_argu001b[39m.u001b[39m_detach()nnFile u001b[0;32m/usr/share/spark3/python/pyspark/sql/utils.py:111u001b[0m, in u001b[0;36mcapture_sql_exception.<locals>.decou001b[0;34m(*a, **kw)u001b[0mnu001b[1;32m    109u001b[0m u001b[39mdefu001b[39;00m u001b[39mdecou001b[39m(u001b[39m*u001b[39ma, u001b[39m*u001b[39mu001b[39m*u001b[39mkw):nu001b[1;32m    110u001b[0m     u001b[39mtryu001b[39;00m:nu001b[0;32m--> 111u001b[0m         u001b[39mreturnu001b[39;00m 
f(u001b[39m*u001b[39;49ma, u001b[39m*u001b[39;49mu001b[39m*u001b[39;49mkw)nu001b[1;32m    112u001b[0m     u001b[39mexceptu001b[39;00m py4ju001b[39m.u001b[39mprotocolu001b[39m.u001b[39mPy4JJavaError u001b[39masu001b[39;00m e:nu001b[1;32m    113u001b[0m         converted u001b[39m=u001b[39m convert_exception(eu001b[39m.u001b[39mjava_exception)nnFile u001b[0;32m/usr/share/spark3/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py:326u001b[0m, in u001b[0;36mget_return_valueu001b[0;34m(answer, gateway_client, target_id, name)u001b[0mnu001b[1;32m    324u001b[0m value u001b[39m=u001b[39m OUTPUT_CONVERTER[u001b[39mtypeu001b[39m](answer[u001b[39m2u001b[39m:], gateway_client)nu001b[1;32m    325u001b[0m u001b[39mifu001b[39;00m answer[u001b[39m1u001b[39m] u001b[39m==u001b[39m REFERENCE_TYPE:nu001b[0;32m--> 326u001b[0m     u001b[39mraiseu001b[39;00m Py4JJavaError(nu001b[1;32m    327u001b[0m         u001b[39m"u001b[39mu001b[39mAn error occurred while calling u001b[39mu001b[39m{0}u001b[39;00mu001b[39m{1}u001b[39;00mu001b[39m{2}u001b[39;00mu001b[39m.u001b[39mu001b[39m\nu001b[39;00mu001b[39m"u001b[39mu001b[39m.u001b[39mnu001b[1;32m    328u001b[0m         u001b[39mformatu001b[39m(target_id, u001b[39m"u001b[39mu001b[39m.u001b[39mu001b[39m"u001b[39m, name), value)nu001b[1;32m    329u001b[0m u001b[39melseu001b[39;00m:nu001b[1;32m    330u001b[0m     u001b[39mraiseu001b[39;00m Py4JError(nu001b[1;32m    331u001b[0m         u001b[39m"u001b[39mu001b[39mAn error occurred while calling u001b[39mu001b[39m{0}u001b[39;00mu001b[39m{1}u001b[39;00mu001b[39m{2}u001b[39;00mu001b[39m. 
Trace:u001b[39mu001b[39m\nu001b[39;00mu001b[39m{3}u001b[39;00mu001b[39m\nu001b[39;00mu001b[39m"u001b[39mu001b[39m.u001b[39mnu001b[1;32m    332u001b[0m         u001b[39mformatu001b[39m(target_id, u001b[39m"u001b[39mu001b[39m.u001b[39mu001b[39m"u001b[39m, name, value))nnu001b[0;31mPy4JJavaErroru001b[0m: An error occurred while calling o3064.load.n: java.lang.NoSuchMethodException: org.apache.spark.ml.feature.StringIndexerModel.<init>(java.lang.String)ntat java.lang.Class.getConstructor0(Class.java:3082)ntat java.lang.Class.getConstructor(Class.java:1825)ntat org.apache.spark.ml.util.DefaultParamsReader.load(ReadWrite.scala:468)ntat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)ntat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)ntat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)ntat java.lang.reflect.Method.invoke(Method.java:498)ntat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)ntat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)ntat py4j.Gateway.invoke(Gateway.java:282)ntat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)ntat py4j.commands.CallCommand.execute(CallCommand.java:79)ntat py4j.GatewayConnection.run(GatewayConnection.java:238)ntat java.lang.Thread.run(Thread.java:748)n"
}

Leave a Comment