Commit 0b42fcc

large models need device_maps (#633)
1 parent 81ff9f3 commit 0b42fcc
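
Context for the commit title: a model that fits on one GPU can simply be pinned to a single device, but a model too large for any single card has to be sharded, which transformers delegates to accelerate through a device_map. The sketch below is illustrative only and not part of this commit (the model id is arbitrary); it shows the two call styles the extension now has to let through.

# Illustrative sketch, not code from this commit: a single-device pipeline versus a
# sharded one. device_map="auto" requires accelerate and lets it spread the weights
# across the available GPUs (and CPU RAM) instead of loading everything onto one device.
import transformers

# Small model: one device string is enough.
classifier = transformers.pipeline("text-classification", device="cpu")

# Large model (arbitrary id, for illustration): let accelerate place the layers.
generator = transformers.pipeline(
    "text-generation",
    model="tiiuae/falcon-7b",
    device_map="auto",
)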

5 files changed: +25 -35 lines

pgml-extension/examples/transformers.sql (6 additions, 5 deletions)

@@ -3,7 +3,8 @@
 \timing on
 
 SELECT pgml.embed('intfloat/e5-small', 'hi mom');
-
+SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cuda"}');
+SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cpu"}');
 
 SELECT pgml.transform(
     'translation_en_to_fr',

@@ -16,7 +17,7 @@ SELECT pgml.transform(
 SELECT pgml.transform(
     '{"model": "roberta-large-mnli"}'::JSONB,
     inputs => ARRAY[
-        'I love how amazingly simple ML has become!',
+        'I love how amazingly simple ML has become!',
         'Some models are painfully slow and expensive ☹️'
     ]
 ) AS result;

@@ -35,13 +36,13 @@ SELECT pgml.transform(
     ]
 );
 SELECT pgml.transform(
+    task => '{"task": "text-classification",
+              "model": "finiteautomata/bertweet-base-sentiment-analysis"
+             }'::JSONB,
     inputs => ARRAY[
         'I love how amazingly simple ML has become!',
         'I hate doing mundane and thankless tasks. ☹️'
     ],
-    task => '{"task": "text-classification",
-             "model": "finiteautomata/bertweet-base-sentiment-analysis"
-            }'::JSONB
 ) AS positivity;
 
 SELECT pgml.transform(
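
For context on what the new third argument does: on the Python side, embed() parses it as kwargs and runs it through ensure_device() (see the transformers.py diff further down), then hands the device to the embedding model. A minimal sketch, assuming the non-instructor branch of embed() builds a SentenceTransformer (the INSTRUCTOR special case in that diff suggests as much):

# Rough sketch under that assumption: the "device" value from the SQL call ends up as
# the SentenceTransformer device once ensure_device() has resolved a default.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("intfloat/e5-small", device="cpu")  # or "cuda:0"
embedding = model.encode("hi mom")
print(len(embedding))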

pgml-extension/requirements.txt (2 additions, 2 deletions)

@@ -1,4 +1,4 @@
-accelerate==0.16.0
+accelerate==0.19.0
 datasets==2.10.1
 deepspeed==0.8.1
 InstructorEmbedding

@@ -15,5 +15,5 @@ torch==1.13.1
 torchaudio==0.13.1
 torchvision==0.14.1
 tqdm==4.64.1
-transformers==4.26.1
+transformers==4.28.1
 xgboost
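
The accelerate and transformers bumps are presumably what the device_map handling relies on. A quick, illustrative way to check that an environment matches the new pins:

# Illustrative check that installed package versions match the pins above.
from importlib.metadata import version

for pkg, pinned in (("accelerate", "0.19.0"), ("transformers", "4.28.1")):
    print(f"{pkg}: installed {version(pkg)}, pinned {pinned}")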

pgml-extension/src/api.rs (2 additions, 4 deletions)

@@ -574,10 +574,9 @@ pub fn transform_json(
     task: JsonB,
     args: default!(JsonB, "'{}'"),
     inputs: default!(Vec<String>, "ARRAY[]::TEXT[]"),
-    cache: default!(bool, false),
 ) -> JsonB {
     JsonB(crate::bindings::transformers::transform(
-        &task.0, &args.0, &inputs, cache,
+        &task.0, &args.0, &inputs,
     ))
 }

@@ -587,13 +586,12 @@ pub fn transform_string(
     task: String,
     args: default!(JsonB, "'{}'"),
     inputs: default!(Vec<String>, "ARRAY[]::TEXT[]"),
-    cache: default!(bool, false),
 ) -> JsonB {
     let mut task_map = HashMap::new();
     task_map.insert("task", task);
     let task_json = json!(task_map);
     JsonB(crate::bindings::transformers::transform(
-        &task_json, &args.0, &inputs, cache,
+        &task_json, &args.0, &inputs,
     ))
 }

pgml-extension/src/bindings/transformers.py (15 additions, 22 deletions)

@@ -50,20 +50,17 @@ def default(self, obj):
         return super().default(obj)
 
 
-def transform(task, args, inputs, cache):
+def transform(task, args, inputs):
     task = json.loads(task)
     args = json.loads(args)
     inputs = json.loads(inputs)
 
-    task["device"] = assign_device(task.get("device"))
+    ensure_device(task)
 
-    if cache:
-        key = ",".join([f"{key}:{val}" for (key, val) in sorted(task.items())])
-        if key not in __cache_transform_pipeline_by_task:
-            __cache_transform_pipeline_by_task[key] = transformers.pipeline(**task)
-        pipe = __cache_transform_pipeline_by_task[key]
-    else:
-        pipe = transformers.pipeline(**task)
+    key = ",".join([f"{key}:{val}" for (key, val) in sorted(task.items())])
+    if key not in __cache_transform_pipeline_by_task:
+        __cache_transform_pipeline_by_task[key] = transformers.pipeline(**task)
+    pipe = __cache_transform_pipeline_by_task[key]
 
     if pipe.task == "question-answering":
         inputs = [json.loads(input) for input in inputs]

@@ -73,7 +70,7 @@ def transform(task, args, inputs, cache):
 
 def embed(transformer, text, kwargs):
     kwargs = json.loads(kwargs)
-    kwargs["device"] = assign_device(kwargs.get("device"))
+    ensure_device(kwargs)
     instructor = transformer.startswith("hkunlp/instructor")
     if instructor:
         klass = INSTRUCTOR

@@ -543,16 +540,12 @@ def generate(model_id, data, config):
     return all_preds
 
 
-def assign_device(device=None):
-    if device is not None:
-        if device == "cpu" or "cuda:" in device:
-            return device
-        if "cuda" in device and not torch.cuda.is_available():
-            raise Exception("CUDA is not available")
-
-    if torch.cuda.is_available():
-        device = "cuda:" + str(os.getpid() % torch.cuda.device_count())
-    else:
-        device = "cpu"
+def ensure_device(kwargs):
+    device = kwargs.get("device")
+    device_map = kwargs.get("device_map")
+    if device is None and device_map is None:
+        if torch.cuda.is_available():
+            kwargs["device"] = "cuda:" + str(os.getpid() % torch.cuda.device_count())
+        else:
+            kwargs["device"] = "cpu"
 
-    return device
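
The behavioral core of the commit is ensure_device(): it fills in a default device only when the caller supplied neither "device" nor "device_map", so a user-provided device_map (what large models need) passes straight through to transformers.pipeline(**task). Pipeline caching by task key is now unconditional instead of opt-in. A standalone sketch of the new contract (mirroring the diff above; the task dicts are made up):

# Standalone sketch mirroring ensure_device() from the diff above; the task dicts are
# illustrative, not from the repository.
import os
import torch

def ensure_device(kwargs):
    # Pick a default device only if the caller set neither "device" nor "device_map".
    if kwargs.get("device") is None and kwargs.get("device_map") is None:
        if torch.cuda.is_available():
            kwargs["device"] = "cuda:" + str(os.getpid() % torch.cuda.device_count())
        else:
            kwargs["device"] = "cpu"

sharded = {"task": "text-generation", "model": "some/large-model", "device_map": "auto"}
ensure_device(sharded)
print(sharded)  # unchanged: device_map is respected, no "device" key is added

plain = {"task": "text-classification"}
ensure_device(plain)
print(plain)    # gains a "device" of "cuda:N" or "cpu"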

pgml-extension/src/bindings/transformers.rs (0 additions, 2 deletions)

@@ -25,7 +25,6 @@ pub fn transform(
     task: &serde_json::Value,
     args: &serde_json::Value,
     inputs: &Vec<String>,
-    cache: bool,
 ) -> serde_json::Value {
     crate::bindings::venv::activate();
 

@@ -45,7 +44,6 @@
                         task.into_py(py),
                         args.into_py(py),
                         inputs.into_py(py),
-                        cache.into_py(py),
                     ],
                 ),
             )
