port some python tests

pcmoritz · pcmoritz · commit bc681ca87124 · 2017-07-19T10:06:32.000-07:00
diff --git a/cpp/src/plasma/plasma.pyx b/cpp/src/plasma/plasma.pyx
@@ -9,26 +9,7 @@ from libcpp.vector cimport vector as c_vector
 from libc.stdint cimport int64_t, uint8_t, uintptr_t
 
 from pyarrow.lib cimport Buffer, NativeFile, check_status
-from pyarrow.includes.libarrow cimport MutableBuffer, CBuffer, CFixedSizeBufferWrite
-
-cdef extern from "arrow/api.h" namespace "arrow" nogil:
-    # We can later add more of the common status factory methods as needed
-    cdef CStatus CStatus_OK "Status::OK"()
-
-    cdef cppclass CStatus "arrow::Status":
-        CStatus()
-
-        c_string ToString()
-
-        c_bool ok()
-        c_bool IsIOError()
-        c_bool IsOutOfMemory()
-        c_bool IsInvalid()
-        c_bool IsKeyError()
-        c_bool IsNotImplemented()
-        c_bool IsTypeError()
-
-
+from pyarrow.includes.libarrow cimport MutableBuffer, CBuffer, CFixedSizeBufferWrite, CStatus
 
 cdef class FixedSizeBufferOutputStream(NativeFile):
 
@@ -55,7 +36,7 @@ cdef extern from "plasma/client.h" nogil:
 
     CStatus Connect(const c_string& store_socket_name, const c_string& manager_socket_name, int release_delay)
 
-    CStatus Create(const CUniqueID& object_id, int64_t data_size, uint8_t* metadata,
+    CStatus Create(const CUniqueID& object_id, int64_t data_size, const uint8_t* metadata,
       int64_t metadata_size, uint8_t** data)
 
     CStatus Get(const CUniqueID* object_ids, int64_t num_objects, int64_t timeout_ms, CObjectBuffer* object_buffers)
@@ -91,6 +72,14 @@ cdef class PlasmaClient:
   def __cinit__(self):
     self.client.reset(new CPlasmaClient())
 
+  cdef _get_object_buffers(self, object_ids, c_vector[CObjectBuffer]* result):  # fills *result with one CObjectBuffer per entry of object_ids
+    cdef c_vector[CUniqueID] ids
+    cdef ObjectID object_id
+    for object_id in object_ids:
+      ids.push_back(object_id.data)
+    result[0].resize(ids.size())  # result[0] dereferences the pointer; one slot per requested ID
+    check_status(self.client.get().Get(ids.data(), ids.size(), 0, result[0].data()))  # the literal 0 is Get's timeout_ms argument
+
   cdef _make_buffer(self, uint8_t* data, int64_t size):
     cdef shared_ptr[MutableBuffer] buffer
     buffer.reset(new MutableBuffer(data, size))
@@ -99,21 +88,22 @@ cdef class PlasmaClient:
     return result
 
   def connect(self, store_socket_name, manager_socket_name, release_delay):
-    check_status(self.client.get().Connect(store_socket_name, manager_socket_name, release_delay))
+    check_status(self.client.get().Connect(store_socket_name.encode(), manager_socket_name.encode(), release_delay))  # .encode() converts both socket names before they are passed as the c_string parameters
 
-  def create(self, ObjectID object_id, data_size):
+  def create(self, ObjectID object_id, data_size, c_string metadata=b""):  # metadata is optional and defaults to empty bytes
     cdef uint8_t* data
-    check_status(self.client.get().Create(object_id.data, data_size, NULL, 0, &data))
+    check_status(self.client.get().Create(object_id.data, data_size, <uint8_t*>(metadata.data()), metadata.size(), &data))  # forwards the metadata bytes and their length; &data is the out-parameter for the buffer pointer wrapped below
     return self._make_buffer(data, data_size)
 
   def get(self, object_ids):
-    cdef c_vector[CUniqueID] ids
-    cdef ObjectID object_id
-    for object_id in object_ids:
-      ids.push_back(object_id.data)
-    cdef c_vector[CObjectBuffer] result = c_vector[CObjectBuffer](ids.size())
-    check_status(self.client.get().Get(ids.data(), ids.size(), 0, result.data()))
-    return [self._make_buffer(r.data, r.data_size) for r in result]
+    cdef c_vector[CObjectBuffer] object_buffers
+    self._get_object_buffers(object_ids, &object_buffers)  # resizes and fills object_buffers, one entry per ID
+    return [self._make_buffer(b.data, b.data_size) for b in object_buffers]  # wraps each entry's data pointer and data_size
+
+  def get_metadata(self, object_ids):  # same lookup as get(), but wraps each entry's metadata pointer and metadata_size
+    cdef c_vector[CObjectBuffer] object_buffers
+    self._get_object_buffers(object_ids, &object_buffers)
+    return [self._make_buffer(b.metadata, b.metadata_size) for b in object_buffers]
 
   def seal(self, ObjectID object_id):
     check_status(self.client.get().Seal(object_id.data))
diff --git a/cpp/src/plasma/test/test.py b/cpp/src/plasma/test/test.py
@@ -0,0 +1,183 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import os
+import random
+import signal
+import subprocess
+import sys
+import threading
+import time
+import unittest
+
+import plasma
+import pyarrow as pa
+
+DEFAULT_PLASMA_STORE_MEMORY = 10 ** 9  # default for start_plasma_store's plasma_store_memory (the "-m" value)
+
+USE_VALGRIND = False  # set to True by the __main__ block when the last argument is "valgrind"
+
+def random_name():
+  return str(random.randint(0, 99999999))
+
+def random_object_id():
+    return plasma.ObjectID(np.random.bytes(20))
+
+def generate_metadata(length):
+  metadata_buffer = bytearray(length)
+  if length > 0:
+    metadata_buffer[0] = random.randint(0, 255)
+    metadata_buffer[-1] = random.randint(0, 255)
+    for _ in range(100):
+      metadata_buffer[random.randint(0, length - 1)] = random.randint(0, 255)
+  return metadata_buffer
+
+def assert_get_object_equal(unit_test, client1, client2, object_id,
+                            memory_buffer=None, metadata=None):
+  client1_buff = client1.get([object_id])[0]
+  client2_buff = client2.get([object_id])[0]
+  client1_metadata = client1.get_metadata([object_id])[0]
+  client2_metadata = client2.get_metadata([object_id])[0]
+  unit_test.assertEqual(len(client1_buff), len(client2_buff))
+  unit_test.assertEqual(len(client1_metadata), len(client2_metadata))
+  # Check that the buffers from the two clients are the same.
+  unit_test.assertTrue(plasma.buffers_equal(client1_buff, client2_buff))
+  # Check that the metadata buffers from the two clients are the same.
+  unit_test.assertTrue(plasma.buffers_equal(client1_metadata,
+                                            client2_metadata))
+  # If a reference buffer was provided, check that it is the same as well.
+  if memory_buffer is not None:
+    unit_test.assertTrue(plasma.buffers_equal(memory_buffer, client1_buff))
+  # If reference metadata was provided, check that it is the same as well.
+  if metadata is not None:
+    unit_test.assertTrue(plasma.buffers_equal(metadata, client1_metadata))
+
+def start_plasma_store(plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
+                       use_valgrind=False, use_profiler=False,
+                       stdout_file=None, stderr_file=None):
+  """Start a plasma store process.
+  Args:
+    use_valgrind (bool): True if the plasma store should be started inside of
+      valgrind. If this is True, use_profiler must be False.
+    use_profiler (bool): True if the plasma store should be started inside a
+      profiler. If this is True, use_valgrind must be False.
+    stdout_file: A file handle opened for writing to redirect stdout to. If no
+      redirection should happen, then this should be None.
+    stderr_file: A file handle opened for writing to redirect stderr to. If no
+      redirection should happen, then this should be None.
+  Return:
+    A tuple of the name of the plasma store socket and the process ID of the
+      plasma store process.
+  """
+  if use_valgrind and use_profiler:
+    raise Exception("Cannot use valgrind and profiler at the same time.")
+  plasma_store_executable = os.path.join(os.path.abspath(
+      os.path.dirname(__file__)),
+      "../../../build/debug/plasma_store")
+  plasma_store_name = "/tmp/plasma_store{}".format(random_name())
+  command = [plasma_store_executable,
+             "-s", plasma_store_name,
+             "-m", str(plasma_store_memory)]
+  if use_valgrind:
+    pid = subprocess.Popen(["valgrind",
+                            "--track-origins=yes",
+                            "--leak-check=full",
+                            "--show-leak-kinds=all",
+                            "--error-exitcode=1"] + command,
+                           stdout=stdout_file, stderr=stderr_file)
+    time.sleep(1.0)
+  elif use_profiler:
+    pid = subprocess.Popen(["valgrind", "--tool=callgrind"] + command,
+                           stdout=stdout_file, stderr=stderr_file)
+    time.sleep(1.0)
+  else:
+    pid = subprocess.Popen(command, stdout=stdout_file, stderr=stderr_file)
+    time.sleep(0.1)
+  return plasma_store_name, pid
+
+class TestPlasmaClient(unittest.TestCase):
+
+  def setUp(self):
+    # Start Plasma store.
+    plasma_store_name, self.p = start_plasma_store(
+        use_valgrind=USE_VALGRIND)
+    # Connect to Plasma.
+    self.plasma_client = plasma.PlasmaClient()
+    self.plasma_client.connect(plasma_store_name, "", 64)
+    # For the eviction test
+    self.plasma_client2 = plasma.PlasmaClient()
+    self.plasma_client2.connect(plasma_store_name, "", 0)
+
+  def tearDown(self):
+    # Check that the Plasma store is still alive.
+    self.assertEqual(self.p.poll(), None)
+    # Kill the plasma store process.
+    if USE_VALGRIND:
+      self.p.send_signal(signal.SIGTERM)
+      self.p.wait()
+      if self.p.returncode != 0:
+        os._exit(-1)
+    else:
+      self.p.kill()
+
+  def test_create(self):
+    # Create an object id string.
+    object_id = random_object_id()
+    # Create a new buffer and write to it.
+    length = 50
+    memory_buffer = np.frombuffer(self.plasma_client.create(object_id, length), dtype="uint8")
+    for i in range(length):
+      memory_buffer[i] = i % 256
+    # Seal the object.
+    self.plasma_client.seal(object_id)
+    # Get the object.
+    memory_buffer = np.frombuffer(self.plasma_client.get([object_id])[0], dtype="uint8")
+    for i in range(length):
+      self.assertEqual(memory_buffer[i], i % 256)
+
+  def test_create_with_metadata(self):
+    for length in range(1000):
+      # Create an object id string.
+      object_id = random_object_id()
+      # Create a random metadata string.
+      metadata = generate_metadata(length)
+      # Create a new buffer and write to it.
+      memory_buffer = np.frombuffer(self.plasma_client.create(object_id, length, metadata), dtype="uint8")
+      for i in range(length):
+        memory_buffer[i] = i % 256
+      # Seal the object.
+      self.plasma_client.seal(object_id)
+      # Get the object.
+      memory_buffer = np.frombuffer(self.plasma_client.get([object_id])[0], dtype="uint8")
+      for i in range(length):
+        self.assertEqual(memory_buffer[i], i % 256)
+      # Get the metadata.
+      metadata_buffer = np.frombuffer(self.plasma_client.get_metadata([object_id])[0], dtype="uint8")
+      self.assertEqual(len(metadata), len(metadata_buffer))
+      for i in range(len(metadata)):
+        self.assertEqual(metadata[i], metadata_buffer[i])
+
+  def test_create_existing(self):
+    # This test is partially used to test the code path in which we create an
+    # object with an ID that already exists
+    length = 100
+    for _ in range(1000):
+      object_id = random_object_id()
+      self.plasma_client.create(object_id, length, generate_metadata(length))
+      try:
+        self.plasma_client.create(object_id, length, generate_metadata(length))
+      except pa.lib.ArrowException as e:
+        pass
+      else:
+        self.assertTrue(False)
+
+if __name__ == "__main__":
+  if len(sys.argv) > 1:
+    # Pop the argument so we don't mess with unittest's own argument parser.
+    if sys.argv[-1] == "valgrind":
+      arg = sys.argv.pop()
+      USE_VALGRIND = True
+      print("Using valgrind for tests")
+  unittest.main(verbosity=2)