@@ -57,6 +57,19 @@ class DocumentArrayGetAttrMixin:
5757 def __iter__ (self ):
5858 ...
5959
@abstractmethod
def __len__(self) -> int:
    """Return the number of elements held by this array.

    Abstract: every concrete implementation must provide `__len__`, since
    the attribute helpers in this mixin call `len(self)`.
    """
    ...
64+
@abstractmethod
def __getitem__(self, item: int):
    """Return the element stored at integer position `item`.

    Abstract: every concrete implementation must support access via an
    integer index, since the attribute helpers in this mixin read `self[0]`.

    :param item: the item index to access
    :return: the element at that index (presumably a Document -- confirm
        against the concrete implementations)
    """
    ...
72+
6073 def get_attributes (self , * fields : str ) -> Union [List , List [List ]]:
6174 """Return all nonempty values of the fields from all docs this array contains
6275
@@ -108,6 +121,38 @@ def embeddings(self, emb: np.ndarray):
108121 """
109122 ...
110123
@property
def blobs(self) -> np.ndarray:
    """Stack the `blob` of every Document into a single `np.ndarray`, one Document per row.

    .. warning:: This operation assumes all Documents have `blob` as content, blobs have the same shape and dtype.
            The dtype and shape are read from the first element of the
            DocumentArray / DocumentArrayMemmap and applied to all the others.

    .. warning:: This operation currently does not support sparse arrays.

    :return: blobs stacked per row as `np.ndarray`.
    """
    # Concatenate the raw dense buffers in iteration order, then reinterpret them in one go.
    raw = b''.join(doc.proto.blob.dense.buffer for doc in self)
    flat = np.frombuffer(raw, dtype=self[0].proto.blob.dense.dtype)
    # Leading axis is the number of Documents, the rest is the per-Document blob shape.
    stacked_shape = (len(self), *self[0].proto.blob.dense.shape)
    return flat.reshape(stacked_shape)

@blobs.setter
def blobs(self, b: np.ndarray):
    """Assign one row of `b` to the `blob` of each Document, in iteration order.

    :param b: The blobs matrix to set; its first axis must have one entry per Document
    """
    assert len(b) == len(self), f'the number of rows in the input ({len(b)}), should match the number of Documents ({len(self)})'

    for doc, row in zip(self, b):
        doc.blob = row
155+
111156
112157class DocumentArray (
113158 TraversableSequence ,
@@ -246,39 +291,6 @@ def __iadd__(self, other: Iterable['Document']):
246291 self .append (doc )
247292 return self
248293
@property
def embeddings(self) -> np.ndarray:
    """Return a `np.ndarray` stacking all the `embedding` attributes as rows.

    .. warning:: This operation assumes all embeddings have the same shape and dtype.
            All dtype and shape values are assumed to be equal to the values of the
            first element in the DocumentArray / DocumentArrayMemmap

    .. warning:: This operation currently does not support sparse arrays.

    :return: embeddings stacked per row as `np.ndarray`.
    """
    x_mat = b''.join(d.proto.embedding.dense.buffer for d in self)

    # dtype and row width come from the first element only; fetch it once
    # instead of indexing `self[0]` separately for each of them.
    first = self[0].proto.embedding.dense
    return np.frombuffer(x_mat, dtype=first.dtype).reshape((len(self), first.shape[0]))

@embeddings.setter
def embeddings(self, emb: np.ndarray):
    """Set the embeddings of the Documents

    Row `i` of `emb` is assigned to the `embedding` of the `i`-th Document
    in iteration order.

    :param emb: The embedding matrix to set
    :raises AssertionError: if `emb` does not have one row per Document
    """
    # The message must be an f-string: with plain literals the `{...}`
    # placeholders were emitted verbatim instead of the actual counts.
    assert len(emb) == len(self), (
        f'the number of rows in the input ({len(emb)}), '
        f'should match the number of Documents ({len(self)})'
    )

    for d, x in zip(self, emb):
        d.embedding = x
281-
282294 def append (self , doc : 'Document' ):
283295 """
284296 Append :param:`doc` in :class:`DocumentArray`.
@@ -491,3 +503,36 @@ def load_binary(cls, file: Union[str, BinaryIO]) -> 'DocumentArray':
@staticmethod
def _flatten(sequence):
    """Merge an iterable of iterables into one flat :class:`DocumentArray`.

    :param sequence: an iterable whose items are themselves iterable
        (presumably collections of Documents -- confirm against callers)
    :return: a new DocumentArray built from the items of every inner iterable, in order
    """
    # Chain the inner iterables lazily, then materialize them for the constructor.
    flattened = list(itertools.chain.from_iterable(sequence))
    return DocumentArray(flattened)
506+
507+ # Properties for fast access of commonly used attributes
@property
def embeddings(self) -> np.ndarray:
    """Stack the `embedding` of every Document into a single 2-D `np.ndarray`, one Document per row.

    .. warning:: This operation assumes all embeddings have the same shape and dtype.
            The dtype and length are read from the first element of the
            DocumentArray / DocumentArrayMemmap and applied to all the others.

    .. warning:: This operation currently does not support sparse arrays.

    :return: embeddings stacked per row as `np.ndarray`.
    """
    # Concatenate the raw dense buffers in iteration order, then reinterpret them in one go.
    raw = b''.join(doc.proto.embedding.dense.buffer for doc in self)
    flat = np.frombuffer(raw, dtype=self[0].proto.embedding.dense.dtype)
    # One row per Document; the row width is the first embedding's leading dimension.
    n_rows = len(self)
    row_width = self[0].proto.embedding.dense.shape[0]
    return flat.reshape((n_rows, row_width))

@embeddings.setter
def embeddings(self, emb: np.ndarray):
    """Assign one row of `emb` to the `embedding` of each Document, in iteration order.

    :param emb: The embedding matrix to set; its first axis must have one entry per Document
    """
    assert len(emb) == len(self), f'the number of rows in the input ({len(emb)}), should match the number of Documents ({len(self)})'

    for doc, row in zip(self, emb):
        doc.embedding = row
0 commit comments