@@ -433,6 +433,17 @@ def __hash__(self):
433433 self ._hash = SpanArray .__hash__ (self )
434434 return self ._hash
435435
def __contains__(self, item) -> bool:
    """
    Membership test for a scalar value against this TokenSpanArray.

    A TokenSpan whose ``begin`` equals ``TokenSpan.NULL_OFFSET_VALUE`` is
    looked up by checking for that same value among ``self._begin_tokens``;
    every other item is handed to the parent class's ``__contains__``.

    :param item: scalar value to look for, normally a TokenSpan.
    :return: true if item exists in this TokenSpanArray.
    """
    is_null_span = (
        isinstance(item, TokenSpan)
        and item.begin == TokenSpan.NULL_OFFSET_VALUE
    )
    if not is_null_span:
        # Ordinary values use the inherited membership logic.
        return super().__contains__(item)
    # Null spans are matched through the null offset stored in the
    # begin-token offsets rather than through the inherited lookup.
    return TokenSpan.NULL_OFFSET_VALUE in self._begin_tokens
446+
436447 @classmethod
437448 def _concat_same_type (
438449 cls , to_concat : Sequence [pd .api .extensions .ExtensionArray ]
@@ -474,7 +485,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
474485 for information about this method.
475486 """
476487 tokens = None
477- if isinstance (scalars , Span ):
488+ if isinstance (scalars , TokenSpan ):
478489 scalars = [scalars ]
479490 if isinstance (scalars , TokenSpanArray ):
480491 tokens = scalars .tokens
@@ -483,14 +494,18 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
483494 i = 0
484495 for s in scalars :
485496 if not isinstance (s , TokenSpan ):
486- raise ValueError (
487- f"Can only convert a sequence of TokenSpan "
488- f"objects to a TokenSpanArray. Found an "
489- f"object of type { type (s )} "
490- )
491- if tokens is None :
497+ # TODO: Temporary fix for np.nan values, pandas-dev GH#38980
498+ if np .isnan (s ):
499+ s = TokenSpanDtype ().na_value
500+ else :
501+ raise ValueError (
502+ f"Can only convert a sequence of TokenSpan "
503+ f"objects to a TokenSpanArray. Found an "
504+ f"object of type { type (s )} "
505+ )
506+ if tokens is None and not (s .begin == TokenSpan .NULL_OFFSET_VALUE and s .tokens .target_text is None ):
492507 tokens = s .tokens
493- if not s .tokens .equals (tokens ):
508+ if not ( s . begin == TokenSpan . NULL_OFFSET_VALUE and s . tokens . target_text is None ) and not s .tokens .equals (tokens ):
494509 raise ValueError (
495510 f"Mixing different token sets is not currently "
496511 f"supported. Received two token sets:\n "
0 commit comments