 import sys
 import typing as t
 
-from redisvl import readers
-from redisvl.index import SearchIndex
-from redisvl.load import concurrent_store_as_hash
-from redisvl.utils.connection import get_async_redis_connection
+from redisvl.index import AsyncSearchIndex
 from redisvl.utils.log import get_logger
 
 logger = get_logger(__name__)
@@ -26,58 +23,50 @@ def __init__(self):
         parser.add_argument(
             "-a", "--password", help="Redis password", type=str, default=""
         )
+        parser.add_argument("-r", "--reader", help="Reader", type=str, default="pandas")
+        parser.add_argument("-f", "--format", help="Format", type=str, default="pickle")
         parser.add_argument("-c", "--concurrency", type=int, default=50)
         # TODO add argument to optionally not create index
         args = parser.parse_args(sys.argv[2:])
         if not args.data:
             parser.print_help()
             exit(0)
 
-        # Create Redis Connection
-        try:
-            logger.info(f"Connecting to {args.host}:{str(args.port)}")
-            redis_conn = get_async_redis_connection(args.host, args.port, args.password)
-            logger.info("Connected.")
-        except:
-            # TODO: be more specific about the exception
-            logger.error("Could not connect to redis.")
-            exit(1)
-
         # validate schema
-        index = SearchIndex.from_yaml(redis_conn, args.schema)
+        index = AsyncSearchIndex.from_yaml(args.schema)
+
+        # try to connect to redis
+        index.connect(host=args.host, port=args.port, password=args.password)
 
         # read in data
         logger.info("Reading data...")
-        data = self.read_data(args)  # TODO add other readers and formats
+        reader = self._get_reader(args)
         logger.info("Data read.")
 
         # load data and create the index
-        asyncio.run(self.load_and_create_index(args.concurrency, data, index))
+        asyncio.run(self._load_and_create_index(args.concurrency, reader, index))
 
-    def read_data(
-        self, args: t.List[str], reader: str = "pandas", format: str = "pickle"
-    ) -> dict:
-        if reader == "pandas":
-            if format == "pickle":
-                return readers.pandas.from_pickle(args.data)
+    def _get_reader(self, args: t.List[str]) -> dict:
+        if args.reader == "pandas":
+            from redisvl.readers import PandasReader
+
+            if args.format == "pickle":
+                return PandasReader.from_pickle(args.data)
+            elif args.format == "json":
+                return PandasReader.from_json(args.data)
             else:
                 raise NotImplementedError(
-                    "Only pickle format is supported for pandas reader."
+                    "Only pickle and json formats are supported for pandas reader using the CLI"
                 )
         else:
             raise NotImplementedError("Only pandas reader is supported.")
 
-    async def load_and_create_index(
-        self, concurrency: int, data: dict, index: SearchIndex
+    async def _load_and_create_index(
+        self, concurrency: int, reader: t.Iterable[dict], index: AsyncSearchIndex
     ):
 
         logger.info("Loading data...")
-        if index.storage_type == "hash":
-            await concurrent_store_as_hash(
-                data, concurrency, index.key_field, index.prefix, index.redis_conn
-            )
-        else:
-            raise NotImplementedError("Only hash storage type is supported.")
+        await index.load(data=reader, concurrency=concurrency)
         logger.info("Data loaded.")
 
         # create index
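
Net effect of this hunk: the CLI no longer opens a raw Redis connection or calls concurrent_store_as_hash directly; it builds an AsyncSearchIndex from the YAML schema, attaches a connection, and hands the reader to index.load(). A minimal standalone sketch of that flow, using only the calls shown in the diff above (the schema path, connection details, and data file are placeholders):

import asyncio

from redisvl.index import AsyncSearchIndex
from redisvl.readers import PandasReader


async def main():
    # Build the index definition from a YAML schema, then attach a Redis connection.
    index = AsyncSearchIndex.from_yaml("schema.yaml")  # placeholder path
    index.connect(host="localhost", port=6379, password="")

    # Read records with the pandas reader and load them concurrently.
    reader = PandasReader.from_pickle("data.pkl")  # placeholder path
    await index.load(data=reader, concurrency=50)


asyncio.run(main())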