Skip to content

Commit 6d444ee

Browse files
committed
fixed bugs related to self.store vs store_select. Found a couple of date caching bugs as well. Rewrote config.py for YAML format.
@hhuuggoo can you confirm that start=start_row, stop=end_row are unnecessary in store_select statements now?
1 parent 9ee1ac2 commit 6d444ee

File tree

4 files changed

+71
-74
lines changed

4 files changed

+71
-74
lines changed

arraymanagement/nodes/sqlcaching.py

Lines changed: 20 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -369,12 +369,12 @@ def select(self, query_filter, where=None):
369369
if cache_info is None:
370370
self.cache_data(query_filter)
371371
cache_info = self.cache_info(query_filter)
372+
372373
start_row, end_row = cache_info
373-
if not where:
374-
where = None
375-
376-
result = store_select(self.store, self.localpath, where=where,
377-
start=start_row, stop=end_row)
374+
375+
#removed start_row, end_row
376+
result = store_select(self.store, self.localpath, where=where)
377+
# start=start_row, stop=end_row)
378378
return result
379379

380380
def cache_query(self, query_filter):
@@ -403,11 +403,13 @@ def store_cache_spec(self, query_filter, start_row, end_row):
403403
write_pandas(self.store, 'cache_spec', data, {}, 1.1,
404404
replace=False)
405405

406+
406407
def cache_info(self, query_filter):
407408
hashval = self.gethashval(query_filter)
408409
try:
409-
result = store_select(self.store, 'cache_spec',
410-
where=[('hashval', hashval)])
410+
#rewriting where statement for 0.13 pandas style
411+
result = store_select(self.store, 'cache_spec',
412+
where='hashval=="{}"'.format(hashval))
411413
except KeyError:
412414
return None
413415
if result is None:
@@ -446,14 +448,13 @@ def init_from_file(self):
446448
setattr(self, name, column(name))
447449

448450
def select(self, query_filter, where=None, **kwargs):
449-
450451
ignore_cache = kwargs.get('IgnoreCache',None)
451452
if ignore_cache:
452453
query = self.compiled_query(query_filter,kwargs)
453454
return query
454455

455-
456-
if 'date' not in kwargs.keys():
456+
dateKeys = [k for k in kwargs.keys() if 'date' in k]
457+
if not dateKeys:
457458
#no dates in query
458459

459460
fs = FlexibleSqlCaching(self)
@@ -464,11 +465,9 @@ def select(self, query_filter, where=None, **kwargs):
464465
return result
465466

466467
else:
467-
dateKeys = [k for k in kwargs.keys() if 'date' in k]
468468
dateKeys = sorted(dateKeys)
469469
start_date, end_date = kwargs[dateKeys[0]], kwargs[dateKeys[1]]
470470

471-
472471
result = self.cache_info(query_filter,start_date, end_date)
473472

474473
if result is None:
@@ -500,11 +499,11 @@ def store_cache_spec(self, query_filter, start_row, end_row, start_date, end_dat
500499
def cache_info(self, query_filter, start_date, end_date):
501500
hashval = self.gethashval(query_filter)
502501
try:
503-
504502
# print self.store['/cache_spec']
505-
result = store_select(self.store, 'cache_spec',
506-
where=[('hashval', hashval),
507-
('start_date',start_date)])
503+
# result = store_select(self.store, 'cache_spec',
504+
# where=[('hashval', hashval),
505+
# ('start_date',start_date)])
506+
508507
start_date = pd.Timestamp(start_date)
509508
end_date = pd.Timestamp(end_date)
510509

@@ -562,7 +561,6 @@ def cache_data(self, query_params, start_date, end_date):
562561
break;
563562

564563
all_query = and_(query_params,column(col_date) >=start_date, column(col_date) <= end_date)
565-
566564
q = self.cache_query(all_query)
567565
log.debug(str(q))
568566

@@ -582,7 +580,6 @@ def cache_data(self, query_params, start_date, end_date):
582580
db_string_types=db_string_types,
583581
db_datetime_types=db_datetime_types
584582
)
585-
586583
self.min_itemsize = min_itemsize
587584
self.finalize_min_itemsize()
588585
overrides = self.col_types
@@ -592,6 +589,7 @@ def cache_data(self, query_params, start_date, end_date):
592589
starting_row = self.table.nrows
593590
except AttributeError:
594591
starting_row = 0
592+
595593
write_pandas_hdf_from_cursor(self.store, self.localpath, cur,
596594
columns, self.min_itemsize,
597595
dtype_overrides=overrides,
@@ -602,19 +600,17 @@ def cache_data(self, query_params, start_date, end_date):
602600
ending_row = self.table.nrows
603601
except AttributeError:
604602
ending_row = 0
605-
606603
self.store_cache_spec(query_params, starting_row, ending_row, start_date, end_date)
607604

608605

609606
def munge_tables(self, hashval, start_date, end_date):
610607

611608
store = self.store
612-
store.select('cache_spec', where=[('hashval', hashval)])
609+
# store.select('cache_spec', where=[('hashval', hashval)])
613610

614611
store['/cache_spec'][['start_date','end_date']].sort(['start_date'])
615-
616-
df_min = store.select('cache_spec', where=[('start_date', '<=', start_date)]).reset_index()
617-
df_max = store.select('cache_spec', where=[('end_date', '<=', end_date)]).reset_index()
612+
df_min = store_select(store, 'cache_spec', where=[('start_date', '<=', start_date)]).reset_index()
613+
df_max = store_select(store, 'cache_spec', where=[('end_date', '<=', end_date)]).reset_index()
618614

619615
df_total = df_min.append(df_max)
620616
df_total.drop_duplicates('_end_row',inplace=True)
@@ -626,8 +622,7 @@ def munge_tables(self, hashval, start_date, end_date):
626622
for s in ss_vals:
627623
start_row = s[0]
628624
end_row = s[1]
629-
630-
temp = store.select(self.localpath,
625+
temp = store_select(store, self.localpath,
631626
start=start_row, stop=end_row)
632627
temp.head()
633628

example/datalib/config.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22
from arraymanagement.nodes.csvnodes import PandasCSVNode
33
from arraymanagement.nodes.hdfnodes import PandasHDFNode, PyTables
44
from arraymanagement.nodes.sql import SimpleQueryTable
5+
from arraymanagement.nodes.sqlcaching import YamlSqlDateCaching
6+
57

68
global_config = dict(
79
is_dataset = False,
@@ -14,6 +16,7 @@
1416
('*.hdf5' , PandasHDFNode),
1517
('*.h5' , PandasHDFNode),
1618
('*.sql' , SimpleQueryTable),
19+
("*.yaml", YamlSqlDateCaching),
1720
])
1821
)
1922

example/sqlviews/example_no_dates_not_entities.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
SQL:
22
# Query for EOD data for list of entities
33
eod_stock:
4-
type: 'conditional'
54
conditionals:
65

76
query: >

tests/node_test.py

Lines changed: 48 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -9,54 +9,54 @@
99

1010
from arraymanagement.client import ArrayClient
1111

12-
# def setup_module():
13-
# basepath = join(dirname(dirname(__file__)), 'example')
14-
# client = ArrayClient(basepath)
15-
# client.clear_disk_cache()
16-
#
17-
# def teardown_module():
18-
# basepath = join(dirname(dirname(__file__)), 'example')
19-
# client = ArrayClient(basepath)
20-
# client.clear_disk_cache()
21-
#
22-
# def test_csv_node():
23-
# basepath = join(dirname(dirname(__file__)), 'example')
24-
# client = ArrayClient(basepath)
25-
# node = client.get_node('/csvs/sample')
26-
# data = node.get()
27-
# #better check later
28-
# assert data.shape == (73,2)
29-
#
30-
# def test_hdf_node():
31-
# basepath = join(dirname(dirname(__file__)), 'example')
32-
# client = ArrayClient(basepath)
33-
# node = client.get_node('/pandashdf5/data')
34-
# assert 'sample' in node.keys()
35-
# node = node.get_node('sample')
36-
# data = node.select()
37-
# assert data.shape == (73,2)
38-
#
39-
# def test_custom_node():
40-
# basepath = join(dirname(dirname(__file__)), 'example')
41-
# client = ArrayClient(basepath)
42-
# node = client.get_node('/custom/sample2')
43-
# data1 = node.select()
44-
# node = client.get_node('/custom/sample')
45-
# data2 = node.get()
46-
# assert data2.iloc[2]['values'] == 2
47-
# assert data1.iloc[2]['values'] == 4
48-
#
49-
#
50-
# def test_csv_node():
51-
# basepath = join(dirname(dirname(__file__)), 'example')
52-
# client = ArrayClient(basepath)
53-
# node = client.get_node('/customcsvs/sample')
54-
# data1 = node.get()
55-
# node = client.get_node('/customcsvs/sample2')
56-
# data2 = node.select()
57-
# node = client.get_node('/customcsvs/sample_pipe')
58-
# data3 = node.select()
59-
# #better check later
12+
def setup_module():
13+
basepath = join(dirname(dirname(__file__)), 'example')
14+
client = ArrayClient(basepath)
15+
client.clear_disk_cache()
16+
17+
def teardown_module():
18+
basepath = join(dirname(dirname(__file__)), 'example')
19+
client = ArrayClient(basepath)
20+
client.clear_disk_cache()
21+
22+
def test_csv_node():
23+
basepath = join(dirname(dirname(__file__)), 'example')
24+
client = ArrayClient(basepath)
25+
node = client.get_node('/csvs/sample')
26+
data = node.get()
27+
#better check later
28+
assert data.shape == (73,2)
29+
30+
def test_hdf_node():
31+
basepath = join(dirname(dirname(__file__)), 'example')
32+
client = ArrayClient(basepath)
33+
node = client.get_node('/pandashdf5/data')
34+
assert 'sample' in node.keys()
35+
node = node.get_node('sample')
36+
data = node.select()
37+
assert data.shape == (73,2)
38+
39+
def test_custom_node():
40+
basepath = join(dirname(dirname(__file__)), 'example')
41+
client = ArrayClient(basepath)
42+
node = client.get_node('/custom/sample2')
43+
data1 = node.select()
44+
node = client.get_node('/custom/sample')
45+
data2 = node.get()
46+
assert data2.iloc[2]['values'] == 2
47+
assert data1.iloc[2]['values'] == 4
48+
49+
50+
def test_csv_node():
51+
basepath = join(dirname(dirname(__file__)), 'example')
52+
client = ArrayClient(basepath)
53+
node = client.get_node('/customcsvs/sample')
54+
data1 = node.get()
55+
node = client.get_node('/customcsvs/sample2')
56+
data2 = node.select()
57+
node = client.get_node('/customcsvs/sample_pipe')
58+
data3 = node.select()
59+
#better check later
6060

6161
def test_sql_yaml_cache():
6262
basepath = join(dirname(dirname(__file__)), 'example')

0 commit comments

Comments
 (0)