|
7 | 7 |
|
8 | 8 | from datetime import datetime, date
|
9 | 9 | import time
|
| 10 | +import re |
10 | 11 |
|
11 | 12 | import numpy as np
|
12 | 13 | from pandas import (
|
@@ -916,27 +917,40 @@ def _read_panel_table(self, group, where=None):
|
916 | 917 | lp = DataFrame(new_values, index=new_index, columns=lp.columns)
|
917 | 918 | wp = lp.to_panel()
|
918 | 919 |
|
919 |
| - if sel.column_filter: |
920 |
| - new_minor = sorted(set(wp.minor_axis) & sel.column_filter) |
| 920 | + if sel.filter: |
| 921 | + new_minor = sorted(set(wp.minor_axis) & sel.filter) |
921 | 922 | wp = wp.reindex(minor=new_minor)
|
922 | 923 | return wp
|
923 | 924 |
|
924 | 925 |
|
925 |
| - def _delete_from_table(self, group, where = None): |
| 926 | + def _delete_from_table(self, group, where): |
| 927 | + """ delete rows from a group where condition is True """ |
926 | 928 | table = getattr(group, 'table')
|
927 | 929 |
|
928 | 930 | # create the selection
|
929 |
| - s = Selection(table, where, table._v_attrs.index_kind) |
| 931 | + s = Selection(table,where,table._v_attrs.index_kind) |
930 | 932 | s.select_coords()
|
931 | 933 |
|
932 | 934 | # delete the rows in reverse order
|
933 |
| - l = list(s.values) |
934 |
| - l.reverse() |
935 |
| - for c in l: |
936 |
| - table.removeRows(c) |
937 |
| - self.handle.flush() |
938 |
| - return len(s.values) |
| 935 | + l = list(s.values) |
| 936 | + ln = len(l) |
| 937 | + |
| 938 | + if ln: |
| 939 | + |
| 940 | + # if we can do a consecutive removal - do it! |
| 941 | + if l[0]+ln-1 == l[-1]: |
| 942 | + table.removeRows(start = l[0], stop = l[-1]+1) |
939 | 943 |
|
| 944 | + # one by one |
| 945 | + else: |
| 946 | + l.reverse() |
| 947 | + for c in l: |
| 948 | + table.removeRows(c) |
| 949 | + |
| 950 | + self.handle.flush() |
| 951 | + |
| 952 | + # return the number of rows removed |
| 953 | + return ln |
940 | 954 |
|
941 | 955 | def _convert_index(index):
|
942 | 956 | if isinstance(index, DatetimeIndex):
|
@@ -1088,87 +1102,203 @@ def _alias_to_class(alias):
|
1088 | 1102 | return _reverse_index_map.get(alias, Index)
|
1089 | 1103 |
|
1090 | 1104 |
|
class Term(object):
    """ create a term object that holds a field, op, and value

    Parameters
    ----------
    field : dict, string term expression, or the field to operate (must be a valid index/column type of DataFrame/Panel)
    op    : a valid op (defaults to '=') (optional)
            >, >=, <, <=, =, != (not equal) are allowed
    value : a value or list of values (required)
    index_kind : the kind of the stored index (optional); 'datetime64' makes
            index values be converted through lib.Timestamp

    Returns
    -------
    a Term object

    Raises
    ------
    Exception if the term cannot be parsed, names an unknown field, has no
    value, gives several values to an inequality, or gives more values to
    a != condition than fit in one expression

    Examples
    --------
    Term(dict(field = 'index', op = '>', value = '20121114'))
    Term('index', '20121114')
    Term('index', '>', '20121114')
    Term('index', ['20121114','20121114'])
    Term('index', datetime(2012,11,14))
    Term('index>20121114')
    Term('index<=20121114')

    Attributes set on the instance
    ------------------------------
    condition : the expression string for this term (None if a filter is used)
    filter    : a set of values to apply after reading (None if a condition is used)
    """

    _ops = ['<', '<=', '>', '>=', '=', '!=']

    # Regex alternation takes the first alternative that matches, so the
    # two-character ops must be tried before their one-character prefixes;
    # otherwise 'index<=5' parses as op '<' with value '=5'.
    _search = re.compile(r"^(?P<field>\w+)(?P<op>%s)(?P<value>.+)$"
                         % '|'.join(sorted(_ops, key=len, reverse=True)))
    _index = ['index', 'major_axis']
    _column = ['column', 'minor_axis', 'items']

    # largest number of values that is expanded into a single expression
    _max_selectors = 61

    # basestring only exists on Python 2; fall back to str elsewhere
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    def __init__(self, field, op = None, value = None, index_kind = None):
        self.field = None
        self.op = None
        self.value = None
        self.typ = None
        self.index_kind = index_kind
        self.filter = None
        self.condition = None

        # unpack lists/tuples in field (an empty one falls through to the
        # "does not understand" error below instead of an IndexError)
        if isinstance(field, (tuple, list)) and len(field):
            f = field
            field = f[0]
            if len(f) > 1:
                op = f[1]
            if len(f) > 2:
                value = f[2]

        # backwards compatible
        if isinstance(field, dict):
            self.field = field.get('field')
            self.op = field.get('op') or '='
            self.value = field.get('value')

        # passed a term
        elif isinstance(field, Term):
            self.field = field.field
            self.op = field.op
            self.value = field.value

        # a string expression (or just the field)
        elif isinstance(field, self._string_types):

            # is a term is passed
            s = self._search.match(field)
            if s is not None:
                self.field = s.group('field')
                self.op = s.group('op')
                self.value = s.group('value')

            else:
                self.field = field

                # is an op passed?
                if isinstance(op, self._string_types) and op in self._ops:
                    self.op = op
                    self.value = value
                else:
                    # two-argument form: the second argument is the value
                    self.op = '='
                    self.value = op

        else:
            # wrap in a 1-tuple so a tuple-valued field cannot break the formatting
            raise Exception("Term does not understand the supplied field [%s]" % (field,))

        # we have valid fields
        if self.field is None or self.op is None or self.value is None:
            raise Exception("Could not create this term [%s]" % str(self))

        # valid field name
        if self.field in self._index:
            self.typ = 'index'
        elif self.field in self._column:
            self.typ = 'column'
        else:
            raise Exception("field is not a valid index/column for this term [%s]" % str(self))

        # normalise the value to a list-like; strings are scalars even on
        # interpreters where str defines __iter__
        is_list_like = (hasattr(self.value, '__iter__') and
                        not isinstance(self.value, self._string_types))
        if not is_list_like:
            self.value = [ self.value ]

        # we have valid conditions
        if self.op in ['>', '>=', '<', '<=']:
            if len(self.value) > 1:
                raise Exception("an inequality condition cannot have multiple values [%s]" % str(self))

        self.eval()

    def __str__(self):
        return "typ->%s,field->%s,op->%s,value->%s" % (self.typ,self.field,self.op,self.value)

    __repr__ = __str__

    def eval(self):
        """ set the numexpr expression (or the post-read filter) for this term """

        # convert values
        values = [ self.convert_value(v) for v in self.value ]
        if not values:
            # an empty list would otherwise yield the invalid expression "()"
            raise Exception("a term must have at least one value [%s]" % str(self))

        # equality conditions
        if self.op in ['=', '!=']:

            # too many values to create the expression?
            if len(values) <= self._max_selectors:
                if self.op == '=':
                    # match any of the values
                    comparison, joiner = '==', ' | '
                else:
                    # match none of the values
                    comparison, joiner = '!=', ' & '
                self.condition = "(%s)" % joiner.join(
                    [ "(%s %s %s)" % (self.field, comparison, v[0]) for v in values ])

            # use a filter after reading
            elif self.op == '=':
                self.filter = set([ v[1] for v in values ])

            # the filter is an inclusion set, so it cannot express "not in"
            else:
                raise Exception("a != condition cannot have more than %s values [%s]"
                                % (self._max_selectors, str(self)))

        else:

            self.condition = '(%s %s %s)' % (self.field, self.op, values[0][0])

    def convert_value(self, v):
        """ convert a single value for use in this term

        Returns a 2-element list: the text to embed in the expression, and
        the raw value to use in a post-read filter (None for index values
        that are not embedded as quoted strings).
        """

        if self.typ == 'index':
            if self.index_kind == 'datetime64' :
                return [lib.Timestamp(v).value, None]
            elif isinstance(v, datetime):
                return [time.mktime(v.timetuple()), None]
            elif not isinstance(v, self._string_types):
                return [str(v), None]

        # string quoting
        return ["'" + v + "'", v]
| 1249 | + |
class Selection(object):
    """
    Carries out a selection operation on a tables.Table object.

    Parameters
    ----------
    table : tables.Table
    where : list of Terms (or convertible to)
    index_kind : the kind of the stored index, passed on to each Term

    Attributes
    ----------
    terms     : the list of Terms built from where (None if where is None)
    condition : the combined expression of all terms (None if there is none)
    filter    : the union of the term filters, to be applied after reading
                (None if there are no terms)
    values    : the result of the last select / select_coords call
    """
    def __init__(self, table, where=None, index_kind=None):
        self.table = table
        self.where = where
        self.index_kind = index_kind
        self.values = None
        self.condition = None
        self.filter = None
        self.terms = self.generate(where)

        # create the numexpr & the filter
        if self.terms:
            conds = [ t.condition for t in self.terms if t.condition is not None ]
            if len(conds):
                self.condition = "(%s)" % ' & '.join(conds)
            self.filter = set()
            for t in self.terms:
                if t.filter is not None:
                    self.filter |= t.filter

    def generate(self, where):
        """ generate and return the terms """
        if where is None: return None

        # a single term is allowed in place of a list of terms
        if not isinstance(where, (list,tuple)):
            where = [ where ]

        return [ Term(c, index_kind = self.index_kind) for c in where ]

    def select(self):
        """
        generate the selection
        """
        if self.condition is not None:
            self.values = self.table.readWhere(self.condition)
        else:
            self.values = self.table.read()

    def select_coords(self):
        """
        generate the selection as row coordinates

        Raises an Exception when the selection consists only of a filter:
        a filter is applied after reading, so it cannot be turned into
        coordinates, and treating it as "all rows" would make callers that
        act on the coordinates touch rows that were never selected.
        """
        if self.condition is not None:
            self.values = self.table.getWhereList(self.condition)
        elif self.filter:
            raise Exception("cannot select coordinates using only a filter; "
                            "use fewer values so that a condition can be generated")
        else:
            # no condition at all: mirror select() and take every row
            self.values = np.arange(self.table.nrows)
|
1173 | 1303 |
|
1174 | 1304 | def _get_index_factory(klass):
|
|
0 commit comments