8
8
import subprocess
9
9
import re
10
10
11
+ import requests
12
+
11
13
from typing import Any , Dict , List
12
14
# stubs
13
15
user_profile_stub = Dict [str , Any ]
@@ -227,14 +229,17 @@ def channels2zerver_stream(slack_dir, realm_id, added_users):
227
229
print ('######### IMPORTING STREAMS FINISHED #########\n ' )
228
230
return zerver_defaultstream , zerver_stream , added_channels , zerver_subscription , zerver_recipient
229
231
230
- def channelmessage2zerver_message_one_stream (slack_dir , channel , added_users ,
232
+ def channelmessage2zerver_message_one_stream (constants , channel , added_users ,
231
233
zerver_userprofile ,
232
- added_channels , msg_id_count ,
233
- usermessage_id , zerver_subscription ):
234
+ added_channels , ids ,
235
+ zerver_subscription ):
236
+ slack_dir , REALM_ID , upload_dir = constants
237
+ message_id , usermessage_id , attachment_id = ids
234
238
json_names = os .listdir (slack_dir + '/' + channel )
235
239
users = json .load (open (slack_dir + '/users.json' ))
236
240
zerver_message = []
237
241
zerver_usermessage = []
242
+ zerver_attachment = []
238
243
239
244
# Sanitize the message text
240
245
def sanitize_text (text ):
@@ -279,19 +284,51 @@ def check_has_link(msg):
279
284
if 'has_link' in msg :
280
285
return msg ['has_link' ]
281
286
else :
287
+ # TODO map msg['attachments']['from_url']
282
288
text = msg ['text' ]
283
289
return ('http://' in text or 'https://' in text )
284
290
285
291
def parse_url(url):
    """Return ``url`` with JSON-style escaped slashes (``\\/``) unescaped.

    Every backslash-slash pair in ``url`` is replaced by a plain ``/``;
    a string with no such pairs is returned unchanged.
    """
    # "\\/" spells the backslash explicitly. The previous "\/" literal relied
    # on Python passing an unknown escape through unchanged, which triggers a
    # DeprecationWarning/SyntaxWarning on current interpreters. A single
    # replace is enough: it also covers the doubled "\/\/" case.
    return url.replace("\\/", "/")
287
293
294
def save_attachment(url, _id, name):
    """Download ``url`` and store it as ``<upload_dir>/<_id>/<name>``.

    ``upload_dir`` is read from the enclosing scope. The per-attachment
    directory is created if it does not exist yet.

    Args:
        url: Location of the file; escaped slashes are unescaped via
            ``parse_url`` before the request is made.
        _id: Identifier used as the name of the per-attachment directory.
        name: File name to write inside that directory (used verbatim --
            presumably the Slack-provided file name; TODO confirm it never
            contains path separators).
    """
    url = parse_url(url)
    response = requests.get(url, stream=True)
    try:
        # Use the ``_id`` argument. The previous code formatted the builtin
        # ``id`` function, so every file ended up in one directory named
        # "<built-in function id>" and same-named files overwrote each other.
        target_dir = os.path.join(upload_dir, str(_id))
        os.makedirs(target_dir, exist_ok=True)
        with open(os.path.join(target_dir, name), 'wb') as output_file:
            shutil.copyfileobj(response.raw, output_file)
    finally:
        # A streamed response keeps its connection open until it is fully
        # consumed or closed; release it explicitly even when the copy fails.
        response.close()
300
+
288
301
for json_name in json_names :
289
302
msgs = json .load (open (slack_dir + '/%s/%s' % (channel , json_name )))
290
303
for msg in msgs :
291
304
text = msg ['text' ]
292
- if 'subtype' in msg .keys () and msg ['subtype' ] in ["channel_join" , "channel_leave" , "channel_name" ]:
293
- # Ignore noisy messages
294
- continue
305
+ has_attachment = False
306
+
307
+ if 'subtype' in msg .keys ():
308
+ st = msg ['subtype' ]
309
+ if st in ["channel_join" , "channel_leave" , "channel_name" ]:
310
+ # Ignore noisy messages
311
+ continue
312
+ elif st == "file_share" :
313
+ has_attachment = True
314
+ _file = msg ['file' ]
315
+ slack_user_id = _file ['user' ]
316
+ zulip_user_id = added_users [slack_user_id ]
317
+ save_attachment (_file ['url_private' ], attachment_id , _file ['name' ])
318
+ path_id = "%d\/%d\/%s" % (REALM_ID , attachment_id , _file ['name' ])
319
+ # construct attachments object and append it to zerver_attachment
320
+ attachments = dict (
321
+ id = attachment_id ,
322
+ is_realm_public = True , # TOODOO map for private messages and huddles, where is_realm_public = False
323
+ file_name = _file ['name' ],
324
+ create_time = _file ['created' ],
325
+ size = _file ['size' ],
326
+ path_id = path_id ,
327
+ realm = REALM_ID ,
328
+ owner = zulip_user_id ,
329
+ messages = [message_id ])
330
+ attachment_id += 1
331
+ zerver_attachment .append (attachments )
295
332
296
333
try :
297
334
user = msg .get ('user' , msg ['file' ]['user' ])
@@ -305,8 +342,8 @@ def parse_url(url):
305
342
has_image = msg .get ('has_image' , False ),
306
343
subject = channel , # This is Zulip-specific
307
344
pub_date = msg ['ts' ],
308
- id = msg_id_count ,
309
- has_attachment = False , # attachment will be posted in the subsequent message; this is how Slack does it, i.e. less like email
345
+ id = message_id ,
346
+ has_attachment = has_attachment , # attachment will be posted in the subsequent message; this is how Slack does it, i.e. less like email
310
347
edit_history = None ,
311
348
sender = added_users [user ], # map slack id to zulip id
312
349
content = sanitize_text (text ),
@@ -332,8 +369,8 @@ def parse_url(url):
332
369
usermessage_id += 1
333
370
zerver_usermessage .append (usermessage )
334
371
335
- msg_id_count += 1
336
- return zerver_message , zerver_usermessage
372
+ message_id += 1
373
+ return zerver_message , zerver_usermessage , zerver_attachment
337
374
338
375
def main (slack_zip_file : str ) -> None :
339
376
slack_dir = slack_zip_file .replace ('.zip' , '' )
@@ -360,7 +397,7 @@ def main(slack_zip_file: str) -> None:
360
397
# Make sure the directory output is clean
361
398
output_dir = 'zulip_data'
362
399
rm_tree (output_dir )
363
- os .makedirs (output_dir )
400
+ os .makedirs (output_dir , exist_ok = True )
364
401
365
402
realm = dict (zerver_client = [{"name" : "populate_db" , "id" : 1 },
366
403
{"name" : "website" , "id" : 2 },
@@ -400,19 +437,22 @@ def main(slack_zip_file: str) -> None:
400
437
message_json = {}
401
438
zerver_message = []
402
439
zerver_usermessage = []
440
+ zerver_attachment = []
403
441
442
+ upload_dir = output_dir + '/uploads/' + str (REALM_ID )
443
+ constants = [slack_dir , REALM_ID , upload_dir ]
404
444
for channel in added_channels .keys ():
405
- msg_id_count = len (zerver_message ) + 1 # For the id of the messages
445
+ message_id = len (zerver_message ) + 1 # For the id of the messages
406
446
usermessage_id = len (zerver_usermessage ) + 1
407
- zm_one_stream , zum_one_stream = channelmessage2zerver_message_one_stream ( slack_dir , channel ,
408
- added_users ,
409
- zerver_userprofile ,
410
- added_channels ,
411
- msg_id_count ,
412
- usermessage_id ,
413
- zerver_subscription )
414
- zerver_message += zm_one_stream
415
- zerver_usermessage += zum_one_stream
447
+ attachment_id = len ( zerver_attachment ) + 1
448
+ ids = [ message_id , usermessage_id , attachment_id ]
449
+ zm , zum , za = channelmessage2zerver_message_one_stream ( constants , channel ,
450
+ added_users , zerver_userprofile ,
451
+ added_channels , ids ,
452
+ zerver_subscription )
453
+ zerver_message += zm
454
+ zerver_usermessage += zum
455
+ zerver_attachment += za
416
456
# TOODOO add zerver_usermessage corresponding to the
417
457
# private messages and huddles type recipients
418
458
@@ -424,17 +464,17 @@ def main(slack_zip_file: str) -> None:
424
464
425
465
# IO avatar records
426
466
avatar_records_file = output_dir + '/avatars/records.json'
427
- os .makedirs (output_dir + '/avatars' )
467
+ os .makedirs (output_dir + '/avatars' , exist_ok = True )
428
468
json .dump ([], open (avatar_records_file , 'w' ))
429
469
430
470
# IO uploads TODO
431
471
uploads_records_file = output_dir + '/uploads/records.json'
432
- os .makedirs (output_dir + '/uploads' )
472
+ os .makedirs (output_dir + '/uploads' , exist_ok = True )
433
473
json .dump ([], open (uploads_records_file , 'w' ))
434
474
435
- # IO attachments TODO
475
+ # IO attachments
436
476
attachment_file = output_dir + '/attachment.json'
437
- attachment = {"zerver_attachment" : [] }
477
+ attachment = {"zerver_attachment" : zerver_attachment }
438
478
json .dump (attachment , open (attachment_file , 'w' ))
439
479
440
480
print ('ls' , os .listdir ())
0 commit comments