Skip to content

Commit a57753a

Browse files
rheaparekh authored and rht committed
slack: Map attachments.
With minor patch by @rht
1 parent ac9e170 commit a57753a

File tree

1 file changed

+65
-25
lines changed

1 file changed

+65
-25
lines changed

zulip/integrations/slack/slackdata2zulipdata.py

Lines changed: 65 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import subprocess
99
import re
1010

11+
import requests
12+
1113
from typing import Any, Dict, List
1214
# stubs
1315
user_profile_stub = Dict[str, Any]
@@ -227,14 +229,17 @@ def channels2zerver_stream(slack_dir, realm_id, added_users):
227229
print('######### IMPORTING STREAMS FINISHED #########\n')
228230
return zerver_defaultstream, zerver_stream, added_channels, zerver_subscription, zerver_recipient
229231

230-
def channelmessage2zerver_message_one_stream(slack_dir, channel, added_users,
232+
def channelmessage2zerver_message_one_stream(constants, channel, added_users,
231233
zerver_userprofile,
232-
added_channels, msg_id_count,
233-
usermessage_id, zerver_subscription):
234+
added_channels, ids,
235+
zerver_subscription):
236+
slack_dir, REALM_ID, upload_dir = constants
237+
message_id, usermessage_id, attachment_id = ids
234238
json_names = os.listdir(slack_dir + '/' + channel)
235239
users = json.load(open(slack_dir + '/users.json'))
236240
zerver_message = []
237241
zerver_usermessage = []
242+
zerver_attachment = []
238243

239244
# Sanitize the message text
240245
def sanitize_text(text):
@@ -279,19 +284,51 @@ def check_has_link(msg):
279284
if 'has_link' in msg:
280285
return msg['has_link']
281286
else:
287+
# TODO map msg['attachments']['from_url']
282288
text = msg['text']
283289
return ('http://' in text or 'https://' in text)
284290

285291
def parse_url(url):
    """Return `url` with every backslash-escaped slash replaced by a plain slash.

    The URL is expected to be a string; it is returned unchanged when it
    contains no escaped slashes.
    """
    # The backslashes are written out explicitly: a bare backslash followed
    # by a slash in a normal string literal is an invalid escape sequence
    # that newer Python versions warn about. The runtime strings are the
    # same as before.
    return url.replace("\\/\\/", "//").replace("\\/", "/")
287293

294+
def save_attachment(url, _id, name):
    """Download the file at `url` and store it as `<upload_dir>/<_id>/<name>`.

    `upload_dir` is taken from the enclosing scope. The per-attachment
    directory is created if it does not exist yet.

    Args:
        url: Download URL; escaped slashes are normalised via `parse_url`.
        _id: Attachment id, used as the name of the sub-directory.
        name: File name to write inside that sub-directory.
    """
    url = parse_url(url)
    # Use the `_id` argument here: the builtin `id` would turn into the
    # string "<built-in function id>" and put every file in one bogus folder.
    target_dir = upload_dir + '/' + str(_id)
    os.makedirs(target_dir, exist_ok=True)
    response = requests.get(url, stream=True)
    try:
        with open(target_dir + '/' + name, 'wb') as output_file:
            shutil.copyfileobj(response.raw, output_file)
    finally:
        # A streamed response keeps its connection open until it is closed.
        response.close()
300+
288301
for json_name in json_names:
289302
msgs = json.load(open(slack_dir + '/%s/%s' % (channel, json_name)))
290303
for msg in msgs:
291304
text = msg['text']
292-
if 'subtype' in msg.keys() and msg['subtype'] in ["channel_join", "channel_leave", "channel_name"]:
293-
# Ignore noisy messages
294-
continue
305+
has_attachment = False
306+
307+
if 'subtype' in msg.keys():
308+
st = msg['subtype']
309+
if st in ["channel_join", "channel_leave", "channel_name"]:
310+
# Ignore noisy messages
311+
continue
312+
elif st == "file_share":
313+
has_attachment = True
314+
_file = msg['file']
315+
slack_user_id = _file['user']
316+
zulip_user_id = added_users[slack_user_id]
317+
save_attachment(_file['url_private'], attachment_id, _file['name'])
318+
path_id = "%d\/%d\/%s" % (REALM_ID, attachment_id, _file['name'])
319+
# construct attachments object and append it to zerver_attachment
320+
attachments = dict(
321+
id=attachment_id,
322+
is_realm_public=True, # TOODOO map for private messages and huddles, where is_realm_public = False
323+
file_name=_file['name'],
324+
create_time=_file['created'],
325+
size=_file['size'],
326+
path_id=path_id,
327+
realm=REALM_ID,
328+
owner=zulip_user_id,
329+
messages=[message_id])
330+
attachment_id += 1
331+
zerver_attachment.append(attachments)
295332

296333
try:
297334
user = msg.get('user', msg['file']['user'])
@@ -305,8 +342,8 @@ def parse_url(url):
305342
has_image=msg.get('has_image', False),
306343
subject=channel, # This is Zulip-specific
307344
pub_date=msg['ts'],
308-
id=msg_id_count,
309-
has_attachment=False, # attachment will be posted in the subsequent message; this is how Slack does it, i.e. less like email
345+
id=message_id,
346+
has_attachment=has_attachment, # attachment will be posted in the subsequent message; this is how Slack does it, i.e. less like email
310347
edit_history=None,
311348
sender=added_users[user], # map slack id to zulip id
312349
content=sanitize_text(text),
@@ -332,8 +369,8 @@ def parse_url(url):
332369
usermessage_id += 1
333370
zerver_usermessage.append(usermessage)
334371

335-
msg_id_count += 1
336-
return zerver_message, zerver_usermessage
372+
message_id += 1
373+
return zerver_message, zerver_usermessage, zerver_attachment
337374

338375
def main(slack_zip_file: str) -> None:
339376
slack_dir = slack_zip_file.replace('.zip', '')
@@ -360,7 +397,7 @@ def main(slack_zip_file: str) -> None:
360397
# Make sure the directory output is clean
361398
output_dir = 'zulip_data'
362399
rm_tree(output_dir)
363-
os.makedirs(output_dir)
400+
os.makedirs(output_dir, exist_ok=True)
364401

365402
realm = dict(zerver_client=[{"name": "populate_db", "id": 1},
366403
{"name": "website", "id": 2},
@@ -400,19 +437,22 @@ def main(slack_zip_file: str) -> None:
400437
message_json = {}
401438
zerver_message = []
402439
zerver_usermessage = []
440+
zerver_attachment = []
403441

442+
upload_dir = output_dir + '/uploads/' + str(REALM_ID)
443+
constants = [slack_dir, REALM_ID, upload_dir]
404444
for channel in added_channels.keys():
405-
msg_id_count = len(zerver_message) + 1 # For the id of the messages
445+
message_id = len(zerver_message) + 1 # For the id of the messages
406446
usermessage_id = len(zerver_usermessage) + 1
407-
zm_one_stream, zum_one_stream = channelmessage2zerver_message_one_stream(slack_dir, channel,
408-
added_users,
409-
zerver_userprofile,
410-
added_channels,
411-
msg_id_count,
412-
usermessage_id,
413-
zerver_subscription)
414-
zerver_message += zm_one_stream
415-
zerver_usermessage += zum_one_stream
447+
attachment_id = len(zerver_attachment) + 1
448+
ids = [message_id, usermessage_id, attachment_id]
449+
zm, zum, za = channelmessage2zerver_message_one_stream(constants, channel,
450+
added_users, zerver_userprofile,
451+
added_channels, ids,
452+
zerver_subscription)
453+
zerver_message += zm
454+
zerver_usermessage += zum
455+
zerver_attachment += za
416456
# TOODOO add zerver_usermessage corresponding to the
417457
# private messages and huddles type recipients
418458

@@ -424,17 +464,17 @@ def main(slack_zip_file: str) -> None:
424464

425465
# IO avatar records
426466
avatar_records_file = output_dir + '/avatars/records.json'
427-
os.makedirs(output_dir + '/avatars')
467+
os.makedirs(output_dir + '/avatars', exist_ok=True)
428468
json.dump([], open(avatar_records_file, 'w'))
429469

430470
# IO uploads TODO
431471
uploads_records_file = output_dir + '/uploads/records.json'
432-
os.makedirs(output_dir + '/uploads')
472+
os.makedirs(output_dir + '/uploads', exist_ok=True)
433473
json.dump([], open(uploads_records_file, 'w'))
434474

435-
# IO attachments TODO
475+
# IO attachments
436476
attachment_file = output_dir + '/attachment.json'
437-
attachment = {"zerver_attachment": []}
477+
attachment = {"zerver_attachment": zerver_attachment}
438478
json.dump(attachment, open(attachment_file, 'w'))
439479

440480
print('ls', os.listdir())

0 commit comments

Comments
 (0)