Skip to content

Commit 5d41e06

Browse files
committed
Add slack data importer.
This importer is more comprehensive than the existing one.
1 parent 09060af commit 5d41e06

File tree

1 file changed

+289
-0
lines changed

1 file changed

+289
-0
lines changed
Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
#!/usr/bin/env python
2+
import os
3+
import json
4+
import hashlib
5+
import sys
6+
import argparse
7+
import shutil
8+
import subprocess
9+
import zipfile
10+
11+
12+
# Transported from https://github.com/zulip/zulip/blob/master/zerver/lib/export.py
13+
def rm_tree(path):
    # type: (str) -> None
    """Recursively delete ``path`` if it exists; do nothing otherwise."""
    if not os.path.exists(path):
        return
    shutil.rmtree(path)
17+
18+
def users2zerver_userprofile(slack_dir, realm_id, timestamp, domain_name):
    # type: (str, int, int, str) -> Tuple[List[Dict[str, Any]], Dict[str, int]]
    """Convert ``<slack_dir>/users.json`` into Zulip ``zerver_userprofile`` rows.

    Parameters:
        slack_dir: directory holding the extracted Slack export.
        realm_id: id stored in every row's ``realm`` field.
        timestamp: value stored in ``last_login``, ``date_joined`` and
            ``last_reminder``.
        domain_name: domain used to synthesize an address for users whose
            Slack profile carries no email.

    Returns:
        ``(zerver_userprofile, added_users)`` where ``zerver_userprofile`` is
        the list of user dicts (ids assigned sequentially from 1, in file
        order) and ``added_users`` maps each Slack user id to that new id.

    Raises:
        KeyError: if a user entry lacks ``id``, ``name`` or ``profile``.
    """
    print('######### IMPORTING USERS STARTED #########\n')
    with open(slack_dir + '/users.json') as users_file:
        users = json.load(users_file)
    zerver_userprofile = []
    added_users = {}
    user_id_count = 1
    for user in users:
        slack_user_id = user['id']
        profile = user['profile']

        # Not every entry is guaranteed to carry a top-level real_name;
        # fall back to the profile's copy, then to the short name.
        full_name = (user.get('real_name') or profile.get('real_name') or
                     user['name'])

        # Use the real address when Slack exported one; otherwise derive a
        # stable placeholder from the user's name so the row has an email.
        if profile.get('email'):
            email = profile['email']
        else:
            email = (hashlib.blake2b(full_name.encode()).hexdigest() +
                     "@%s" % (domain_name))

        # These flags are read with a False default so an entry that omits
        # them does not abort the import.
        is_bot = user.get('is_bot', False)

        userprofile = dict(
            enable_desktop_notifications=True,
            is_staff=user.get('is_admin', False),
            # avatar_source='G',
            is_bot=is_bot,
            avatar_version=1,
            autoscroll_forever=False,
            default_desktop_notifications=True,
            timezone=user.get("tz", ""),
            default_sending_stream=None,
            enable_offline_email_notifications=True,
            user_permissions=[],  # TODO ???
            is_mirror_dummy=False,
            pointer=-1,
            default_events_register_stream=None,
            is_realm_admin=user.get('is_owner', False),
            invites_granted=0,
            enter_sends=True,
            bot_type=1 if is_bot else None,
            enable_stream_sounds=False,
            is_api_super_user=False,
            rate_limits="",
            last_login=timestamp,
            tos_version=None,
            default_all_public_streams=False,
            full_name=full_name,
            twenty_four_hour_time=False,
            groups=[],  # TODO
            muted_topics=[],
            enable_online_push_notifications=False,
            alert_words="[]",
            # bot_owner= null, TODO
            short_name=user['name'],
            enable_offline_push_notifications=True,
            left_side_userlist=False,
            enable_stream_desktop_notifications=False,
            enable_digest_emails=True,
            last_pointer_updater="",
            email=email,
            date_joined=timestamp,
            last_reminder=timestamp,
            is_superuser=False,
            tutorial_status="T",
            default_language="en",
            enable_sounds=True,
            pm_content_in_desktop_notifications=True,
            # A user deleted in Slack must be imported as inactive.
            is_active=not user.get('deleted', False),
            onboarding_steps="[]",
            emojiset="google",
            emoji_alt_code=False,
            realm=realm_id,  # TODO
            quota=1073741824,  # TODO
            invites_used=0,
            id=user_id_count)

        # TODO map the avatar
        # zerver auto-infer the url from Gravatar instead of from a specified
        # url; zerver.lib.avatar needs to be patched
        # profile['image_32'], Slack has 24, 32, 48, 72, 192, 512 size range

        zerver_userprofile.append(userprofile)
        added_users[slack_user_id] = user_id_count
        user_id_count += 1
        print(u"{} -> {}\nCreated\n".format(user['name'], userprofile['email']))
    print('######### IMPORTING USERS FINISHED #########\n')
    return zerver_userprofile, added_users
98+
99+
def channels2zerver_stream(slack_dir, realm_id, added_users):
    # type: (str, int, Dict[str, int]) -> Tuple[List[Dict[str, Any]], Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]]]
    """Convert ``<slack_dir>/channels.json`` into Zulip stream-related rows.

    Parameters:
        slack_dir: directory holding the extracted Slack export.
        realm_id: id stored in every stream's ``realm`` field.
        added_users: mapping of Slack user id -> Zulip user id, as returned
            by ``users2zerver_userprofile``.

    Returns:
        ``(zerver_stream, added_channels, zerver_subscription,
        zerver_recipient)``.  ``added_channels`` maps channel name -> stream
        id.  Exactly one recipient row (``type=2``) is emitted per stream and
        its ``id`` equals the stream id; every subscription row points at
        that recipient and carries its own unique id.

    Raises:
        KeyError: if a channel lists a member that is not in ``added_users``
            or lacks one of the required channel fields.
    """
    print('######### IMPORTING CHANNELS STARTED #########\n')
    with open(slack_dir + '/channels.json') as channels_file:
        channels = json.load(channels_file)
    added_channels = {}
    zerver_stream = []
    zerver_subscription = []
    zerver_recipient = []
    stream_id_count = 1
    subscription_id_count = 1
    for channel in channels:
        # slack_channel_id = channel['id']

        # map Slack's topic and purpose content into Zulip's stream description.
        # WARN This mapping is lossy since the topic.creator, topic.last_set,
        # purpose.creator, purpose.last_set fields are not preserved.
        description = "topic: {}\npurpose: {}".format(channel["topic"]["value"],
                                                      channel["purpose"]["value"])

        # construct the stream object and append it to zerver_stream
        stream = dict(
            realm=realm_id,
            name=channel["name"],
            deactivated=channel["is_archived"],
            description=description,
            # NOTE(review): this marks every non-general channel as
            # invite-only; confirm that is the intended mapping.
            invite_only=not channel["is_general"],
            date_created=channel["created"],
            id=stream_id_count)
        zerver_stream.append(stream)
        added_channels[stream['name']] = stream_id_count

        # recipient
        # type_id's
        # 1: private message
        # 2: stream
        # 3: huddle
        # TODO currently the goal is to map Slack's standard export
        # This defaults to 2
        # TODO do private message subscriptions between each users have to
        # be generated from scratch?
        #
        # One recipient per stream, created before (and independently of)
        # the member loop so channels without members still get one.  Only
        # stream recipients exist here, so the stream id doubles as a unique
        # recipient id.
        recipient_id = stream_id_count
        rcpt = dict(
            type=2,
            type_id=stream_id_count,
            id=recipient_id)
        zerver_recipient.append(rcpt)

        # construct the subscription object and append it to zerver_subscription
        for member in channel.get('members', []):
            sub = dict(
                recipient=recipient_id,
                notifications=False,
                color="#c2c2c2",
                desktop_notifications=True,
                pin_to_top=False,
                in_home_view=True,
                active=True,
                user_profile=added_users[member],
                id=subscription_id_count)
            zerver_subscription.append(sub)
            subscription_id_count += 1

        stream_id_count += 1
        print(u"{} -> created\n".format(channel['name']))

        # TODO map Slack's pins to Zulip's stars
        # There is the security model that Slack's pins are known to the team owner
        # as evident from where it is stored at (channels)
        # "pins": [
        #         {
        #             "id": "1444755381.000003",
        #             "type": "C",
        #             "user": "U061A5N1G",
        #             "owner": "U061A5N1G",
        #             "created": "1444755463"
        #         }
        #         ],
    print('######### IMPORTING STREAMS FINISHED #########\n')
    return zerver_stream, added_channels, zerver_subscription, zerver_recipient
175+
176+
def channelmessage2zerver_message(slack_dir, channel, added_users, added_channels):
    # type: (str, str, Dict[str, int], Dict[str, int]) -> List[Dict[str, Any]]
    """Convert every message file of one Slack channel into Zulip message rows.

    Parameters:
        slack_dir: directory holding the extracted Slack export.
        channel: channel name; its messages are read from the JSON files in
            ``<slack_dir>/<channel>/``.
        added_users: mapping of Slack user id -> Zulip user id.
        added_channels: mapping of channel name -> id stored as the
            message's ``recipient``.

    Returns:
        A flat list of message dicts whose ``id`` counts up from 1 within
        this channel.  Messages whose ``user`` is absent or not present in
        ``added_users`` are skipped (a notice is printed) because a row
        cannot be built without a sender.

    Raises:
        KeyError: if ``channel`` is not in ``added_channels`` or a message
            lacks ``text`` or ``ts``.
    """
    channel_dir = os.path.join(slack_dir, channel)
    recipient_id = added_channels[channel]
    zerver_message = []
    msg_id_count = 1
    # os.listdir order is arbitrary; sort for a deterministic result.
    # NOTE(review): presumably the files are named by date, which would make
    # the sorted order chronological - confirm against a real export.
    for json_name in sorted(os.listdir(channel_dir)):
        with open(os.path.join(channel_dir, json_name)) as msgs_file:
            msgs = json.load(msgs_file)
        for msg in msgs:
            slack_sender = msg.get('user')
            if slack_sender not in added_users:
                print(u"{}/{}: skipped message without a known sender\n".format(
                    channel, json_name))
                continue
            text = msg['text']
            zulip_message = dict(
                sending_client=1,
                rendered_content_version=1,  # TODO ?? doublecheck
                has_image=False,  # TODO
                subject=channel,  # TODO default subject to channel name; Slack has subtype and type
                pub_date=msg['ts'],
                id=msg_id_count,
                has_attachment=False,  # attachment will be posted in the subsequent message; this is how Slack does it, less like email
                edit_history=None,
                sender=added_users[slack_sender],  # map slack id to zulip id
                content=text,  # TODO sanitize slack text, which contains <@msg['user']|short_name>
                rendered_content=text,  # TODO slack doesn't cache this, check whether text is rendered
                recipient=recipient_id,
                last_edit_time=None,
                has_link=False)  # TODO
            zerver_message.append(zulip_message)
            # Advance the counter so each message gets a distinct id.
            msg_id_count += 1
    return zerver_message
201+
202+
def main(slack_zip_file):
    # type: (str) -> None
    """Convert a Slack export archive into a Zulip import archive.

    Extracts ``slack_zip_file`` next to itself, builds ``realm.json`` and
    ``message.json`` inside a fresh ``zulip_data`` directory (relative to the
    current working directory), zips that directory to ``zulip_data.zip``,
    removes both working directories and finally exits the process with
    status 0.

    ``zerver_realm_skeleton.json`` is read from the current working
    directory.

    Parameters:
        slack_zip_file: path to the Slack export ``.zip`` archive.
    """
    from datetime import datetime, timezone

    # Strip only the trailing extension; str.replace would also remove any
    # other ".zip" occurring inside the path.
    slack_dir, _ = os.path.splitext(slack_zip_file)
    if slack_dir == slack_zip_file:
        # No extension to strip: avoid extracting onto the archive itself.
        slack_dir = slack_zip_file + '_extracted'
    with zipfile.ZipFile(slack_zip_file, 'r') as zip_ref:
        zip_ref.extractall(slack_dir)

    # TODO fetch realm config from zulip config
    DOMAIN_NAME = "zulipchat.com"
    REALM_ID = 1
    REALM_NAME = "FleshEatingBatswithFangs"
    # An aware datetime is required here: calling .timestamp() on the naive
    # value returned by utcnow() would interpret it as local time.
    NOW = datetime.now(timezone.utc).timestamp()
    with open('zerver_realm_skeleton.json') as skeleton_file:
        zerver_realm_skeleton = json.load(skeleton_file)
    zerver_realm_skeleton[0]['id'] = REALM_ID
    zerver_realm_skeleton[0]['string_id'] = 'zulip'  # subdomain / short_name of realm
    zerver_realm_skeleton[0]['name'] = REALM_NAME
    zerver_realm_skeleton[0]['date_created'] = NOW

    # Make sure the directory output is clean
    output_dir = 'zulip_data'
    rm_tree(output_dir)
    os.makedirs(output_dir)

    realm = dict(zerver_defaultstream=[],  # TODO
                 zerver_client=[{"name": "populate_db", "id": 1},
                                {"name": "website", "id": 2},
                                {"name": "API", "id": 3}],
                 zerver_userpresence=[],  # TODO
                 zerver_userprofile_mirrordummy=[],
                 zerver_realmdomain=[{"realm": REALM_ID,
                                      "allow_subdomains": False,
                                      "domain": DOMAIN_NAME,
                                      "id": REALM_ID}],
                 zerver_useractivity=[],
                 zerver_realm=zerver_realm_skeleton,
                 zerver_huddle=[],  # TODO
                 zerver_userprofile_crossrealm=[],  # TODO
                 zerver_useractivityinterval=[],
                 zerver_realmfilter=[],
                 zerver_realmemoji=[])

    zerver_userprofile, added_users = users2zerver_userprofile(slack_dir,
                                                               REALM_ID,
                                                               int(NOW),
                                                               DOMAIN_NAME)
    realm['zerver_userprofile'] = zerver_userprofile

    (zerver_stream, added_channels,
     zerver_subscription, zerver_recipient) = channels2zerver_stream(
        slack_dir, REALM_ID, added_users)
    realm['zerver_stream'] = zerver_stream
    realm['zerver_subscription'] = zerver_subscription
    realm['zerver_recipient'] = zerver_recipient
    # IO: the with-block guarantees the file is flushed and closed before the
    # directory is archived below.
    with open(output_dir + '/realm.json', 'w') as realm_file:
        json.dump(realm, realm_file)

    # now for message.json
    message_json = {}
    zerver_message = []
    # TODO map zerver_usermessage
    for channel in added_channels:
        # extend, not append: the result must be one flat list of messages.
        zerver_message.extend(channelmessage2zerver_message(slack_dir, channel,
                                                            added_users, added_channels))
    # The per-channel helper numbers its messages from 1 each time it is
    # called, so renumber here to make ids unique across the whole export.
    for message_id, message in enumerate(zerver_message, 1):
        message['id'] = message_id
    message_json['zerver_message'] = zerver_message
    # IO
    with open(output_dir + '/message.json', 'w') as message_file:
        json.dump(message_json, message_file)

    # TODO
    # attachments

    # remove slack dir
    rm_tree(slack_dir)

    # compress the folder; produces "<output_dir>.zip" with output_dir as the
    # top-level entry, without depending on an external `zip` binary
    shutil.make_archive(output_dir, 'zip', base_dir=output_dir)

    # remove zulip dir
    rm_tree(output_dir)

    sys.exit(0)
281+
282+
if __name__ == '__main__':
    # Command-line entry point: takes the path to a Slack export archive.
    # from django.conf import settings
    # settings_module = "settings.py"
    # os.environ['DJANGO_SETTINGS_MODULE'] = settings_module
    description = ("script to convert Slack export data into Zulip export data")
    parser = argparse.ArgumentParser(description=description)
    # Declare the positional argument so a missing path yields a usage
    # message instead of an IndexError, and --help documents it.
    parser.add_argument('slack_zip_file',
                        help="path to the Slack export .zip archive")
    args = parser.parse_args()
    slack_zip_file = args.slack_zip_file
    main(slack_zip_file)

0 commit comments

Comments
 (0)