Skip to content

Commit 2de33d6

Browse files
committed
Add slack data importer.
This importer is more comprehensive than the existing one.
1 parent 5681b6e commit 2de33d6

File tree

2 files changed

+338
-0
lines changed

2 files changed

+338
-0
lines changed
Lines changed: 302 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,302 @@
1+
#!/usr/bin/env python
2+
import os
3+
import json
4+
import hashlib
5+
import sys
6+
import argparse
7+
import shutil
8+
import subprocess
9+
import zipfile
10+
11+
12+
# Ported from https://github.com/zulip/zulip/blob/master/zerver/lib/export.py
13+
def rm_tree(path):
    # type: (str) -> None
    """Recursively delete ``path`` if it exists; do nothing otherwise."""
    if not os.path.exists(path):
        return
    shutil.rmtree(path)
17+
18+
def users2zerver_userprofile(slack_dir, realm_id, timestamp, domain_name):
    # type: (str, int, int, str) -> Tuple[List[Dict[str, Any]], Dict[str, int]]
    """Convert the users of a Slack export into ``zerver_userprofile`` rows.

    Reads ``users.json`` from ``slack_dir`` and builds one profile dict per
    Slack user. Users are numbered sequentially, starting at 1, in the order
    they appear in the file.

    Args:
        slack_dir: directory holding the extracted Slack export.
        realm_id: value stored in the ``realm`` field of every profile.
        timestamp: value stored in ``last_login``, ``date_joined`` and
            ``last_reminder``.
        domain_name: domain used to synthesize an address for users whose
            Slack profile has no ``email`` entry.

    Returns:
        A ``(zerver_userprofile, added_users)`` tuple: the list of profile
        dicts, and a dict mapping each Slack user id to the id assigned here.
    """
    print('######### IMPORTING USERS STARTED #########\n')
    with open(os.path.join(slack_dir, 'users.json'), encoding='utf-8') as users_file:
        users = json.load(users_file)

    zerver_userprofile = []
    added_users = {}
    desktop_notification = True

    for user_id_count, user in enumerate(users, start=1):
        slack_user_id = user['id']
        profile = user['profile']
        # 'real_name' is optional; fall back to the always-present 'name'.
        display_name = user.get('real_name', user['name'])
        is_bot = user.get('is_bot', False)

        if 'email' in profile:
            email = profile['email']
        else:
            # No address in the export: derive a stable placeholder from the
            # display name so that re-running the import yields the same value.
            email = (hashlib.blake2b(display_name.encode()).hexdigest() +
                     "@%s" % (domain_name))

        # userprofile's quota is hardcoded as per
        # https://github.com/zulip/zulip/blob/e1498988d9094961e6f9988fb308b3e7310a8e74/zerver/migrations/0059_userprofile_quota.py#L18
        userprofile = dict(
            enable_desktop_notifications=desktop_notification,
            # NOTE(review): Slack 'is_admin' -> is_staff and 'is_owner' ->
            # is_realm_admin is kept as originally written; confirm mapping.
            is_staff=user.get('is_admin', False),
            # avatar_source='G',
            is_bot=is_bot,
            avatar_version=1,
            autoscroll_forever=False,
            default_desktop_notifications=True,
            timezone=user.get("tz", ""),
            default_sending_stream=None,
            enable_offline_email_notifications=True,
            user_permissions=[],  # TODO ???
            is_mirror_dummy=False,
            pointer=-1,
            default_events_register_stream=None,
            is_realm_admin=user.get('is_owner', False),
            invites_granted=0,
            enter_sends=True,
            bot_type=1 if is_bot else None,
            enable_stream_sounds=False,
            is_api_super_user=False,
            rate_limits="",
            last_login=timestamp,
            tos_version=None,
            default_all_public_streams=False,
            full_name=display_name,
            twenty_four_hour_time=False,
            groups=[],  # TODO
            muted_topics=[],
            enable_online_push_notifications=False,
            alert_words="[]",
            # bot_owner=None, TODO
            short_name=user['name'],
            enable_offline_push_notifications=True,
            left_side_userlist=False,
            enable_stream_desktop_notifications=False,
            enable_digest_emails=True,
            last_pointer_updater="",
            email=email,
            date_joined=timestamp,
            last_reminder=timestamp,
            is_superuser=False,
            tutorial_status="T",
            default_language="en",
            enable_sounds=True,
            pm_content_in_desktop_notifications=True,
            # A user flagged as deleted in Slack must be *inactive* here.
            is_active=not user.get('deleted', False),
            onboarding_steps="[]",
            emojiset="google",
            emoji_alt_code=False,
            realm=realm_id,
            quota=1073741824,
            invites_used=0,
            id=user_id_count)

        # TODO map the avatar
        # zerver auto-infer the url from Gravatar instead of from a specified
        # url; zerver.lib.avatar needs to be patched
        # profile['image_32'], Slack has 24, 32, 48, 72, 192, 512 size range

        zerver_userprofile.append(userprofile)
        added_users[slack_user_id] = user_id_count
        print(u"{} -> {}\nCreated\n".format(user['name'], userprofile['email']))

    print('######### IMPORTING USERS FINISHED #########\n')
    return zerver_userprofile, added_users
103+
104+
def channels2zerver_stream(slack_dir, realm_id, added_users):
    # type: (str, int, Dict[str, int]) -> Tuple[List[Dict[str, Any]], Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]]]
    """Convert the channels of a Slack export into stream-related rows.

    Reads ``channels.json`` from ``slack_dir`` and, for each channel, builds
    one stream dict, one recipient dict for that stream, and one subscription
    dict per channel member. Streams are numbered sequentially from 1 in file
    order; subscriptions get their own sequential ids.

    Args:
        slack_dir: directory holding the extracted Slack export.
        realm_id: value stored in the ``realm`` field of every stream.
        added_users: mapping of Slack user id to the id assigned to that
            user (as returned by ``users2zerver_userprofile``).

    Returns:
        A ``(zerver_stream, added_channels, zerver_subscription,
        zerver_recipient)`` tuple, where ``added_channels`` maps each channel
        name to its stream id.

    Raises:
        KeyError: if a channel member is not present in ``added_users``.
    """
    print('######### IMPORTING CHANNELS STARTED #########\n')
    with open(os.path.join(slack_dir, 'channels.json'), encoding='utf-8') as channels_file:
        channels = json.load(channels_file)

    added_channels = {}
    zerver_stream = []
    zerver_subscription = []
    zerver_recipient = []
    subscription_id_count = 1

    for stream_id_count, channel in enumerate(channels, start=1):
        # slack_channel_id = channel['id']

        # map Slack's topic and purpose content into Zulip's stream description.
        # WARN This mapping is lossy since the topic.creator, topic.last_set,
        # purpose.creator, purpose.last_set fields are not preserved.
        description = "topic: {}\npurpose: {}".format(channel["topic"]["value"],
                                                      channel["purpose"]["value"])

        # construct the stream object and append it to zerver_stream
        stream = dict(
            realm=realm_id,
            name=channel["name"],
            deactivated=channel["is_archived"],
            description=description,
            # NOTE(review): every channel except the general one is marked
            # invite-only, as originally written; confirm this is intended.
            invite_only=not channel["is_general"],
            date_created=channel["created"],
            id=stream_id_count)
        zerver_stream.append(stream)
        added_channels[stream['name']] = stream_id_count

        # recipient
        # type_id's
        # 1: private message
        # 2: stream
        # 3: huddle
        # TODO currently the goal is to map Slack's standard export
        # This defaults to 2
        # TODO do private message subscriptions between each users have to
        # be generated from scratch?
        #
        # Exactly one recipient row per stream. Its id is the stream id, so
        # that channelmessage2zerver_message, which stores
        # added_channels[channel] as a message's recipient, refers to this row.
        recipient_id = stream_id_count
        zerver_recipient.append(dict(
            type=2,
            type_id=stream_id_count,
            id=recipient_id))

        # construct the subscription object and append it to zerver_subscription
        for member in channel.get('members', []):
            sub = dict(
                recipient=recipient_id,
                notifications=False,
                color="#c2c2c2",
                desktop_notifications=True,
                pin_to_top=False,
                in_home_view=True,
                active=True,
                user_profile=added_users[member],
                # Own counter: reusing the stream id would give every member
                # of a channel the same subscription id.
                id=subscription_id_count)
            zerver_subscription.append(sub)
            subscription_id_count += 1

        print(u"{} -> created\n".format(channel['name']))

        # TODO map Slack's pins to Zulip's stars
        # There is the security model that Slack's pins are known to the team owner
        # as evident from where it is stored at (channels)
        # "pins": [
        #         {
        #             "id": "1444755381.000003",
        #             "type": "C",
        #             "user": "U061A5N1G",
        #             "owner": "U061A5N1G",
        #             "created": "1444755463"
        #         }
        #         ],
    print('######### IMPORTING STREAMS FINISHED #########\n')
    return zerver_stream, added_channels, zerver_subscription, zerver_recipient
180+
181+
def channelmessage2zerver_message(slack_dir, channel, added_users, added_channels):
    # type: (str, str, Dict[str, int], Dict[str, int]) -> List[Dict[str, Any]]
    """Convert every message of one Slack channel into ``zerver_message`` rows.

    Loads each JSON file found in ``slack_dir/<channel>`` (in sorted file-name
    order) and builds one message dict per entry. Messages are numbered
    sequentially from 1 within this call.

    Args:
        slack_dir: directory holding the extracted Slack export.
        channel: channel name; also the sub-directory name and the subject
            given to every message.
        added_users: mapping of Slack user id to assigned user id.
        added_channels: mapping of channel name to the id stored as the
            message's recipient.

    Returns:
        The list of message dicts for the channel.

    Raises:
        KeyError: if a message has neither a ``user`` nor a ``file.user``
            entry, or refers to a user missing from ``added_users``.
    """
    channel_dir = os.path.join(slack_dir, channel)
    zerver_message = []
    msg_id_count = 1

    # os.listdir returns entries in arbitrary order; sort so the assigned ids
    # are deterministic. Presumably the files are named by date, which would
    # make this chronological -- TODO confirm.
    for json_name in sorted(os.listdir(channel_dir)):
        with open(os.path.join(channel_dir, json_name), encoding='utf-8') as msgs_file:
            msgs = json.load(msgs_file)

        for msg in msgs:
            text = msg['text']
            # Prefer the message's own author; only messages without one fall
            # back to the author of the attached file.
            if 'user' in msg:
                user = msg['user']
            else:
                user = msg['file']['user']

            zulip_message = dict(
                sending_client=1,
                rendered_content_version=1,  # TODO ?? doublecheck
                has_image=False,  # TODO
                subject=channel,  # TODO default subject to channel name; Slack has subtype and type
                pub_date=msg['ts'],
                id=msg_id_count,
                has_attachment=False,  # attachment will be posted in the subsequent message; this is how Slack does it, less like email
                edit_history=None,
                sender=added_users[user],  # map slack id to zulip id
                content=text,  # TODO sanitize slack text, which contains <@msg['user']|short_name>
                rendered_content=text,  # TODO slack doesn't cache this, check whether text is rendered
                recipient=added_channels[channel],
                last_edit_time=None,
                has_link=False)  # TODO
            zerver_message.append(zulip_message)
            msg_id_count += 1
    return zerver_message
211+
212+
def _find_export_root(extract_dir):
    # type: (str) -> str
    """Return the directory at or directly below ``extract_dir`` holding ``users.json``."""
    if os.path.exists(os.path.join(extract_dir, 'users.json')):
        return extract_dir
    for entry in sorted(os.listdir(extract_dir)):
        candidate = os.path.join(extract_dir, entry)
        if os.path.exists(os.path.join(candidate, 'users.json')):
            return candidate
    # Nothing found: let the caller fail on the missing file with a clear path.
    return extract_dir


def main(slack_zip_file):
    # type: (str) -> None
    """Convert a Slack export archive into ``zulip_data.zip``.

    Extracts ``slack_zip_file`` next to itself, converts users, channels and
    channel messages, writes ``realm.json`` and ``message.json`` into a fresh
    ``zulip_data`` directory in the current working directory, archives that
    directory as ``zulip_data.zip``, removes both working directories, and
    finally exits the process with status 0.

    Args:
        slack_zip_file: path to the Slack export zip archive.
    """
    from datetime import datetime, timezone

    # Strip only a trailing '.zip'; str.replace would also eat a '.zip'
    # occurring earlier in the path.
    root, ext = os.path.splitext(slack_zip_file)
    extract_dir = root if ext.lower() == '.zip' else slack_zip_file + '.extracted'
    rm_tree(extract_dir)
    with zipfile.ZipFile(slack_zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
    # The archive may hold the export files at its top level or inside a
    # single wrapping folder; accept both layouts.
    slack_dir = _find_export_root(extract_dir)

    # TODO fetch realm config from zulip config
    DOMAIN_NAME = "zulipchat.com"
    REALM_ID = 1  # TODO how to find this
    REALM_NAME = "FleshEatingBatswithFangs"
    # An aware datetime is required: .timestamp() on the naive result of
    # utcnow() is interpreted as local time and is off by the UTC offset.
    NOW = datetime.now(timezone.utc).timestamp()

    script_path = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(script_path, 'zerver_realm_skeleton.json'),
              encoding='utf-8') as skeleton_file:
        zerver_realm_skeleton = json.load(skeleton_file)
    zerver_realm_skeleton[0]['id'] = REALM_ID
    zerver_realm_skeleton[0]['string_id'] = 'zulip'  # subdomain / short_name of realm
    zerver_realm_skeleton[0]['name'] = REALM_NAME
    zerver_realm_skeleton[0]['date_created'] = NOW

    # Make sure the directory output is clean
    output_dir = 'zulip_data'
    rm_tree(output_dir)
    os.makedirs(output_dir)

    realm = dict(zerver_defaultstream=[],  # TODO
                 zerver_client=[{"name": "populate_db", "id": 1},
                                {"name": "website", "id": 2},
                                {"name": "API", "id": 3}],
                 zerver_userpresence=[],  # TODO
                 zerver_userprofile_mirrordummy=[],
                 zerver_realmdomain=[{"realm": REALM_ID,
                                      "allow_subdomains": False,
                                      "domain": DOMAIN_NAME,
                                      "id": REALM_ID}],
                 zerver_useractivity=[],
                 zerver_realm=zerver_realm_skeleton,
                 zerver_huddle=[],  # TODO
                 zerver_userprofile_crossrealm=[],  # TODO
                 zerver_useractivityinterval=[],
                 zerver_realmfilter=[],
                 zerver_realmemoji=[])

    zerver_userprofile, added_users = users2zerver_userprofile(slack_dir,
                                                               REALM_ID,
                                                               int(NOW),
                                                               DOMAIN_NAME)
    realm['zerver_userprofile'] = zerver_userprofile

    (zerver_stream, added_channels,
     zerver_subscription, zerver_recipient) = channels2zerver_stream(
        slack_dir, REALM_ID, added_users)
    realm['zerver_stream'] = zerver_stream
    realm['zerver_subscription'] = zerver_subscription
    realm['zerver_recipient'] = zerver_recipient
    # IO
    with open(os.path.join(output_dir, 'realm.json'), 'w') as realm_file:
        json.dump(realm, realm_file)

    # now for message.json
    message_json = {}
    zerver_message = []
    # TODO map zerver_usermessage
    for channel in added_channels:
        # extend, not append: the result must be one flat list of messages,
        # not a list holding one list per channel.
        zerver_message.extend(channelmessage2zerver_message(slack_dir, channel,
                                                            added_users, added_channels))
    # Each channel is converted independently and numbers its messages from
    # 1, so renumber here to keep ids unique across the whole export.
    for message_id, message in enumerate(zerver_message, start=1):
        message['id'] = message_id
    message_json['zerver_message'] = zerver_message
    # IO
    with open(os.path.join(output_dir, 'message.json'), 'w') as message_file:
        json.dump(message_json, message_file)

    # TODO
    # attachments

    # remove slack dir
    rm_tree(extract_dir)

    # compress the folder (entries are stored under the 'zulip_data/' prefix)
    shutil.make_archive(output_dir, 'zip', root_dir=os.curdir, base_dir=output_dir)

    # remove zulip dir
    rm_tree(output_dir)

    sys.exit(0)
294+
295+
if __name__ == '__main__':
    # from django.conf import settings
    # settings_module = "settings.py"
    # os.environ['DJANGO_SETTINGS_MODULE'] = settings_module
    description = ("script to convert Slack export data into Zulip export data")
    parser = argparse.ArgumentParser(description=description)
    # Declare the positional argument so that a missing path produces a usage
    # message (and --help works) instead of an IndexError on sys.argv.
    parser.add_argument('slack_zip_file', metavar='<slack zip file>',
                        help="path to the zip archive exported from Slack")
    args = parser.parse_args()
    main(args.slack_zip_file)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
[{
2+
"message_retention_days": null,
3+
"inline_image_preview": true,
4+
"name_changes_disabled": false,
5+
"string_id": "zulip",
6+
"icon_version": 1,
7+
"waiting_period_threshold": 0,
8+
"email_changes_disabled": false,
9+
"deactivated": false,
10+
"notifications_stream": null,
11+
"restricted_to_domain": true,
12+
"show_digest_email": true,
13+
"allow_message_editing": true,
14+
"description": "The Zulip development environment default organization. It's great for testing!",
15+
"default_language": "en",
16+
"icon_source": "G",
17+
"invite_required": false,
18+
"invite_by_admins_only": false,
19+
"create_stream_by_admins_only": false,
20+
"mandatory_topics": false,
21+
"inline_url_embed_preview": true,
22+
"message_content_edit_limit_seconds": 600,
23+
"authentication_methods": [
24+
["Google", true],
25+
["Email", true],
26+
["GitHub", true],
27+
["LDAP", true],
28+
["Dev", true],
29+
["RemoteUser", true]
30+
],
31+
"name": "",
32+
"org_type": 1,
33+
"add_emoji_by_admins_only": false,
34+
"date_created": null,
35+
"id": 1
36+
}]

0 commit comments

Comments
 (0)