|
| 1 | +#!/usr/bin/env python |
| 2 | +import os |
| 3 | +import json |
| 4 | +import hashlib |
| 5 | +import sys |
| 6 | +import argparse |
| 7 | +import shutil |
| 8 | +import subprocess |
| 9 | +import zipfile |
| 10 | + |
| 11 | + |
| 12 | +# Transported from https://github.com/zulip/zulip/blob/master/zerver/lib/export.py |
def rm_tree(path):
    # type: (str) -> None
    """Remove the directory tree at ``path``; do nothing when it does not exist."""
    if not os.path.exists(path):
        return
    shutil.rmtree(path)
| 17 | + |
def users2zerver_userprofile(slack_dir, realm_id, timestamp, domain_name):
    # type: (str, int, int, str) -> Tuple[List[Dict[str, Any]], Dict[str, int]]
    """Build Zulip ``zerver_userprofile`` rows from ``<slack_dir>/users.json``.

    Args:
        slack_dir: directory holding the extracted Slack export.
        realm_id: value stored in every row's ``realm`` field.
        timestamp: value stored in ``last_login``, ``date_joined`` and
            ``last_reminder``.
        domain_name: domain used to build a placeholder address for users
            whose Slack profile has no ``email``.

    Returns:
        A tuple ``(zerver_userprofile, added_users)``: the list of user
        profile dicts, and a dict mapping each Slack user id to the
        sequential id (starting at 1) assigned to that user.

    Raises:
        KeyError: if a user entry lacks ``id``, ``name`` or ``profile``.
    """
    print('######### IMPORTING USERS STARTED #########\n')
    with open(os.path.join(slack_dir, 'users.json')) as users_file:
        users = json.load(users_file)

    zerver_userprofile = []
    added_users = {}
    user_id_count = 1
    for user in users:
        slack_user_id = user['id']
        profile = user['profile']
        # Same fallback for the display name and for the placeholder email,
        # so a user without 'real_name' no longer raises KeyError.
        full_name = user.get('real_name', user['name'])
        is_bot = user.get('is_bot', False)

        if 'email' in profile:
            email = profile['email']
        else:
            # No address in the export: derive a stable placeholder from the
            # display name.
            email = (hashlib.blake2b(full_name.encode()).hexdigest() +
                     "@%s" % (domain_name))

        # userprofile's quota is hardcoded as per
        # https://github.com/zulip/zulip/blob/e1498988d9094961e6f9988fb308b3e7310a8e74/zerver/migrations/0059_userprofile_quota.py#L18
        userprofile = dict(
            enable_desktop_notifications=True,
            is_staff=user.get('is_admin', False),
            # avatar_source='G',
            is_bot=is_bot,
            avatar_version=1,
            autoscroll_forever=False,
            default_desktop_notifications=True,
            timezone=user.get("tz", ""),
            default_sending_stream=None,
            enable_offline_email_notifications=True,
            user_permissions=[],  # TODO ???
            is_mirror_dummy=False,
            pointer=-1,
            default_events_register_stream=None,
            is_realm_admin=user.get('is_owner', False),
            invites_granted=0,
            enter_sends=True,
            bot_type=1 if is_bot else None,
            enable_stream_sounds=False,
            is_api_super_user=False,
            rate_limits="",
            last_login=timestamp,
            tos_version=None,
            default_all_public_streams=False,
            full_name=full_name,
            twenty_four_hour_time=False,
            groups=[],  # TODO
            muted_topics=[],
            enable_online_push_notifications=False,
            alert_words="[]",
            # bot_owner=None, TODO
            short_name=user['name'],
            enable_offline_push_notifications=True,
            left_side_userlist=False,
            enable_stream_desktop_notifications=False,
            enable_digest_emails=True,
            last_pointer_updater="",
            email=email,
            date_joined=timestamp,
            last_reminder=timestamp,
            is_superuser=False,
            tutorial_status="T",
            default_language="en",
            enable_sounds=True,
            pm_content_in_desktop_notifications=True,
            # A deleted Slack account is an inactive one; the flag was
            # previously copied over without negation.
            is_active=not user.get('deleted', False),
            onboarding_steps="[]",
            emojiset="google",
            emoji_alt_code=False,
            realm=realm_id,
            quota=1073741824,
            invites_used=0,
            id=user_id_count)

        # TODO map the avatar
        # zerver auto-infer the url from Gravatar instead of from a specified
        # url; zerver.lib.avatar needs to be patched
        # profile['image_32'], Slack has 24, 32, 48, 72, 192, 512 size range

        zerver_userprofile.append(userprofile)
        added_users[slack_user_id] = user_id_count
        user_id_count += 1
        print(u"{} -> {}\nCreated\n".format(user['name'], userprofile['email']))
    print('######### IMPORTING USERS FINISHED #########\n')
    return zerver_userprofile, added_users
| 103 | + |
def channels2zerver_stream(slack_dir, realm_id, added_users):
    # type: (str, int, Dict[str, int]) -> Tuple[List[Dict[str, Any]], Dict[str, int], List[Dict[str, Any]], List[Dict[str, Any]]]
    """Build Zulip stream, subscription and recipient rows from ``channels.json``.

    Args:
        slack_dir: directory holding the extracted Slack export.
        realm_id: value stored in every stream's ``realm`` field.
        added_users: mapping of Slack user id to the id assigned to that
            user (as returned by ``users2zerver_userprofile``).

    Returns:
        A tuple ``(zerver_stream, added_channels, zerver_subscription,
        zerver_recipient)`` where ``added_channels`` maps each channel name
        to its sequential stream id (starting at 1).

    Raises:
        KeyError: if a channel lacks a required field, or lists a member
            that is not present in ``added_users``.
    """
    print('######### IMPORTING CHANNELS STARTED #########\n')
    with open(os.path.join(slack_dir, 'channels.json')) as channels_file:
        channels = json.load(channels_file)

    added_channels = {}
    zerver_stream = []
    zerver_subscription = []
    zerver_recipient = []
    stream_id_count = 1
    subscription_id_count = 1
    for channel in channels:
        # slack_channel_id = channel['id']

        # map Slack's topic and purpose content into Zulip's stream description.
        # WARN This mapping is lossy since the topic.creator, topic.last_set,
        # purpose.creator, purpose.last_set fields are not preserved.
        description = "topic: {}\npurpose: {}".format(channel["topic"]["value"],
                                                      channel["purpose"]["value"])

        # construct the stream object and append it to zerver_stream
        stream = dict(
            realm=realm_id,
            name=channel["name"],
            deactivated=channel["is_archived"],
            description=description,
            # NOTE(review): channels.json of a standard Slack export is
            # presumed to list public channels only, so none of them is
            # invite-only; the previous `not is_general` made every other
            # channel private. Confirm once private channels are imported.
            invite_only=False,
            date_created=channel["created"],
            id=stream_id_count)
        zerver_stream.append(stream)
        added_channels[stream['name']] = stream_id_count

        # recipient
        # type_id's
        # 1: private message
        # 2: stream
        # 3: huddle
        # Exactly one recipient row per stream. Its id equals the stream id,
        # which is also the value the message converter stores in a
        # message's `recipient` field.
        # TODO do private message recipients between each pair of users have
        # to be generated from scratch?
        recipient_id = stream_id_count
        zerver_recipient.append(dict(
            type=2,
            type_id=stream_id_count,
            id=recipient_id))

        # construct the subscription objects: one per member, each with its
        # own unique id and pointing at the stream's recipient row
        for member in channel.get('members', []):
            sub = dict(
                recipient=recipient_id,
                notifications=False,
                color="#c2c2c2",
                desktop_notifications=True,
                pin_to_top=False,
                in_home_view=True,
                active=True,
                user_profile=added_users[member],
                id=subscription_id_count)
            zerver_subscription.append(sub)
            subscription_id_count += 1

        stream_id_count += 1
        print(u"{} -> created\n".format(channel['name']))

        # TODO map Slack's pins to Zulip's stars
        # There is the security model that Slack's pins are known to the team owner
        # as evident from where it is stored at (channels)
        # "pins": [
        #     {
        #         "id": "1444755381.000003",
        #         "type": "C",
        #         "user": "U061A5N1G",
        #         "owner": "U061A5N1G",
        #         "created": "1444755463"
        #     }
        # ],
    print('######### IMPORTING STREAMS FINISHED #########\n')
    return zerver_stream, added_channels, zerver_subscription, zerver_recipient
| 180 | + |
def channelmessage2zerver_message(slack_dir, channel, added_users, added_channels):
    # type: (str, str, Dict[str, int], Dict[str, int]) -> List[Dict[str, Any]]
    """Convert every message stored under ``<slack_dir>/<channel>/`` to Zulip rows.

    Args:
        slack_dir: directory holding the extracted Slack export.
        channel: channel name; also the name of the sub-directory to read
            and the value used as each message's ``subject``.
        added_users: mapping of Slack user id to Zulip user id.
        added_channels: mapping of channel name to Zulip stream id.

    Returns:
        A list of message dicts whose ``id`` values run 1, 2, 3, ... within
        this channel, in sorted-file-name order and then in file order.

    Raises:
        KeyError: if a message names no sender (neither ``user`` nor
            ``file.user``) or the sender/channel is missing from the maps.
    """
    channel_dir = os.path.join(slack_dir, channel)
    zerver_message = []
    msg_id_count = 1
    # os.listdir() returns entries in arbitrary order; sort so ids are
    # assigned deterministically (presumably chronologically, if the export
    # names its files by date -- TODO confirm).
    for json_name in sorted(os.listdir(channel_dir)):
        with open(os.path.join(channel_dir, json_name)) as msg_file:
            msgs = json.load(msg_file)
        for msg in msgs:
            text = msg['text']
            # Prefer the message's own 'user'; otherwise fall back to the
            # uploader recorded on the attached file. A message with neither
            # still raises KeyError, as before.
            if 'user' in msg:
                user = msg['user']
            else:
                user = msg['file']['user']
            zulip_message = dict(
                sending_client=1,
                rendered_content_version=1,  # TODO ?? doublecheck
                has_image=False,  # TODO
                subject=channel,  # TODO default subject to channel name; Slack has subtype and type
                pub_date=msg['ts'],
                id=msg_id_count,
                has_attachment=False,  # attachment will be posted in the subsequent message; this is how Slack does it, less like email
                edit_history=None,
                sender=added_users[user],  # map slack id to zulip id
                content=text,  # TODO sanitize slack text, which contains <@msg['user']|short_name>
                rendered_content=text,  # TODO slack doesn't cache this, check whether text is rendered
                recipient=added_channels[channel],
                last_edit_time=None,
                has_link=False)  # TODO
            zerver_message.append(zulip_message)
            # The counter was previously never advanced, so every message
            # ended up with id 1.
            msg_id_count += 1
    return zerver_message
| 211 | + |
def main(slack_zip_file):
    # type: (str) -> None
    """Convert a Slack export zip into ``zulip_data.zip`` in the working directory.

    Steps: extract the archive, build ``realm.json`` (realm skeleton, users,
    streams, subscriptions, recipients) and ``message.json`` inside a fresh
    ``zulip_data`` directory, zip that directory, then remove both the
    extracted Slack data and ``zulip_data``.

    Args:
        slack_zip_file: path to the Slack export archive.

    Raises:
        subprocess.CalledProcessError: if ``unzip`` or ``zip`` fails.
        SystemExit: always, with status 0, once the conversion succeeds.
    """
    from datetime import datetime, timezone

    # Strip only a trailing '.zip'; str.replace() would also remove the
    # substring from the middle of the path.
    if slack_zip_file.endswith('.zip'):
        extract_dir = slack_zip_file[:-len('.zip')]
    else:
        extract_dir = slack_zip_file + '.extracted'

    # Extract into extract_dir explicitly: plain `unzip <file>` drops the
    # contents into the current working directory instead.
    subprocess.check_call(['unzip', slack_zip_file, '-d', extract_dir])

    # NOTE(review): the export is expected to have users.json at the archive
    # root; also accept an archive that wraps everything in one folder named
    # after the zip.
    slack_dir = extract_dir
    nested_dir = os.path.join(extract_dir, os.path.basename(extract_dir))
    if (not os.path.exists(os.path.join(slack_dir, 'users.json')) and
            os.path.isdir(nested_dir)):
        slack_dir = nested_dir

    # TODO fetch realm config from zulip config
    DOMAIN_NAME = "zulipchat.com"
    REALM_ID = 1  # TODO how to find this
    REALM_NAME = "FleshEatingBatswithFangs"
    # Use an aware datetime: naive utcnow().timestamp() is interpreted as
    # local time and yields a wrong epoch on hosts not running in UTC.
    NOW = datetime.now(timezone.utc).timestamp()

    script_path = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(script_path, 'zerver_realm_skeleton.json')) as skeleton_file:
        zerver_realm_skeleton = json.load(skeleton_file)
    zerver_realm_skeleton[0]['id'] = REALM_ID
    zerver_realm_skeleton[0]['string_id'] = 'zulip'  # subdomain / short_name of realm
    zerver_realm_skeleton[0]['name'] = REALM_NAME
    zerver_realm_skeleton[0]['date_created'] = NOW

    # Make sure the directory output is clean
    output_dir = 'zulip_data'
    rm_tree(output_dir)
    os.makedirs(output_dir)

    realm = dict(zerver_defaultstream=[],  # TODO
                 zerver_client=[{"name": "populate_db", "id": 1},
                                {"name": "website", "id": 2},
                                {"name": "API", "id": 3}],
                 zerver_userpresence=[],  # TODO
                 zerver_userprofile_mirrordummy=[],
                 zerver_realmdomain=[{"realm": REALM_ID,
                                      "allow_subdomains": False,
                                      "domain": DOMAIN_NAME,
                                      "id": REALM_ID}],
                 zerver_useractivity=[],
                 zerver_realm=zerver_realm_skeleton,
                 zerver_huddle=[],  # TODO
                 zerver_userprofile_crossrealm=[],  # TODO
                 zerver_useractivityinterval=[],
                 zerver_realmfilter=[],
                 zerver_realmemoji=[])

    zerver_userprofile, added_users = users2zerver_userprofile(slack_dir,
                                                               REALM_ID,
                                                               int(NOW),
                                                               DOMAIN_NAME)
    realm['zerver_userprofile'] = zerver_userprofile

    (zerver_stream, added_channels,
     zerver_subscription, zerver_recipient) = channels2zerver_stream(slack_dir,
                                                                     REALM_ID,
                                                                     added_users)
    realm['zerver_stream'] = zerver_stream
    realm['zerver_subscription'] = zerver_subscription
    realm['zerver_recipient'] = zerver_recipient
    # IO: close the file before zipping so the data is fully flushed
    with open(os.path.join(output_dir, 'realm.json'), 'w') as realm_file:
        json.dump(realm, realm_file)

    # now for message.json
    message_json = {}
    zerver_message = []
    # TODO map zerver_usermessage
    for channel in added_channels:
        # extend, not append: zerver_message is one flat list of messages,
        # not a list of per-channel lists
        zerver_message.extend(channelmessage2zerver_message(slack_dir, channel,
                                                            added_users, added_channels))
    # Each channel is converted independently, so renumber here to keep
    # message ids unique across the whole export.
    for message_id, message in enumerate(zerver_message, start=1):
        message['id'] = message_id
    message_json['zerver_message'] = zerver_message
    # IO
    with open(os.path.join(output_dir, 'message.json'), 'w') as message_file:
        json.dump(message_json, message_file)

    # TODO
    # attachments

    # remove slack dir
    rm_tree(extract_dir)

    # compress the folder
    subprocess.check_call(['zip', '-r', output_dir + '.zip', output_dir])

    # remove zulip dir
    rm_tree(output_dir)

    sys.exit(0)
| 294 | + |
if __name__ == '__main__':
    # from django.conf import settings
    # settings_module = "settings.py"
    # os.environ['DJANGO_SETTINGS_MODULE'] = settings_module
    description = ("script to convert Slack export data into Zulip export data")
    parser = argparse.ArgumentParser(description=description)
    # Declare the positional argument so a missing path produces a usage
    # message instead of an IndexError from sys.argv[1], and --help works.
    parser.add_argument('slack_zip_file',
                        help="path to the Slack export zip file")
    args = parser.parse_args()
    main(args.slack_zip_file)
0 commit comments