|
5 | 5 | import json |
6 | 6 | import os |
7 | 7 | import shutil |
| 8 | +import xml.etree.ElementTree as et |
8 | 9 | from datetime import datetime |
| 10 | +from io import StringIO |
9 | 11 | from pathlib import Path |
10 | 12 | from pprint import pprint |
11 | 13 | from time import sleep |
@@ -1056,6 +1058,171 @@ def get_file_info_and_usage( |
def purge(self):
    """Purges the page by delegating to the wrapped page object (self._page)"""
    page = self._page
    page.purge()
1058 | 1060 |
|
class ExportConfig(model.OswBaseModel):
    """Options that control how export_xml exports a page to XML"""

    full_history: Optional[bool] = True
    """True (default): export all revisions of the page.
    False: export only the current revision"""
    include_templates: Optional[bool] = False
    """True: also export the templates used in the page (default: False)"""
| 1068 | + |
class ExportResult(model.OswBaseModel):
    """Outcome of export_xml"""

    xml: str
    """the exported XML document as a string (export_xml sets it to "" on failure)"""
    success: bool
    """True if the export succeeded, False otherwise"""
| 1076 | + |
def export_xml(self, config: Optional[ExportConfig] = None) -> ExportResult:
    """Exports the page to XML by posting to Special:Export

    Parameters
    ----------
    config, optional
        see ExportConfig. If omitted or None, a default ExportConfig is used
        (full history, templates not included).

    Returns
    -------
    ExportResult
        success=True and the response body as xml if the server answered
        with HTTP status 200, otherwise success=False and an empty xml string.
    """
    # Create the default per call: a default instance in the signature is
    # built once at definition time and shared between all calls, and an
    # explicitly passed None would fail on the attribute access below.
    if config is None:
        config = WtPage.ExportConfig()

    site = self.wtSite._site
    # NOTE(review): the title is appended verbatim; titles containing
    # characters such as "&" or "#" may need URL-quoting - confirm.
    url = (
        site.scheme
        + "://"
        + site.host
        + site.path
        + "index.php?title=Special:Export/"
        + self.title
    )
    data = {
        "title": "Special:Export",
        "catname": "",
        "pages": self.title,
        "wpEditToken": site.get_token("csrf"),
        "wpDownload": "1",
    }
    # Both flags are only sent when they apply
    if not config.full_history:
        data["curonly"] = "1"
    if config.include_templates:
        data["templates"] = "1"

    response = site.connection.post(url, data=data)
    if response.status_code != 200:
        return WtPage.ExportResult(success=False, xml="")
    return WtPage.ExportResult(success=True, xml=response.text)
| 1115 | + |
class ImportConfig(model.OswBaseModel):
    """Configuration to import a page from XML.
    see also https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps"""

    xml: str
    """the XML string to import (see WtPage.export_xml)"""
    summary: str
    """the edit summary to use for the import"""
    source_domain: str
    """the domain of the instance from which the XML was exported, e.g. mywiki.com"""
    full_history: Optional[bool] = True
    """if true, import the full history of the page, else only the current revision"""
    include_templates: Optional[bool] = False
    """if true, import the templates used in the page if contained in the XML"""
    # Values are numeric namespace IDs, so the value type is int (the
    # defaults below are ints and import_xml converts them with str()).
    namespace_mapping: Optional[Dict[str, int]] = {
        "Main": 0,
        "File": 6,
        "Template": 10,
        "Category": 14,
        "Item": 7000,
    }
    """mapping of namespace names to namespace IDs in the target instance"""
    username_mapping: Optional[Dict[str, str]] = {}
    """mapping of usernames in the XML to usernames in the target instance"""
| 1140 | + |
class ImportResult(model.OswBaseModel):
    """Return type of import_xml"""

    success: bool
    """if true, the import was successful, else false"""
    # The two fields below carry defaults so that a failed import, which has
    # no imported title or revision count, can still be reported.
    imported_title: str = ""
    """the title of the imported page, empty if nothing was imported"""
    imported_revisions: int = 0
    """the number of imported revisions, 0 if nothing was imported"""
    error_msg: Optional[str] = None
    """the error message if the import failed, else None"""
| 1149 | + |
def import_xml(self, config: ImportConfig) -> ImportResult:
    """Imports the page from an XML export

    The <title> and <ns> elements of the XML are replaced with the title and
    namespace of this page and the usernames are mapped, then the XML is
    uploaded to api.php with action=import. config is not modified.

    Parameters
    ----------
    config
        see ImportConfig

    Returns
    -------
    ImportResult
        success=True with the imported title and revision count reported by
        the API, or success=False with error_msg set if the XML contains no
        <title>/<ns> element or the API answered with an error.
    """
    ns_decl = 'xmlns="http://www.mediawiki.org'
    masked_ns_decl = "_" + ns_decl

    # mask the default namespace definition so that elements can be found
    # without a namespace prefix
    # (see https://stackoverflow.com/questions/34009992/python-elementtree-default-namespace)
    tree = et.fromstring(config.xml.replace(ns_decl, masked_ns_decl))

    title_element = tree.find(".//title")
    ns_element = tree.find(".//ns")
    if title_element is None or ns_element is None:
        return WtPage.ImportResult(
            success=False,
            imported_title="",
            imported_revisions=0,
            error_msg="XML contains no <title> and/or <ns> element",
        )

    # split at the first colon only: the page name itself may contain
    # colons, and a title without colon has no namespace prefix
    namespace, separator, page_name = self.title.partition(":")
    if not separator:
        namespace, page_name = "Main", self.title

    # replace title and namespace with the requested ones
    namespace_mapping = config.namespace_mapping or {}
    title_element.text = page_name
    ns_element.text = str(namespace_mapping.get(namespace, 0))

    # apply username mapping (user in the target system might have different names)
    username_mapping = config.username_mapping or {}
    for element in tree.findall(".//username"):
        element.text = username_mapping.get(element.text, element.text)

    # serialize and restore the default namespace definition
    xml = et.tostring(tree, encoding="unicode").replace(masked_ns_decl, ns_decl)

    site = self.wtSite._site
    api_url = site.scheme + "://" + site.host + site.path + "api.php"
    data = {
        "action": "import",
        "token": site.get_token("csrf"),
        "assignknownusers": "1",
        "interwikiprefix": config.source_domain,
        "summary": config.summary,
        "format": "json",
    }
    # MediaWiki API boolean parameters count as true whenever they are
    # present, whatever their value, so false flags are omitted instead of
    # being sent as "0"
    if config.full_history:
        data["fullhistory"] = "1"
    if config.include_templates:
        data["templates"] = "1"

    response = site.connection.post(
        url=api_url,
        data=data,
        # upload the XML string as a file
        files={"xml": ("xml", StringIO(xml), "text/xml")},
    )

    # not named "json" to avoid shadowing the json module
    result = response.json()
    if "error" in result:
        return WtPage.ImportResult(
            success=False,
            imported_title="",
            imported_revisions=0,
            error_msg=result["error"]["info"],
        )
    imported = result["import"][0]
    return WtPage.ImportResult(
        success=True,
        imported_title=imported["title"],
        imported_revisions=imported["revisions"],
    )
| 1225 | + |
1059 | 1226 |
|
# Resolve forward references in the pydantic models
WtSite.UploadPageParam.update_forward_refs()
|
0 commit comments