-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_citations_crossref.py
217 lines (177 loc) · 6.74 KB
/
get_citations_crossref.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
"""
This script does the following:
1. replace any partial bibliography entries of the form ["title"](url) in
papers.md with more complete citations using info from crossref.org.
2. write papers.html from scratch based on papers.md
To revise a citation, you can edit papers.md and then rerun this script.
If you have previously collected the papers in Zotero, you can right-click on
the new ones, export as a bibliography, then use ChatGPT with this prompt to
convert them into the required format:
Can you give me the titles and URLs of some papers in the following format?
```
["title1"](url)
["title2"](url)
```
These are the papers:
```
<insert bibliography entries here, including titles and URLs>
```
One good technique is to put all the new citations at the top of the document in
abbreviated form (as above), then run the script to convert them to full
bibliography entries, then distribute them to the right rows of the right
sections.
Before running, you should run something like
conda create -n bibliography requests fuzzywuzzy markdown
conda activate bibliography
"""
import html
import json

import requests
from fuzzywuzzy import fuzz
from markdown import markdown
# Function to retrieve paper metadata using CrossRef API
def _first_or_blank(values):
    """Return the first element of a Crossref list field, or "" if missing or empty."""
    return values[0] if values else ""


def _issue_date(item):
    """Return the first "issued" date-parts list of `item`, or [None] if undated."""
    date_parts = (item.get("issued") or {}).get("date-parts") or [[None]]
    first = date_parts[0]
    if not first or first[0] is None:
        return [None]
    return first


def _author_names(paper):
    """Join the authors of `paper` as "A, B and C"; return "" if there are none."""
    names = []
    for author in paper.get("author", []):
        # strip() avoids a stray space when only one of given/family is present
        name = f"{author.get('given', '')} {author.get('family', '')}".strip()
        # NOTE(review): Crossref appears to use a "name" field for
        # organizational authors -- confirm against the API documentation
        name = name or author.get("name", "")
        if name:
            names.append(name)
    if len(names) > 1:
        # commas between all but the last two names, "and" before the last
        return ", ".join(names[:-1]) + " and " + names[-1]
    return "".join(names)


def get_citation(title, url):
    """Return a markdown citation for the paper `title`, linked to `url`.

    Crossref is queried for `title` and the returned record with the best
    fuzzy title match is used. Match scores of 99 or more are treated as
    equal, and ties are broken in favor of the latest issue date, so a journal
    article is preferred over an older preprint with the same title.

    Args:
        title: paper title, without surrounding quotes or trailing period.
        url: link target for the title.

    Returns:
        A bulleted entry of the form
        '- Authors. ["title."](url) _Journal_ volume, no. issue (year): pages.'
        with any parts Crossref does not supply left out, when a record
        matches with a score of at least 90. Otherwise (request failure,
        non-200 status, no results, or best score below 90) the bare
        '["title."](url)' link is returned without a bullet.
    """
    # this is slow, so give an update
    print(f'Retrieving citation for "{title}".')

    # basic citation, returned as-is whenever Crossref gives nothing better
    citation = f'["{title}."]({url})'

    # get more metadata if possible
    api_url = "https://api.crossref.org/works"
    params = {
        "query.bibliographic": title,
        # get several rows and pick our favorite (if we just use 1, sometimes
        # it's an older preprint instead of a journal article)
        "rows": 4,
        # NOTE(review): this address looks like a redacted placeholder; a real
        # contact address should be confirmed and restored here
        "mailto": "[email protected]",
    }
    try:
        # a timeout keeps one stalled connection from hanging the whole run
        response = requests.get(api_url, params=params, timeout=30)
    except requests.RequestException as err:
        print(f" Crossref request failed: {err}")
        return citation

    if response.status_code != 200:
        print(f" Crossref returned HTTP status {response.status_code}.")
        return citation

    items = response.json()["message"]["items"]
    if not items:
        return citation

    # prefer the newest version with a good enough match
    for item in items:
        item["cr_title"] = _first_or_blank(item.get("title"))
        item["match"] = fuzz.token_set_ratio(title, item["cr_title"])
        item["issue_date"] = _issue_date(item)
    paper = max(
        items,
        key=lambda p: (
            # cap at 99 so all near-exact matches compete on date alone
            min(p["match"], 99),
            [0, 0, 0] if p["issue_date"] == [None] else p["issue_date"],
        ),
    )
    cr_title = paper["cr_title"]
    match = paper["match"]

    if match < 90:
        # bail out if the title doesn't match the best candidate
        print(f' Not matched in crossref. Closest is "{cr_title}".')
        return citation

    if match < 99:
        print(" WARNING: weak title match:")
        print(f' "{title}"')
        print(f' vs. "{cr_title}"')

    author_names = _author_names(paper)
    # journal names can contain HTML entities; decode them for markdown
    journal = html.unescape(_first_or_blank(paper.get("container-title")))
    publication_year = (
        "undated" if paper["issue_date"] == [None] else str(paper["issue_date"][0])
    )
    volume = paper.get("volume", "")
    issue = paper.get("issue", "")
    page = paper.get("page", "")

    if author_names:
        citation = f"{author_names}. {citation}"
    if journal:
        citation += f" _{journal}_"
    if volume:
        citation += f" {volume}"
    if issue:
        citation += f", no. {issue}"
    citation += f" ({publication_year})"
    if page:
        citation += f": {page}"
    citation += "."

    # Add a bullet at the start
    return "- " + citation
# Read the current version of the article list. The encoding is given
# explicitly so the result does not depend on the platform default and agrees
# with the charset declared in the generated HTML.
with open("papers.md", encoding="utf-8") as f:
    lines = f.read().splitlines()

# Replace plain [article](url) lines (optionally wrapped in double quotes)
# with full citations; every other line is left exactly as it was read.
for i, line in enumerate(lines):
    line = line.strip()
    is_quoted_link = line.startswith('"[') and line.endswith(')"')
    is_bare_link = line.startswith("[") and line.endswith(")")
    if not (is_quoted_link or is_bare_link):
        continue

    title_end = line.find("](")
    if title_end == -1:
        # bracketed text without a "](" separator is not a link; skip it
        continue

    # Extract title from the line
    title_start = line.index("[") + 1
    title = line[title_start:title_end]
    # clean up if we have one of our own short citations back
    if title.startswith('"') and title.endswith('"'):
        title = title[1:-1]
    if title.endswith("."):
        title = title[:-1]

    # Extract URL from the line (drop the closing `)` or `)"`)
    url_start = title_end + 2
    url_end = len(line) - (1 if line.endswith(")") else 2)
    url = line[url_start:url_end]

    # Retrieve metadata using CrossRef API
    lines[i] = get_citation(title, url)

papers_markdown = "\n".join(lines)

# Render the page before writing anything, so neither file is touched if the
# markdown conversion fails.
papers_html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8'>
<meta http-equiv="X-UA-Compatible" content="chrome=1">
<meta name="description" content="Papers written with Switch power system planning model : ">
<link rel="stylesheet" type="text/css" media="screen" href="stylesheets/stylesheet.css">
<title>Papers Written with Switch</title>
</head>
<body>
<!-- HEADER -->
<div id="header_wrap" class="outer">
<header class="inner">
<h1 id="project_title">Papers Written with Switch</h1>
</header>
</div>
<!-- MAIN CONTENT -->
<div id="main_content_wrap" class="outer">
<section id="main_content" class="inner">
<p>These can give you an idea of work that others have done—possibly in your
region—and may point you toward possible data sources, collaborators, advisors
or shared code.</p>
{markdown(papers_markdown)}
</section>
</div>
</body>
</html>
"""

with open("papers.md", "w", encoding="utf-8") as f:
    f.write(papers_markdown)

with open("papers.html", "w", encoding="utf-8") as f:
    f.write(papers_html)

print("re-wrote papers.md and created papers.html")