Coverage for apps/ptf/citedby.py: 68%

479 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-19 19:20 +0000

1import concurrent.futures 

2import html 

3import re 

4from collections import defaultdict 

5from difflib import SequenceMatcher 

6 

7import requests 

8import xmltodict 

9from bs4 import BeautifulSoup 

10from pylatexenc.latex2text import LatexNodes2Text 

11 

12from django.conf import settings 

13 

14from ptf.bibtex import parse_bibtex 

15from ptf.cmds.xml.xml_utils import normalise_span 

16from ptf.model_data import RefData 

17from ptf.model_data import create_contributor 

18from ptf.model_data_converter import update_ref_data_for_jats 

19from ptf.model_helpers import get_extid 

20from ptf.models import BibItemId 

21from ptf.models import get_names 

22from ptf.utils import get_display_name 

23 

# Endpoints of the external services queried for citation data.
ADS_URL = "https://api.adsabs.harvard.edu/v1/search"
ARXIV_URL = "https://export.arxiv.org/api/query"
CROSSREF_URL = "https://doi.crossref.org/servlet/getForwardLinks"
SEMANTIC_URL = "https://api.semanticscholar.org/v1/paper/"
ZBMATH_URL = "https://zbmath.org"

# Provider labels: stored on each reference as ``provider`` and used as keys
# of PRIORITY below.
ADS = "NASA ADS"
CROSSREF = "Crossref"
SEMANTIC = "Semantic Scholar"
ZBMATH = "zbMATH"

# Base timeout handed to ``requests``; several calls use half of this value.
TIMEOUT = 4.0

# Ranking used when several providers return the same DOI / arXiv id: the
# entry whose provider has the higher value is kept. Unknown providers get 0.
PRIORITY = defaultdict(int, {ZBMATH: 10, ADS: 9, CROSSREF: 8, SEMANTIC: 7})

# Shared LaTeX-to-text converter, configured with math_mode="verbatim".
LATEX_PARSER = LatexNodes2Text(math_mode="verbatim")

41 

42 

def create_refdata(lang="und"):
    """Return a new ``RefData`` typed "misc" with all external ids reset.

    @param lang: language code forwarded to ``RefData`` (default "und")
    @return: RefData whose doi, arxiv, zbl and semantic attributes are None
    """
    ref = RefData(lang=lang)
    ref.type = "misc"
    # Every identifier starts out unknown; the provider converters fill them in.
    for id_name in ("doi", "arxiv", "zbl", "semantic"):
        setattr(ref, id_name, None)
    return ref

51 

52 

def is_same_title(compare, titles, tol=0.90):
    """Tell whether ``compare`` closely matches at least one entry of ``titles``.

    Titles are compared after removing every non-word character and
    lower-casing; a match requires a ``SequenceMatcher`` ratio strictly
    greater than ``tol``.

    @param compare: title to look for
    @param titles: iterable of candidate titles
    @param tol: similarity threshold (exclusive)
    @return: bool
    """

    def squash(text):
        return re.sub(r"\W", "", text).lower()

    needle = squash(compare)
    return any(
        SequenceMatcher(None, needle, squash(candidate)).ratio() > tol
        for candidate in titles
    )

60 

61 

def get_zbmath_bibtex(params):
    """Download the BibTeX records linked from a zbMATH result page.

    The page returned for ``params`` is searched for a
    ``<div class="citations">``; for every anchor with an ``href`` inside it,
    the matching ``/bibtexoutput`` resource is fetched.

    @param params: query parameters sent to ZBMATH_URL
    @return: str, the concatenated BibTeX text ("" when no citations block)
    """
    half_timeout = 0.5 * TIMEOUT
    listing = requests.get(
        ZBMATH_URL,
        params=params,
        headers={"Content-Type": "text/html"},
        timeout=half_timeout,
    )
    page = BeautifulSoup(listing.text, "html.parser")
    citations_div = page.find("div", {"class": "citations"})
    if not citations_div:
        return ""

    chunks = []
    for anchor in citations_div.find_all("a", href=True):
        bibtex_url = ZBMATH_URL + "/bibtexoutput" + anchor.get("href", "")
        reply = requests.get(
            bibtex_url,
            headers={"Content-Type": "text/x-bibtex"},
            timeout=half_timeout,
        )
        reply.encoding = "utf-8"
        chunks.append(reply.text)
    return "".join(chunks)

76 

77 

def citedby_zbmath(metadata):
    """Query zbMATH for the documents citing the resource described by ``metadata``.

    The query targets the zbl id when ``metadata`` has a "zbl_id" key and the
    DOI otherwise, OR-ed with a title + authors clause.

    @param metadata: dict with "title", "authors" and "doi" or "zbl_id"
    @return: whatever ``parse_bibtex`` returns for the downloaded BibTeX text
    """
    # NOTE(review): indentation was lost in the listing this code was recovered
    # from; the title/author clause is assumed to apply to both branches -
    # confirm against the repository.
    if "zbl_id" in metadata:
        base_query = "an:" + metadata["zbl_id"]
    else:
        base_query = "en:" + metadata["doi"]

    title_tex = normalise_span(metadata["title"]).replace("\xa0", " ")
    authors = "&au:".join(metadata["authors"])
    query = f'{base_query}|(ti:"{title_tex}"&au:{authors})'
    return parse_bibtex(get_zbmath_bibtex({"q": query}))

89 

90 

def citedby_crossref(metadata):
    """Fetch the Crossref forward links (citing documents) of ``metadata["doi"]``.

    @param metadata: dict holding at least the "doi" key
    @return: list of ``forward_link`` entries as parsed by xmltodict; empty
             when the HTTP status is not 200 or the result body is empty
    """
    credentials = {
        "usr": settings.CROSSREF_USER,
        "pwd": settings.CROSSREF_PWD,
        "doi": metadata["doi"],
    }
    response = requests.post(CROSSREF_URL, params=credentials, timeout=TIMEOUT)
    response.encoding = "utf-8"
    if response.status_code != 200:
        return []

    parsed = xmltodict.parse(response.text)
    body = parsed["crossref_result"]["query_result"]["body"]
    if not body:
        return []

    links = body["forward_link"]
    # A single forward link is not wrapped in a list by xmltodict.
    return links if isinstance(links, list) else [links]

107 

108 

def get_arxiv_id(metadata):
    """Look up the arXiv identifier of the resource described by ``metadata``.

    arXiv is searched by DOI or exact title (one result requested); the entry
    is accepted only when its title is close to the resource title.

    @param metadata: dict holding the "doi" and "title" keys
    @return: the arXiv id without its version suffix, or None when the HTTP
             status is not 200, nothing is found or the titles differ
    """
    title_tex = normalise_span(metadata["title"]).replace("\xa0", " ")
    headers = {"Content-Type": "application/atom+xml"}
    query = "doi:" + metadata["doi"] + " OR (ti:" + f'"{title_tex}"' + ")"
    params = {"search_query": query, "max_results": 1}
    response = requests.get(ARXIV_URL, params=params, headers=headers, timeout=0.5 * TIMEOUT)
    if response.status_code != 200:
        return None

    feed = xmltodict.parse(response.text)["feed"]
    if "entry" not in feed:
        return None
    entry = feed["entry"]
    if not is_same_title(title_tex, [entry["title"]]):
        return None

    identifier = entry["id"].split("arxiv.org/abs/")[-1]
    # Strip only a trailing version marker ("v2"). Cutting at the first "v"
    # would truncate old-style identifiers whose archive name contains that
    # letter (e.g. "solv-int/9701001v1" would become "sol").
    return re.sub(r"v\d+$", "", identifier)

124 

125 

def citedby_ads(metadata, by_doi=True, citedby=True):
    """Fetch from NASA ADS the documents citing (or cited by) a resource.

    The resource is identified by its arXiv id: looked up with
    ``get_arxiv_id`` when ``by_doi`` is true, read from
    ``metadata["arxiv_id"]`` otherwise. A first query returns the bibcodes of
    the related documents, a second one ("bigquery") their metadata; at most
    200 rows are requested.

    @param metadata: dict describing the resource
    @param by_doi: resolve the arXiv id through ``get_arxiv_id``
    @param citedby: True for citing documents, False for references
    @return: list of ADS documents (dicts); always a list, possibly empty
    """
    arxiv_id = get_arxiv_id(metadata) if by_doi else metadata["arxiv_id"]
    if not arxiv_id:
        return []

    half_timeout = 0.5 * TIMEOUT
    headers = {"Authorization": f"Bearer:{settings.ADS_TOKEN}"}
    reference = "citation" if citedby else "reference"
    params = {"q": "identifier:" + arxiv_id, "fl": reference}
    response = requests.get(
        ADS_URL + "/query", headers=headers, params=params, timeout=half_timeout
    )
    if response.status_code != 200:
        return []

    docs = response.json().get("response", {}).get("docs")
    if not (docs and isinstance(docs, list) and reference in docs[0]):
        return []

    bibcodes = "bibcode\n" + "\n".join(docs[0][reference])
    fields = "abstract,author,bibcode,comment,doi,doctype,"
    fields += "eid,identifier,issue,keyword,orcid_pub,"
    fields += "page,page_count,page_range,pub,pub_raw,title,volume,year"
    params = {"q": "*:*", "fl": fields, "rows": 200}
    response = requests.post(
        ADS_URL + "/bigquery",
        params=params,
        headers=headers,
        data=bibcodes,
        timeout=half_timeout,
    )
    response.encoding = "utf-8"
    if response.status_code != 200:
        return []
    # Fall back to an empty list so the return type does not depend on the
    # payload containing a "docs" entry.
    return response.json().get("response", {}).get("docs") or []

156 

157 

def citedby_semantic(metadata, citedby=True):
    """Fetch from Semantic Scholar the papers citing (or cited by) a DOI.

    @param metadata: dict holding at least the "doi" key
    @param citedby: True reads the "citations" field, False "references"
    @return: list of paper dicts; empty for site 36 or on a non-200 status
    """
    reference = "citations" if citedby else "references"
    if settings.SITE_ID == 36:  # no lookup for PCJ
        return []

    response = requests.get(SEMANTIC_URL + metadata["doi"], timeout=TIMEOUT)
    response.encoding = "utf-8"
    if response.status_code != 200:
        return []
    return list(response.json()[reference])

167 

168 

def set_contributors(ref, api_contributors, orcids=None):
    """Build author contributors from provider data and store them on ``ref``.

    The raw entries are interpreted according to ``ref.provider``:
    Crossref entries are mappings with "given_name" / "surname", ADS and
    zbMATH entries are "Last, First" strings, Semantic Scholar entries are
    mappings whose "name" is "First ... Last".

    @param ref: reference object; receives the ``contributors`` attribute
    @param api_contributors: one raw entry or a list of them
    @param orcids: optional list of ORCIDs, applied only when it has as many
                   items as there are contributors ("-" means none)
    """
    if isinstance(api_contributors, list):
        raw_entries = api_contributors
    else:
        raw_entries = [api_contributors]

    built = []
    for raw in raw_entries:
        given, family = "", ""
        if ref.provider == CROSSREF:
            given, family = raw.get("given_name"), raw.get("surname")
        elif ref.provider in (ADS, ZBMATH):
            parts = raw.split(", ")
            family = parts[0]
            given = parts[1] if len(parts) > 1 else ""
        elif ref.provider == SEMANTIC:
            words = raw["name"].split(" ")
            given, family = " ".join(words[:-1]), words[-1]

        person = create_contributor()
        person["first_name"] = given.strip() if given else ""
        person["last_name"] = family.strip() if family else ""
        person["role"] = "author"
        built.append(person)

    if orcids and len(built) == len(orcids):
        for person, orcid in zip(built, orcids):
            person["orcid"] = "" if orcid == "-" else orcid
    ref.contributors = built

199 

200 

def ads_to_bibtex_type(doc_type):
    """Map an ADS ``doctype`` value to a BibTeX entry type.

    @param doc_type: ADS document type
    @return: "article" for articles and eprints, the type itself for the
             types listed below, "misc" for anything else
    """
    if doc_type in ("article", "eprint"):
        return "article"

    passthrough = (
        "book",
        "inbook",
        "inproceedings",
        "mastersthesis",
        "phdthesis",
        "proceedings",
        "techreport",
    )
    return doc_type if doc_type in passthrough else "misc"

217 

218 

def crossref_to_bibtex_type(doc_type, item):
    """Map a Crossref forward-link element name to a BibTeX entry type.

    @param doc_type: "journal_cite", "conf_cite", "book_cite" or anything else
    @param item: content of the element; only inspected for conference and
                 book cites ("paper_title" / "chapter_title" keys)
    @return: str, the BibTeX type ("misc" for unknown element names)
    """
    if doc_type == "journal_cite":
        return "article"
    if doc_type == "conf_cite":
        return "inproceedings" if "paper_title" in item else "proceedings"
    if doc_type == "book_cite":
        return "inbook" if "chapter_title" in item else "book"
    return "misc"

235 

236 

def citedby_crossref_refs(citations):
    """Convert Crossref forward links into reference objects.

    Each link is expected to be a mapping with a "@doi" attribute plus one
    element (journal_cite, conf_cite, book_cite, ...) holding the metadata.
    The input is left untouched, so the same list can be converted again.

    @param citations: list of forward links as parsed by xmltodict
    @return: list of reference objects whose provider is CROSSREF
    """
    # (Crossref key, reference attribute), applied in order: later entries
    # override earlier ones writing the same attribute (e.g. a volume title
    # replaces a journal title).
    field_map = (
        ("journal_title", "source_tex"),
        ("article_title", "article_title_tex"),
        ("volume_title", "source_tex"),  # book or proceedings title
        ("paper_title", "article_title_tex"),  # inproceedings title
        ("chapter_title", "chapter_title_tex"),  # incollection or inbook
        ("first_page", "fpage"),
        ("last_page", "lpage"),
        ("volume", "volume"),
        ("issue", "issue"),
        ("year", "year"),
    )

    refdata = []
    for link in citations:
        # Work on a shallow copy: popping from the caller's mapping would make
        # a second conversion of the same data fail.
        entry = dict(link)
        entry.pop("@doi", None)
        if not entry:
            continue
        doc_type, item = entry.popitem()
        if not isinstance(item, dict):
            # Empty element (parsed as None): nothing to build a reference from.
            continue

        ref = create_refdata()
        ref.provider = CROSSREF
        ref.type = crossref_to_bibtex_type(doc_type, item)

        for key, attribute in field_map:
            value = item.get(key)
            # Empty XML elements come back as None; keep the default instead.
            if value:
                setattr(ref, attribute, value)

        contributors = item.get("contributors") or {}
        if "contributor" in contributors:
            set_contributors(ref, contributors["contributor"])

        doi = item.get("doi")
        if isinstance(doi, dict):
            # Element with attributes: the text lives under "#text".
            doi = doi.get("#text")
        if doi:
            ref.doi = doi.lower()

        refdata.append(ref)
    return refdata

273 

274 

def citedby_zbmath_refs(citations):
    """Convert zbMATH citations (parsed BibTeX items) with ``bibtex_to_refs``."""
    refs = bibtex_to_refs(citations)
    return refs

277 

278 

def is_misc(doctype):
    """Tell whether a BibTeX entry type must be handled as "misc".

    @param doctype: BibTeX entry type
    @return: False for the types listed below, True for everything else
    """
    # "booklet", "manual" and "mastersthesis" are deliberately not listed.
    kept_types = (
        "article",
        "book",
        "conference",
        "inbook",
        "incollection",
        "inproceedings",
        "phdthesis",
        "proceedings",
        "techreport",
    )
    return doctype not in kept_types

296 

297 

def bibtex_to_refs(bibitems):
    """Convert parsed BibTeX items into reference objects (provider zbMATH).

    As a side effect, ``item["doctype"]`` is rewritten to "misc" for the types
    ``is_misc`` does not keep.

    @param bibitems: iterable of dicts, one per BibTeX entry, each with at
                     least a "doctype" key
    @return: list of reference objects
    """
    # (BibTeX key, reference attribute) copied as is, in order: "issue"
    # overrides "number" when both are present.
    plain_fields = (
        ("volume", "volume"),
        ("number", "issue"),
        ("issue", "issue"),
        ("note", "comment"),
        ("year", "year"),
    )

    refdata = []
    for item in bibitems:
        ref = create_refdata()
        ref.provider = ZBMATH
        item["doctype"] = "misc" if is_misc(item["doctype"]) else item["doctype"]
        doctype = item["doctype"]
        ref.type = doctype

        if "fjournal" in item:
            ref.source_tex = item["fjournal"]
        elif "journal" in item:
            ref.source_tex = item["journal"]
        elif "booktitle" in item:
            ref.source_tex = item["booktitle"]
        elif "howpublished" in item:
            # Drop a " (YYYY)" group, with its optional trailing dot.
            ref.source_tex = re.sub(r" \([0-9]{4}\)\.?", "", item["howpublished"])

        if "fseries" in item:
            ref.series = item["fseries"]
        elif "series" in item:
            ref.series = item["series"]

        if "title" in item:
            if doctype in ["article", "misc"]:
                ref.article_title_tex = item["title"]
            elif doctype in ["incollection", "inproceedings", "inbook"]:
                ref.chapter_title_tex = item["title"]
            else:
                ref.source_tex = item["title"]
        if "url" in item and not ref.source_tex:
            ref.source_tex = item["url"]

        if "pages" in item and item["pages"]:
            # Split on runs of separators and drop empty tokens so that the
            # usual BibTeX range "12--34" yields exactly two parts.
            pages = [part for part in re.split(r"\W+", item["pages"]) if part]
            if pages:
                ref.fpage = pages[0]
            if len(pages) == 2:
                ref.lpage = pages[1]

        for key, attribute in plain_fields:
            if key in item:
                setattr(ref, attribute, item[key])

        if "author" in item:
            set_contributors(ref, item["author"].split(" and "))

        if "publisher" in item:
            ref.publisher_name = item["publisher"]
        elif "school" in item:
            ref.publisher_name = item["school"]
        elif "institution" in item:
            ref.publisher_name = item["institution"]
        if "address" in item:
            ref.publisher_loc = item["address"]

        if "doi" in item and item["doi"]:
            ref.doi = item["doi"].lower()
        # "zbl" wins over "zbmath" when both are present.
        if "zbmath" in item:
            ref.zbl = item["zbmath"]
        if "zbl" in item:
            ref.zbl = item["zbl"]

        refdata.append(ref)
    return refdata

360 

361 

def citedby_ads_refs(citations):
    """Convert NASA ADS documents into reference objects.

    @param citations: list of ADS documents (dicts) holding at least the
                      "bibcode" and "doctype" keys
    @return: list of reference objects whose provider is ADS
    """
    refdata = []
    for item in citations:
        ref = create_refdata()
        ref.provider = ADS
        ref.bibcode = item["bibcode"]
        ref.type = ads_to_bibtex_type(item["doctype"])

        if "title" in item and item["title"]:
            ref.article_title_tex = item["title"][0]

        if "page_range" in item:
            bounds = item["page_range"].split("-")
            if len(bounds) == 2:
                ref.fpage, ref.lpage = bounds
        elif "page" in item and item["page"] and item["page"][0].isdigit():
            first_page = item["page"][0]
            ref.fpage = first_page
            if "page_count" in item and item["page_count"]:
                # The last page is the first page plus the page count minus
                # one; the width of the first page (leading zeros) is kept.
                try:
                    last_page = int(first_page) + int(item["page_count"]) - 1
                except (TypeError, ValueError):
                    last_page = None
                if last_page is not None:
                    ref.lpage = str(last_page).zfill(len(first_page))

        if "year" in item and item["year"]:
            ref.year = item["year"]
        if "author" in item and item["author"]:
            set_contributors(ref, item["author"], item.get("orcid_pub", []))
        if "issue" in item:
            ref.issue = item["issue"]
        if "volume" in item:
            ref.volume = item["volume"]
        if "doi" in item and item["doi"]:
            ref.doi = item["doi"][0].lower()

        if "eid" in item and item["eid"]:
            arxiv = item["eid"].split("arXiv:")
            # The eid is used as arXiv id only when the publication is arXiv.
            if "pub" in item and "arXiv" in item["pub"]:
                ref.arxiv = arxiv[-1]
                ref.source_tex = "arXiv"

        if "pub_raw" in item and item["pub_raw"] and ref.doi and not ref.arxiv:
            # Keep the part of the raw publication string preceding the volume.
            result = re.match(r"(^.+)?[,.]( vol. | Volume )", item["pub_raw"])
            if result:
                ref.source_tex = result.group(1)
        elif "pub" in item and not ref.arxiv:
            ref.source_tex = item["pub"]

        if "abstract" in item and item["abstract"]:
            ref.abstract = [item["abstract"]]
        refdata.append(ref)
    return refdata

405 

406 

def citedby_semantic_refs(citations):
    """Convert Semantic Scholar papers into reference objects.

    @param citations: list of paper dicts returned by the Semantic Scholar API
    @return: list of reference objects whose provider is SEMANTIC
    """
    refdata = []
    for paper in citations:
        ref = create_refdata()
        ref.provider = SEMANTIC

        if "title" in paper:
            raw_title = paper["title"]
            # Titles written fully in upper case are toned down.
            ref.article_title_tex = raw_title.capitalize() if raw_title.isupper() else raw_title
        if paper.get("year"):
            ref.year = str(paper["year"])
        if paper.get("authors"):
            set_contributors(ref, paper["authors"])
        if paper.get("doi"):
            ref.doi = paper["doi"].lower()
        if paper.get("arxivId"):
            ref.arxiv = paper["arxivId"]
            ref.source_tex = "arXiv"
        # A venue, when given, replaces the "arXiv" source set just above.
        if paper.get("venue"):
            ref.source_tex = paper["venue"]
        if "paperId" in paper:
            ref.semantic = paper["paperId"]

        refdata.append(ref)
    return refdata

431 

432 

def get_extlinks(extids):
    """Build the HTML links of a list of external ids.

    @param extids: iterable of (id_type, id_value) pairs
    @return: list of ' | <a href="...">label</a>' strings; pairs whose type
             is not one of the four known ones are skipped
    """
    prefixes = {
        "doi": "DOI:",
        "arxiv": "arXiv:",
        "zbl-item-id": "Zbl:",
        "semantic-scholar": "Semantic-scholar:",
    }

    extlinks = []
    for extid in extids:
        eid = BibItemId()
        eid.id_type, eid.id_value = extid
        prefix = prefixes.get(eid.id_type)
        if prefix is None:
            continue
        label = prefix + eid.id_value
        extlinks.append(f' | <a href="{eid.get_href()}">{label}</a>')
    return extlinks

451 

452 

def built_extlinks(ref):
    """Collect the external ids of ``ref`` into ``ref.extids``.

    DOI, arXiv and zbl ids are listed in that order; the Semantic Scholar id
    is used only when none of them is available.

    @param ref: reference object with doi, arxiv and zbl attributes
    """
    candidates = (
        ("doi", ref.doi),
        ("arxiv", ref.arxiv),
        ("zbl-item-id", ref.zbl),
    )
    extids = [(id_type, value) for id_type, value in candidates if value]
    if not extids and getattr(ref, "semantic", False):
        extids.append(("semantic-scholar", ref.semantic))
    ref.extids = extids

464 

465 

def get_values_for_stats(refs):
    """
    Summarise each reference as a small dict for statistics.
    @param refs: dict of RefData.__dict__
    @return: list of dicts with the keys authors, title, publication_title,
             year, url and source
    """
    stats = []
    for entry in refs.values():
        authors = [
            {
                "author": get_display_name(
                    person["prefix"],
                    person["first_name"],
                    person["last_name"],
                    person["suffix"],
                    person["string_name"],
                )
            }
            for person in entry.get("contributors")
            if person["role"] == "author"
        ]

        title = entry[get_publication_title(entry, "title")]
        publication_title = entry[get_publication_title(entry, "publication_title")]

        # The URL is taken from the href of the first external link, if any.
        url = ""
        if entry["extlinks"]:
            match = re.search(r'href="(.+)">', entry["extlinks"][0])
            if match:
                url = match.group(1)

        stats.append(
            {
                "authors": authors,
                "title": title,
                "publication_title": publication_title,
                "year": entry["year"],
                "url": url,
                "source": entry["provider"],
            }
        )
    return stats

507 

508 

def get_publication_title(ref_item, category="title"):
    """Return the name of the field holding a title of ``ref_item``.

    The field depends on the reference type: any type containing "thesis" is
    handled as "thesis", types missing from the table (or no type at all) as
    "misc".

    @param ref_item: dict describing a reference (reads the "type" key)
    @param category: "title" or "publication_title"
    @return: the field name, or None for an unknown category
    """
    fields_by_type = {
        "incollection": {"title": "source_tex", "publication_title": "series"},
        "thesis": {"title": "source_tex", "publication_title": "series"},
        "article": {"title": "article_title_tex", "publication_title": "source_tex"},
        "book": {"title": "source_tex", "publication_title": "series"},
        "inbook": {"title": "chapter_title_tex", "publication_title": "series"},
        "misc": {"title": "article_title_tex", "publication_title": "source_tex"},
    }

    type_ = ref_item.get("type") or "misc"
    if "thesis" in type_:
        type_ = "thesis"
    elif type_ not in fields_by_type:
        # Only unknown types fall back to "misc"; falling back for every
        # non-thesis type would make the other table entries unreachable.
        type_ = "misc"
    return fields_by_type[type_].get(category)

526 

527 

def _citation_sort_key(entry):
    """Sort key: newest year first, then source, volume, issue and first page."""
    try:
        year = -int(entry["year"]) if entry["year"] else 0
    except (TypeError, ValueError):
        # A year that is not a plain number is ranked like a missing one.
        year = 0

    def as_text(value):
        # Missing values (None) must stay comparable with strings.
        return str(value) if value else ""

    return (
        year,
        as_text(entry["source_tex"]),
        as_text(entry["volume"]),
        as_text(entry["issue"]),
        as_text(entry["fpage"]),
    )


def built_citations(data):
    """Merge the references of all providers into one de-duplicated list.

    References are completed with the DOI / arXiv id found on a sibling
    reference, formatted, sorted and then keyed by DOI, zbl id or arXiv id;
    when a DOI or arXiv id shows up several times, the entry of the provider
    with the highest PRIORITY is kept.

    @param data: list of reference objects
    @return: tuple (list of HTML citations, comma-separated provider names,
             list of dicts built by ``get_values_for_stats``)
    """
    # to match citations and add these ids when missing
    doi_arxiv = {ref.doi: ref.arxiv for ref in data if ref.doi and ref.arxiv}
    arxiv_doi = {v: k for k, v in doi_arxiv.items()}

    results = []
    for n, ref in enumerate(data):
        if ref.arxiv and not ref.doi:
            ref.doi = arxiv_doi.get(ref.arxiv)
        elif not ref.arxiv and ref.doi:
            ref.arxiv = doi_arxiv.get(ref.doi)
        built_extlinks(ref)
        update_ref_data_for_jats(ref, n, with_label=False)
        ref.citation_html = html.unescape(ref.citation_html)
        results.append(vars(ref))

    results.sort(key=_citation_sort_key)

    refs = {}
    titles = {
        item[get_publication_title(item)]
        for item in results
        if any((item["arxiv"], item["doi"], item["zbl"]))
    }

    for item in results:
        links = get_extlinks(item["extids"])
        level = PRIORITY[item["provider"]]
        citation = LATEX_PARSER.latex_to_text(item["citation_html"].replace("$$", "$"))
        ref = {"html": citation + "".join(links)}
        ref.update({"priority": level, "extlinks": links})
        ref.update(item)

        if item["doi"]:
            if item["doi"] not in refs or refs[item["doi"]]["priority"] < level:
                refs[item["doi"]] = ref
        elif item["zbl"]:
            refs[item["zbl"]] = ref
        elif item["arxiv"]:
            if item["arxiv"] not in refs or refs[item["arxiv"]]["priority"] < level:
                refs[item["arxiv"]] = ref
        # NOTE(review): this branch cannot be reached - doi and arxiv are both
        # empty once the branches above are skipped - so references carrying
        # only a Semantic Scholar id are dropped. Confirm whether that is
        # intended before changing the condition.
        elif item["semantic"] and (item["doi"] or item["arxiv"]):
            if not is_same_title(item[get_publication_title(item)], titles):
                refs[item["semantic"]] = ref

    sources = list({ref["provider"] for ref in refs.values()})
    sources = ", ".join(sorted(sources))
    citations_html = [citation["html"] for citation in refs.values()]
    citedby_for_stats = get_values_for_stats(refs)
    return citations_html, sources, citedby_for_stats

586 

587 

def citations_to_refs(provider, citations):
    """Convert the raw citations of ``provider`` into reference objects.

    @param provider: one of CROSSREF, ZBMATH, ADS or SEMANTIC
    @param citations: raw citations as returned by that provider's fetcher
    @return: list of reference objects; empty for an unknown provider
    """
    if provider == CROSSREF:
        return citedby_crossref_refs(citations)
    if provider == ZBMATH:
        return citedby_zbmath_refs(citations)
    if provider == ADS:
        return citedby_ads_refs(citations)
    if provider == SEMANTIC:
        return citedby_semantic_refs(citations)
    # Always hand back a list: callers extend their own list with the result.
    return []

597 

598 

def get_citations(resource):
    """Return the documents citing ``resource`` and the providers they come from.

    Crossref, zbMATH and NASA ADS are queried concurrently; a provider whose
    request times out or cannot connect is ignored.

    @param resource: object with ``doi`` and ``title_tex`` attributes, also
                     passed to ``get_names`` and ``get_extid``
    @return: the tuple built by ``built_citations``
    """
    authors = get_names(resource, "author")
    zbl_id = get_extid(resource, "zbl-item-id")
    preprint_id = get_extid(resource, "preprint")

    metadata = {
        "authors": authors,
        "doi": resource.doi,
        "preprint_id": preprint_id.id_value if preprint_id else "",
        "title": resource.title_tex,
    }
    if zbl_id and zbl_id.id_value:
        metadata["zbl_id"] = zbl_id.id_value

    fetchers = (
        (citedby_crossref, CROSSREF),
        (citedby_zbmath, ZBMATH),
        (citedby_ads, ADS),
    )
    raw_by_provider = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        pending = {executor.submit(fetch, metadata): name for fetch, name in fetchers}
        for done in concurrent.futures.as_completed(pending):
            try:
                found = done.result()
            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                continue
            if found:
                raw_by_provider[pending[done]] = found

    refs = []
    for name, found in raw_by_provider.items():
        refs.extend(citations_to_refs(name, found))

    return built_citations(refs)