Coverage for sites/ptf_tools/ptf_tools/doi.py: 19%

245 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-19 19:20 +0000

1import os 

2from datetime import datetime 

3 

4import requests 

5from lxml import etree 

6 

7from django.conf import settings 

8from django.core.exceptions import ObjectDoesNotExist 

9from django.template.loader import render_to_string 

10 

11from mersenne_tools.models import DOIBatch 

12from ptf.display.resolver import find_id_type 

13 

14 

def get_doibatch(resource):
    """
    Return the DOIBatch attached to a resource, if there is one.

    @param resource: object exposing a ``doibatch`` attribute
    @return: the value of ``resource.doibatch``, or None when reading it
             raises ObjectDoesNotExist
    """
    try:
        return resource.doibatch
    except ObjectDoesNotExist:
        # No batch has been recorded for this resource yet.
        return None

23 

24 

def get_or_create_doibatch(resource):
    """
    Refresh and return the DOIBatch describing the DOI state of a resource.

    The DOI is first resolved through ``settings.DOI_BASE_URL`` without following
    redirects. When the resolver answers with a 302, the batch is created or
    updated immediately: "Enregistré" if the redirect targets the resource's own
    URL, "Erreur" otherwise. For any other answer, an existing batch is either
    deleted (articles belonging to a book container) or refreshed with
    ``checkDOIBatch``.

    @param resource: resource exposing ``doi``, ``get_url_absolute()`` and ``classname``
    @return: the new or updated DOIBatch, or None when there is no batch to report
    @raise requests.exceptions.RequestException: when the resolver cannot be
           reached or does not answer within the timeout
    """
    url = settings.DOI_BASE_URL + resource.doi
    # Bounded wait: without a timeout a stalled resolver would block the caller forever.
    r = requests.get(url, allow_redirects=False, timeout=30)

    if r.status_code == 302:
        # The DOI resolves: compare its target with the resource's own URL.
        if resource.get_url_absolute() == r.headers["Location"]:
            status = "Enregistré"
            log = "Vérifié sur CROSSREF"
        else:
            status = "Erreur"
            log = "Mauvaise URL pour le DOI !!!/à réenregistrer"

        doibatch = get_doibatch(resource)
        if doibatch:
            doibatch.status = status
            doibatch.log = log
        else:
            doibatch = DOIBatch(resource=resource, status=status, log=log)
        doibatch.save()
        return doibatch

    doibatch = get_doibatch(resource)

    # Book parts are registered at container level, so the batch attached to a
    # book part cannot be queried: it was only created to display "En cours"
    # at book-part level.
    if (
        doibatch
        and resource.classname == "Article"
        and resource.my_container.ctype.startswith("book")
    ):
        doibatch.delete()
        doibatch = None

    if doibatch:
        doibatch = checkDOIBatch(doibatch)

    return doibatch

67 

68 

69# recordDOI par resource (article) 

70# problématique liée à l'enregistrement des DOI chez CROSSREF : 

71# - pour enregistrer un DOI, on utilise le DOI du journal comme référence : CROSSREF prend ça comme une demande d'enregistrement/modification ! 

72# du DOI du journal... 

73# ce qui se passe lorsque l'on envoie plusieurs requêtes les unes à la suite des autres (Record all DOIs), c'est que l'ordre de traitement est 

74# différent (aléatoire) de l'ordre d'envoi et on obtient ces erreurs : 

75# "Record not processed because submitted version: 201810150907372216 is less or equal to previously submitted version {1}" 

76# ( MAIS le record impliqué ici est celui du journal, celui de l'article ne pose globalement pas de pb) 

77# car il y a un timestamp dans chaque requête 

78# 

79# pour contrer ces erreurs (avant on ne diagnostiquait que le nombre de failure_count et donc il y en avait une) il faut interpréter le xml de retour :: 

80# <record_diagnostic status="Success"> 

81# <doi>10.5802/alco.21</doi> 

82# <msg>Successfully updated</msg> 

83# C'est ce qui est retenu (dans checkDOIBatch). 

84# 

85 

86 

# strftime pattern producing the <month>/<day>/<year> elements inserted in the deposit XML.
_CROSSREF_FULL_DATE_FORMAT = "<month>%m</month><day>%d</day><year>%Y</year>"


def _append_record_doi_log(message):
    """Append one line to ``record_doi.log`` in ``settings.LOG_DIR``."""
    with open(
        os.path.join(settings.LOG_DIR, "record_doi.log"), "a", encoding="utf-8"
    ) as file_:
        file_.write(message + "\n")


def _last_year_of_range(year):
    """
    Return ``year`` unchanged when it parses as a single year ("%Y"); otherwise
    assume a range such as "2010-2011" and return its second year.
    """
    try:
        datetime.strptime(year, "%Y")
    except ValueError:
        return year.split("-")[1]
    return year


def recordDOI(resource, testing=False):
    """
    Build the Crossref deposit XML of a resource and, unless testing, upload it.

    Any DOIBatch already attached to the resource is replaced by a fresh one with
    status "En cours"; the rendered XML is stored on that batch. For a book
    container, every book part additionally gets its own fresh "En cours" batch.
    The resource receives a ``DOIdate`` attribute holding the publication date
    markup expected by the templates.

    @param resource: Article, Container or TranslatedArticle (selected by ``classname``)
    @param testing: Boolean set to True when testing; the XML is rendered and
                    stored but nothing is sent to Crossref
    @return: data {'status': 200 when Crossref answers SUCCESS, 404 otherwise,
                   'message': first 1000 characters of the Crossref response
                              (only present when a request was sent)}
    @raise requests.exceptions.RequestException: when Crossref cannot be reached
           or does not answer within the timeout
    """
    previous_batch = get_doibatch(resource)
    if previous_batch:
        previous_batch.delete()

    doibatch = DOIBatch(resource=resource, status="En cours")
    doibatch.save()

    context = {
        "doi_batch_id": f"{doibatch.pk:04d}",
        # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#timestamp
        # The formatted value is 20 characters long but must be 19: drop the last digit.
        "timestamp": datetime.now().strftime("%Y%m%d%H%M%S%f")[:19],
        "mail": settings.CROSSREF_MAIL,
    }
    template = f"crossref/{resource.classname.lower()}_doi_register.xml"

    # Work out the publication date to declare for the resource.
    if resource.classname == "Article":
        # An article without contributors is registered as a posted-content of type other
        # https://data.crossref.org/reports/help/schema_doc/4.4.2/schema_4_4_2.html#posted_content
        if not resource.get_author_contributions():
            template = "crossref/posted-content.xml"

        container = resource.my_container
        if resource.date_published:
            # Date in the format expected by CROSSREF
            resource.DOIdate = resource.date_published.strftime(_CROSSREF_FULL_DATE_FORMAT)
            # Also normalise a year range carried by the container.
            year = _last_year_of_range(container.year)
            if year != container.year:
                container.year = year
        elif resource.date_online_first:
            # Online First (the container year is '0' in that case)
            # TODO: Is it possible to send 2 dates to Crossref ?
            # You can send multiple <publication_date> but it is for multiple media_type (print vs online)
            resource.DOIdate = resource.date_online_first.strftime(_CROSSREF_FULL_DATE_FORMAT)
        else:
            # No date on the article: extrapolate from the year of the volume.
            year = _last_year_of_range(container.year)
            resource.DOIdate = "<year>%s</year>" % year
            if year != container.year:
                container.year = year

    elif resource.classname == "Container":
        if resource.ctype.startswith("book"):
            # Book chapters are not handled here: the template registers them
            # together with the book. The template depends on the collection.
            if resource.my_collection.issn or resource.my_collection.e_issn:
                template = "crossref/book_series_metadata.xml"
            else:
                template = "crossref/book_set_metadata.xml"
            # A standalone book (not part of a series) would need its own template.
            context["book_type"] = resource.ctype[5:].replace("-", "_")
            for bookpart in resource.article_set.all():
                # Use dedicated names so that the container's own batch (which
                # receives the XML and the Crossref status below) is not replaced
                # by the batch of the last book part.
                bookpart_batch = get_doibatch(bookpart)
                if bookpart_batch:
                    bookpart_batch.delete()
                DOIBatch(resource=bookpart, status="En cours").save()

        elif resource.ctype == "issue":
            # TODO
            template = "issue.xml"
            resource.DOIdate = "<year>%s</year>" % _last_year_of_range(resource.year)

    elif resource.classname == "TranslatedArticle":
        _append_record_doi_log(resource.doi)
        resource.DOIdate = resource.date_published.strftime(_CROSSREF_FULL_DATE_FORMAT)
        context["collection"] = resource.original_article.get_top_collection()

    context["resource"] = resource

    preprint_id = preprint_type = None
    extid = resource.extid_set.filter(id_type="preprint").first()
    if extid is not None:
        preprint_id = extid.id_value
        preprint_type = find_id_type(preprint_id)
        # crossref allows "doi" and "arxiv", but not "hal"
        if preprint_type == "hal":
            preprint_type = "other"
    context["preprint_id"] = preprint_id
    context["preprint_type"] = preprint_type

    rdoi_extid = resource.extid_set.filter(id_type="rdoi").first()
    context["rdoi"] = rdoi_extid.id_value if rdoi_extid is not None else None

    try:
        xml = render_to_string(template_name=template, context=context)
        doibatch.xml = xml
        doibatch.save()
    except Exception as e:
        if resource.classname == "TranslatedArticle":
            _append_record_doi_log(str(e))
        raise

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        if resource.classname == "TranslatedArticle":
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource.original_article)
            _append_record_doi_log("Call crossref")
        elif resource.classname == "Container" and resource.ctype.startswith("book"):
            # No DOI at container level: use the first book part to find the credentials
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource.article_set.first())
        else:
            crossref_user, crossref_pwd = get_user_pwd_crossref(resource)

        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        # Bounded wait: without a timeout a stalled upload would block the caller forever.
        r = requests.post(crossref_batch_url, files=files, timeout=60)
        if r.status_code == 200:
            # Parse from bytes: lxml refuses str input carrying an encoding declaration.
            titles = etree.XML(r.text.encode("utf8")).xpath("//*/title")
            if titles and titles[0].text == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
        # Truncate the text, not the encoded bytes: cutting bytes can split a
        # multi-byte character and make the decoding fail.
        data["message"] = r.text[:1000]

    if resource.classname == "TranslatedArticle":
        _append_record_doi_log(doibatch.status)
    return data

269 

270 

def get_user_pwd_crossref(resource):
    """
    Look up the Crossref credentials matching the DOI prefix of a resource.

    The code following the dot in the DOI prefix ("5802" for "10.5802/alco.21")
    selects the settings ``CROSSREF_USER_<code>`` and ``CROSSREF_PWD_<code>``.
    When either of them is missing, the ``CROSSREF_USER_5802`` /
    ``CROSSREF_PWD_5802`` pair is returned instead.

    @param resource: object whose ``doi`` attribute is a DOI string
    @return: tuple (crossref_user, crossref_pwd)
    """
    registrant_code = resource.doi.split("/")[0].split(".")[1]
    try:
        credentials = (
            getattr(settings, f"CROSSREF_USER_{registrant_code}"),
            getattr(settings, f"CROSSREF_PWD_{registrant_code}"),
        )
    except AttributeError:
        # No dedicated account for this prefix: fall back to the 5802 one.
        credentials = (settings.CROSSREF_USER_5802, settings.CROSSREF_PWD_5802)
    return credentials

285 

286 

def checkDOIBatch(doibatch):
    """
    Check the status of a DOI batch with an HTTP request to Crossref and store it.

    The diagnostic XML returned by Crossref is interpreted as follows:
    - batch "completed": each <record_diagnostic> is inspected; the batch becomes
      "Enregistré" when the diagnostic of the resource's DOI is a "Success" and
      "Erreur" otherwise, and the log receives the diagnostic message;
    - batch "in_process" or "queued": the batch stays "En cours";
    - any other batch status, or a non-200 HTTP answer: the batch becomes "Erreur".

    @param doibatch: DOIBatch
    @return: DOIBatch with status and log updated (and saved)
    @raise requests.exceptions.RequestException: when Crossref cannot be reached
           or does not answer within the timeout
    """
    resource = doibatch.resource
    crossref_user, crossref_pwd = get_user_pwd_crossref(resource)
    url = settings.CROSSREF_BASE_CHECKBATCH_URL_TPL % (crossref_user, crossref_pwd)
    url = url.format(doibatch.pk)
    # Bounded wait: without a timeout a stalled server would block the caller forever.
    r = requests.get(url, timeout=30)

    if r.status_code == 200:
        # Parse from bytes: lxml refuses str input carrying an encoding declaration.
        tree = etree.XML(r.text.encode("utf8"))
        batch_status = tree.xpath("/doi_batch_diagnostic")[0].attrib["status"]

        if batch_status == "completed":
            # The batch has been processed; these values remain when it holds no diagnostic.
            doibatch.status = "batch terminé"
            doibatch.log = "Pas de DOI associé dans le batch : voir le xml"
            # NOTE(review): a diagnostic for any other DOI of the batch also sets
            # "Erreur", and the log keeps the message of the last diagnostic —
            # confirm this is the intended behaviour.
            for diag in tree.xpath("//*/record_diagnostic"):
                doi = diag.xpath("doi")[0].text
                log = diag.xpath("msg")[0].text
                status = diag.attrib["status"]
                if doi == resource.doi and status == "Success":
                    doibatch.status = "Enregistré"
                else:
                    doibatch.status = "Erreur"
                doibatch.log = log

        elif batch_status in ("in_process", "queued"):
            doibatch.status = "En cours"
            doibatch.log = "batch en cours de traitement"
        else:
            # Refreshed too early after recording the DOI
            doibatch.status = "Erreur"
            doibatch.log = (
                f"Attention, il se peut qu'il faille rafraichir un peu plus tard {r.text} "
            )
    else:
        doibatch.status = "Erreur"
        doibatch.log = r.text

    doibatch.save()
    return doibatch

337 

338 

def removeOldDataInCrossref(article, testing=False):
    """
    The CRAS 2002-2019 articles were registered by Elsevier
    To remove some metadata in Crossref, we need to provide a separate XML with the fields to remove

    Any DOIBatch already attached to the article is replaced by a fresh one with
    status "En cours"; the rendered XML is stored on that batch.

    @param article: article whose old Crossref metadata must be removed
    @param testing: Boolean set to True when testing; the XML is printed and
                    stored but nothing is sent to Crossref
    @return: data {'status': 200 when Crossref answers SUCCESS, 404 otherwise,
                   'message': first 1000 characters of the Crossref response
                              (only present when a request was sent)}
    @raise requests.exceptions.RequestException: when Crossref cannot be reached
           or does not answer within the timeout
    """
    previous_batch = get_doibatch(article)
    if previous_batch:
        previous_batch.delete()

    doibatch = DOIBatch(resource=article, status="En cours")
    doibatch.save()

    context = {
        "resource": article,
        "doi_batch_id": f"{doibatch.pk:04d}",
        # The formatted value is 20 characters long but must be 19: drop the last digit.
        "timestamp": datetime.now().strftime("%Y%m%d%H%M%S%f")[:19],
        "mail": settings.CROSSREF_MAIL,
    }
    template = "crossref/article_remove_old_data.xml"

    if article.date_published:
        article.DOIdate = article.date_published.strftime(
            "<month>%m</month><day>%d</day><year>%Y</year>"
        )

    xml = render_to_string(template_name=template, context=context)

    if testing:
        print(xml)

    doibatch.xml = xml
    doibatch.save()

    files = {"file": (f"{doibatch.pk}.xml", xml)}

    data = {"status": 404}
    if not testing:
        crossref_user, crossref_pwd = get_user_pwd_crossref(article)
        crossref_batch_url = settings.CROSSREF_BATCHURL_TPL % (crossref_user, crossref_pwd)

        # Bounded wait: without a timeout a stalled upload would block the caller forever.
        r = requests.post(crossref_batch_url, files=files, timeout=60)
        if r.status_code == 200:
            # Parse from bytes: lxml refuses str input carrying an encoding declaration.
            titles = etree.XML(r.text.encode("utf8")).xpath("//*/title")
            if titles and titles[0].text == "SUCCESS":
                data["status"] = r.status_code
        elif r.status_code == 401:
            doibatch.status = "Erreur"
            doibatch.log = "Pb d'authentification"
            doibatch.save()
        else:
            doibatch.status = "Erreur"
            doibatch.save()
        # Truncate the text, not the encoded bytes: cutting bytes can split a
        # multi-byte character and make the decoding fail.
        data["message"] = r.text[:1000]

    return data