Coverage for sites/ptf_tools/ptf_tools/doaj.py: 60%

195 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-19 19:20 +0000

1import json 

2import re 

3 

4import requests 

5 

6from django.conf import settings 

7from django.db.models import Q 

8from django.http import Http404 

9 

10from mersenne_tools.models import DOAJBatch 

11from ptf import model_helpers 

12from ptf.cmds.xml.xml_utils import remove_html 

13from ptf.models import Container 

14 

15 

def has_date_online_first(document):
    """Tell whether *document* carries a usable ``date_online_first``.

    Returns ``False`` when the attribute does not exist; otherwise returns
    the attribute value itself (truthy when a date is set, falsy otherwise).
    """
    if not hasattr(document, "date_online_first"):
        return False
    return document.date_online_first

18 

19 

def has_date_published(document):
    """Tell whether *document* carries a usable ``date_published``.

    Returns ``False`` when the attribute does not exist; otherwise returns
    the attribute value itself (truthy when a date is set, falsy otherwise).
    """
    if not hasattr(document, "date_published"):
        return False
    return document.date_published

22 

23 

def has_publication_date(document):
    """Return a truthy value when *document* has any publication date.

    The online-first date is checked first; the published date is only
    consulted when the online-first check yields a falsy value.
    """
    online_first = has_date_online_first(document)
    if online_first:
        return online_first
    return has_date_published(document)

26 

27 

def is_published(document):
    """Return ``True`` unless *document* is flagged ``do_not_publish``.

    A document without a ``do_not_publish`` attribute counts as published.
    """
    return not getattr(document, "do_not_publish", False)

32 

33 

def get_names(resource, role):
    """Collect the contributors of *resource* that have the given *role*.

    Each contributor becomes a dict with:
      - ``name``: ``str()`` of the contribution;
      - ``affiliation``: the non-empty addresses joined with ``"; "``
        (only when the contribution has addresses);
      - ``orcid_id``: the full ``https://orcid.org/...`` URL (only when the
        stored ORCID, stripped of whitespace and non-ASCII characters,
        matches the ``dddd-dddd-dddd-ddd[dX]`` pattern).

    Returns the list of dicts in iteration order of the contributions.
    """
    people = []
    for contrib in resource.contributions.all():
        if contrib.role != role:
            continue

        entry = {"name": str(contrib)}

        affiliations = contrib.contribaddress_set.all()
        if affiliations:
            entry["affiliation"] = "; ".join(a.address for a in affiliations if a.address)

        raw_orcid = contrib.orcid
        if raw_orcid:
            # Drop surrounding whitespace and any non-ASCII character before validating.
            cleaned = raw_orcid.strip().encode("ascii", "ignore").decode("utf-8")
            if re.match(r"^\d{4}-\d{4}-\d{4}-\d{3}(\d|X)$", cleaned):
                entry["orcid_id"] = "https://orcid.org/" + cleaned

        people.append(entry)
    return people

49 

50 

def get_token(colid):
    """Return the DOAJ API token configured for collection *colid*.

    ``"PCJ"`` and ``"OJMO"`` have dedicated tokens; any id that starts with
    ``"CR"`` and is longer than two characters shares the CR token.
    Returns ``None`` for every other collection id.
    """
    if colid == "PCJ":
        return settings.DOAJ_TOKEN_PCJ
    if colid == "OJMO":
        return settings.DOAJ_TOKEN_OJMO
    if colid.startswith("CR") and len(colid) > 2:
        return settings.DOAJ_TOKEN_CR
    return None

60 

61 

def doaj_pid_register(pid):
    """Bulk-register the publishable articles of a container with DOAJ.

    The resource identified by *pid* must be a ``Container`` that belongs to
    a collection; otherwise ``Http404`` is raised. When a DOAJ token exists
    for the collection, every article that is published and has a
    publication date is converted with ``doaj_resource_register``; the
    non-empty payloads are sent in a single bulk POST.

    The outcome is stored in ``DOAJBatch`` rows: the container is marked
    ``REGISTERED`` or ``ERROR`` (with the response text as log), and on
    success each article that was actually part of the POST is marked
    ``REGISTERED``. Articles for which no payload was produced are not
    marked, since they were never sent to DOAJ.

    Returns:
        ``(data, response)``. ``response`` is the ``requests`` response of
        the bulk POST, or ``None`` when nothing was sent. ``data`` is a
        summary dict (``doaj_status``, ``doaj_message``, ``doaj_id``,
        ``doaj_location``) when DOAJ answered 201, and ``None`` otherwise.

    Raises:
        Http404: unknown pid, resource that is not a container, or container
            without a collection.
    """
    resource = model_helpers.get_resource(pid)
    if not resource:
        raise Http404

    container = resource.container if resource.classname == "Container" else None
    if not container:
        raise Http404

    collection = container.get_collection()
    if not collection:
        raise Http404

    data, response = None, None
    payloads = []
    # Articles whose payload is part of the POST; only these may be flagged
    # as registered afterwards.
    submitted_articles = []

    token = get_token(collection.pid)
    if token:
        for article in container.article_set.all():
            if is_published(article) and has_publication_date(article):
                # A dedicated name keeps per-article payloads from leaking
                # into the returned ``data`` summary.
                payload = doaj_resource_register(article)
                if payload:
                    payloads.append(payload)
                    submitted_articles.append(article)

    if payloads:
        url = f"https://doaj.org/api/bulk/articles?api_key={token}"
        response = requests.post(url, json=payloads)
        container_batch = DOAJBatch.objects.get_or_create(resource=resource)[0]
        if response.status_code == 201:
            container_batch.status = DOAJBatch.REGISTERED
            created = response.json()
            data = {
                "doaj_status": response.status_code,
                "doaj_message": [r["status"] for r in created],
                "doaj_id": [r["id"] for r in created],
                "doaj_location": [r["location"] for r in created],
            }
            for article in submitted_articles:
                article_batch = DOAJBatch.objects.get_or_create(resource=article)[0]
                article_batch.status = DOAJBatch.REGISTERED
                article_batch.save()
        else:
            container_batch.status = DOAJBatch.ERROR
            if response.text:
                container_batch.log = response.text
        container_batch.save()
    return data, response

112 

113 

def doaj_resource_register(resource):
    """Build the DOAJ article record (``admin`` + ``bibjson``) for *resource*.

    *resource* must be an ``Article`` or a ``Container``. ``None`` is
    returned when the resource is of another class, has no container, no
    DOI or no collection, or when its collection id starts with ``"CR"``
    while the DOI does not start with ``"10.5802/cr"``.

    The year defaults to the last ``-``-separated part of the container
    year; month and year are overridden by the online-first date, or else
    by the published date, when one is set. Volume and number are only
    filled for containers that are not "to appear".

    Returns:
        The dict to send to DOAJ, or ``None`` when the resource is skipped.
    """
    kind = resource.classname
    if kind == "Article":
        document = resource.article
        container = document.my_container
        fpage, lpage = document.fpage, document.lpage
    elif kind == "Container":
        document = container = resource.container
        fpage = lpage = ""
    else:
        return None

    if not container:
        return None

    doi = resource.doi
    collection = container.get_collection()
    if not (doi and collection):
        return None

    # CR collections only accept DOIs under the 10.5802/cr prefix.
    foreign_cr_doi = collection.pid.startswith("CR") and not doi.startswith("10.5802/cr")
    if foreign_cr_doi:
        return None

    month = ""
    year = "" if container.year == "0" else container.year.split("-")[-1]

    # The online-first date wins over the published date.
    pub_date = None
    if has_date_online_first(document):
        pub_date = document.date_online_first
    elif has_date_published(document):
        pub_date = document.date_published
    if pub_date is not None:
        month = pub_date.strftime("%B")
        year = pub_date.strftime("%Y")

    volume = number = ""
    if not container.to_appear():
        is_cr = container.is_cr()
        volume = container.volume or ""
        issue = container.number
        # For CR containers, numbers starting with "G" are not sent.
        if issue and not (is_cr and issue[0] == "G"):
            number = issue

    eissn = collection.e_issn
    pissn = ""  # print ISSN is currently not sent (would be collection.issn)
    colid = collection.pid.lower()
    registered_domain = settings.SITE_REGISTER[colid]["site_domain"]
    domain = "peercommunityjournal.org" if colid == "pcj" else registered_domain

    url = f"https://{domain}/articles/{doi}/"

    lang = resource.lang
    if not lang or lang == "und":
        lang = ""

    authors = get_names(resource, "author")
    publisher = container.my_publisher
    pub_name = (publisher.pub_name if publisher else "") or ""

    title = remove_html(document.title_tex)

    # Keep non-MSC keywords written in the resource language.
    keywords = []
    for kwd in document.kwd_set.all():
        if kwd.type == "msc" or kwd.lang != lang:
            continue
        keywords.append(kwd.value)

    # Ordering by "lang" puts an "en" abstract ahead of an "und" one.
    candidates = document.abstract_set.all().filter(Q(lang="en") | Q(lang="und"))
    abstract = candidates.order_by("lang").first()
    abstract_text = remove_html(abstract.value_tex) if abstract else ""

    identifiers = [{"type": "doi", "id": doi}]
    if eissn:
        identifiers.append({"type": "eissn", "id": eissn})
    if pissn:
        identifiers.append({"type": "pissn", "id": pissn})
    if not eissn and colid == "pcj":
        # Fallback e-ISSN used for PCJ when the collection has none.
        identifiers.append({"type": "eissn", "id": "2804-3871"})

    journal = {
        "country": "FR",
        "title": collection.title_tex,
        "start_page": fpage,
        "end_page": lpage,
        "language": [lang],
        "number": number,
        "volume": volume,
        "publisher": pub_name,
    }
    return {
        "admin": {"publisher_record_id": doi},
        "bibjson": {
            "journal": journal,
            "title": title,
            "month": month,
            "year": year,
            "abstract": abstract_text,
            "author": authors,
            "keywords": keywords,
            "link": [{"url": url, "type": "fulltext", "content_type": "HTML"}],
            "identifier": identifiers,
        },
    }

204 

205 

def doaj_delete_article(doi):
    """Delete the DOAJ record of the article identified by *doi*.

    The collection token is resolved from the local resource when one is
    found for the DOI (an empty collection id is used otherwise). The
    article is looked up through the DOAJ search API, then deleted by its
    DOAJ id.

    Returns:
        A status string: ``"<doi> deleted"`` when DOAJ answers 204 to the
        delete call, ``"<doi> not found or article already deleted"`` when
        the search succeeds without results, and ``"<doi> deletion failed"``
        in every other case.
    """
    resource = model_helpers.get_resource_by_doi(doi)
    colid = resource.article.my_container.get_collection().pid if resource else ""
    token = get_token(colid)

    search = requests.get(f"https://doaj.org/api/search/articles/{doi}")
    if search.status_code != 200:
        return doi + " deletion failed"

    hits = search.json().get("results")
    if not hits:
        return doi + " not found or article already deleted"

    article_id = hits[0].get("id", "")
    deletion = requests.delete(f"https://doaj.org/api/articles/{article_id}?api_key={token}")
    if deletion.status_code == 204:
        return doi + " deleted"
    return doi + " deletion failed"

226 

227 

def doaj_delete_articles_in_collection(colid, check_published=True):
    """Delete from DOAJ the articles of every container whose pid starts with *colid*.

    With ``check_published`` set (the default), only articles that are
    published and have a publication date are deleted; otherwise every
    article is. Each container is printed as it is processed. The operation
    is best-effort: an exception raised for one article is printed and the
    loop moves on to the next article.
    """
    for container in Container.objects.filter(pid__startswith=colid):
        print(container)
        for article in container.article_set.all():
            try:
                eligible = not check_published or (
                    is_published(article) and has_publication_date(article)
                )
                if eligible:
                    doaj_delete_article(article.doi)
            except Exception as ex:
                print(ex)

240 

241 

def doaj_retrieve_applications():
    """Download a fixed list of DOAJ journal records to local JSON files.

    Each hard-coded id is queried through the DOAJ journal search API. When
    the call answers 200 with a non-empty result list, the whole list is
    dumped to ``<title of the first result>.json`` in the current working
    directory. Ids that fail or return nothing are skipped silently.
    """
    application_ids = (
        "798d4f21a22d43579cea322bed8a560e",
        "7a0889a89de64979a3d5e26aace31db7",
        "0e30bf1ac2514bcda8d1cc0855237cd4",
        "4bcd45d13d23475bb246cbce9eaed9ee",
        "d85467c6c5914759886aa29481cce4b4",
        "11b60f2f3dd64ec087510dff3d82e0ab",
        "71951ece12524e45abac7628de6a8d22",
    )

    for app_id in application_ids:
        response = requests.get("https://doaj.org/api/search/journals/" + app_id)
        if response.status_code != 200:
            continue

        results = response.json().get("results")
        if not results:
            continue

        filename = results[0]["bibjson"]["title"] + ".json"
        with open(filename, "w") as fio:
            json.dump(results, fio)