Coverage for apps/ptf/tex.py: 69%

535 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-02-28 09:09 +0000

1import os 

2import subprocess 

3import tempfile 

4 

5import pypdf 

6from pylatexenc.latexencode import unicode_to_latex 

7 

8from django.conf import settings 

9 

10from ptf import model_helpers 

11from ptf import models 

12from ptf import utils 

13from ptf.cmds.xml import xml_utils 

14from ptf.cmds.xml.jats.jats_parser import get_tex_from_xml 

15from ptf.display import resolver 

16 

17 

def get_tex_keyword_date_published(colid, article, lang=""):
    r"""
    Return the TeX command used to write the publication date of an article.

    :param colid: pid of the collection (ex: "AIF", "CRMATH")
    :param article: article object; only ``date_online_first`` is read, and only
        when colid is one of the CR* collections
    :param lang: language of a translation; "" for the article itself
    :return: the TeX keyword, without its ``{date}`` argument
    """
    posted_collections = ("AHL", "AIF", "OGEO", "JTNB")
    cr_collections = ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")

    keyword = "\\dateposted" if colid in posted_collections else "\\datepublished"

    # CR* collections: without an online first date, the publication date is
    # written with \dateposted
    if colid in cr_collections and not article.date_online_first:
        keyword = "\\dateposted"

    # A translation always uses its own metadata command, whatever the collection
    if lang != "":
        keyword = "\\CDRsetmeta{traduction_date_posted}"

    return keyword

29 

30 

def get_tex_keyword_date_online_first(colid, article, lang=""):
    r"""
    Return the TeX command used to write the online first date of an article.

    The keyword is the same for every collection, article and language: the
    parameters are not used. They mirror get_tex_keyword_date_published.

    :return: the TeX keyword, without its ``{date}`` argument
    """
    return "\\dateposted"

33 

34 

def read_tex_file(filename):
    """
    Read a tex file. Detects if it is in utf-8 or iso-8859-1.

    :param filename: path of the file to read
    :return: the list of lines of the file (line endings included);
        an empty list if filename is not an existing regular file
    """
    if not os.path.isfile(filename):
        return []

    try:
        with open(filename, encoding="utf-8") as f_:
            return f_.readlines()
    except UnicodeDecodeError:
        # Not valid utf-8: read the file again as iso-8859-1 (every byte can be decoded)
        with open(filename, encoding="iso-8859-1") as f_:
            return f_.readlines()

52 

53 

def convert_file_to_utf8(article_path, from_name, to_name):
    """
    Write a utf-8 copy of a tex file and upload it to mathdoc-tex.

    article_path/from_name is read locally (utf-8 or iso-8859-1), written in utf-8
    to a temporary file below LOG_DIR/tmp/, then copied with scp to
    article_path/to_name on mathdoc-tex.

    :param article_path: folder of the article
    :param from_name: name of the file to read
    :param to_name: name of the file to create on mathdoc-tex
    """
    filename = os.path.join(article_path, from_name)
    lines = read_tex_file(filename)

    user = settings.MERSENNE_TEX_USER
    prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(prefix)

    # delete=False: the file has to remain on disk after the "with" block, for scp.
    # NOTE(review): the temporary file is never removed afterwards - confirm this is intended
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=prefix, delete=False
    ) as f:
        fpath = f.name  # ex: /tmp/Rxsft
        f.write("".join(lines))

    out_filename = os.path.join(article_path, to_name)
    # copy to mersenne-tex
    cmd = f"scp {fpath} {user}@mathdoc-tex:{out_filename}"
    utils.execute_cmd(cmd)

71 

72 

def write_tex_file(filename, lines, create_temp_file=False):
    """
    Write the lines of a tex source in a utf-8 file.

    :param filename: path of the file to write; ignored if create_temp_file is True
    :param lines: list of lines (line endings included)
    :param create_temp_file: write a new temporary file below LOG_DIR/tmp/
        instead of filename. The temporary file is not deleted.
    :return: the path of the file that was written
    """
    content = "".join(lines)

    if not create_temp_file:
        with open(filename, "w", encoding="utf-8") as f_:
            f_.write(content)
        return filename

    prefix = os.path.join(settings.LOG_DIR, "tmp/")
    # NamedTemporaryFile does not create the folder: make sure it exists,
    # like the other functions that write below LOG_DIR/tmp/
    resolver.create_folder(prefix)

    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=prefix, delete=False
    ) as f:
        f.write(content)
        return f.name  # ex: /tmp/Rxsft

85 

86 

def insert_date_published(new_lines, article, colid, begin_document_pos, lang=""):
    r"""
    Insert the publication date of the article in the lines of a tex source.

    :param new_lines: list of lines, modified in place
    :param article: article object (date_published, date_online_first and
        my_container are read)
    :param colid: pid of the collection
    :param begin_document_pos: index in new_lines where the date is inserted
    :param lang: language of a translation; "" for the article itself
    :return: begin_document_pos, shifted by the number of inserted lines
    """
    if article.date_published is None:
        # No date to write. Returning here also protects the thematic issue case
        # below, which needs both the date and the keyword of the first insertion.
        return begin_document_pos

    date_str = article.date_published.strftime("%Y-%m-%d")

    keyword = get_tex_keyword_date_published(colid, article, lang)
    new_lines.insert(begin_document_pos, f"{keyword}{{{date_str}}}\n")
    begin_document_pos += 1

    container = article.my_container
    if container is not None:
        is_thematic_issue = len(container.title_html) > 0
        is_issue_finalized = not container.with_online_first
        is_thematic_finalized = is_thematic_issue and is_issue_finalized

        if is_thematic_finalized and article.date_online_first is None:
            # Finalized thematic issue where the article did not go through online first
            # => Add \datepublished so that "Issue date :" appears in the PDF
            keyword2 = "\\datepublished"
            if keyword2 != keyword:
                new_lines.insert(begin_document_pos, f"{keyword2}{{{date_str}}}\n")
                begin_document_pos += 1

    return begin_document_pos

109 

110 

def insert_date_online_first(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the online first date of the article in the lines of a tex source.

    :param new_lines: list of lines, modified in place
    :param article: article object (date_online_first is read)
    :param colid: pid of the collection
    :param begin_document_pos: index in new_lines where the date is inserted
    :param lang: language of a translation; "" for the article itself
    :return: begin_document_pos, incremented if a line was inserted
    """
    if article.date_online_first is None:
        return begin_document_pos

    keyword = get_tex_keyword_date_online_first(colid, article, lang)
    date_str = article.date_online_first.strftime("%Y-%m-%d")
    new_lines.insert(begin_document_pos, f"{keyword}{{{date_str}}}\n")

    return begin_document_pos + 1

119 

120 

def insert_end_page(new_lines, article, colid, begin_document_pos):
    r"""
    Insert the command that defines the last page of the article.

    The line is inserted just after the position begin_document_pos.

    :param new_lines: list of lines, modified in place
    :param article: article object (lpage is read)
    :param colid: pid of the collection (not used)
    :param begin_document_pos: index in new_lines; the command is inserted at
        begin_document_pos + 1
    :return: begin_document_pos, unchanged
    """
    if article.lpage:
        end_page_cmd = f"\\makeatletter\\def\\cdr@end@page{{{article.lpage}}}\\makeatother\n"
        new_lines.insert(begin_document_pos + 1, end_page_cmd)

    return begin_document_pos

127 

128 

def replace_dates_in_tex(lines, article, colid, replace_frontpage_only=False, lang=""):
    r"""
    Add or replace \dateposted and \datepublished in the source Tex.

    Comments and blank lines are copied as is. For the other lines:
    - existing date commands are replaced with the dates of the article
      (replace_frontpage_only: they are removed, then inserted before \begin{document})
    - "published" is removed from the \documentclass line
    - the \makeatletter\def\cdr@end@page line is removed
    - replace_frontpage_only: \bibliography{xxx} becomes \bibliography{xxx_FP}
    Dates that were not found are inserted just before \begin{document}.
    replace_frontpage_only: the end page command is inserted after \begin{document}.

    :param lines: list of lines of the source Tex
    :param article: article object
    :param colid: pid of the collection
    :param replace_frontpage_only: True when only the front page is recompiled
    :param lang: language of a translation; "" for the article itself
    :return: (new_lines, bib_name). bib_name is the argument of \bibliography,
        "" if it was not found or if replace_frontpage_only is False
    """
    new_lines = []
    bib_name = ""

    keyword_date_published = get_tex_keyword_date_published(colid, article, lang)
    keyword_date_online_first = get_tex_keyword_date_online_first(colid, article, lang)
    # NOTE(review): both keywords are identical for some collections; the online
    # first branch below is then never reached - confirm this is the intent
    date_published_cmd = f"{keyword_date_published}{{"
    date_online_first_cmd = f"{keyword_date_online_first}{{"
    any_date_cmd = (
        "\\datepublished{",
        "\\dateposted{",
        "\\CDRsetmeta{traduction_date_posted}{",
    )

    found_date_online_first = False
    found_date_published = False
    begin_document_pos = -1

    for line in lines:
        # j: position of the first character that is neither a space nor a tab
        j = len(line) - len(line.lstrip(" \t"))

        if j >= len(line) or line[j] == "%":
            # blank line or comment
            new_lines.append(line)
            continue

        if replace_frontpage_only and line.startswith(any_date_cmd, j):
            # Remove the existing dates: they are inserted again after the loop
            continue

        if line.startswith(date_published_cmd, j):  # replace existing \datepublished
            found_date_published = True
            insert_date_published(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith(date_online_first_cmd, j):  # replace existing \dateposted
            found_date_online_first = True
            insert_date_online_first(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith("\\begin{document", j):
            # \begin{document}: the dates are added here if not present.
            # Use the actual position in new_lines: lines removed or replaced above
            # make it differ from the position in the source.
            begin_document_pos = len(new_lines)
            new_lines.append(line)

        elif line.startswith(("\\documentclass", "{\\documentclass"), j):
            # remove published from \documentclass to allow compilation
            line = (
                line.replace(",published,", ",")
                .replace(",published", "")
                .replace("published", "")
            )
            new_lines.append(line)

        elif line.startswith("\\makeatletter\\def\\cdr@end@page", j):
            # Command to specify the last page (present in the front page).
            # The line is removed; the command is written again after
            # \begin{document} when replace_frontpage_only is True.
            pass

        elif (
            replace_frontpage_only
            and line.startswith("\\bibliography", j)
            and not line.startswith("\\bibliographystyle", j)
        ):
            end = line.find("}")
            if end > 0:
                bib_name = line[j + 14 : end]  # 14: length of \bibliography{
                new_lines.append("\\bibliography{" + bib_name + "_FP}\n")
            else:
                # No closing brace on this line: keep the line instead of losing it
                new_lines.append(line)

        else:
            new_lines.append(line)

    if begin_document_pos > 0 and not found_date_online_first:
        begin_document_pos = insert_date_online_first(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if begin_document_pos > 0 and not found_date_published:
        begin_document_pos = insert_date_published(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if replace_frontpage_only and begin_document_pos > 0:
        insert_end_page(new_lines, article, colid, begin_document_pos)

    return new_lines, bib_name

226 

227 

def protect_tex(lines, keyword="published"):
    r"""
    Add an option to the \documentclass of a tex source that uses a cedram class.

    The option is added at the end of the option list, before the first "]".
    The option list may end on a line that follows \documentclass.
    Comments, blank lines and sources without "{cedram" are left untouched.

    :param lines: list of lines of the source Tex
    :param keyword: option to add
    :return: a new list of lines
    """
    new_lines = []
    inside_documentclass = False

    for line in lines:
        # j: position of the first character that is neither a space nor a tab
        j = len(line) - len(line.lstrip(" \t"))
        is_comment_or_blank = j >= len(line) or line[j] == "%"
        is_cedram = line.find("{cedram") > 0  # Ignore {article}

        if is_comment_or_blank:
            pass
        elif line.startswith(("\\documentclass", "{\\documentclass"), j):
            # add published to \documentclass after compilation
            closing = line.find("]")
            if closing > 0:
                if is_cedram:
                    line = line[:closing] + "," + keyword + line[closing:]
            else:
                # The option list, if any, ends on a following line
                inside_documentclass = True
        elif inside_documentclass:
            closing = line.find("]")
            if closing == j:
                # The line starts with "]": add the option on its own line, before it
                if is_cedram:
                    new_lines.append(f",{keyword}\n")
                inside_documentclass = False
            elif closing > -1:
                if is_cedram:
                    line = line[:closing] + "," + keyword + line[closing:]
                inside_documentclass = False

        new_lines.append(line)

    return new_lines

265 

266 

def get_tex_corresponding_emails(author_contributions):
    r"""
    Return the emails of the corresponding authors, encoded for a tex source.

    The emails are converted with unicode_to_latex; the underscores escaped by
    the conversion (\_) are restored.

    :param author_contributions: iterable of contributions
        (corresponding and email are read)
    :return: list of emails; contributions without email are ignored
    """
    return [
        unicode_to_latex(contribution.email).replace(r"\_", r"_")
        for contribution in author_contributions
        if contribution.corresponding and contribution.email
    ]

275 

276 

def get_tex_authors(author_contributions):
    r"""
    Return the lines of a tex source that describe the authors.

    For each contribution:
        \author{\firstname{Antoine} \lastname{Lavoisier}}
        \address{Rue sans aplomb, Paris, France}
        \email{a-lavois@lead-free-univ.edu}
    followed by an empty line. \email is written for corresponding authors only.

    :param author_contributions: iterable of contributions
    :return: list of lines
    """
    lines = []

    for contribution in author_contributions:
        first_name = unicode_to_latex(contribution.first_name)
        last_name = unicode_to_latex(contribution.last_name)

        # Optional commands are added inside the braces of \author
        author = f"\\author{{\\firstname{{{first_name}}} \\lastname{{{last_name}}}"
        if contribution.orcid:
            author += f"\\CDRorcid{{{contribution.orcid}}}"
        if contribution.equal_contrib:
            author += "\\IsEqualContrib"
        if contribution.deceased_before_publication:
            author += "\\dead"
        lines.append(author + "}\n")

        for contribaddress in contribution.contribaddress_set.all():
            address = unicode_to_latex(contribaddress.address)
            lines.append(f"\\address{{{address}}}\n")

        # Truthiness test (same as get_tex_corresponding_emails): an email that is
        # None is ignored instead of raising a TypeError
        if contribution.corresponding and contribution.email:
            email = unicode_to_latex(contribution.email)
            lines.append(f"\\email{{{email}}}\n")

        lines.append("\n")

    return lines

308 

309 

def create_tex_for_pcj(article):
    r"""
    Create the tex source of a PCJ article.

    The source declares the metadata of the article (issue, DOI, PCI section,
    corresponding emails, title, authors, abstract, publication date) and
    includes the PDF of the article with \PCIincludepdf.

    :param article: article object
    :return: list of lines of the tex source
    """
    pci = article.get_pci_section()

    extid = model_helpers.get_extid(article, "rdoi")
    rdoi = extid.id_value if extid is not None else ""

    container = article.my_container
    lines = [
        "\\documentclass[PCJ,Unicode,screen,Recup]{cedram}\n",
        "\\usepackage{pax}\n",
        "\\usepackage{mathrsfs}\n\n",
        "\\issueinfo{" + container.volume + "}{}{}{" + container.year + "}\n",
        f"\\renewcommand*{{\\thearticle}}{{{article.article_number}}}\n",
        f"\\DOI{{{article.doi}}}\n",
        f"\\RDOI{{{rdoi}}}\n",
        f"\\setPCI{{{pci}}}\n",
        # Ends with a newline like the other commands: the line was glued to
        # the next command otherwise
        f"\\CDRsetmeta{{articletype}}{{{article.atype}}}\n",
    ]

    conf = article.get_conference()
    if conf:
        lines.append(f"\\setPCIconf{{{conf}}}\n")

    author_contributions = article.get_author_contributions()

    for email in get_tex_corresponding_emails(author_contributions):
        lines.append(f"\\PCIcorresp{{{email}}}\n")

    lines.append("\n")

    # \title[Sample for the template]{Sample for the template, with quite a very long title}
    # The HTML tags of the title are replaced with placeholders before the
    # conversion to LaTeX, then the placeholders are replaced with TeX commands
    tex_commands = {
        "i": "\\protect\\emph{",
        "sup": "\\protect\\textsuperscript{",
        "sub": "\\protect\\textsubscript{",
    }
    title = article.title_tex
    for tag in tex_commands:
        title = title.replace(f"<{tag}>", f"|||{tag}|||").replace(f"</{tag}>", f"|||/{tag}|||")
    title = unicode_to_latex(title)
    for tag, command in tex_commands.items():
        title = title.replace(f"|||{tag}|||", command).replace(f"|||/{tag}|||", "}")

    lines.append(f"\\title{{{title}}}\n")
    lines.append("\n")
    lines.extend(get_tex_authors(author_contributions))

    # No keywords for PCJ

    abstracts = article.get_abstracts()
    if len(abstracts) > 0:
        # Only the first abstract is used; the XML is converted by get_tex_from_xml
        abstract = abstracts.first()
        value = get_tex_from_xml(abstract.value_xml, "abstract", for_tex_file=True)

        lines.append("\\begin{abstract}\n")
        lines.append(value + "\n")
        lines.append("\\end{abstract}\n")

    # AAAA-MM-DD: placeholder written when the article has no publication date
    date_ = article.date_published.strftime("%Y-%m-%d") if article.date_published else "AAAA-MM-DD"
    keyword = get_tex_keyword_date_published("PCJ", article)
    lines.append(f"{keyword}{{{date_}}}\n")

    lines.append("\\begin{document}\n")
    lines.append("\\maketitle\n")
    article_pdf = f"article_{article.pid}.pdf"
    lines.append(f"\\PCIincludepdf{{{article_pdf}}}\n")

    lines.append("\\end{document}\n")

    return lines

428 

429 

def compile_tex(lines, article, update=False):
    """
    Compile the tex source of an article on mathdoc-tex.

    1) Create a tex file from the list of lines
    2) Upload the file to mathdoc-tex (+ the pdf of the article if update is False)
    3) Compile the file
    4) If MERSENNE_CREATE_FRONTPAGE is set, linearize the compiled pdf
       into MERSENNE_TEST_DATA_FOLDER
    TODO: merge ptf_tools/views create_frontpage (not done while PCJ is unstable to avoid compilation bugs in prod)

    :param lines: list of lines of the tex source
    :param article: article object
    :param update: False to create the article folder on mathdoc-tex first
    :return: path of the pdf below MERSENNE_TEST_DATA_FOLDER;
        None if the site is not ptf_tools
    :raises Exception: the collection is not PCJ and the article has no ojs-id
    """

    # Only allowed on ptf-tools
    if settings.SITE_NAME != "ptf_tools":
        return

    user = settings.MERSENNE_TEX_USER
    issue = article.my_container
    colid = issue.my_collection.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue.pid)
    article_pdf = ""

    if colid != "PCJ":
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
    else:
        article_tex_name = article.pid
        article_pdf = f"article_{article.pid}.pdf"
    article_path = os.path.join(issue_path, article_tex_name)

    # NOTE(review): the commands below are shell strings built from paths;
    # they assume paths without spaces or shell metacharacters
    if not update:
        # Create the article folder
        cmd = f"ssh {user}@mathdoc-tex mkdir -p {article_path}"
        utils.execute_cmd(cmd)

        # copy the pdf to mersenne-tex
        # NOTE(review): the upload is assumed to belong to the "not update" case
        # (relative_folder is computed again below) - confirm
        relative_folder = resolver.get_relative_folder(colid, issue.pid, article.pid)
        folder = os.path.join(settings.RESOURCES_ROOT, relative_folder)
        pdf_file_name = os.path.join(folder, article.pid + ".pdf")

        cmd = f"scp {pdf_file_name} {user}@mathdoc-tex:{article_path}/{article_pdf}"
        utils.execute_cmd(cmd)

    article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")
    # The filename argument is ignored when a temporary file is created
    fpath = write_tex_file("", lines, create_temp_file=True)

    # copy to mersenne-tex
    cmd = f"scp {fpath} {user}@mathdoc-tex:{article_tex_file_name}"
    utils.execute_cmd(cmd)

    # recompile article
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    # execute script to compile: the local script is sent to the remote bash on stdin
    cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name} {colid} {article_pdf}"
    utils.execute_cmd(cmd)

    # replace pdf
    cedram_pdf_location = os.path.join(article_path, article_tex_name + ".pdf")
    relative_folder = resolver.get_relative_folder(colid, issue.pid, article.pid)
    to_path = os.path.join(
        settings.MERSENNE_TEST_DATA_FOLDER, relative_folder, article.pid + ".pdf"
    )
    if settings.MERSENNE_CREATE_FRONTPAGE:
        utils.linearize_pdf(cedram_pdf_location, to_path)

    return to_path

497 

498 

def add_outline(reader, writer, outlines, parent=None):
    """
    Copy the outline (table of contents) of a PDF reader to a PDF writer.

    :param reader: pypdf reader that owns the outline
    :param writer: pypdf writer that receives the outline items
    :param outlines: list of outline items; a nested list holds the children
        of the item that precedes it
    :param parent: outline item of the writer, parent of the items to add
    """
    child_parent = parent
    for item in outlines:
        if isinstance(item, list):
            # Children of the last item that was added
            add_outline(reader, writer, item, child_parent)
            continue

        title = item["/Title"]
        page_num = reader.get_destination_page_number(item)

        # The options are passed by keyword: recent versions of pypdf have a
        # "before" parameter after "parent", so positional arguments would be
        # shifted (the Fit object would be received as "italic").
        options = {}
        if item["/Type"] == "/XYZ":
            # Keep the position of the destination in the page (zoom factor: 1)
            options["fit"] = pypdf.generic.Fit("/XYZ", (item["/Left"], item["/Top"], 1))

        child_parent = writer.add_outline_item(
            title, page_num, parent=parent, bold=False, italic=False, **options
        )

520 

521 

def test():
    """
    Manual check of the merge of a front page with the content of a PDF.

    Uses hard-coded PDF files of a developer workstation and the first article
    of the database. The first page of test_FP.pdf and the pages of
    test_content.pdf, except its first page, are merged in test_merged.pdf,
    with the outline, the named destinations and the metadata of the content.

    The statements that followed the exit (page labels experiment) could not
    be reached and have been removed.

    :raises SystemExit: always, once the merged PDF is written
    """
    local_fp_pdf = "/home/touvierj/Bureau/test_FP.pdf"
    local_content_pdf = "/home/touvierj/Bureau/test_content.pdf"
    merged_pdf = "/home/touvierj/Bureau/test_merged.pdf"

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # New front page: 1st page only
    for index, page in enumerate(pdf_reader_fp.pages):
        if index == 0:
            pdf_writer.add_page(page)

    # Content: all the pages but the 1st one
    for index, page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Add metadata to the PDF, including EXIF data
    add_metadata(models.Article.objects.first(), local_content_pdf, merged_pdf)

    # Same effect as exit(), without depending on the site module
    raise SystemExit

578 

579 

def add_metadata(article, in_pdf, out_pdf):
    """
    Add metadata to a PDF with exiftool.

    The tags of in_pdf are copied to out_pdf (-tagsFromFile), together with the
    title and author found in in_pdf and the metadata of the article
    (language, publisher, DOI, keywords, volume, number, ISSN...).

    :param article: article object
    :param in_pdf: path of the PDF to read the existing tags from
    :param out_pdf: path of the PDF to update
    :return: the output of exiftool (bytes)
    :raises subprocess.CalledProcessError: exiftool returned a non-zero exit status
    """
    reader = pypdf.PdfReader(in_pdf, strict=False)
    # metadata is None if the PDF has no document information
    metadata = reader.metadata or {}

    container = article.my_container
    collection = article.get_collection()

    _, kwds, _ = article.get_kwds_by_type()
    keywords = ", ".join(str(kwd.value) for kwd in kwds)

    lang = {"fr": "fr-FR", "en": "en-GB"}.get(article.lang, "")

    # The command is a list of arguments, run without a shell: quotes or shell
    # metacharacters in the metadata (ex: a title with an apostrophe) can neither
    # break the command nor be interpreted.
    cmd = ["exiftool", "-tagsFromFile", in_pdf]

    if in_pdf == out_pdf:
        cmd.append("-overwrite_original_in_place")

    if "/Title" in metadata:
        cmd.append(f"-Title={metadata['/Title']}")

    if "/Author" in metadata:
        cmd.append(f"-Author={metadata['/Author']}")

    cmd.append("-Creator=Centre Mersenne")
    cmd.append("-Subject=")
    if lang:
        # GROUP:TAG syntax, like the other XMP tags
        cmd.append(f"-xmp-dc:Language={lang}")
    cmd.append(f"-xmp-dc:publisher={container.my_publisher.pub_name}")
    cmd.append(f"-xmp-prism:DOI={article.doi}")
    cmd.append(f"-Keywords={keywords}")
    cmd.append(f"-xmp-xmp:Keywords={keywords}")
    cmd.append(f"-xmp-pdf:Keywords={keywords}")
    cmd.append("-xmp-pdf:Copyright=© The author(s)")

    if container.volume:
        cmd.append(f"-xmp-prism:Volume={container.volume}")
    if container.number:
        cmd.append(f"-xmp-prism:Number={container.number}")
    if collection.issn:
        cmd.append(f"-xmp-prism:ISSN={collection.issn}")
    if collection.e_issn:
        cmd.append(f"-xmp-prism:EISSN={collection.e_issn}")
    if container.title_tex:
        cmd.append(f"-xmp-prism:IssueName={container.title_tex}")
    cmd.append(out_pdf)

    return subprocess.check_output(cmd)

640 

641 

def replace_front_page(
    article, article_tex_name, fp_pdf_file_name, content_pdf_file_name, final_pdf_file_name
):
    """
    Replace the front page of the PDF of an article.

    At this point the PDF has been recompiled, possibly with a new template.
    The 1st page of the new PDF is merged with the other pages of the .pdf_SAV.
    The PDF files are copied from mathdoc-tex, merged locally (below LOG_DIR/tmp/)
    and the result is copied back to mathdoc-tex.

    :param article: article object (fpage is read)
    :param article_tex_name: base name of the local temporary files
    :param fp_pdf_file_name: path on mathdoc-tex of the PDF with the new front page
    :param content_pdf_file_name: path on mathdoc-tex of the PDF with the content
    :param final_pdf_file_name: path on mathdoc-tex of the merged PDF to create
    """
    user = settings.MERSENNE_TEX_USER
    tmp_folder = os.path.join(settings.LOG_DIR, "tmp/")

    # Copy the PDF files locally (pypdf is installed in ptf-tools)
    local_fp_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_FP")
    cmd = f"scp {user}@mathdoc-tex:{fp_pdf_file_name} {local_fp_pdf}"
    utils.execute_cmd(cmd)

    local_content_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_content")
    cmd = f"scp {user}@mathdoc-tex:{content_pdf_file_name} {local_content_pdf}"
    utils.execute_cmd(cmd)

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # New front page: 1st page only
    for index, page in enumerate(pdf_reader_fp.pages):
        if index == 0:
            pdf_writer.add_page(page)

    # Content: all the pages but the 1st one
    for index, page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    merged_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_merged")
    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Compiled PDF are sometimes buggy (wrong xref table). Use pdftk to fix the file.
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    cmd = f"{ptf_tools_bin}/update_pdf.sh {local_content_pdf} {merged_pdf}"
    utils.execute_cmd(cmd)

    # Add metadata to the PDF, including EXIF data
    add_metadata(article, local_content_pdf, merged_pdf)

    # Without first page, the merged PDF is uploaded as is
    # (a file that was never written, or a stale one, was uploaded otherwise)
    local_pdf = merged_pdf

    # pypdf creates a PDF that starts on page 1, fix it
    if article.fpage:
        try:
            first_page = int(article.fpage)
            style = "/D"  # decimal numbers
        except ValueError:
            first_page = xml_utils.roman_to_int(article.fpage)
            style = "/r"  # lowercase roman numbers

        reader = pypdf.PdfReader(merged_pdf)
        writer = pypdf.PdfWriter()
        for page in reader.pages:
            writer.add_page(page)

        # Label all the pages; the numbering starts at the first page of the article
        writer.set_page_label(
            page_index_from=0,
            page_index_to=len(writer.pages) - 1,
            style=style,
            start=first_page,
        )
        local_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf")
        writer.write(local_pdf)
        writer.close()

    # copy to mersenne-tex
    cmd = f"scp {local_pdf} {user}@mathdoc-tex:{final_pdf_file_name}"
    utils.execute_cmd(cmd)

718 

719 

def _remote_copy(user, source, destination):
    """Copy *source* to *destination* on mathdoc-tex with ``ssh ... cp``."""
    utils.execute_cmd(f"ssh {user}@mathdoc-tex cp {source} {destination}")


def _upload_tex_lines(lines, prefix, user, remote_file_name):
    """Write *lines* to a local temporary file and ``scp`` it to mathdoc-tex.

    The temporary file name starts with *prefix*. The ``with`` block makes
    sure the handle is closed even if the write fails.
    """
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=prefix, delete=False
    ) as tmp_file:
        tmp_file.write("".join(lines))

    # NOTE(review): the temporary file is intentionally left on disk
    # (delete=False, no unlink), exactly like the previous implementation
    # where the os.unlink call was commented out. Confirm whether it can be
    # removed once the scp has returned.
    utils.execute_cmd(f"scp {tmp_file.name} {user}@mathdoc-tex:{remote_file_name}")


def compile_article(
    article,
    colid,
    issue_id,
    article_path,
    article_tex_name,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """
    Recompile the TeX of an article on mathdoc-tex and install the resulting PDF.

    Steps, in order:
      1. back up the current PDF (and the cfg file when ``lang`` is empty)
         to ``*_SAV`` copies;
      2. if ``replace_frontpage_only``, duplicate the tex/cfg/cdrdoidates files
         under a ``_FP`` name and work on those copies;
      3. unless ``skip_compilation``: strip the "published" markers from the cfg
         (only when ``lang`` is empty), back up the tex, rewrite it with
         ``replace_dates_in_tex``, upload it, run ``bin/create_frontpage.sh``
         remotely, then upload the ``protect_tex`` version of the tex;
      4. if ``replace_frontpage_only``, call ``replace_front_page``;
      5. remove any existing destination PDF under
         ``settings.MERSENNE_TEST_DATA_FOLDER`` and call ``utils.linearize_pdf``
         to produce it from the final PDF.

    :param article: article object; its PDF datastream gives the destination path
    :param colid: collection id, forwarded to ``replace_dates_in_tex``
    :param issue_id: not used by this function (kept for the callers' signature)
    :param article_path: folder that holds the article's tex/cfg/pdf files
    :param article_tex_name: base name of those files ("-<lang>" is appended
        when ``lang`` is not empty)
    :param replace_frontpage_only: work on ``_FP`` copies and call
        ``replace_front_page`` after compilation
    :param skip_compilation: skip step 3 entirely
    :param lang: language code of a translation, "" for the original article
    """
    user = settings.MERSENNE_TEX_USER

    if lang != "":
        article_tex_name += "-" + lang

    article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
    # Regular compilation: compiled_pdf and final_pdf are the same.
    # Recompilation of the front page: compiled_pdf is the entire pdf with the
    # new front page, final_pdf is the pdf after the merge
    # (new front page; old content).
    compiled_pdf_file_name = final_pdf_file_name = os.path.join(
        article_path, article_tex_name + ".pdf"
    )
    content_pdf_file_name = compiled_pdf_file_name + "_SAV"

    # Save the pdf file
    _remote_copy(user, compiled_pdf_file_name, content_pdf_file_name)

    # Save the cfg file (no cfg for translations)
    if lang == "":
        _remote_copy(user, article_cfg_file_name, article_cfg_file_name + "_SAV")

    # Temporary files are created under LOG_DIR/tmp: according to the original
    # note, apache on ptf-tools apparently may not write to /tmp.
    prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(prefix)

    if replace_frontpage_only:
        # Copy CFG/TEX/PDF to a new name. pdflatex will generate new files,
        # thus preserving the existing ones.
        front_page_tex_name = article_tex_name + "_FP"

        # Remove leftovers of a previous front page compilation
        utils.execute_cmd(
            f"ssh {user}@mathdoc-tex rm -f {os.path.join(article_path, front_page_tex_name)}.*"
        )

        _remote_copy(
            user,
            os.path.join(article_path, article_tex_name + ".tex"),
            os.path.join(article_path, front_page_tex_name + ".tex"),
        )

        # The cfg is copied directly to the "_SAV" name: the sed commands below
        # regenerate the working cfg from that "_SAV" file.
        front_page_cfg_file_name = os.path.join(article_path, front_page_tex_name + ".cfg")
        _remote_copy(user, article_cfg_file_name, front_page_cfg_file_name + "_SAV")

        cdrdoidates_file_name = os.path.join(article_path, article_tex_name + ".cdrdoidates")
        # Existence is tested with os.path on the machine running this code
        if os.path.isfile(cdrdoidates_file_name):
            _remote_copy(
                user,
                cdrdoidates_file_name,
                os.path.join(article_path, front_page_tex_name + ".cdrdoidates"),
            )

        # From here on, every name refers to the "_FP" copies
        article_tex_name = front_page_tex_name
        article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
        compiled_pdf_file_name = os.path.join(article_path, article_tex_name + ".pdf")
        final_pdf_file_name = compiled_pdf_file_name + ".new"

    if not skip_compilation:
        # Remove \ItIsPublished from the cfg file
        if lang == "":
            cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\ItIsPublished//' {article_cfg_file_name}_SAV > {article_cfg_file_name}.1"'''
            utils.execute_cmd(cmd)
            cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\gdef \\\\\\\\CDRpublished {{true}}//' {article_cfg_file_name}.1 > {article_cfg_file_name}"'''
            utils.execute_cmd(cmd)

        article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")

        # Save the tex file
        _remote_copy(user, article_tex_file_name, article_tex_file_name + "_SAV")

        lines = read_tex_file(article_tex_file_name)
        new_lines, bib_name = replace_dates_in_tex(
            lines, article, colid, replace_frontpage_only, lang=lang
        )

        if bib_name and replace_frontpage_only:
            convert_file_to_utf8(article_path, bib_name + ".bib", bib_name + "_FP.bib")

        # Copy the tex with the new dates to mersenne-tex
        _upload_tex_lines(new_lines, prefix, user, article_tex_file_name)

        # Recompile the article: the local script is fed to a remote bash
        ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
        cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name}"
        utils.execute_cmd(cmd)

        # Protect the tex file with the 'published' option and upload it again
        new_lines = protect_tex(new_lines)
        _upload_tex_lines(new_lines, prefix, user, article_tex_file_name)

    if replace_frontpage_only:
        # At this point the PDF has been recompiled, possibly with a new template.
        # Use the 1st page of the new PDF with the other pages of the .pdf_SAV
        replace_front_page(
            article,
            article_tex_name,
            compiled_pdf_file_name,
            content_pdf_file_name,
            final_pdf_file_name,
        )

    # Copy PDF to MERSENNE_TEST_DATA_FOLDER
    datastream = article.datastream_set.filter(mimetype="application/pdf").get()
    to_path = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, datastream.location)
    # Remove the destination if it exists, to test if the final pdf is really created
    if os.path.exists(to_path):
        os.remove(to_path)
    utils.linearize_pdf(final_pdf_file_name, to_path)

    # NOTE: a call to add_metadata(article, to_path, to_path) for the
    # "not replace_frontpage_only" case used to follow here; it is disabled.

862 

863 

def create_frontpage(
    colid,
    container,
    updated_articles,
    test=True,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """
    Create the front page of articles by recompiling them on mersenne-tex.

    Intended flow (from the original notes):
      - get the directory of the article sources (cedram_dev/production/...)
      - add the publication date in the source TeX
      - remotely execute latexmk -pdf article.pdf
      - replace the pdf of the article on mersenne_test_data

    Behaviour of this function:
      - "PCJ": every article goes through ``create_tex_for_pcj`` then
        ``compile_tex(..., update=True)``; nothing else is done.
      - Comptes Rendus collections with a container year before 2020 and no
        ``lang``: nothing is done (no front page for Elsevier CRAS). A year
        that cannot be converted to an int counts as 0.
      - otherwise, ``compile_article`` is called for every article that has a
        ``date_online_first`` or a ``date_published``, but only when ``test``
        is false.

    :param colid: collection id
    :param container: issue; ``year`` and ``pid`` are read
    :param updated_articles: iterable of articles to process
    :param test: when true (default), no article is compiled
    :param replace_frontpage_only: forwarded to ``compile_article``
    :param skip_compilation: forwarded to ``compile_article``
    :param lang: language code of a translation, "" for the original article
    :raises Exception: if an article has no ojs-id. Errors raised while
        compiling propagate unchanged: there is no rollback since nothing is
        modified in the database (a pdf_SAV could possibly be put back).
    """
    # TODO refactor the code and only use compile_tex for all collections
    if colid == "PCJ":
        for article in updated_articles:
            lines = create_tex_for_pcj(article)
            compile_tex(lines, article, update=True)
        return

    try:
        year = int(container.year)
    except (TypeError, ValueError):
        # Missing (None) or non numeric year: handled like year 0
        year = 0

    cras_colids = ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")
    if colid in cras_colids and year < 2020 and lang == "":
        # No front page for Elsevier CRAS
        return

    issue_id = container.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue_id)

    for article in updated_articles:
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_path, article_tex_name)

        # Design notes translated from the original comments. They describe
        # the intended date handling; this function itself only applies the
        # condition just below.
        # - in test mode: date_pre_publish has been updated but not the other
        #   dates. A temporary XXXX-XX-XX date is only created for
        #   online_first (otherwise, when the online_first goes to production,
        #   the pdftimestamp file holding XXXX-XX-XX would make that date
        #   appear).
        # - if article.my_container.with_online_first and the article has no
        #   date_online_first: XXXX-xx-xx is used for online first (an article
        #   that already has an online-first date has in principle already
        #   been recompiled).
        # - when going to production, the dates of the article are used:
        #   article.date_online_first if container.with_online_first, and
        #   article.date_published if it is set.
        if test or not (article.date_online_first or article.date_published):
            continue

        compile_article(
            article,
            colid,
            issue_id,
            article_path,
            article_tex_name,
            replace_frontpage_only,
            skip_compilation,
            lang,
        )

948 

949 

def create_translated_pdf(
    article, xml_content, lang, pdf_file_name, html_file_name, skip_compilation=False
):
    """
    Build the PDF of a translated article on mathdoc-tex and fetch it locally.

    Unless ``skip_compilation`` is true:
      - ``xml_content`` is written to ``<LOG_DIR>/tmp/<ojs-id>.xml`` and copied
        to the article folder on mathdoc-tex;
      - ``html_file_name`` is copied there as ``trad-<lang>.html``;
      - ``bin/translate_article.sh`` is run remotely with the article folder,
        the XML name, the HTML name and ``lang``;
      - the remote ``<ojs-id>-<lang>.pdf`` is copied to ``pdf_file_name``.

    :param article: article object; gives the collection pid, the container
        pid and the ojs-id used to locate the remote folder
    :param xml_content: text written to the local XML file
    :param lang: language code of the translation
    :param pdf_file_name: local destination of the generated PDF
    :param html_file_name: local HTML file sent to mathdoc-tex
    :param skip_compilation: when true, nothing is written, copied or compiled
    :raises Exception: if the article has no ojs-id
    """
    user = settings.MERSENNE_TEX_USER

    issue_path = resolver.get_cedram_issue_tex_folder(
        article.get_top_collection().pid, article.my_container.pid
    )
    article_tex_name = article.get_ojs_id()
    if not article_tex_name:
        raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
    article_path = os.path.join(issue_path, article_tex_name)

    xml_base_name = article_tex_name + ".xml"
    local_xml = os.path.join(settings.LOG_DIR, "tmp", xml_base_name)
    remote_xml = os.path.join(article_path, xml_base_name)

    if skip_compilation:
        return

    # Make sure the local tmp folder exists before writing into it
    # (same call as the one made by compile_article for this folder)
    resolver.create_folder(os.path.join(settings.LOG_DIR, "tmp/"))

    # Create the XML file locally
    with open(local_xml, "w", encoding="utf-8") as xml_file:
        xml_file.write(xml_content)

    # Copy XML file to mersenne-tex
    utils.execute_cmd(f"scp {local_xml} {user}@mathdoc-tex:{remote_xml}")

    # Copy HTML file to mersenne-tex
    remote_html_base_name = f"trad-{lang}.html"
    remote_html = os.path.join(article_path, remote_html_base_name)
    utils.execute_cmd(f"scp {html_file_name} {user}@mathdoc-tex:{remote_html}")

    # Create the PDF: the local script is fed to a remote bash
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/translate_article.sh {article_path} {xml_base_name} {remote_html_base_name} {lang}"
    utils.execute_cmd(cmd)

    # pdf-traduction should have created the remote pdf: copy it back
    remote_pdf = os.path.join(article_path, f"{article_tex_name}-{lang}.pdf")
    utils.execute_cmd(f"scp {user}@mathdoc-tex:{remote_pdf} {pdf_file_name}")