Coverage for apps/ptf/tex.py: 71%

536 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-19 19:20 +0000

1import os 

2import subprocess 

3import tempfile 

4 

5import pypdf 

6from pylatexenc.latexencode import unicode_to_latex 

7 

8from django.conf import settings 

9 

10from ptf import model_helpers 

11from ptf import models 

12from ptf import utils 

13from ptf.cmds.xml import xml_utils 

14from ptf.cmds.xml.jats.jats_parser import get_tex_from_xml 

15from ptf.display import resolver 

16 

17 

def get_tex_keyword_date_published(colid, article, lang=""):
    r"""
    Return the TeX command that carries the publication date of an article.

    A non-empty ``lang`` always selects \CDRsetmeta{traduction_date_posted}.
    Otherwise \dateposted is used for the AHL, AIF, OGEO and JTNB collections,
    and for the CR* collections when the article has no online-first date.
    Every other case gets \datepublished.
    """
    posted_collections = ("AHL", "AIF", "OGEO", "JTNB")
    comptes_rendus = ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")

    # The article is only inspected for the CR* collections.
    cr_without_online_first = colid in comptes_rendus and not article.date_online_first

    if lang != "":
        return "\\CDRsetmeta{traduction_date_posted}"
    if cr_without_online_first or colid in posted_collections:
        return "\\dateposted"
    return "\\datepublished"

29 

30 

def get_tex_keyword_date_online_first(colid, article, lang=""):
    r"""
    Return the TeX command that carries the online-first date of an article.

    The arguments are accepted for symmetry with
    get_tex_keyword_date_published but are not used: the command is
    always \dateposted.
    """
    keyword = "\\dateposted"
    return keyword

33 

34 

def read_tex_file(filename):
    """
    Read a TeX file and return its content as a list of lines.

    The file is decoded as utf-8 first; if that fails with a
    UnicodeDecodeError it is read again as iso-8859-1.
    An empty list is returned when filename is not an existing file.
    """
    if not os.path.isfile(filename):
        return []

    try:
        with open(filename, encoding="utf-8") as utf8_file:
            return utf8_file.readlines()
    except UnicodeDecodeError:
        # Not valid utf-8: fall back to latin-1 below.
        pass

    with open(filename, encoding="iso-8859-1") as latin1_file:
        return latin1_file.readlines()

52 

53 

def convert_file_to_utf8(article_path, from_name, to_name):
    """
    Re-encode a TeX file as utf-8 and copy the result to mathdoc-tex.

    article_path/from_name is read with read_tex_file (utf-8 or iso-8859-1),
    written as utf-8 to a temporary file under settings.LOG_DIR/tmp/, then
    copied with scp to article_path/to_name on the mathdoc-tex host.

    The temporary file is created with delete=False and is not removed here.
    """
    filename = os.path.join(article_path, from_name)
    lines = read_tex_file(filename)

    user = settings.MERSENNE_TEX_USER
    prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(prefix)

    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=prefix, delete=False
    ) as f_:
        fpath = f_.name  # ex: /tmp/Rxsft
        f_.write("".join(lines))

    out_filename = os.path.join(article_path, to_name)
    # copy to mersenne-tex
    cmd = f"scp {fpath} {user}@mathdoc-tex:{out_filename}"
    utils.execute_cmd(cmd)

71 

72 

def write_tex_file(filename, lines, create_temp_file=False):
    """
    Write the concatenation of lines to a utf-8 file and return its path.

    When create_temp_file is true, filename is ignored and the content goes
    to a new temporary file under settings.LOG_DIR/tmp/ (created with
    delete=False, so it is left on disk for the caller).
    Otherwise the content is written to filename, which is returned.
    """
    content = "".join(lines)

    if not create_temp_file:
        with open(filename, "w", encoding="utf-8") as f_:
            f_.write(content)
        return filename

    prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(prefix)

    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=prefix, delete=False
    ) as f_:
        f_.write(content)
        return f_.name  # ex: /tmp/Rxsft

88 

89 

def insert_date_published(new_lines, article, colid, begin_document_pos, lang=""):
    r"""
    Insert the publication date command(s) into new_lines.

    new_lines is modified in place: the command returned by
    get_tex_keyword_date_published is inserted at begin_document_pos with
    article.date_published formatted as YYYY-MM-DD.  For a finalized
    thematic issue whose article has no online-first date, an additional
    \datepublished line is inserted when the first command was a different
    one.

    Returns begin_document_pos advanced by the number of inserted lines.
    Nothing is inserted when the article has no publication date.
    """
    if article.date_published is None:
        # Without a date there is nothing to write.  Previously the thematic
        # issue branch below could still run and fail on the unset keyword.
        return begin_document_pos

    date_str = article.date_published.strftime("%Y-%m-%d")
    keyword = get_tex_keyword_date_published(colid, article, lang)
    new_lines.insert(begin_document_pos, f"{keyword}{{{date_str}}}\n")
    begin_document_pos += 1

    container = article.my_container
    if container is not None:
        is_thematic_issue = bool(container.title_html)
        is_issue_finalized = not container.with_online_first
        is_thematic_finalized = is_thematic_issue and is_issue_finalized

        if is_thematic_finalized and article.date_online_first is None:
            # Finalized thematic issue where the article did not go through online first
            # => Add \datepublished so that "Issue date :" appears in the PDF
            keyword2 = "\\datepublished"
            if keyword2 != keyword:
                new_lines.insert(begin_document_pos, f"{keyword2}{{{date_str}}}\n")
                begin_document_pos += 1

    return begin_document_pos

112 

113 

def insert_date_online_first(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the online-first date command into new_lines.

    When the article has an online-first date, one line is inserted in place
    at begin_document_pos (date formatted as YYYY-MM-DD) and the position
    following it is returned.  Otherwise begin_document_pos is returned
    unchanged and new_lines is left untouched.
    """
    if article.date_online_first is None:
        return begin_document_pos

    keyword = get_tex_keyword_date_online_first(colid, article, lang)
    date_line = f'{keyword}{{{article.date_online_first.strftime("%Y-%m-%d")}}}\n'
    new_lines.insert(begin_document_pos, date_line)
    return begin_document_pos + 1

122 

123 

def insert_end_page(new_lines, article, colid, begin_document_pos):
    r"""
    Insert the \cdr@end@page definition one line after begin_document_pos.

    The line is only added when article.lpage is set.  new_lines is modified
    in place; begin_document_pos is returned unchanged in every case.
    colid is not used.
    """
    last_page = article.lpage
    if last_page:
        end_page_line = "\\makeatletter\\def\\cdr@end@page{" + last_page + "}\\makeatother\n"
        new_lines.insert(begin_document_pos + 1, end_page_line)

    return begin_document_pos

130 

131 

def replace_dates_in_tex(lines, article, colid, replace_frontpage_only=False, lang=""):
    r"""
    Add or replace \dateposted and \datepublished in the source TeX.

    lines is a list of lines of the source TeX.  Comment lines (first
    non-blank character is %) and blank lines are copied unchanged.

    For the other lines:
    - existing date commands are replaced by the article dates; when
      replace_frontpage_only is set they are removed and the dates are
      re-inserted just before \begin{document};
    - dates that were not found are inserted just before \begin{document};
    - "published" is removed from the \documentclass options;
    - an existing \cdr@end@page definition is dropped; with
      replace_frontpage_only a fresh one is inserted after \begin{document};
    - with replace_frontpage_only, \bibliography{name} becomes
      \bibliography{name_FP}.

    Returns (new_lines, bib_name); bib_name is "" unless a \bibliography
    line was rewritten.
    """
    new_lines = []
    bib_name = ""

    keyword_date_published = get_tex_keyword_date_published(colid, article, lang)
    keyword_date_online_first = get_tex_keyword_date_online_first(colid, article, lang)
    found_date_online_first = False
    found_date_published = False
    begin_document_pos = -1

    any_date_command = (
        "\\datepublished{",
        "\\dateposted{",
        "\\CDRsetmeta{traduction_date_posted}{",
    )

    for line in lines:
        body = line.lstrip(" \t")
        indent = len(line) - len(body)

        if not body or body.startswith("%"):
            # Blank line or comment: keep as is
            new_lines.append(line)
            continue

        if replace_frontpage_only and body.startswith(any_date_command):
            # Dropped here; the dates are re-inserted before \begin{document}
            continue

        if body.startswith(keyword_date_published + "{"):
            # replace existing \datepublished
            found_date_published = True
            insert_date_published(new_lines, article, colid, len(new_lines), lang=lang)

        elif body.startswith(keyword_date_online_first + "{"):
            # replace existing \dateposted
            found_date_online_first = True
            insert_date_online_first(new_lines, article, colid, len(new_lines), lang=lang)

        elif body.startswith("\\begin{document"):
            # \begin{document} add dates if not present.
            # Record the index in new_lines (not in lines): lines dropped or
            # replaced above would otherwise shift the insertion point.
            begin_document_pos = len(new_lines)
            new_lines.append(line)

        elif body.startswith(("\\documentclass", "{\\documentclass")):
            # remove published from \documentclass to allow compilation
            line = (
                line.replace(",published,", ",")
                .replace(",published", "")
                .replace("published", "")
            )
            new_lines.append(line)

        elif body.startswith("\\makeatletter\\def\\cdr@end@page"):
            # Command to specify the last page (present in the front page)
            # Dropped here; insert_end_page adds it after \begin{document}
            pass

        elif (
            replace_frontpage_only
            and body.startswith("\\bibliography")
            and not body.startswith("\\bibliographystyle")
        ):
            end = line.find("}")
            if end > 0:
                # 14 == len("\\bibliography{")
                bib_name = line[indent + 14 : end]
                new_lines.append("\\bibliography{" + bib_name + "_FP}\n")
            else:
                # No closing brace on this line: keep the line untouched
                # instead of losing it.
                new_lines.append(line)

        else:
            new_lines.append(line)

    if begin_document_pos > 0 and not found_date_online_first:
        begin_document_pos = insert_date_online_first(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if begin_document_pos > 0 and not found_date_published:
        begin_document_pos = insert_date_published(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if replace_frontpage_only and begin_document_pos > 0:
        begin_document_pos = insert_end_page(new_lines, article, colid, begin_document_pos)

    return new_lines, bib_name

229 

230 

def protect_tex(lines, keyword="published"):
    r"""
    Add keyword to the options of a cedram \documentclass.

    The keyword is added just before the closing "]" of the options, on the
    \documentclass line itself or on a following line when the options span
    several lines.  Only lines containing "{cedram" are modified; comment
    lines are copied unchanged.

    Returns a new list of lines; lines is not modified.
    """
    protected = []
    options_still_open = False  # True while inside a multi-line \documentclass[...

    for text in lines:
        indent = len(text) - len(text.lstrip(" \t"))
        is_comment_or_blank = indent >= len(text) or text[indent] == "%"

        if not is_comment_or_blank:
            if text.startswith(("\\documentclass", "{\\documentclass"), indent):
                # add published to \documentclass after compilation
                bracket = text.find("]")
                if bracket > 0:
                    if text.find("{cedram") > 0:  # Ignore {article}
                        text = text[:bracket] + "," + keyword + text[bracket:]
                else:
                    options_still_open = True
            elif options_still_open:
                bracket = text.find("]")
                if bracket == indent:
                    # "]" starts the line: put the keyword on its own line before it
                    if text.find("{cedram") > 0:  # Ignore {article}
                        protected.append(f",{keyword}\n")
                    options_still_open = False
                elif bracket > -1:
                    if text.find("{cedram") > 0:  # Ignore {article}
                        text = text[:bracket] + "," + keyword + text[bracket:]
                    options_still_open = False

        protected.append(text)

    return protected

268 

269 

def get_tex_corresponding_emails(author_contributions):
    r"""
    Return the LaTeX-encoded emails of the corresponding authors.

    Only contributions flagged as corresponding and having an email are kept.
    The \_ produced by unicode_to_latex is turned back into a plain
    underscore.
    """
    return [
        unicode_to_latex(contribution.email).replace(r"\_", r"_")
        for contribution in author_contributions
        if contribution.corresponding and contribution.email
    ]

278 

279 

def get_tex_authors(author_contributions):
    r"""
    Return the TeX lines describing the authors of an article.

    For each contribution, in order:
        \author{\firstname{Antoine} \lastname{Lavoisier}}
        \address{Rue sans aplomb, Paris, France}   (one per address)
        \email{a-lavois@lead-free-univ.edu}        (corresponding authors only)
    followed by an empty line.  \CDRorcid{...}, \IsEqualContrib and \dead
    are appended inside \author{...} when the matching fields are set.
    Names, addresses and emails go through unicode_to_latex.
    """
    lines = []

    for contribution in author_contributions:
        first_name = unicode_to_latex(contribution.first_name)
        last_name = unicode_to_latex(contribution.last_name)
        author = f"\\author{{\\firstname{{{first_name}}} \\lastname{{{last_name}}}"
        if contribution.orcid:
            author += f"\\CDRorcid{{{contribution.orcid}}}"
        if contribution.equal_contrib:
            author += "\\IsEqualContrib"
        if contribution.deceased_before_publication:
            author += "\\dead"
        lines.append(author + "}\n")

        for contribaddress in contribution.contribaddress_set.all():
            address = unicode_to_latex(contribaddress.address)
            lines.append(f"\\address{{{address}}}\n")

        # Truthiness test (same as get_tex_corresponding_emails): a missing
        # email (None) is skipped instead of failing on len().
        if contribution.corresponding and contribution.email:
            email = unicode_to_latex(contribution.email)
            lines.append(f"\\email{{{email}}}\n")

        lines.append("\n")

    return lines

311 

312 

def create_tex_for_pcj(article):
    r"""
    Build the lines of the TeX wrapper file of a PCJ article.

    The file declares the cedram class with the PCJ options, the issue and
    article metadata (volume, year, article number, DOI, RDOI, PCI section,
    article type, optional conference and corresponding emails), the title,
    the authors, the first abstract and the publication date.  The document
    body only contains \maketitle and the inclusion of article_<pid>.pdf.

    Returns the list of lines (each one, except the article type line, ends
    with a newline).
    """
    pci = article.get_pci_section()

    rdoi_extid = model_helpers.get_extid(article, "rdoi")
    rdoi = "" if rdoi_extid is None else rdoi_extid.id_value

    container = article.my_container
    lines = [
        "\\documentclass[PCJ,Unicode,screen,Recup]{cedram}\n",
        "\\usepackage{pax}\n",
        "\\usepackage{mathrsfs}\n\n",
        "\\issueinfo{" + container.volume + "}{}{}{" + container.year + "}\n",
        f"\\renewcommand*{{\\thearticle}}{{{article.article_number}}}\n",
        f"\\DOI{{{article.doi}}}\n",
        f"\\RDOI{{{rdoi}}}\n",
        f"\\setPCI{{{pci}}}\n",
        # NOTE(review): unlike its siblings this entry has no trailing newline,
        # so the next appended command follows on the same line - confirm intent.
        f"\\CDRsetmeta{{articletype}}{{{article.atype}}}",
    ]

    conf = article.get_conference()
    if len(conf) > 0:
        lines.append(f"\\setPCIconf{{{conf}}}\n")

    author_contributions = article.get_author_contributions()

    lines.extend(
        f"\\PCIcorresp{{{email}}}\n"
        for email in get_tex_corresponding_emails(author_contributions)
    )
    lines.append("\n")

    # \title[Sample for the template]{Sample for the template, with quite a very long title}
    # Inline HTML tags are hidden behind |||tag||| markers so that
    # unicode_to_latex does not touch them, then turned into TeX commands.
    inline_tags = (
        ("i", "\\protect\\emph{"),
        ("sup", "\\protect\\textsuperscript{"),
        ("sub", "\\protect\\textsubscript{"),
    )
    title = article.title_tex
    for tag, _command in inline_tags:
        title = title.replace(f"<{tag}>", f"|||{tag}|||").replace(f"</{tag}>", f"|||/{tag}|||")
    title = unicode_to_latex(title)
    for tag, command in inline_tags:
        title = title.replace(f"|||{tag}|||", command).replace(f"|||/{tag}|||", "}")

    lines.append(f"\\title{{{title}}}\n")
    lines.append("\n")
    lines.extend(get_tex_authors(author_contributions))

    # No keywords for PCJ

    abstracts = article.get_abstracts()
    if len(abstracts) > 0:
        # Only the first abstract is used; its XML is converted by
        # get_tex_from_xml (an earlier hand-written HTML -> TeX conversion of
        # abstract.value_tex was replaced by this call).
        first_abstract = abstracts.first()
        value = get_tex_from_xml(first_abstract.value_xml, "abstract", for_tex_file=True)

        lines.append("\\begin{abstract}\n")
        lines.append(value + "\n")
        lines.append("\\end{abstract}\n")

    # Placeholder date when the article is not published yet
    if article.date_published:
        date_ = article.date_published.strftime("%Y-%m-%d")
    else:
        date_ = "AAAA-MM-DD"
    keyword = get_tex_keyword_date_published("PCJ", article)
    lines.append(f"{keyword}{{{date_}}}\n")

    article_pdf = f"article_{article.pid}.pdf"
    lines.extend(
        [
            "\\begin{document}\n",
            "\\maketitle\n",
            f"\\PCIincludepdf{{{article_pdf}}}\n",
            "\\end{document}\n",
        ]
    )

    return lines

431 

432 

def compile_tex(lines, article, update=False):
    """
    Compile the TeX file of an article on mathdoc-tex.

    1) Create a tex file from the list of lines
    2) Unless update is set, create the remote article folder and upload the
       article pdf
    3) Upload the tex file to mathdoc-tex
    4) Compile the file with bin/create_frontpage.sh
    5) If settings.MERSENNE_CREATE_FRONTPAGE is set, linearize the compiled
       pdf into the mersenne test data folder

    Returns the path of the pdf in the mersenne test data folder, or None
    when not running on ptf_tools.
    Raises Exception when a non-PCJ article has no ojs-id.

    TODO: merge ptf_tools/views create_frontpage (not done while PCJ is unstable to avoid compilation bugs in prod)
    """

    # Only allowed on ptf-tools
    if settings.SITE_NAME != "ptf_tools":
        return None

    user = settings.MERSENNE_TEX_USER
    issue = article.my_container
    colid = issue.my_collection.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue.pid)

    if colid == "PCJ":
        article_tex_name = article.pid
        article_pdf = f"article_{article.pid}.pdf"
    else:
        article_pdf = ""
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
    article_path = os.path.join(issue_path, article_tex_name)

    if not update:
        # Create the article folder
        utils.execute_cmd(f"ssh {user}@mathdoc-tex mkdir -p {article_path}")

        # copy the pdf to mersenne-tex
        pdf_file_name = os.path.join(
            settings.RESOURCES_ROOT,
            resolver.get_relative_folder(colid, issue.pid, article.pid),
            article.pid + ".pdf",
        )
        utils.execute_cmd(f"scp {pdf_file_name} {user}@mathdoc-tex:{article_path}/{article_pdf}")

    # The tex file is written locally to a temporary file, then copied to mersenne-tex
    article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")
    fpath = write_tex_file("", lines, create_temp_file=True)
    utils.execute_cmd(f"scp {fpath} {user}@mathdoc-tex:{article_tex_file_name}")

    # recompile article: the local script is fed to a remote bash through stdin
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    utils.execute_cmd(
        f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name} {colid} {article_pdf}"
    )

    # replace pdf
    cedram_pdf_location = os.path.join(article_path, article_tex_name + ".pdf")
    to_path = os.path.join(
        settings.MERSENNE_TEST_DATA_FOLDER,
        resolver.get_relative_folder(colid, issue.pid, article.pid),
        article.pid + ".pdf",
    )
    if settings.MERSENNE_CREATE_FRONTPAGE:
        utils.linearize_pdf(cedram_pdf_location, to_path)

    return to_path

500 

501 

def add_outline(reader, writer, outlines, parent=None):
    """
    Copy an outline (bookmarks tree) from reader into writer.

    outlines is a list whose items are either outline entries or nested
    lists of entries.  A nested list is added recursively below the entry
    most recently added at the current level; entries themselves are added
    below parent.

    Entries of type /XYZ keep their left/top position (with a zoom of 1);
    other entries are added without fit information.
    """
    child_parent = parent
    for item in outlines:
        if isinstance(item, list):
            # Children of the previous entry
            add_outline(reader, writer, item, child_parent)
            continue

        title = item["/Title"]
        page_num = reader.get_destination_page_number(item)

        if item["/Type"] == "/XYZ":
            child_parent = writer.add_outline_item(
                title,
                page_num,
                parent,
                None,
                False,
                False,
                pypdf.generic.Fit("/XYZ", (item["/Left"], item["/Top"], 1)),
            )
        else:
            child_parent = writer.add_outline_item(title, page_num, parent, None, False, False)

523 

524 

def test():
    """
    Manual check of the front page merge, using hard-coded local files.

    Builds test_merged.pdf from the first page of test_FP.pdf and all pages
    but the first of test_content.pdf, copies the outline and the named
    destinations of the content pdf, then calls add_metadata with the first
    Article of the database.

    The paths point to a developer desktop: this is a debugging helper.
    It ends the interpreter (SystemExit) when done, as it always did; the
    page-label experiment that used to follow the exit call was unreachable
    and has been removed.
    """
    local_fp_pdf = "/home/touvierj/Bureau/test_FP.pdf"
    local_content_pdf = "/home/touvierj/Bureau/test_content.pdf"
    merged_pdf = "/home/touvierj/Bureau/test_merged.pdf"

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # First page of the new front page pdf
    for index, current_page in enumerate(pdf_reader_fp.pages):
        if index == 0:
            pdf_writer.add_page(current_page)

    # Every page but the first of the content pdf
    for index, current_page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(current_page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Add metadata to the PDF, including EXIF data
    add_metadata(models.Article.objects.first(), local_content_pdf, merged_pdf)

    raise SystemExit

581 

582 

def add_metadata(article, in_pdf, out_pdf):
    """
    Write the article metadata into out_pdf with exiftool.

    The existing tags are copied from in_pdf (-tagsFromFile); the title and
    author found in the document information of in_pdf are set again
    explicitly, followed by the creator, language, publisher, DOI, keywords,
    copyright and the volume/number/ISSN/issue name when available.

    exiftool is called with an argument list (no shell), so values
    containing quotes or other shell metacharacters are passed verbatim.

    Returns the output of exiftool (bytes).
    Raises subprocess.CalledProcessError if exiftool exits with an error.
    """
    reader = pypdf.PdfReader(in_pdf, strict=False)

    # A PDF without document information would otherwise break the "in" tests
    metadata = reader.metadata or {}

    args = ["exiftool", "-tagsFromFile", in_pdf]
    if in_pdf == out_pdf:
        args.append("-overwrite_original_in_place")

    container = article.my_container
    collection = article.get_collection()

    _msc_kwds, kwds, _trans_kwds = article.get_kwds_by_type()
    keywords = ", ".join(str(x.value) for x in kwds)

    lang = {"fr": "fr-FR", "en": "en-GB"}.get(article.lang, "")

    if "/Title" in metadata:
        args.append(f"-Title={metadata['/Title']}")
    if "/Author" in metadata:
        args.append(f"-Author={metadata['/Author']}")

    args.append("-Creator=Centre Mersenne")
    args.append("-Subject=")
    if lang:
        # NOTE(review): this used to be "-xmp-dc-Language"; every other tag
        # uses the GROUP:TAG form, so the dash looked like a typo - confirm.
        args.append(f"-xmp-dc:Language={lang}")
    args.append(f"-xmp-dc:publisher={container.my_publisher.pub_name}")
    args.append(f"-xmp-prism:DOI={article.doi}")
    args.append(f"-Keywords={keywords}")
    args.append(f"-xmp-xmp:Keywords={keywords}")
    args.append(f"-xmp-pdf:Keywords={keywords}")
    args.append("-xmp-pdf:Copyright=© The author(s)")

    optional_tags = (
        ("-xmp-prism:Volume", container.volume),
        ("-xmp-prism:Number", container.number),
        ("-xmp-prism:ISSN", collection.issn),
        ("-xmp-prism:EISSN", collection.e_issn),
        ("-xmp-prism:IssueName", container.title_tex),
    )
    args.extend(f"{tag}={value}" for tag, value in optional_tags if value)

    args.append(out_pdf)

    return subprocess.check_output(args)

643 

644 

def replace_front_page(
    article, article_tex_name, fp_pdf_file_name, content_pdf_file_name, final_pdf_file_name
):
    """
    Replace the first page of an article pdf with a freshly compiled one.

    At this point the PDF has been recompiled, possibly with a new template.
    The 1st page of the new PDF (fp_pdf_file_name) is combined with the
    other pages of the saved PDF (content_pdf_file_name).  Both files are
    fetched from mathdoc-tex, merged locally under settings.LOG_DIR/tmp/,
    and the result is copied back to final_pdf_file_name on mathdoc-tex.

    The outline and named destinations of the content PDF are copied, the
    merged file is passed to bin/update_pdf.sh and add_metadata, and page
    labels are set when the article has a first page.
    """
    user = settings.MERSENNE_TEX_USER
    tmp_folder = os.path.join(settings.LOG_DIR, "tmp/")

    # Copy the PDF files locally (pypdf is installed in ptf-tools)
    local_fp_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_FP")
    cmd = f"scp {user}@mathdoc-tex:{fp_pdf_file_name} {local_fp_pdf}"
    utils.execute_cmd(cmd)

    local_content_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_content")
    cmd = f"scp {user}@mathdoc-tex:{content_pdf_file_name} {local_content_pdf}"
    utils.execute_cmd(cmd)

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # First page of the new front page pdf
    for index, current_page in enumerate(pdf_reader_fp.pages):
        if index == 0:
            pdf_writer.add_page(current_page)

    # Every page but the first of the content pdf
    for index, current_page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(current_page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    merged_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf_merged")
    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Compiled PDF are sometimes buggy (wrong xref table). Use pdftk to fix the file.
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    cmd = f"{ptf_tools_bin}/update_pdf.sh {local_content_pdf} {merged_pdf}"
    utils.execute_cmd(cmd)

    # Add metadata to the PDF, including EXIF data
    add_metadata(article, local_content_pdf, merged_pdf)

    # pypdf creates a PDF that starts on page 1, fix it
    if article.fpage:
        local_pdf = os.path.join(tmp_folder, article_tex_name + ".pdf")

        is_roman = False
        try:
            first_page = int(article.fpage)
        except ValueError:
            first_page = xml_utils.roman_to_int(article.fpage)
            is_roman = True

        reader = pypdf.PdfReader(merged_pdf)
        writer = pypdf.PdfWriter()
        for page in reader.pages:
            writer.add_page(page)

        # NOTE(review): the label range ends at first_page - 1 and no start
        # number is given - confirm this is the intended labelling.
        style = "/r" if is_roman else "/D"
        writer.set_page_label(page_index_from=0, page_index_to=first_page - 1, style=style)
        writer.write(local_pdf)
        writer.close()
    else:
        # No first page: there are no labels to fix, so the relabelled copy
        # is never written. Upload the merged pdf itself instead of a file
        # that does not exist.
        local_pdf = merged_pdf

    # copy to mersenne-tex
    cmd = f"scp {local_pdf} {user}@mathdoc-tex:{final_pdf_file_name}"
    utils.execute_cmd(cmd)

721 

722 

def _upload_tex_lines(lines, user, remote_tex_file_name, prefix):
    """Write *lines* to a local temporary file and scp it to mathdoc-tex.

    :param lines: iterable of strings; they are concatenated without separator
    :param user: remote user name used for the scp connection
    :param remote_tex_file_name: destination path on mathdoc-tex
    :param prefix: prefix (directory + "/") under which the temporary file is created
    """
    # The context manager guarantees the handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=prefix, delete=False
    ) as tmp_file:
        tmp_file.write("".join(lines))
        local_path = tmp_file.name

    # copy to mersenne-tex
    cmd = f"scp {local_path} {user}@mathdoc-tex:{remote_tex_file_name}"
    utils.execute_cmd(cmd)
    # NOTE(review): the temporary file is intentionally left on disk, as in the
    # original code (its os.unlink call was commented out) - confirm whether it
    # can be removed once the copy has succeeded.


def compile_article(
    article,
    colid,
    issue_id,
    article_path,
    article_tex_name,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """Recompile an article on mathdoc-tex and publish the resulting PDF locally.

    The existing PDF (and the cfg file, when there is no translation language) is
    first saved with a ``_SAV`` suffix. Unless ``skip_compilation`` is set, the
    dates of the TeX source are replaced, the article is recompiled through
    ``create_frontpage.sh`` and the TeX source is then protected. Finally the PDF
    is linearized into ``settings.MERSENNE_TEST_DATA_FOLDER``.

    :param article: article object; its ``datastream_set`` is queried for the
        ``application/pdf`` datastream, and it is passed to ``replace_dates_in_tex``
        and ``replace_front_page``
    :param colid: collection id, forwarded to ``replace_dates_in_tex``
    :param issue_id: not used by this function (kept for the callers)
    :param article_path: folder holding the tex/cfg/pdf files of the article
    :param article_tex_name: base name (no extension) of the article files
    :param replace_frontpage_only: when True, work on ``_FP`` copies of the files and
        merge the new front page with the saved content through ``replace_front_page``
    :param skip_compilation: when True, skip the cfg/tex rewriting and the remote
        compilation
    :param lang: translation language; when not empty it is appended to the base
        name as ``-<lang>`` and the cfg handling is skipped
    """
    user = settings.MERSENNE_TEX_USER

    if lang != "":
        article_tex_name += "-" + lang

    article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
    # Regular compilation: compiled_pdf and final_pdf are the same
    # Recompilation of the front page: compiled_pdf is the entire pdf with the new front page
    #                                  final_pdf is the pdf after the merge (new front page; old content)
    compiled_pdf_file_name = final_pdf_file_name = os.path.join(
        article_path, article_tex_name + ".pdf"
    )
    content_pdf_file_name = compiled_pdf_file_name + "_SAV"

    # Save the pdf file
    cmd = f"ssh {user}@mathdoc-tex cp {compiled_pdf_file_name} {content_pdf_file_name}"
    utils.execute_cmd(cmd)

    # Save the cfg file (no cfg for translations)
    if lang == "":
        cmd = f"ssh {user}@mathdoc-tex cp {article_cfg_file_name} {article_cfg_file_name}_SAV"
        utils.execute_cmd(cmd)

    # Temporary files are created under LOG_DIR/tmp: according to the original
    # comment, apache on ptf-tools may not be allowed to write in /tmp.
    prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(prefix)

    if replace_frontpage_only:
        # Copy CFG/TEX/PDF to a new name. pdflatex will generate new files, thus preserving existing files
        article_tex_name2 = article_tex_name + "_FP"

        cmd = f"ssh {user}@mathdoc-tex rm -f {os.path.join(article_path, article_tex_name2)}.*"
        utils.execute_cmd(cmd)

        article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")
        article_tex_file_name2 = os.path.join(article_path, article_tex_name2 + ".tex")
        cmd = f"ssh {user}@mathdoc-tex cp {article_tex_file_name} {article_tex_file_name2}"
        utils.execute_cmd(cmd)

        # The cfg copy is stored directly as "<name>_FP.cfg_SAV": it is the input
        # of the sed commands run below.
        article_cfg_file_name2 = os.path.join(article_path, article_tex_name2 + ".cfg")
        cmd = f"ssh {user}@mathdoc-tex cp {article_cfg_file_name} {article_cfg_file_name2}_SAV"
        utils.execute_cmd(cmd)

        article_cdrdoidates_file_name = os.path.join(
            article_path, article_tex_name + ".cdrdoidates"
        )
        if os.path.isfile(article_cdrdoidates_file_name):
            article_cdrdoidates_file_name2 = os.path.join(
                article_path, article_tex_name2 + ".cdrdoidates"
            )
            cmd = f"ssh {user}@mathdoc-tex cp {article_cdrdoidates_file_name} {article_cdrdoidates_file_name2}"
            utils.execute_cmd(cmd)

        # From now on, every file name refers to the _FP copies
        article_tex_name = article_tex_name2
        article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")
        compiled_pdf_file_name = os.path.join(article_path, article_tex_name + ".pdf")
        final_pdf_file_name = compiled_pdf_file_name + ".new"

    if not skip_compilation:
        # Remove \ItIsPublished from the cfg file
        if lang == "":
            cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\ItIsPublished//' {article_cfg_file_name}_SAV > {article_cfg_file_name}.1"'''
            utils.execute_cmd(cmd)
            cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\gdef \\\\\\\\CDRpublished {{true}}//' {article_cfg_file_name}.1 > {article_cfg_file_name}"'''
            utils.execute_cmd(cmd)

        article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")

        # Save the tex file
        cmd = f"ssh {user}@mathdoc-tex cp {article_tex_file_name} {article_tex_file_name}_SAV"
        utils.execute_cmd(cmd)

        lines = read_tex_file(article_tex_file_name)
        new_lines, bib_name = replace_dates_in_tex(
            lines, article, colid, replace_frontpage_only, lang=lang
        )

        if bib_name and replace_frontpage_only:
            convert_file_to_utf8(article_path, bib_name + ".bib", bib_name + "_FP.bib")

        # Upload the tex source with the new dates
        _upload_tex_lines(new_lines, user, article_tex_file_name, prefix)

        # recompile article
        ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
        # execute script to compile
        cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name}"
        utils.execute_cmd(cmd)

        # Protect the tex file with the 'published' option
        new_lines = protect_tex(new_lines)

        # Upload the protected tex source (it replaces the one used to compile)
        _upload_tex_lines(new_lines, user, article_tex_file_name, prefix)

    if replace_frontpage_only:
        # At the point the PDF has been recompiled, possibly with a new template
        # Use the 1st page of the new PDF with the other pages of the .pdf_SAV
        replace_front_page(
            article,
            article_tex_name,
            compiled_pdf_file_name,
            content_pdf_file_name,
            final_pdf_file_name,
        )

    # Copy PDF to MERSENNE_TEST_DATA_FOLDER
    datastream = article.datastream_set.filter(mimetype="application/pdf").get()
    to_path = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, datastream.location)
    # remove destination if exists to test if final pdf is really created
    if os.path.exists(to_path):
        os.remove(to_path)
    utils.linearize_pdf(final_pdf_file_name, to_path)

861 

862 # if not replace_frontpage_only: 

863 # # Add EXIF metadata in the final PDF (replace_front_page already does it) 

864 # add_metadata(article, to_path, to_path) 

865 

866 

def create_frontpage(
    colid,
    container,
    updated_articles,
    test=True,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """Recompile the given articles so that their PDF carries an up-to-date front page.

    Flow described by the original comments:
      - get the directory of the article sources (cedram_dev/production/...)
      - add the publication date in the source TeX
      - remotely execute latexmk -pdf article.pdf
      - replace the pdf of the article on mersenne_test_data

    :param colid: collection id. "PCJ" articles go through ``create_tex_for_pcj`` /
        ``compile_tex``; nothing is done for the "CR*" collections listed below when
        the container year is before 2020 and ``lang`` is empty
    :param container: issue object; ``year`` and ``pid`` are read
    :param updated_articles: iterable of the articles to recompile
    :param test: when True, no article is compiled (except for the PCJ collection)
    :param replace_frontpage_only: forwarded to ``compile_article``
    :param skip_compilation: forwarded to ``compile_article``
    :param lang: forwarded to ``compile_article``
    :raises Exception: if an article has no ojs-id
    """
    # TODO refactor the code and only use compile_tex for all collections

    if colid == "PCJ":
        for article in updated_articles:
            lines = create_tex_for_pcj(article)
            compile_tex(lines, article, update=True)
        return

    try:
        year = int(container.year)
    except (TypeError, ValueError):
        # A missing year (None) is handled like an unparseable one
        year = 0

    if (
        colid in ["CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL"]
        and year < 2020
        and lang == ""
    ):
        # No front page for Elsevier CRAS
        return

    issue_id = container.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue_id)

    # No try/except around the loop: there is no rollback to perform because
    # nothing is modified in the database (a pdf_SAV could possibly be restored).
    for article in updated_articles:
        # article path
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_path, article_tex_name)

        # Notes translated from the original comments. They describe the intended
        # flow; the code below only compiles outside of test mode, for articles
        # having an online-first or a published date.
        # - in test mode:
        #     date_pre_publish has been updated but not the other dates.
        #     A temporary XXXX-XX-XX date is only created for online_first
        #     (otherwise, when the online_first goes to production, the presence of
        #     the pdftimestamp file with XXXX-XX-XX would display that date)
        #     - if article.my_container.with_online_first and there is no
        #       article.date_online_first: XXXX-XX-XX is used for online first
        #       (an article with an online-first date has presumably already
        #       been recompiled)
        # - when going to production, the dates of the article are used
        #     if container.with_online_first:
        #         article.date_online_first goes in the right file
        #     if article.date_published: the corresponding file is updated
        if not test and (article.date_online_first or article.date_published):
            compile_article(
                article,
                colid,
                issue_id,
                article_path,
                article_tex_name,
                replace_frontpage_only,
                skip_compilation,
                lang,
            )

951 

952 

953def create_translated_pdf( 

954 article, xml_content, lang, pdf_file_name, html_file_name, skip_compilation=False 

955): 

956 user = settings.MERSENNE_TEX_USER 

957 

958 issue_path = resolver.get_cedram_issue_tex_folder( 

959 article.get_top_collection().pid, article.my_container.pid 

960 ) 

961 article_tex_name = article.get_ojs_id() 

962 if not article_tex_name: 

963 raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path") 

964 article_path = os.path.join(issue_path, article_tex_name) 

965 

966 xml_base_name = article_tex_name + ".xml" 

967 local_xml = os.path.join(settings.LOG_DIR, "tmp", xml_base_name) 

968 remote_xml = os.path.join(article_path, xml_base_name) 

969 

970 if not skip_compilation: 

971 # Create the XML file locally 

972 with open(local_xml, "w", encoding="utf-8") as file_: 

973 file_.write(xml_content) 

974 

975 # Copy XML file to mersenne-tex 

976 cmd = f"scp {local_xml} {user}@mathdoc-tex:{remote_xml}" 

977 utils.execute_cmd(cmd) 

978 

979 remote_html_base_name = f"trad-{lang}.html" 

980 remote_html = os.path.join(article_path, remote_html_base_name) 

981 # Copy HTML file to mersenne-tex 

982 cmd = f"scp {html_file_name} {user}@mathdoc-tex:{remote_html}" 

983 utils.execute_cmd(cmd) 

984 

985 # Create the PDF 

986 ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin") 

987 # execute script to compile 

988 cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/translate_article.sh {article_path} {xml_base_name} {remote_html_base_name} {lang}" 

989 utils.execute_cmd(cmd) 

990 

991 remote_pdf_base_name = f"{article_tex_name}-{lang}.pdf" 

992 remote_pdf = os.path.join(article_path, remote_pdf_base_name) 

993 # pdf-traduction should have created remote.pdf 

994 # Copy the PDF file 

995 cmd = f"scp {user}@mathdoc-tex:{remote_pdf} {pdf_file_name}" 

996 utils.execute_cmd(cmd)