Coverage for apps/ptf/tex.py: 71%

1import os

2import subprocess

3import tempfile

5import pypdf

6from pylatexenc.latexencode import unicode_to_latex

8from django.conf import settings

10from ptf import model_helpers

11from ptf import models

12from ptf import utils

13from ptf.cmds.xml import xml_utils

14from ptf.cmds.xml.jats.jats_parser import get_tex_from_xml

15from ptf.display import resolver

def get_tex_keyword_date_published(colid, article, lang=""):
    """
    Return the TeX command that carries the publication date of an article.

    :param colid: collection id (ex: "AIF", "CRMATH")
    :param article: object exposing ``date_online_first``; only read for the
        Comptes Rendus collections
    :param lang: when not empty, the translation command is returned whatever
        the collection
    :return: the TeX command name, without its argument
    """
    posted = "\\dateposted"
    published = "\\datepublished"

    if colid in ("AHL", "AIF", "OGEO", "JTNB"):
        keyword = posted
    elif (
        colid in ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")
        and not article.date_online_first
    ):
        # Comptes Rendus article that has no online-first date
        keyword = posted
    else:
        keyword = published

    # The language override is applied last, so it always wins
    if lang != "":
        keyword = "\\CDRsetmeta{traduction_date_posted}"

    return keyword

def get_tex_keyword_date_online_first(colid, article, lang=""):
    """
    Return the TeX command that carries the online-first date of an article.

    The three arguments are accepted but not consulted: the same command is
    returned for every collection, article and language.
    """
    keyword = "\\dateposted"
    return keyword

def read_tex_file(filename):
    """
    Read a tex file, trying utf-8 first and falling back to iso-8859-1.

    :param filename: path of the file to read
    :return: the list of lines of the file (line endings included), or an
        empty list when ``filename`` is not an existing regular file
    """
    if not os.path.isfile(filename):
        return []

    try:
        with open(filename, encoding="utf-8") as tex_file:
            return tex_file.readlines()
    except UnicodeDecodeError:
        # Not valid utf-8: fall through to the legacy encoding
        pass

    with open(filename, encoding="iso-8859-1") as tex_file:
        return tex_file.readlines()

def convert_file_to_utf8(article_path, from_name, to_name):
    """
    Re-encode a file in utf-8 and upload it to mathdoc-tex under a new name.

    The source ``article_path/from_name`` is read with read_tex_file (utf-8 or
    iso-8859-1), written to a local temporary utf-8 file, then copied with scp
    to ``article_path/to_name`` on mathdoc-tex.

    :param article_path: folder of the article
    :param from_name: name of the file to read
    :param to_name: name of the utf-8 copy created on mathdoc-tex
    """
    lines = read_tex_file(os.path.join(article_path, from_name))

    user = settings.MERSENNE_TEX_USER
    prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(prefix)

    # delete=False: the file has to outlive its handle so that scp can read it.
    # The context manager guarantees the handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=prefix, delete=False
    ) as tmp_file:
        tmp_file.write("".join(lines))
        fpath = tmp_file.name  # ex: /tmp/Rxsft

    out_filename = os.path.join(article_path, to_name)
    # copy to mersenne-tex
    cmd = f"scp {fpath} {user}@mathdoc-tex:{out_filename}"
    utils.execute_cmd(cmd)

def write_tex_file(filename, lines, create_temp_file=False):
    """
    Write the lines of a tex file in utf-8.

    :param filename: destination path; ignored when ``create_temp_file`` is true
    :param lines: list of strings, concatenated as is (no separator is added)
    :param create_temp_file: when true, write to a new temporary file created
        under ``settings.LOG_DIR``/tmp/ (the file is not deleted afterwards)
    :return: the path of the file that was written
    """
    if not create_temp_file:
        with open(filename, "w", encoding="utf-8") as tex_file:
            tex_file.write("".join(lines))
        return filename

    tmp_prefix = os.path.join(settings.LOG_DIR, "tmp/")
    resolver.create_folder(tmp_prefix)

    tmp_file = tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", prefix=tmp_prefix, delete=False
    )
    tmp_path = tmp_file.name  # ex: /tmp/Rxsft
    tmp_file.write("".join(lines))
    tmp_file.close()
    return tmp_path

def insert_date_published(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the publication date command(s) of an article in a list of TeX lines.

    The line ``<keyword>{YYYY-MM-DD}`` is inserted at ``begin_document_pos``.
    For an article of a finalized thematic issue (issue with a title and
    without online first) that has no online-first date, a second
    ``\\datepublished{...}`` line is inserted when the first keyword was a
    different command.

    :param new_lines: list of TeX lines, modified in place
    :param article: article providing date_published, date_online_first and
        my_container
    :param colid: collection id, used to choose the keyword
    :param begin_document_pos: index where the first line is inserted
    :param lang: forwarded to get_tex_keyword_date_published
    :return: ``begin_document_pos`` shifted by the number of inserted lines
    """
    published = article.date_published
    if published is None:
        # No date to write. Returning here also keeps the thematic-issue branch
        # below from running without a keyword and without a date to format.
        return begin_document_pos

    keyword = get_tex_keyword_date_published(colid, article, lang)
    date_str = published.strftime("%Y-%m-%d")
    new_lines.insert(begin_document_pos, f"{keyword}{{{date_str}}}\n")
    begin_document_pos += 1

    container = article.my_container
    if container is None:
        return begin_document_pos

    is_thematic_issue = len(container.title_html) > 0
    is_issue_finalized = not container.with_online_first

    if is_thematic_issue and is_issue_finalized and article.date_online_first is None:
        # Finalized thematic issue where the article did not go through online first
        # => Add \datepublished so that "Issue date :" appears in the PDF
        issue_keyword = "\\datepublished"
        if issue_keyword != keyword:
            new_lines.insert(begin_document_pos, f"{issue_keyword}{{{date_str}}}\n")
            begin_document_pos += 1

    return begin_document_pos

112

113

def insert_date_online_first(new_lines, article, colid, begin_document_pos, lang=""):
    """
    Insert the online-first date command of an article in a list of TeX lines.

    :param new_lines: list of TeX lines, modified in place
    :param article: article providing date_online_first
    :param colid: collection id, forwarded to the keyword helper
    :param begin_document_pos: index where the line is inserted
    :param lang: forwarded to the keyword helper
    :return: ``begin_document_pos`` + 1 if a line was inserted, unchanged when
        the article has no online-first date
    """
    online_first = article.date_online_first
    if online_first is None:
        return begin_document_pos

    command = get_tex_keyword_date_online_first(colid, article, lang)
    formatted = online_first.strftime("%Y-%m-%d")
    new_lines.insert(begin_document_pos, f"{command}{{{formatted}}}\n")

    return begin_document_pos + 1

122

123

def insert_end_page(new_lines, article, colid, begin_document_pos):
    """
    Insert the TeX definition of the last page of an article.

    The definition is inserted at ``begin_document_pos + 1``; nothing is
    inserted when the article has no last page. ``colid`` is not used.

    :param new_lines: list of TeX lines, modified in place
    :param article: article providing lpage (a string)
    :return: ``begin_document_pos``, unchanged
    """
    last_page = article.lpage
    if last_page:
        end_page_def = "".join(
            ("\\makeatletter\\def\\cdr@end@page{", last_page, "}\\makeatother\n")
        )
        new_lines.insert(begin_document_pos + 1, end_page_def)

    return begin_document_pos

130

131

def replace_dates_in_tex(lines, article, colid, replace_frontpage_only=False, lang=""):
    r"""
    Add or replace \dateposted and \datepublished in the source TeX.

    :param lines: list of the lines of the source TeX
    :param article: article whose dates (and last page) are written
    :param colid: collection id, used to choose the date keywords
    :param replace_frontpage_only: when true, existing date commands are
        dropped, \bibliography{X} is rewritten as \bibliography{X_FP} and the
        last-page definition is inserted after \begin{document}
    :param lang: forwarded to the keyword/insert helpers
    :return: tuple (new_lines, bib_name); bib_name stays "" unless a
        \bibliography line was rewritten
    """
    published_cmd = f"{get_tex_keyword_date_published(colid, article, lang)}{{"
    online_first_cmd = f"{get_tex_keyword_date_online_first(colid, article, lang)}{{"
    # Every date command that may already be present in a source file
    any_date_cmd = (
        "\\datepublished{",
        "\\dateposted{",
        "\\CDRsetmeta{traduction_date_posted}{",
    )

    new_lines = []
    bib_name = ""
    seen_online_first = False
    seen_published = False
    begin_document_pos = -1
    skipped = 0

    for index, line in enumerate(lines):
        # Position of the first character that is neither a space nor a tab
        start = len(line) - len(line.lstrip(" \t"))

        if start >= len(line) or line[start] == "%":
            # Whitespace-only line or comment: copied verbatim
            new_lines.append(line)
            continue

        if replace_frontpage_only and line.startswith(any_date_cmd, start):
            # Existing dates are dropped; they are re-inserted after the loop
            skipped += 1

        elif line.startswith(published_cmd, start):
            # replace existing \datepublished
            seen_published = True
            insert_date_published(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith(online_first_cmd, start):
            # replace existing \dateposted
            seen_online_first = True
            insert_date_online_first(new_lines, article, colid, len(new_lines), lang=lang)

        elif line.startswith("\\begin{document", start):
            # \begin{document}: remember where to add the dates if not present
            # NOTE(review): this index only accounts for the date lines skipped
            # above; it presumably drifts from the position in new_lines when an
            # earlier line was dropped or replaced by 0 or 2 lines - confirm.
            begin_document_pos = index - skipped
            new_lines.append(line)

        elif line.startswith(("\\documentclass", "{\\documentclass"), start):
            # remove published from \documentclass to allow compilation
            without_published = (
                line.replace(",published,", ",")
                .replace(",published", "")
                .replace("published", "")
            )
            new_lines.append(without_published)

        elif line.startswith("\\makeatletter\\def\\cdr@end@page", start):
            # Command to specify the last page (present in the front page).
            # It is dropped here; insert_end_page adds it after \begin{document}
            pass

        elif (
            replace_frontpage_only
            and line.startswith("\\bibliography", start)
            and not line.startswith("\\bibliographystyle", start)
        ):
            closing = line.find("}")
            if closing > 0:
                # 14 is the length of "\bibliography{"
                bib_name = line[start + 14 : closing]
                new_lines.append("\\bibliography{" + bib_name + "_FP}\n")

        else:
            new_lines.append(line)

    if begin_document_pos > 0 and not seen_online_first:
        begin_document_pos = insert_date_online_first(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if begin_document_pos > 0 and not seen_published:
        begin_document_pos = insert_date_published(
            new_lines, article, colid, begin_document_pos, lang=lang
        )

    if replace_frontpage_only and begin_document_pos > 0:
        begin_document_pos = insert_end_page(new_lines, article, colid, begin_document_pos)

    return new_lines, bib_name

229

230

def protect_tex(lines, keyword="published"):
    r"""
    Add an option to the \documentclass of a cedram TeX source.

    ``keyword`` is inserted before the "]" that closes the options of
    \documentclass, whether the options fit on one line or span several
    lines. Only lines containing "{cedram" are modified ({article} is
    ignored). Comment lines are left untouched.

    :param lines: list of the lines of the source TeX
    :param keyword: option to add
    :return: a new list of lines
    """
    protected = []
    # True while the "]" closing the \documentclass options has not been seen
    awaiting_bracket = False

    for line in lines:
        # Position of the first character that is neither a space nor a tab
        first = len(line) - len(line.lstrip(" \t"))
        is_code = first < len(line) and line[first] != "%"

        if is_code and line.startswith(("\\documentclass", "{\\documentclass"), first):
            # add published to \documentclass after compilation
            closing = line.find("]")
            if closing <= 0:
                # The options continue on the following lines
                awaiting_bracket = True
            elif line.find("{cedram") > 0:  # Ignore {article}
                line = line[0:closing] + "," + keyword + line[closing:]

        elif is_code and awaiting_bracket:
            closing = line.find("]")
            if closing == first:
                # The line starts with "]": the option goes on its own line before it
                if line.find("{cedram") > 0:  # Ignore {article}
                    protected.append(f",{keyword}\n")
                awaiting_bracket = False
            elif closing > -1:
                if line.find("{cedram") > 0:  # Ignore {article}
                    line = line[0:closing] + "," + keyword + line[closing:]
                awaiting_bracket = False

        protected.append(line)

    return protected

268

269

def get_tex_corresponding_emails(author_contributions):
    r"""
    Return the TeX-encoded emails of the corresponding authors.

    Only contributions flagged as corresponding and having a non-empty email
    are kept. Each email goes through unicode_to_latex, then "\_" is turned
    back into "_".

    :param author_contributions: iterable of contributions exposing
        ``corresponding`` and ``email``
    :return: list of strings
    """
    return [
        unicode_to_latex(contribution.email).replace(r"\_", r"_")
        for contribution in author_contributions
        if contribution.corresponding and contribution.email
    ]

278

279

def get_tex_authors(author_contributions):
    r"""
    Return the TeX lines describing the authors of an article.

    For each contribution, the following lines are produced:
        \author{\firstname{Antoine} \lastname{Lavoisier}}
        \address{Rue sans aplomb, Paris, France}    (one per address)
        \email{a-lavois@lead-free-univ.edu}         (corresponding authors only)
    followed by an empty line. \CDRorcid{...}, \IsEqualContrib and \dead are
    appended inside \author{...} when the matching attributes are set.

    :param author_contributions: iterable of contributions
    :return: list of strings, each ending with a newline
    """
    lines = []

    for contribution in author_contributions:
        first_name = unicode_to_latex(contribution.first_name)
        last_name = unicode_to_latex(contribution.last_name)

        author = [f"\\author{{\\firstname{{{first_name}}} \\lastname{{{last_name}}}"]
        if contribution.orcid:
            author.append(f"\\CDRorcid{{{contribution.orcid}}}")
        if contribution.equal_contrib:
            author.append("\\IsEqualContrib")
        if contribution.deceased_before_publication:
            author.append("\\dead")
        author.append("}\n")
        lines.append("".join(author))

        lines.extend(
            f"\\address{{{unicode_to_latex(contribaddress.address)}}}\n"
            for contribaddress in contribution.contribaddress_set.all()
        )

        # Truthiness test (same as get_tex_corresponding_emails): a missing
        # email (None) is treated like an empty one instead of raising
        if contribution.corresponding and contribution.email:
            email = unicode_to_latex(contribution.email)
            lines.append(f"\\email{{{email}}}\n")

        lines.append("\n")

    return lines

311

312

def create_tex_for_pcj(article):
    r"""
    Create the TeX source of the front page of a PCJ article.

    The source declares the article metadata (issue, DOI, PCI section, title,
    authors, abstract, publication date) and includes the PDF of the article
    with \PCIincludepdf{article_<pid>.pdf}.

    :param article: the article
    :return: list of strings to concatenate to get the tex file
    """
    pci = article.get_pci_section()

    extid = model_helpers.get_extid(article, "rdoi")
    rdoi = extid.id_value if extid is not None else ""

    lines = [
        "\\documentclass[PCJ,Unicode,screen,Recup]{cedram}\n",
        "\\usepackage{pax}\n",
        # the second newline leaves an empty line after the packages
        "\\usepackage{mathrsfs}\n\n",
        "\\issueinfo{"
        + article.my_container.volume
        + "}{}{}{"
        + article.my_container.year
        + "}\n",
        f"\\renewcommand*{{\\thearticle}}{{{article.article_number}}}\n",
        f"\\DOI{{{article.doi}}}\n",
        f"\\RDOI{{{rdoi}}}\n",
        f"\\setPCI{{{pci}}}\n",
        # Ends with a newline like every other entry, so that the next command
        # does not end up on the same line
        f"\\CDRsetmeta{{articletype}}{{{article.atype}}}\n",
    ]

    conf = article.get_conference()
    if len(conf) > 0:
        lines.append(f"\\setPCIconf{{{conf}}}\n")

    author_contributions = article.get_author_contributions()

    for email in get_tex_corresponding_emails(author_contributions):
        lines.append(f"\\PCIcorresp{{{email}}}\n")

    lines.append("\n")

    # \title[Sample for the template]{Sample for the template, with quite a very long title}
    # The inline HTML tags are swapped for markers before unicode_to_latex (so
    # that they are not escaped), then the markers are turned into TeX commands
    tex_commands = (
        ("i", "\\protect\\emph{"),
        ("sup", "\\protect\\textsuperscript{"),
        ("sub", "\\protect\\textsubscript{"),
    )
    title = article.title_tex
    for tag, _ in tex_commands:
        title = title.replace(f"<{tag}>", f"|||{tag}|||").replace(f"</{tag}>", f"|||/{tag}|||")
    title = unicode_to_latex(title)
    for tag, command in tex_commands:
        title = title.replace(f"|||{tag}|||", command).replace(f"|||/{tag}|||", "}")
    lines.append(f"\\title{{{title}}}\n")
    lines.append("\n")
    lines.extend(get_tex_authors(author_contributions))

    # No keywords for PCJ

    abstracts = article.get_abstracts()
    if len(abstracts) > 0:
        # Only the first abstract is used; its XML is converted by get_tex_from_xml
        abstract = abstracts.first()
        value = get_tex_from_xml(abstract.value_xml, "abstract", for_tex_file=True)

        lines.append("\\begin{abstract}\n")
        lines.append(value + "\n")
        lines.append("\\end{abstract}\n")

    # "AAAA-MM-DD" is the placeholder written while the article has no publication date
    date_ = article.date_published.strftime("%Y-%m-%d") if article.date_published else "AAAA-MM-DD"
    keyword = get_tex_keyword_date_published("PCJ", article)
    lines.append(f"{keyword}{{{date_}}}\n")

    lines.append("\\begin{document}\n")
    lines.append("\\maketitle\n")
    article_pdf = f"article_{article.pid}.pdf"
    lines.append(f"\\PCIincludepdf{{{article_pdf}}}\n")

    lines.append("\\end{document}\n")

    return lines

431

432

def compile_tex(lines, article, update=False):
    """
    Compile the tex source of an article on mathdoc-tex.

    1) Create a tex file from the list of lines
    2) Upload the file to mathdoc-tex (+ the pdf for PCJ)
    3) Compile the file
    4) Replace the pdf in /mersenne_test_data
    5) linearize the pdf
    TODO: merge ptf_tools/views create_frontpage (not done while PCJ is unstable to avoid compilation bugs in prod)

    :param lines: list of strings; the content of the tex file
    :param article: the article to compile
    :param update: when false, the article folder is created on mathdoc-tex
        and the pdf of the article is uploaded before the compilation
    :return: the path of the pdf in the test data folder, or None when the
        site is not ptf_tools
    :raises Exception: if a non PCJ article has no ojs-id
    """
    # Only allowed on ptf-tools
    if settings.SITE_NAME != "ptf_tools":
        return

    user = settings.MERSENNE_TEX_USER
    issue = article.my_container
    colid = issue.my_collection.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue.pid)

    if colid == "PCJ":
        article_tex_name = article.pid
        article_path = os.path.join(issue_path, article_tex_name)
        article_pdf = f"article_{article.pid}.pdf"

        # NOTE(review): the folder creation and the pdf upload are treated as
        # PCJ-only, as stated in step 2 of the docstring - confirm.
        if not update:
            # Create the article folder
            utils.execute_cmd(f"ssh {user}@mathdoc-tex mkdir -p {article_path}")

            # copy the pdf to mersenne-tex
            source_folder = os.path.join(
                settings.RESOURCES_ROOT,
                resolver.get_relative_folder(colid, issue.pid, article.pid),
            )
            pdf_file_name = os.path.join(source_folder, article.pid + ".pdf")
            utils.execute_cmd(
                f"scp {pdf_file_name} {user}@mathdoc-tex:{article_path}/{article_pdf}"
            )
    else:
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_path, article_tex_name)
        article_pdf = ""

    remote_tex_file = os.path.join(article_path, article_tex_name + ".tex")
    local_tex_file = write_tex_file("", lines, create_temp_file=True)

    # copy to mersenne-tex
    utils.execute_cmd(f"scp {local_tex_file} {user}@mathdoc-tex:{remote_tex_file}")

    # recompile article: the local script is executed on mathdoc-tex
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    utils.execute_cmd(
        f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name} {colid} {article_pdf}"
    )

    # replace pdf
    cedram_pdf_location = os.path.join(article_path, article_tex_name + ".pdf")
    relative_folder = resolver.get_relative_folder(colid, issue.pid, article.pid)
    to_path = os.path.join(
        settings.MERSENNE_TEST_DATA_FOLDER, relative_folder, article.pid + ".pdf"
    )
    if settings.MERSENNE_CREATE_FRONTPAGE:
        utils.linearize_pdf(cedram_pdf_location, to_path)

    return to_path

500

501

def add_outline(reader, writer, outlines, parent=None):
    """
    Copy an outline (table of contents) from a PDF reader to a PDF writer.

    :param reader: source; used to get the page number of each outline item
    :param writer: destination; receives one outline item per source item
    :param outlines: list of outline items; a nested list holds the children
        of the item that precedes it
    :param parent: outline item of the writer under which the items are added
        (None for the top level)
    """
    child_parent = parent
    for item in outlines:
        # isinstance (rather than an exact type comparison) so that list
        # subclasses are also handled as nested levels
        if isinstance(item, list):
            add_outline(reader, writer, item, child_parent)
            continue

        title = item["/Title"]
        page_num = reader.get_destination_page_number(item)

        # The position inside the page is only preserved for /XYZ destinations
        fit = ()
        if item["/Type"] == "/XYZ":
            fit = (pypdf.generic.Fit("/XYZ", (item["/Left"], item["/Top"], 1)),)

        # The returned item becomes the parent of a nested list that follows
        child_parent = writer.add_outline_item(title, page_num, parent, None, False, False, *fit)

523

524

def test():
    """
    Developer scratch routine: merge two local PDF files, then exit.

    The first page of the front page PDF is followed by every page of the
    content PDF except its first one. The outline and the named destinations
    of the content PDF are copied, then the metadata is added with
    add_metadata, using the first article of the database.

    The paths are hard-coded to a developer machine. The function never
    returns: it ends by raising SystemExit.
    """
    local_fp_pdf = "/home/touvierj/Bureau/test_FP.pdf"
    local_content_pdf = "/home/touvierj/Bureau/test_content.pdf"
    merged_pdf = "/home/touvierj/Bureau/test_merged.pdf"

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # Front page: keep the first page only
    for index, current_page in enumerate(pdf_reader_fp.pages):
        if index == 0:
            pdf_writer.add_page(current_page)

    # Content: keep everything but the first page
    for index, current_page in enumerate(pdf_reader_content.pages):
        if index > 0:
            pdf_writer.add_page(current_page)

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Add metadata to the PDF, including EXIF data
    add_metadata(models.Article.objects.first(), local_content_pdf, merged_pdf)

    # The routine stops the process here. The page label experiment that used
    # to follow this point could never run and has been removed.
    raise SystemExit

581

582

def add_metadata(article, in_pdf, out_pdf):
    """
    Add metadata to a PDF with exiftool.

    The tags of ``in_pdf`` are copied to ``out_pdf`` (-tagsFromFile), then the
    title and author read from ``in_pdf`` and the values coming from the
    article, its container and its collection are set.

    exiftool is called with a list of arguments, without a shell: the values
    are passed verbatim, whatever quotes or special characters they contain.

    :param article: article providing lang, doi, keywords, container and collection
    :param in_pdf: path of the PDF the existing tags are read from
    :param out_pdf: path of the PDF to update (may be the same as ``in_pdf``)
    :return: the output of exiftool (bytes)
    :raises subprocess.CalledProcessError: if exiftool exits with an error
    """
    reader = pypdf.PdfReader(in_pdf, strict=False)
    # A PDF may have no document information at all
    metadata = reader.metadata or {}

    args = ["exiftool", "-tagsFromFile", in_pdf]
    if in_pdf == out_pdf:
        args.append("-overwrite_original_in_place")

    container = article.my_container
    collection = article.get_collection()

    msc_kwds, kwds, trans_kwds = article.get_kwds_by_type()
    keywords = ", ".join(str(kwd.value) for kwd in kwds)

    # Other languages are not written
    lang = {"fr": "fr-FR", "en": "en-GB"}.get(article.lang, "")

    for tag, key in (("Title", "/Title"), ("Author", "/Author")):
        value = metadata.get(key)
        if value is not None:
            args.append(f"-{tag}={value}")

    args.append("-Creator=Centre Mersenne")
    args.append("-Subject=")
    if lang:
        # NOTE(review): the other XMP tags are written "-xmp-<group>:<tag>";
        # "-xmp-dc-Language" looks like a typo for "-xmp-dc:Language" - confirm
        # with the exiftool documentation before changing it.
        args.append(f"-xmp-dc-Language={lang}")
    args.append(f"-xmp-dc:publisher={container.my_publisher.pub_name}")
    args.append(f"-xmp-prism:DOI={article.doi}")
    args.append(f"-Keywords={keywords}")
    args.append(f"-xmp-xmp:Keywords={keywords}")
    args.append(f"-xmp-pdf:Keywords={keywords}")
    args.append("-xmp-pdf:Copyright=© The author(s)")

    # Optional values: only written when they are set
    optional_tags = (
        ("xmp-prism:Volume", container.volume),
        ("xmp-prism:Number", container.number),
        ("xmp-prism:ISSN", collection.issn),
        ("xmp-prism:EISSN", collection.e_issn),
        ("xmp-prism:IssueName", container.title_tex),
    )
    for tag, value in optional_tags:
        if value:
            args.append(f"-{tag}={value}")

    args.append(out_pdf)

    return subprocess.check_output(args)

643

644

def replace_front_page(
    article, article_tex_name, fp_pdf_file_name, content_pdf_file_name, final_pdf_file_name
):
    """
    Replace the front page of the PDF of an article.

    At this point the PDF has been recompiled, possibly with a new template.
    The 1st page of the new PDF is used with the other pages of the saved PDF.
    The files are fetched from mathdoc-tex with scp, merged locally, then the
    result is copied back to mathdoc-tex.

    :param article: the article (used for the metadata and the first page)
    :param article_tex_name: base name used for the local temporary files
    :param fp_pdf_file_name: path on mathdoc-tex of the PDF with the new front page
    :param content_pdf_file_name: path on mathdoc-tex of the PDF providing the
        other pages, the outline and the named destinations
    :param final_pdf_file_name: path on mathdoc-tex of the PDF to create
    """
    user = settings.MERSENNE_TEX_USER

    # Copy the PDF files locally (pypdf is installed in ptf-tools)
    local_fp_pdf = os.path.join(settings.LOG_DIR, "tmp/", article_tex_name + ".pdf_FP")
    utils.execute_cmd(f"scp {user}@mathdoc-tex:{fp_pdf_file_name} {local_fp_pdf}")

    local_content_pdf = os.path.join(settings.LOG_DIR, "tmp/", article_tex_name + ".pdf_content")
    utils.execute_cmd(f"scp {user}@mathdoc-tex:{content_pdf_file_name} {local_content_pdf}")

    pdf_reader_fp = pypdf.PdfReader(local_fp_pdf, strict=False)
    pdf_reader_content = pypdf.PdfReader(local_content_pdf, strict=False)
    pdf_writer = pypdf.PdfWriter()

    # New front page: first page only
    fp_pages = pdf_reader_fp.pages
    if len(fp_pages) > 0:
        pdf_writer.add_page(fp_pages[0])

    # Old content: every page but the first one
    content_pages = pdf_reader_content.pages
    for index in range(1, len(content_pages)):
        pdf_writer.add_page(content_pages[index])

    # Add the Table of Contents (sidebar in a PDF reader)
    add_outline(pdf_reader_content, pdf_writer, pdf_reader_content.outline)

    # Add the anchors
    for dest in pdf_reader_content.named_destinations.values():
        pdf_writer.add_named_destination_object(dest)

    merged_pdf = os.path.join(settings.LOG_DIR, "tmp/", article_tex_name + ".pdf_merged")
    with open(merged_pdf, "wb") as f_:
        pdf_writer.write(f_)

    # Compiled PDF are sometimes buggy (wrong xref table). Use pdftk to fix the file.
    ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")
    utils.execute_cmd(f"{ptf_tools_bin}/update_pdf.sh {local_content_pdf} {merged_pdf}")

    # Add metadata to the PDF, including EXIF data
    add_metadata(article, local_content_pdf, merged_pdf)

    # pypdf creates a PDF that starts on page 1, fix it
    if article.fpage:
        local_pdf = os.path.join(settings.LOG_DIR, "tmp/", article_tex_name + ".pdf")

        try:
            first_page = int(article.fpage)
            style = "/D"
        except ValueError:
            # Not a decimal number: the first page is expected in roman numerals
            first_page = xml_utils.roman_to_int(article.fpage)
            style = "/r"

        reader = pypdf.PdfReader(merged_pdf)
        writer = pypdf.PdfWriter()
        for page in reader.pages:
            writer.add_page(page)

        # NOTE(review): the label is applied to the pages 0..first_page - 1 and
        # no start number is given; presumably the intent is to label all the
        # pages starting at first_page - confirm against the pypdf documentation.
        writer.set_page_label(page_index_from=0, page_index_to=first_page - 1, style=style)
        writer.write(local_pdf)
        writer.close()
    else:
        # No first page: there is nothing to relabel. Upload the merged PDF
        # itself; the relabelled file is not created in that case.
        local_pdf = merged_pdf

    # copy to mersenne-tex
    utils.execute_cmd(f"scp {local_pdf} {user}@mathdoc-tex:{final_pdf_file_name}")

721

722

723def compile_article(

724 article,

725 colid,

726 issue_id,

727 article_path,

728 article_tex_name,

729 replace_frontpage_only=False,

730 skip_compilation=False,

731 lang="",

732):

733 user = settings.MERSENNE_TEX_USER

734

735 if lang != "": 735 ↛ 736line 735 didn't jump to line 736, because the condition on line 735 was never true

736 article_tex_name += "-" + lang

737

738 article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")

739 # Regular compilation: compiled_pdf and final_pdf are the same

740 # recompilation of the front page: compiled_pdf is the entire pdf with the new front page

741 # final_pdf is the pdf after the merge (new front page; old content)

742 compiled_pdf_file_name = final_pdf_file_name = os.path.join(

743 article_path, article_tex_name + ".pdf"

744 )

745 content_pdf_file_name = compiled_pdf_file_name + "_SAV"

746

747 # Save the pdf file

748 cmd = f"ssh {user}@mathdoc-tex cp {compiled_pdf_file_name} {content_pdf_file_name}"

749 utils.execute_cmd(cmd)

750

751 # Save the cfg file (no cfg for translations)

752 if lang == "": 752 ↛ 757line 752 didn't jump to line 757, because the condition on line 752 was never false

753 cmd = f"ssh {user}@mathdoc-tex cp {article_cfg_file_name} {article_cfg_file_name}_SAV"

754 utils.execute_cmd(cmd)

755

756 # create temporarly file ! attention sur ptf-tools apache n'a pas le droit d'écrire ds /tmp ?!.

757 prefix = os.path.join(settings.LOG_DIR, "tmp/")

758 resolver.create_folder(prefix)

759

760 if replace_frontpage_only:

761 # Copy CFG/TEX/PDF to a new name. pdflatex will generate new files, thus preserving existing files

762 article_tex_name2 = article_tex_name + "_FP"

763

764 cmd = f"ssh {user}@mathdoc-tex rm -f {os.path.join(article_path, article_tex_name2)}.*"

765 utils.execute_cmd(cmd)

766

767 article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")

768 article_tex_file_name2 = os.path.join(article_path, article_tex_name2 + ".tex")

769 cmd = f"ssh {user}@mathdoc-tex cp {article_tex_file_name} {article_tex_file_name2}"

770 utils.execute_cmd(cmd)

771

772 article_cfg_file_name2 = os.path.join(article_path, article_tex_name2 + ".cfg")

773 cmd = f"ssh {user}@mathdoc-tex cp {article_cfg_file_name} {article_cfg_file_name2}_SAV"

774 utils.execute_cmd(cmd)

775

776 article_cdrdoidates_file_name = os.path.join(

777 article_path, article_tex_name + ".cdrdoidates"

778 )

779 if os.path.isfile(article_cdrdoidates_file_name): 779 ↛ 780line 779 didn't jump to line 780, because the condition on line 779 was never true

780 article_cdrdoidates_file_name2 = os.path.join(

781 article_path, article_tex_name2 + ".cdrdoidates"

782 )

783 cmd = f"ssh {user}@mathdoc-tex cp {article_cdrdoidates_file_name} {article_cdrdoidates_file_name2}"

784 utils.execute_cmd(cmd)

785

786 article_tex_name = article_tex_name2

787 article_cfg_file_name = os.path.join(article_path, article_tex_name + ".cfg")

788 compiled_pdf_file_name = os.path.join(article_path, article_tex_name + ".pdf")

789 final_pdf_file_name = compiled_pdf_file_name + ".new"

790

791 if not skip_compilation: 791 ↛ 843line 791 didn't jump to line 843, because the condition on line 791 was never false

792 # Remove \ItIsPublished from the cfg file

793 if lang == "": 793 ↛ 799line 793 didn't jump to line 799, because the condition on line 793 was never false

794 cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\ItIsPublished//' {article_cfg_file_name}_SAV > {article_cfg_file_name}.1"'''

795 utils.execute_cmd(cmd)

796 cmd = f'''ssh {user}@mathdoc-tex "sed 's/\\\\\\\\gdef \\\\\\\\CDRpublished {{true}}//' {article_cfg_file_name}.1 > {article_cfg_file_name}"'''

797 utils.execute_cmd(cmd)

798

799 article_tex_file_name = os.path.join(article_path, article_tex_name + ".tex")

800

801 # Save the tex file

802 cmd = f"ssh {user}@mathdoc-tex cp {article_tex_file_name} {article_tex_file_name}_SAV"

803 utils.execute_cmd(cmd)

804

805 lines = read_tex_file(article_tex_file_name)

806 new_lines, bib_name = replace_dates_in_tex(

807 lines, article, colid, replace_frontpage_only, lang=lang

808 )

809

810 if bib_name and replace_frontpage_only:

811 convert_file_to_utf8(article_path, bib_name + ".bib", bib_name + "_FP.bib")

812

813 f = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", prefix=prefix, delete=False)

814 fpath = f.name # ex: /tmp/Rxsft

815 f.write("".join(new_lines))

816 f.close()

817

818 # copy to mersenne-tex

819 cmd = f"scp {fpath} {user}@mathdoc-tex:{article_tex_file_name}"

820 utils.execute_cmd(cmd)

821 # os.unlink(f.name)

822

823 # recompile article

824 ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")

825 # execute script to compile

826 cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/create_frontpage.sh {article_path} {article_tex_name}"

827 utils.execute_cmd(cmd)

828

829 # Protect the tex file with the 'published' option

830 new_lines = protect_tex(new_lines)

831

832 # create temporarly file ! attention sur ptf-tools apache n'a pas le droit d'écrire ds /tmp ?!.

833 prefix = os.path.join(settings.LOG_DIR, "tmp/")

834 f = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", prefix=prefix, delete=False)

835 fpath = f.name # ex: /tmp/Rxsft

836 f.write("".join(new_lines))

837 f.close()

838

839 # copy to mersenne-tex

840 cmd = f"scp {fpath} {user}@mathdoc-tex:{article_tex_file_name}"

841 utils.execute_cmd(cmd)

842

843 if replace_frontpage_only:

844 # At the point the PDF has been recompiled, possibly with a new template

845 # Use the 1st page of the new PDF with the other pages of the .pdf_SAV

846 replace_front_page(

847 article,

848 article_tex_name,

849 compiled_pdf_file_name,

850 content_pdf_file_name,

851 final_pdf_file_name,

852 )

853

854 # Copy PDF to MERSENNE_TEST_DATA_FOLDER

855 datastream = article.datastream_set.filter(mimetype="application/pdf").get()

856 to_path = os.path.join(settings.MERSENNE_TEST_DATA_FOLDER, datastream.location)

857 # remove destination if exists to test if final pdf is really created

858 if os.path.exists(to_path): 858 ↛ 860line 858 didn't jump to line 860, because the condition on line 858 was never false

859 os.remove(to_path)

860 utils.linearize_pdf(final_pdf_file_name, to_path)

861

862 # if not replace_frontpage_only:

863 # # Add EXIF metadata in the final PDF (replace_front_page already does it)

864 # add_metadata(article, to_path, to_path)

865

866

def create_frontpage(
    colid,
    container,
    updated_articles,
    test=True,
    replace_frontpage_only=False,
    skip_compilation=False,
    lang="",
):
    """
    create the front page of the articles of an issue by recompiling them

    Behaviour depends on the collection:
    - "PCJ": the TeX of each article is built with create_tex_for_pcj and
      compiled with compile_tex(update=True); nothing else is done.
    - "CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL" with a
      container year before 2020 and no lang: nothing is done.
    - otherwise: compile_article is called for each article that has a
      date_online_first or a date_published, but only when test is False.

    colid: pid of the collection
    container: issue of the articles; its year and pid attributes are read
    updated_articles: iterable of the articles to process
    test: when True, the non-PCJ articles are not recompiled
    replace_frontpage_only, skip_compilation, lang: passed unchanged to
        compile_article

    returns None
    raises Exception if a (non-PCJ) article has no ojs-id
    """

    # Notes from the original author on the overall flow:
    #   get the directory of the article sources: cedram_dev/production/ ..
    #   add the publication date in the source TeX
    #   remote execute latexmk -pdf article.pdf
    #   replace the pdf of the article on mersenne_test_data

    # TODO refactor the code and only use compile_tex for all collections

    if colid == "PCJ":
        for article in updated_articles:
            lines = create_tex_for_pcj(article)
            compile_tex(lines, article, update=True)
        return

    try:
        year = int(container.year)
    except (TypeError, ValueError):
        # A missing (None) or non-numeric year is handled as year 0
        year = 0

    cras_colids = ("CRMATH", "CRMECA", "CRPHYS", "CRGEOS", "CRCHIM", "CRBIOL")
    if colid in cras_colids and year < 2020 and lang == "":
        # No front page for Elsevier CRAS
        return

    issue_id = container.pid
    issue_path = resolver.get_cedram_issue_tex_folder(colid, issue_id)

    # No rollback is needed if an article fails: nothing is modified in the database.
    # (Original note: a pdf.SAV could possibly be put back in place.)
    for article in updated_articles:
        # article path
        article_tex_name = article.get_ojs_id()
        if not article_tex_name:
            raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")
        article_path = os.path.join(issue_path, article_tex_name)

        # Notes from the original author on the intended flow:
        # - in test mode:
        #     date_pre_publish has been updated, but not the other dates.
        #     A temporary date such as XXXX-XX-XX is created only for online_first
        #     (otherwise, when the online_first goes to production, the presence of the
        #     pdftimestamp file with XXXX-XX-XX would make that date appear)
        #     - if article.my_container.with_online_first and there is no
        #       article.date_online_first: XXXX-XX-XX is used for online first
        #       (if the article already has an online-first date, it has presumably
        #       already been recompiled)
        # - when going to production, the dates of the article are used:
        #     if container.with_online_first:
        #         article.date_online_first goes in the right file
        #     if article.date_published: the corresponding file is updated
        #
        # In this function, only the "not test" case triggers a compilation.
        if not test and (article.date_online_first or article.date_published):
            compile_article(
                article,
                colid,
                issue_id,
                article_path,
                article_tex_name,
                replace_frontpage_only,
                skip_compilation,
                lang,
            )

951

952

953def create_translated_pdf(

954 article, xml_content, lang, pdf_file_name, html_file_name, skip_compilation=False

955):

956 user = settings.MERSENNE_TEX_USER

957

958 issue_path = resolver.get_cedram_issue_tex_folder(

959 article.get_top_collection().pid, article.my_container.pid

960 )

961 article_tex_name = article.get_ojs_id()

962 if not article_tex_name:

963 raise Exception(f"Article {article.pid} has no ojs-id -> cedram tex path")

964 article_path = os.path.join(issue_path, article_tex_name)

965

966 xml_base_name = article_tex_name + ".xml"

967 local_xml = os.path.join(settings.LOG_DIR, "tmp", xml_base_name)

968 remote_xml = os.path.join(article_path, xml_base_name)

969

970 if not skip_compilation:

971 # Create the XML file locally

972 with open(local_xml, "w", encoding="utf-8") as file_:

973 file_.write(xml_content)

974

975 # Copy XML file to mersenne-tex

976 cmd = f"scp {local_xml} {user}@mathdoc-tex:{remote_xml}"

977 utils.execute_cmd(cmd)

978

979 remote_html_base_name = f"trad-{lang}.html"

980 remote_html = os.path.join(article_path, remote_html_base_name)

981 # Copy HTML file to mersenne-tex

982 cmd = f"scp {html_file_name} {user}@mathdoc-tex:{remote_html}"

983 utils.execute_cmd(cmd)

984

985 # Create the PDF

986 ptf_tools_bin = os.path.join(settings.BASE_DIR, "bin")

987 # execute script to compile

988 cmd = f"ssh {user}@mathdoc-tex 'bash -s' -- < {ptf_tools_bin}/translate_article.sh {article_path} {xml_base_name} {remote_html_base_name} {lang}"

989 utils.execute_cmd(cmd)

990

991 remote_pdf_base_name = f"{article_tex_name}-{lang}.pdf"

992 remote_pdf = os.path.join(article_path, remote_pdf_base_name)

993 # pdf-traduction should have created remote.pdf

994 # Copy the PDF file

995 cmd = f"scp {user}@mathdoc-tex:{remote_pdf} {pdf_file_name}"

996 utils.execute_cmd(cmd)