Coverage for apps/ptf/model_data_converter.py

1##################################################################################################

3# README

5# Operations on the xml data objects

6# Django DB -> Data objects

8##################################################################################################

10import types

12from django.db.models import Q

14from ptf.cmds.xml.citation_html import get_citation_html

15from ptf.cmds.xml.jats import jats_parser

16from ptf.cmds.xml.xml_base import RefBase

17from ptf.cmds.xml.xml_utils import escape

18from ptf.cmds.xml.xml_utils import get_contrib_xml

19from ptf.model_data import ArticleData

20from ptf.model_data import BookData

21from ptf.model_data import BookPartData

22from ptf.model_data import Foo

23from ptf.model_data import IssueData

24from ptf.model_data import JournalData

25from ptf.model_data import MathdocPublicationData

26from ptf.model_data import PublisherData

27from ptf.model_data import RefData

28from ptf.model_data import create_contributor

def db_append_obj_with_location_to_list(resource_qs, data_list):
    """
    Turn every object of resource_qs into a plain dict and append it to data_list.

    Each dict always holds "rel", "mimetype", "location" and "base" ("base" is
    the empty string when the object has no base). "metadata", "text" and
    "caption" are added only for objects that expose those attributes.
    data_list is modified in place; nothing is returned.
    """
    optional_attrs = ("metadata", "text", "caption")

    for item in resource_qs:
        entry = {
            "rel": item.rel,
            "mimetype": item.mimetype,
            "location": item.location,
            "base": item.base.base if item.base else "",
        }
        # The object's seq is not exported.
        entry.update(
            {name: getattr(item, name) for name in optional_attrs if hasattr(item, name)}
        )
        data_list.append(entry)

def db_to_contributors(qs):
    """
    Convert the contributions reachable through qs.all() into contributor entries.

    One entry is created with create_contributor() per contribution and filled
    from the contribution's fields. A falsy "orcid" or "mid" is stored as the
    empty string, and "addresses" is the list of the address values found in
    contribution.contribaddress_set. Returns the list of entries.
    """
    result = []

    for db_contrib in qs.all():
        entry = create_contributor()

        for key in ("first_name", "last_name", "prefix", "suffix"):
            entry[key] = getattr(db_contrib, key)
        entry["orcid"] = db_contrib.orcid or ""
        for key in ("email", "string_name"):
            entry[key] = getattr(db_contrib, key)
        entry["mid"] = db_contrib.mid or ""
        entry["addresses"] = [item.address for item in db_contrib.contribaddress_set.all()]
        for key in (
            "role",
            "deceased_before_publication",
            "equal_contrib",
            "corresponding",
            "contrib_xml",
        ):
            entry[key] = getattr(db_contrib, key)

        result.append(entry)

    return result

def db_to_resource_data_common(resource, data_resource):
    """
    Copy everything shared by all resource kinds from a DB object to a data object.

    resource is read through its attributes and related sets; data_resource is
    filled in place (identifiers, titles, links, streams, related objects,
    counts, contributors, keywords, subjects, abstracts, awards, relations and,
    when the resource has them, issn / e_issn). Nothing is returned.
    """
    plain_fields = (
        "pid",
        "doi",
        "lang",
        "title_xml",
        "title_tex",
        "title_html",
        "abbrev",
        "trans_lang",
        "trans_title_tex",
        "trans_title_html",
        "funding_statement_xml",
        "funding_statement_html",
        "footnotes_xml",
        "footnotes_html",
    )
    for field in plain_fields:
        setattr(data_resource, field, getattr(resource, field))

    data_resource.ids = [
        (res_id.id_type, res_id.id_value) for res_id in resource.resourceid_set.all()
    ]
    data_resource.extids = [
        (ext_id.id_type, ext_id.id_value) for ext_id in resource.extid_set.all()
    ]

    ext_links_qs = resource.extlink_set.all()
    db_append_obj_with_location_to_list(ext_links_qs, data_resource.ext_links)

    streams_qs = resource.datastream_set.all()
    db_append_obj_with_location_to_list(streams_qs, data_resource.streams)

    related_qs = resource.relatedobject_set.all()
    db_append_obj_with_location_to_list(related_qs, data_resource.related_objects)

    # NOTE(review): an older comment at this spot said that related_objects and
    # figures are ignored because the FullText import updates them after the
    # Cedrics import, yet all related objects are copied just above (figures are
    # not copied at all) -- confirm which behaviour is intended.
    supplementary_qs = resource.relatedobject_set.filter(
        Q(rel="supplementary-material") | Q(rel="review")
    )
    db_append_obj_with_location_to_list(supplementary_qs, data_resource.supplementary_materials)

    data_resource.counts = [(item.name, item.value) for item in resource.resourcecount_set.all()]

    data_resource.contributors = db_to_contributors(resource.contributions)

    data_resource.kwds = [
        {"type": item.type, "lang": item.lang, "value": item.value}
        for item in resource.kwd_set.all()
    ]
    data_resource.subjs = [
        {"type": item.type, "lang": item.lang, "value": item.value}
        for item in resource.subj_set.all()
    ]

    data_resource.abstracts = [
        {
            "tag": item.tag,
            "lang": item.lang,
            "value_xml": item.value_xml,
            "value_tex": item.value_tex,
            "value_html": item.value_html,
        }
        for item in resource.abstract_set.all()
    ]

    data_resource.awards = [
        {"abbrev": item.abbrev, "award_id": item.award_id} for item in resource.award_set.all()
    ]

    # Relations where the resource is the subject: left label, object pid.
    for link in resource.subject_of.all():
        entry = Foo()
        entry.rel_type = link.rel_info.left
        entry.id_value = link.object_pid
        data_resource.relations.append(entry)

    # Relations where the resource is the object: right label, subject pid.
    for link in resource.object_of.all():
        entry = Foo()
        entry.rel_type = link.rel_info.right
        entry.id_value = link.subject_pid
        data_resource.relations.append(entry)

    # Only some resources carry ISSNs; copy them when present.
    for field in ("issn", "e_issn"):
        if hasattr(resource, field):
            setattr(data_resource, field, getattr(resource, field))

157

158

def db_to_publisher_data(publisher):
    """
    Build a PublisherData from a DB publisher.

    pub_name is stored as name and pub_loc as loc; ext_links is always an
    empty list.
    """
    result = PublisherData()
    result.name, result.loc = publisher.pub_name, publisher.pub_loc

    # TODO: ext_links are not read from the DB
    result.ext_links = []

    return result

169

170

def db_to_publication_data(collection):
    """
    Build a MathdocPublicationData from a DB collection.

    The common resource fields are copied first, then coltype, wall, issn and
    e_issn.
    """
    publication = MathdocPublicationData()
    db_to_resource_data_common(collection, publication)

    for field in ("coltype", "wall", "issn", "e_issn"):
        setattr(publication, field, getattr(collection, field))

    return publication

182

183

def db_to_journal_data(collection):
    """
    Build a JournalData from a DB collection using only the common resource fields.
    """
    journal = JournalData()

    # Open questions kept from the original author:
    #  - a JournalData has no coltype ?
    #  - a JournalData has a publisher, but it does not seem to be used anywhere;
    #    the publisher seems to belong to the issue/article, not to the journal.
    db_to_resource_data_common(collection, journal)

    return journal

194

195

def db_to_collection_data(collection):
    """
    Build a MathdocPublicationData from a DB collection.

    Besides the common resource fields, coltype, issn and e_issn are copied.
    vseries, volume and seq are copied only when the given object has them.
    """
    result = MathdocPublicationData()
    db_to_resource_data_common(collection, result)

    for field in ("coltype", "issn", "e_issn"):
        setattr(result, field, getattr(collection, field))

    # attributes used for CollectionMembership
    for field in ("vseries", "volume", "seq"):
        if hasattr(collection, field):
            setattr(result, field, getattr(collection, field))

    return result

214

215

def db_to_issue_data(container, articles=None):
    """
    Build an IssueData from a DB container.

    container: the DB container (issue) to convert.
    articles: optional iterable of the container's articles, already
        prefetched or filtered by the caller. When it is None, all the
        articles of container.article_set are used. An empty iterable is
        respected as "no article" (it used to be silently replaced by all the
        articles of the container).

    Returns the IssueData, including its journal, publisher and one
    ArticleData per article.
    """
    data_issue = IssueData()

    db_to_resource_data_common(container, data_issue)

    data_issue.ctype = container.ctype

    data_issue.year = container.year
    data_issue.vseries = container.vseries
    data_issue.volume = container.volume
    data_issue.number = container.number

    data_issue.last_modified_iso_8601_date_str = (
        container.last_modified.isoformat() if container.last_modified else ""
    )
    # Call deployed_date() once instead of once for the test and once for the value.
    deployed_date = container.deployed_date()
    data_issue.prod_deployed_date_iso_8601_date_str = (
        deployed_date.isoformat() if deployed_date else ""
    )

    data_issue.journal = db_to_journal_data(container.my_collection)
    data_issue.publisher = db_to_publisher_data(container.my_publisher)
    # NOTE(review): db_to_book_data stores container.provider itself, not its
    # name -- confirm which form the consumers expect.
    data_issue.provider = container.provider.name

    # a Container has a seq, but it is used only for the books collections

    # articles may have been prefetched / filtered before
    if articles is None:
        articles = container.article_set.all()

    for article in articles:
        data_issue.articles.append(db_to_article_data(article))

    return data_issue

250

251

def db_to_book_data(container):
    """
    Build a BookData from a DB container holding a book.

    The result carries the common resource fields, the publisher, the
    collections the book belongs to (the main one first, then every entry of
    container.my_other_collections), the front matter when there is one, the
    body, the book parts and the bibliography.
    """
    data_book = BookData()

    db_to_resource_data_common(container, data_book)

    data_book.ctype = container.ctype
    data_book.year = container.year

    data_book.publisher = db_to_publisher_data(container.my_publisher)
    # NOTE(review): db_to_issue_data stores container.provider.name instead --
    # confirm which form the consumers expect.
    data_book.provider = container.provider

    data_col = db_to_collection_data(container.my_collection)
    # These attributes are required when adding a container to solr
    if not hasattr(data_col, "vseries"):
        data_col.vseries = 0
    if not hasattr(data_col, "volume"):
        data_col.volume = 0
    data_book.incollection.append(data_col)

    # Convert each *other* collection itself. The previous code converted
    # container.my_collection again on every iteration, which duplicated the
    # main collection and never exported the other ones.
    for collection in container.my_other_collections.all():
        data_book.incollection.append(db_to_collection_data(collection))

    if hasattr(container, "frontmatter") and container.frontmatter is not None:
        data_book.frontmatter_xml = container.frontmatter.value_xml
        data_book.frontmatter_toc_html = container.frontmatter.value_html
        data_book.frontmatter_foreword_html = container.frontmatter.foreword_html
    data_book.body = container.get_body()

    data_book.last_modified_iso_8601_date_str = (
        container.last_modified.isoformat() if container.last_modified else ""
    )
    # Call deployed_date() once instead of once for the test and once for the value.
    deployed_date = container.deployed_date()
    data_book.prod_deployed_date_iso_8601_date_str = (
        deployed_date.isoformat() if deployed_date else ""
    )

    for bookpart in container.article_set.all():
        data_book.parts.append(db_to_bookpart_data(bookpart))

    for bibitem in container.bibitem_set.all():
        data_ref = db_to_ref_data(bibitem, data_book.lang)
        data_book.bibitems.append(data_ref)
        data_book.bibitem.append(data_ref.citation_html)

    return data_book

297

298

def db_to_article_data(article):
    """
    Build an ArticleData from a DB article.

    Copies the common resource fields, the page / numbering information, the
    publication and history dates (as ISO 8601 strings, "" or omitted when a
    date is not set), the body in its different formats and the bibliography
    (references are created with the "und" language). Translations are
    converted recursively with this same function.
    """
    result = ArticleData()
    db_to_resource_data_common(article, result)

    result.atype = article.atype
    result.seq = str(article.seq)

    for field in (
        "fpage",
        "lpage",
        "page_range",
        "page_type",
        "article_number",
        "talk_number",
        "elocation",
    ):
        setattr(result, field, getattr(article, field))
    result.coi_statement = article.coi_statement or ""

    if article.date_published:
        result.date_published_iso_8601_date_str = article.date_published.isoformat()
    else:
        result.date_published_iso_8601_date_str = ""

    # The deployed date is only looked up for an article attached to a container.
    if article.my_container and article.deployed_date():
        result.prod_deployed_date_iso_8601_date_str = article.deployed_date().isoformat()
    else:
        result.prod_deployed_date_iso_8601_date_str = ""

    dated_events = [
        ("received", article.date_received),
        ("revised", article.date_revised),
        ("accepted", article.date_accepted),
        ("online", article.date_online_first),
    ]
    result.history_dates = [
        {"type": kind, "date": when.isoformat()} for kind, when in dated_events if when
    ]

    result.body = article.get_body()
    result.body_html = article.body_html
    result.body_tex = article.body_tex
    result.body_xml = article.body_xml

    for db_ref in article.bibitem_set.all():
        ref_data = db_to_ref_data(db_ref, "und")
        result.bibitems.append(ref_data)
        result.bibitem.append(ref_data.citation_html)

    for translation in article.translations.all():
        result.translations.append(db_to_article_data(translation))

    return result

352

353

def db_to_bookpart_data(article):
    """
    Build a BookPartData from a DB article that is a part of a book.

    Copies the common resource fields, the type and page information, the
    front matter when the article has one, the body and the bibliography
    (references are created with the language of the book part).
    """
    part = BookPartData()
    db_to_resource_data_common(article, part)

    for field in ("atype", "fpage", "lpage", "page_range", "page_type"):
        setattr(part, field, getattr(article, field))

    has_frontmatter = hasattr(article, "frontmatter") and article.frontmatter is not None
    if has_frontmatter:
        part.frontmatter_xml = article.frontmatter.value_xml
        part.frontmatter_toc_html = article.frontmatter.value_html
        part.frontmatter_foreword_html = article.frontmatter.foreword_html
    part.body = article.get_body()

    for db_ref in article.bibitem_set.all():
        ref_data = db_to_ref_data(db_ref, part.lang)
        part.bibitems.append(ref_data)
        part.bibitem.append(ref_data.citation_html)

    return part

378

379

def db_to_ref_data(bibitem, lang):
    """
    Build a RefData (created with the given lang) from a DB bibitem.

    All the bibliographic fields are copied as is; extids becomes the list of
    (id_type, id_value) pairs of bibitem.bibitemid_set and contributors is
    built from bibitem.contributions.
    """
    copied_fields = (
        "type",
        "user_id",
        "label",
        "citation_xml",
        "citation_tex",
        "citation_html",
        "publisher_name",
        "publisher_loc",
        "article_title_tex",
        "chapter_title_tex",
        "institution",
        "series",
        "volume",
        "issue",
        "month",
        "year",
        "comment",
        "annotation",
        "fpage",
        "lpage",
        "page_range",
        "size",
        "source_tex",
    )

    result = RefData(lang=lang)
    for field in copied_fields:
        setattr(result, field, getattr(bibitem, field))

    result.extids = [(item.id_type, item.id_value) for item in bibitem.bibitemid_set.all()]
    result.contributors = db_to_contributors(bibitem.contributions)

    return result

417

418

def jats_from_ref_comment(ref):
    """
    Return the <comment> XML of a reference, or "" when ref.comment is None.

    The first "http://" occurrence (or, when there is none, the first
    "https://" one) up to the next space is wrapped in an <ext-link>; the text
    before and after it is escaped. The space that ends the URL is not kept.
    """
    comment = ref.comment
    if comment is None:
        return ""

    url_start = comment.find("http://")
    if url_start == -1:
        url_start = comment.find("https://")

    if url_start == -1:
        # No URL: the whole comment is plain text.
        inner = escape(comment)
    else:
        url_end = comment.find(" ", url_start)
        url_is_last = url_end == -1

        raw_url = comment[url_start:] if url_is_last else comment[url_start:url_end]
        url = escape(raw_url)

        inner = escape(comment[0:url_start])
        inner += f'<ext-link xlink:href="{url}">{url}</ext-link>'
        if not url_is_last:
            inner += escape(comment[url_end + 1 :])

    return f'<comment xml:space="preserve">{inner}</comment>'

447

448

def jats_from_ref_attr(
    ref,
    attr_name,
    jats_tag="",
    preserve=False,
    attr_type=None,
    attr_type_value="",
    convert_html_tag=False,
):
    """
    Return the JATS element for one attribute of a reference.

    ref: the reference object.
    attr_name: name of the attribute to read on ref.
    jats_tag: XML tag to emit; defaults to attr_name when empty.
    preserve: add xml:space="preserve" to the element.
    attr_type / attr_type_value: optional XML attribute (name and value) to
        add to the element, e.g. pub-id-type="doi".
    convert_html_tag: build the element content with
        jats_parser.get_single_title_xml instead of escape.

    Returns "" when ref has no such attribute or when its value is falsy.

    convert_html_tag is now honoured in every case. Previously the converted
    value was computed but escape(attr) was emitted instead for a preserved
    element with attr_type, and for a non-preserved element without attr_type.
    """
    if not hasattr(ref, attr_name):
        return ""

    attr = getattr(ref, attr_name)
    if not attr:
        return ""

    if not jats_tag:
        jats_tag = attr_name

    value = jats_parser.get_single_title_xml(attr) if convert_html_tag else escape(attr)

    # Same attribute order as before: the typed attribute first, then xml:space.
    xml_attrs = ""
    if attr_type is not None:
        xml_attrs += f' {attr_type}="{attr_type_value}"'
    if preserve:
        xml_attrs += ' xml:space="preserve"'

    return f"<{jats_tag}{xml_attrs}>{value}</{jats_tag}>"

479

480

def jats_from_ref(ref):
    """
    Return the content of the <element-citation> of a reference.

    The pieces are concatenated in this order: the contrib_xml of the authors,
    the article / chapter / source titles, the contrib_xml of the editors, the
    remaining bibliographic fields and finally the comment.
    ref must provide get_authors() and get_editors().
    """
    pieces = []

    authors = ref.get_authors()
    if authors is not None:
        pieces.append("".join([author["contrib_xml"] for author in authors]))

    pieces.append(
        jats_from_ref_attr(
            ref, "article_title_tex", "article-title", preserve=True, convert_html_tag=True
        )
    )
    pieces.append(
        jats_from_ref_attr(ref, "chapter_title_tex", "chapter-title", convert_html_tag=True)
    )
    pieces.append(
        jats_from_ref_attr(ref, "source_tex", "source", preserve=True, convert_html_tag=True)
    )

    editors = ref.get_editors()
    if editors is not None:
        pieces.append("".join([editor["contrib_xml"] for editor in editors]))

    # (positional arguments after ref, keyword arguments) of the remaining fields
    field_specs = (
        (("series",), {"preserve": True}),
        (("volume",), {}),
        (("publisher_name", "publisher-name"), {}),
        (("publisher_loc", "publisher-loc"), {}),
        (("institution",), {}),
        (("year",), {}),
        (("issue",), {}),
        (("doi", "pub-id"), {"attr_type": "pub-id-type", "attr_type_value": "doi"}),
        (("fpage",), {}),
        (("lpage",), {}),
        (("size", "size"), {}),
    )
    for args, kwargs in field_specs:
        pieces.append(jats_from_ref_attr(ref, *args, **kwargs))

    pieces.append(jats_from_ref_comment(ref))

    return "".join(pieces)

513

514

def update_ref_data_for_jats(ref, i, with_label=True):
    """
    Fill the derived fields of a reference (label, citation_html, citation_xml, ...).

    ref: the reference to update in place (a Munch dictionary or a RefData object).
    i: position of the reference, used to build a "[i]" label when ref has none.
    with_label: set with_label=False if you do not want a label in the
        citation_html (for example in the citedby).

    For a reference of type "unknown", citation_html and citation_xml are
    derived from citation_tex, but only when they are empty. For every other
    type they are rebuilt from the structured fields.
    """

    # A non-empty ref.eid replaces the first "eid" entry of extids, or is added.
    if hasattr(ref, "eid") and ref.eid is not None and ref.eid != "":
        eids = [item for item in ref.extids if item[0] == "eid"]
        if eids:
            ref.extids.remove(eids[0])
        ref.extids.append(("eid", ref.eid))

    label = ref.label
    if not label and with_label:
        label = f"[{i}]"
        ref.label = label

    if ref.type == "unknown":
        if not ref.citation_html:
            # Do not repeat the label when citation_tex already starts with it.
            if with_label and not ref.citation_tex.startswith(label):
                ref.citation_html = f"{label} {ref.citation_tex}"
            else:
                ref.citation_html = ref.citation_tex

        if not ref.citation_xml:
            # The closing tag used to be misspelled "</mixed_ciation>", which
            # produced malformed XML.
            ref.citation_xml = f'<label>{escape(ref.label)}</label><mixed-citation xml:space="preserve">{ref.citation_tex}</mixed-citation>'
    else:
        ref.label = f"{label}" if with_label else ""
        # ref can be a Munch dictionary, or a RefData object.
        # Add RefBase member functions, like get_authors
        ref.get_authors = types.MethodType(RefBase.get_authors, ref)
        ref.get_editors = types.MethodType(RefBase.get_editors, ref)
        text = get_citation_html(ref)
        ref.citation_html = ref.citation_tex = text

    for contrib in ref.contributors:
        contrib["contrib_xml"] = get_contrib_xml(contrib, is_ref=True)

    if ref.type != "unknown":
        element_citation = jats_from_ref(ref)
        ref.citation_xml = f'<label>{escape(ref.label)}</label><element-citation publication-type="{ref.type}">{element_citation}</element-citation>'

557

558

def update_data_for_jats(data_article, create_author_if_empty=False, with_label=True):
    """
    Fill in place the derived fields of an article data object.

    - title_html / trans_title_html fall back to their tex versions when empty;
      title_xml is built from the tex titles when empty.
    - contrib_xml is recomputed for every contributor.
    - ("doi", doi) is added to ids when doi is not None and not already there.
    - with create_author_if_empty, an article without contributors gets one
      empty contributor whose role is "author".
    - every reference is updated with update_ref_data_for_jats (numbered from 1)
      and every translation is processed recursively with the same options.
    """
    if not data_article.title_html:
        data_article.title_html = data_article.title_tex
    if not data_article.trans_title_html:
        data_article.trans_title_html = data_article.trans_title_tex
    if not data_article.title_xml:
        data_article.title_xml = jats_parser.get_title_xml(
            data_article.title_tex, data_article.trans_title_tex, data_article.trans_lang
        )

    for person in data_article.contributors:
        person["contrib_xml"] = get_contrib_xml(person)

    if data_article.doi is not None:
        doi_id = ("doi", data_article.doi)
        if doi_id not in data_article.ids:
            data_article.ids.append(doi_id)

    if create_author_if_empty and not data_article.contributors:
        placeholder = create_contributor()
        placeholder["role"] = "author"
        placeholder["contrib_xml"] = get_contrib_xml(placeholder)
        data_article.contributors = [placeholder]

    for position, bibitem in enumerate(data_article.bibitems, start=1):
        update_ref_data_for_jats(bibitem, position, with_label=with_label)

    for translation in data_article.translations:
        update_data_for_jats(translation, create_author_if_empty, with_label)

588

589

def convert_refdata_for_editor(ref):
    """
    Add to a reference, in place, the fields used when it is edited.

    - contribs_text: one "last_name, first_name" line per contributor.
    - type: forced to "unknown" when the reference has no article title,
      chapter title or source.
    - doi: value of the "doi" extid, "" when there is none.
    - eid: value of the "eid" extid; left untouched when there is none.
    """
    name_lines = [
        f"{contrib['last_name']}, {contrib['first_name']}" for contrib in ref.contributors
    ]
    ref.contribs_text = "\n".join(name_lines)

    if not (ref.article_title_tex or ref.chapter_title_tex or ref.source_tex):
        ref.type = "unknown"

    ref.doi = ""
    for extid in ref.extids:
        kind = extid[0]
        if kind == "doi":
            ref.doi = extid[1]
        elif kind == "eid":
            ref.eid = extid[1]
    # URLs are stored in <comment>, so no url field is extracted from ext_links.

Coverage for apps/ptf/model_data_converter.py: 60%

362 statements