dataclass_models package#

Submodules#

dataclass_models.abstract_base_dataclass module#

class academic_metrics.dataclass_models.abstract_base_dataclass.AbstractBaseDataClass[source]#

Bases: ABC

Abstract base class for all data model classes providing common functionality.

to_dict()[source]#

Converts the dataclass to a dictionary, handling Set conversion for JSON serialization.

set_params()[source]#

Sets the parameters from a dictionary, handling type conversions.

to_dict(exclude_keys=None)[source]#

Convert the dataclass to a dictionary, handling Set conversion for JSON serialization.

Returns:

A dictionary representation of the dataclass.

Return type:

dict

set_params(params, debug=False)[source]#

Updates the dataclass fields, merging sets and handling nested updates.

It handles:

1. Converting lists to sets for fields annotated as Set
2. Merging sets instead of overwriting
3. Ignoring keys that don’t match attributes
4. Handling nested dataclass updates

Parameters:

params (Dict[str, Any]) – A dictionary of parameters to update the dataclass fields.

Return type:

None

Examples

>>> class MyClass(AbstractBaseDataClass):
...     items: Set[str] = field(default_factory=set)
>>> obj = MyClass()
>>> obj.set_params({"items": ["a", "b"]})
>>> obj.set_params({"items": ["c", "d"]})
>>> sorted(list(obj.items))  # Contains all items
['a', 'b', 'c', 'd']

dataclass_models.concrete_dataclasses module#

class academic_metrics.dataclass_models.concrete_dataclasses.CategoryInfo(_id='', url='', category_name='', faculty_count=0, department_count=0, article_count=0, files=<factory>, faculty=<factory>, departments=<factory>, titles=<factory>, tc_count=0, citation_average=0, doi_list=<factory>, themes=<factory>)[source]#

Bases: AbstractBaseDataClass

A dataclass representing information about an academic category.

This class stores various metrics and details related to an academic category, including counts of faculty, departments, and articles, as well as sets of related entities and citation information.

_id#

Unique identifier for the category

Type:

str

url#

A URL-friendly version of the category name

Type:

str

category_name#

Name of the category

Type:

str

faculty_count#

Number of faculty members in this category

Type:

int

department_count#

Number of departments in this category

Type:

int

article_count#

Number of articles in this category

Type:

int

files#

File names associated with this category

Type:

Set[str]

faculty#

Faculty names in this category

Type:

Set[str]

departments#

Department names in this category

Type:

Set[str]

titles#

Article titles in this category

Type:

Set[str]

tc_count#

Total citation count for articles

Type:

int

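tc_list#

Individual citation counts for articles (note: unlike the other attributes listed here, tc_list does not appear in the constructor signature or the field list of this class)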

Type:

List[int]

citation_average#

Average citations per article

Type:

int

doi_list#

DOIs of the articles in this category (stored as a set, despite the field name)

Type:

Set[str]

themes#

Themes associated with this category

Type:

Set[str]

url: str = ''#
category_name: str = ''#
faculty_count: int = 0#
department_count: int = 0#
article_count: int = 0#
files: Set[str]#
faculty: Set[str]#
departments: Set[str]#
titles: Set[str]#
tc_count: int = 0#
citation_average: int = 0#
doi_list: Set[str]#
themes: Set[str]#
class academic_metrics.dataclass_models.concrete_dataclasses.GlobalFacultyStats(_id='', name='', total_citations=0, article_count=0, average_citations=0, department_affiliations=<factory>, dois=<factory>, titles=<factory>, categories=<factory>, top_level_categories=<factory>, mid_level_categories=<factory>, low_level_categories=<factory>, category_urls=<factory>, top_category_urls=<factory>, mid_category_urls=<factory>, low_category_urls=<factory>, themes=<factory>, citation_map=<factory>, journals=<factory>)[source]#

Bases: AbstractBaseDataClass

A dataclass representing all of a faculty member’s articles across all categories.

_id#

Unique identifier for the faculty member

Type:

str

name#

Name of the faculty member

Type:

str

total_citations#

Total number of citations across all articles

Type:

int

article_count#

Total number of articles

Type:

int

average_citations#

Average citations per article

Type:

int

department_affiliations#

All department affiliations

Type:

Set[str]

dois#

All DOIs of faculty’s articles

Type:

Set[str]

titles#

All article titles

Type:

Set[str]

categories#

All categories

Type:

Set[str]

category_urls#

All category URLs

Type:

Set[str]

top_level_categories#

High-level category classifications

Type:

Set[str]

mid_level_categories#

Mid-level category classifications

Type:

Set[str]

low_level_categories#

Detailed category classifications

Type:

Set[str]

themes#

Research themes

Type:

Set[str]

citation_map#

Mapping of articles to citation counts

Type:

Dict[str, int]

journals#

All journals published in

Type:

Set[str]

name: str = ''#
total_citations: int = 0#
article_count: int = 0#
average_citations: int = 0#
department_affiliations: Set[str]#
dois: Set[str]#
titles: Set[str]#
categories: Set[str]#
top_level_categories: Set[str]#
mid_level_categories: Set[str]#
low_level_categories: Set[str]#
category_urls: Set[str]#
top_category_urls: Set[str]#
mid_category_urls: Set[str]#
low_category_urls: Set[str]#
themes: Set[str]#
citation_map: Dict[str, int]#
journals: Set[str]#
class academic_metrics.dataclass_models.concrete_dataclasses.FacultyInfo(_id='', name='', category='', category_url='', total_citations=0, article_count=0, average_citations=0, titles=<factory>, dois=<factory>, department_affiliations=<factory>, doi_citation_map=<factory>)[source]#

Bases: AbstractBaseDataClass

A dataclass representing detailed information about a faculty member.

_id#

Unique identifier for faculty member

Type:

str

name#

Faculty member’s name

Type:

str

category#

Associated category

Type:

str

category_url#

URL of the associated category

Type:

str

total_citations#

Total number of citations for all articles

Type:

int

article_count#

Number of articles authored

Type:

int

average_citations#

Average citations per article

Type:

int

titles#

Set of article titles

Type:

Set[str]

dois#

Set of DOIs for articles

Type:

Set[str]

department_affiliations#

Departments affiliated with

Type:

Set[str]

doi_citation_map#

Maps DOIs to citation counts

Type:

Dict[str, int]

name: str = ''#
category: str = ''#
category_url: str = ''#
total_citations: int = 0#
article_count: int = 0#
average_citations: int = 0#
titles: Set[str]#
dois: Set[str]#
department_affiliations: Set[str]#
doi_citation_map: Dict[str, int]#
class academic_metrics.dataclass_models.concrete_dataclasses.FacultyStats(faculty_stats=<factory>)[source]#

Bases: AbstractBaseDataClass

A dataclass representing statistics for all faculty members.

faculty_stats#

Maps faculty names to their info

Type:

Dict[str, FacultyInfo]

faculty_stats: Dict[str, FacultyInfo]#
refine_faculty_stats(*, faculty_name_unrefined, variations)[source]#

Refines faculty statistics by updating faculty names based on variations.

Parameters:
  • faculty_name_unrefined (str) – Original faculty name

  • variations (Dict[str, Any]) – Dictionary of name variations

Return type:

None

get_refined_faculty_name(unrefined_name, variations)[source]#

Gets the refined name for a faculty member.

Parameters:
  • unrefined_name (str) – Original faculty name

  • variations (Dict[str, Any]) – Dictionary of name variations

Returns:

Refined faculty name

Return type:

str

set_params(params)[source]#

Override set_params to handle the nested FacultyInfo dictionary.

Parameters:
  • params (Dict[str, Any]) – Dictionary that can include either a full faculty_stats dictionary or direct updates to individual faculty members.

Return type:

None

Examples

Case 1 - Full faculty_stats dictionary:

>>> faculty_stats = DataClassFactory.get_dataclass(DataClassTypes.FACULTY_STATS)
>>> faculty_stats.set_params({
...     "faculty_stats": {
...         "Dr. Smith": {"total_citations": 100, "article_count": 5},
...         "Dr. Jones": {"total_citations": 50, "article_count": 3}
...     }
... })

Case 2 - Direct faculty member updates:

>>> faculty_stats = DataClassFactory.get_dataclass(DataClassTypes.FACULTY_STATS)
>>> faculty_stats.set_params({
...     "Dr. Smith": {"total_citations": 100, "article_count": 5}
... })

class academic_metrics.dataclass_models.concrete_dataclasses.ArticleDetails(tc_count=0, faculty_members=<factory>, faculty_affiliations=<factory>, abstract='', license_url='', date_published_print='', date_published_online='', journal='', download_url='', doi='')[source]#

Bases: AbstractBaseDataClass

A dataclass representing details about an individual article.

tc_count#

Total citation count for the article

Type:

int

faculty_members#

Faculty members associated with article

Type:

Set[str]

faculty_affiliations#

Maps faculty to affiliations

Type:

Dict[str, List[str]]

abstract#

Article abstract

Type:

str

license_url#

URL to article license

Type:

str

date_published_print#

Print publication date

Type:

str

date_published_online#

Online publication date

Type:

str

journal#

Journal name

Type:

str

download_url#

URL to download article

Type:

str

doi#

Digital Object Identifier

Type:

str

tc_count: int = 0#
faculty_members: Set[str]#
faculty_affiliations: Dict[str, List[str]]#
abstract: str = ''#
license_url: str = ''#
date_published_print: str = ''#
date_published_online: str = ''#
journal: str = ''#
download_url: str = ''#
doi: str = ''#
class academic_metrics.dataclass_models.concrete_dataclasses.ArticleStats(article_citation_map=<factory>)[source]#

Bases: AbstractBaseDataClass

A dataclass representing statistics for all articles.

article_citation_map#

Maps article titles to details

Type:

Dict[str, ArticleDetails]

Examples

>>> article_stats = DataClassFactory.get_dataclass(DataClassTypes.ARTICLE_STATS)
>>> article_stats.set_params({
...     "article_citation_map": {
...         "Article Title": {
...             "tc_count": 10,
...             "faculty_members": {"Dr. Smith", "Dr. Jones"},
...             "journal": "Nature"
...         }
...     }
... })
article_citation_map: Dict[str, ArticleDetails]#
set_params(params)[source]#

Override set_params to handle the nested ArticleDetails dictionary.

Parameters:

params (Dict[str, Any]) – Dictionary containing article data

Return type:

None

Examples

>>> article_stats = DataClassFactory.get_dataclass(DataClassTypes.ARTICLE_STATS)
>>> article_stats.set_params({
...     "Article Title": {
...         "tc_count": 10,
...         "faculty_members": {"Dr. Smith"},
...         "journal": "Nature"
...     }
... })
class academic_metrics.dataclass_models.concrete_dataclasses.CrossrefArticleDetails(_id='', title='', tc_count=0, faculty_members=<factory>, faculty_affiliations=<factory>, abstract='', license_url='', date_published_print='', date_published_online='', journal='', download_url='', doi='', themes=<factory>, categories=<factory>, category_urls=<factory>, top_level_categories=<factory>, mid_level_categories=<factory>, low_level_categories=<factory>, top_category_urls=<factory>, mid_category_urls=<factory>, low_category_urls=<factory>, url='')[source]#

Bases: AbstractBaseDataClass

A dataclass representing details about an individual article from Crossref.

_id#

Unique identifier

Type:

str

title#

Article title

Type:

str

tc_count#

Total citation count

Type:

int

faculty_members#

Faculty members associated with article

Type:

Set[str]

faculty_affiliations#

Maps faculty to affiliations

Type:

Dict[str, List[str]]

abstract#

Article abstract

Type:

str

license_url#

URL to article license

Type:

str

date_published_print#

Print publication date

Type:

str

date_published_online#

Online publication date

Type:

str

journal#

Journal name

Type:

str

download_url#

URL to download article

Type:

str

doi#

Digital Object Identifier

Type:

str

themes#

Research themes

Type:

Set[str]

categories#

Article categories

Type:

Set[str]

category_urls#

Category URLs

Type:

Set[str]

top_level_categories#

High-level categories

Type:

Set[str]

mid_level_categories#

Mid-level categories

Type:

Set[str]

low_level_categories#

Detailed categories

Type:

Set[str]

title: str = ''#
tc_count: int = 0#
faculty_members: Set[str]#
faculty_affiliations: Dict[str, List[str]]#
abstract: str = ''#
license_url: str = ''#
date_published_print: str = ''#
date_published_online: str = ''#
journal: str = ''#
download_url: str = ''#
doi: str = ''#
themes: Set[str]#
categories: Set[str]#
category_urls: Set[str]#
top_level_categories: Set[str]#
mid_level_categories: Set[str]#
low_level_categories: Set[str]#
top_category_urls: Set[str]#
mid_category_urls: Set[str]#
low_category_urls: Set[str]#
url: str = ''#
class academic_metrics.dataclass_models.concrete_dataclasses.CrossrefArticleStats(article_citation_map=<factory>)[source]#

Bases: AbstractBaseDataClass

A dataclass representing statistics for all Crossref articles.

article_citation_map#

Maps DOIs to article details

Type:

Dict[str, CrossrefArticleDetails]

Examples

>>> stats = DataClassFactory.get_dataclass(DataClassTypes.CROSSREF_ARTICLE_STATS)
>>> stats.set_params({
...     "article_citation_map": {
...         "10.1234/nature12345": {
...             "title": "Research Paper",
...             "tc_count": 10,
...             "faculty_members": {"Dr. Smith"},
...             "themes": {"AI", "ML"}
...         }
...     }
... })
article_citation_map: Dict[str, CrossrefArticleDetails]#
set_params(params, debug=False)[source]#

Override set_params to handle the nested CrossrefArticleDetails dictionary.

Parameters:

params (Dict[str, Any]) – Dictionary containing article data

Return type:

None

Examples

>>> stats = DataClassFactory.get_dataclass(DataClassTypes.CROSSREF_ARTICLE_STATS)
>>> stats.set_params({
...     "10.1234/nature12345": {
...         "title": "Research Paper",
...         "tc_count": 10,
...         "faculty_members": {"Dr. Smith"}
...     }
... })

Module contents#