Source code for academic_metrics.dataclass_models.concrete_dataclasses

from dataclasses import dataclass, field
from typing import Any, Dict, List, Set

from academic_metrics.enums import DataClassTypes
from academic_metrics.factories import DataClassFactory

from .abstract_base_dataclass import AbstractBaseDataClass


[docs] @dataclass @DataClassFactory.register_dataclass(DataClassTypes.CATEGORY_INFO) class CategoryInfo(AbstractBaseDataClass): """ A dataclass representing information about an academic category. This class stores various metrics and details related to an academic category, including counts of faculty, departments, and articles, as well as sets of related entities and citation information. Attributes: _id (str): Unique identifier for the category url (str): A URL-friendly version of the category name category_name (str): Name of the category faculty_count (int): Number of faculty members in this category department_count (int): Number of departments in this category article_count (int): Number of articles in this category files (Set[str]): File names associated with this category faculty (Set[str]): Faculty names in this category departments (Set[str]): Department names in this category titles (Set[str]): Article titles in this category tc_count (int): Total citation count for articles tc_list (List[int]): Individual citation counts for articles citation_average (int): Average citations per article doi_list (Set[str]): List of DOIs for articles themes (Set[str]): Themes associated with this category """ _id: str = "" url: str = "" category_name: str = "" faculty_count: int = 0 department_count: int = 0 article_count: int = 0 files: Set[str] = field(default_factory=set) faculty: Set[str] = field(default_factory=set) departments: Set[str] = field(default_factory=set) titles: Set[str] = field(default_factory=set) tc_count: int = 0 citation_average: int = 0 doi_list: Set[str] = field(default_factory=set) themes: Set[str] = field(default_factory=set)
[docs] @dataclass @DataClassFactory.register_dataclass(DataClassTypes.GLOBAL_FACULTY_STATS) class GlobalFacultyStats(AbstractBaseDataClass): """ A dataclass representing all of a faculty member's articles across all categories. Attributes: _id (str): Unique identifier for the faculty member name (str): Name of the faculty member total_citations (int): Total number of citations across all articles article_count (int): Total number of articles average_citations (int): Average citations per article department_affiliations (Set[str]): All department affiliations dois (Set[str]): All DOIs of faculty's articles titles (Set[str]): All article titles categories (Set[str]): All categories category_ids (Set[str]): All category IDs top_level_categories (Set[str]): High-level category classifications mid_level_categories (Set[str]): Mid-level category classifications low_level_categories (Set[str]): Detailed category classifications themes (Set[str]): Research themes citation_map (Dict[str, int]): Mapping of articles to citation counts journals (Set[str]): All journals published in """ _id: str = field(default="") name: str = field(default="") total_citations: int = 0 article_count: int = 0 average_citations: int = 0 department_affiliations: Set[str] = field(default_factory=set) dois: Set[str] = field(default_factory=set) titles: Set[str] = field(default_factory=set) categories: Set[str] = field(default_factory=set) top_level_categories: Set[str] = field(default_factory=set) mid_level_categories: Set[str] = field(default_factory=set) low_level_categories: Set[str] = field(default_factory=set) category_urls: Set[str] = field(default_factory=set) top_category_urls: Set[str] = field(default_factory=set) mid_category_urls: Set[str] = field(default_factory=set) low_category_urls: Set[str] = field(default_factory=set) themes: Set[str] = field(default_factory=set) citation_map: Dict[str, int] = field(default_factory=dict) journals: Set[str] = field(default_factory=set)
[docs] @dataclass @DataClassFactory.register_dataclass(DataClassTypes.FACULTY_INFO) class FacultyInfo(AbstractBaseDataClass): """ A dataclass representing detailed information about a faculty member. Attributes: _id (str): Unique identifier for faculty member name (str): Faculty member's name category (str): Associated category category_id (str): Category identifier total_citations (int): Total number of citations for all articles article_count (int): Number of articles authored average_citations (int): Average citations per article titles (Set[str]): Set of article titles dois (Set[str]): Set of DOIs for articles department_affiliations (Set[str]): Departments affiliated with doi_citation_map (Dict[str, int]): Maps DOIs to citation counts """ _id: str = field(default="") name: str = field(default="") category: str = field(default="") category_url: str = field(default="") total_citations: int = 0 article_count: int = 0 average_citations: int = 0 titles: Set[str] = field(default_factory=set) dois: Set[str] = field(default_factory=set) department_affiliations: Set[str] = field(default_factory=set) doi_citation_map: Dict[str, int] = field(default_factory=dict)
[docs] @dataclass @DataClassFactory.register_dataclass(DataClassTypes.FACULTY_STATS) class FacultyStats(AbstractBaseDataClass): """ A dataclass representing statistics for all faculty members. Attributes: faculty_stats (Dict[str, FacultyInfo]): Maps faculty names to their info """ faculty_stats: Dict[str, FacultyInfo] = field(default_factory=dict)
[docs] def refine_faculty_stats( self, *, faculty_name_unrefined: str, variations: Dict[str, Any] ) -> None: """ Refines faculty statistics by updating faculty names based on variations. Args: faculty_name_unrefined (str): Original faculty name name_variations (Dict[str, Any]): Dictionary of name variations """ refined_name = self.get_refined_faculty_name(faculty_name_unrefined, variations) if faculty_name_unrefined in self.faculty_stats: self.faculty_stats[refined_name] = self.faculty_stats.pop( faculty_name_unrefined )
[docs] def get_refined_faculty_name( self, unrefined_name: str, variations: Dict[str, Any] ) -> str: """ Gets the refined name for a faculty member. Args: unrefined_name (str): Original faculty name name_variations (Dict[str, Any]): Dictionary of name variations Returns: str: Refined faculty name """ for _, variation in variations.items(): if unrefined_name in variation.variations: return variation.most_frequent_variation() return unrefined_name
[docs] def set_params(self, params: Dict[str, Any]) -> None: """ Override set_params to handle the nested FacultyInfo dictionary. Args: params (Dict[str, Any]): - Dictionary that can include either a full faculty_stats dictionary or direct updates to individual faculty members. Examples: Case 1 - Full faculty_stats dictionary: >>> faculty_stats = DataClassFactory.get_dataclass(DataClassTypes.FACULTY_STATS) >>> faculty_stats.set_params({ ... "faculty_stats": { ... "Dr. Smith": {"total_citations": 100, "article_count": 5}, ... "Dr. Jones": {"total_citations": 50, "article_count": 3} ... } ... }) Case 2 - Direct faculty member updates: >>> faculty_stats = DataClassFactory.get_dataclass(DataClassTypes.FACULTY_STATS) >>> faculty_stats.set_params({ ... "Dr. Smith": {"total_citations": 100, "article_count": 5} ... }) """ # Case 1: If params contains a full faculty_stats dictionary if "faculty_stats" in params: faculty_data = params["faculty_stats"] # Case 2: If params is direct faculty member data else: faculty_data = params # Update faculty info for each member for name, info in faculty_data.items(): # Create FacultyInfo if it doesn't exist if name not in self.faculty_stats: self.faculty_stats[name] = DataClassFactory.get_dataclass( DataClassTypes.FACULTY_INFO ) # Update the faculty info if isinstance(info, dict): self.faculty_stats[name].set_params(info) elif isinstance(info, FacultyInfo): self.faculty_stats[name] = info
[docs] @dataclass @DataClassFactory.register_dataclass(DataClassTypes.ARTICLE_DETAILS) class ArticleDetails(AbstractBaseDataClass): """ A dataclass representing details about an individual article. Attributes: tc_count (int): Total citation count for the article faculty_members (Set[str]): Faculty members associated with article faculty_affiliations (Dict[str, List[str]]): Maps faculty to affiliations abstract (str): Article abstract license_url (str): URL to article license date_published_print (str): Print publication date date_published_online (str): Online publication date journal (str): Journal name download_url (str): URL to download article doi (str): Digital Object Identifier """ tc_count: int = 0 faculty_members: Set[str] = field(default_factory=set) faculty_affiliations: Dict[str, List[str]] = field(default_factory=dict) abstract: str = field(default="") license_url: str = field(default="") date_published_print: str = field(default="") date_published_online: str = field(default="") journal: str = field(default="") download_url: str = field(default="") doi: str = field(default="")
[docs] @dataclass @DataClassFactory.register_dataclass(DataClassTypes.ARTICLE_STATS) class ArticleStats(AbstractBaseDataClass): """ A dataclass representing statistics for all articles. Attributes: article_citation_map (Dict[str, ArticleDetails]): Maps article titles to details Examples: >>> article_stats = DataClassFactory.get_dataclass(DataClassTypes.ARTICLE_STATS) >>> article_stats.set_params({ ... "article_citation_map": { ... "Article Title": { ... "tc_count": 10, ... "faculty_members": {"Dr. Smith", "Dr. Jones"}, ... "journal": "Nature" ... } ... } ... }) """ article_citation_map: Dict[str, ArticleDetails] = field(default_factory=dict)
[docs] def set_params(self, params: Dict[str, Any]) -> None: """ Override set_params to handle the nested ArticleDetails dictionary. Args: params (Dict[str, Any]): Dictionary containing article data Examples: >>> article_stats = DataClassFactory.get_dataclass(DataClassTypes.ARTICLE_STATS) >>> article_stats.set_params({ ... "Article Title": { ... "tc_count": 10, ... "faculty_members": {"Dr. Smith"}, ... "journal": "Nature" ... } ... }) """ # Case 1: If params contains a full article_citation_map if "article_citation_map" in params: article_data = params["article_citation_map"] # Case 2: If params is direct article data else: article_data = params # Update article details for each article for title, details in article_data.items(): # Create ArticleDetails if it doesn't exist if title not in self.article_citation_map: self.article_citation_map[title] = DataClassFactory.get_dataclass( DataClassTypes.ARTICLE_DETAILS ) # Update the article details if isinstance(details, dict): self.article_citation_map[title].set_params(details) elif isinstance(details, ArticleDetails): self.article_citation_map[title] = details
[docs] @dataclass @DataClassFactory.register_dataclass(DataClassTypes.CROSSREF_ARTICLE_DETAILS) class CrossrefArticleDetails(AbstractBaseDataClass): """ A dataclass representing details about an individual article from Crossref. Attributes: _id (str): Unique identifier title (str): Article title tc_count (int): Total citation count faculty_members (Set[str]): Faculty members associated with article faculty_affiliations (Dict[str, List[str]]): Maps faculty to affiliations abstract (str): Article abstract license_url (str): URL to article license date_published_print (str): Print publication date date_published_online (str): Online publication date journal (str): Journal name download_url (str): URL to download article doi (str): Digital Object Identifier themes (Set[str]): Research themes categories (Set[str]): Article categories category_ids (Set[str]): Category identifiers top_level_categories (Set[str]): High-level categories mid_level_categories (Set[str]): Mid-level categories low_level_categories (Set[str]): Detailed categories """ _id: str = field(default="") title: str = field(default="") tc_count: int = 0 faculty_members: Set[str] = field(default_factory=set) faculty_affiliations: Dict[str, List[str]] = field(default_factory=dict) abstract: str = field(default="") license_url: str = field(default="") date_published_print: str = field(default="") date_published_online: str = field(default="") journal: str = field(default="") download_url: str = field(default="") doi: str = field(default="") themes: Set[str] = field(default_factory=set) categories: Set[str] = field(default_factory=set) category_urls: Set[str] = field(default_factory=set) top_level_categories: Set[str] = field(default_factory=set) mid_level_categories: Set[str] = field(default_factory=set) low_level_categories: Set[str] = field(default_factory=set) top_category_urls: Set[str] = field(default_factory=set) mid_category_urls: Set[str] = field(default_factory=set) low_category_urls: Set[str] = field(default_factory=set) url: str = field(default="")
[docs] @dataclass @DataClassFactory.register_dataclass(DataClassTypes.CROSSREF_ARTICLE_STATS) class CrossrefArticleStats(AbstractBaseDataClass): """ A dataclass representing statistics for all Crossref articles. Attributes: article_citation_map (Dict[str, CrossrefArticleDetails]): Maps DOIs to article details Examples: >>> stats = DataClassFactory.get_dataclass(DataClassTypes.CROSSREF_ARTICLE_STATS) >>> stats.set_params({ ... "article_citation_map": { ... "10.1234/nature12345": { ... "title": "Research Paper", ... "tc_count": 10, ... "faculty_members": {"Dr. Smith"}, ... "themes": {"AI", "ML"} ... } ... } ... }) """ article_citation_map: Dict[str, CrossrefArticleDetails] = field( default_factory=dict )
[docs] def set_params(self, params: Dict[str, Any], debug: bool = False) -> None: """ Override set_params to handle the nested CrossrefArticleDetails dictionary. Args: params (Dict[str, Any]): Dictionary containing article data Examples: >>> stats = DataClassFactory.get_dataclass(DataClassTypes.CROSSREF_ARTICLE_STATS) >>> stats.set_params({ ... "10.1234/nature12345": { ... "title": "Research Paper", ... "tc_count": 10, ... "faculty_members": {"Dr. Smith"} ... } ... }) """ if debug: print(f"set_params called with params: {params}") print( f"Current article_citation_map type: {type(self.article_citation_map)}" ) print(f"Current article_citation_map value: {self.article_citation_map}") # Case 1: If params contains a full article_citation_map if "article_citation_map" in params: article_data = params["article_citation_map"] # Case 2: If params is direct article data else: article_data = params # Update article details for each DOI for doi, details in article_data.items(): # Create CrossrefArticleDetails if it doesn't exist if doi not in self.article_citation_map.keys(): self.article_citation_map[doi] = DataClassFactory.get_dataclass( DataClassTypes.CROSSREF_ARTICLE_DETAILS ) # Update the article details if isinstance(details, dict): self.article_citation_map[doi].set_params(details) elif isinstance(details, CrossrefArticleDetails): self.article_citation_map[doi] = details