# std
from __future__ import annotations
import copy
import pathlib
import time
from contextlib import closing
from sqlite3 import Connection
from typing import Any, Iterable, Sequence
# 3rd
import numpy as np
import pandas as pd
# ours
import ankipandas._columns as _columns
import ankipandas.raw as raw
from ankipandas.util.checksum import field_checksum
from ankipandas.util.dataframe import merge_dfs, replace_df_inplace
from ankipandas.util.guid import guid as generate_guid
from ankipandas.util.log import log
from ankipandas.util.misc import flatten_list_list, invert_dict
from ankipandas.util.types import (
is_dict_list_like,
is_list_dict_like,
is_list_like,
is_list_list_like,
)
class AnkiDataFrame(pd.DataFrame):
#: Additional attributes of a :class:`AnkiDataFrame` that a normal
#: :class:`pandas.DataFrame` does not possess. These will be copied in the
#: constructor.
#: See https://pandas.pydata.org/pandas-docs/stable/development/extending.html
_metadata = [
"col",
"_anki_table",
"fields_as_columns_prefix",
"_fields_format",
"_df_format",
]
def __init__(self, *args, **kwargs):
"""Initializes a blank :class:`AnkiDataFrame`.
.. warning::
It is recommended to directly initialize this class with the notes,
cards or revs table, using one of the methods
:meth:`.notes`, :meth:`.cards` or :meth:`.revs` of the
:class:`~ankipandas.collection.Collection` class instead!
Args:
*args: Internal use only. See arguments of
:class:`pandas.DataFrame`.
**kwargs: Internal use only. See arguments of
:class:`pandas.DataFrame`.
"""
super().__init__(*args, **kwargs)
        # IMPORTANT: Make sure to add all attributes to the class variable
        # :attr:`._metadata`. Also, all of them have to be initialized here
        # (see the code where we copy attributes).
# todo: document
self.col = None
# noinspection PyTypeChecker
# gets set by _get_table
        #: Type of anki table: 'notes', 'cards' or 'revs'. This corresponds to
        #: the meaning of the ID column (the index).
self._anki_table: str = None
#: Prefix for fields as columns. Default is ``nfld_``.
self.fields_as_columns_prefix = "nfld_"
#: Fields format: ``none``, ``list`` or ``columns`` or ``in_progress``,
#: or ``anki`` (default)
self._fields_format = "anki"
# gets set by _get_table
# noinspection PyTypeChecker
#: Overall structure of the dataframe ``anki``, ``ours``, ``in_progress``
self._df_format: str = None
@property
def _constructor(self):
"""This needs to be overridden so that any DataFrame operations do not
return a :class:`pandas.DataFrame` but a :class:`AnkiDataFrame`."""
return AnkiDataFrame
# Constructors
# ==========================================================================
def _get_table(self, col, table, empty=False):
self._anki_table = table
self._df_format = "anki"
self.col = col
if empty:
df = raw.get_empty_table(table)
else:
with closing(col.db) as db:
df = raw.get_table(db, table)
replace_df_inplace(self, df)
self.normalize(inplace=True)
@classmethod
def init_with_table(cls, col, table, empty=False):
new = AnkiDataFrame()
new._get_table(col, table, empty=empty)
return new
# Fixes
# ==========================================================================
def equals(self, other):
return pd.DataFrame(self).equals(other)
    def append(
        self, other, ignore_index=False, verify_integrity=False, sort=False
    ):
        # Forward the keyword arguments (instead of hard-coding False) and
        # re-apply the dtype casts; astype returns a new frame.
        ret = pd.concat(
            [self, other],
            ignore_index=ignore_index,
            verify_integrity=verify_integrity,
            sort=sort,
        )
        return ret.astype(_columns.dtype_casts2[self._anki_table])
def update(self, other, force=False, **kwargs):
if not force and isinstance(other, AnkiDataFrame):
if other._df_format != self._df_format:
raise ValueError(
"You're trying to update an AnkiDataFrame in format {f1}"
" using another AnkiDataFrame in format {f2}. That doesn't "
"sound like a good idea. However you can still do this "
"using the force=True option.".format(
f1=self._df_format, f2=other._df_format
)
)
if other._anki_table != self._anki_table:
raise ValueError(
"You're trying to update an AnkiDataFrame of table {f1} "
"with an AnkiDataFrame of table {f2}. That doesn't sound"
" like a good idea. However you can still do this using "
"the force=True option.".format(
f1=self._anki_table, f2=other._anki_table
)
)
if self._anki_table == "notes":
if other._fields_format != self._fields_format:
raise ValueError(
"You are trying to update a notes AnkiDataFrame where "
"the fields are in format '{f1}' with a notes "
"AnkiDataFrame where the fields are in format '{f2}'. "
"That doesn't sound like a good idea. However you can "
"still do this using the force=True option. "
"Or you simply ensure that both have the same format"
" using the fields_as_columns() or fields_as_list() "
"method.".format(
f1=self._fields_format, f2=other._fields_format
)
)
super().update(other, **kwargs)
# Fix https://github.com/pandas-dev/pandas/issues/4094
for col, typ in _columns.dtype_casts2[self._anki_table].items():
self[col] = self[col].astype(typ)
# Checks
# ==========================================================================
def check_table_integrity(self):
duplicates = self.index[self.index.duplicated()].tolist()
if duplicates:
log.critical(
"Duplicated indizes in table %s discovered, so something "
"definitely went wrong. Please don't ignore this warning. "
"These indizes appear more than once: %s",
self._anki_table,
", ".join(map(str, duplicates)),
)
def _invalid_table(self):
raise ValueError(f"Invalid table: {self._anki_table}.")
def _check_df_format(self):
if self._df_format == "in_progress":
raise ValueError(
"Previous call to normalize() or raw() did not terminate "
"successfully. This is usually a very bad sign, but you can "
"try calling them again with the force option: raw(force=True) "
"or raw(force=True) and see if that works."
)
elif self._df_format == "anki":
pass
elif self._df_format == "ours":
pass
else:
raise ValueError(f"Unknown value of _df_format: {self._df_format}")
def _check_our_format(self):
self._check_df_format()
if not self._df_format == "ours":
raise ValueError(
"This operation is not supported for AnkiDataFrames in the "
"'raw' format. Perhaps you called raw() before or used the "
"raw=True option when loading? You can try switching to the "
"required format using the normalize() method."
)
# Properties
# ==========================================================================
@property
def db(self) -> Connection:
"""Opened Anki database (:class:`sqlite3.Connection`). Make sure to
call `db.close()` after you're done. Better still, use
`contextlib.closing`.
"""
return self.col.db
# IDs
# ==========================================================================
@property
def id(self):
"""Return note/card/review ID as :class:`pandas.Series` of integers."""
if self._anki_table == "notes":
return self.nid
elif self._anki_table == "cards":
return self.cid
elif self._anki_table == "revs":
return self.rid
else:
self._invalid_table()
@property
def nid(self):
"""Note ID as :class:`pandas.Series` of integers."""
if self._anki_table == "notes":
return self.index
elif self._anki_table == "cards":
if "nid" not in self.columns:
raise ValueError(
"You seem to have removed the 'nid' column. That was not "
"a good idea. Cannot get note ID anymore."
)
else:
return self["nid"]
elif self._anki_table == "revs":
if "nid" in self.columns:
return self["nid"]
else:
return self.cid.map(raw.get_cid2nid(self.db))
else:
self._invalid_table()
@nid.setter
def nid(self, value):
if self._anki_table == "notes":
raise ValueError(
"Note ID column should already be index and notes.nid() will "
"always return this index. Therefore you should not set nid "
"to a column."
)
else:
self["nid"] = value
@property
def cid(self):
"""Card ID as :class:`pandas.Series` of integers."""
if self._anki_table == "cards":
return self.index
if self._anki_table == "revs":
if "cid" not in self.columns:
raise ValueError(
"You seem to have removed the 'cid' column. That was not "
"a good idea. Cannot get card ID anymore."
)
else:
return self["cid"]
elif self._anki_table == "notes":
raise ValueError(
"Notes can belong to multiple cards. Therefore it is impossible"
" to associate a card ID with them."
)
else:
self._invalid_table()
@cid.setter
def cid(self, value):
if self._anki_table == "cards":
raise ValueError(
"Card ID column should already be index and notes.cid() will "
"always return this index. Therefore you should not set cid "
"to a column."
)
elif self._anki_table == "revs":
self["cid"] = value
else:
raise ValueError(
"Notes can belong to multiple cards. Therefore please "
" do not associate a card ID with them."
)
@property
def rid(self):
"""Review ID as :class:`pandas.Series` of integers."""
if self._anki_table == "revs":
return self.index
else:
if "rid" in self.columns:
return self["rid"]
else:
raise ValueError(
"Review index is only available for the 'revs' table by"
" default."
)
# noinspection PyUnusedLocal
@rid.setter
def rid(self, value):
if self._anki_table == "revs":
raise ValueError(
"Review ID column should already be index and notes.rid() will "
"always return this index. Therefore you should not set rid "
"to a column."
)
else:
raise ValueError(
"Setting a review index 'rid' makes no sense in "
"tables other than 'rev'."
)
@property
def mid(self):
"""Model ID as :class:`pandas.Series` of integers."""
if self._anki_table in ["notes"]:
if "nmodel" not in self.columns:
raise ValueError(
"You seem to have removed the 'nmodel' column. That was not"
" a good idea. Cannot get model ID anymore."
)
else:
return self["nmodel"].map(raw.get_model2mid(self.db))
if self._anki_table in ["revs", "cards"]:
if "nmodel" in self.columns:
return self["nmodel"].map(raw.get_model2mid(self.db))
else:
return self.nid.map(raw.get_nid2mid(self.db))
else:
self._invalid_table()
@mid.setter
def mid(self, value):
if self._anki_table == "notes":
log.warning(
"You can set an additional 'mid' column, but this will always"
" be overwritten with the information from the 'nmodel' "
"column."
)
self["mid"] = value
@property
def did(self):
"""Deck ID as :class:`pandas.Series` of integers."""
if self._anki_table == "cards":
if "cdeck" not in self.columns:
raise ValueError(
"You seem to have removed the 'cdeck' column. That was not "
"a good idea. Cannot get deck ID anymore."
)
return self["cdeck"].map(raw.get_deck2did(self.db))
elif self._anki_table == "notes":
raise ValueError(
"Notes can belong to multiple decks. Therefore it is impossible"
" to associate a deck ID with them."
)
elif self._anki_table == "revs":
return self.cid.map(raw.get_cid2did(self.db))
else:
self._invalid_table()
@did.setter
def did(self, value):
if self._anki_table == "cards":
log.warning(
"You can set an additional deck ID 'did' column, but this "
"will always be overwritten with the information from the "
"'cdeck' column."
)
self["did"] = value
@property
def odid(self):
"""Original deck ID for cards in filtered deck as
:class:`pandas.Series` of integers.
"""
if self._anki_table == "cards":
if "odeck" not in self.columns:
raise ValueError(
"You seem to have removed the 'odeck' column. That was not "
"a good idea. Cannot get original deck ID anymore."
)
return self["odeck"].map(raw.get_deck2did(self.db))
elif self._anki_table == "revs":
if "odeck" in self.columns:
return self["odeck"].map(raw.get_deck2did(self.db))
elif self._anki_table == "notes":
raise ValueError(
"The original deck ID (odid) is not available for the notes "
"table."
)
else:
self._invalid_table()
@odid.setter
def odid(self, value):
if self._anki_table == "cards":
log.warning(
"You can set an additional 'odid' column, but this will always"
" be overwritten with the information from the 'odeck' "
"column."
)
self["odid"] = value
# Merge tables
# ==========================================================================
def merge_notes(
self,
inplace=False,
columns=None,
drop_columns=None,
prepend="n",
prepend_clash_only=True,
):
"""Merge note table into existing dataframe.
Args:
inplace: If False, return new dataframe, else update old one
columns: Columns to merge
drop_columns: Columns to ignore when merging
prepend: Prepend this string to fields from note table
prepend_clash_only: Only prepend the ``prepend`` string when column
names would otherwise clash.
Returns:
            New :class:`AnkiDataFrame` if ``inplace==False``, else None
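        Examples:
            A minimal sketch; assumes a collection opened as
            ``col = ankipandas.Collection()``:
            .. code-block:: python
                cards = col.cards
                # Bring note information (fields, tags, model) into the
                # cards table:
                cards_with_notes = cards.merge_notes()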
"""
self._check_our_format()
if self._anki_table == "notes":
raise ValueError(
"AnkiDataFrame was already initialized as a table of type"
" notes, therefore merge_notes() doesn't make any sense."
)
elif self._anki_table == "revs":
self["nid"] = self.nid
ret = merge_dfs(
df=self,
df_add=self.col.notes,
id_df="nid",
id_add="nid",
inplace=inplace,
prepend=prepend,
prepend_clash_only=prepend_clash_only,
columns=columns,
drop_columns=drop_columns,
)
return ret
def merge_cards(
self,
inplace=False,
columns=None,
drop_columns=None,
prepend="c",
prepend_clash_only=True,
):
"""
Merges information from the card table into the current dataframe.
Args:
inplace: If False, return new dataframe, else update old one
columns: Columns to merge
drop_columns: Columns to ignore when merging
prepend: Prepend this string to fields from card table
prepend_clash_only: Only prepend the ``prepend`` string when column
names would otherwise clash.
Returns:
            New :class:`AnkiDataFrame` if ``inplace==False``, else None
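        Examples:
            A minimal sketch; assumes a collection opened as
            ``col = ankipandas.Collection()``:
            .. code-block:: python
                revs = col.revs
                # Bring card information (deck, type, ...) into the
                # review table:
                revs_with_cards = revs.merge_cards()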
"""
if self._anki_table == "cards":
raise ValueError(
"AnkiDataFrame was already initialized as a table of type"
" cards, therefore merge_cards() doesn't make any sense."
)
elif self._anki_table == "notes":
raise ValueError(
"One note can correspond to more than one card, therefore it "
"it is not supported to merge the cards table into the "
"notes table."
)
self._check_our_format()
ret = merge_dfs(
df=self,
df_add=self.col.cards,
id_df="cid",
inplace=inplace,
columns=columns,
drop_columns=drop_columns,
id_add="cid",
prepend=prepend,
prepend_clash_only=prepend_clash_only,
)
return ret
# Toggle format
# ==========================================================================
def fields_as_columns(self, inplace=False, force=False):
"""
        In the 'notes' table, the field contents of the notes are contained in
        a single column ('nflds') by default. With this method, this column
        can be split up into a new column for every field.
Args:
inplace: If False, return new dataframe, else update old one
force: Internal use
Returns:
            New :class:`AnkiDataFrame` if ``inplace==False``, else None
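        Examples:
            A minimal sketch; assumes ``notes = col.notes`` for an opened
            collection ``col`` and a note model with a field named 'Front':
            .. code-block:: python
                notes = notes.fields_as_columns()
                # Field contents are now in columns prefixed with 'nfld_':
                front_contents = notes["nfld_Front"]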
"""
if not force:
self._check_our_format()
if not inplace:
df = self.copy(True)
            df.fields_as_columns(inplace=True, force=force)
return df
if self._fields_format == "columns":
log.warning(
"Fields are already as columns."
" Returning without doing anything."
)
return
elif self._fields_format == "in_progress" and not force:
raise ValueError(
"It looks like the last call to fields_as_list or"
"fields_as_columns was not successful, so you better start "
"over."
)
elif self._fields_format == "list":
pass
else:
raise ValueError(f"Unknown _fields_format: {self._fields_format}")
if "nflds" not in self.columns:
raise ValueError("Could not find fields column 'nflds'.")
self._fields_format = "in_progress"
# fixme: What if one field column is one that is already in use?
prefix = self.fields_as_columns_prefix
mids = self.mid.unique()
for mid in mids:
if mid == 0:
continue
df_model = self[self.mid == mid]
fields = pd.DataFrame(df_model["nflds"].tolist())
field_names = raw.get_mid2fields(self.db)[mid]
for field in field_names:
if prefix + field not in self.columns:
self[prefix + field] = ""
for ifield, field in enumerate(field_names):
# todo: can we speed this up?
self.loc[self.mid == mid, [prefix + field]] = pd.Series(
fields[ifield].tolist(),
index=self.loc[self.mid == mid].index,
)
self.drop("nflds", axis=1, inplace=True)
self._fields_format = "columns"
def fields_as_list(self, inplace=False, force=False):
"""
        This reverts :meth:`.fields_as_columns`: all columns that represent
        field contents are merged back into one column 'nflds'.
Args:
inplace: If False, return new dataframe, else update old one
force: Internal use
Returns:
            New :class:`AnkiDataFrame` if ``inplace==False``, else None
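        Examples:
            A minimal sketch; assumes that ``notes`` was previously converted
            with :meth:`.fields_as_columns`:
            .. code-block:: python
                notes = notes.fields_as_list()
                # All field contents are back in one column of lists:
                notes["nflds"].head()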
"""
if not force:
self._check_our_format()
if not inplace:
df = self.copy(True)
df.fields_as_list(inplace=True, force=force)
return df
if self._fields_format == "list":
log.warning(
"Fields are already as list. Returning without doing anything."
)
return
elif self._fields_format == "in_progress" and not force:
raise ValueError(
"It looks like the last call to fields_as_list or"
"fields_as_columns was not successful, so you better start "
"over."
)
elif self._fields_format == "columns":
pass
else:
raise ValueError(f"Unknown _fields_format: {self._fields_format}")
self._fields_format = "in_progress"
mids = self.mid.unique()
to_drop = []
for mid in mids:
fields = raw.get_mid2fields(self.db)[mid]
fields = [self.fields_as_columns_prefix + field for field in fields]
self.loc[self.mid == mid, "nflds"] = pd.Series(
self.loc[self.mid == mid, fields].values.tolist(),
index=self.loc[self.mid == mid].index,
)
# Careful: Do not delete the fields here yet, other models
# might still use them
to_drop.extend(fields)
self.drop(to_drop, axis=1, inplace=True)
self._fields_format = "list"
# Quick access
# ==========================================================================
def _check_tag_col(self):
if "ntags" not in self.columns:
raise ValueError(
"Tag column 'ntags' doesn't exist. Perhaps you forgot to merge "
"the notes into your table?"
)
def list_decks(self) -> list[str]:
"""Return sorted list of deck names in the current table."""
if "cdeck" not in self.columns:
raise ValueError(
"Deck column 'cdeck' not present. Either use the cards table "
"or merge it into your table."
)
else:
decks = sorted(self["cdeck"].unique())
if "" in decks:
decks.remove("")
return decks
def list_models(self):
"""Return sorted list of model names in the current table."""
if "nmodel" not in self.columns:
raise ValueError(
"Model column 'nmodel' not present. Either use the notes table"
" or merge it into your table."
)
return sorted(self["nmodel"].unique())
def has_tag(self, tags: Iterable[str] | str | None = None):
"""Checks whether row has a certain tag ('ntags' column).
Args:
tags: String or list thereof. In the latter case, True is returned
if the row contains any of the specified tags.
If None (default), True is returned if the row has any tag at
all.
Returns:
Boolean :class:`pd.Series`
Examples:
.. code-block:: python
# Get all tagged notes:
notes[notes.has_tag()]
# Get all untagged notes:
notes[~notes.has_tag()]
# Get all notes tagged Japanese:
japanese_notes = notes[notes.has_tag("Japanese")]
# Get all notes tagged either Japanese or Chinese:
asian_notes = notes[notes.has_tag(["Japanese", "Chinese"])]
"""
self._check_our_format()
self._check_tag_col()
if isinstance(tags, str):
tags = [tags]
if tags is not None:
def _has_tag(other):
return not set(tags).isdisjoint(other)
return self["ntags"].apply(_has_tag)
else:
return self["ntags"].apply(bool)
def add_tag(self, tags: Sequence[str] | str, inplace=False):
"""Adds tag ('ntags' column).
Args:
tags: String or list thereof.
inplace: If False, return new dataframe, else update old one
Returns:
            New :class:`AnkiDataFrame` if ``inplace==False``, else None
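        Examples:
            A minimal sketch; assumes ``notes = col.notes`` for an opened
            collection ``col``:
            .. code-block:: python
                # Tag all notes with 'marked' (returns a new dataframe):
                notes = notes.add_tag("marked")
                # Add several tags at once, modifying in place:
                notes.add_tag(["verb", "grammar"], inplace=True)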
"""
self._check_our_format()
if not inplace:
df = self.copy(True)
df.add_tag(tags, inplace=True)
return df
self._check_tag_col()
if isinstance(tags, str):
tags = [tags]
if len(tags) == 0:
return
def _add_tags(other):
return other + sorted(set(tags) - set(other))
self["ntags"] = self["ntags"].apply(_add_tags)
def remove_tag(self, tags: Iterable[str] | str | None, inplace=False):
"""Removes tag ('ntags' column).
Args:
tags: String or list thereof. If None, all tags are removed.
inplace: If False, return new dataframe, else update old one
Returns:
            New :class:`AnkiDataFrame` if ``inplace==False``, else None
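        Examples:
            A minimal sketch; assumes ``notes = col.notes`` for an opened
            collection ``col``:
            .. code-block:: python
                # Remove a single tag (returns a new dataframe):
                notes = notes.remove_tag("marked")
                # Strip all tags, modifying in place:
                notes.remove_tag(None, inplace=True)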
"""
self._check_our_format()
if not inplace:
df = self.copy(True)
df.remove_tag(tags, inplace=True)
return df
self._check_tag_col()
if isinstance(tags, str):
tags = [tags]
if tags is not None:
def _remove_tags(other):
return [tag for tag in other if tag not in tags]
self["ntags"] = self["ntags"].apply(_remove_tags)
else:
self["ntags"] = self["ntags"].apply(lambda _: [])
# Compare
# ==========================================================================
def was_modified(
self, other: pd.DataFrame | None = None, na=True, _force=False
):
"""Compare with original table; show which rows have changed.
Will only compare columns existing in both dataframes.
Args:
other: Compare with this :class:`pandas.DataFrame`.
If None (default), use original unmodified dataframe as reloaded
from the database.
            na: Value assigned to rows that cannot be compared (e.g. rows
                that are not present in the other dataframe)
_force: internal use
Returns:
Boolean value for each row, showing if it was modified.
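        Examples:
            A minimal sketch; assumes ``notes = col.notes`` for an opened
            collection ``col``:
            .. code-block:: python
                notes.add_tag("marked", inplace=True)
                # Boolean series, True for every row that differs from the
                # version loaded from the database:
                changed = notes.was_modified()
                modified_notes = notes[changed]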
"""
if not _force:
self._check_our_format()
if other is None:
_other: AnkiDataFrame = self.col._get_original_item(
self._anki_table
)
else:
_other = other
del other # avoid confusion
self_sf = self
if self._fields_format == "columns":
self_sf = self.fields_as_list(inplace=False, force=_force)
cols = sorted(set(self_sf.columns) & set(_other.columns))
other_nids = set(_other.index)
inters = set(self_sf.index) & other_nids
result = pd.Series(na, index=self_sf.index)
new_bools = np.any(
_other.loc[_other.index.isin(inters), cols].values
!= self_sf.loc[self_sf.index.isin(inters), cols].values,
axis=1,
)
result.loc[self_sf.index.isin(inters)] = pd.Series(
new_bools, index=result[self_sf.index.isin(inters)].index
)
return result
def modified_columns(
self, other: pd.DataFrame | None = None, _force=False, only=True
):
"""Compare with original table, show which columns in which rows
were modified.
Args:
other: Compare with this :class:`pandas.DataFrame`.
If None (default), use original unmodified dataframe as reloaded
from the database.
only: Only show rows where at least one column is changed.
_force: internal use
Returns:
            Boolean :class:`pandas.DataFrame` showing for each row and each
            column whether its value was modified.
"""
if other is None:
other = self.init_with_table(col=self.col, table=self._anki_table)
cols = [c for c in self.columns if c in other.columns]
other_nids = set(other.index)
inters = set(self.index) & other_nids
if only:
inters &= set(
self[self.was_modified(other=other, _force=_force)].index
)
inters_st = sorted(inters)
del inters
return pd.DataFrame(
self.loc[inters_st, cols].values
!= other.loc[inters_st, cols].values,
index=self.loc[inters_st].index,
columns=cols,
)
def was_added(self, other: pd.DataFrame | None = None, _force=False):
"""Compare with original table, show which rows were added.
Args:
other: Compare with this :class:`pandas.DataFrame`.
If None (default), use original unmodified dataframe as reloaded
from the database.
_force: internal use
Returns:
            Boolean value for each row, showing whether it was added (i.e.
            present in this table but not in the original one).
"""
if not _force:
self._check_our_format()
if other is not None:
other_ids = set(other.index)
else:
other_ids = set(self.col._get_original_item(self._anki_table).id)
new_indices = set(self.index) - other_ids
return self.index.isin(new_indices)
def was_deleted(
self, other: pd.DataFrame | None = None, _force=False
) -> list:
"""Compare with original table, return deleted indizes.
Args:
other: Compare with this :class:`pandas.DataFrame`.
If None (default), use original unmodified dataframe as reloaded
from the database.
_force: internal use
Returns:
            Sorted list of indices.
"""
if not _force:
self._check_our_format()
if other is not None:
other_ids = set(other.index)
else:
other_ids = set(self.col._get_original_item(self._anki_table).id)
deleted_indices = other_ids - set(self.index)
return sorted(deleted_indices)
# Update modification stamps and similar
# ==========================================================================
def _set_usn(self):
"""Update usn (update sequence number) for all changed rows."""
self.loc[
self.was_modified(na=True, _force=True),
_columns.columns_anki2ours[self._anki_table]["usn"],
] = -1
def _set_mod(self):
"""Update modification timestamps for all changed rows."""
if self._anki_table in ["cards", "notes"]:
self.loc[
self.was_modified(na=True, _force=True),
_columns.columns_anki2ours[self._anki_table]["mod"],
] = int(time.time())
# todo: test
def _set_guid(self):
"""Update globally unique id"""
if self._anki_table == "notes":
self.loc[~self["nguid"].apply(bool)].apply(generate_guid)
# Raw and normalized
# ==========================================================================
def normalize(self, inplace=False, force=False):
"""Bring a :class:`AnkiDataFrame` from the ``raw`` format (i.e. the
exact format that Anki uses in its internal representation) to our
convenient format.
Args:
inplace: If False, return new dataframe, else update old one
            force: If a previous conversion fails, :meth:`normalize` will
                refuse to attempt another one by default. Use this option
                to force it to attempt one anyway.
        Returns:
            New :class:`AnkiDataFrame` if ``inplace==False``, else None
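        Examples:
            A minimal sketch; assumes a notes table ``notes`` that was
            previously brought into the raw format with :meth:`raw`:
            .. code-block:: python
                notes_raw = notes.raw()
                # Convert back to the convenient ankipandas format:
                notes = notes_raw.normalize()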
"""
if not inplace:
df = self.copy(True)
df.normalize(inplace=True, force=force)
return df
if not force:
self._check_df_format()
if self._df_format == "ours":
log.warning(
"Dataframe already is in our format. "
"Returning without doing anything."
)
return
table = self._anki_table
if table not in ["cards", "revs", "notes"]:
self._invalid_table()
self._df_format = "in_progress"
# Dtypes
# ------
for column, typ in _columns.dtype_casts[table].items():
self[column] = self[column].astype(typ)
# Renames
# -------
self.rename(columns=_columns.columns_anki2ours[table], inplace=True)
# Value maps
# ----------
# We sometimes interpret cryptic numeric values
if table in _columns.value_maps:
for column in _columns.value_maps[table]:
self[column] = self[column].map(
_columns.value_maps[table][column]
)
# IDs
# ---
id_field = _columns.table2index[table]
duplicate_ids = self[id_field][self[id_field].duplicated()].tolist()
if duplicate_ids:
log.critical(
"The following IDs occur "
"more than once: %s. Please do not use this dataframe.",
", ".join(map(str, duplicate_ids)),
)
self.set_index(id_field, inplace=True)
if table == "cards":
self["cdeck"] = self["did"].map(raw.get_did2deck(self.db))
self["codeck"] = self["codid"].map(raw.get_did2deck(self.db))
elif table == "notes":
self["nmodel"] = self["mid"].map(raw.get_mid2model(self.db))
# Tags
# ----
if table == "notes":
# Tags as list, rather than string joined by space
self["ntags"] = self["ntags"].apply(
lambda joined: [item for item in joined.split(" ") if item]
)
# Fields
# ------
if table == "notes":
# Fields as list, rather than as string joined by \x1f
self["nflds"] = self["nflds"].str.split("\x1f")
self._fields_format = "list"
# Drop columns
# ------------
drop_columns = set(self.columns) - set(_columns.our_columns[table])
self.drop(drop_columns, axis=1, inplace=True)
self.check_table_integrity()
self._df_format = "ours"
def raw(self, inplace=False, force=False):
"""Bring a :class:`AnkiDataFrame` into the ``raw`` format (i.e. the
exact format that Anki uses in its internal representation) .
Args:
inplace: If False, return new dataframe, else update old one
            force: If a previous conversion fails, :meth:`raw` will
                refuse to attempt another one by default. Use this option
                to force it to attempt one anyway.
        Returns:
            New :class:`AnkiDataFrame` if ``inplace==False``, else None
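        Examples:
            A minimal sketch; assumes ``notes = col.notes`` for an opened
            collection ``col``:
            .. code-block:: python
                # Convert to Anki's internal representation ...
                notes_raw = notes.raw()
                # ... and back to the convenient format:
                notes = notes_raw.normalize()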
"""
if not inplace:
df = self.copy(True)
df.raw(inplace=True, force=force)
return df
if not force:
self._check_df_format()
if self._df_format == "anki":
log.warning(
"Dataframe already is in Anki format. "
"Returning without doing anything."
)
return
table = self._anki_table
if table not in ["revs", "cards", "notes"]:
self._invalid_table()
self._df_format = "in_progress"
# Note: Here we pretty much go through self.normalize() and revert
# every single step.
# Update automatic fields
# -----------------------
self._set_mod()
self._set_usn()
self._set_guid()
# IDs
# ---
# Index as column:
self.reset_index(inplace=True, drop=False)
if table == "cards":
self["did"] = self["cdeck"].map(raw.get_deck2did(self.db))
self["odid"] = self["codeck"].map(raw.get_deck2did(self.db))
if table == "notes":
self["mid"] = self["nmodel"].map(raw.get_model2mid(self.db))
# Fields & Hashes
# ---------------
if table == "notes":
if not self._fields_format == "list":
self.fields_as_list(inplace=True, force=True)
# Check if success
if not self._fields_format == "list":
raise ValueError(
"It looks like the last call to fields_as_list or"
"fields_as_columns was not successful, so you better start "
"over."
)
# Restore the sort field.
mids = list(self["mid"].unique())
mid2sfld = raw.get_mid2sortfield(self.db)
for mid in mids:
sfield = mid2sfld[mid]
df_model = self[self["mid"] == mid]
fields = pd.DataFrame(df_model["nflds"].tolist())
self.loc[self["mid"] == mid, "nsfld"] = fields[sfield].tolist()
self["ncsum"] = self["nflds"].apply(
lambda lst: field_checksum(lst[0])
)
self["nflds"] = self["nflds"].str.join("\x1f")
# Tags
# ----
if table == "notes" and "nflds" in self.columns:
self["ntags"] = self["ntags"].str.join(" ")
# Value Maps
# ----------
if table in _columns.value_maps:
for column in _columns.value_maps[table]:
if column not in self.columns:
continue
self[column] = self[column].map(
invert_dict(_columns.value_maps[table][column])
)
# Renames
# -------
self.rename(
columns=invert_dict(_columns.columns_anki2ours[table]), inplace=True
)
self.rename(columns={"index": "id"}, inplace=True)
# Dtypes
# ------
for column, typ in _columns.dtype_casts_back[table].items():
self[column] = self[column].astype(typ)
# Unused columns
# --------------
if table in ["cards", "notes"]:
self["data"] = ""
self["flags"] = 0
# Drop and Rearrange
# ------------------
# Todo: warn about dropped columns?
if len(self) == 0:
new = pd.DataFrame(columns=_columns.anki_columns[table])
else:
new = pd.DataFrame(self[_columns.anki_columns[table]])
self.drop(self.columns, axis=1, inplace=True)
for col in new.columns:
self[col] = new[col]
self.check_table_integrity()
self._df_format = "anki"
# Write
# ==========================================================================
def summarize_changes(self, output="print") -> dict | None:
"""Summarize changes that were made with respect to the table
as loaded from the database.
Args:
            output: Output mode: 'print' (default, print a summary)
                or 'dict' (return the summary as a dictionary)
Returns:
None or dictionary
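        Examples:
            A minimal sketch; assumes ``notes = col.notes`` for an opened
            collection ``col``:
            .. code-block:: python
                notes.add_tag("marked", inplace=True)
                changes = notes.summarize_changes(output="dict")
                # changes["n_modified"] now counts the tagged rows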
"""
as_dict = {
"n": len(self),
"n_modified": sum(self.was_modified(na=False)),
"n_added": sum(self.was_added()),
"n_deleted": len(self.was_deleted()),
}
as_dict["has_changed"] = (
as_dict["n_modified"] or as_dict["n_added"] or as_dict["n_deleted"]
)
if output == "print":
print("Total rows: {}".format(as_dict["n"]))
print("Compared to original version:")
print("Modified rows: {}".format(as_dict["n_modified"]))
print("Added rows: {}".format(as_dict["n_added"]))
print("Deleted rows: {}".format(as_dict["n_deleted"]))
return None # make explicit for mypy
elif output == "dict":
return as_dict
else:
raise ValueError(f"Invalid output setting: {output}")
# Append
# ==========================================================================
def _get_id(self, others=()) -> int:
"""Generate ID from timestamp and increment if it is already in use.
.. warning::
Do not call repeatedly without adding new IDs to index (might
produce identical IDs). Rather use :meth:`_get_ids` instead.
"""
idx = int(1000 * time.time())
while idx in self.index or idx in others:
idx += 1
return idx
# todo: documentation
def add_card(
self,
nid: int,
cdeck: str,
cord: int | list[int] | None = None,
cmod: int | None = None,
cusn: int | None = None,
cqueue: str | None = None,
ctype: str | None = None,
civl: int | None = None,
cfactor: int | None = None,
creps: int | None = None,
clapses: int | None = None,
cleft: int | None = None,
cdue: int | None = None,
inplace=False,
):
"""
        Add a single card. Similar to
        :py:meth:`ankipandas.ankidf.AnkiDataFrame.add_cards`, which also
        documents the meaning of the arguments.
Args:
nid:
cdeck:
cord:
cmod:
cusn:
cqueue:
ctype:
civl:
cfactor:
creps:
clapses:
cleft:
cdue:
inplace:
Returns:
"""
return self.add_cards(
nid=[nid],
cdeck=cdeck,
cord=cord,
cmod=cmod,
cusn=cusn,
cqueue=cqueue,
ctype=ctype,
civl=civl,
cfactor=cfactor,
creps=creps,
clapses=clapses,
cleft=cleft,
cdue=cdue,
inplace=inplace,
)
# todo: change order of arguments?
# fixme: cord will be replaced
# todo: duplicate cards (same note, same cord)?
# fixme: This is an absolute mess with the signature and mypy...
def add_cards(
self,
nid: list[int],
cdeck: str | list[str],
cord: int | list[int] | None = None,
cmod: int | list[int] | None = None,
cusn: int | list[int] | None = None,
cqueue: str | list[str] | None = None,
ctype: str | list[str] | None = None,
civl: int | list[int] | None = None,
cfactor: int | list[int] | None = None,
creps: int | list[int] | None = None,
clapses: int | list[int] | None = None,
cleft: int | list[int] | None = None,
cdue: int | list[int] | None = None,
inplace=False,
):
"""
Add cards belonging to notes of one model.
Args:
nid: Note IDs of the notes that you want to add cards for
cdeck: Name of deck to add cards to as string or list of strings
(different deck for each nid).
cord: Number of the template to add cards for as int or list
thereof. The template corresponds to the reviewing
direction. If left ``None`` (default), cards for all
templates will be added.
It is not possible to specify different cord for different
nids!
cmod: List of modification timestamps.
Will be set automatically if ``None`` (default) and it is
discouraged to set your own.
cusn: List of Update Sequence Numbers.
Will be set automatically (to -1, i.e. needs update)
if ``None`` (default) and it is
very discouraged to set your own.
cqueue: 'sched buried', 'user buried', 'suspended', 'new',
'learning', 'due', 'in learning' (learning but next rev at
least a day after the previous review). If ``None`` (default),
'new' is chosen for all cards. Specify as string or list
thereof.
ctype: List of card types ('learning', 'review', 'relearn', 'cram').
If ``None`` (default) 'learning' is chosen for all.
civl: The new interval that the card was pushed to after the review.
Positive values are in days, negative values are in seconds
(for learning cards). If ``None`` (default) 0 is chosen for
all cards.
cfactor: The new ease factor of the card in permille. If ``None``
(default) 0 is chosen for all.
creps: Number of reviews. If ``None`` (default), 0 is chosen for
all cards.
clapses: The number of times the card went from a 'was answered
correctly' to 'was answered incorrectly'. If ``None`` (default),
0 is chosen for all cards.
cleft: Of the form ``a*1000+b``, with: ``b`` the number of reps
left till graduation and ``a`` the number of reps left today.
If ``None`` (default), 0 is chosen for all cards.
cdue: Due is used differently for different card types: new:
note id or random int, due: integer day, relative to the
collection's creation time, learning: integer timestamp.
If ``None`` (default), check that we're adding a new card and
set to note ID.
inplace: If ``False`` (default), return a new
:class:`~ankipandas.AnkiDataFrame`, if True, modify in place and
return new card IDs
Returns:
            :class:`~ankipandas.AnkiDataFrame` if ``inplace==False``, else
            list of new card IDs
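        Examples:
            A minimal sketch; assumes that notes with IDs ``nid1`` and
            ``nid2`` exist (and share a model) and that a deck named
            'Default' is present:
            .. code-block:: python
                cards = col.cards
                new_cids = cards.add_cards(
                    [nid1, nid2], "Default", inplace=True
                )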
"""
self._check_our_format()
if not self._anki_table == "cards":
raise ValueError("Cards can only be added to cards table!")
# --- Ord ---
nid2mid = raw.get_nid2mid(self.db)
missing_nids = sorted(set(nid) - set(nid2mid))
if missing_nids:
raise ValueError(
"The following note IDs (nid) can't be found in the notes "
"table: {}. Perhaps you didn't call notes.write() to write "
"them back into the database?".format(
", ".join(map(str, missing_nids))
)
)
mids = {nid2mid[x] for x in nid}
if len(mids) >= 2:
raise ValueError(
"It is only supported to add cards for notes of the same model"
", but you're trying to add cards for notes of "
"models: {}".format(", ".join(map(str, mids)))
)
mid = mids.pop()
# fixme: should use function from ankipandas.raw
available_ords = raw.get_mid2templateords(self.db)[mid]
if cord is None:
cord = available_ords
elif isinstance(cord, int):
cord = [cord]
elif is_list_like(cord):
pass
else:
raise ValueError(
f"Unknown type for cord specification: {type(cord)}"
)
not_available = sorted(set(cord) - set(available_ords))
if not_available:
raise ValueError(
"The following templates are not available for notes of "
"this model: {}".format(", ".join(map(str, not_available)))
)
# --- Deck ---
if isinstance(cdeck, str):
cdeck = [cdeck] * len(nid)
elif is_list_like(cdeck):
if len(cdeck) != len(nid):
raise ValueError(
"Number of decks doesn't match number of "
"notes for which cards should be added: {} "
"instead of {}.".format(len(cdeck), len(nid))
)
else:
raise ValueError(f"Unknown format for cdeck: {type(cdeck)}")
unknown_decks = sorted(
set(cdeck) - set(raw.get_did2deck(self.db).values())
)
if unknown_decks:
raise ValueError(
"The following decks do not seem to exist: {}".format(
", ".join(unknown_decks)
)
)
# --- Rest ---
def _handle_input(inpt, name, default, typ, options=None) -> list[Any]:
if inpt is None:
inpt = [default] * len(nid)
elif is_list_like(inpt):
if len(inpt) != len(nid):
raise ValueError(
"Number of {} doesn't match number of "
"notes for which cards should be added: {} "
"instead of {}.".format(name, len(inpt), len(nid))
)
elif isinstance(inpt, typ):
inpt = [inpt] * len(nid)
else:
raise ValueError(
"Invalid type of {} specification: {}".format(
name, type(inpt)
)
)
if options is not None:
invalid = sorted(set(inpt) - set(options))
if invalid:
raise ValueError(
"The following values are no valid "
"entries for {}: {}".format(name, ", ".join(invalid))
)
return inpt
cmod = _handle_input(cmod, "cmod", int(time.time()), int)
cusn = _handle_input(cusn, "cusn", -1, int)
cqueue = _handle_input(
cqueue,
"cqueue",
"new",
str,
options=[
"sched buried",
"user buried",
"suspended",
"new",
"learning",
"due",
"in learning",
],
)
ctype = _handle_input(
ctype,
"ctype",
"learning",
str,
options=["learning", "review", "relearn", "cram"],
)
civl = _handle_input(civl, "civl", 0, int)
cfactor = _handle_input(cfactor, "cfactor", 0, int)
creps = _handle_input(creps, "creps", 0, int)
clapses = _handle_input(clapses, "clapses", 0, int)
cleft = _handle_input(cleft, "cleft", 0, int)
# --- Due ---
# Careful: Has to come after cqueue is defined!
if cdue is None:
if set(cqueue) == {"new"}:
cdue = nid
else:
raise ValueError(
"Due date can only be set automatically for cards of type"
"/queue 'new', but you have types: {}".format(
", ".join(set(cqueue))
)
)
elif is_list_like(cdue):
if len(cdue) != len(nid): # type: ignore
raise ValueError(
"Number of cdue doesn't match number of "
"notes for which cards should be added: {} "
"instead of {}.".format(len(cdue), len(nid)) # type: ignore
)
elif isinstance(cdue, int):
cdue = [cdue] * len(nid)
else:
raise ValueError(
f"Invalid type of cdue specification: {type(cdue)}"
)
# Now we need to decide on contents for EVERY column in the DF
all_cids = self._get_ids(n=len(nid) * len(cord))
add = pd.DataFrame(columns=self.columns, index=all_cids)
for icord, co in enumerate(cord):
cid = all_cids[icord * len(nid) : (icord + 1) * len(nid)]
known_columns = {
"nid": nid,
"cdeck": cdeck,
"cord": [co] * len(nid),
"cdue": cdue,
"cmod": cmod,
"cusn": cusn,
"cqueue": cqueue,
"ctype": ctype,
"civl": civl,
"cfactor": cfactor,
"creps": creps,
"clapses": clapses,
"cleft": cleft,
"codeck": [""] * len(nid),
"codue": [0] * len(nid),
}
for key, item in known_columns.items():
add.loc[cid, key] = pd.Series(item, index=cid)
add = add.astype(
{
key: value
for key, value in _columns.dtype_casts_all.items()
if key in self.columns
}
)
if not inplace:
return self.append(add)
else:
replace_df_inplace(self, self.append(add))
return all_cids
def _get_ids(self, n=1) -> list[int]:
"""Generate ID from timestamp and increment if it is already in use.
Args:
n: Number of IDs to generate
"""
indices: list[int] = []
for _ in range(n):
indices.append(self._get_id(others=indices))
return indices
# Todo: If tags single list: Same for all!
def add_notes(
self,
nmodel: str,
nflds: list[list[str]] | dict[str, list[str]] | list[dict[str, str]],
ntags: list[list[str]] | None = None,
nid=None,
nguid=None,
nmod=None,
nusn=None,
inplace=False,
):
"""Add multiple new notes corresponding to one model.
Args:
nmodel: Name of the model (must exist already, check
:meth:`list_models` for a list of available models)
nflds: Fields of the note either as list of lists, e.g.
``[[field1_note1, ... fieldN_note1], ...,
[field1_noteM, ... fieldN_noteM]]`` or dictionary
``{field name: [field_value1, ..., field_valueM]}`` or list of
dictionaries: ``[{field_name: field_value for note 1}, ...,
{field_name: field_value for note N}]``.
If dictionaries are used: If fields are not present,
they are filled with empty strings.
ntags: Tags of the note as list of list of strings:
``[[tag1_note1, tag2_note1, ... ], ... [tag_1_noteM, ...]]``.
If ``None``, no tags will be added.
nid: List of note IDs. Will be set automatically if ``None``
(default) and it is discouraged to set your own.
nguid: List of Globally Unique IDs. Will be set automatically if
``None`` (default), and it is discouraged to set your own.
nmod: List of modification timestamps.
Will be set automatically if ``None`` (default) and it is
discouraged to set your own.
nusn: List of Update Sequence Number.
Will be set automatically (to -1, i.e. needs update)
if ``None`` (default) and it is
very discouraged to set your own.
            inplace: If ``False`` (default), return a new
                :class:`~ankipandas.AnkiDataFrame`, if True, modify in place
                and return the list of new note IDs
        Returns:
            :class:`~ankipandas.AnkiDataFrame` if ``inplace==False``, else
            list of new note IDs
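        Examples:
            A minimal sketch; assumes a model named 'Basic' with the two
            fields 'Front' and 'Back':
            .. code-block:: python
                notes = col.notes
                new_nids = notes.add_notes(
                    "Basic",
                    nflds=[["front 1", "back 1"], ["front 2", "back 2"]],
                    ntags=[["tag1"], ["tag2"]],
                    inplace=True,
                )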
"""
self._check_our_format()
if not self._anki_table == "notes":
raise ValueError("Notes can only be added to notes table.")
# --- Model ---
model2mid = raw.get_model2mid(self.db)
if nmodel not in model2mid:
raise ValueError(f"No model of with name '{nmodel}' exists.")
field_keys = raw.get_mid2fields(self.db)[model2mid[nmodel]]
# --- Fields ---
if is_list_dict_like(nflds):
n_notes = len(nflds)
specified_fields = set(
flatten_list_list(list(map(lambda x: list(x), nflds)))
) # mypy doesn't want to use list as a function here
unknown_fields = sorted(specified_fields - set(field_keys))
if unknown_fields:
raise ValueError(
"Unknown fields: {}".format(", ".join(unknown_fields))
)
field_key2field = {
key: [d.get(key) for d in nflds] for key in field_keys # type: ignore
}
elif is_list_list_like(nflds):
n_fields = list({len(x) for x in nflds})
n_notes = len(nflds)
if not (len(n_fields) == 1 and n_fields[0] == len(field_keys)):
raise ValueError(
"Wrong number of items for specification of field contents:"
" There are {} fields for your model type, but you"
" specified {} items.".format(
len(field_keys), ", ".join(map(str, n_fields))
)
)
field_key2field = {
field_key: [x[i] for x in nflds] # type: ignore
for i, field_key in enumerate(field_keys)
}
elif is_dict_list_like(nflds):
lengths = {len(x) for x in nflds.values()} # type: ignore
if len(lengths) >= 2:
raise ValueError(
"Inconsistent number of "
"fields: {}".format(", ".join(map(str, lengths)))
)
elif not lengths:
raise ValueError("Are you trying to add zero notes?")
n_notes = lengths.pop()
field_key2field = copy.deepcopy(nflds) # type: ignore
for key in field_keys:
if key not in field_key2field:
field_key2field[key] = [""] * n_notes
else:
raise ValueError("Unsupported fields specification.")
# --- Tags ---
if ntags is not None:
if len(ntags) != n_notes:
raise ValueError(
"Number of tags doesn't match number of notes to"
" be added: {} instead of {}.".format(len(ntags), n_notes)
)
else:
ntags = [[]] * n_notes
# --- Nids ---
if nid is not None:
if len(nid) != n_notes:
raise ValueError(
"Number of note IDs doesn't match number of notes to"
" be added: {} instead of {}.".format(len(nid), n_notes)
)
else:
nid = self._get_ids(n=n_notes)
already_present = sorted(set(nid) & set(self.index))
if already_present:
raise ValueError(
"The following note IDs (nid) are "
"already present: {}".format(", ".join(map(str, nid)))
)
if len(set(nid)) < len(nid):
raise ValueError("Your note ID specification contains duplicates!")
# --- Mod ---
if nmod is not None:
if len(nmod) != n_notes:
raise ValueError(
"Number of modification dates doesn't match number of "
"notes to be added: {} "
"instead of {}.".format(len(nmod), n_notes)
)
else:
nmod = [int(time.time()) for _ in range(n_notes)]
# --- Guid ---
if nguid is not None:
if len(nguid) != n_notes:
raise ValueError(
"Number of globally unique IDs (guid) doesn't match number "
"of notes to be added: {} "
"instead of {}.".format(len(nguid), n_notes)
)
else:
nguid = [generate_guid() for _ in range(n_notes)]
existing_guids = sorted(set(nguid) & set(self["nguid"].unique()))
if existing_guids:
raise ValueError(
"The following globally unique IDs (guid) are already"
" present: {}.".format(", ".join(map(str, existing_guids)))
)
# todo: make efficient
duplicate_guids = sorted({g for g in nguid if nguid.count(g) >= 2})
if duplicate_guids:
raise ValueError(
"The following gloally unique IDs (guid) are not unique: ",
", ".join(map(str, duplicate_guids)),
)
# --- Usn ---
if nusn is None:
nusn = -1
else:
if len(nusn) != n_notes:
raise ValueError(
"Number of update sequence numbers (usn) doesn't match"
"number of notes to be added: {} "
"instead of {}.".format(len(nusn), n_notes)
)
# --- Collect all ---
# Now we need to decide on contents for EVERY column in the DF
known_columns = {
"nmodel": nmodel,
"ntags": ntags,
"nguid": nguid,
"nmod": nmod,
"nusn": nusn,
}
# More difficult: Field columns:
if self._fields_format == "list":
# Be careful with order!
# Also need to flip dimensions
known_columns["nflds"] = np.swapaxes(
[field_key2field[field_key] for field_key in field_keys], 0, 1
).tolist()
elif self._fields_format == "columns":
# First we need to make sure that the df has the columns for our
# model (perhaps this is the first note of this model that we're
# adding, so fields_as_columns() didn't add them).
            for col in field_keys:
                if self.fields_as_columns_prefix + col not in self.columns:
                    self[self.fields_as_columns_prefix + col] = ""
# Let's first set all fields as columns to '', because we also
# need to set those which aren't from our model:
for col in self.columns:
if col.startswith(self.fields_as_columns_prefix):
known_columns[col] = [""] * n_notes
# Now let's fill those of our model
for col, values in field_key2field.items():
known_columns[self.fields_as_columns_prefix + col] = values
else:
raise ValueError(
"Fields have to be in 'list' or 'columns' format, but yours "
"are in '{}' format.".format(self._fields_format)
)
add = pd.DataFrame(columns=self.columns, index=nid)
for key, item in known_columns.items():
add.loc[:, key] = pd.Series(item, index=nid)
add = add.astype(
{
key: value
for key, value in _columns.dtype_casts_all.items()
if key in self.columns
}
)
if not inplace:
return self.append(add)
else:
replace_df_inplace(self, self.append(add))
return nid
def add_note(
self,
nmodel: str,
nflds: list[str] | dict[str, str],
ntags=None,
nid=None,
nguid=None,
nmod=None,
nusn=-1,
inplace=False,
):
"""Add new note.
.. note::
For better performance it is advisable to use :meth:`add_notes`
when adding many notes.
Args:
nmodel: Name of the model (must exist already, check
:meth:`list_models` for a list of available models)
nflds: Fields of the note either as list or as dictionary
``{field name: field value}``. In the latter case, if fields
are not present, they are filled with empty strings.
ntags: Tags of the note as string or Iterable thereof. Defaults to
no tags.
            nid: Note ID. Will be set automatically by default and it is
                discouraged to set your own. If you do so and the ID already
                exists, an error will be raised.
nguid: Note Globally Unique ID. Will be set automatically by
default, and it is discouraged to set your own.
nmod: Modification timestamp. Will be set automatically by default
and it is discouraged to set your own.
nusn: Update sequence number. Will be set automatically
(to -1, i.e. needs update) if ``None`` (default) and it is
very discouraged to set your own.
inplace: If False (default), return a new
:class:`ankipandas.AnkiDataFrame`, if True, modify in place and
return new note ID
Returns:
            :class:`ankipandas.AnkiDataFrame` if ``inplace==False``, else
            new note ID (``int``)
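        Examples:
            A minimal sketch; assumes a model named 'Basic' with the two
            fields 'Front' and 'Back':
            .. code-block:: python
                new_nid = col.notes.add_note(
                    "Basic",
                    {"Front": "question", "Back": "answer"},
                    inplace=True,
                )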
"""
_nflds: list[list[str]] | dict[str, list[str]] = []
if is_list_like(nflds):
_nflds = [nflds] # type: ignore
elif isinstance(nflds, dict):
_nflds = {key: [value] for key, value in nflds.items()}
else:
raise ValueError(
f"Unknown type for fields specification: {type(nflds)}"
)
del nflds
if ntags is not None:
ntags = [ntags]
if nid is not None:
nid = [nid]
if nguid is not None:
nguid = [nguid]
if nmod is not None:
nmod = [nmod]
if nusn is not None:
nusn = [nusn]
ret = self.add_notes(
nmodel=nmodel,
nflds=_nflds,
ntags=ntags,
nid=nid,
nguid=nguid,
nmod=nmod,
nusn=nusn,
inplace=inplace,
)
if inplace:
# We get nids back
return ret[0]
else:
# We get new AnkiDataFrame back
return ret
# Help
# ==========================================================================
# todo: test?
def help_col(self, column, ret=False) -> str | None:
"""
Show description/help about a column. To get information about all
columns, use the :meth:`.help_cols` method instead.
Args:
column: Name of the column
ret: If True, return as string, rather than printing
"""
df = self.help_cols(column)
if len(df) == 0:
raise ValueError("Could not find help for your search request.")
if len(df) == 2:
# fix for nid and cid column:
df = self.help_cols(column, table=self._anki_table)
if len(df) != 1:
raise ValueError("Could not find help due to bug.")
data = df.loc[column].to_dict()
h = f"Help for column '{column}'\n"
h += "-" * (len(h) - 1) + "\n"
if data["Native"]:
h += "Name in raw Anki database: " + data["AnkiColumn"] + "\n"
h += "Information from table: " + data["Table"] + "\n"
h += "Present by default: " + str(data["Default"]) + "\n\n"
h += "Description: " + data["Description"]
if ret:
return h
else:
print(h)
return None # Make explicit for mypy
def help_cols(
self, column="auto", table="all", ankicolumn="all"
) -> pd.DataFrame:
"""
Show information about the columns and their interpretations. To
get information about a single column, please use :meth:`.help_col`.
Args:
column: Name of a field or column (as used by us) or list thereof.
If 'auto' (default), all columns from the current dataframe will
be shown.
                If 'all', no filtering based on the column will be performed
table: Possible values: 'notes', 'cards', 'revlog' or list thereof.
If 'all' no filtering based on the table will be performed
ankicolumn: Name of a field or column (as used by Anki) or list
thereof.
If 'all' no filtering based on the table will be performed
Returns:
Pandas DataFrame with all matches.
.. warning::
As there are problems with text wrapping in pandas DataFrame, this
method might change or disappear in the future.
"""
help_path = pathlib.Path(__file__).parent / "data" / "anki_fields.csv"
df = pd.read_csv(help_path)
if column == "auto":
column = list(self.columns)
if table != "all":
if isinstance(table, str):
table = [table]
df = df[df["Table"].isin(table)]
if column != "all":
if isinstance(column, str):
column = [column]
df = df[df["Column"].isin(column)]
if ankicolumn != "all":
if isinstance(ankicolumn, str):
ankicolumn = [ankicolumn]
df = df[df["AnkiColumn"].isin(ankicolumn)]
df.set_index("Column", inplace=True)
return df
@staticmethod
def help(ret=False) -> str | None:
"""Display short help text.
Args:
ret: Return as string instead of printing it.
Returns:
string if ret==True, else None
"""
h = (
"This is the help for the class AnkiDataFrame, a subclass of "
"pandas.DataFrame. \n"
"The full documentation of all class methods "
"unique to AnkiDataFrame can be found on "
"https://ankipandas.readthedocs.io. \n"
"The inherited methods from "
"pandas.DataFrame are documented at https://pandas.pydata.org/"
"pandas-docs/stable/reference/api/pandas.DataFrame.html.\n"
"To get information about the fields currently in this table, "
"please use the help_cols() method."
)
if ret:
return h
else:
print(h)
return None # explicit for mypy