Analysis#
Note
All examples assume that the following line has been run first:
col = Collection()
(or col = Collection("/path/to/col.anki2"), etc.)
In which deck are the most leeches?#
# Work on the card table with note information merged in, then keep only
# the cards tagged as leeches.
cards = col.cards.merge_notes()
is_leech = cards.has_tag("leech")
leech_cards = cards[is_leech]

# Count leeches per deck and show the shares as a pie chart.
counts = leech_cards["cdeck"].value_counts()
counts.plot.pie(title="Leeches per deck")

Which decks have the longest average retention length?#
# Average review interval ("civl") per deck.
grouped = col.cards.groupby("cdeck")
# Select the interval column *before* aggregating: averaging the whole
# frame would also try to average any non-numeric columns, which raises
# a TypeError in pandas >= 2.0.
# After the ascending sort, tail() keeps the five decks with the largest
# mean interval.
data = grouped["civl"].mean().sort_values().tail()
ax = data.plot.barh()
ax.set_ylabel("Deck name")
ax.set_xlabel("Average expected retention length/review interval [days]")
ax.set_title("Average retention length per deck")

Repetitions vs type#
Minimal:
# Histogram of the number of reviews per card ("creps"), split by card type.
col.cards.hist(column="creps", by="ctype")
Prettier:
# Same histogram as the minimal version, laid out side by side in a wide
# figure and with labelled axes.
axs = col.cards.hist(
    column="creps",
    by="ctype",
    layout=(1, 2),
    figsize=(12, 3),
)
for panel in axs:
    panel.set_ylabel("Count")
    panel.set_xlabel("#Reviews")

Repetitions vs deck#
One-liner:
# Histogram of the number of reviews per card, split by deck.
col.cards.hist(
    column="creps",
    by="cdeck",
)
Prettier:
# Decks to leave out of the comparison; adapt this to your own collection.
excluded_decks = {"archived::physics"}
# Filter instead of calling list.remove(), which raises a ValueError when
# the deck to exclude is not present in the collection.
interesting_decks = [
    deck for deck in col.cards.cdeck.unique() if deck not in excluded_decks
]
selected = col.cards[col.cards.cdeck.isin(interesting_decks)]

# One normalised histogram per deck on a shared x-axis.
# NOTE: layout=(5, 4) provides 20 panels; enlarge it if more decks remain.
axss = selected.hist(
    column="creps",
    by="cdeck",
    sharex=True,
    layout=(5, 4),
    figsize=(15, 15),
    density=True,
)
for axs in axss:
    for ax in axs:
        ax.set_xlabel("#Reviews")
        # density=True normalises every histogram, so the y-axis shows a
        # density rather than a raw number of cards.
        ax.set_ylabel("Density")

Retention distribution vs deck#
1from __future__ import annotations
2
3import matplotlib.pyplot as plt
4import numpy as np
5
# Identical bin edges for every deck so the step outlines are comparable.
bin_edges = np.linspace(0, 365, 10)

# Draw all decks into the current axes.
ax = plt.gca()
for deck in col.cards.cdeck.unique():
    in_deck = col.cards.cdeck == deck
    intervals = col.cards[in_deck]["civl"]
    # Only plot decks with at least 1000 cards.
    if len(intervals) >= 1000:
        intervals.plot.hist(
            ax=ax,
            label=deck,
            histtype="step",
            linewidth=2,
            xlim=(0, 365),
            bins=bin_edges,
        )

ax.set_title("Expected retention length per deck [days]")
ax.set_xlabel("Predicted retention length (review interval)")
ax.set_ylabel("Number of cards")
ax.legend(frameon=False)

Reviews vs retention length vs deck#
1from __future__ import annotations
2
3import pandas as pd
4
5xs = []
6ys = []
7decks = []
8for deck in col.cards.cdeck.unique():
9 selected = col.cards[col.cards["cdeck"] == deck]
10 if len(selected) < 500:
11 continue
12 decks.append(deck)
13 binned = pd.qcut(selected["creps"], 15, duplicates="drop")
14 results = selected.groupby(binned)["civl"].mean()
15 y = results.tolist()
16 x = results.index.map(lambda x: x.mid).tolist()
17 xs.append(x)
18 ys.append(y)
19
20ax = plt.gca()
21for i in range(len(xs)):
22 ax.plot(xs[i], ys[i], "o-", label=decks[i])
23ax.set_xlabel("#Reviews")
24ax.set_ylabel("Expected retention length/review interval [days]")
25ax.set_title("Number of reviews vs retention length")
26ax.legend(frameon=False)
