Analysis#

Note

All examples assume the line

col = Collection()

Or col = Collection("/path/to/col.anki2"), etc.

In which deck are the most leeches?#

cards = col.cards.merge_notes()
counts = cards[cards.has_tag("leech")]["cdeck"].value_counts()
counts.plot.pie(title="Leeches per deck")
_images/leeches_per_deck.png

Which deck has the longest average retention length?#

grouped = col.cards.groupby("cdeck")
data = grouped["civl"].mean().sort_values().tail()
ax = data.plot.barh()
ax.set_ylabel("Deck name")
ax.set_xlabel("Average expected retention length/review interval [days]")
ax.set_title("Average retention length per deck")
_images/retention_rate_per_deck.png

Repetitions vs type#

Minimal:

col.cards.hist("creps", by="ctype")

Prettier:

axs = col.cards.hist(column="creps", by="ctype", layout=(1, 2), figsize=(12, 3))
for ax in axs:
    ax.set_xlabel("#Reviews")
    ax.set_ylabel("Count")
_images/repetitions_per_type.png

Repetitions vs deck#

One-liner:

col.cards.hist(column="creps", by="cdeck")

Prettier:

interesting_decks = list(col.cards.cdeck.unique())
interesting_decks.remove("archived::physics")
selected = col.cards[col.cards.cdeck.isin(interesting_decks)]
axss = selected.hist(
    column="creps",
    by="cdeck",
    sharex=True,
    layout=(5, 4),
    figsize=(15, 15),
    density=True,
)
for axs in axss:
    for ax in axs:
        ax.set_xlabel("#Reviews")
        ax.set_ylabel("Count")
_images/repetitions_per_deck.png

Retention distribution vs deck#

from __future__ import annotations

import matplotlib.pyplot as plt
import numpy as np

ax = plt.gca()
for deck in col.cards.cdeck.unique():
    selected = col.cards[col.cards.cdeck == deck]["civl"]
    if len(selected) < 1000:
        continue
    selected.plot.hist(
        ax=ax,
        label=deck,
        histtype="step",
        linewidth=2,
        xlim=(0, 365),
        bins=np.linspace(0, 365, 10),
    )
ax.set_xlabel("Predicted retention length (review interval)")
ax.set_ylabel("Number of cards")
ax.set_title("Expected retention length per deck [days]")
ax.legend(frameon=False)
_images/retention_distribution_vs_deck.png

Reviews vs retention length vs deck#

from __future__ import annotations

import matplotlib.pyplot as plt
import pandas as pd

xs = []
ys = []
decks = []
for deck in col.cards.cdeck.unique():
    selected = col.cards[col.cards["cdeck"] == deck]
    if len(selected) < 500:
        continue
    decks.append(deck)
    binned = pd.qcut(selected["creps"], 15, duplicates="drop")
    results = selected.groupby(binned)["civl"].mean()
    y = results.tolist()
    x = results.index.map(lambda x: x.mid).tolist()
    xs.append(x)
    ys.append(y)

ax = plt.gca()
for i in range(len(xs)):
    ax.plot(xs[i], ys[i], "o-", label=decks[i])
ax.set_xlabel("#Reviews")
ax.set_ylabel("Expected retention length/review interval [days]")
ax.set_title("Number of reviews vs retention length")
ax.legend(frameon=False)
_images/reviews_vs_ease.png