-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_data.py
More file actions
64 lines (52 loc) · 1.72 KB
/
plot_data.py
File metadata and controls
64 lines (52 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import pandas as pd
import matplotlib.pyplot as plt
import argparse
from time import time
# Apply matplotlib's "tableau-colorblind10" style sheet at import time so that
# every figure created afterwards by this module uses it.
plt.style.use("tableau-colorblind10")
from config import TAGS
def main(argv=None) -> None:
    """Plot the number of used articles per year, stacked by category label.

    Reads ``<data_dir>/<data_split>.csv``, relabels every
    ``SIMILAR_BUT_NOT_CLIMATE`` row as ``UNRELATED_TO_CLIMATE``, counts the rows
    whose ``use`` column equals ``True`` per year for each remaining key of
    ``TAGS``, and saves the result as a stacked bar chart named
    ``barchart_label_per_year_<timestamp>.png`` (relative path).

    The CSV is expected to provide the columns ``date``, ``category`` and
    ``use``; those are the only columns this function reads.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            ``argparse`` fall back to ``sys.argv[1:]``, i.e. the regular
            command-line behaviour.

    Raises:
        SystemExit: If the command-line arguments are invalid (raised by
            ``argparse``) or if no row is left to plot after filtering.
    """
    parser = argparse.ArgumentParser(
        description="Plot data split",
    )
    parser.add_argument(
        "--data_dir",
        type=str,
        default="dataset",
        help="Directory containing dataset (split as `dev`, `test`, `train` and `all`)",
    )
    parser.add_argument(
        "--data_split",
        type=str,
        default="all",
        help="Select the data split",
        choices=["dev", "test", "train", "all"],
    )
    args = parser.parse_args(argv)

    print("loading data...")
    df = pd.read_csv(f"{args.data_dir}/{args.data_split}.csv")
    df["datetime"] = pd.to_datetime(df["date"])
    df["year"] = df["datetime"].dt.year

    print("merging `UNRELATED_TO_CLIMATE` with `SIMILAR_BUT_NOT_CLIMATE`")
    # Vectorised relabel: one boolean-mask assignment instead of a per-row lambda.
    is_similar = df["category"] == "SIMILAR_BUT_NOT_CLIMATE"
    df.loc[is_similar, "category"] = "UNRELATED_TO_CLIMATE"
    tags = [tag for tag in TAGS if tag != "SIMILAR_BUT_NOT_CLIMATE"]

    # The `use` filter does not depend on the tag, so apply it once up front.
    # An elementwise comparison (rather than using the column itself as a mask)
    # keeps missing or non-boolean values from raising; they count as unused.
    used = df[df["use"].eq(True)]
    counts_per_tag = {
        tag: used[used["category"] == tag].groupby(["year"])["category"].count()
        for tag in tags
    }
    # Building the frame from a dict of Series aligns them on the union of
    # years; a year in which a tag never occurs shows up as NaN.
    stacked_df = pd.DataFrame(counts_per_tag)

    if stacked_df.empty:
        # Fail with a clear message instead of an opaque plotting-backend error.
        raise SystemExit("no rows with `use == True` for the selected tags; nothing to plot")

    print("plotting...")
    stacked_df = stacked_df.fillna(0)
    # DataFrame.plot returns the Axes; the Figure is fetched from it below.
    axes = stacked_df.plot(
        kind="bar", stacked=True, xlabel="year", ylabel="number of articles"
    )

    # time() keeps successive runs from overwriting each other's output.
    output_filename = f"barchart_label_per_year_{time()}.png"
    print(f"storing .png at {output_filename}")
    chart = axes.get_figure()
    chart.savefig(output_filename)
    # Release the figure so repeated calls in one process do not accumulate
    # open figures.
    plt.close(chart)
# Run the plotting routine only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()