1 | # function to display cool(?) statistics about the subtitle
|
---|
2 | import datetime
|
---|
3 | from dateutil.relativedelta import relativedelta
|
---|
4 | from alive_progress import alive_bar
|
---|
5 | from pymongo import MongoClient
|
---|
6 |
|
---|
7 | # 1. get minimum date
|
---|
8 | # 2. get the number of documents that fulfill the criteria for each month, starting from the min month
|
---|
9 | # 3. display on graph or put into csv for easy R implementation
|
---|
10 |
|
---|
# Layout of the "dateCreated" strings, e.g. "2002-07-25T15:25:02Z".
_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def _count_subtitles_per_month(documents, first_date, tick=None):
    """Count documents with a "subtitle" key in consecutive one-month windows.

    Args:
        documents: Iterable of mappings ordered by ascending "dateCreated".
            Assumes every mapping carries a "dateCreated" string in
            ``_DATE_FORMAT`` (a missing key raises ``KeyError``).
        first_date: ``datetime`` at which the first window starts.
        tick: Optional zero-argument callable invoked once per document
            (used by ``main`` to advance the progress bar).

    Returns:
        A list of ``(window_start, count)`` tuples, one per window from
        ``first_date`` up to the window holding the last document. Windows
        without any subtitled document are kept with a count of 0. The list
        is empty when ``documents`` yields nothing.
    """
    statistics = []
    months_elapsed = 0
    window_start = first_date
    window_end = first_date + relativedelta(months=1)
    count = 0
    seen_any = False

    for item in documents:
        seen_any = True
        # The field is stored as text; parse it so it can be compared with
        # the datetime window bounds (str <= datetime raises TypeError).
        item_date = datetime.datetime.strptime(item["dateCreated"], _DATE_FORMAT)

        # If the document is past the current window, store the finished
        # tally and move on one month. A loop (not a single step) is needed
        # because the next document may be several months later.
        while item_date > window_end:
            statistics.append((window_start, count))
            count = 0
            months_elapsed += 1
            window_start = window_end
            # Offsets are computed from first_date instead of chaining
            # "+1 month" so end-of-month clipping (Jan 31 -> Feb 28) does
            # not permanently shift every later boundary.
            window_end = first_date + relativedelta(months=months_elapsed + 1)

        # The document that triggered the rollover is counted as well: it
        # belongs to the window that has just been opened.
        if "subtitle" in item:
            count += 1
        if tick is not None:
            tick()

    # The window still open when the data runs out has not been stored yet.
    if seen_any:
        statistics.append((window_start, count))
    return statistics


def main():
    """Print, month by month, how many "crossref" documents have a subtitle.

    Connects to the MongoDB server at localhost:27017, reads the "crossref"
    collection of the "test" database ordered by "dateCreated", and tallies
    the documents containing a "subtitle" key in one-month windows that start
    at the oldest "dateCreated" value. The oldest date is printed first, a
    progress bar is shown while scanning, and one "YYYY-MM-DD: count" line is
    printed per window afterwards.

    Returns:
        The list of ``(window_start, count)`` tuples that was printed; an
        empty list when the collection holds no documents.
    """
    # The context manager closes the client once the scan is finished.
    with MongoClient("mongodb://localhost:27017/") as client:
        collection = client["test"]["crossref"]

        # Oldest document = first one in ascending "dateCreated" order.
        oldest = next(collection.find().sort("dateCreated").limit(1), None)
        if oldest is None:
            print("collection is empty; nothing to count")
            return []

        first_date = datetime.datetime.strptime(oldest["dateCreated"], _DATE_FORMAT)
        print(first_date)

        # The estimate is only used to size the progress bar.
        total = collection.estimated_document_count()
        documents = collection.find().sort("dateCreated")
        with alive_bar(total) as bar:
            statistics = _count_subtitles_per_month(documents, first_date, tick=bar)

    for window_start, count in statistics:
        print(f"{window_start:%Y-%m-%d}: {count}")
    return statistics
|
---|
54 |
|
---|
# Run the statistics only when this file is executed as a script, so that
# importing the module does not trigger a database scan.
if __name__ == "__main__":
    main()