zoukankan      html  css  js  c++  java
  • Coursera课程笔记----P4E.Capstone----Week 6&7

    Visualizing Email Data (Week 6&7)

    code segment

    gword.py

    import sqlite3
    import time
    import zlib
    import string
    
    # Build gword.js: the 100 most frequent subject-line words, each with a
    # font size scaled by how often the word occurs across all messages.
    conn = sqlite3.connect('index.sqlite')
    cur = conn.cursor()

    # Map each subject id to its subject text.
    cur.execute('SELECT id, subject FROM Subjects')
    subjects = dict()
    for message_row in cur:
        subjects[message_row[0]] = message_row[1]

    # A single translation table deletes punctuation and digits in one pass.
    strip_chars = str.maketrans('', '', string.punctuation + '1234567890')

    # Count every word of four or more characters, once per message that
    # carries the subject.
    cur.execute('SELECT subject_id FROM Messages')
    counts = dict()
    for message_row in cur:
        text = subjects[message_row[0]]
        words = text.translate(strip_chars).strip().lower().split()
        for word in words:
            if len(word) < 4:
                continue
            counts[word] = counts.get(word, 0) + 1

    # Everything needed is in memory now; release the database handle.
    conn.close()

    # Words ordered from most to least frequent; only the top 100 are plotted.
    x = sorted(counts, key=counts.get, reverse=True)
    top_words = x[:100]
    top_counts = [counts[k] for k in top_words]
    highest = max(top_counts, default=None)
    lowest = min(top_counts, default=None)
    print('Range of counts:',highest,lowest)

    # Spread the font sizes across 20-100 based on the count:
    # smallsize is the minimum size, bigsize is the span added on top of it.
    bigsize = 80
    smallsize = 20

    # When every plotted word has the same count there is no range to scale
    # over; give all of them the smallest size instead of dividing by zero.
    spread = (highest - lowest) if top_counts else 0

    entries = []
    for k in top_words:
        if spread:
            fraction = (counts[k] - lowest) / float(spread)
        else:
            fraction = 0.0
        size = int((fraction * bigsize) + smallsize)
        entries.append("{text: '"+k+"', size: "+str(size)+"}")

    # The context manager closes the file even if a write fails.
    with open('gword.js','w') as fhand:
        fhand.write("gword = [")
        fhand.write(",\n".join(entries))
        fhand.write("\n];\n")

    print("Output written to gword.js")
    print("Open gword.htm in a browser to see the vizualization")
    

    gline.py

    import sqlite3
    import time
    import zlib
    
    # Build gline.js: per-month message counts for the ten sender domains
    # that sent the most messages, laid out as rows of a JavaScript array.
    conn = sqlite3.connect('index.sqlite')
    cur = conn.cursor()

    # Map each sender id to the sender's address string.
    cur.execute('SELECT id, sender FROM Senders')
    senders = dict()
    for message_row in cur:
        senders[message_row[0]] = message_row[1]

    # Map each message id to (guid, sender_id, subject_id, sent_at).
    cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages')
    messages = dict()
    for message_row in cur:
        messages[message_row[0]] = (message_row[1],message_row[2],message_row[3],message_row[4])

    # Everything needed is in memory now; release the database handle.
    conn.close()

    print("Loaded messages=",len(messages),"senders=",len(senders))

    # Count messages per sender domain (the part after the single "@").
    sendorgs = dict()
    for message in messages.values():
        pieces = senders[message[1]].split("@")
        if len(pieces) != 2:
            continue  # not exactly one "@": no usable domain
        dns = pieces[1]
        sendorgs[dns] = sendorgs.get(dns,0) + 1

    # pick the top schools
    orgs = sorted(sendorgs, key=sendorgs.get, reverse=True)
    orgs = orgs[:10]
    print("Top 10 Organizations")
    print(orgs)

    # Count messages per (month, domain) for the selected domains only.
    # Sets give constant-time membership tests; `orgs` keeps the ranked order
    # used for the output columns.
    top_orgs = set(orgs)
    seen_months = set()
    counts = dict()
    for message in messages.values():
        pieces = senders[message[1]].split("@")
        if len(pieces) != 2:
            continue
        dns = pieces[1]
        if dns not in top_orgs:
            continue
        month = message[3][:7]  # first seven characters of sent_at
        seen_months.add(month)
        key = (month, dns)
        counts[key] = counts.get(key,0) + 1

    months = sorted(seen_months)

    # The context manager closes the file even if a write fails.
    with open('gline.js','w') as fhand:
        # Header row: 'Month' followed by one column per domain.
        fhand.write("gline = [ ['Month'")
        for org in orgs:
            fhand.write(",'"+org+"'")
        fhand.write("]")

        # One row per month; a domain with no messages that month gets 0.
        for month in months:
            fhand.write(",\n['"+month+"'")
            for org in orgs:
                key = (month, org)
                val = counts.get(key,0)
                fhand.write(","+str(val))
            fhand.write("]")

        fhand.write("\n];\n")

    print("Output written to gline.js")
    print("Open gline.htm to visualize the data")
    
  • 相关阅读:
    pytorch中torch.unsqueeze()函数与np.expand_dims()
    python内存泄漏,gc模块
    pytorch初始化网络参数
    pytorch保存模型等相关参数,利用torch.save(),以及读取保存之后的文件
    pytorch将cpu训练好的模型参数load到gpu上,或者gpu->cpu上
    ubuntu ssh连接服务器保持长时间不断
    pytorch遇到的问题:RuntimeError: randperm is only implemented for CPU
    利用pytorch复现spatial pyramid pooling层
    Non-maximum suppression(非极大值抑制算法)
    numpy之flatnonzero函数
  • 原文地址:https://www.cnblogs.com/maimai-d/p/12775931.html
Copyright © 2011-2022 走看看