# Last update:
#!/opt/homebrew/bin/python3
# -*- coding: utf-8 -*-
# Description: Checking Python and NLTK
# Date: Saturday, December 25, 2021
# Tomonori Nagano
import platform
# Report the version string of the interpreter running this script.
running_version = platform.python_version()
print(running_version)
# Example output: 3.9.9
#!/opt/homebrew/bin/python3
# -*- coding: utf-8 -*-
# Description: Showing paths
# Date: Tuesday, December 21, 2021
# Tomonori Nagano
import os
# Show the current working directory, obtained in two different ways.
print("Showing the current path:")
for current_path in (os.path.abspath('.'), os.getcwd()):
    print(current_path)
# /Users/home
# /Users/home
# Show the absolute path of this script file.
print("Showing the path to the Python code:")
script_path = os.path.abspath(__file__)
print(script_path)
# /Users/home/ScriptDoc
# Show the entries of the current directory.
print("Showing the files in the current path:")
entries = os.listdir('.')
print(entries)
# ['Music', 'Pictures', 'Desktop', 'Library', ...]
# Alternatively, you can append a directory to the module search path.
import sys
# Change the argument to your own path.
sys.path.append("/Volumes/USB/mypath/")
#!/opt/homebrew/bin/python3
# -*- coding: utf-8 -*-
# Description: A simple demonstration of methods for string
# Date: Tuesday, December 21, 2021
# Tomonori Nagano
# Demonstrates common str methods and the three classic formatting styles.
# The comment under each statement shows the text it prints.
name = 'Tomonori Nagano'
keyword = 'Tomo'

# Prefix test.
if name.startswith(keyword):
    print('Tomonori starts with ' + keyword)
# Tomonori starts with Tomo

# Suffix test, with a fallback message.
if name.endswith(keyword):
    print('Tomonori ends with ' + keyword)
else:
    print('Tomonori does not end with ' + keyword)
# Tomonori does not end with Tomo

# Substring test.
if keyword in name:
    print('Yes, ' + name + ' contains the string "' + keyword + '"')
# Yes, Tomonori Nagano contains the string "Tomo"

# Slicing.
print(name[0:4])  # the first four letters (index 0 to 3)
# Tomo
print(name[4:])  # everything after the fourth character
# nori Nagano

# Case conversion.
print(name.capitalize())  # capitalize the first character only
# Tomonori nagano
print(name.title())  # capitalize the first character of each word
# Tomonori Nagano
print(name.upper())  # change all characters to upper case
# TOMONORI NAGANO
print(name.lower())  # change all characters to lower case
# tomonori nagano

# printf-style formatting with the % operator.
print("My name is %s. I have $%.2f' " % (name, 20))
# My name is Tomonori Nagano. I have $20.00'

# str.format with positional fields.
print("My name is {0}. I have ${1}' ".format(name, 20))
# My name is Tomonori Nagano. I have $20'

# str.format with named fields.
print("My name is {name}. I have ${money}' ".format(name='Tomonori Nagano', money = 20))
# My name is Tomonori Nagano. I have $20'

# '<20s' left-aligns the name in a 20-character field (padding on the right).
print("My name is {name:<20s}. I have ${money:.2f}' ".format(name='Tomonori Nagano', money = 20))
# My name is Tomonori Nagano     . I have $20.00'

# '>20s' right-aligns the name; '05d' zero-pads the integer to five digits.
print("My name is {name:>20s}. I have ${money:05d}' ".format(name='Tomonori Nagano', money = 20))
# My name is      Tomonori Nagano. I have $00020'
#!/opt/homebrew/bin/python3
# -*- coding: utf-8 -*-
# Description: Demonstration of various basic operations in NLTK
# Date: Wednesday, December 22, 2021
# Tomonori Nagano
import nltk
from nltk.corpus import gutenberg,brown
from nltk.probability import FreqDist, ConditionalFreqDist
# Count every token in the Brown corpus.
thisFD = FreqDist(brown.words())

# Print the first ten entries, in whatever order iterating the FreqDist yields,
# together with their counts.
for word in list(thisFD)[:10]:
    print(word, thisFD[word])

# Print the ten most frequent tokens with their counts.
# most_common(10) asks for the top ten directly instead of ranking every
# token and slicing the result.
for word, freq in thisFD.most_common(10):
    print(word, freq)
#!/opt/homebrew/bin/python3
# -*- coding: utf-8 -*-
# Description: Demonstration of conditional frequency (ConditionalFreqDist)
# Date: Wednesday, December 22, 2021
# Tomonori Nagano
import nltk
from nltk.corpus import gutenberg,brown
from nltk.probability import FreqDist, ConditionalFreqDist
from operator import itemgetter
# POS analyses (conditional frequency)
# thisFD counts how often each tag occurs;
# thisCondFD counts, for each token, how often it carries each tag.
thisFD = FreqDist()
thisCondFD = ConditionalFreqDist()

# Walk every tagged sentence of every Brown file and tally its (token, tag) pairs.
for text in brown.fileids():
    for sentence in brown.tagged_sents(text):
        for (token, tag) in sentence:
            thisFD[tag] += 1
            thisCondFD[token][tag] += 1

# Print each tag recorded for the token 'light' with its count.
# The per-token distribution is looked up once rather than on every iteration.
lightFD = thisCondFD['light']
for pos in lightFD:
    print(pos, lightFD[pos])
#!/opt/homebrew/bin/python3
# -*- coding: utf-8 -*-
# Description: Demonstrating Porter stemmer
# Date: Friday, December 24, 2021
# Tomonori Nagano
import nltk
# Porter's stemmer
# Create a Porter stemmer instance.
stemmer = nltk.PorterStemmer()
# Stem a single word. NOTE: the return value is neither stored nor printed,
# so this line shows output only in an interactive session.
stemmer.stem('appearance')
# Stem several related word forms and print the resulting list.
verbs = ['appears', 'appear', 'appeared', 'appearing', 'appearance']
print([stemmer.stem(verb) for verb in verbs])
#!/opt/homebrew/bin/python3
# -*- coding: utf-8 -*-
# Description: Demonstration of various basic operations in NLTK
# Date: Wednesday, December 22, 2021
# Tomonori Nagano
import nltk
from nltk.corpus import gutenberg,brown
from nltk.probability import FreqDist, ConditionalFreqDist
from operator import itemgetter
# finding collocations
def collocations(words):
    """Return the distinct bigrams of *words*, ranked from highest to lowest score.

    Args:
        words: A sliceable sequence of tokens (for example a list of strings).

    Returns:
        A list of ``(word1, word2)`` tuples, one per distinct adjacent pair in
        *words*, sorted by ``score(...)`` in descending order. The sort is
        stable, so bigrams with equal scores keep the order in which the
        bigram distribution yields them.
    """
    # Count the words and the adjacent word pairs (bigrams).
    wfd = nltk.FreqDist(words)
    pfd = nltk.FreqDist(zip(words, words[1:]))
    # Score each distinct bigram, then rank by score, best first.
    scored = [((w1, w2), score(w1, w2, wfd, pfd)) for w1, w2 in pfd]
    scored.sort(key=itemgetter(1), reverse=True)
    # Drop the scores and keep only the ranked bigrams.
    return [bigram for bigram, _ in scored]
def score(word1, word2, wfd, pfd, power=3):
    """Score the bigram ``(word1, word2)`` from word and bigram counts.

    The score is ``count(word1, word2) ** power / (count(word1) * count(word2))``.

    Args:
        word1: First word of the bigram.
        word2: Second word of the bigram.
        wfd: Mapping from a word to its count.
        pfd: Mapping from a ``(word1, word2)`` tuple to its count.
        power: Exponent applied to the bigram count (default 3).

    Returns:
        The score as a float.

    Raises:
        ZeroDivisionError: If either word has a count of zero in *wfd*.
    """
    freq1 = wfd[word1]
    freq2 = wfd[word2]
    freq12 = pfd[(word1, word2)]
    return freq12 ** power / float(freq1 * freq2)
# Lower-case the tokens of the Chesterton text, keeping only tokens longer
# than two characters.
words = [
    word.lower()
    for word in gutenberg.words('chesterton-brown.txt')
    if len(word) > 2
]
# Print the 15 top-ranked bigrams, each joined into one space-separated string.
print([' '.join(pair) for pair in collocations(words)[:15]])
#!/opt/homebrew/bin/python3
# -*- coding: utf-8 -*-
# Description: Demonstrating Benford's law
# Date: Friday, December 24, 2021
# Tomonori Nagano
import nltk
from nltk.corpus import reuters
import re
# nltk.download('reuters')
# Load the Reuters tokens once and reuse them for the scan below.
text = reuters.words()
thisFD = nltk.FreqDist()

# A non-zero digit followed by any further digits; group 1 is that first digit.
# Written as a raw string so that \d reaches the regex engine unchanged, and
# compiled once outside the loop.
digitPattern = re.compile(r'([1-9])\d*')

# Tally the first non-zero digit found in each token that contains one.
for word in text:
    thisRegex = digitPattern.search(word)
    if thisRegex:
        thisFD[thisRegex.group(1)] += 1

# Print each digit with the number of tokens counted for it.
for word in thisFD:
    print(word, thisFD[word])