Day12: os,sys,re,json,xml

当前位置:

首页 > 编程开发 > 数据分析 >

Day12: os,sys,re,json,xml

OS:

import sys,os
# print(os.getcwd())
# print(os.chdir('..'))
# # os.removedirs('dir1/dir2')  # empty directories are deleted; a non-empty one cannot be removed
# print(os.listdir())
#print(os.stat('ss.py')) #****the time of create and modify
# print(os.stat('path/filename'))
# os.sep  windows:\\ linux:/
# os.linesep  windows:\r\n linux:\n
# print(os.system('dir'))
# print(os.environ)   #get the system's environment variables
# os.path.dirname(path=)
# os.path.basename(path=)  #the file name
# os.path.abspath(path=)  #absolute path
# os.path.split(path=)    #split with dirname and filename
# os.path.exists(path=)   #judge
# os.path.isabs(path=)
# os.path.isfile(path=)
# os.path.isdir(path=)
#**************os.path.join(path1[,path2[,...]])
# os.path.getatime()  #the last access time
# os.path.getmtime()  #the last modify time

SYS:

# **************sys******************
# sys.argv    #command-line argument list (used like input); the first element is the path of the current program
# sys.exit()
# sys.version
# sys.maxint  #Python 2 only; Python 3 uses sys.maxsize
# sys.path
# sys.platform
# print(sys.argv)
# command = sys.argv[1]
# path = sys.argv[2]
# if command == 'post':
#     pass
# if command == 'get':
#     pass

#********a progress bar can be built on this method
# import time
# for i in range(10):
#     sys.stdout.write('#') #the same as print
#     time.sleep(0.1)
#     sys.stdout.flush()

JSON & PICKLE & SHELVE:

JSON: can be exchanged between different languages, especially with JS;

Pickle: can only be used in Python, and may be incompatible between different Python versions, so pickle is usually used only to save data that is not important.

#dumps:Serialization  loads:Deserialization

# dir='{"name":"alex"}'
# f=open("hello","w")
# f.write(dir)

# f_read=open("hello","r")
# data=f_read.read()
# print(data)
# data=eval(data)
# print(data["name"])

import json
# dir='{"name":"alex"}'
# data=json.dumps(dir)
# i=8
# l=[11,22]
# data1=json.dumps(i)
# print(data1)
# print(type(data1))
# data2=json.dumps(l)
# print(data2)
# print(type(data2))
# print(data)
# print(type(data))

# f=open("new_hello","w")
# f.write(data)
#the former two statements are equivalent to json.dump(dir,f)

# f_read=open("new_hello","r")
# data=json.loads(f_read.read())
#the former two statements are equivalent to data=json.load(f_read)
# print(data)
# print(type(data))
# import json
# with open("json_test","r") as f:
#     data = f.read()
#     data = json.loads(data)
#     print(data["name"])

# import pickle
# # j = pickle.dumps(dir)
# # print((type(j)))    #<class 'bytes'>
# # f = open("pickle_test","wb")
# # f.write(j)
# #=======>pickle.dump(dir,f)  (the object comes first, then the file; returns None)
# f = open("pickle_test","rb")
# data=pickle.loads(f.read())
# print(data)
#=======>data=pickle.load(f)

#shelve:the same as pickle
# import shelve
# f = shelve.open(r'shelve')
# # f['stu1_info']={'name':'alex',"age":'29'}
# # f.close()
# #generate 3 files
# print(f.get('stu1_info')["age"])

XML: similar to JSON, but JSON is easier; XML can be recognized in every programming language.

The file:

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

xml operations:add,del,modify and check:

# import xml.etree.ElementTree as ET
# tree = ET.parse('xml_lesson')
# root = tree.getroot()
# print(root.tag)
#traversing xml file
# for i in root:
#     # print(i)
#     # print(i.tag)
#     for j in i:
#         # print(j.tag)
#         # print(j.attrib)
#         print(j.text)
    # print(i.attrib)
#traversing year nodes:
# for node in root.iter('year'):
#     print(node.tag,node.text)
 ####modify:
# for node in root.iter('year'):
#     new_year = int(node.text) + 1
#     node.text = str(new_year)
#     node.set("updated","yes")
# tree.write("abc.xml")

######delete:
# for country in root.findall('country'):
#     rank=int(country.find('rank').text)
#     if rank > 50:
#         root.remove(country)
# tree.write('output.xml')

########create a new xml file:
# new_xml =ET.Element("namelist")
# name = ET.SubElement(new_xml,"name",attrib={"enrolled":"yes"})
# age = ET.SubElement(name,"age",attrib={"checked":"no"})
# sex = ET.SubElement(name,"sex")
# sex.text = "fale"
# name2 = ET.SubElement(new_xml,"name",attrib={"enrolled":"yes"})
# age = ET.SubElement(name2,"age")
# age.text = "19"
#
# et = ET.ElementTree(new_xml)
# et.write("test_xml",encoding="utf-8",xml_declaration=True)

RE:

import re

# Demonstration of the basic regular-expression metacharacters with re.findall.
# Every pattern is written as a raw string so that backslashes reach the regex
# engine unchanged (a plain '[\d]' literal is an invalid escape sequence and
# triggers a DeprecationWarning/SyntaxWarning on modern Python).

# '.' matches any single character except a newline.
ret = re.findall(r'a..in', 'helloalvin')
print(ret)  # ['alvin']

# '^' anchors the match at the start of the string.
ret = re.findall(r'^a...n', 'alvinhelloawwwn')
print(ret)  # ['alvin']

# '$' anchors the match at the end of the string.
ret = re.findall(r'a...n$', 'alvinhelloawwwn')
print(ret)  # ['awwwn']

# Same example as above, repeated (kept so the printed output is unchanged).
ret = re.findall(r'a...n$', 'alvinhelloawwwn')
print(ret)  # ['awwwn']

ret = re.findall(r'abc*', 'abcccc')  # greedy match, [0, +oo) repetitions
print(ret)  # ['abcccc']

ret = re.findall(r'abc+', 'abccc')  # [1, +oo) repetitions
print(ret)  # ['abccc']

ret = re.findall(r'abc?', 'abccc')  # [0, 1] repetitions
print(ret)  # ['abc']

ret = re.findall(r'abc{1,4}', 'abccc')
print(ret)  # ['abccc'] greedy match
# Note: the quantifiers above (*, +, ? and so on) are all greedy, i.e. they
# match as much as possible; appending '?' turns them into lazy matches.

ret = re.findall(r'abc*?', 'abcccccc')
print(ret)  # ['ab']
# Metacharacter: the character set []
# -------------------------------------------- character set []
ret = re.findall(r'a[bc]d', 'acd')
print(ret)  # ['acd']

ret = re.findall(r'[a-z]', 'acd')
print(ret)  # ['a', 'c', 'd']

# Inside a character set '.', '*' and '+' are literal characters.
ret = re.findall(r'[.*+]', 'a.cd+')
print(ret)  # ['.', '+']

# Symbols that keep a special meaning inside a character set: - ^ \

ret = re.findall(r'[1-9]', '45dha3')
print(ret)  # ['4', '5', '3']

ret = re.findall(r'[^ab]', '45bdha3')
print(ret)  # ['4', '5', 'd', 'h', '3']

ret = re.findall(r'[\d]', '45bdha3')
print(ret)  # ['4', '5', '3']

# Metacharacter: the escape character \
# A backslash followed by a metacharacter removes its special meaning, e.g. \.
# A backslash followed by an ordinary character gives it a special meaning, e.g. \d, \d+
#
# \d  matches any decimal digit; equivalent to the class [0-9].
# \D  matches any non-digit character; equivalent to the class [^0-9].
# \s  matches any whitespace character; equivalent to the class [ \t\n\r\f\v].
# \S  matches any non-whitespace character; equivalent to the class [^ \t\n\r\f\v].
# \w  matches any alphanumeric character; equivalent to the class [a-zA-Z0-9_].
# \W  matches any non-alphanumeric character; equivalent to the class [^a-zA-Z0-9_].
# \b  matches a word boundary, i.e. the position next to a special character such as a space, & or #.

栏目列表