728x90

from tkinter import *
from tkinter.filedialog import *
import pandas as pd
import xlrd
import re

file = "C:\\Users\\htsnote\\Desktop\\pypy\\juju\\삼보.xlsx"
#file = "C:\\Users\\htsnote\\Desktop\\pypy\\juju\\아세아제지.xlsx"
xls = xlrd.open_workbook(file, on_demand=True)
nsheets = xls.nsheets  # 179개

#목차 확인.
table_of_contents = pd.read_excel(file, sheet_name="Page1").fillna(0)
all_table_of_contents = table_of_contents.values

for i in range(len(all_table_of_contents)):
    if "주식의 총수" in all_table_of_contents[i][0]:
        str_num_of_stock = all_table_of_contents[i][0]
        #print(str_num_of_stock)
        for j in range(len(str_num_of_stock)-1,0,-1):
            if str_num_of_stock[j] == ".":
                tag_num_of_stock = j + 1
                break
        page_num_of_stock = int(str_num_of_stock[tag_num_of_stock:])
    elif "배당에 관한 사항" in all_table_of_contents[i][0]:
        str_allocation = all_table_of_contents[i][0]
        for j in range(len(str_allocation)-1,0,-1):
            if str_allocation[j] == ".":
                tag_allocation = j + 1
                break
        page_allocation = int(str_allocation[tag_allocation:])
    elif "주주에 관한 사항" in all_table_of_contents[i][0]:
        str_juju = all_table_of_contents[i][0]
        for j in range(len(str_juju)-1,0,-1):
            if str_juju[j] == ".":
                tag_juju = j + 1
                break
        page_juju = int(str_juju[tag_juju:])
    elif "임원 및 직원 등의 현황" in all_table_of_contents[i][0]:
        str_employee = all_table_of_contents[i][0]
        for j in range(len(str_employee)-1,0,-1):
            if str_employee[j] == ".":
                tag_employee = j + 1
                break
        page_employee = int(str_employee[tag_employee:])
    elif "임원의 보수" in all_table_of_contents[i][0]:
        str_ceo = all_table_of_contents[i][0]
        for j in range(len(str_ceo)-1,0,-1):
            if str_ceo[j] == ".":
                tag_ceo = j + 1
                break
        page_ceo = int(str_ceo[tag_ceo:])
    

flag_num_of_stock = 0 #주식의 총수 
flag_allocation_per = 0 #배당성향
flag_allocation_won = 0 #배당금
flag_juju = 0 #주주관한 사항
flag_employee = 0 #직원 등 현황
flag_ceo = 0 #임원의 보수

#1.주식의 총수
for p in range(page_num_of_stock, page_num_of_stock+3): #pdf페이지와 엑셀 페이지가 2page정도 차이남.. 이것도 여러게 확인필요
    page = 'Page' + str(p)
    readXL = pd.read_excel(file, sheet_name=page).fillna(0)
    readXL = readXL.values
    for i in range(len(readXL)):
        # if readXL[i][0] in ["Ⅳ. 발행주식의 총수 (Ⅱ-Ⅲ)"]:
        if "발행주식의 총수" in str(readXL[i][0]):
            flag_num_of_stock = 1
            print(readXL[i])
            print(readXL[i][3])
            break
        elif "작성기준지침에 따라 분기보고서에는 기재하지 않습니다" in str(readXL[i][0]):
            flag_num_of_stock = 1
            print(readXL[i])
            print(readXL[i][0])
            break
        elif "작성기준지침에 따라 반기보고서에는 기재하지 않습니다" in str(readXL[i][0]):
            flag_num_of_stock = 1
            print(readXL[i])
            print(readXL[i][0])
            break
    if flag_num_of_stock == 1:
        break
#2.배당에 관한 사항
for p in range(page_allocation, page_allocation+3):
    page = 'Page' + str(p)
    readXL = pd.read_excel(file, sheet_name=page).fillna(0)
    readXL = readXL.values
    for i in range(len(readXL)):
        if "현금배당성향" in str(readXL[i][0]):
            flag_allocation_per = 1
            print(readXL[i])
            print(readXL[i][3])
            break
    for i in range(len(readXL)):
        if "주당 현금배당금" in str(readXL[i][0]):
            flag_allocation_won = 1
            print(readXL[i])
            print(readXL[i][3])
            break
    if flag_allocation_per == 1:
        break
    elif flag_allocation_won == 1:
        break

#3. 주주에 관한 사항
for p in range(page_juju, page_juju+3):
    page = 'Page' + str(p)
    readXL = pd.read_excel(file, sheet_name=page).fillna(0)
    readXL = readXL.values
    for i in range(len(readXL)):
        if "계" in str(readXL[i][0]) and "보통주" in str(readXL[i][1]) :
            flag_juju = 1
            if type(readXL[i][-2]) is float:
                print(readXL[i])
                print(readXL[i][-2])
            elif type(readXL[i][-3]) is float:
                print(readXL[i])
                print(readXL[i][-3])
            break
    if flag_juju == 1:
        break

#4. 직원 등 현황
#for p in range(page_employee, page_ceo+10):#166
for p in range(page_ceo-2, page_ceo + 3):  # 166
    page = 'Page' + str(p)
    readXL = pd.read_excel(file, sheet_name=page).fillna(0)
    readXL = readXL.values
    for i in range(len(readXL)):
        if "합 계" in str(readXL[i][0]) :
            flag_employee = 1
            print(readXL[i]) #여러 보고서 비교가 필요함
            print(readXL[i][2])  #직원수
            print(readXL[i][5])  #근속연수
            print(readXL[i][9])  #평균연봉
            break
        elif "작성기준지침에 따라 분기보고서에는 기재하지 않습니다" in str(readXL[i][0]):
            flag_employee = 1
            print(readXL[i])
            print(readXL[i][0])
            break
    if flag_employee == 1:
        break

#4. 임원의 보수
for p in range(page_ceo, page_ceo+3):
    page = 'Page' + str(p)
    readXL = pd.read_excel(file, sheet_name=page).fillna(0)
    readXL = readXL.values
    for i in range(len(readXL)):
        if "인원수" in str(readXL[i][0]) :
            flag_ceo = 1
            print(readXL[i+1]) #여러 보고서 비교가 필요함
            print(readXL[i+1][5])
            break
        elif "작성기준지침에 따라 분기보고서에는 기재하지 않습니다" in str(readXL[i][0]):
            flag_ceo = 1
            print(readXL[i])
            print(readXL[i][0])
            break
    if flag_ceo == 1:
        break
728x90

+ Recent posts