KR_FileIO

Python 파일 입출력(File I/O) 개념 정리

1️⃣ 파일 열기/닫기

파이썬에서는 open() 함수를 사용하여 파일을 열 수 있다.

# 기본 파일 열기/닫기
file = open('example.txt', 'w')  # 쓰기 모드로 파일 열기
file.close()                     # 파일 닫기

# with 문을 사용한 파일 처리 (권장)
with open('example.txt', 'w') as file:
    file.write('Hello, World!')  # 자동으로 파일이 닫힘

# 인코딩 지정
with open('unicode.txt', 'w', encoding='utf-8') as file:
    file.write('안녕하세요!')  # 유니코드 문자 처리

# 에러 처리
try:
    with open('nonexistent.txt', 'r') as file:
        content = file.read()
except FileNotFoundError:
    print("파일이 존재하지 않습니다.")
except PermissionError:
    print("파일에 접근할 권한이 없습니다.")
except IOError as e:
    print(f"입출력 오류가 발생했습니다: {e}")

파일 모드

'r': 읽기 모드 (기본값) - 파일이 없으면 에러 발생
'w': 쓰기 모드 (파일 내용 덮어쓰기) - 파일이 없으면 새로 생성
'a': 추가 모드 (파일 끝에 내용 추가) - 파일이 없으면 새로 생성
'x': 배타적 생성 모드 - 파일이 이미 있으면 에러 발생
'b': 바이너리 모드 (텍스트가 아닌 바이너리 데이터 처리)
't': 텍스트 모드 (기본값)
'+': 읽기/쓰기 모드 (예: 'r+', 'w+', 'a+')

# 각 모드 예시
with open('file.txt', 'r') as f:       # 읽기 전용
    content = f.read()

with open('file.txt', 'w') as f:       # 쓰기 전용 (덮어쓰기)
    f.write('새로운 내용')

with open('file.txt', 'a') as f:       # 추가 모드
    f.write('기존 내용에 추가')

try:
    with open('new_file.txt', 'x') as f:  # 배타적 생성
        f.write('새 파일 내용')
except FileExistsError:
    print("파일이 이미 존재합니다")

with open('binary.dat', 'wb') as f:    # 바이너리 쓰기
    f.write(b'\x00\x01\x02\x03')

with open('file.txt', 'r+') as f:      # 읽기+쓰기
    content = f.read()
    f.write('더 많은 내용')

2️⃣ 파일 쓰기

# 단순 텍스트 쓰기
with open('test.txt', 'w', encoding='utf-8') as f:
    f.write('Hello\n')
    f.write('World\n')

# print() 함수로 파일에 쓰기
with open('test.txt', 'w', encoding='utf-8') as f:
    print('Hello', file=f)
    print('World', file=f)
    print('안녕하세요', '반갑습니다', sep=', ', file=f)

# 리스트의 내용을 파일에 쓰기
lines = ['Line 1', 'Line 2', 'Line 3']
with open('test.txt', 'w', encoding='utf-8') as f:
    f.writelines(line + '\n' for line in lines)

# 딕셔너리 데이터 쓰기
data = {'name': '홍길동', 'age': 30, 'city': '서울'}
with open('data.txt', 'w', encoding='utf-8') as f:
    for key, value in data.items():
        f.write(f"{key}: {value}\n")

# 추가 모드로 파일에 내용 추가
with open('log.txt', 'a', encoding='utf-8') as f:
    import datetime
    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    f.write(f"[{now}] 로그 항목 추가\n")

파일 위치 제어

# Controlling the write position in a file opened with 'w+' (read/write, truncates on open)
with open('position.txt', 'w+', encoding='utf-8') as f:
    f.write("첫 번째 줄\n")
    f.write("두 번째 줄\n")
    
    # NOTE: in text mode tell() returns an opaque position number; it is only
    # guaranteed to be meaningful when passed back to seek().
    position = f.tell()  # remember the current position
    print(f"현재 위치: {position} 바이트")
    
    f.seek(0)  # move back to the start of the file
    # Writing after seek() replaces existing data in place; nothing is inserted.
    # The text written here is longer than the original first line, so it also
    # overwrites the beginning of the second line.
    f.write("덮어쓴 첫 번째 줄\n")
    
    f.seek(0, 2)  # move to the end of the file (offset 0 relative to whence=2, the end)
    f.write("세 번째 줄\n")

3️⃣ 파일 읽기

# 전체 내용 읽기
with open('test.txt', 'r', encoding='utf-8') as f:
    content = f.read()
    print(content)

# Read a fixed amount at a time
with open('test.txt', 'r', encoding='utf-8') as f:
    chunk = f.read(10)  # text mode counts characters, not bytes: the first 10 characters
    print(chunk)
    
    next_chunk = f.read(10)  # continues from the current position: the next 10 characters
    print(next_chunk)

# 한 줄씩 읽기
with open('test.txt', 'r', encoding='utf-8') as f:
    line = f.readline()  # 첫 번째 줄
    print(line, end='')  # 줄바꿈 문자가 이미 포함되어 있음
    
    second_line = f.readline()  # 두 번째 줄
    print(second_line, end='')

# 모든 줄을 리스트로 읽기
with open('test.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
    print(f"파일의 총 줄 수: {len(lines)}")
    
    for i, line in enumerate(lines, 1):
        print(f"줄 {i}: {line.strip()}")  # strip()으로 줄바꿈 제거

# for 문으로 파일 읽기 (메모리 효율적)
with open('test.txt', 'r', encoding='utf-8') as f:
    for line in f:  # 파일 객체는 이터러블하여 한 줄씩 순회
        print(line.strip())

# Read starting from a given position
with open('test.txt', 'r', encoding='utf-8') as f:
    # NOTE: for text files only offsets previously returned by tell() (or 0)
    # are well defined. An arbitrary offset such as 10 can land inside a
    # multi-byte UTF-8 character, which makes the following read fail to decode.
    f.seek(10)  # jump to offset 10 from the start of the file
    partial = f.read(20)  # read up to 20 characters (text mode counts characters, not bytes)
    print(partial)

대용량 파일 처리

# 큰 파일 효율적으로 처리
def count_lines(filename):
    """Return the number of lines in the UTF-8 text file *filename*.

    The file object is iterated lazily, one line at a time, so the whole
    file is never held in memory at once.
    """
    with open(filename, 'r', encoding='utf-8') as stream:
        # Each item produced by the file iterator is one line.
        return sum(1 for _ in stream)

# 청크 단위로 파일 읽기
def read_in_chunks(filename, chunk_size=1024):
    """Yield the contents of *filename* as successive ``bytes`` chunks.

    The file is opened in binary mode. Each yielded chunk is the result of
    one ``read(chunk_size)`` call, and iteration stops at the first empty
    read, i.e. at end of file.
    """
    with open(filename, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b''.
        yield from iter(lambda: stream.read(chunk_size), b'')

# 사용 예시
# for chunk in read_in_chunks('large_file.txt'):
#     process_data(chunk)

4️⃣ 파일 관리

import os
import shutil

# 파일 존재 여부 확인
if os.path.exists('test.txt'):
    print('파일이 존재합니다')
else:
    print('파일이 존재하지 않습니다')

# 파일인지 디렉토리인지 확인
if os.path.isfile('test.txt'):
    print('test.txt는 파일입니다')
elif os.path.isdir('test_dir'):
    print('test_dir은 디렉토리입니다')

# 파일 정보 확인
if os.path.exists('test.txt'):
    size = os.path.getsize('test.txt')  # 파일 크기(바이트)
    mtime = os.path.getmtime('test.txt')  # 수정 시간 (Unix 타임스탬프)
    import datetime
    mod_time = datetime.datetime.fromtimestamp(mtime)
    print(f"파일 크기: {size} 바이트")
    print(f"최종 수정 시간: {mod_time}")

# 파일 삭제
if os.path.exists('temp.txt'):
    os.remove('temp.txt')
    print('파일이 삭제되었습니다')

# 파일 이름 변경
if os.path.exists('old.txt'):
    # Both statements belong to the guard: rename only when the source
    # exists, then report that the rename happened.
    os.rename('old.txt', 'new.txt')
    print('파일 이름이 변경되었습니다')

# 파일 복사
import shutil
if os.path.exists('source.txt'):
    shutil.copy('source.txt', 'destination.txt')  # 파일 복사
    shutil.copy2('source.txt', 'destination2.txt')  # 메타데이터 포함 복사

# 디렉토리 생성
if not os.path.exists('new_directory'):
    os.makedirs('new_directory', exist_ok=True)  # 중첩 디렉토리 생성
    print('디렉토리가 생성되었습니다')

# 디렉토리 삭제
if os.path.exists('empty_dir') and os.path.isdir('empty_dir'):
    os.rmdir('empty_dir')  # 빈 디렉토리만 삭제 가능
    print('빈 디렉토리가 삭제되었습니다')

# 디렉토리와 내용 모두 삭제
if os.path.exists('dir_with_files') and os.path.isdir('dir_with_files'):
    shutil.rmtree('dir_with_files')
    print('디렉토리와 내용이 모두 삭제되었습니다')

# 파일 경로 다루기
filename = 'document.txt'
directory = 'projects/python'
full_path = os.path.join(directory, filename)  # 'projects/python/document.txt'
print(f"전체 경로: {full_path}")

# 경로 분해
path = '/home/user/documents/report.pdf'
dirname = os.path.dirname(path)    # '/home/user/documents'
basename = os.path.basename(path)  # 'report.pdf'
filename, ext = os.path.splitext(basename)  # ('report', '.pdf')
print(f"디렉토리: {dirname}")
print(f"파일명: {filename}, 확장자: {ext}")

5️⃣ 특수 파일 형식 처리

CSV 파일

import csv

# CSV 파일 쓰기
data = [
    ['이름', '나이', '직업'],
    ['김철수', 28, '개발자'],
    ['이영희', 32, '디자이너'],
    ['박지민', 24, '학생']
]

with open('people.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows(data)  # 모든 행 한 번에 쓰기

# CSV 파일 읽기
with open('people.csv', 'r', encoding='utf-8') as f:
    reader = csv.reader(f)
    header = next(reader)  # 헤더 행 읽기
    print(f"헤더: {header}")
    
    for row in reader:
        print(f"{row[0]}는 {row[1]}세 {row[2]}입니다.")

# 딕셔너리로 CSV 다루기
dict_data = [
    {'이름': '김철수', '나이': 28, '직업': '개발자'},
    {'이름': '이영희', '나이': 32, '직업': '디자이너'},
    {'이름': '박지민', '나이': 24, '직업': '학생'}
]

with open('people_dict.csv', 'w', newline='', encoding='utf-8') as f:
    fieldnames = ['이름', '나이', '직업']
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    
    writer.writeheader()  # 헤더 쓰기
    writer.writerows(dict_data)  # 딕셔너리 목록 쓰기

# 딕셔너리로 CSV 읽기
with open('people_dict.csv', 'r', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(f"{row['이름']}는 {row['나이']}세 {row['직업']}입니다.")

JSON 파일

import json

# JSON 파일 쓰기
data = {
    'name': '홍길동',
    'age': 30,
    'city': '서울',
    'hobbies': ['독서', '등산', '프로그래밍'],
    'active': True,
    'scores': {
        'math': 85,
        'english': 92,
        'python': 98
    }
}

with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=4)
    # ensure_ascii=False: 한글 등 유니코드 문자 그대로 저장
    # indent=4: 들여쓰기로 가독성 향상

# JSON 파일 읽기
with open('data.json', 'r', encoding='utf-8') as f:
    loaded_data = json.load(f)
    print(f"이름: {loaded_data['name']}")
    print(f"취미: {', '.join(loaded_data['hobbies'])}")
    print(f"파이썬 점수: {loaded_data['scores']['python']}")

# JSON 문자열 다루기
json_str = json.dumps(data, ensure_ascii=False)
print(json_str)

parsed_data = json.loads(json_str)
print(parsed_data['city'])

바이너리 파일

# 바이너리 파일 쓰기
with open('binary.dat', 'wb') as f:
    f.write(b'\x48\x65\x6c\x6c\x6f')  # 'Hello'의 바이트 표현

# 바이너리 파일 읽기
with open('binary.dat', 'rb') as f:
    data = f.read()
    print(data)  # b'Hello'
    print(data.decode('utf-8'))  # 'Hello'

# 구조화된 바이너리 데이터
import struct

# 정수와 문자열 패킹
with open('packed.bin', 'wb') as f:
    # 'i'는 4바이트 정수, '5s'는 5바이트 문자열
    packed_data = struct.pack('i5s', 42, b'Hello')
    f.write(packed_data)

# 언패킹
with open('packed.bin', 'rb') as f:
    data = f.read()
    unpacked = struct.unpack('i5s', data)
    print(f"정수: {unpacked[0]}, 문자열: {unpacked[1].decode('utf-8')}")

# 이미지 파일 처리 예시
def copy_image(src, dst):
    """Copy the binary file at *src* to *dst*, byte for byte.

    Both files are opened in binary mode, so no text decoding or newline
    translation is applied. The data is streamed in buffered chunks rather
    than loaded with a single ``read()``, so large images do not have to fit
    in memory.

    Args:
        src: Path of the existing file to read.
        dst: Path of the file to create or overwrite.

    Raises:
        OSError: If *src* cannot be opened for reading or *dst* cannot be
            opened for writing (for example ``FileNotFoundError``).
    """
    # Imported locally so the snippet stays self-contained.
    import shutil

    with open(src, 'rb') as src_file, open(dst, 'wb') as dst_file:
        shutil.copyfileobj(src_file, dst_file)

# copy_image('source.jpg', 'copy.jpg')

6️⃣ 파일 시스템 탐색

import os

# 현재 작업 디렉토리 확인
current_dir = os.getcwd()
print(f"현재 디렉토리: {current_dir}")

# 디렉토리 변경
# os.chdir('../')  # 상위 디렉토리로 이동
# print(f"변경된 디렉토리: {os.getcwd()}")

# 디렉토리 내용 나열
entries = os.listdir('.')  # 현재 디렉토리 내용
print("현재 디렉토리 내용:")
for entry in entries:
    if os.path.isfile(entry):
        print(f"파일: {entry}")
    elif os.path.isdir(entry):
        print(f"디렉토리: {entry}")

# 특정 확장자 파일 찾기
def find_files_by_extension(directory, extension):
    """Return paths of regular files directly inside *directory* whose names end with *extension*.

    Only the top level of *directory* is inspected (no recursion). Each
    returned path is *directory* joined with the entry name, in the order
    ``os.listdir`` reports the entries.
    """
    names = os.listdir(directory)
    paths = [os.path.join(directory, name) for name in names]
    return [
        path
        for name, path in zip(names, paths)
        if os.path.isfile(path) and name.endswith(extension)
    ]

# 재귀적으로 디렉토리 탐색
def list_all_files(directory):
    """Print the path of every file found under *directory*, recursing into subdirectories.

    Each path is the directory reported by ``os.walk`` joined with the file
    name, printed one per line.
    """
    for parent, _subdirs, names in os.walk(directory):
        for name in names:
            print(os.path.join(parent, name))

# 특정 패턴의 파일 찾기
import glob
python_files = glob.glob('*.py')  # 현재 디렉토리의 모든 .py 파일
print("Python 파일:")
for file in python_files:
    print(file)

# 재귀적으로 특정 패턴 찾기
all_python_files = glob.glob('**/*.py', recursive=True)
print("모든 하위 디렉토리의 Python 파일:")
for file in all_python_files:
    print(file)

주요 팁

✅ 파일 처리 모범 사례:

항상 with 문을 사용하여 파일이 자동으로 닫히게 한다
텍스트 파일 처리 시 항상 인코딩을 명시한다 (주로 'utf-8')
한글 등 유니코드 처리 시 encoding='utf-8' 사용이 필수적이다
적절한 예외 처리로 파일 접근 오류에 대응한다
파일 경로는 문자열 연결이 아닌 os.path.join() 사용을 권장한다
큰 파일은 한 번에 모두 읽지 말고 한 줄씩 또는 청크 단위로 처리한다
CSV, JSON 등 특수 형식은 전용 모듈을 활용한다
os.path 함수를 이용해 경로를 안전하게 다룬다
바이너리 데이터는 항상 바이너리 모드('rb', 'wb')로 처리한다
경로에 변수가 포함된 경우 f-string으로 직접 이어 붙이기보다는 os.path.join()을 사용한다

KR_FileIO

Python 파일 입출력(File I/O) 개념 정리

1️⃣ 파일 열기/닫기

파일 모드

2️⃣ 파일 쓰기

파일 위치 제어

3️⃣ 파일 읽기

대용량 파일 처리

4️⃣ 파일 관리

5️⃣ 특수 파일 형식 처리

CSV 파일

JSON 파일

바이너리 파일

6️⃣ 파일 시스템 탐색

주요 팁

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!