Programmatically Modifying Text Files Using Python

In machine learning, data is usually stored as text files. When a data file is relatively small, say a few hundred lines or fewer, it’s usually easy to edit the file in a text editor or in Excel. But when you’re working with a relatively large data file, you’ll usually need to edit the file programmatically.

One rainy afternoon I sat down and coded up five basic functions in Python. I named them after their SQL counterparts: delete_lines(), update_line(), insert_line(), append_line(), and show_file().

There are many ways to approach implementing any of these operations. My preference is . . . well, it’s best explained by looking at the screenshot above and my code below.

Good fun on a rainy Sunday afternoon.


Text files can’t modify themselves. Cellular automata are fascinating systems that are self-modifying. I vividly remember programming Rule 30 (left) years ago using BASIC on a TRS-80 computer when I was first learning how to code. The center image is a 3D example. The image on the right is Rule 3283936144.


# file_modify.py
# delete_lines(), update_line(),
# insert_line(), append_line(),
# show_file()

def show_file(fn, num_lines, indices=False,
 strip_nl=False):
  """Print the first num_lines lines of the text file fn.

  Args:
    fn: path of the file to display.
    num_lines: number of lines to print. Any value that never equals
      a 1-based line number (for example -1 or 0) prints the whole file.
    indices: when true, prefix each line with its 1-based line number
      formatted as "[%3d]  ".
    strip_nl: when true, remove trailing whitespace (including the
      newline) from each line before printing. When false, the line's
      own newline is printed in addition to the one print() adds, so
      the output is double-spaced.
  """
  # The with-block closes the file even if printing raises.
  with open(fn, "r") as fin:
    for idx, line in enumerate(fin, start=1):
      if strip_nl:
        line = line.rstrip()
      if indices:
        print("[%3d]  " % idx, end="")
      print(line)
      if idx == num_lines:
        break

def delete_lines(src, dest, omit_lines):
  """Copy src to dest, leaving out the lines listed in omit_lines.

  Args:
    src: path of the file to read.
    dest: path of the file to write. It is opened in "w" mode, so it
      must not be the same file as src (src would be truncated before
      it is read).
    omit_lines: iterable of 1-based line numbers to drop. Numbers that
      do not correspond to a line in src are ignored.
  """
  # Materialize once: gives O(1) membership tests for lists and keeps a
  # one-shot iterator from being exhausted by the first "in" test.
  # Sets and ranges already have fast membership, so leave them alone.
  if not isinstance(omit_lines, (set, frozenset, range)):
    omit_lines = set(omit_lines)

  # The with-block closes both files even if a read or write raises.
  with open(src, "r") as fin, open(dest, "w") as fout:
    for line_num, line in enumerate(fin, start=1):
      if line_num not in omit_lines:
        fout.write(line)  # line keeps its embedded newline

def update_line(src, dest, line_num, new_val):
  """Copy src to dest, replacing line number line_num with new_val.

  Args:
    src: path of the file to read.
    dest: path of the file to write. It is opened in "w" mode, so it
      must not be the same file as src.
    line_num: 1-based number of the line to replace. A value below 1
      replaces the first line; a value past the end of src writes
      new_val after the last line.
    new_val: replacement text, written verbatim. It must carry its own
      trailing newline.
  """
  # The with-block closes both files even if a read or write raises.
  with open(src, "r") as fin, open(dest, "w") as fout:
    ln = 1
    while ln < line_num:
      line = fin.readline()  # keeps its embedded newline
      if not line:
        break  # hit EOF early; nothing more to copy before new_val
      fout.write(line)
      ln += 1

    fin.readline()  # consume (discard) the line being replaced
    fout.write(new_val)

    fout.writelines(fin)  # copy the remaining lines unchanged

def insert_line(src, dest, at_line, new_val):
  """Copy src to dest, inserting new_val so it becomes line at_line.

  Args:
    src: path of the file to read.
    dest: path of the file to write. It is opened in "w" mode, so it
      must not be the same file as src.
    at_line: 1-based line number the new text will occupy. A value
      below 1 inserts before the first line; a value past the end of
      src writes new_val after the last line.
    new_val: text to insert, written verbatim. It must carry its own
      trailing newline.
  """
  # The with-block closes both files even if a read or write raises.
  with open(src, "r") as fin, open(dest, "w") as fout:
    ln = 1
    while ln < at_line:
      line = fin.readline()  # keeps its embedded newline
      if not line:
        break  # hit EOF early; nothing more to copy before new_val
      fout.write(line)
      ln += 1

    fout.write(new_val)

    fout.writelines(fin)  # copy the remaining lines unchanged

def append_line(fn, new_val):
  """Append new_val to the end of the file fn, modifying it in place.

  Args:
    fn: path of the file to append to. It is opened in "a" mode, so
      it is created if it does not exist.
    new_val: text to append, written verbatim. Include a trailing
      newline if the text should form a complete line.
  """
  # The with-block closes the file even if the write raises.
  with open(fn, "a") as f:
    f.write(new_val)


# ==================

print("\nBegin modify file demo ")

# Step 0: display the starting data. Passing -1 as num_lines shows
# every line, because no line index ever equals -1.
print("\nData: ")
src = ".\\dummy_data.txt"
show_file(src, -1, indices=True, strip_nl=True)

# Step 1: insert a tab-delimited record as line 2 of a new file.
print("\nInserting new data as line 2")
src, dest = ".\\dummy_data.txt", ".\\dummy_data_new.txt"
new_val = "\t".join(
  ["X", "99", "central", "99999.00", "moderate"]) + "\n"
insert_line(src, dest, 2, new_val)
show_file(dest, -1, indices=True, strip_nl=True)

# Step 2: overwrite the record just inserted; output goes to another file.
print("\nModifying line 2")
src, dest = ".\\dummy_data_new.txt", ".\\dummy_data_new_new.txt"
new_val = "\t".join(
  ["M", "55", "central", "55555.00", "moderate"]) + "\n"
update_line(src, dest, 2, new_val)
show_file(dest, -1, indices=True, strip_nl=True)

# Step 3: drop lines 2 and 3; output goes to yet another file.
print("\nDeleting lines 2, 3")
src, dest = ".\\dummy_data_new_new.txt", ".\\dummy_data_new_new_new.txt"
delete_lines(src, dest, [2,3])
show_file(dest, -1, indices=True, strip_nl=True)

# Step 4: append a record to the last file in place.
print("\nAppending X 99 central 99999.00 moderate")
fn = ".\\dummy_data_new_new_new.txt"
new_val = "\t".join(
  ["X", "99", "central", "99999.00", "moderate"]) + "\n"
append_line(fn, new_val)
show_file(fn, -1, indices=True, strip_nl=True)

print("\nEnd modify")
This entry was posted in Machine Learning. Bookmark the permalink.