Here I show how to update data into a DocTable. In addition to providing updated values, DocTable also allows you to create map functions to transform existing data.
import random
import pandas as pd
import numpy as np
import sys
sys.path.append('..')
import doctable
import dataclasses
@doctable.schema
class Record:
__slots__ = []
id: int = doctable.IDCol()
name: str = doctable.Col(nullable=False)
age: int = None
is_old: bool = None
def new_db():
table = doctable.DocTable(schema=Record, target=':memory:', verbose=True)
N = 10
for i in range(N):
age = random.random() # number in [0,1]
is_old = age > 0.5
table.insert({'name':'user_'+str(i), 'age':age, 'is_old':is_old}, verbose=False)
return table
table = new_db()
print(table)
<DocTable (4 cols)::sqlite:///:memory::_documents_>
table.select_df(limit=3)
DocTable: SELECT _documents_.id, _documents_.name, _documents_.age, _documents_.is_old FROM _documents_ LIMIT ? OFFSET ?
id | name | age | is_old | |
---|---|---|---|---|
0 | 1 | user_0 | 0.953914 | True |
1 | 2 | user_1 | 0.274032 | False |
2 | 3 | user_2 | 0.951758 | True |
Update multiple (or single) rows with same values.
table = new_db()
table.select_df(where=table['is_old']==True, limit=3, verbose=False)
id | name | age | is_old | |
---|---|---|---|---|
0 | 1 | user_0 | 0.670813 | True |
1 | 3 | user_2 | 0.938219 | True |
2 | 4 | user_3 | 0.642325 | True |
table = new_db()
table.update({'age':1},where=table['is_old']==True)
table.update({'age':0},where=table['is_old']==False)
table.select_df(limit=3, verbose=False)
DocTable: UPDATE _documents_ SET age=? WHERE _documents_.is_old = 1 DocTable: UPDATE _documents_ SET age=? WHERE _documents_.is_old = 0
id | name | age | is_old | |
---|---|---|---|---|
0 | 1 | user_0 | 0 | False |
1 | 2 | user_1 | 0 | False |
2 | 3 | user_2 | 1 | True |
This feature allows you to update columns based on the values of old columns.
table = new_db()
values = {table['name']:table['name']+'th', table['age']:table['age']+1, table['is_old']:True}
table.update(values)
table.select_df(limit=3, verbose=False)
DocTable: UPDATE _documents_ SET name=(_documents_.name || ?), age=(_documents_.age + ?), is_old=?
id | name | age | is_old | |
---|---|---|---|---|
0 | 1 | user_0th | 1.664290 | True |
1 | 2 | user_1th | 1.449483 | True |
2 | 3 | user_2th | 1.617809 | True |
This is useful for when the updating of one column might change the value of another, depending on the order in which it was applied.
table = new_db()
values = [(table['name'],table['age']-1), (table['age'],table['age']+1),]
table.update(values)
table.select_df(limit=3, verbose=False)
DocTable: UPDATE _documents_ SET name=(_documents_.age - ?), age=(_documents_.age + ?)
id | name | age | is_old | |
---|---|---|---|---|
0 | 1 | -0.250027826433728 | 1.749972 | True |
1 | 2 | -0.100625399563716 | 1.899375 | True |
2 | 3 | -0.128284967374597 | 1.871715 | True |
table = new_db()
table.update({'age':1.00}, wherestr='is_old==true')
table.select_df(limit=5, verbose=False)
DocTable: UPDATE _documents_ SET age=? WHERE is_old==true
id | name | age | is_old | |
---|---|---|---|---|
0 | 1 | user_0 | 1.000000 | True |
1 | 2 | user_1 | 1.000000 | True |
2 | 3 | user_2 | 0.482096 | False |
3 | 4 | user_3 | 1.000000 | True |
4 | 5 | user_4 | 1.000000 | True |