--
-- data-model.sql
--
-- created by philg@mit.edu on July 6, 1996
--

-- Illustra doesn't have built-in sequence generators so you 
-- have to use transactions on a one-row table to generate unique
-- keys

create table id_numbers (
	reader		integer,
	reader_order	integer,
	reader_address	integer,
        ...
);

create table journals (
	issn 		text not null primary key,
	title		text,
	title_prefix	text,
	-- a Unix directory name, usually sort of like the title
	-- this is where we'll expect to find static files
	dirname	text,
	-- stuff we need for presentation of journal characteristics
	frequency	text,
	...
	-- extra stuff for the PLS Blade to catch
	search_keywords		text,
	-- prices
	individual_price	numeric(7,2),
	institutional_price	numeric(7,2),
	student_price		numeric(7,2),
	-- for journals like LEA, where subscribers to Leonardo
	-- get a price break
	special_price			numeric(7,2),	-- usually NULL
	special_price_explanation	text,
	foreign_shipping	numeric(7,2),
	-- individual issue prices
	current_issue_price	numeric(7,2),
	individual_back_issue_price	numeric(7,2),
	institutional_back_issue_price	numeric(7,2),
	individual_back_issue_double_price	numeric(7,2),
	institutional_back_issue_double_price	numeric(7,2),
	foreign_shipping_single_issue		numeric(7,2)
);

-- build a full-text index using the fabulous PLS Blade
create index journals_pls_index on journals using pls
( issn, title, editor_name, editor_bio, editorial_board, submission_guidelines, blurb );

-- a journal can be in more than one category, so we need a separate
-- table to express the relation
create table journal_category_map (
	issn		text references journals,
	category	text
);

-- let's make it fast to find out either which journals are in 
-- a category OR which categories a journal is in

create index journal_category_map_by_issn 
on journal_category_map using btree ( issn );

create index journal_category_map_by_category
on journal_category_map using btree ( category );

create table books (
	isbn 	text not null primary key,
	-- to make sorting work, we do NOT include leading article, e.g., "The" 
	title		text,
	subtitle	text,
	title_prefix	text,	-- put an article here, with no trailing space
	volume_number	integer,	-- can only have 1, 2, 3
	series		text,
	-- a Unix directory name
	-- this is where we'll expect to find static files
	-- we construct this with the first three chars of the
	-- author name, then the first char of the title
	-- then P or H for binding, all uppercase
	-- e.g., ABESH for Abelson & Sussman, Structure & Interp
	-- of Computer Programs, hardcover
	dirname	text,
	-- some books we don't want to present in category home pages
	-- for example, if we have a multivolume set, we don't want
	-- to see Foobar Vol 1, Foobar Vol 2, Foobar Vol 1 & Vol 2
	-- in the Engineering section because the page for 
	-- Foobar Vol 1 & Vol 2 will have all three ordering options
	-- the solution is to set the PRESENT_P to false on the 
	-- individual volumes but then make them have a related_to_type
	-- of 'book' and a related_to_key that is the ISBN of the 
	-- multi-volume set
	present_p	boolean default 't',
	-- this is the type of the thing to which I'm related
	related_to_type	text,	-- 'book','video','cd','journal'
	related_to_key	text,	-- ISBN or ISSN
	out_of_print_p	boolean default 'f',
	publication_date	date,
	-- this column is computed from the publication_date column (see below)
	new_p		boolean virtual,
	-- we want to know if a book is a paperback original
	-- in which case we present it with "new" books despite its
	-- binding_type of 'paper'
	paperback_original_p	boolean default 'f',		
	blurb		text,	-- HTML, main sales stuff
	description	text,	-- e.g., '144 pp., 78 illus, 8 in color'
	table_of_contents	large_text,	-- HTML
	author_name	text,	-- could be several people
	author_bio	text,	-- HTML, describes author(s) in one paragraph
	author_web_site	text,	-- url
	author_email	text,
	search_keywords text,	-- arbitrary stuff that Terry and Marney add for PLS
	-- prices
	intro_price	numeric(7,2),
	intro_price_limit_date	date,	-- last day you can get the limit price
	price		numeric(7,2),
	discount_code	text,		-- 'S', 'T', 'X' (only used by mega-bookstore-chains)
	-- we're going to add semantics to binding_type to handle videos
	-- and software with binding_type = 'video' and 'software' and 'audiocd'
	binding_type	text
);

-- for the virtual new_p column

create function new_p(like books) 
  	returns boolean
  	as return $1.publication_date + interval '180' day(3) > current_date;

-- this makes it fast to find all the books related to Book X

create index books_by_related_to_key on books
	using btree(related_to_key);

-- a PLS index on a bunch of columns

create index books_pls_index on books using pls
( isbn, title, subtitle, author_name, binding_type, description, search_keywords );


--
-- a book can be in more than one category
-- so we have a map table
--

create table book_category_map (
	isbn		text,
	category	text
);
create index book_category_map_by_isbn on book_category_map
	using btree(isbn);
create index book_category_map_by_category on book_category_map
	using btree(category);

--
-- textbooks are categorized separately as well
--

create table textbook_category_map (
	isbn		text,
	-- is this primarily a textbook?
	primary_p	boolean default 'f',
	category	text
);
create index textbook_category_map_by_isbn on textbook_category_map
	using btree(isbn);
create index textbook_category_map_by_category on textbook_category_map
	using btree(category);

-- requests for textbooks for evaluation by professors we only allow
-- each prof to request each book once in the table; they have to send
-- email explaining why they want a second copy (probably they are in
-- fact complaining that they didn't get the first copy they requested)

create table textbook_requests (
	reader_id		integer not null references readers,
	isbn			text not null references books,
	-- bunch of stuff typed in by the requesting professor
	course			text,
	enrollment		text,
	date_commences		text,
	level			text,
	current_textbook	text,
	institution		text,
	department		text,
	shipping_address	integer not null references reader_addresses(reader_address_id),
	request_time		timestamp(0),
	-- here's the integrity constraint that keeps prof from
	-- requesting the same book again
	primary key(reader_id,isbn)
);

-- professional, published reviews of books or journals.  We can store 
-- either the complete review or just a URL from a well-known, stable 
-- site.

create table reviews (
	product_key	text,	-- ISBN for a book, ISSN for a journal
	product_type	text,	-- 'book', 'journal'
	review_date	date,	
	url		text,	-- URL of a review (if blank, we assume to store the text)
	review_text	large_text
);


-- 
-- Readers
--

-- Names of people who've either bought books 
-- or joined a spam list.
create table readers (
	reader_id	integer not null primary key,
	email		text,	-- we use email, first_names, last_names as a key
	reader_type	text,	-- probably one of 'individual', 'library', 'wholesaler', 'retailer/bookseller'
	first_names	text,
	last_name	text,
	birthyear	integer,
	sex		char(1),	-- 'M' or 'F'
	day_phone	text,
	eve_phone	text
);
create index readers_index on readers using btree(reader_id);

-- this should dramatically speed looking up to see if a person
-- is already in the READERS table (except it doesn't
-- because we need to do case-insensitive search)
create index readers_email_index on readers using btree(email);

-- this functional index actually does speed the lookup, but 
-- sadly it locks us into Illustra or Informix since Oracle doesn't
-- give us functional indices.
create index readers_upper_email_index on readers using btree(upper(email));

-- we use this to figure out if a nickname or something was typed
create function substring_p(text,text) returns boolean
as 
return position($1 in $2) > 0 or position($2 in $1) > 0;

-- takes email, first_names, last_name and tries to find a match
-- returns the reader_id or NULL

-- must be used like this 
-- select * from lookup_reader('philg@mit.edu','Philip G.','Greenspun');

-- this was wicked slow until we added the functional index above

create function lookup_reader(text,text,text) returns setof(integer)
as
select reader_id from readers where upper(email) = upper($1) and substring_p(upper(first_names),upper($2)) and substring_p(upper(last_name),upper($3));

create table reader_addresses (
	reader_address_id	integer not null,
	reader_id integer	not null references readers,
	address_type	text,   -- bill, ship, spam_list
	line2	text,
	line1	text,
	postal_code	text,
	state	text,
	city	text,
	country	character(2) references country_codes(iso_code)	-- ISO country code
);

-- args are reader_id ($1), line1($2), line2($3), postal_code($4)
create function lookup_reader_address(integer,text,text,text) returns setof(integer)
as
select reader_address_id from reader_addresses where 
reader_id = $1 and line1 = $2 and ((line2 = $3) or (line2 is null and $3 is null)) and postal_code = $4;


create table reader_comments (
	reference	text,	-- 'ISBN0262123451' for a book, 'ISSN903412' for a journal
	email		text,
	name		text,
	message		text,
	posting_time	timestamp(0)
);
create index reader_comments_index on reader_comments using btree(reference);

--
-- we record readers who are interested in books or journals
--

-- we allow duplicate submissions because
-- we are going to do a DISTINCT for all queries 
-- anyway

-- we record interest in specific books or journals though
-- we tell people that they're joining "the Anthropology list"
-- and send out email accordingly

create table spam_list (
	domain		text,	-- for random lists
	issn		text,	-- if a journal
	isbn		text,	-- if a book
	reader_id	integer references readers
);


--
-- Orders
--
-- the business end of the system
--

-- a table like this is useful for generating user interface
-- but also to maintain database integrity and make sure that bogus
-- country codes can't be entered 

create table country_codes (
	full_name	text not null,
	iso_code	char(2)
);

--
-- if multiple items are ordered, then that puts multiple rows 
-- in this table, but all of those rows will have the same
-- order ID
--

-- we don't store credit card numbers in the database; we encrypt them
-- with a public key and then transmit them to the legacy system where
-- they are decrypted with a corresponding private key

create table reader_orders (
	reader_order_id	integer not null,
	reader_id	integer not null,	-- who ordered it
	product_key	text,	-- ISBN for a book, ISSN for a journal
	-- these have the values that go into the PPSB report
	product_type	text,	-- 'book', 'journal'
	subscriber_class	text,	-- relevant mostly for journals, e.g., 'individual', 'current_issue'
	renewal_p	boolean default 'f',
	account_number	text,	-- relevant only for journal renewals
	order_time	timestamp(0),
	quantity	integer default 1,
	payment_method	text,	-- 'creditcard', 'purchaseorder', maybe 'ecash'
	credit_card_type char(1),	-- M, V, or A
	-- holds output of a PGP run with both cc# and exp date
	credit_card_encrypted	text,
	purchase_order_number	text,
	gift_name	text,	-- NULL if not a gift
	transmitted_p	boolean default 'f',	-- set to true when order xmitted to legacy system
	fulfilled_p	boolean default 'f',	-- set to true when order is shipped
	paid_p		boolean default 'f',	-- set to true when First Virtual is confirmed, for example
	cancelled_p	boolean default 'f',	-- set to true if user cancels subscription or returns physical product
	-- this is stuff that we really ought to be able to get from other 
	-- tables but it is safer to insert them here (since the price
	-- in the books table might change but in a report you want an order
	-- from 1996 to have the price paid at the time
	price		decimal(7,2),
	shipping	decimal(7,2),
	tax		decimal(7,2),	-- also use this field for 20% international tariff
	-- arbitrary user-entered comment
	comment		text
) archive;
create index reader_orders_index_0 on reader_orders using btree (reader_order_id);
create index reader_orders_index_1 on reader_orders using btree (reader_id);
create index reader_orders_index_2 on reader_orders using btree (product_key);


---
--- Reporting
---

--- many of these are kludges to work around Illustra's inability to 
--- GROUP BY anything other than a raw column name

create function book_0_or_1 (text) returns integer
as
return position ($1 in 'book');

create function journal_0_or_1 (text) returns integer
as
return position ($1 in 'journal');

create function video_0_or_1 (text) returns integer
as
return position ($1 in 'video');

create view orders_for_report (reader_order_id, year, month, book, journal, video) as
select reader_order_id,
extract(year from order_time),
extract(month from order_time),
book_0_or_1(product_type),
journal_0_or_1(product_type),
video_0_or_1(product_type)
from reader_orders;

create view all_products (title, product_key, product_type)
as 
select title, isbn, 'book' from books
union 
select title, issn, 'journal' from journals;

create index all_products_by_product_key on all_products using btree (product_key);